diff --git a/README.md b/README.md index e801708..a2d2510 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ To get even better throughput(see [Benchmarking](#benchmarking) section), comput com.sgnatiuk kombi - 3.0.0 + 3.0.1 ``` @@ -35,7 +35,7 @@ repositories { } dependencies { - compile 'com.sgnatiuk:kombi:3.0.0' + compile 'com.sgnatiuk:kombi:3.0.1' } ``` ## Combinations @@ -155,49 +155,61 @@ There is an overloaded builder method `CartesianBuilder.cartesianProductOf(..., ``` ## Benchmarking -Measured throughput of generation of combination/cartesian product item (generated items per second) +Measured time of generation of combination/cartesian product items (microseconds to generate all items) -Benchmark results: +Feel free to run benchmarks by yourself: +``` +./gradlew clean kombi-jmh:jmh +``` + + +Benchmark results(less is better): ``` Ubuntu 18.04.4 LTS Intel® Core™ i7-6500U CPU @ 2.50GHz × 4 -JMH version: 1.19 -VM version: JDK 1.8.0_242, VM 25.242-b08 -VM invoker: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java -VM options: -Xms4g -Xmx4g -Warmup: 10 iterations, 1 s each -Measurement: 10 iterations, 1 s each -Timeout: 10 min per iteration -Threads: 1 thread, will synchronize iterations -Benchmark mode: Average time, time/op +# JMH version: 1.22 +# VM version: JDK 1.8.0_242, OpenJDK 64-Bit Server VM, 25.242-b08 +# VM invoker: /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java +# VM options: -Xms512m -Xmx1g +# Warmup: 5 iterations, 10 s each +# Measurement: 5 iterations, 10 s each +# Timeout: 10 min per iteration +# Threads: 1 thread, will synchronize iterations +# Benchmark mode: Average time, time/op Benchmark (itemsQuantity) Mode Cnt Score Error Units -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 3 avgt 10 0.416 ± 0.004 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 5 avgt 10 8.983 ± 0.045 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 7 avgt 10 525.439 ± 2.649 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 11 avgt 10 6402100.731 ± 208391.654 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 3 avgt 10 0.835 ± 0.010 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 5 avgt 10 16.800 ± 0.203 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 7 avgt 10 745.600 ± 13.021 us/op -c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 11 avgt 10 8956251.389 ± 54373.550 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 3 avgt 10 0.525 ± 0.002 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 5 avgt 10 10.777 ± 0.165 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 7 avgt 10 535.627 ± 44.617 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 11 avgt 10 5461936.892 ± 20583.561 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 3 avgt 10 0.608 ± 0.007 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 5 avgt 10 12.682 ± 1.236 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 7 avgt 10 578.047 ± 3.468 us/op -c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 11 avgt 10 6385427.416 ± 271354.148 us/op -c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps 7 avgt 10 1019.071 ± 15.560 us/op -c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps_keepingOrder 7 avgt 10 1086.156 ± 10.034 us/op -c.s.b.combination.CombinationBenchmark.Kombi_combinations_list 11 avgt 10 220.303 ± 1.957 us/op -c.s.b.combination.CombinationBenchmark.Kombi_combinations_list 19 avgt 10 78645.589 ± 1509.309 us/op -c.s.b.combination.CombinationBenchmark.Kombi_combinations_map 11 avgt 10 463.854 ± 1.873 us/op -c.s.b.combination.CombinationBenchmark.Kombi_combinations_map 19 avgt 10 169522.011 ± 695.623 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 3 avgt 10 0.396 ± 0.002 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 5 avgt 10 8.592 ± 0.190 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 7 avgt 10 507.613 ± 3.286 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Lists 11 avgt 10 6047357.993 ± 11218.642 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 3 avgt 10 0.363 ± 0.005 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 5 avgt 10 6.838 ± 0.176 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 7 avgt 10 374.914 ± 69.084 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists 11 avgt 10 3360446.209 ± 45311.037 us/op -``` +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 3 avgt 10 0.815 ± 0.066 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 5 avgt 10 15.611 ± 0.578 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 7 avgt 10 645.309 ± 43.153 us/op +c.s.b.cartesian.CartesianListBenchmark.Guava_cartesianProduct_Sets 11 avgt 10 7492806.803 ± 137744.113 us/op + +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 3 avgt 10 0.449 ± 0.059 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 5 avgt 10 8.432 ± 0.260 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 7 avgt 10 407.532 ± 1.454 us/op +c.s.b.cartesian.CartesianListBenchmark.Kombi_cartesianProduct_Lists_keepingOrder 11 avgt 10 4061078.368 ± 42057.603 us/op + +c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps 5 avgt 10 18.971 ± 1.902 us/op +c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps 7 avgt 10 1050.295 ± 20.054 us/op +c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps_keepingOrder 5 avgt 10 19.619 ± 2.603 us/op +c.s.b.cartesian.CartesianMapBenchmark.Kombi_cartesianProduct_Maps_keepingOrder 7 avgt 10 1212.077 ± 139.650 us/op + +c.s.b.combination.CombinationBenchmark.Kombi_combinations_list 11 avgt 10 216.704 ± 3.633 us/op +c.s.b.combination.CombinationBenchmark.Kombi_combinations_list 19 avgt 10 77641.630 ± 1561.054 us/op + +c.s.b.combination.CombinationBenchmark.Kombi_combinations_map 11 avgt 10 467.513 ± 2.014 us/op +c.s.b.combination.CombinationBenchmark.Kombi_combinations_map 19 avgt 10 170390.506 ± 3108.922 us/op -Feel free to run benchmarks by yourself: -``` -./gradlew clean kombi-jmh:jmh ``` +Comparing performance with Guava(microseconds per generation, less is better): + +![](kombi-jmh/charts/items_39916800.jpg) + diff --git a/RELEASENOTES.md b/RELEASENOTES.md new file mode 100644 index 0000000..062102a --- /dev/null +++ b/RELEASENOTES.md @@ -0,0 +1,12 @@ +v3.0.1 +* 'Cartesian product' from Collection> performance tuning.(~40% speed-up) + +v3.0.0 +* The library is fully migrated from Kotlin to Java to avoid adding of Kotlin runtime dependency to Java-only projects. +* Small performance fixes in the generation of the cartesian product. + +v2.2 +* Provided stream support(java.util.stream.Stream) + +v2.1 +* Provided split functionality allowing to split the cartesian product or combinations generation into equals chunks, so cartesian product can be generated in few threads independently. diff --git a/kombi-jmh/build.gradle b/kombi-jmh/build.gradle index 6d8c4af..9205955 100644 --- a/kombi-jmh/build.gradle +++ b/kombi-jmh/build.gradle @@ -5,6 +5,7 @@ plugins { description = '' dependencies { jmh project(':kombi-lib') + jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.22' compile "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version" compile group: 'com.google.guava', name: 'guava', version: '28.2-jre' @@ -19,33 +20,12 @@ compileTestKotlin { jmhJar.archiveFileName = 'benchmarks.jar' jmh { - jmhVersion = '1.21' // Specifies JMH version - - include = ['.*'] // include pattern (regular expression) for benchmarks to be executed - jvmArgs = ['-Xms4g', '-Xmx4g'] - - - benchmarkMode = ['avgt'] // Benchmark mode. Available modes are: [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] - timeUnit = 'us'// Output time unit. Available time units are: [m, s, ms, us, ns]. - verbosity = 'NORMAL' // Verbosity mode. Available modes are: [SILENT, NORMAL, EXTRA] - forceGC = true // Should JMH force GC between iterations? - failOnError = false // Should JMH fail immediately if any benchmark had experienced the unrecoverable error? - - operationsPerInvocation = 1 // Operations per invocation. - batchSize = 1 // Batch size: number of benchmark method calls per operation. (some benchmark modes can ignore this setting) - warmupBatchSize = 1 // Warmup batch size: number of benchmark method calls per operation. - fork = 1 // How many times to forks a single benchmark. Use 0 to disable forking altogether - warmupForks = 0 // How many warmup forks to make for a single benchmark. 0 to disable warmup forks. - threads = 1 // Number of worker threads to run with. - - iterations = 5 // Number of measurement iterations to do. - warmupIterations = 5 // Number of warmup iterations to do. + jmhVersion = '1.22' + jvmArgs = ['-Xms512m', '-Xmx1g'] humanOutputFile = project.file("${project.buildDir}/reports/jmh/human.txt") // human-readable output file resultsFile = project.file("${project.buildDir}/reports/jmh/results.csv") // results file - resultFormat = 'CSV' // Result format type (one of CSV, JSON, NONE, SCSV, TEXT) - - //more options by the link https://github.com/melix/jmh-gradle-plugin + resultFormat = 'TEXT' // Result format type (one of CSV, JSON, NONE, SCSV, TEXT) } diff --git a/kombi-jmh/charts/items_39916800.jpg b/kombi-jmh/charts/items_39916800.jpg new file mode 100644 index 0000000..c8ab4a8 Binary files /dev/null and b/kombi-jmh/charts/items_39916800.jpg differ diff --git a/kombi-jmh/charts/items_5040.jpg b/kombi-jmh/charts/items_5040.jpg new file mode 100644 index 0000000..c14fdd4 Binary files /dev/null and b/kombi-jmh/charts/items_5040.jpg differ diff --git a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianListBenchmark.kt b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianListBenchmark.kt index 6bdc374..e1d74a9 100644 --- a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianListBenchmark.kt +++ b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianListBenchmark.kt @@ -5,8 +5,14 @@ import com.google.common.collect.Sets import com.sgnatiuk.cartesian.CartesianBuilder.cartesianProductOf import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.Blackhole +import java.util.concurrent.TimeUnit @State(Scope.Benchmark) +@Fork(2) +@Warmup(iterations = 5) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) open class CartesianListBenchmark { @Param("3", "5", "7", "11") @@ -18,44 +24,49 @@ open class CartesianListBenchmark { @Setup(Level.Trial) fun doSetup() { listOfLists = List(itemsQuantity) { i -> - List(i + 1) { it } + List(i + 1) { it + 1 } } listOfSets = listOfLists.map { it.toSet() } + println("\n=================================================") + println("combinationsQuantity=${cartesianProductOf(listOfLists).combinationsCount()}") + println("Data:") + listOfLists.forEach { + println(it) + } + println("=================================================\n") } @Benchmark fun Kombi_cartesianProduct_Lists(blackhole: Blackhole) { for (combination in cartesianProductOf(listOfLists, false)) { - for (combinationItem in combination) { - blackhole.consume(combinationItem) - } + iterateWithIterator(combination, blackhole) } } @Benchmark fun Kombi_cartesianProduct_Lists_keepingOrder(blackhole: Blackhole) { for (combination in cartesianProductOf(listOfSets, true)) { - for (combinationItem in combination) { - blackhole.consume(combinationItem) - } + iterateWithIterator(combination, blackhole) } } @Benchmark fun Guava_cartesianProduct_Sets(blackhole: Blackhole) { for (combination in Sets.cartesianProduct(listOfSets)) { - for (combinationItem in combination) { - blackhole.consume(combinationItem) - } + iterateWithIterator(combination, blackhole) } } @Benchmark fun Guava_cartesianProduct_Lists(blackhole: Blackhole) { for (combination in Lists.cartesianProduct(listOfLists)) { - for (combinationItem in combination) { - blackhole.consume(combinationItem) - } + iterateWithIterator(combination, blackhole) + } + } + + private fun iterateWithIterator(combination: MutableList, blackhole: Blackhole) { + for (combinationItem in combination) { + blackhole.consume(combinationItem) } } } diff --git a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianMapBenchmark.kt b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianMapBenchmark.kt index 731d176..5aaba8c 100644 --- a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianMapBenchmark.kt +++ b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/cartesian/CartesianMapBenchmark.kt @@ -3,11 +3,17 @@ package com.sgnatiuk.benchmark.cartesian import com.sgnatiuk.cartesian.CartesianBuilder.cartesianProductOf import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.Blackhole +import java.util.concurrent.TimeUnit @State(Scope.Benchmark) +@Fork(2) +@Warmup(iterations = 5) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) open class CartesianMapBenchmark { - @Param("7") + @Param("5", "7") var itemsQuantity: Int = 0 lateinit var mapOf: Map> diff --git a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/combination/CombinationBenchmark.kt b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/combination/CombinationBenchmark.kt index 2765fbc..09b20e4 100644 --- a/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/combination/CombinationBenchmark.kt +++ b/kombi-jmh/src/jmh/kotlin/com/sgnatiuk/benchmark/combination/CombinationBenchmark.kt @@ -3,9 +3,14 @@ package com.sgnatiuk.benchmark.combination import com.sgnatiuk.combination.CombinationsBuilder.combinationsOf import org.openjdk.jmh.annotations.* import org.openjdk.jmh.infra.Blackhole - +import java.util.concurrent.TimeUnit @State(Scope.Benchmark) +@Fork(2) +@Warmup(iterations = 5) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) open class CombinationBenchmark { @Param("11", "19") @@ -18,6 +23,10 @@ open class CombinationBenchmark { fun doSetup() { list = List(itemsQuantity){ it } map = (1..itemsQuantity).map { it to it.toString() }.toMap() + println("\n=================================================") + println("itemsQuantity=$itemsQuantity") + println("combinationsQuantity=${combinationsOf(list).combinationsNumber()}") + println("=================================================\n") } @Benchmark diff --git a/kombi-lib/build.gradle b/kombi-lib/build.gradle index bfae1ea..69c85ee 100644 --- a/kombi-lib/build.gradle +++ b/kombi-lib/build.gradle @@ -13,7 +13,7 @@ plugins { } ext{ - libVersion = '3.0.0' + libVersion = '3.0.1' libPackage = 'com.sgnatiuk' libName = 'kombi' } diff --git a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductMap.java b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductMap.java index 81fdb83..9b7dcf1 100644 --- a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductMap.java +++ b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductMap.java @@ -6,22 +6,26 @@ class CartesianProductMap extends EncodableCartesianProduct> implements Serializable { - private final Map> values; - private final ArrayList dataKeys; - - CartesianProductMap(Map> values) { - this(values, false); - } + private final Map values; + private final Object[] dataKeys; + private final boolean keepOrder; CartesianProductMap(Map> values, boolean keepOrder) { this.values = copyWithOrder( values, keepOrder ? (o1, o2) -> 0 : new ValuesCountDesc<>() ); - this.dataKeys = new ArrayList<>(this.values.keySet()); + this.dataKeys = this.values.keySet().toArray(); + this.keepOrder = keepOrder; + } + + private CartesianProductMap(boolean keepOrder, Map values) { + this.values = values; + this.dataKeys = this.values.keySet().toArray(); + this.keepOrder = keepOrder; } - private Map> copyWithOrder( + private Map copyWithOrder( Map> data, Comparator>> comparator ) { @@ -34,23 +38,22 @@ private Map> copyWithOrder( )) .collect(Collectors.toMap( AbstractMap.SimpleEntry::getKey, - AbstractMap.SimpleEntry::getValue, + pair -> pair.getValue().toArray(), (v1, v2) -> { - throw new IllegalStateException("Unexpected key duplication in data:" + data); + throw new IllegalArgumentException("Unexpected key duplication in data:" + data); }, LinkedHashMap::new )); } - + @SuppressWarnings("unchecked") @Override protected MaskDecoder> maskDecoder() { return encoded -> { - HashMap decoded = new HashMap<>(); + HashMap decoded = new HashMap<>(encoded.length); for (int i = 0; i < encoded.length; i++) { - K fieldKey = dataKeys.get(i); - V value = values.get(fieldKey) - .get(encoded[i]); + K fieldKey = (K) dataKeys[i]; + V value = (V) values.get(fieldKey)[encoded[i]]; decoded.put(fieldKey, value); } return decoded; @@ -58,32 +61,45 @@ protected MaskDecoder> maskDecoder() { } @Override - protected Collection> values() { - return values.values(); + protected Object[][] values() { + Object[][] data = new Object[values.size()][]; + int index = 0; + for (Object[] value : values.values()) { + data[index++] = value; + } + return data; } + @SuppressWarnings("unchecked") @Override public List>> split(int n) { - + if (n < 2) { + return Collections.singletonList(this); + } List>> splitList = new ArrayList<>(n); - Map> descSortedData = copyWithOrder(values, new ValuesCountDesc<>()); - Map.Entry> firstEntry = descSortedData.entrySet().stream().findFirst().orElseThrow( - () -> new IllegalStateException("Expected at least one item in: " + descSortedData) - ); - ArrayList firstValue = firstEntry.getValue(); - int parts = Math.min(n, firstValue.size()); + + K keyOfMaxLengthArr = (K) dataKeys[0]; + Object[] arrWithMaxLength = values.get(keyOfMaxLengthArr); + for (int i = 1; i < dataKeys.length; i++) { + Object[] nextArr = values.get(dataKeys[i]); + if (nextArr.length > arrWithMaxLength.length) { + keyOfMaxLengthArr = (K) dataKeys[i]; + arrWithMaxLength = nextArr; + } + } + int parts = Math.min(n, arrWithMaxLength.length); int from = 0; for (int i = 0; i < parts; i++) { - int valuesPerChunk = (firstValue.size() - from) / (parts - i); + int valuesPerChunk = (arrWithMaxLength.length - from) / (parts - i); int to = from + valuesPerChunk; - LinkedHashMap> newData = new LinkedHashMap<>(descSortedData); - newData.put( - firstEntry.getKey(), - firstValue.subList(from, to) + Map nd = new HashMap<>(values); + nd.put( + keyOfMaxLengthArr, + Arrays.copyOfRange(arrWithMaxLength, from, to) ); - splitList.add(new CartesianProductMap<>(newData)); + splitList.add(new CartesianProductMap<>(keepOrder, nd)); from = to; } return splitList; diff --git a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSet.java b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSet.java index 1583531..b980b09 100644 --- a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSet.java +++ b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSet.java @@ -1,77 +1,134 @@ package com.sgnatiuk.cartesian; import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.List; +import java.util.*; class CartesianProductSet extends EncodableCartesianProduct> implements Serializable { - private final List> values; - - CartesianProductSet(Collection> values) { - this(values, false); - } + private final Object[][] valuesArr; + private final boolean keepOrder; CartesianProductSet(Collection> values, boolean keepOrder) { - this.values = convertToFixedOrderMap(values, keepOrder); + this(copyToArray(values, keepOrder), keepOrder); } - private List> convertToFixedOrderMap(Collection> data, boolean keepOrder) { - List> res = new ArrayList<>(); - for (Collection originData : data) { - res.add(new ArrayList<>(originData)); - } + private CartesianProductSet(Object[][] data, boolean keepOrder) { + this.valuesArr = data; + this.keepOrder = keepOrder; if (!keepOrder) { - res.sort(new ValuesCountDesc<>()); + Arrays.sort(valuesArr, new ArrValuesCountDesc()); } - return res; } + private static Object[][] copyToArray(Collection> data, boolean keepOrder) { + Object[][] res = new Object[data.size()][]; + int index = 0; + for (Collection originData : data) { + Object[] objects = originData.toArray(); + if (objects.length == 0) { + return new Object[0][]; + } + res[index++] = objects; + } + return res; + } @Override protected MaskDecoder> maskDecoder() { - return encoded -> { - List res = new ArrayList<>(encoded.length); - for (int i = 0; i < encoded.length; i++) { - res.add(values.get(i).get(encoded[i])); - } - return res; - }; + return DecodableCombination::new; + } + + private class DecodableCombination extends AbstractList { + + private final int[] localEncoded; + + public DecodableCombination(int[] encoded) { + this.localEncoded = new int[encoded.length]; + System.arraycopy(encoded, 0, localEncoded, 0, localEncoded.length); + } + + @Override + public int size() { + return localEncoded.length; + } + + @SuppressWarnings("unchecked") + @Override + public T get(int index) { + return (T) valuesArr[index][localEncoded[index]]; + } + + @Override + public Iterator iterator() { + return new Iterator() { + int index = 0; + int size = localEncoded.length; + + @Override + public boolean hasNext() { + return index < size; + } + + @Override + public T next() { + return get(index++); + } + }; + } } @Override - protected Collection> values() { - return values; + protected Object[][] values() { + return valuesArr; } @Override public List>> split(int n) { + if (n < 2) { + return Collections.singletonList(this); + } List>> splitList = new ArrayList<>(n); - ArrayList> descSortedData = new ArrayList<>(values); - descSortedData.sort(new ValuesCountDesc<>()); - List firstFieldValues = descSortedData.get(0); - int parts = Math.min(n, firstFieldValues.size()); + int maxLengthArrIndex = indexOfMaxLengthArray(); + int maxLength = valuesArr[maxLengthArrIndex].length; + + Object[] maxLengthArray = valuesArr[maxLengthArrIndex]; + int parts = Math.min(n, maxLength); int from = 0; for (int i = 0; i < parts; i++) { - int valuesPerChunk = (firstFieldValues.size() - from) / (parts - i); + int valuesPerChunk = (maxLength - from) / (parts - i); int to = from + valuesPerChunk; - ArrayList> newData = new ArrayList<>(descSortedData); - newData.set(0, firstFieldValues.subList(from, to)); - splitList.add(new CartesianProductSet<>(newData)); + + Object[][] data = new Object[valuesArr.length][]; + System.arraycopy(valuesArr, 0, data, 0, data.length); + data[maxLengthArrIndex] = Arrays.copyOfRange(maxLengthArray, from, to); + + splitList.add(new CartesianProductSet<>(data, keepOrder)); from = to; } return splitList; } - private static class ValuesCountDesc implements Comparator> { + private int indexOfMaxLengthArray() { + int maxLengthArrIndex = 0; + int maxLength = valuesArr[0].length; + + for (int i = 1; i < valuesArr.length; i++) { + if (valuesArr[i].length > maxLength) { + maxLength = valuesArr[i].length; + maxLengthArrIndex = i; + } + } + return maxLengthArrIndex; + } + + private static class ArrValuesCountDesc implements Comparator { + @Override - public int compare(Collection o1, Collection o2) { - return o2.size() - o1.size(); + public int compare(Object[] o1, Object[] o2) { + return o2.length - o1.length; } } } \ No newline at end of file diff --git a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSpliterator.java b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSpliterator.java new file mode 100644 index 0000000..4e3eb92 --- /dev/null +++ b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CartesianProductSpliterator.java @@ -0,0 +1,65 @@ +package com.sgnatiuk.cartesian; + +import java.math.BigInteger; +import java.util.*; +import java.util.function.Consumer; + +class CartesianProductSpliterator implements Spliterator { + + private CartesianProduct cartesianProduct; + private Iterator cartesianProductIterator; + private boolean isSizeKnown; + private long size; + + CartesianProductSpliterator(CartesianProduct cartesianProduct) { + this.cartesianProduct = cartesianProduct; + cartesianProductIterator = cartesianProduct.iterator(); + + Map.Entry sizeInfo = computeSize(); + isSizeKnown = sizeInfo.getKey(); + size = sizeInfo.getValue(); + } + + @Override + public boolean tryAdvance(Consumer action) { + if (cartesianProductIterator.hasNext()) { + action.accept(cartesianProductIterator.next()); + return true; + } else { + return false; + } + } + + @Override + public Spliterator trySplit() { + List> cartesianProducts = cartesianProduct.split(2); + + cartesianProduct = cartesianProducts.get(1); + cartesianProductIterator = cartesianProduct.iterator(); + Map.Entry sizeInfo = computeSize(); + isSizeKnown = sizeInfo.getKey(); + size = sizeInfo.getValue(); + + return new CartesianProductSpliterator<>(cartesianProducts.get(0)); + } + + @Override + public long estimateSize() { + return size; + } + + @Override + public int characteristics() { + int flags = Spliterator.CONCURRENT | Spliterator.IMMUTABLE | Spliterator.ORDERED; + return isSizeKnown + ? flags | Spliterator.SIZED | Spliterator.SUBSIZED + : flags; + } + + private Map.Entry computeSize() { + BigInteger combinationsCount = cartesianProduct.combinationsCount(); + return combinationsCount.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) > 0 + ? new AbstractMap.SimpleEntry<>(false, Long.MAX_VALUE) + : new AbstractMap.SimpleEntry<>(true, combinationsCount.longValueExact()); + } +} diff --git a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CombinationMask.java b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CombinationMask.java index a90834b..fb251cf 100644 --- a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CombinationMask.java +++ b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/CombinationMask.java @@ -42,6 +42,6 @@ private boolean increment() { overflow = incrementedCell / bases[index++]; } while (overflow != 0 && index < bases.length); - return overflow > 0 && index == bases.length; + return overflow > 0; } } diff --git a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/EncodableCartesianProduct.java b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/EncodableCartesianProduct.java index 645f409..981daad 100644 --- a/kombi-lib/src/main/java/com/sgnatiuk/cartesian/EncodableCartesianProduct.java +++ b/kombi-lib/src/main/java/com/sgnatiuk/cartesian/EncodableCartesianProduct.java @@ -1,9 +1,9 @@ package com.sgnatiuk.cartesian; import java.math.BigInteger; -import java.util.*; -import java.util.function.Consumer; -import java.util.function.Function; +import java.util.Collections; +import java.util.Iterator; +import java.util.function.Supplier; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -11,20 +11,22 @@ abstract class EncodableCartesianProduct implements CartesianProduct { protected abstract MaskDecoder maskDecoder(); - protected abstract Collection> values(); + protected abstract Object[][] values(); + + private Lazy combinationsCount = new Lazy<>(() -> multiplySubArraysLength(values())); @Override public BigInteger combinationsCount() { - return multiplyAll(values(), Collection::size); + return combinationsCount.get(); } @Override public Iterator iterator() { - if (values().isEmpty()) { + if (values().length == 0) { return Collections.emptyIterator(); } return new Iterator() { - private final CombinationMask dataEncoder = new CombinationMask(bases()); + private final CombinationMask dataEncoder = new CombinationMask(radixes()); private final MaskDecoder maskDecoder = maskDecoder(); @Override @@ -47,87 +49,41 @@ public Stream stream() { ); } - int[] bases() { - Collection> values = values(); - int[] radixes = new int[values.size()]; - int index = 0; - for (Collection value : values) { - radixes[index++] = value.size(); + int[] radixes() { + Object[][] values = values(); + int[] radixes = new int[values.length]; + for (int i = 0; i < values.length; i++) { + radixes[i] = values[i].length; } return radixes; } - public static BigInteger multiplyAll(Iterable items, Function intValue) { + private static BigInteger multiplySubArraysLength(Object[][] items) { + if (items.length == 0) { + return BigInteger.ZERO; + } BigInteger result = BigInteger.ONE; - boolean collectionEmpty = true; - - for (T item : items) { - collectionEmpty = false; - result = result.multiply(BigInteger.valueOf( - intValue.apply(item) - )); + for (Object[] item : items) { + result = result.multiply( + BigInteger.valueOf(item.length) + ); } - - return collectionEmpty ? BigInteger.ZERO : result; + return result; } -} - -class CartesianProductSpliterator implements Spliterator { - private CartesianProduct cartesianProduct; - private Iterator cartesianProductIterator; - private boolean isSizeKnown; - private long size; + private static class Lazy { + private final Supplier supplier; + private T value; - CartesianProductSpliterator(CartesianProduct cartesianProduct) { - this.cartesianProduct = cartesianProduct; - cartesianProductIterator = cartesianProduct.iterator(); - - Map.Entry sizeInfo = computeSize(); - isSizeKnown = sizeInfo.getKey(); - size = sizeInfo.getValue(); - } - - @Override - public boolean tryAdvance(Consumer action) { - if (cartesianProductIterator.hasNext()) { - action.accept(cartesianProductIterator.next()); - return true; - } else { - return false; + private Lazy(Supplier supplier) { + this.supplier = supplier; } - } - - @Override - public Spliterator trySplit() { - List> cartesianProducts = cartesianProduct.split(2); - - cartesianProduct = cartesianProducts.get(1); - cartesianProductIterator = cartesianProduct.iterator(); - Map.Entry sizeInfo = computeSize(); - isSizeKnown = sizeInfo.getKey(); - size = sizeInfo.getValue(); - - return new CartesianProductSpliterator<>(cartesianProducts.get(0)); - } - - @Override - public long estimateSize() { - return size; - } - - @Override - public int characteristics() { - int flags = Spliterator.CONCURRENT | Spliterator.IMMUTABLE | Spliterator.ORDERED; - return isSizeKnown - ? flags | Spliterator.SIZED | Spliterator.SUBSIZED - : flags; - } - private Map.Entry computeSize() { - BigInteger combinationsCount = cartesianProduct.combinationsCount(); - return combinationsCount.compareTo(BigInteger.valueOf(Long.MAX_VALUE)) > 0 - ? new AbstractMap.SimpleEntry<>(false, Long.MAX_VALUE) - : new AbstractMap.SimpleEntry<>(true, combinationsCount.longValueExact()); + private T get() { + if (value == null) { + value = supplier.get(); + } + return value; + } } } diff --git a/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductMapTest.kt b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductMapTest.kt index 543b3b6..6f74ab9 100644 --- a/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductMapTest.kt +++ b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductMapTest.kt @@ -7,6 +7,7 @@ import com.sgnatiuk.extensions.BigInt import org.junit.Assert.assertEquals import org.junit.Assert.assertTrue import org.junit.Test +import java.util.* import java.util.stream.Collectors internal class CartesianProductMapTest { @@ -51,6 +52,16 @@ internal class CartesianProductMapTest { assertEquals(splitFactor, splitCartesian.size) } + @Test(expected = IllegalArgumentException::class) + fun `cartesian product should throw on duplicated key in data`() { + val duplicatedKeyMap = TreeMap> { _, _ -> -1 }.apply { + put(1, listOf(1)) + put(1, listOf(2)) + } + + cartesianProductOf(duplicatedKeyMap) + } + @Test fun `verify split with factor 1 produces the same cartesian product`() { val cartesianProductMap = cartesianProductOf(dataMap) diff --git a/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSetTest.kt b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSetTest.kt index 4825e7a..b682afa 100644 --- a/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSetTest.kt +++ b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSetTest.kt @@ -14,11 +14,54 @@ internal class CartesianProductSetTest { @Test fun `verify empty collection is returned when passed empty collection`() { val emptyCollection = emptyList>() - cartesianProductOf(emptyCollection).forEach { + checkCartesianProductIsEmpty(emptyCollection) + } + + @Test + fun `cartesian product of single zero length collection should be empty`() { + val emptyCollection = listOf(emptyList()) + checkCartesianProductIsEmpty(emptyCollection) + } + + @Test + fun `cartesian product of list with at least one empty collection should be empty`() { + val emptyCollection = listOf( + emptyList(), + listOf(1), + listOf(1, 2) + ) + checkCartesianProductIsEmpty(emptyCollection) + } + + private fun checkCartesianProductIsEmpty(list: List>) { + cartesianProductOf(list).apply { + assertEquals(0, combinationsCount().longValueExact()) + }.forEach { _ -> throw RuntimeException("expected empty collection") } } + @Test + fun `stream of cartesian product of single zero length collection should be empty`() { + val emptyCollection = listOf(emptyList()) + val count = cartesianProductOf(emptyCollection) + .stream() + .flatMap { it.stream() } + .count() + assertEquals(0, count) + } + + @Test + fun `parallel stream of cartesian product of single zero length collection should be empty`() { + val emptyCollection = listOf(emptyList()) + val count = cartesianProductOf(emptyCollection) + .stream() + .parallel() + .flatMap { it.stream() } + .count() + assertEquals(0, count) + } + @Test fun `verify Cartesian product set returns all possible combinations`() { val result: List> = cartesianProductOf(dataList, false).toList() @@ -28,10 +71,7 @@ internal class CartesianProductSetTest { @Test fun `verify Cartesian product set with keep order returns all possible combinations`() { val result: List> = cartesianProductOf(dataList, true).toList() - assertContainsAll(expectedCartesianList, result) - expectedCartesianList.forEach { - assertTrue(result.contains(it)) - } + assertContainsAllWithOrder(expectedCartesianList, result) } @Test @@ -91,6 +131,16 @@ internal class CartesianProductSetTest { assertTrue(threads.size > 1) } + @Test + fun `parallel stream should keep order`() { + val cartesianProductSet = cartesianProductOf(dataList, true) + val actual = cartesianProductSet.stream() + .parallel() + .collect(Collectors.toSet()) + + assertContainsAllWithOrder(expectedCartesianList, actual) + } + private fun assertContainsAll( expected: Collection>, actual: Collection> @@ -106,4 +156,15 @@ internal class CartesianProductSetTest { assertTrue("Expected $expectedCombination, but not found in $actual", foundCombination) } } + + private fun assertContainsAllWithOrder( + expected: Collection>, + actual: Collection> + ) { + expected.forEach { expectedCombination -> + if (!actual.contains(expectedCombination)) { + throw AssertionError("Expected $expectedCombination in the same order, but not found in $actual") + } + } + } } \ No newline at end of file diff --git a/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSpliteratorTest.kt b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSpliteratorTest.kt new file mode 100644 index 0000000..46218cc --- /dev/null +++ b/kombi-lib/src/test/kotlin/com/sgnatiuk/cartesian/CartesianProductSpliteratorTest.kt @@ -0,0 +1,24 @@ +package com.sgnatiuk.cartesian + +import org.junit.Assert.assertEquals +import org.junit.Test +import java.util.* + +class CartesianProductSpliteratorTest { + + @Test + fun `spliterator not should contain flag sized when cartesian product has combinations more then max long`() { + val n = 1000 + val list = List(n) { it + 1 } + val data = listOf( + list, list, + list, list, + list, list, + list + ) + + val spliterator = CartesianProductSpliterator(CartesianBuilder.cartesianProductOf(data)) + assertEquals(0, spliterator.characteristics() and Spliterator.SIZED) + + } +} \ No newline at end of file