diff --git a/.rat-excludes b/.rat-excludes index a4f316a4aaa04..8b5061415ff4c 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -25,6 +25,16 @@ graphlib-dot.min.js sorttable.js vis.min.js vis.min.css +dataTables.bootstrap.css +dataTables.bootstrap.min.js +dataTables.rowsGroup.js +jquery.blockUI.min.js +jquery.cookies.2.2.0.min.js +jquery.dataTables.1.10.4.min.css +jquery.dataTables.1.10.4.min.js +jquery.mustache.js +jsonFormatter.min.css +jsonFormatter.min.js .*avsc .*txt .*json @@ -63,12 +73,12 @@ logs .*dependency-reduced-pom.xml known_translations json_expectation -local-1422981759269/* -local-1422981780767/* -local-1425081759269/* -local-1426533911241/* -local-1426633911242/* -local-1430917381534/* +local-1422981759269 +local-1422981780767 +local-1425081759269 +local-1426533911241 +local-1426633911242 +local-1430917381534 local-1430917381535_1 local-1430917381535_2 DESCRIPTION diff --git a/LICENSE b/LICENSE index 9c944ac610afe..9fc29db8d3f22 100644 --- a/LICENSE +++ b/LICENSE @@ -291,3 +291,9 @@ The text of each license is also included at licenses/LICENSE-[project].txt. 
(MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3) (MIT License) sorttable (https://github.com/stuartlangridge/sorttable) (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE) + (MIT License) datatables (http://datatables.net/license) + (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE) + (MIT License) cookies (http://code.google.com/p/cookies/wiki/License) + (MIT License) blockUI (http://jquery.malsup.com/block/) + (MIT License) RowsGroup (http://datatables.net/license/mit) + (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) diff --git a/assembly/pom.xml b/assembly/pom.xml index 6c79f9189787d..477d4931c3a88 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -20,13 +20,13 @@ 4.0.0 org.apache.spark - spark-parent_2.10 + spark-parent_2.11 2.0.0-SNAPSHOT ../pom.xml org.apache.spark - spark-assembly_2.10 + spark-assembly_2.11 Spark Project Assembly http://spark.apache.org/ pom diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 67723fa421ab1..442043cb51164 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -21,13 +21,13 @@ 4.0.0 org.apache.spark - spark-parent_2.10 + spark-parent_2.11 2.0.0-SNAPSHOT ../../pom.xml org.apache.spark - spark-sketch_2.10 + spark-sketch_2.11 jar Spark Project Sketch http://spark.apache.org/ @@ -35,6 +35,13 @@ sketch + + + org.apache.spark + spark-test-tags_${scala.binary.version} + + + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java index 2a0484e324b13..480a0a79db32d 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BitArray.java @@ -22,7 +22,7 @@ import java.io.IOException; import 
java.util.Arrays; -public final class BitArray { +final class BitArray { private final long[] data; private long bitCount; diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java index 81772fcea0ec2..c0b425e729595 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java @@ -22,16 +22,10 @@ import java.io.OutputStream; /** - * A Bloom filter is a space-efficient probabilistic data structure, that is used to test whether - * an element is a member of a set. It returns false when the element is definitely not in the - * set, returns true when the element is probably in the set. - * - * Internally a Bloom filter is initialized with 2 information: how many space to use(number of - * bits) and how many hash values to calculate for each record. To get as lower false positive - * probability as possible, user should call {@link BloomFilter#create} to automatically pick a - * best combination of these 2 parameters. - * - * Currently the following data types are supported: + * A Bloom filter is a space-efficient probabilistic data structure that offers an approximate + * containment test with one-sided error: if it claims that an item is contained in it, this + * might be in error, but if it claims that an item is not contained in it, then this is + * definitely true. Currently supported data types include: * + * The false positive probability ({@code FPP}) of a Bloom filter is defined as the probability that + * {@linkplain #mightContain(Object)} will erroneously return {@code true} for an object that has + * not actually been put in the {@code BloomFilter}. * - * The implementation is largely based on the {@code BloomFilter} class from guava. + * The implementation is largely based on the {@code BloomFilter} class from Guava. 
*/ public abstract class BloomFilter { public enum Version { /** - * {@code BloomFilter} binary format version 1 (all values written in big-endian order): + * {@code BloomFilter} binary format version 1. All values written in big-endian order: * * - * See http://www.eecs.harvard.edu/~michaelm/CS222/countmin.pdf for technical details, - * including proofs of the estimates and error bounds used in this implementation. - * * This implementation is largely based on the {@code CountMinSketch} class from stream-lib. */ abstract public class CountMinSketch { public enum Version { /** - * {@code CountMinSketch} binary format version 1 (all values written in big-endian order): + * {@code CountMinSketch} binary format version 1. All values written in big-endian order: *