Skip to content

Commit

Permalink
Ignore benchmark (again) and fix docs
Browse files Browse the repository at this point in the history
  • Loading branch information
aarondav committed Jul 21, 2014
1 parent 034bf10 commit ec395c8
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 18 deletions.
41 changes: 38 additions & 3 deletions core/src/main/scala/org/apache/spark/util/collection/Sorter.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
import java.util.Comparator;

/**
* A port of the OpenJDK 6 Arrays.sort(Object[]) function, which utilizes a simple merge sort.
* This has been kept in Java with the original style in order to match very closely with the JDK
* source code, and thus be easy to verify correctness.
* A port of the Android Timsort class, which utilizes a "stable, adaptive, iterative mergesort."
* See the method comment on sort() for more details.
*
* This has been kept in Java with the original style in order to match very closely with the
* Android source code, and thus be easy to verify correctness.
*
* The purpose of the port is to generalize the interface to the sort to accept input data formats
* besides simple arrays where every element is sorted individually. For instance, the AppendOnlyMap
Expand Down Expand Up @@ -58,6 +60,39 @@ public Sorter(SortDataFormat<K, Buffer> sortDataFormat) {
this.s = sortDataFormat;
}

/**
* A stable, adaptive, iterative mergesort that requires far fewer than
* n lg(n) comparisons when running on partially sorted arrays, while
* offering performance comparable to a traditional mergesort when run
* on random arrays. Like all proper mergesorts, this sort is stable and
* runs in O(n log n) time (worst case). In the worst case, this sort requires
* temporary storage space for n/2 object references; in the best case,
* it requires only a small constant amount of space.
*
* This implementation was adapted from Tim Peters's list sort for
* Python, which is described in detail here:
*
* http://svn.python.org/projects/python/trunk/Objects/listsort.txt
*
* Tim's C code may be found here:
*
* http://svn.python.org/projects/python/trunk/Objects/listobject.c
*
* The underlying techniques are described in this paper (and may have
* even earlier origins):
*
* "Optimistic Sorting and Information Theoretic Complexity"
* Peter McIlroy
* SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
* pp 467-474, Austin, Texas, 25-27 January 1993.
*
* While the API to this class consists solely of static methods, it is
* (privately) instantiable; a TimSort instance holds the state of an ongoing
* sort, assuming the input array is large enough to warrant the full-blown
* TimSort. Small arrays are sorted in place, using a binary insertion sort.
*
* @author Josh Bloch
*/
void sort(Buffer a, int lo, int hi, Comparator<? super K> c) {
assert c != null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class SorterSuite extends FunSuite {
* Note that the Java implementation varies tremendously between Java 6 and Java 7, when
* the Java sort changed from merge sort to Timsort.
*/
test("Sorter benchmark") {
ignore("Sorter benchmark") {

/** Runs an experiment several times. */
def runExperiment(name: String)(f: => Unit): Unit = {
Expand All @@ -96,21 +96,9 @@ class SorterSuite extends FunSuite {
val numElements = 25000000 // 25 mil
val rand = new XORShiftRandom(123)

// Test primitive sort on float array
val primitiveKeys = Array.tabulate[Float](numElements) { i => rand.nextFloat() }
runExperiment("Java Arrays.sort() on primitive keys") {
Arrays.sort(primitiveKeys)
}

// Test non-primitive sort on float array
val keys = Array.tabulate[JFloat](numElements) { i =>
new JFloat(rand.nextFloat())
}
runExperiment("Java Arrays.sort()") {
Arrays.sort(keys, new Comparator[JFloat] {
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
})
}

// Test our key-value pairs where each element is a Tuple2[Float, Integer]
val kvTupleArray = Array.tabulate[AnyRef](numElements) { i =>
Expand All @@ -123,17 +111,29 @@ class SorterSuite extends FunSuite {
})
}

// Test our Sorter where each element alternates between Float and Integer, non-primitive.
// Test our Sorter where each element alternates between Float and Integer, non-primitive
val keyValueArray = Array.tabulate[AnyRef](numElements * 2) { i =>
if (i % 2 == 0) keys(i / 2) else new Integer(i / 2)
}

val sorter = new Sorter(new KVArraySortDataFormat[JFloat, AnyRef])
runExperiment("KV-sort using Sorter") {
sorter.sort(keyValueArray, 0, keys.length, new Comparator[JFloat] {
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
})
}

// Test non-primitive sort on float array
runExperiment("Java Arrays.sort()") {
Arrays.sort(keys, new Comparator[JFloat] {
override def compare(x: JFloat, y: JFloat): Int = Ordering.Float.compare(x, y)
})
}

// Test primitive sort on float array
val primitiveKeys = Array.tabulate[Float](numElements) { i => rand.nextFloat() }
runExperiment("Java Arrays.sort() on primitive keys") {
Arrays.sort(primitiveKeys)
}
}
}

Expand Down

0 comments on commit ec395c8

Please sign in to comment.