From db890ea8f7899022d9c02dcc4a32c2fce9f13c47 Mon Sep 17 00:00:00 2001
From: mcheah
Date: Thu, 12 Mar 2015 19:40:33 -0700
Subject: [PATCH] Removing CastedArray and just using ScalaRunTime.

---
 .../org/apache/spark/util/CastedArray.scala   | 147 ------------------
 .../apache/spark/util/PrimitiveSizes.scala    |  32 ----
 .../org/apache/spark/util/SizeEstimator.scala |  55 ++++---
 3 files changed, 32 insertions(+), 202 deletions(-)
 delete mode 100644 core/src/main/scala/org/apache/spark/util/CastedArray.scala
 delete mode 100644 core/src/main/scala/org/apache/spark/util/PrimitiveSizes.scala

diff --git a/core/src/main/scala/org/apache/spark/util/CastedArray.scala b/core/src/main/scala/org/apache/spark/util/CastedArray.scala
deleted file mode 100644
index a1378dcd88839..0000000000000
--- a/core/src/main/scala/org/apache/spark/util/CastedArray.scala
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.spark.util
-
-/**
- * Provides a wrapper around an object that is known to be an array, but the specific
- * type for the array is unknown.
- *
- * Normally, in situations when such an array is to be accessed reflectively, one would use
- * {@link java.lang.reflect.Array} using getLength() and get() methods. However, it turns
- * out that such methods are ill-performant.
- *
- * It turns out it is better to just use instanceOf and lots of casting over calling through
- * to the native C implementation. There is some discussion and a sample code snippet in
- * an open JDK ticket. In this
- * class, that approach is implemented in an alternative way: creating a wrapper object to
- * wrap the array allows the cast to be done once, so the overhead of casting multiple times
- * is also avoided. It turns out we invoke the get() method to get the value of the array
- * numerous times, so doing the cast just once is worth the cost of constructing the wrapper
- * object for larger arrays.
- *
- * In general, these classes were designed to avoid the need to cast as much as possible. As
- * soon as the type of the array is known, it is casted immediately once and all of its metadata
- * (primitive type size, length, and whether or not it is a primitive array) is available
- * immediately without any further reflection or introspecting on class objects.
- */
-sealed trait CastedArray extends Any {
-  def get(i: Int): AnyRef
-  def getLength(): Int
-  def isPrimitiveArray(): Boolean
-  def getElementSize(): Int
-}
-
-object CastedArray {
-  // Sizes of primitive types
-
-  def castAndWrap(obj: AnyRef): CastedArray = {
-    obj match {
-      case arr: Array[Boolean] => new BooleanCastedArray(arr)
-      case arr: Array[Byte] => new ByteCastedArray(arr)
-      case arr: Array[Char] => new CharCastedArray(arr)
-      case arr: Array[Double] => new DoubleCastedArray(arr)
-      case arr: Array[Float] => new FloatCastedArray(arr)
-      case arr: Array[Int] => new IntCastedArray(arr)
-      case arr: Array[Long] => new LongCastedArray(arr)
-      case arr: Array[Object] => new ObjectCastedArray(arr)
-      case arr: Array[Short] => new ShortCastedArray(arr)
-      case default => throw createBadArrayException(obj)
-    }
-  }
-
-  // Boxing is not ideal, but we want to return AnyRef here. An alternative implementation
-  // that used Java wouldn't force explicitly boxing... but returning Object there would
-  // make the boxing happen implicitly anyways. In practice this tends to be okay
-  // in terms of performance.
-  private class BooleanCastedArray(val arr: Array[Boolean]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Boolean.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.BOOLEAN_SIZE
-  }
-
-  private class ByteCastedArray(val arr: Array[Byte]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Byte.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.BYTE_SIZE
-  }
-
-  private class CharCastedArray(val arr: Array[Char]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Char.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.CHAR_SIZE
-  }
-
-  private class DoubleCastedArray(val arr: Array[Double]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Double.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.DOUBLE_SIZE
-  }
-
-  private class FloatCastedArray(val arr: Array[Float]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Float.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.FLOAT_SIZE
-  }
-
-  private class IntCastedArray(val arr: Array[Int]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Int.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.INT_SIZE
-  }
-
-  private class LongCastedArray(val arr: Array[Long]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Long.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.LONG_SIZE
-  }
-
-  private class ObjectCastedArray(val arr: Array[Object]) extends AnyVal with CastedArray {
-    override def get(i: Int): Object = arr(i)
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = false
-    override def getElementSize(): Int = {
-      throw new UnsupportedOperationException("Cannot introspect " +
-        " the size of an element in an object array.")
-    }
-  }
-
-  private class ShortCastedArray(val arr: Array[Short]) extends AnyVal with CastedArray {
-    override def get(i: Int): AnyRef = Short.box(arr(i))
-    override def getLength(): Int = arr.length
-    override def isPrimitiveArray(): Boolean = true
-    override def getElementSize(): Int = PrimitiveSizes.SHORT_SIZE
-  }
-
-  private def createBadArrayException(badArray : Object): RuntimeException = {
-    if (badArray == null) {
-      return new NullPointerException("Array argument is null");
-    } else if (!badArray.getClass().isArray()) {
-      return new IllegalArgumentException("Argument is not an array");
-    } else {
-      return new IllegalArgumentException("Array is of incompatible type");
-    }
-  }
-}
-
diff --git a/core/src/main/scala/org/apache/spark/util/PrimitiveSizes.scala b/core/src/main/scala/org/apache/spark/util/PrimitiveSizes.scala
deleted file mode 100644
index 7d335af07c090..0000000000000
--- a/core/src/main/scala/org/apache/spark/util/PrimitiveSizes.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.util
-
-/**
- * Constants for the sizes of primitive types in bytes.
- */
-object PrimitiveSizes {
-  val BYTE_SIZE = 1
-  val BOOLEAN_SIZE = 1
-  val CHAR_SIZE = 2
-  val SHORT_SIZE = 2
-  val INT_SIZE = 4
-  val LONG_SIZE = 8
-  val FLOAT_SIZE = 4
-  val DOUBLE_SIZE = 8
-}
diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index d2726da01dd17..f00a8fb024ea9 100644
--- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -18,17 +18,16 @@
 package org.apache.spark.util
 
 import java.lang.management.ManagementFactory
-import java.lang.reflect.Field
-import java.lang.reflect.Modifier
-import java.util.IdentityHashMap
-import java.util.Random
+import java.lang.reflect.{Field, Modifier}
+import java.util.{IdentityHashMap, Random}
 import java.util.concurrent.ConcurrentHashMap
 
-import scala.collection.mutable.ArrayBuffer
-
 import org.apache.spark.Logging
 import org.apache.spark.util.collection.OpenHashSet
 
+import scala.collection.mutable.ArrayBuffer
+import scala.runtime.ScalaRunTime
+
 /**
  * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in
  * memory-aware caches.
@@ -38,6 +37,16 @@ import org.apache.spark.util.collection.OpenHashSet
  */
 private[spark] object SizeEstimator extends Logging {
 
+  // Sizes of primitive types
+  private val BYTE_SIZE = 1
+  private val BOOLEAN_SIZE = 1
+  private val CHAR_SIZE = 2
+  private val SHORT_SIZE = 2
+  private val INT_SIZE = 4
+  private val LONG_SIZE = 8
+  private val FLOAT_SIZE = 4
+  private val DOUBLE_SIZE = 8
+
   // Alignment boundary for objects
   // TODO: Is this arch dependent ?
   private val ALIGN_SIZE = 8
@@ -155,7 +164,7 @@ private[spark] object SizeEstimator extends Logging {
   private def visitSingleObject(obj: AnyRef, state: SearchState) {
     val cls = obj.getClass
     if (cls.isArray) {
-      visitArray(obj, state)
+      visitArray(obj, cls, state)
     } else if (obj.isInstanceOf[ClassLoader] || obj.isInstanceOf[Class[_]]) {
       // Hadoop JobConfs created in the interpreter have a ClassLoader, which greatly confuses
       // the size estimator since it references the whole REPL. Do nothing in this case. In
@@ -173,15 +182,15 @@
   private val ARRAY_SIZE_FOR_SAMPLING = 200
   private val ARRAY_SAMPLE_SIZE = 100 // should be lower than ARRAY_SIZE_FOR_SAMPLING
 
-  private def visitArray(array: AnyRef, state: SearchState) {
-    val castedArray: CastedArray = CastedArray.castAndWrap(array)
-    val length = castedArray.getLength
+  private def visitArray(array: AnyRef, arrayClass: Class[_], state: SearchState) {
+    val length = ScalaRunTime.array_length(array)
+    val elementClass = arrayClass.getComponentType()
 
     // Arrays have object header and length field which is an integer
-    var arrSize: Long = alignSize(objectSize + PrimitiveSizes.INT_SIZE)
+    var arrSize: Long = alignSize(objectSize + INT_SIZE)
 
-    if (castedArray.isPrimitiveArray()) {
-      arrSize += alignSize(length * castedArray.getElementSize())
+    if (elementClass.isPrimitive()) {
+      arrSize += alignSize(length * primitiveSize(elementClass))
       state.size += arrSize
     } else {
       arrSize += alignSize(length * pointerSize)
@@ -189,7 +198,7 @@
 
     if (length <= ARRAY_SIZE_FOR_SAMPLING) {
       for (i <- 0 until length) {
-        state.enqueue(castedArray.get(i))
+        state.enqueue(ScalaRunTime.array_apply(array, i).asInstanceOf[AnyRef])
      }
     } else {
       // Estimate the size of a large array by sampling elements without replacement.
@@ -202,7 +211,7 @@
         index = rand.nextInt(length)
       } while (drawn.contains(index))
       drawn.add(index)
-      val elem = castedArray.get(index)
+      val elem = ScalaRunTime.array_apply(array, index).asInstanceOf[AnyRef]
       size += SizeEstimator.estimate(elem, state.visited)
     }
     state.size += ((length / (ARRAY_SAMPLE_SIZE * 1.0)) * size).toLong
@@ -212,21 +221,21 @@
 
   private def primitiveSize(cls: Class[_]): Long = {
     if (cls == classOf[Byte]) {
-      PrimitiveSizes.BYTE_SIZE
+      BYTE_SIZE
     } else if (cls == classOf[Boolean]) {
-      PrimitiveSizes.BOOLEAN_SIZE
+      BOOLEAN_SIZE
     } else if (cls == classOf[Char]) {
-      PrimitiveSizes.CHAR_SIZE
+      CHAR_SIZE
     } else if (cls == classOf[Short]) {
-      PrimitiveSizes.SHORT_SIZE
+      SHORT_SIZE
     } else if (cls == classOf[Int]) {
-      PrimitiveSizes.INT_SIZE
+      INT_SIZE
    } else if (cls == classOf[Long]) {
-      PrimitiveSizes.LONG_SIZE
+      LONG_SIZE
     } else if (cls == classOf[Float]) {
-      PrimitiveSizes.FLOAT_SIZE
+      FLOAT_SIZE
     } else if (cls == classOf[Double]) {
-      PrimitiveSizes.DOUBLE_SIZE
+      DOUBLE_SIZE
     } else {
       throw new IllegalArgumentException(
         "Non-primitive class " + cls + " passed to primitiveSize()")
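
Note on the replacement technique: ScalaRunTime.array_length and ScalaRunTime.array_apply can read the length and elements of an array whose element type is unknown at the call site, and Class#getComponentType plus Class#isPrimitive recover the metadata that CastedArray used to carry. Below is a minimal standalone sketch of that approach, not code from this patch: the name ArraySizeSketch, the shallowSize method, and the header/pointer constants are illustrative assumptions (real header and reference sizes vary by JVM, which is why SizeEstimator probes the running JVM instead of hard-coding them).

    import scala.runtime.ScalaRunTime

    object ArraySizeSketch {
      // Assumed layout constants for illustration only.
      private val OBJECT_HEADER_SIZE = 16L // assumed 64-bit JVM object header
      private val POINTER_SIZE = 8L        // assumed uncompressed reference size
      private val INT_SIZE = 4L            // the array's length field

      // Same shape as the patched primitiveSize: classOf[Byte] etc. denote the
      // primitive classes, so == comparison identifies the element type.
      private def primitiveSize(cls: Class[_]): Long =
        if (cls == classOf[Byte] || cls == classOf[Boolean]) 1L
        else if (cls == classOf[Char] || cls == classOf[Short]) 2L
        else if (cls == classOf[Int] || cls == classOf[Float]) 4L
        else if (cls == classOf[Long] || cls == classOf[Double]) 8L
        else throw new IllegalArgumentException("Non-primitive class " + cls)

      // Shallow size of an array of statically unknown type, mirroring the
      // two branches of the new visitArray.
      def shallowSize(array: AnyRef): Long = {
        val length = ScalaRunTime.array_length(array) // reflective length read
        val elementClass = array.getClass.getComponentType
        val headerAndLength = OBJECT_HEADER_SIZE + INT_SIZE
        if (elementClass.isPrimitive) {
          headerAndLength + length.toLong * primitiveSize(elementClass)
        } else {
          // Object arrays store references; referents are walked separately
          // by reading each element, which boxes primitives but needs no
          // wrapper: ScalaRunTime.array_apply(array, i).asInstanceOf[AnyRef]
          headerAndLength + length.toLong * POINTER_SIZE
        }
      }
    }

Under these assumed constants, shallowSize(Array[Long](1, 2, 3)) is 16 + 4 + 3 * 8 = 44 bytes before alignment, while the same call on an Array[String] counts only the three references. The boxing performed by array_apply is the trade-off the deleted CastedArray scaladoc discusses; the patch accepts it in exchange for dropping the wrapper hierarchy and the extra allocation per visited array.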