From d91b44accbe40b9879cda259912e3ca38759d716 Mon Sep 17 00:00:00 2001
From: Kris Mok
Date: Tue, 26 Jun 2018 04:02:25 -0700
Subject: [PATCH 1/3] SPARK-24659: GenericArrayData.equals should respect
 element type differences

---
 .../spark/sql/catalyst/util/GenericArrayData.scala | 2 +-
 .../spark/sql/catalyst/util/ComplexDataSuite.scala | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
index 9e39ed9c3a778..83ad08d8e1758 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala
@@ -122,7 +122,7 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData {
             if (!o2.isInstanceOf[Double] || ! java.lang.Double.isNaN(o2.asInstanceOf[Double])) {
               return false
             }
-          case _ => if (o1 != o2) {
+          case _ => if (!o1.equals(o2)) {
             return false
           }
         }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
index 9d285916bcf42..a0443f89ffff0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
@@ -104,4 +104,13 @@ class ComplexDataSuite extends SparkFunSuite {
     // The copied data should not be changed externally.
     assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a")
   }
+
+  test("SPARK-24659: GenericArrayData.equals should respect element type differences") {
+    // Spark SQL considers array<int> and array<long> to be incompatible,
+    // so an underlying implementation of array type should return false in this case.
+    val array1 = new GenericArrayData(Array[Int](123))
+    val array2 = new GenericArrayData(Array[Long](123L))
+
+    assert(!array1.equals(array2))
+  }
 }

From 083673c9a9f851fc40dc53b42754776fba5f100a Mon Sep 17 00:00:00 2001
From: Kris Mok
Date: Tue, 26 Jun 2018 12:15:19 -0700
Subject: [PATCH 2/3] Enhance the unit test case to cover more positive and
 negative cases.

---
 .../sql/catalyst/util/ComplexDataSuite.scala | 35 ++++++++++++++++---
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
index a0443f89ffff0..2f97c9f4cf99d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
@@ -106,11 +106,36 @@ class ComplexDataSuite extends SparkFunSuite {
   }
 
   test("SPARK-24659: GenericArrayData.equals should respect element type differences") {
-    // Spark SQL considers array<int> and array<long> to be incompatible,
-    // so an underlying implementation of array type should return false in this case.
-    val array1 = new GenericArrayData(Array[Int](123))
-    val array2 = new GenericArrayData(Array[Long](123L))
+    import scala.reflect.ClassTag
 
-    assert(!array1.equals(array2))
+    // Expected positive cases
+    def arraysShouldEqual[T: ClassTag](element: T*): Unit = {
+      val array1 = new GenericArrayData(Array[T](element: _*))
+      val array2 = new GenericArrayData(Array[T](element: _*))
+      assert(array1.equals(array2))
+    }
+    arraysShouldEqual(true, false) // Boolean
+    arraysShouldEqual(0.toByte, 123.toByte, (-123).toByte) // Byte
+    arraysShouldEqual(0.toShort, 123.toShort, (-256).toShort) // Short
+    arraysShouldEqual(0, 123, -65536) // Int
+    arraysShouldEqual(0L, 123L, -65536L) // Long
+    arraysShouldEqual(0.0F, 123.0F, -65536.0F) // Float
+    arraysShouldEqual(0.0, 123.0, -65536.0) // Double
+    arraysShouldEqual(Array[Byte](123.toByte), null) // Binary (Array[Byte])
+    arraysShouldEqual(UTF8String.fromString("foo"), null) // String (UTF8String)
+
+    // Expected negative cases
+    // Spark SQL considers cases like array<int> vs array<long> to be incompatible,
+    // so an underlying implementation of array type should return false in such cases.
+    def arraysShouldNotEqual[T: ClassTag, U: ClassTag](element1: T, element2: U): Unit = {
+      val array1 = new GenericArrayData(Array[T](element1))
+      val array2 = new GenericArrayData(Array[U](element2))
+      assert(!array1.equals(array2))
+    }
+    arraysShouldNotEqual(true, 1) // Boolean <-> Int
+    arraysShouldNotEqual(123.toByte, 123) // Byte <-> Int
+    arraysShouldNotEqual(123.toByte, 123L) // Byte <-> Long
+    arraysShouldNotEqual(123.toShort, 123) // Short <-> Int
+    arraysShouldNotEqual(123, 123L) // Int <-> Long
   }
 }

From a30de22183cab3cde90fce6029cd03dfc5b5758a Mon Sep 17 00:00:00 2001
From: Kris Mok
Date: Tue, 26 Jun 2018 12:38:05 -0700
Subject: [PATCH 3/3] Add more boundary values to the test case.
---
 .../sql/catalyst/util/ComplexDataSuite.scala | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
index 2f97c9f4cf99d..229e32479082c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ComplexDataSuite.scala
@@ -114,15 +114,17 @@ class ComplexDataSuite extends SparkFunSuite {
       val array2 = new GenericArrayData(Array[T](element: _*))
       assert(array1.equals(array2))
     }
-    arraysShouldEqual(true, false) // Boolean
-    arraysShouldEqual(0.toByte, 123.toByte, (-123).toByte) // Byte
-    arraysShouldEqual(0.toShort, 123.toShort, (-256).toShort) // Short
-    arraysShouldEqual(0, 123, -65536) // Int
-    arraysShouldEqual(0L, 123L, -65536L) // Long
-    arraysShouldEqual(0.0F, 123.0F, -65536.0F) // Float
-    arraysShouldEqual(0.0, 123.0, -65536.0) // Double
-    arraysShouldEqual(Array[Byte](123.toByte), null) // Binary (Array[Byte])
-    arraysShouldEqual(UTF8String.fromString("foo"), null) // String (UTF8String)
+    arraysShouldEqual(true, false) // Boolean
+    arraysShouldEqual(0.toByte, 123.toByte, Byte.MinValue, Byte.MaxValue) // Byte
+    arraysShouldEqual(0.toShort, 123.toShort, Short.MinValue, Short.MaxValue) // Short
+    arraysShouldEqual(0, 123, -65536, Int.MinValue, Int.MaxValue) // Int
+    arraysShouldEqual(0L, 123L, -65536L, Long.MinValue, Long.MaxValue) // Long
+    arraysShouldEqual(0.0F, 123.0F, Float.MinValue, Float.MaxValue, Float.MinPositiveValue,
+      Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN) // Float
+    arraysShouldEqual(0.0, 123.0, Double.MinValue, Double.MaxValue, Double.MinPositiveValue,
+      Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN) // Double
+    arraysShouldEqual(Array[Byte](123.toByte), Array[Byte](), null) // SQL Binary
+    arraysShouldEqual(UTF8String.fromString("foo"), null) // SQL String
 
     // Expected negative cases
     // Spark SQL considers cases like array<int> vs array<long> to be incompatible,
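
Why the one-line fix in PATCH 1/3 works: GenericArrayData keeps its elements
in an Array[Any] (visible in the first hunk header above), so primitive
elements are boxed, and Scala's ==/!= on Any applies cooperative numeric
equality across box types, whereas Java's equals is type-strict. A minimal
standalone sketch of the difference (the object name is illustrative and not
part of the patches):

object BoxedEqualityDemo {
  def main(args: Array[String]): Unit = {
    // Assigning to Any boxes the values:
    // java.lang.Integer and java.lang.Long respectively.
    val o1: Any = 123
    val o2: Any = 123L

    // Scala's == on Any compiles to BoxesRunTime.equals, which compares
    // boxed numbers by numeric value regardless of their box type.
    println(o1 == o2)      // true

    // Java's equals is type-strict: Integer.equals(Long) is always false.
    println(o1.equals(o2)) // false
  }
}

Under the old `case _ => if (o1 != o2)` branch, the cooperative comparison
applied, so new GenericArrayData(Array[Int](123)) compared equal to
new GenericArrayData(Array[Long](123L)) even though Spark SQL treats
array<int> and array<long> as distinct types; `!o1.equals(o2)` makes the
element comparison type-strict, which is exactly what the added test asserts.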