From ce50a563d311ccfe36d1fcc4f0743e4e4d7d8116 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 22 Aug 2023 21:04:32 +0300 Subject: [PATCH] [SPARK-44840][SQL] Make `array_insert()` 1-based for negative indexes ### What changes were proposed in this pull request? In the PR, I propose to make the `array_insert` function 1-based for negative indexes. So, the maximum negative index, -1, should point to the last element, and the function should insert the new element at the end of the given array for the index -1. The old behaviour can be restored via the SQL config `spark.sql.legacy.negativeIndexInArrayInsert`. ### Why are the changes needed? 1. To match the behaviour of functions such as `substr()` and `element_at()`. ```sql spark-sql (default)> select element_at(array('a', 'b'), -1), substr('ab', -1); b b ``` 2. To fix an inconsistency in `array_insert` in which positive indexes are 1-based, but negative indexes are 0-based. ### Does this PR introduce _any_ user-facing change? Yes. Before: ```sql spark-sql (default)> select array_insert(array('a', 'b'), -1, 'c'); ["a","c","b"] ``` After: ```sql spark-sql (default)> select array_insert(array('a', 'b'), -1, 'c'); ["a","b","c"] ``` ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *CollectionExpressionsSuite" $ build/sbt "test:testOnly *DataFrameFunctionsSuite" $ PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` Closes #42564 from MaxGekk/fix-array_insert. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../function_array_insert.explain | 2 +- .../function_array_prepend.explain | 2 +- docs/sql-migration-guide.md | 1 + python/pyspark/sql/functions.py | 2 +- .../expressions/collectionOperations.scala | 37 +++++++++----- .../apache/spark/sql/internal/SQLConf.scala | 16 ++++++ .../CollectionExpressionsSuite.scala | 50 ++++++++++--------- .../org/apache/spark/sql/functions.scala | 2 +- .../analyzer-results/ansi/array.sql.out | 44 ++++++++++++---- .../sql-tests/analyzer-results/array.sql.out | 44 ++++++++++++---- .../test/resources/sql-tests/inputs/array.sql | 5 ++ .../sql-tests/results/ansi/array.sql.out | 34 ++++++++++++- .../resources/sql-tests/results/array.sql.out | 34 ++++++++++++- .../spark/sql/DataFrameFunctionsSuite.scala | 6 ++- 14 files changed, 218 insertions(+), 61 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_insert.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_insert.explain index edcd790596bd2..f5096a363a3a6 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_insert.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_insert.explain @@ -1,2 +1,2 @@ -Project [array_insert(e#0, 0, 1) AS array_insert(e, 0, 1)#0] +Project [array_insert(e#0, 0, 1, false) AS array_insert(e, 0, 1)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain index 4c3e7c85d64ee..1b20682b09d00 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain +++ 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_prepend.explain @@ -1,2 +1,2 @@ -Project [array_insert(e#0, 1, 1) AS array_prepend(e, 1)#0] +Project [array_insert(e#0, 1, 1, false) AS array_prepend(e, 1)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c71b16cd8d6bf..5fc323ec1b0ea 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -29,6 +29,7 @@ license: | - Since Spark 3.5, Row's json and prettyJson methods are moved to `ToJsonUtil`. - Since Spark 3.5, the `plan` field is moved from `AnalysisException` to `EnhancedAnalysisException`. - Since Spark 3.5, `spark.sql.optimizer.canChangeCachedPlanOutputPartitioning` is enabled by default. To restore the previous behavior, set `spark.sql.optimizer.canChangeCachedPlanOutputPartitioning` to `false`. +- Since Spark 3.5, the `array_insert` function is 1-based for negative indexes. It inserts new element at the end of input arrays for the index -1. To restore the previous behavior, set `spark.sql.legacy.negativeIndexInArrayInsert` to `true`. ## Upgrading from Spark SQL 3.3 to 3.4 diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 95e33fd9a76e0..3115b0199ec44 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -11367,7 +11367,7 @@ def array_insert(arr: "ColumnOrName", pos: Union["ColumnOrName", int], value: An ... ['data', 'pos', 'val'] ... 
) >>> df.select(array_insert(df.data, df.pos.cast('integer'), df.val).alias('data')).collect() - [Row(data=['a', 'd', 'b', 'c']), Row(data=['c', 'd', 'b', 'a'])] + [Row(data=['a', 'd', 'b', 'c']), Row(data=['c', 'b', 'd', 'a'])] >>> df.select(array_insert(df.data, 5, 'hello').alias('data')).collect() [Row(data=['a', 'b', 'c', None, 'hello']), Row(data=['c', 'b', 'a', None, 'hello'])] """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 316661d29a6ce..fe9c4015c15ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1418,7 +1418,7 @@ case class ArrayContains(left: Expression, right: Expression) case class ArrayPrepend(left: Expression, right: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes with BinaryLike[Expression] with QueryErrorsBase { - override lazy val replacement: Expression = ArrayInsert(left, Literal(1), right) + override lazy val replacement: Expression = new ArrayInsert(left, Literal(1), right) override def inputTypes: Seq[AbstractDataType] = { (left.dataType, right.dataType) match { @@ -4674,7 +4674,8 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL @ExpressionDescription( usage = """ _FUNC_(x, pos, val) - Places val into index pos of array x. - Array indices start at 1, or start from the end if index is negative. + Array indices start at 1. The maximum negative index is -1 for which the function inserts + new element after the current last element. Index above array size appends the array, or prepends the array if index is negative, with 'null' elements. 
""", @@ -4682,15 +4683,25 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL Examples: > SELECT _FUNC_(array(1, 2, 3, 4), 5, 5); [1,2,3,4,5] - > SELECT _FUNC_(array(5, 3, 2, 1), -3, 4); + > SELECT _FUNC_(array(5, 4, 3, 2), -1, 1); + [5,4,3,2,1] + > SELECT _FUNC_(array(5, 3, 2, 1), -4, 4); [5,4,3,2,1] """, group = "array_funcs", since = "3.4.0") -case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: Expression) +case class ArrayInsert( + srcArrayExpr: Expression, + posExpr: Expression, + itemExpr: Expression, + legacyNegativeIndex: Boolean) extends TernaryExpression with ImplicitCastInputTypes with ComplexTypeMergingExpression with QueryErrorsBase with SupportQueryContext { + def this(srcArrayExpr: Expression, posExpr: Expression, itemExpr: Expression) = { + this(srcArrayExpr, posExpr, itemExpr, SQLConf.get.legacyNegativeIndexInArrayInsert) + } + override def inputTypes: Seq[AbstractDataType] = { (srcArrayExpr.dataType, posExpr.dataType, itemExpr.dataType) match { case (ArrayType(e1, hasNull), e2: IntegralType, e3) if (e2 != LongType) => @@ -4784,11 +4795,12 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: val newPosExtendsArrayLeft = (posInt < 0) && (-posInt > baseArr.numElements()) if (newPosExtendsArrayLeft) { + val baseOffset = if (legacyNegativeIndex) 1 else 0 // special case- if the new position is negative but larger than the current array size // place the new item at start of array, place the current array contents at the end // and fill the newly created array elements inbetween with a null - val newArrayLength = -posInt + 1 + val newArrayLength = -posInt + baseOffset if (newArrayLength > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { throw QueryExecutionErrors.concatArraysWithElementsExceedLimitError(newArrayLength) @@ -4798,7 +4810,7 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: baseArr.foreach(elementType, (i, v) => { // 
current position, offset by new item + new null array elements - val elementPosition = i + 1 + math.abs(posInt + baseArr.numElements()) + val elementPosition = i + baseOffset + math.abs(posInt + baseArr.numElements()) newArray(elementPosition) = v }) @@ -4807,7 +4819,7 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: new GenericArrayData(newArray) } else { if (posInt < 0) { - posInt = posInt + baseArr.numElements() + posInt = posInt + baseArr.numElements() + (if (legacyNegativeIndex) 0 else 1) } else if (posInt > 0) { posInt = posInt - 1 } @@ -4883,6 +4895,7 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: |""".stripMargin } else { val pos = posExpr.value + val baseOffset = if (legacyNegativeIndex) 1 else 0 s""" |int $itemInsertionIndex = 0; |int $resLength = 0; @@ -4895,21 +4908,21 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: | |if ($pos < 0 && (java.lang.Math.abs($pos) > $arr.numElements())) { | - | $resLength = java.lang.Math.abs($pos) + 1; + | $resLength = java.lang.Math.abs($pos) + $baseOffset; | if ($resLength > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { | throw QueryExecutionErrors.createArrayWithElementsExceedLimitError($resLength); | } | | $allocation | for (int $i = 0; $i < $arr.numElements(); $i ++) { - | $adjustedAllocIdx = $i + 1 + java.lang.Math.abs($pos + $arr.numElements()); + | $adjustedAllocIdx = $i + $baseOffset + java.lang.Math.abs($pos + $arr.numElements()); | $assignment | } | ${CodeGenerator.setArrayElement( values, elementType, itemInsertionIndex, item, Some(insertedItemIsNull))} | - | for (int $j = $pos + $arr.numElements(); $j < 0; $j ++) { - | $values.setNullAt($j + 1 + java.lang.Math.abs($pos + $arr.numElements())); + | for (int $j = ${if (legacyNegativeIndex) 0 else 1} + $pos + $arr.numElements(); $j < 0; $j ++) { + | $values.setNullAt($j + $baseOffset + java.lang.Math.abs($pos + $arr.numElements())); | } | | ${ev.value} = 
$values; @@ -4917,7 +4930,7 @@ case class ArrayInsert(srcArrayExpr: Expression, posExpr: Expression, itemExpr: | | $itemInsertionIndex = 0; | if ($pos < 0) { - | $itemInsertionIndex = $pos + $arr.numElements(); + | $itemInsertionIndex = $pos + $arr.numElements() + ${if (legacyNegativeIndex) 0 else 1}; | } else if ($pos > 0) { | $itemInsertionIndex = $pos - 1; | } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ca155683ec006..556d6b7c798e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4378,6 +4378,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_NEGATIVE_INDEX_IN_ARRAY_INSERT = + buildConf("spark.sql.legacy.negativeIndexInArrayInsert") + .internal() + .doc("When set to true, restores the legacy behavior of `array_insert` for " + + "negative indexes - 0-based: the function inserts new element before the last one " + + "for the index -1. For example, `array_insert(['a', 'b'], -1, 'x')` returns " + + "`['a', 'x', 'b']`. When set to false, the -1 index points out to the last element, " + + "and the given example produces `['a', 'b', 'x']`.") + .version("3.5.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * @@ -5231,6 +5243,10 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def usePartitionEvaluator: Boolean = getConf(SQLConf.USE_PARTITION_EVALUATOR) + def legacyNegativeIndexInArrayInsert: Boolean = { + getConf(SQLConf.LEGACY_NEGATIVE_INDEX_IN_ARRAY_INSERT) + } + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 485579230c0be..1787f6ac72dd4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -2279,61 +2279,63 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a11 = Literal.create(null, ArrayType(StringType)) // basic additions per type - checkEvaluation(ArrayInsert(a1, Literal(3), Literal(3)), Seq(1, 2, 3, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(3), Literal(3)), Seq(1, 2, 3, 4)) checkEvaluation( - ArrayInsert(a3, Literal.create(3, IntegerType), Literal(true)), + new ArrayInsert(a3, Literal.create(3, IntegerType), Literal(true)), Seq[Boolean](true, false, true, true) ) checkEvaluation( - ArrayInsert( + new ArrayInsert( a4, Literal(3), Literal.create(5.asInstanceOf[Byte], ByteType)), Seq[Byte](1, 2, 5, 3, 2)) checkEvaluation( - ArrayInsert( + new ArrayInsert( a5, Literal(3), Literal.create(3.asInstanceOf[Short], ShortType)), Seq[Short](1, 2, 3, 3, 2)) checkEvaluation( - ArrayInsert(a7, Literal(4), Literal(4.4)), + new ArrayInsert(a7, Literal(4), Literal(4.4)), Seq[Double](1.1, 2.2, 3.3, 4.4, 2.2) ) checkEvaluation( - ArrayInsert(a6, Literal(4), Literal(4.4F)), + new ArrayInsert(a6, Literal(4), Literal(4.4F)), Seq(1.1F, 2.2F, 3.3F, 4.4F, 2.2F) ) - checkEvaluation(ArrayInsert(a8, Literal(3), Literal(3L)), Seq(1L, 2L, 3L, 4L)) - checkEvaluation(ArrayInsert(a9, Literal(3), Literal("d")), Seq("b", "a", "d", "c")) + checkEvaluation(new ArrayInsert(a8, Literal(3), Literal(3L)), Seq(1L, 2L, 3L, 4L)) + checkEvaluation(new ArrayInsert(a9, Literal(3), Literal("d")), Seq("b", "a", "d", "c")) // index edge cases - 
checkEvaluation(ArrayInsert(a1, Literal(2), Literal(3)), Seq(1, 3, 2, 4)) - checkEvaluation(ArrayInsert(a1, Literal(1), Literal(3)), Seq(3, 1, 2, 4)) - checkEvaluation(ArrayInsert(a1, Literal(4), Literal(3)), Seq(1, 2, 4, 3)) - checkEvaluation(ArrayInsert(a1, Literal(-2), Literal(3)), Seq(1, 3, 2, 4)) - checkEvaluation(ArrayInsert(a1, Literal(-3), Literal(3)), Seq(3, 1, 2, 4)) - checkEvaluation(ArrayInsert(a1, Literal(-4), Literal(3)), Seq(3, null, 1, 2, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(2), Literal(3)), Seq(1, 3, 2, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(1), Literal(3)), Seq(3, 1, 2, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(4), Literal(3)), Seq(1, 2, 4, 3)) + checkEvaluation(new ArrayInsert(a1, Literal(-2), Literal(3)), Seq(1, 2, 3, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(-3), Literal(3)), Seq(1, 3, 2, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(-4), Literal(3)), Seq(3, 1, 2, 4)) + checkEvaluation(new ArrayInsert(a1, Literal(-5), Literal(3)), Seq(3, null, 1, 2, 4)) checkEvaluation( - ArrayInsert(a1, Literal(10), Literal(3)), + new ArrayInsert(a1, Literal(10), Literal(3)), Seq(1, 2, 4, null, null, null, null, null, null, 3) ) checkEvaluation( - ArrayInsert(a1, Literal(-10), Literal(3)), - Seq(3, null, null, null, null, null, null, null, 1, 2, 4) + new ArrayInsert(a1, Literal(-10), Literal(3)), + Seq(3, null, null, null, null, null, null, 1, 2, 4) ) // null handling - checkEvaluation(ArrayInsert( + checkEvaluation(new ArrayInsert( a1, Literal(3), Literal.create(null, IntegerType)), Seq(1, 2, null, 4) ) - checkEvaluation(ArrayInsert(a2, Literal(3), Literal(3)), Seq(1, 2, 3, null, 4, 5, null)) - checkEvaluation(ArrayInsert(a10, Literal(3), Literal("d")), Seq("b", null, "d", "a", "g", null)) - checkEvaluation(ArrayInsert(a11, Literal(3), Literal("d")), null) - checkEvaluation(ArrayInsert(a10, Literal.create(null, IntegerType), Literal("d")), null) + checkEvaluation(new ArrayInsert(a2, Literal(3), Literal(3)), Seq(1, 
2, 3, null, 4, 5, null)) + checkEvaluation(new ArrayInsert(a10, Literal(3), Literal("d")), + Seq("b", null, "d", "a", "g", null)) + checkEvaluation(new ArrayInsert(a11, Literal(3), Literal("d")), null) + checkEvaluation(new ArrayInsert(a10, Literal.create(null, IntegerType), Literal("d")), null) } test("Array Intersect") { @@ -2754,14 +2756,14 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-42401: Array insert of null value (explicit)") { val a = Literal.create(Seq("b", "a", "c"), ArrayType(StringType, false)) - checkEvaluation(ArrayInsert( + checkEvaluation(new ArrayInsert( a, Literal(2), Literal.create(null, StringType)), Seq("b", null, "a", "c") ) } test("SPARK-42401: Array insert of null value (implicit)") { val a = Literal.create(Seq("b", "a", "c"), ArrayType(StringType, false)) - checkEvaluation(ArrayInsert( + checkEvaluation(new ArrayInsert( a, Literal(5), Literal.create("q", StringType)), Seq("b", "a", "c", null, "q") ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index a9f9a8ffe5bbf..f6699b66af989 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -6155,7 +6155,7 @@ object functions { * @since 3.4.0 */ def array_insert(arr: Column, pos: Column, value: Column): Column = withExpr { - ArrayInsert(arr.expr, pos.expr, value.expr) + new ArrayInsert(arr.expr, pos.expr, value.expr) } /** diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out index 12ad7ce436b85..cd101c7a524a1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out @@ -447,28 +447,28 @@ Project [get(array(1, 2, 3), -1) AS get(array(1, 2, 3), -1)#x] -- 
!query select array_insert(array(1, 2, 3), 3, 4) -- !query analysis -Project [array_insert(array(1, 2, 3), 3, 4) AS array_insert(array(1, 2, 3), 3, 4)#x] +Project [array_insert(array(1, 2, 3), 3, 4, false) AS array_insert(array(1, 2, 3), 3, 4)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, 4), 0, 1) -- !query analysis -Project [array_insert(array(2, 3, 4), 0, 1) AS array_insert(array(2, 3, 4), 0, 1)#x] +Project [array_insert(array(2, 3, 4), 0, 1, false) AS array_insert(array(2, 3, 4), 0, 1)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, 4), 1, 1) -- !query analysis -Project [array_insert(array(2, 3, 4), 1, 1) AS array_insert(array(2, 3, 4), 1, 1)#x] +Project [array_insert(array(2, 3, 4), 1, 1, false) AS array_insert(array(2, 3, 4), 1, 1)#x] +- OneRowRelation -- !query select array_insert(array(1, 3, 4), -2, 2) -- !query analysis -Project [array_insert(array(1, 3, 4), -2, 2) AS array_insert(array(1, 3, 4), -2, 2)#x] +Project [array_insert(array(1, 3, 4), -2, 2, false) AS array_insert(array(1, 3, 4), -2, 2)#x] +- OneRowRelation @@ -499,38 +499,64 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select array_insert(cast(NULL as ARRAY), 1, 1) -- !query analysis -Project [array_insert(cast(null as array), 1, 1) AS array_insert(NULL, 1, 1)#x] +Project [array_insert(cast(null as array), 1, 1, false) AS array_insert(NULL, 1, 1)#x] +- OneRowRelation -- !query select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4) -- !query analysis -Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as int), 4) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] +Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as int), 4, false) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] +- OneRowRelation -- !query select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)) -- !query analysis -Project [array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int)) AS 
array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] +Project [array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int), false) AS array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] +- OneRowRelation -- !query select array_insert(array(2, 3, NULL, 4), 5, 5) -- !query analysis -Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] +Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5, false) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, NULL, 4), -5, 1) -- !query analysis -Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, false) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] +- OneRowRelation +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=true +-- !query analysis +SetCommand (spark.sql.legacy.negativeIndexInArrayInsert,Some(true)) + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query analysis +Project [array_insert(array(1, 3, 4), -2, 2, true) AS array_insert(array(1, 3, 4), -2, 2)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, true) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] ++- OneRowRelation + + +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=false +-- !query analysis +SetCommand (spark.sql.legacy.negativeIndexInArrayInsert,Some(false)) + + -- !query select array_compact(id) from values (1) as t(id) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out index af06b9959ee59..8279fb3362e54 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out @@ -447,28 +447,28 @@ Project [get(array(1, 2, 3), -1) AS get(array(1, 2, 3), -1)#x] -- !query select array_insert(array(1, 2, 3), 3, 4) -- !query analysis -Project [array_insert(array(1, 2, 3), 3, 4) AS array_insert(array(1, 2, 3), 3, 4)#x] +Project [array_insert(array(1, 2, 3), 3, 4, false) AS array_insert(array(1, 2, 3), 3, 4)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, 4), 0, 1) -- !query analysis -Project [array_insert(array(2, 3, 4), 0, 1) AS array_insert(array(2, 3, 4), 0, 1)#x] +Project [array_insert(array(2, 3, 4), 0, 1, false) AS array_insert(array(2, 3, 4), 0, 1)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, 4), 1, 1) -- !query analysis -Project [array_insert(array(2, 3, 4), 1, 1) AS array_insert(array(2, 3, 4), 1, 1)#x] +Project [array_insert(array(2, 3, 4), 1, 1, false) AS array_insert(array(2, 3, 4), 1, 1)#x] +- OneRowRelation -- !query select array_insert(array(1, 3, 4), -2, 2) -- !query analysis -Project [array_insert(array(1, 3, 4), -2, 2) AS array_insert(array(1, 3, 4), -2, 2)#x] +Project [array_insert(array(1, 3, 4), -2, 2, false) AS array_insert(array(1, 3, 4), -2, 2)#x] +- OneRowRelation @@ -499,38 +499,64 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select array_insert(cast(NULL as ARRAY), 1, 1) -- !query analysis -Project [array_insert(cast(null as array), 1, 1) AS array_insert(NULL, 1, 1)#x] +Project [array_insert(cast(null as array), 1, 1, false) AS array_insert(NULL, 1, 1)#x] +- OneRowRelation -- !query select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4) -- !query analysis -Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as int), 4) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] +Project [array_insert(array(1, 2, 3, cast(null as int)), cast(null as int), 4, false) AS array_insert(array(1, 2, 3, NULL), CAST(NULL AS INT), 4)#x] +- OneRowRelation -- !query select 
array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)) -- !query analysis -Project [array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int)) AS array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] +Project [array_insert(array(1, 2, 3, cast(null as int)), 4, cast(null as int), false) AS array_insert(array(1, 2, 3, NULL), 4, CAST(NULL AS INT))#x] +- OneRowRelation -- !query select array_insert(array(2, 3, NULL, 4), 5, 5) -- !query analysis -Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] +Project [array_insert(array(2, 3, cast(null as int), 4), 5, 5, false) AS array_insert(array(2, 3, NULL, 4), 5, 5)#x] +- OneRowRelation -- !query select array_insert(array(2, 3, NULL, 4), -5, 1) -- !query analysis -Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, false) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] +- OneRowRelation +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=true +-- !query analysis +SetCommand (spark.sql.legacy.negativeIndexInArrayInsert,Some(true)) + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query analysis +Project [array_insert(array(1, 3, 4), -2, 2, true) AS array_insert(array(1, 3, 4), -2, 2)#x] ++- OneRowRelation + + +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query analysis +Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, true) AS array_insert(array(2, 3, NULL, 4), -5, 1)#x] ++- OneRowRelation + + +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=false +-- !query analysis +SetCommand (spark.sql.legacy.negativeIndexInArrayInsert,Some(false)) + + -- !query select array_compact(id) from values (1) as t(id) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 
d3c36b79d1f3a..48edc6b474254 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -142,6 +142,11 @@ select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)); select array_insert(array(2, 3, NULL, 4), 5, 5); select array_insert(array(2, 3, NULL, 4), -5, 1); +set spark.sql.legacy.negativeIndexInArrayInsert=true; +select array_insert(array(1, 3, 4), -2, 2); +select array_insert(array(2, 3, NULL, 4), -5, 1); +set spark.sql.legacy.negativeIndexInArrayInsert=false; + -- function array_compact select array_compact(id) from values (1) as t(id); select array_compact(array("1", null, "2", null)); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 62a74435d9bf4..03be0f9d84b1b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -590,7 +590,7 @@ select array_insert(array(1, 3, 4), -2, 2) -- !query schema struct> -- !query output -[1,2,3,4] +[1,3,2,4] -- !query @@ -651,6 +651,30 @@ struct> [2,3,null,4,5] +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query schema +struct> +-- !query output +[1,2,3,null,4] + + +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=true +-- !query schema +struct +-- !query output +spark.sql.legacy.negativeIndexInArrayInsert true + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query schema +struct> +-- !query output +[1,2,3,4] + + -- !query select array_insert(array(2, 3, NULL, 4), -5, 1) -- !query schema @@ -659,6 +683,14 @@ struct> [1,null,2,3,null,4] +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=false +-- !query schema +struct +-- !query output +spark.sql.legacy.negativeIndexInArrayInsert false + + -- !query select array_compact(id) from values (1) as t(id) -- !query schema diff --git 
a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 6c87afccea20d..9dbf4fbebc20b 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -471,7 +471,7 @@ select array_insert(array(1, 3, 4), -2, 2) -- !query schema struct> -- !query output -[1,2,3,4] +[1,3,2,4] -- !query @@ -532,6 +532,30 @@ struct> [2,3,null,4,5] +-- !query +select array_insert(array(2, 3, NULL, 4), -5, 1) +-- !query schema +struct> +-- !query output +[1,2,3,null,4] + + +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=true +-- !query schema +struct +-- !query output +spark.sql.legacy.negativeIndexInArrayInsert true + + +-- !query +select array_insert(array(1, 3, 4), -2, 2) +-- !query schema +struct> +-- !query output +[1,2,3,4] + + -- !query select array_insert(array(2, 3, NULL, 4), -5, 1) -- !query schema @@ -540,6 +564,14 @@ struct> [1,null,2,3,null,4] +-- !query +set spark.sql.legacy.negativeIndexInArrayInsert=false +-- !query schema +struct +-- !query output +spark.sql.legacy.negativeIndexInArrayInsert false + + -- !query select array_compact(id) from values (1) as t(id) -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 0d197da63fd5c..55ccd9ec1ff32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3402,7 +3402,11 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row(null)) ) checkAnswer(df1.selectExpr("array_insert(a, 7, c)"), Seq(Row(Seq(3, 2, 5, 1, 2, null, 3)))) - checkAnswer(df1.selectExpr("array_insert(a, -6, c)"), Seq(Row(Seq(3, null, 3, 2, 5, 1, 2)))) + checkAnswer(df1.selectExpr("array_insert(a, -6, c)"), Seq(Row(Seq(3, 3, 2, 5, 
1, 2)))) + + withSQLConf(SQLConf.LEGACY_NEGATIVE_INDEX_IN_ARRAY_INSERT.key -> "true") { + checkAnswer(df1.selectExpr("array_insert(a, -6, c)"), Seq(Row(Seq(3, null, 3, 2, 5, 1, 2)))) + } } test("transform function - array for primitive type not containing null") {