Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-26211][SQL] Fix InSet for binary, and struct and array with null. #23176

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -367,31 +367,26 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
}

@transient lazy val set: Set[Any] = child.dataType match {
case _: AtomicType => hset
case t: AtomicType if !t.isInstanceOf[BinaryType] => hset
case _: NullType => hset
case _ =>
// for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows
TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset
TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ (hset - null)
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val setTerm = ctx.addReferenceObj("set", set)
val childGen = child.genCode(ctx)
val setIsNull = if (hasNull) {
s"${ev.isNull} = !${ev.value};"
} else {
""
}
ev.copy(code =
code"""
|${childGen.code}
|${CodeGenerator.JAVA_BOOLEAN} ${ev.isNull} = ${childGen.isNull};
|${CodeGenerator.JAVA_BOOLEAN} ${ev.value} = false;
|if (!${ev.isNull}) {
| ${ev.value} = $setTerm.contains(${childGen.value});
| $setIsNull
|}
""".stripMargin)
nullSafeCodeGen(ctx, ev, c => {
val setTerm = ctx.addReferenceObj("set", set)
val setIsNull = if (hasNull) {
s"${ev.isNull} = !${ev.value};"
} else {
""
}
s"""
|${ev.value} = $setTerm.contains($c);
|$setIsNull
""".stripMargin
})
}

override def sql: String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(InSet(nl, nS), null)

val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType,
LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType)
LongType, BooleanType, DecimalType.USER_DEFAULT, TimestampType)
primitiveTypes.foreach { t =>
val dataGen = RandomDataGenerator.forType(t, nullable = true).get
val inputData = Seq.fill(10) {
Expand All @@ -293,6 +293,54 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}

test("INSET: binary") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the semantics, InSet is equal to In. Could we combine the test cases? Test both?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea! we should test In and InSet together

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I'll do it later. Thanks!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Submitted #23187.

val hS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte)
val nS = HashSet[Any]() + Array(1.toByte, 2.toByte) + Array(3.toByte) + null
val onetwo = Literal(Array(1.toByte, 2.toByte))
val three = Literal(Array(3.toByte))
val threefour = Literal(Array(3.toByte, 4.toByte))
val nl = Literal(null, onetwo.dataType)
checkEvaluation(InSet(onetwo, hS), true)
checkEvaluation(InSet(three, hS), true)
checkEvaluation(InSet(three, nS), true)
checkEvaluation(InSet(threefour, hS), false)
checkEvaluation(InSet(threefour, nS), null)
checkEvaluation(InSet(nl, hS), null)
checkEvaluation(InSet(nl, nS), null)
}

test("INSET: struct") {
val hS = HashSet[Any]() + Literal.create((1, "a")).value + Literal.create((2, "b")).value
val nS = HashSet[Any]() + Literal.create((1, "a")).value + Literal.create((2, "b")).value + null
val oneA = Literal.create((1, "a"))
val twoB = Literal.create((2, "b"))
val twoC = Literal.create((2, "c"))
val nl = Literal(null, oneA.dataType)
checkEvaluation(InSet(oneA, hS), true)
checkEvaluation(InSet(twoB, hS), true)
checkEvaluation(InSet(twoB, nS), true)
checkEvaluation(InSet(twoC, hS), false)
checkEvaluation(InSet(twoC, nS), null)
checkEvaluation(InSet(nl, hS), null)
checkEvaluation(InSet(nl, nS), null)
}

test("INSET: array") {
val hS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value
val nS = HashSet[Any]() + Literal.create(Seq(1, 2)).value + Literal.create(Seq(3)).value + null
val onetwo = Literal.create(Seq(1, 2))
val three = Literal.create(Seq(3))
val threefour = Literal.create(Seq(3, 4))
val nl = Literal(null, onetwo.dataType)
checkEvaluation(InSet(onetwo, hS), true)
checkEvaluation(InSet(three, hS), true)
checkEvaluation(InSet(three, nS), true)
checkEvaluation(InSet(threefour, hS), false)
checkEvaluation(InSet(threefour, nS), null)
checkEvaluation(InSet(nl, hS), null)
checkEvaluation(InSet(nl, nS), null)
}

private case class MyStruct(a: Long, b: String)
private case class MyStruct2(a: MyStruct, b: Array[Int])
private val udt = new ExamplePointUDT
Expand Down