Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-955] Support md5/sha1/sha2 functions #1055

Merged
merged 9 commits into from
Aug 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ case class ColumnarConditionProjectExec(
// check datatype
originalInputAttributes.toList.foreach(attr => {
try {
ConverterUtils.checkIfTypeSupported(attr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(attr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand All @@ -81,7 +81,7 @@ case class ColumnarConditionProjectExec(
// check expr
if (condExpr != null) {
try {
ConverterUtils.checkIfTypeSupported(condExpr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(condExpr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand All @@ -92,7 +92,7 @@ case class ColumnarConditionProjectExec(
if (projectList != null) {
for (expr <- projectList) {
try {
ConverterUtils.checkIfTypeSupported(expr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(expr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,25 @@ class ColumnarFindInSet(left: Expression, right: Expression, original: Expressio
}
}

/**
 * Columnar variant of [[Sha2]].
 *
 * Code generation delegates to both operands (which must be
 * [[ColumnarExpression]] instances) and wraps the resulting nodes in a
 * function node named "sha2" whose result type is Arrow Utf8.
 */
class ColumnarSha2(left: Expression, right: Expression) extends Sha2(left, right)
    with ColumnarExpression with Logging {

  /** Always reports `false` for this expression. */
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "sha2" function node from the operands' generated nodes.
   *
   * @param args opaque argument forwarded unchanged to both operands
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Only the tree node of each operand is needed; the operand's Arrow type is discarded.
    val lhsNode: TreeNode =
      left.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }
    val rhsNode: TreeNode =
      right.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }
    val utf8Type = new ArrowType.Utf8()
    val operands = Lists.newArrayList(lhsNode, rhsNode)
    (TreeBuilder.makeFunction("sha2", operands, utf8Type), utf8Type)
  }
}

object ColumnarBinaryExpression {

def create(left: Expression, right: Expression, original: Expression): Expression =
Expand All @@ -182,6 +201,8 @@ object ColumnarBinaryExpression {
new ColumnarPow(left, right, pow)
case f: FindInSet =>
new ColumnarFindInSet(left, right, f)
case _: Sha2 =>
new ColumnarSha2(left, right)
case other =>
throw new UnsupportedOperationException(s"not currently supported: $other.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class ColumnarBoundReference(ordinal: Int, dataType: DataType, nullable: Boolean
dataType match {
case at: ArrayType =>
case _ =>
ConverterUtils.checkIfTypeSupported(dataType)
ConverterUtils.checkIfTypeSupportedInProjection(dataType)
}
} catch {
case e: UnsupportedOperationException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ class ColumnarCast(
def buildCheck(): Unit = {
if (!datatype.isInstanceOf[DecimalType]) {
try {
ConverterUtils.checkIfTypeSupported(datatype)
ConverterUtils.checkIfTypeSupportedInProjection(datatype)
} catch {
case e: UnsupportedOperationException =>
throw new UnsupportedOperationException(s"${datatype} is not supported in ColumnarCast")
Expand Down Expand Up @@ -577,8 +577,15 @@ class ColumnarCast(
throw new UnsupportedOperationException(
s"${child.dataType} is not supported in castTIMESTAMP")
}
} else if (dataType == BinaryType) {
val supported = List(StringType)
if (supported.indexOf(child.dataType) == -1) {
throw new UnsupportedOperationException(s"${child.dataType}" +
s" is not supported in casting to binary.")
}
} else {
throw new UnsupportedOperationException(s"not currently supported: ${dataType}.")
throw new UnsupportedOperationException(s"not currently supported" +
s" data type in cast: ${dataType}.")
}
}

Expand Down Expand Up @@ -788,6 +795,15 @@ class ColumnarCast(
intermediateType)
}
ConverterUtils.convertTimestampToMicro(funcNode, intermediateType)
} else if (dataType == BinaryType) {
val funcNode = child.dataType match {
case _: StringType =>
TreeBuilder.makeFunction("binary_string",
Lists.newArrayList(child_node0), new ArrowType.Binary())
case _ =>
throw new UnsupportedOperationException (s"not currently supported: ${dataType}.")
}
(funcNode, new ArrowType.Binary())
} else {
throw new UnsupportedOperationException(s"not currently supported: ${dataType}.")
}
Expand Down Expand Up @@ -957,7 +973,7 @@ class ColumnarLength(child: Expression) extends Length(child: Expression)
(TreeBuilder.makeFunction("char_length", Lists.newArrayList(child_node),
resultType), resultType)
case BinaryType =>
(TreeBuilder.makeFunction("length", Lists.newArrayList(child_node),
(TreeBuilder.makeFunction("lengthUtf8", Lists.newArrayList(child_node),
resultType), resultType)
case _ =>
throw new RuntimeException("Fix me. Either StringType or BinaryType is allowed!")
Expand Down Expand Up @@ -1003,6 +1019,40 @@ class ColumnarBin(child: Expression) extends Bin(child: Expression)
}
}

/**
 * Columnar variant of [[Md5]].
 *
 * Code generation delegates to the child (which must be a
 * [[ColumnarExpression]]) and wraps its node in a function node named "md5"
 * whose result type is Arrow Utf8.
 */
class ColumnarMd5(child: Expression) extends Md5(child) with ColumnarExpression
    with Logging {

  /** Always reports `false` for this expression. */
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "md5" function node from the child's generated node.
   *
   * @param args opaque argument forwarded unchanged to the child
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Only the child's tree node is needed; its Arrow type is discarded.
    val inputNode: TreeNode =
      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }
    val utf8Type = new ArrowType.Utf8()
    val operands = Lists.newArrayList(inputNode)
    (TreeBuilder.makeFunction("md5", operands, utf8Type), utf8Type)
  }
}

/**
 * Columnar variant of [[Sha1]].
 *
 * Code generation delegates to the child (which must be a
 * [[ColumnarExpression]]) and wraps its node in a function node named "sha1"
 * whose result type is Arrow Utf8.
 */
class ColumnarSha1(child: Expression) extends Sha1(child) with ColumnarExpression
    with Logging {

  /** Always reports `false` for this expression. */
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "sha1" function node from the child's generated node.
   *
   * @param args opaque argument forwarded unchanged to the child
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Only the child's tree node is needed; its Arrow type is discarded.
    val inputNode: TreeNode =
      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }
    val utf8Type = new ArrowType.Utf8()
    val operands = Lists.newArrayList(inputNode)
    (TreeBuilder.makeFunction("sha1", operands, utf8Type), utf8Type)
  }
}

object ColumnarUnaryOperator {

def create(child: Expression, original: Expression): Expression = original match {
Expand Down Expand Up @@ -1078,6 +1128,10 @@ object ColumnarUnaryOperator {
new ColumnarHex(child)
case _: Bin =>
new ColumnarBin(child)
case _: Md5 =>
new ColumnarMd5(child)
case _: Sha1 =>
new ColumnarSha1(child)
case other =>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also add Sha/Sha1/Sha2? It looks like the API is the same.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just added support for them in the latest commits. BTW, calling sha actually invokes sha1.

child.dataType match {
case _: DateType => other match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,14 @@ object ConverterUtils extends Logging {
builder.build.toByteArray
}

/**
 * Type check used by projection: accepts `BinaryType` outright and defers
 * every other type to `checkIfTypeSupported`.
 *
 * Currently, only projection is enabled to support BinaryType.
 * TODO: support BinaryType in all other operators.
 *
 * @param dt the data type to validate
 */
def checkIfTypeSupportedInProjection(dt: DataType): Unit = {
  dt match {
    // BinaryType is allowed here without consulting the general check.
    case _: BinaryType => ()
    case nonBinary => checkIfTypeSupported(nonBinary)
  }
}

def checkIfTypeSupported(dt: DataType): Unit = dt match {
case d: BooleanType =>
case d: ByteType =>
Expand Down
8 changes: 4 additions & 4 deletions native-sql-engine/tools/run_ut.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ tests_total=0
module_tested=0
module_should_test=7
while read -r line ; do
num=$(echo "$line" | grep -o -E '[0-9]+')
num=$(echo "$line" | grep -a -o -E '[0-9]+')
tests_total=$((tests_total+num))
done <<<"$(grep "Total number of tests run:" log-file.log)"
done <<<"$(grep -a "Total number of tests run:" log-file.log)"

succeed_total=0
while read -r line ; do
[[ $line =~ [^0-9]*([0-9]+)\, ]]
num=${BASH_REMATCH[1]}
succeed_total=$((succeed_total+num))
let module_tested++
done <<<"$(grep "succeeded" log-file.log)"
done <<<"$(grep -a "succeeded" log-file.log)"
failed_count=$((tests_total-succeed_total))
echo "Tests total: $tests_total, Succeed Total: $succeed_total, Known Fails: $known_fails, Actual Fails: $failed_count."

cat log-file.log | grep "\*** FAILED \***" | grep -v "TESTS FAILED ***" | grep -v "TEST FAILED ***" &> new_failed_list.log
cat log-file.log | grep -a "\*** FAILED \***" | grep -v -a "TESTS FAILED ***" | grep -v -a "TEST FAILED ***" &> new_failed_list.log
comm -1 -3 <(sort failed_ut_list.log) <(sort new_failed_list.log) &> newly_failed_tests.log
comm -2 -3 <(sort failed_ut_list.log) <(sort new_failed_list.log) &> fixed_tests.log
if [ -s newly_failed_tests.log ]
Expand Down