From 20770fbbc470ee3d1f5fc28b733d6dae2d5ad410 Mon Sep 17 00:00:00 2001 From: "Prashant K. Sharma" Date: Sat, 4 May 2024 12:39:23 +0900 Subject: [PATCH 1/6] Fixes Issue #361: Use enum to represent CAST eval_mode in expr.proto --- core/src/execution/proto/expr.proto | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/execution/proto/expr.proto b/core/src/execution/proto/expr.proto index 042a981f4..c2a88a7eb 100644 --- a/core/src/execution/proto/expr.proto +++ b/core/src/execution/proto/expr.proto @@ -238,7 +238,11 @@ message Cast { DataType datatype = 2; string timezone = 3; // LEGACY, ANSI, or TRY - string eval_mode = 4; + enum EvalMode { + LEGACY = 0; + TRY = 1; + ANSI = 2; + } } message Equal { From f48bd932adca026eba0671c1a95b3782ae425de8 Mon Sep 17 00:00:00 2001 From: "Prashant K. Sharma" Date: Sat, 4 May 2024 15:47:23 +0900 Subject: [PATCH 2/6] Update expr.proto and QueryPlanSerde.scala for handling enum EvalMode for cast message --- core/src/execution/proto/expr.proto | 16 ++++++++++------ .../apache/comet/serde/QueryPlanSerde.scala | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/core/src/execution/proto/expr.proto b/core/src/execution/proto/expr.proto index c2a88a7eb..605e08aad 100644 --- a/core/src/execution/proto/expr.proto +++ b/core/src/execution/proto/expr.proto @@ -233,16 +233,20 @@ message Remainder { DataType return_type = 4; } +enum EvalMode { + LEGACY = 0; + TRY = 1; + ANSI = 2; +} + message Cast { Expr child = 1; DataType datatype = 2; string timezone = 3; - // LEGACY, ANSI, or TRY - enum EvalMode { - LEGACY = 0; - TRY = 1; - ANSI = 2; - } + // Depreciateid: LEGACY, ANSI, or TRY - preserved for backward compatibity + string eval_mode_string= 4; // for backward compatibility + EvalMode eval_mode = 5; // New enum field + } message Equal { diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 6eda0547f..170504c55 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -525,6 +525,18 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { * @return * The protobuf representation of the expression, or None if the expression is not supported */ + + def stringToEvalMode(evalModeStr: String): ExprOuterClass.EvalMode = + evalModeStr.toUpperCase match { + case "LEGACY" => ExprOuterClass.EvalMode.LEGACY + case "TRY" => ExprOuterClass.EvalMode.TRY + case "ANSI" => ExprOuterClass.EvalMode.ANSI + case _ => + throw new IllegalArgumentException( + "Invalid eval mode" + ) // Assuming we want to catch errors strictly + } + def exprToProto( expr: Expression, input: Seq[Attribute], @@ -535,12 +547,14 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { childExpr: Option[Expr], evalMode: String): Option[Expr] = { val dataType = serializeDataType(dt) + val evalModeEnum = stringToEvalMode(evalMode) // Convert string to enum if (childExpr.isDefined && dataType.isDefined) { val castBuilder = ExprOuterClass.Cast.newBuilder() castBuilder.setChild(childExpr.get) castBuilder.setDatatype(dataType.get) - castBuilder.setEvalMode(evalMode) + // castBuilder.setEvalMode(evalMode) + castBuilder.setEvalMode(evalModeEnum) // Set the enum in protobuf val timeZone = timeZoneId.getOrElse("UTC") castBuilder.setTimezone(timeZone) @@ -1207,7 +1221,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { .newBuilder() .setChild(e) .setDatatype(serializeDataType(IntegerType).get) - .setEvalMode("LEGACY") // year is not affected by ANSI mode + .setEvalMode(stringToEvalMode("LEGACY")) // year is not affected by ANSI mode .build()) .build() }) From 5e6a35c5d7b6881203621da38e5422935f7890c7 Mon Sep 17 00:00:00 2001 From: "Prashant K. Sharma" Date: Sat, 11 May 2024 15:01:16 +0900 Subject: [PATCH 3/6] issue 361 fixed type issue for eval_mode in planner.rs --- core/src/execution/datafusion/planner.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index 72174790b..fb8f18a3f 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -346,10 +346,10 @@ impl PhysicalPlanner { let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?; let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap()); let timezone = expr.timezone.clone(); - let eval_mode = match expr.eval_mode.as_str() { - "ANSI" => EvalMode::Ansi, - "TRY" => EvalMode::Try, - "LEGACY" => EvalMode::Legacy, + let eval_mode = match expr.eval_mode { + 0 => EvalMode::Legacy, + 1 => EvalMode::Try, + 2 => EvalMode::Ansi, other => { return Err(ExecutionError::GeneralError(format!( "Invalid Cast EvalMode: \"{other}\"" From 0b76c4a3708dfbd73860909ab35678ae15aefdb0 Mon Sep 17 00:00:00 2001 From: "Prashant K. Sharma" Date: Sat, 11 May 2024 16:16:35 +0900 Subject: [PATCH 4/6] issue 361 refactored QueryPlanSerde.scala for enhanced type safety and localization compliance, including a new string-to-enum conversion function and improved import organization. --- .../main/scala/org/apache/comet/serde/QueryPlanSerde.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 170504c55..cb12bd456 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -19,6 +19,8 @@ package org.apache.comet.serde +import java.util.Locale + import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging @@ -527,7 +529,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { */ def stringToEvalMode(evalModeStr: String): ExprOuterClass.EvalMode = - evalModeStr.toUpperCase match { + evalModeStr.toUpperCase(Locale.ROOT) match { case "LEGACY" => ExprOuterClass.EvalMode.LEGACY case "TRY" => ExprOuterClass.EvalMode.TRY case "ANSI" => ExprOuterClass.EvalMode.ANSI From 773d8d17ccae6a17b1be3600afa67788d0203317 Mon Sep 17 00:00:00 2001 From: "Prashant K. Sharma" Date: Sat, 1 Jun 2024 13:00:01 +0900 Subject: [PATCH 5/6] Updated planner.rs, expr.proto, QueryPlanSerde.scala for enum support --- core/src/execution/datafusion/planner.rs | 14 +++++--------- core/src/execution/proto/expr.proto | 4 +--- .../org/apache/comet/serde/QueryPlanSerde.scala | 6 +++--- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/core/src/execution/datafusion/planner.rs b/core/src/execution/datafusion/planner.rs index fb8f18a3f..a89cdd93e 100644 --- a/core/src/execution/datafusion/planner.rs +++ b/core/src/execution/datafusion/planner.rs @@ -346,16 +346,12 @@ impl PhysicalPlanner { let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?; let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap()); let timezone = expr.timezone.clone(); - let eval_mode = match expr.eval_mode { - 0 => EvalMode::Legacy, - 1 => EvalMode::Try, - 2 => EvalMode::Ansi, - other => { - return Err(ExecutionError::GeneralError(format!( - "Invalid Cast EvalMode: \"{other}\"" - ))) - } + let eval_mode = match spark_expression::EvalMode::try_from(expr.eval_mode)? { + spark_expression::EvalMode::Legacy => EvalMode::Legacy, + spark_expression::EvalMode::Try => EvalMode::Try, + spark_expression::EvalMode::Ansi => EvalMode::Ansi, }; + Ok(Arc::new(Cast::new(child, datatype, eval_mode, timezone))) } ExprStruct::Hour(expr) => { diff --git a/core/src/execution/proto/expr.proto b/core/src/execution/proto/expr.proto index 605e08aad..cafd1526f 100644 --- a/core/src/execution/proto/expr.proto +++ b/core/src/execution/proto/expr.proto @@ -243,9 +243,7 @@ message Cast { Expr child = 1; DataType datatype = 2; string timezone = 3; - // Depreciateid: LEGACY, ANSI, or TRY - preserved for backward compatibity - string eval_mode_string= 4; // for backward compatibility - EvalMode eval_mode = 5; // New enum field + EvalMode eval_mode = 4; } diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index cb12bd456..9e7f8ecc0 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -533,9 +533,9 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { case "LEGACY" => ExprOuterClass.EvalMode.LEGACY case "TRY" => ExprOuterClass.EvalMode.TRY case "ANSI" => ExprOuterClass.EvalMode.ANSI - case _ => + case invalid => throw new IllegalArgumentException( - "Invalid eval mode" + s"Invalid eval mode '$invalid' " ) // Assuming we want to catch errors strictly } @@ -1223,7 +1223,7 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { .newBuilder() .setChild(e) .setDatatype(serializeDataType(IntegerType).get) - .setEvalMode(stringToEvalMode("LEGACY")) // year is not affected by ANSI mode + .setEvalMode(ExprOuterClass.EvalMode.LEGACY) .build()) .build() }) From 8184242d95513f81578aac1f6c5d06539e12e914 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Mon, 3 Jun 2024 09:50:09 -0600 Subject: [PATCH 6/6] Update spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala --- spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 9e7f8ecc0..c2e4907b0 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -555,7 +555,6 @@ object QueryPlanSerde extends Logging with ShimQueryPlanSerde { val castBuilder = ExprOuterClass.Cast.newBuilder() castBuilder.setChild(childExpr.get) castBuilder.setDatatype(dataType.get) - // castBuilder.setEvalMode(evalMode) castBuilder.setEvalMode(evalModeEnum) // Set the enum in protobuf val timeZone = timeZoneId.getOrElse("UTC")