Skip to content

Commit

Permalink
[SPARK-47261][SQL] Assign better name for errors _LEGACY_ERROR_TEMP_1…
Browse files Browse the repository at this point in the history
…172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174

### What changes were proposed in this pull request?

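Assign better names for the errors `_LEGACY_ERROR_TEMP_1172`, `_LEGACY_ERROR_TEMP_1173`, and `_LEGACY_ERROR_TEMP_1174`:

- `_LEGACY_ERROR_TEMP_1172` -> `PARQUET_TYPE_NOT_SUPPORTED`
- `_LEGACY_ERROR_TEMP_1173` -> `PARQUET_TYPE_ILLEGAL`
- `_LEGACY_ERROR_TEMP_1174` -> `PARQUET_TYPE_NOT_RECOGNIZED`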

### Why are the changes needed?

Proper names improve the user experience with Spark SQL.

### Does this PR introduce _any_ user-facing change?

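Yes. The three errors are now reported under the new error condition names and carry SQLSTATE 42846; the message text is unchanged.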

### How was this patch tested?

Added new tests and ran all the tests in the following suites:
```
org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite
org.apache.spark.SparkThrowableSuite
```

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #47421 from junyuc25/SPARK-47261.

Authored-by: junyuc25 <JunyuChen-0115@users.noreply.github.com>
Signed-off-by: Max Gekk <max.gekk@gmail.com>
  • Loading branch information
junyuc25 authored and MaxGekk committed Aug 9, 2024
1 parent 81948bb commit 731a104
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 22 deletions.
33 changes: 18 additions & 15 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -3543,6 +3543,24 @@
],
"sqlState" : "42805"
},
"PARQUET_TYPE_ILLEGAL" : {
"message" : [
"Illegal Parquet type: <parquetType>."
],
"sqlState" : "42846"
},
"PARQUET_TYPE_NOT_RECOGNIZED" : {
"message" : [
"Unrecognized Parquet type: <field>."
],
"sqlState" : "42846"
},
"PARQUET_TYPE_NOT_SUPPORTED" : {
"message" : [
"Parquet type not yet supported: <parquetType>."
],
"sqlState" : "42846"
},
"PARSE_EMPTY_STATEMENT" : {
"message" : [
"Syntax error, unexpected empty statement."
Expand Down Expand Up @@ -5881,21 +5899,6 @@
"createTableColumnTypes option column <col> not found in schema <schema>."
]
},
"_LEGACY_ERROR_TEMP_1172" : {
"message" : [
"Parquet type not yet supported: <parquetType>."
]
},
"_LEGACY_ERROR_TEMP_1173" : {
"message" : [
"Illegal Parquet type: <parquetType>."
]
},
"_LEGACY_ERROR_TEMP_1174" : {
"message" : [
"Unrecognized Parquet type: <field>."
]
},
"_LEGACY_ERROR_TEMP_1181" : {
"message" : [
"Stream-stream join without equality predicate is not supported."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1995,19 +1995,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat

def parquetTypeUnsupportedYetError(parquetType: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1172",
errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
messageParameters = Map("parquetType" -> parquetType))
}

def illegalParquetTypeError(parquetType: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1173",
errorClass = "PARQUET_TYPE_ILLEGAL",
messageParameters = Map("parquetType" -> parquetType))
}

def unrecognizedParquetTypeError(field: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1174",
errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
messageParameters = Map("field" -> field))
}

Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -1111,10 +1111,37 @@ class ParquetSchemaSuite extends ParquetSchemaTest {

test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default nanosAsLong=false)") {
val testDataPath = testFile("test-data/timestamp-nanos.parquet")
val e = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
}
assert(e.getMessage.contains("Illegal Parquet type: INT64 (TIMESTAMP(NANOS,true))."))
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_ILLEGAL",
parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
)
}

test("SPARK-47261: parquet file with unsupported type") {
val testDataPath = testFile("test-data/interval-using-fixed-len-byte-array.parquet")
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)")
)
}

test("SPARK-47261: parquet file with unrecognized parquet type") {
val testDataPath = testFile("test-data/group-field-with-enum-as-logical-annotation.parquet")
val expectedParameter = "required group my_list (ENUM) {\n repeated group list {\n" +
" optional binary element (STRING);\n }\n}"
checkError(
exception = intercept[AnalysisException] {
spark.read.parquet(testDataPath).collect()
},
errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
parameters = Map("field" -> expectedParameter)
)
}

// =======================================================
Expand Down

0 comments on commit 731a104

Please sign in to comment.