
Commit

Merge pull request #1636 from apache/master
GulajavaMinistudio authored Apr 2, 2024
2 parents ca2f2d3 + 1fd3089 commit 526244b
Showing 223 changed files with 8,195 additions and 1,792 deletions.
15 changes: 8 additions & 7 deletions .github/workflows/maven_test.yml
@@ -62,15 +62,15 @@ jobs:
- hive2.3
modules:
- >-
core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils
core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#variant
- >-
graphx,streaming,hadoop-cloud
- >-
mllib-local,mllib
- >-
repl,sql#hive-thriftserver
- >-
connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro
connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro,connector#kinesis-asl
- >-
sql#api,sql#catalyst,resource-managers#yarn,resource-managers#kubernetes#core
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
@@ -188,20 +188,21 @@ jobs:
export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
export JAVA_VERSION=${{ matrix.java }}
export ENABLE_KINESIS_TESTS=0
# Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"`
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
else
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
- name: Clean up local Maven repository
run: |
4 changes: 4 additions & 0 deletions common/utils/pom.xml
@@ -98,6 +98,10 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-layout-template-json</artifactId>
</dependency>
</dependencies>
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
69 changes: 44 additions & 25 deletions common/utils/src/main/resources/error/error-classes.json
@@ -304,14 +304,6 @@
],
"sqlState" : "22007"
},
"CANNOT_READ_FILE_FOOTER" : {
"message" : [
"Could not read footer for file: <file>. Please ensure that the file is in either ORC or Parquet format.",
"If not, please convert it to a valid format. If the file is in the valid format, please check if it is corrupt.",
"If it is, you can choose to either ignore it or fix the corruption."
],
"sqlState" : "KD001"
},
"CANNOT_RECOGNIZE_HIVE_TYPE" : {
"message" : [
"Cannot recognize hive type string: <fieldType>, column: <fieldName>. The specified data type for the field cannot be recognized by Spark SQL. Please check the data type of the specified field and ensure that it is a valid Spark SQL data type. Refer to the Spark SQL documentation for a list of valid data types and their format. If the data type is correct, please ensure that you are using a supported version of Spark SQL."
@@ -1257,6 +1249,31 @@
"message" : [
"Encountered error while reading file <path>."
],
"subClass" : {
"CANNOT_READ_FILE_FOOTER" : {
"message" : [
"Could not read footer. Please ensure that the file is in either ORC or Parquet format.",
"If not, please convert it to a valid format. If the file is in the valid format, please check if it is corrupt.",
"If it is, you can choose to either ignore it or fix the corruption."
]
},
"FILE_NOT_EXIST" : {
"message" : [
"File does not exist. It is possible the underlying files have been updated.",
"You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved."
]
},
"NO_HINT" : {
"message" : [
""
]
},
"PARQUET_COLUMN_DATA_TYPE_MISMATCH" : {
"message" : [
"Data type mismatches when reading Parquet column <column>. Expected Spark type <expectedType>, actual Parquet type <actualType>."
]
}
},
"sqlState" : "KD001"
},
"FAILED_REGISTER_CLASS_WITH_KRYO" : {
@@ -2790,6 +2807,19 @@
],
"sqlState" : "42K09"
},
"INVALID_VARIANT_CAST" : {
"message" : [
"The variant value `<value>` cannot be cast into `<dataType>`. Please use `try_variant_get` instead."
],
"sqlState" : "22023"
},
"INVALID_VARIANT_GET_PATH" : {
"message" : [
"The path `<path>` is not a valid variant extraction path in `<functionName>`.",
"A valid path should start with `$` and is followed by zero or more segments like `[123]`, `.name`, `['name']`, or `[\"name\"]`."
],
"sqlState" : "22023"
},
"INVALID_VIEW_TEXT" : {
"message" : [
"The view <viewName> cannot be displayed due to invalid view text: <viewText>. This may be caused by an unauthorized modification of the view or an incorrect query syntax. Please check your query syntax and verify that the view has not been tampered with."
@@ -3542,6 +3572,12 @@
],
"sqlState" : "42802"
},
"STATEFUL_PROCESSOR_CANNOT_REINITIALIZE_STATE_ON_KEY" : {
"message" : [
"Cannot re-initialize state on the same grouping key during initial state handling for stateful processor. Invalid grouping key=<groupingKey>."
],
"sqlState" : "42802"
},
"STATE_STORE_CANNOT_CREATE_COLUMN_FAMILY_WITH_RESERVED_CHARS" : {
"message" : [
"Failed to create column family with unsupported starting character and name=<colFamilyName>."
@@ -6125,12 +6161,6 @@
"buildReader is not supported for <format>."
]
},
"_LEGACY_ERROR_TEMP_2055" : {
"message" : [
"<message>",
"It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved."
]
},
"_LEGACY_ERROR_TEMP_2056" : {
"message" : [
"Unable to clear output directory <staticPrefixPath> prior to writing to it."
@@ -6163,17 +6193,6 @@
"No records should be returned from EmptyDataReader."
]
},
"_LEGACY_ERROR_TEMP_2062" : {
"message" : [
"<message>",
"It is possible the underlying files have been updated. You can explicitly invalidate the cache in Spark by recreating the Dataset/DataFrame involved."
]
},
"_LEGACY_ERROR_TEMP_2063" : {
"message" : [
"Parquet column cannot be converted in file <filePath>. Column: <column>, Expected: <logicalType>, Found: <physicalType>."
]
},
"_LEGACY_ERROR_TEMP_2065" : {
"message" : [
"Cannot create columnar reader."
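The new `INVALID_VARIANT_CAST` and `INVALID_VARIANT_GET_PATH` entries pair with the `common#variant` module added to the test matrix above. A minimal, hypothetical sketch of how these errors surface from a Spark session, assuming the `parse_json`, `variant_get`, and `try_variant_get` SQL functions that the messages themselves reference (the example is illustrative and not part of this diff):

```scala
// Hypothetical sketch (not part of this commit): exercising the new variant error
// classes, assuming the parse_json / variant_get / try_variant_get SQL functions
// referenced by the messages above are available in this Spark build.
import org.apache.spark.sql.SparkSession

object VariantErrorsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("variant-errors")
      .getOrCreate()

    // A valid extraction path: starts with `$` and is followed by segments
    // like `.name` or `[123]`, as described in INVALID_VARIANT_GET_PATH.
    spark.sql("""SELECT variant_get(parse_json('{"a": [1, 2, 3]}'), '$.a[0]', 'int')""").show()

    // A path without the leading `$` (e.g. 'a[0]') would raise
    // INVALID_VARIANT_GET_PATH; a value that cannot be cast to the requested
    // type raises INVALID_VARIANT_CAST, whose message suggests try_variant_get,
    // which returns NULL instead of failing.
    spark.sql("""SELECT try_variant_get(parse_json('{"a": "hello"}'), '$.a', 'int')""").show()

    spark.stop()
  }
}
```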
38 changes: 38 additions & 0 deletions common/utils/src/main/resources/org/apache/spark/SparkLayout.json
@@ -0,0 +1,38 @@
{
"ts": {
"$resolver": "timestamp"
},
"level": {
"$resolver": "level",
"field": "name"
},
"msg": {
"$resolver": "message",
"stringified": true
},
"context": {
"$resolver": "mdc"
},
"exception": {
"class": {
"$resolver": "exception",
"field": "className"
},
"msg": {
"$resolver": "exception",
"field": "message",
"stringified": true
},
"stacktrace": {
"$resolver": "exception",
"field": "stackTrace",
"stackTrace": {
"stringified": true
}
}
},
"logger": {
"$resolver": "logger",
"field": "name"
}
}
@@ -22,8 +22,8 @@ rootLogger.appenderRef.stdout.ref = console
appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
appender.console.layout.type = JsonTemplateLayout
appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.sparkproject.jetty
@@ -0,0 +1,55 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
rootLogger.level = info
rootLogger.appenderRef.stdout.ref = console

appender.console.type = Console
appender.console.name = console
appender.console.target = SYSTEM_ERR
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex

# Settings to quiet third party logs that are too verbose
logger.jetty.name = org.sparkproject.jetty
logger.jetty.level = warn
logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle
logger.jetty2.level = error
logger.repl1.name = org.apache.spark.repl.SparkIMain$exprTyper
logger.repl1.level = info
logger.repl2.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter
logger.repl2.level = info

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
logger.repl.name = org.apache.spark.repl.Main
logger.repl.level = warn

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs
# in SparkSQL with Hive support
logger.metastore.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler
logger.metastore.level = fatal
logger.hive_functionregistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry
logger.hive_functionregistry.level = error

# Parquet related logging
logger.parquet.name = org.apache.parquet.CorruptStatistics
logger.parquet.level = error
logger.parquet2.name = parquet.CorruptStatistics
logger.parquet2.level = error
25 changes: 25 additions & 0 deletions common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala
@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.internal

/**
* Various keys used for mapped diagnostic contexts(MDC) in logging.
* All structured logging keys should be defined here for standardization.
*/
object LogKey extends Enumeration {
val EXECUTOR_ID, MIN_SIZE, MAX_SIZE = Value
}
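Taken together with the `JsonTemplateLayout` change above, these keys standardize the names that land in the `context` field resolved by the `mdc` resolver in `SparkLayout.json`. A minimal, hypothetical sketch of that flow, assuming direct use of Log4j's `ThreadContext` (the actual Spark-side logging helpers are not part of this diff):

```scala
// Hypothetical sketch (not part of this commit): how an MDC entry keyed by a
// LogKey value could end up in the "context" field that SparkLayout.json emits.
// The ThreadContext wiring shown here is an assumption for illustration; this
// commit only introduces the LogKey enumeration and the JSON layout template.
import org.apache.logging.log4j.{LogManager, ThreadContext}

import org.apache.spark.internal.LogKey

object StructuredLoggingSketch {
  private val logger = LogManager.getLogger(getClass.getName)

  def reportExecutorLoss(executorId: String): Unit = {
    // Put the diagnostic value under the standardized key name ...
    ThreadContext.put(LogKey.EXECUTOR_ID.toString, executorId)
    try {
      // ... so the JsonTemplateLayout renders it under "context",
      // e.g. {"context": {"EXECUTOR_ID": "42"}, "msg": "...", ...}.
      logger.warn("Lost executor, marking its tasks as failed")
    } finally {
      ThreadContext.remove(LogKey.EXECUTOR_ID.toString)
    }
  }
}
```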