
Commit

resolve
itholic committed Aug 8, 2023
2 parents 4126def + d2b60ff commit 649180a
Showing 194 changed files with 3,475 additions and 2,654 deletions.
51 changes: 48 additions & 3 deletions .github/workflows/build_and_test.yml
@@ -241,7 +241,10 @@ jobs:
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
- name: Free up disk space
- run: ./dev/free_disk_space
+ run: |
+ if [ -f ./dev/free_disk_space ]; then
+ ./dev/free_disk_space
+ fi
- name: Install Java ${{ matrix.java }}
uses: actions/setup-java@v3
with:
@@ -350,9 +353,11 @@ jobs:
- >-
pyspark-errors
- >-
- pyspark-sql, pyspark-mllib, pyspark-resource, pyspark-testing
+ pyspark-sql, pyspark-resource, pyspark-testing
- >-
- pyspark-core, pyspark-streaming, pyspark-ml
+ pyspark-core, pyspark-streaming
+ - >-
+ pyspark-mllib, pyspark-ml, pyspark-ml-connect
- >-
pyspark-pandas
- >-
@@ -410,6 +415,16 @@ jobs:
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
pyspark-coursier-
+ - name: Free up disk space
+ shell: 'script -q -e -c "bash {0}"'
+ run: |
+ if [[ "$MODULES_TO_TEST" != *"pyspark-ml"* ]]; then
+ # uninstall libraries dedicated for ML testing
+ python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow
+ fi
+ if [ -f ./dev/free_disk_space_container ]; then
+ ./dev/free_disk_space_container
+ fi
- name: Install Java ${{ matrix.java }}
uses: actions/setup-java@v3
with:
@@ -424,6 +439,7 @@
run: |
curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
+ rm miniconda.sh
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
@@ -507,6 +523,11 @@ jobs:
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
sparkr-coursier-
+ - name: Free up disk space
+ run: |
+ if [ -f ./dev/free_disk_space_container ]; then
+ ./dev/free_disk_space_container
+ fi
- name: Install Java ${{ inputs.java }}
uses: actions/setup-java@v3
with:
@@ -615,6 +636,11 @@ jobs:
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
+ - name: Free up disk space
+ run: |
+ if [ -f ./dev/free_disk_space_container ]; then
+ ./dev/free_disk_space_container
+ fi
- name: Install Java 8
uses: actions/setup-java@v3
with:
@@ -631,7 +657,22 @@
- name: Spark connect jvm client mima check
if: inputs.branch != 'branch-3.3'
run: ./dev/connect-jvm-client-mima-check
+ - name: Install Python linter dependencies for branch-3.3
+ if: inputs.branch == 'branch-3.3'
+ run: |
+ # SPARK-44554: Copy from https://github.com/apache/spark/blob/073d0b60d31bf68ebacdc005f59b928a5902670f/.github/workflows/build_and_test.yml#L501-L508
+ # Should delete this section after SPARK 3.3 EOL.
+ python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==21.12b0'
+ python3.9 -m pip install 'pandas-stubs==1.2.0.53'
+ - name: Install Python linter dependencies for branch-3.4
+ if: inputs.branch == 'branch-3.4'
+ run: |
+ # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
+ # Should delete this section after SPARK 3.4 EOL.
+ python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+ python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Install Python linter dependencies
+ if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
@@ -642,13 +683,16 @@ jobs:
- name: Python linter
run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
- name: Install dependencies for Python code generation check
+ if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
run: |
# See more in "Installation" https://docs.buf.build/installation#tarball
curl -LO https://github.com/bufbuild/buf/releases/download/v1.24.0/buf-Linux-x86_64.tar.gz
mkdir -p $HOME/buf
tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
+ rm buf-Linux-x86_64.tar.gz
python3.9 -m pip install 'protobuf==3.20.3' 'mypy-protobuf==3.3.0'
- name: Python code generation check
+ if: inputs.branch != 'branch-3.3' && inputs.branch != 'branch-3.4'
run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
- name: Install JavaScript linter dependencies
run: |
@@ -1027,6 +1071,7 @@ jobs:
# TODO(SPARK-44495): Resume to use the latest minikube for k8s-integration-tests.
curl -LO https://storage.googleapis.com/minikube/releases/v1.30.1/minikube-linux-amd64
sudo install minikube-linux-amd64 /usr/local/bin/minikube
+ rm minikube-linux-amd64
# Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
minikube start --cpus 2 --memory 6144
- name: Print K8S pods and nodes info
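Note on the pattern above: every new disk-cleanup step guards the helper script with a file-existence check, so the reusable workflow keeps working on older release branches that do not ship the script, and ML-only test packages are uninstalled only when no pyspark-ml module is under test. A minimal standalone sketch of that guard logic (the script path and package list mirror the diff; the interpreter and environment are illustrative):

#!/usr/bin/env bash
# Sketch: free disk space using only the tools the current branch actually has.
set -euo pipefail
MODULES_TO_TEST="${MODULES_TO_TEST:-}"
# Drop ML-only test dependencies unless an ML module is being tested.
if [[ "$MODULES_TO_TEST" != *"pyspark-ml"* ]]; then
  python3.9 -m pip uninstall -y torch torchvision torcheval torchtnt tensorboard mlflow
fi
# Invoke the cleanup script only if this branch provides it.
if [ -f ./dev/free_disk_space_container ]; then
  ./dev/free_disk_space_container
fi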
8 changes: 4 additions & 4 deletions .github/workflows/maven_test.yml
@@ -57,11 +57,11 @@ jobs:
- hive2.3
modules:
- >-
- core,repl,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch
+ core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch
- >-
graphx,streaming,mllib-local,mllib,hadoop-cloud
- >-
- sql#hive-thriftserver
+ repl,sql#hive-thriftserver
- >-
connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro
- >-
@@ -187,9 +187,9 @@ jobs:
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
elif [[ "$MODULES_TO_TEST" == "sql#hive-thriftserver" ]]; then
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
- ./build/mvn $MAVEN_CLI_OPTS -pl sql/hive-thriftserver -Phive -Phive-thriftserver -Djava.version=${JAVA_VERSION/-ea} clean install -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
else
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
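The thrift-server branch above was loosened from an exact comparison to a substring match because the module group now reads repl,sql#hive-thriftserver, and only that branch runs clean install (per the in-line comment about avoiding a compilation loop). A small bash illustration of the matching difference, with values taken from the diff:

#!/usr/bin/env bash
# [[ == "x" ]] is an exact string match; [[ == *"x"* ]] matches a substring.
MODULES_TO_TEST="repl,sql#hive-thriftserver"
if [[ "$MODULES_TO_TEST" == "sql#hive-thriftserver" ]]; then
  echo "exact match"       # never printed for the new module group
fi
if [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
  echo "substring match"   # printed, so the clean-install branch is taken
fi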
6 changes: 3 additions & 3 deletions .gitignore
@@ -117,6 +117,6 @@ spark-warehouse/
node_modules

# For Antlr
- sql/catalyst/gen/
- sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.tokens
- sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/gen/
+ sql/api/gen/
+ sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.tokens
+ sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/gen/
50 changes: 36 additions & 14 deletions common/utils/src/main/resources/error/error-classes.json
@@ -809,12 +809,12 @@
"subClass" : {
"BOTH_POSITIONAL_AND_NAMED" : {
"message" : [
"A positional argument and named argument both referred to the same parameter."
"A positional argument and named argument both referred to the same parameter. Please remove the named argument referring to this parameter."
]
},
"DOUBLE_NAMED_ARGUMENT_REFERENCE" : {
"message" : [
"More than one named argument referred to the same parameter."
"More than one named argument referred to the same parameter. Please assign a value only once."
]
}
},
@@ -831,6 +831,11 @@
"Not found an encoder of the type <typeName> to Spark SQL internal representation. Consider to change the input type to one of supported at '<docroot>/sql-ref-datatypes.html'."
]
},
"ERROR_READING_AVRO_UNKNOWN_FINGERPRINT" : {
"message" : [
"Error reading avro data -- encountered an unknown fingerprint: <fingerprint>, not sure what schema to use. This could happen if you registered additional schemas after starting your spark context."
]
},
"EVENT_TIME_IS_NOT_ON_TIMESTAMP_TYPE" : {
"message" : [
"The event time <eventName> has the invalid type <eventType>, but expected \"TIMESTAMP\"."
@@ -864,6 +869,11 @@
],
"sqlState" : "22018"
},
"FAILED_REGISTER_CLASS_WITH_KRYO" : {
"message" : [
"Failed to register classes with Kryo."
]
},
"FAILED_RENAME_PATH" : {
"message" : [
"Failed to rename <sourcePath> to <targetPath> as destination already exists."
@@ -1564,6 +1574,12 @@
],
"sqlState" : "22032"
},
"INVALID_KRYO_SERIALIZER_BUFFER_SIZE" : {
"message" : [
"The value of the config \"<bufferSizeConfKey>\" must be less than 2048 MiB, but got <bufferSizeConfValue> MiB."
],
"sqlState" : "F0000"
},
"INVALID_LAMBDA_FUNCTION_CALL" : {
"message" : [
"Invalid lambda function call."
@@ -2006,6 +2022,11 @@
"The join condition <joinCondition> has the invalid type <conditionType>, expected \"BOOLEAN\"."
]
},
"KRYO_BUFFER_OVERFLOW" : {
"message" : [
"Kryo serialization failed: <exceptionMsg>. To avoid this, increase \"<bufferSizeConfKey>\" value."
]
},
"LOAD_DATA_PATH_NOT_EXISTS" : {
"message" : [
"LOAD DATA input path does not exist: <path>."
@@ -2043,6 +2064,11 @@
"Parsing JSON arrays as structs is forbidden."
]
},
"CANNOT_PARSE_STRING_AS_DATATYPE" : {
"message" : [
"Cannot parse the value <fieldValue> of the field <fieldName> as target spark data type <targetType> from the input type <inputType>."
]
},
"WITHOUT_SUGGESTION" : {
"message" : [
""
@@ -2446,7 +2472,7 @@
},
"REQUIRED_PARAMETER_NOT_FOUND" : {
"message" : [
"Cannot invoke function <functionName> because the parameter named <parameterName> is required, but the function call did not supply a value. Please update the function call to supply an argument value (either positionally or by name) and retry the query again."
"Cannot invoke function <functionName> because the parameter named <parameterName> is required, but the function call did not supply a value. Please update the function call to supply an argument value (either positionally at index <index> or by name) and retry the query again."
],
"sqlState" : "4274K"
},
@@ -2471,6 +2497,12 @@
],
"sqlState" : "42883"
},
"RULE_ID_NOT_FOUND" : {
"message" : [
"Not found an id for the rule name \"<ruleName>\". Please modify RuleIdCollection.scala if you are adding a new rule."
],
"sqlState" : "22023"
},
"SCALAR_SUBQUERY_IS_IN_GROUP_BY_OR_AGGREGATE_FUNCTION" : {
"message" : [
"The correlated scalar subquery '<sqlExpr>' is neither present in GROUP BY, nor in an aggregate function. Add it to GROUP BY using ordinal position or wrap it in `first()` (or `first_value`) if you don't care which value you get."
@@ -2647,7 +2679,7 @@
},
"UNEXPECTED_POSITIONAL_ARGUMENT" : {
"message" : [
"Cannot invoke function <functionName> because it contains positional argument(s) following named argument(s); please rearrange them so the positional arguments come first and then retry the query again."
"Cannot invoke function <functionName> because it contains positional argument(s) following the named argument assigned to <parameterName>; please rearrange them so the positional arguments come first and then retry the query again."
],
"sqlState" : "4274K"
},
@@ -5312,11 +5344,6 @@
"Exception when registering StreamingQueryListener."
]
},
"_LEGACY_ERROR_TEMP_2133" : {
"message" : [
"Cannot parse field name <fieldName>, field value <fieldValue>, [<token>] as target spark data type [<dataType>]."
]
},
"_LEGACY_ERROR_TEMP_2134" : {
"message" : [
"Cannot parse field value <value> for pattern <pattern> as target spark data type [<dataType>]."
@@ -5489,11 +5516,6 @@
"<plan>."
]
},
"_LEGACY_ERROR_TEMP_2175" : {
"message" : [
"Rule id not found for <ruleName>. Please modify RuleIdCollection.scala if you are adding a new rule."
]
},
"_LEGACY_ERROR_TEMP_2176" : {
"message" : [
"Cannot create array with <numElements> elements of data due to exceeding the limit <maxRoundedArrayLength> elements for ArrayData. <additionalErrorMessage>"
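For context on the new Kryo error classes: KRYO_BUFFER_OVERFLOW tells the user to raise the buffer-size config it names, and INVALID_KRYO_SERIALIZER_BUFFER_SIZE caps that value below 2048 MiB. Assuming <bufferSizeConfKey> resolves to the standard spark.kryoserializer.buffer.max setting, a typical reaction would look like the following sketch (application file name and value are illustrative):

# Hypothetical fix for KRYO_BUFFER_OVERFLOW: enlarge the Kryo buffer cap,
# staying under the 2048 MiB limit enforced by INVALID_KRYO_SERIALIZER_BUFFER_SIZE.
spark-submit \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryoserializer.buffer.max=512m \
  my_app.py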