Merge pull request #1643 from apache/master
GulajavaMinistudio authored May 8, 2024
2 parents bfc83d3 + f5401ba commit 4d1d141
Showing 118 changed files with 1,965 additions and 497 deletions.
105 changes: 96 additions & 9 deletions .github/workflows/build_and_test.yml
@@ -83,15 +83,21 @@ jobs:
yarn=`./dev/is-changed.py -m yarn`
kubernetes=`./dev/is-changed.py -m kubernetes`
sparkr=`./dev/is-changed.py -m sparkr`
+ tpcds=`./dev/is-changed.py -m sql`
+ docker=`./dev/is-changed.py -m docker-integration-tests`
buf=true
ui=true
+ docs=true
else
pandas=false
yarn=false
kubernetes=false
sparkr=false
+ tpcds=false
+ docker=false
buf=false
ui=false
+ docs=false
fi
build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,protobuf,yarn,connect,sql,hive"`
precondition="
@@ -100,9 +106,10 @@ jobs:
\"pyspark\": \"$pyspark\",
\"pyspark-pandas\": \"$pandas\",
\"sparkr\": \"$sparkr\",
\"tpcds-1g\": \"false\",
\"docker-integration-tests\": \"false\",
\"tpcds-1g\": \"$tpcds\",
\"docker-integration-tests\": \"$docker\",
\"lint\" : \"true\",
\"docs\" : \"$docs\",
\"yarn\" : \"$yarn\",
\"k8s-integration-tests\" : \"$kubernetes\",
\"buf\" : \"$buf\",
@@ -156,9 +163,8 @@ jobs:
mllib-local, mllib, graphx
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl,
- kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf
+ kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf, connect
- yarn
- - connect
# Here, we split the Hive and SQL tests into the slow ones and the rest.
included-tags: [""]
excluded-tags: [""]
@@ -622,12 +628,12 @@ jobs:
- name: Python CodeGen check
run: ./dev/connect-check-protos.py

- # Static analysis, and documentation build
+ # Static analysis
lint:
needs: [precondition, infra-image]
# always run if lint == 'true', even if infra-image is skipped (such as in non-master jobs)
if: (!cancelled()) && fromJson(needs.precondition.outputs.required).lint == 'true'
- name: Linters, licenses, dependencies and documentation generation
+ name: Linters, licenses, and dependencies
runs-on: ubuntu-latest
timeout-minutes: 180
env:
@@ -765,7 +771,90 @@ jobs:
Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')"
- name: Install R linter dependencies and SparkR
run: ./R/install-dev.sh
# Should be deleted after Spark 3.5 reaches EOL.
- name: R linter
run: ./dev/lint-r

# Documentation build
docs:
needs: [precondition, infra-image]
# always run if docs == 'true', even if infra-image is skipped (such as in non-master jobs)
if: (!cancelled()) && fromJson(needs.precondition.outputs.required).docs == 'true'
name: Documentation generation
runs-on: ubuntu-latest
timeout-minutes: 180
env:
LC_ALL: C.UTF-8
LANG: C.UTF-8
NOLINT_ON_COMPILE: false
PYSPARK_DRIVER_PYTHON: python3.9
PYSPARK_PYTHON: python3.9
GITHUB_PREV_SHA: ${{ github.event.before }}
container:
image: ${{ needs.precondition.outputs.image_url }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
with:
fetch-depth: 0
repository: apache/spark
ref: ${{ inputs.branch }}
- name: Add GITHUB_WORKSPACE to git trust safe.directory
run: |
git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 10G limit.
- name: Cache SBT and Maven
uses: actions/cache@v4
with:
path: |
build/apache-maven-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v4
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v4
with:
path: ~/.m2/repository
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Free up disk space
run: |
if [ -f ./dev/free_disk_space_container ]; then
./dev/free_disk_space_container
fi
- name: Install Java ${{ inputs.java }}
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ inputs.java }}
- name: Install Python dependencies for python linter and documentation generation
if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5'
run: |
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
# See 'ipython_genutils' in SPARK-38517
# See 'docutils<0.18.0' in SPARK-39421
python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
python3.9 -m pip list
- name: Install dependencies for documentation generation for branch-3.4, branch-3.5
if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
run: |
@@ -786,8 +875,6 @@ jobs:
gem install bundler -v 2.4.22
cd docs
bundle install
- - name: R linter
- run: ./dev/lint-r
- name: Run documentation build
run: |
# We need this link because the jekyll build calls `python`.
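The split-out docs job can be approximated locally with the toolchain the steps above install (Ruby's bundler plus the pinned Python packages). A hedged sketch, assuming the SKIP_API shortcut described in docs/README.md:

    cd docs
    bundle install
    # SKIP_API=1 skips the slow API-doc generation and builds only the Jekyll site.
    SKIP_API=1 bundle exec jekyll build
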
3 changes: 2 additions & 1 deletion .github/workflows/build_non_ansi.yml
@@ -17,7 +17,7 @@
# under the License.
#

name: "Build / NON-ANSI (master, Hadoop 3, JDK 17, Scala 2.13)"
name: "Build / Non-ANSI (master, Hadoop 3, JDK 17, Scala 2.13)"

on:
schedule:
@@ -41,6 +41,7 @@ jobs:
jobs: >-
{
"build": "true",
"docs": "true",
"pyspark": "true",
"sparkr": "true",
"tpcds-1g": "true",
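This scheduled wrapper force-enables jobs (now including "docs") by handing the JSON above to the reusable build_and_test workflow, which uses it in place of its change-detection logic. A hedged sketch of that call pattern; the `jobs` input name comes from the lines above, the rest is illustrative:

    jobs:
      run-build:
        name: Run
        uses: ./.github/workflows/build_and_test.yml
        if: github.repository == 'apache/spark'
        with:
          jobs: >-
            {
              "build": "true",
              "docs": "true"
            }
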
6 changes: 3 additions & 3 deletions .github/workflows/build_python.yml
@@ -34,9 +34,9 @@ jobs:
fail-fast: false
matrix:
include:
- - pyversion: ${{ github.event.schedule == '0 15 * * *' && "pypy3" }}
- - pyversion: ${{ github.event.schedule == '0 17 * * *' && "python3.10" }}
- - pyversion: ${{ github.event.schedule == '0 19 * * *' && "python3.12" }}
+ - pyversion: ${{ github.event.schedule == '0 15 * * *' && 'pypy3' }}
+ - pyversion: ${{ github.event.schedule == '0 17 * * *' && 'python3.10' }}
+ - pyversion: ${{ github.event.schedule == '0 19 * * *' && 'python3.12' }}
permissions:
packages: write
name: Run
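The matrix fix above is purely syntactic: inside a ${{ ... }} expression, GitHub Actions accepts only single-quoted string literals, so the double-quoted forms never parsed. With that fixed, the `condition && 'value'` idiom yields 'value' when the schedule matches and false otherwise. A hypothetical standalone example of the same idiom:

    env:
      PROFILE: ${{ github.event_name == 'schedule' && 'nightly' }}
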
2 changes: 1 addition & 1 deletion .github/workflows/cancel_duplicate_workflow_runs.yml
@@ -17,7 +17,7 @@
# under the License.
#

- name: Cancelling Duplicates
+ name: Cancelling duplicates
on:
workflow_run:
workflows:
10 changes: 5 additions & 5 deletions .github/workflows/maven_test.yml
@@ -190,18 +190,18 @@ jobs:
export ENABLE_KINESIS_TESTS=0
# Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"`
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install
if [[ "$INCLUDED_TAGS" != "" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
else
- ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
- name: Clean up local Maven repository
run: |
2 changes: 1 addition & 1 deletion .github/workflows/publish_snapshot.yml
@@ -17,7 +17,7 @@
# under the License.
#

- name: Publish Snapshot
+ name: Publish snapshot

on:
schedule:
@@ -136,7 +136,7 @@ private static class RocksDBLogger extends org.rocksdb.Logger {
private static final Logger LOG = LoggerFactory.getLogger(RocksDBLogger.class);

RocksDBLogger(Options options) {
- super(options);
+ super(options.infoLogLevel());
}

@Override
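The one-line change above switches RocksDBLogger from the Logger(Options) super-constructor to the one taking an InfoLogLevel; presumably (an assumption worth checking against the pinned RocksJava release) the Options-based constructor is deprecated there. A minimal sketch of a custom logger built the same way, with an invented class name:

    import org.rocksdb.InfoLogLevel;
    import org.rocksdb.Logger;

    class StdoutRocksDBLogger extends Logger {
      StdoutRocksDBLogger() {
        super(InfoLogLevel.INFO_LEVEL);
      }

      @Override
      protected void log(InfoLogLevel level, String message) {
        // Forward RocksDB's native log lines to stdout with their level.
        System.out.println("[rocksdb/" + level + "] " + message);
      }
    }
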
21 changes: 20 additions & 1 deletion common/utils/src/main/java/org/apache/spark/internal/Logger.java
@@ -34,6 +34,10 @@ public class Logger {
this.slf4jLogger = slf4jLogger;
}

public boolean isErrorEnabled() {
return slf4jLogger.isErrorEnabled();
}

public void error(String msg) {
slf4jLogger.error(msg);
}
@@ -58,6 +62,10 @@ public void error(String msg, Throwable throwable, MDC... mdcs) {
}
}

public boolean isWarnEnabled() {
return slf4jLogger.isWarnEnabled();
}

public void warn(String msg) {
slf4jLogger.warn(msg);
}
@@ -82,6 +90,10 @@ public void warn(String msg, Throwable throwable, MDC... mdcs) {
}
}

public boolean isInfoEnabled() {
return slf4jLogger.isInfoEnabled();
}

public void info(String msg) {
slf4jLogger.info(msg);
}
@@ -106,6 +118,10 @@ public void info(String msg, Throwable throwable, MDC... mdcs) {
}
}

public boolean isDebugEnabled() {
return slf4jLogger.isDebugEnabled();
}

public void debug(String msg) {
slf4jLogger.debug(msg);
}
@@ -126,6 +142,10 @@ public void debug(String msg, Throwable throwable) {
slf4jLogger.debug(msg, throwable);
}

public boolean isTraceEnabled() {
return slf4jLogger.isTraceEnabled();
}

public void trace(String msg) {
slf4jLogger.trace(msg);
}
@@ -146,7 +166,6 @@ public void trace(String msg, Throwable throwable) {
slf4jLogger.trace(msg, throwable);
}


private void withLogContext(
String pattern,
MDC[] mdcs,
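The new isErrorEnabled/isWarnEnabled/isInfoEnabled/isDebugEnabled/isTraceEnabled accessors delegate straight to SLF4J and enable the usual guard pattern: skip assembling an expensive message when the level is off. A minimal sketch of that pattern (the surrounding class and how the Logger instance is obtained are hypothetical):

    import java.util.List;
    import org.apache.spark.internal.Logger;

    final class PartitionReporter {
      private final Logger logger;

      PartitionReporter(Logger logger) {
        this.logger = logger;
      }

      void report(List<String> partitions) {
        // The String.join is only paid for when DEBUG is actually enabled.
        if (logger.isDebugEnabled()) {
          logger.debug("Active partitions: " + String.join(", ", partitions));
        }
      }
    }
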
14 changes: 14 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
@@ -125,6 +125,12 @@
],
"sqlState" : "428FR"
},
"CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK" : {
"message" : [
"Watermark needs to be defined to reassign event time column. Failed to find watermark definition in the streaming query."
],
"sqlState" : "42611"
},
"CANNOT_CAST_DATATYPE" : {
"message" : [
"Cannot cast <sourceType> to <targetType>."
@@ -1057,6 +1063,14 @@
},
"sqlState" : "4274K"
},
"EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : {
"message" : [
"Previous node emitted a row with eventTime=<emittedRowEventTime> which is older than current_watermark_value=<currentWatermark>",
"This can lead to correctness issues in the stateful operators downstream in the execution pipeline.",
"Please correct the operator logic to emit rows after current global watermark value."
],
"sqlState" : "42815"
},
"EMPTY_JSON_FIELD_VALUE" : {
"message" : [
"Failed to parse an empty string for data type <dataType>."
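Both new entries follow the file's template convention: <name> placeholders in the message array are substituted with values supplied where the error is raised. A self-contained illustration of that convention (not Spark's actual error-framework code; the values are invented):

    import java.util.Map;

    class ErrorTemplateDemo {
      public static void main(String[] args) {
        String template = "Previous node emitted a row with eventTime=<emittedRowEventTime>"
            + " which is older than current_watermark_value=<currentWatermark>";
        Map<String, String> params = Map.of(
            "emittedRowEventTime", "2024-05-08 10:00:00",
            "currentWatermark", "2024-05-08 10:05:00");
        for (Map.Entry<String, String> e : params.entrySet()) {
          // Fill each <placeholder> with its supplied parameter value.
          template = template.replace("<" + e.getKey() + ">", e.getValue());
        }
        System.out.println(template);
      }
    }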