Merge branch 'master' into SPARK-24497-recursive-cte
peter-toth committed Dec 15, 2023
2 parents 386c038 + 4f65413 commit a325020
Showing 2,576 changed files with 66,785 additions and 23,895 deletions.
40 changes: 5 additions & 35 deletions .github/labeler.yml
@@ -17,23 +17,6 @@
# under the License.
#

#
# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler
#
# Note that we currently cannot use the negation operator (i.e. `!`) for miniglob matches as they
# would match any file that doesn't touch them. What's needed is the concept of `any`, which takes a
# list of constraints / globs and then matches all of the constraints for either `any` of the files or
# `all` of the files in the change set.
#
# However, `any`/`all` are not supported in a released version and testing off of the `main` branch
# resulted in some other errors when testing.
#
# An issue has been opened upstream requesting that a release be cut that has support for all/any:
# - https://github.com/actions/labeler/issues/111
#
# While we wait for this issue to be handled upstream, we can remove
# the negated / `!` matches for now and at least have labels again.
#
INFRA:
- ".github/**/*"
- "appveyor.yml"
@@ -45,32 +28,24 @@ INFRA:
- "dev/merge_spark_pr.py"
- "dev/run-tests-jenkins*"
BUILD:
# Can be supported when a stable release with correct all/any is released
#- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
- "dev/**/*"
- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/run-tests-jenkins*']
- "build/**/*"
- "project/**/*"
- "assembly/**/*"
- "**/*pom.xml"
- "bin/docker-image-tool.sh"
- "bin/find-spark-home*"
- "scalastyle-config.xml"
# These can be added in the above `any` clause (and the /dev/**/* glob removed) when
# `any`/`all` support is released
# - "!dev/merge_spark_pr.py"
# - "!dev/run-tests-jenkins*"
# - "!dev/.rat-excludes"
DOCS:
- "docs/**/*"
- "**/README.md"
- "**/CONTRIBUTING.md"
- "python/docs/**/*"
EXAMPLES:
- "examples/**/*"
- "bin/run-example*"
# CORE needs to be updated when all/any are released upstream.
CORE:
# - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied.
- "core/**/*"
- any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"]
- "common/kvstore/**/*"
- "common/network-common/**/*"
- "common/network-shuffle/**/*"
@@ -82,12 +57,8 @@ SPARK SHELL:
- "repl/**/*"
- "bin/spark-shell*"
SQL:
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
- "**/sql/**/*"
- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming*.py"]
- "common/unsafe/**/*"
#- "!python/pyspark/sql/avro/**/*"
#- "!python/pyspark/sql/streaming/**/*"
#- "!python/pyspark/sql/tests/streaming/test_streaming.py"
- "bin/spark-sql*"
- "bin/beeline*"
- "sbin/*thriftserver*.sh"
@@ -123,7 +94,7 @@ STRUCTURED STREAMING:
- "**/sql/**/streaming/**/*"
- "connector/kafka-0-10-sql/**/*"
- "python/pyspark/sql/streaming/**/*"
- "python/pyspark/sql/tests/streaming/test_streaming.py"
- "python/pyspark/sql/tests/streaming/test_streaming*.py"
- "**/*streaming.R"
PYTHON:
- "bin/pyspark*"
@@ -148,7 +119,6 @@ DEPLOY:
- "sbin/**/*"
CONNECT:
- "connector/connect/**/*"
- "**/sql/sparkconnect/**/*"
- "python/pyspark/sql/**/connect/**/*"
- "python/pyspark/ml/**/connect/**/*"
PROTOBUF:
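The substantive change in this file is the switch from plain glob lists to `any` groups, now that a released version of actions/labeler supports them. As the removed CORE comment explains, a label is applied if any changed file matches all of the globs in an `any` list, so negated (`!`) globs can carve exclusions out of a broad match. A minimal sketch of the pattern, reusing the CORE entry above:

# The CORE label fires when at least one changed file satisfies every glob
# in the `any` list: it lives under core/, is not a *UI.scala file, and is
# not under a ui/ directory.
CORE:
  - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"]
  # Plain globs alongside are still OR-ed in as before: any change under
  # common/kvstore/ also applies the label.
  - "common/kvstore/**/*"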
12 changes: 6 additions & 6 deletions .github/workflows/benchmark.yml
@@ -65,7 +65,7 @@ jobs:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# In order to get diff files
with:
fetch-depth: 0
@@ -95,7 +95,7 @@ jobs:
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: databricks/tpcds-kit
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
@@ -105,7 +105,7 @@
run: cd tpcds-kit/tools && make OS=LINUX
- name: Install Java ${{ github.event.inputs.jdk }}
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ github.event.inputs.jdk }}
@@ -134,7 +134,7 @@ jobs:
SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# In order to get diff files
with:
fetch-depth: 0
@@ -157,7 +157,7 @@
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Install Java ${{ github.event.inputs.jdk }}
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ github.event.inputs.jdk }}
@@ -177,7 +177,7 @@
# In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
bin/spark-submit \
--driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`,`find ~/.cache/coursier -name 'curator-test-*.jar'`" \
"`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \
"${{ github.event.inputs.class }}"
# To keep the directory structure and file permissions, tar them
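Besides the actions/checkout and actions/setup-java bumps to v4, the benchmark job now adds the curator-test jar from the local Coursier cache to the spark-submit classpath. The backquoted `find … | paste -sd ',' -` pipeline joins all matched jar paths into a single comma-separated string for `--jars`, and the second `find` appends the curator-test jar. A sketch of how that value is assembled, written as a stand-alone step (the step name and the echoed example path are illustrative, not part of the workflow):

# Hypothetical step showing how the --jars argument is built.
- name: Show assembled --jars value
  run: |
    # Join every test/avro SNAPSHOT jar path with commas.
    jars="`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`"
    # Append the curator-test jar resolved into the local Coursier cache.
    jars="$jars,`find ~/.cache/coursier -name 'curator-test-*.jar'`"
    echo "$jars"  # e.g. ./core/target/spark-core_2.13-x.y.z-SNAPSHOT-tests.jar,...,curator-test-a.b.c.jar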