Merge branch 'master' into SPARK-24497-recursive-cte
peter-toth committed Dec 15, 2023
2 parents 386c038 + 4f65413 commit a325020
Showing 2,576 changed files with 66,785 additions and 23,895 deletions.
40 changes: 5 additions & 35 deletions .github/labeler.yml
@@ -17,23 +17,6 @@
# under the License.
#

#
# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler
#
# Note that we currently cannot use the negation operator (i.e. `!`) for miniglob matches as they
# would match any file that doesn't touch them. What's needed is the concept of `any`, which takes a
# list of constraints / globs and then matches all of the constraints for either `any` of the files or
# `all` of the files in the change set.
#
# However, `any`/`all` are not supported in a released version and testing off of the `main` branch
# resulted in some other errors when testing.
#
# An issue has been opened upstream requesting that a release be cut that has support for all/any:
# - https://github.com/actions/labeler/issues/111
#
# While we wait for this issue to be handled upstream, we can remove
# the negated / `!` matches for now and at least have labels again.
#
INFRA:
- ".github/**/*"
- "appveyor.yml"
@@ -45,32 +28,24 @@ INFRA:
- "dev/merge_spark_pr.py"
- "dev/run-tests-jenkins*"
BUILD:
# Can be supported when a stable release with correct all/any is released
#- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/.rat-excludes']
- "dev/**/*"
- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/run-tests-jenkins*']
- "build/**/*"
- "project/**/*"
- "assembly/**/*"
- "**/*pom.xml"
- "bin/docker-image-tool.sh"
- "bin/find-spark-home*"
- "scalastyle-config.xml"
# These can be added in the above `any` clause (and the /dev/**/* glob removed) when
# `any`/`all` support is released
# - "!dev/merge_spark_pr.py"
# - "!dev/run-tests-jenkins*"
# - "!dev/.rat-excludes"
DOCS:
- "docs/**/*"
- "**/README.md"
- "**/CONTRIBUTING.md"
- "python/docs/**/*"
EXAMPLES:
- "examples/**/*"
- "bin/run-example*"
# CORE needs to be updated when all/any are released upstream.
CORE:
# - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied.
- "core/**/*"
- any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"]
- "common/kvstore/**/*"
- "common/network-common/**/*"
- "common/network-shuffle/**/*"
@@ -82,12 +57,8 @@ SPARK SHELL:
- "repl/**/*"
- "bin/spark-shell*"
SQL:
#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
- "**/sql/**/*"
- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming*.py"]
- "common/unsafe/**/*"
#- "!python/pyspark/sql/avro/**/*"
#- "!python/pyspark/sql/streaming/**/*"
#- "!python/pyspark/sql/tests/streaming/test_streaming.py"
- "bin/spark-sql*"
- "bin/beeline*"
- "sbin/*thriftserver*.sh"
@@ -123,7 +94,7 @@ STRUCTURED STREAMING:
- "**/sql/**/streaming/**/*"
- "connector/kafka-0-10-sql/**/*"
- "python/pyspark/sql/streaming/**/*"
- "python/pyspark/sql/tests/streaming/test_streaming.py"
- "python/pyspark/sql/tests/streaming/test_streaming*.py"
- "**/*streaming.R"
PYTHON:
- "bin/pyspark*"
@@ -148,7 +119,6 @@ DEPLOY:
- "sbin/**/*"
CONNECT:
- "connector/connect/**/*"
- "**/sql/sparkconnect/**/*"
- "python/pyspark/sql/**/connect/**/*"
- "python/pyspark/ml/**/connect/**/*"
PROTOBUF:
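The substantive change in this file is the switch from plain glob lists to `any` groups, now that a released version of actions/labeler supports them. As the removed CORE comment explains, a label is applied if any changed file matches all of the globs in an `any` list, so negated (`!`) globs can carve exclusions out of a broad match. A minimal sketch of the pattern, reusing the CORE entry above:

# The CORE label fires when at least one changed file satisfies every glob
# in the `any` list: it lives under core/, is not a *UI.scala file, and is
# not under a ui/ directory.
CORE:
  - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"]
  # Plain globs alongside are still OR-ed in as before: any change under
  # common/kvstore/ also applies the label.
  - "common/kvstore/**/*"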
12 changes: 6 additions & 6 deletions .github/workflows/benchmark.yml
@@ -65,7 +65,7 @@ jobs:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# In order to get diff files
with:
fetch-depth: 0
@@ -95,7 +95,7 @@ jobs:
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: databricks/tpcds-kit
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
@@ -105,7 +105,7 @@
run: cd tpcds-kit/tools && make OS=LINUX
- name: Install Java ${{ github.event.inputs.jdk }}
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ github.event.inputs.jdk }}
@@ -134,7 +134,7 @@ jobs:
SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
steps:
- name: Checkout Spark repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# In order to get diff files
with:
fetch-depth: 0
@@ -157,7 +157,7 @@
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Install Java ${{ github.event.inputs.jdk }}
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: ${{ github.event.inputs.jdk }}
@@ -177,7 +177,7 @@
# In benchmark, we use local as master so set driver memory only. Note that GitHub Actions has 7 GB memory limit.
bin/spark-submit \
--driver-memory 6g --class org.apache.spark.benchmark.Benchmarks \
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`,`find ~/.cache/coursier -name 'curator-test-*.jar'`" \
"`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \
"${{ github.event.inputs.class }}"
# To keep the directory structure and file permissions, tar them
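Besides the actions/checkout and actions/setup-java bumps to v4, the benchmark job now adds the curator-test jar from the local Coursier cache to the spark-submit classpath. The backquoted `find … | paste -sd ',' -` pipeline joins all matched jar paths into a single comma-separated string for `--jars`, and the second `find` appends the curator-test jar. A sketch of how that value is assembled, written as a stand-alone step (the step name and the echoed example path are illustrative, not part of the workflow):

# Hypothetical step showing how the --jars argument is built.
- name: Show assembled --jars value
  run: |
    # Join every test/avro SNAPSHOT jar path with commas.
    jars="`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`"
    # Append the curator-test jar resolved into the local Coursier cache.
    jars="$jars,`find ~/.cache/coursier -name 'curator-test-*.jar'`"
    echo "$jars"  # e.g. ./core/target/spark-core_2.13-x.y.z-SNAPSHOT-tests.jar,...,curator-test-a.b.c.jar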