
Commit 95454a2
Resolved conflict.
sarutak committed Oct 28, 2020
2 parents: d6e8cbe + ea709d6 · commit 95454a2
Showing 889 changed files with 14,393 additions and 138,835 deletions.
228 changes: 164 additions & 64 deletions .github/workflows/build_and_test.yml
@@ -17,7 +17,8 @@ jobs:
   # Build: build Spark and run the tests for specified modules.
   build:
     name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
-    runs-on: ubuntu-latest
+    # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
+    runs-on: ubuntu-20.04
     strategy:
       fail-fast: false
       matrix:
@@ -41,12 +42,6 @@ jobs:
             streaming, sql-kafka-0-10, streaming-kafka-0-10,
             mllib-local, mllib,
             yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
-          - >-
-            pyspark-sql, pyspark-mllib, pyspark-resource
-          - >-
-            pyspark-core, pyspark-streaming, pyspark-ml
-          - >-
-            sparkr
         # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
         included-tags: [""]
         excluded-tags: [""]
@@ -127,59 +122,20 @@ jobs:
       uses: actions/setup-java@v1
       with:
         java-version: ${{ matrix.java }}
-    # PySpark
-    - name: Install PyPy3
-      # Note that order of Python installations here matters because default python3 is
-      # overridden by pypy3.
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: pypy3
-        architecture: x64
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark')
-      with:
-        python-version: 3.6
-        architecture: x64
     - name: Install Python 3.8
       uses: actions/setup-python@v2
       # We should install one Python that is higher than 3 for SQL and Yarn because:
       # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
       # - Yarn has a Python specific test too, for example, YarnClusterSuite.
-      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       with:
         python-version: 3.8
         architecture: x64
-    - name: Install Python packages (Python 3.6 and PyPy3)
-      if: contains(matrix.modules, 'pyspark')
-      # PyArrow is not supported in PyPy yet, see ARROW-2651.
-      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
-      run: |
-        python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner
-        python3.6 -m pip list
-        # PyPy does not have xmlrunner
-        pypy3 -m pip install numpy pandas
-        pypy3 -m pip list
     - name: Install Python packages (Python 3.8)
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       run: |
-        python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner
+        python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
         python3.8 -m pip list
-    # SparkR
-    - name: Install R 4.0
-      uses: r-lib/actions/setup-r@v1
-      if: contains(matrix.modules, 'sparkr')
-      with:
-        r-version: 4.0
-    - name: Install R packages
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
-        sudo apt-get install -y libcurl4-openssl-dev qpdf
-        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
-        # Show installed packages in R.
-        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
     # Run the tests.
     - name: Run tests
       run: |
@@ -201,10 +157,158 @@ jobs:
         name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
         path: "**/target/unit-tests.log"
 
+  pyspark:
+    name: "Build modules: ${{ matrix.modules }}"
+    runs-on: ubuntu-20.04
+    container:
+      image: dongjoon/apache-spark-github-action-image:20201015
+    strategy:
+      fail-fast: false
+      matrix:
+        modules:
+          - >-
+            pyspark-sql, pyspark-mllib, pyspark-resource
+          - >-
+            pyspark-core, pyspark-streaming, pyspark-ml
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      HADOOP_PROFILE: hadoop3.2
+      HIVE_PROFILE: hive2.3
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    - name: Merge dispatched input branch
+      if: ${{ github.event.inputs.target != '' }}
+      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v2
+      with:
+        path: |
+          build/apache-maven-*
+          build/zinc-*
+          build/scala-*
+          build/*.jar
+        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+        restore-keys: |
+          build-
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: pyspark-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          pyspark-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.ivy2/cache
+        key: pyspark-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+        restore-keys: |
+          pyspark-ivy-
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.6
+        architecture: x64
+    # This step takes much less time (~30s) than other Python versions so it is not included
+    # in the Docker image being used. There is also a technical issue to install Python 3.6 on
+    # Ubuntu 20.04. See also SPARK-33162.
+    - name: Install Python packages (Python 3.6)
+      run: |
+        python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
+        python3.6 -m pip list
+    # Run the tests.
+    - name: Run tests
+      run: |
+        mkdir -p ~/.m2
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
+        rm -rf ~/.m2/repository/org/apache/spark
+    - name: Upload test results to report
+      if: always()
+      uses: actions/upload-artifact@v2
+      with:
+        name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
+        path: "**/target/test-reports/*.xml"
+    - name: Upload unit tests log files
+      if: failure()
+      uses: actions/upload-artifact@v2
+      with:
+        name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3
+        path: "**/target/unit-tests.log"
+
+  sparkr:
+    name: Build modules - sparkr
+    runs-on: ubuntu-20.04
+    container:
+      image: dongjoon/apache-spark-github-action-image:20201025
+    env:
+      HADOOP_PROFILE: hadoop3.2
+      HIVE_PROFILE: hive2.3
+      GITHUB_PREV_SHA: ${{ github.event.before }}
+      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    - name: Merge dispatched input branch
+      if: ${{ github.event.inputs.target != '' }}
+      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v2
+      with:
+        path: |
+          build/apache-maven-*
+          build/zinc-*
+          build/scala-*
+          build/*.jar
+        key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+        restore-keys: |
+          build-
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: sparkr-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          sparkr-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.ivy2/cache
+        key: sparkr-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+        restore-keys: |
+          sparkr-ivy-
+    - name: Run tests
+      run: |
+        mkdir -p ~/.m2
+        # The following are also used by `r-lib/actions/setup-r` to avoid
+        # R issues in the Docker environment.
+        export TZ=UTC
+        export _R_CHECK_SYSTEM_CLOCK_=FALSE
+        ./dev/run-tests --parallelism 2 --modules sparkr
+        rm -rf ~/.m2/repository/org/apache/spark
+    - name: Upload test results to report
+      if: always()
+      uses: actions/upload-artifact@v2
+      with:
+        name: test-results-sparkr--1.8-hadoop3.2-hive2.3
+        path: "**/target/test-reports/*.xml"
+
   # Static analysis, and documentation build
   lint:
     name: Linters, licenses, dependencies and documentation generation
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
    - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -228,7 +332,7 @@
       run: |
         # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
         # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx
+        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc
     - name: Install R 4.0
       uses: r-lib/actions/setup-r@v1
       with:
@@ -249,7 +353,7 @@
         sudo apt-get install -y libcurl4-openssl-dev pandoc
         # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
         # See also https://github.com/sphinx-doc/sphinx/issues/7551.
-        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx
+        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
         gem install jekyll jekyll-redirect-from rouge
         sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
     - name: Scala linter
@@ -271,7 +375,7 @@
 
   java11:
     name: Java 11 build
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -296,26 +400,22 @@
 
   scala-213:
     name: Scala 2.13 build
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
-    - name: Cache Maven local repository
+    - name: Cache Ivy local repository
       uses: actions/cache@v2
       with:
-        path: ~/.m2/repository
-        key: scala-213-maven-${{ hashFiles('**/pom.xml') }}
+        path: ~/.ivy2/cache
+        key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
         restore-keys: |
-          scala-213-maven-
+          scala-213-ivy-
     - name: Install Java 11
       uses: actions/setup-java@v1
       with:
         java-version: 11
-    - name: Build with Maven
+    - name: Build with SBT
       run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
         ./dev/change-scala-version.sh 2.13
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install
-        rm -rf ~/.m2/repository/org/apache/spark
+        ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile
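
As a side note, the two behavioral changes above — PySpark tests moving into a dedicated container job, and the Scala 2.13 check moving from Maven to SBT — can be approximated locally. A minimal sketch, assuming a Spark checkout at this commit with Java and Python 3.6 on the PATH (in CI these come from the container image and actions/setup-python); the commands mirror the `run:` steps verbatim:

```bash
# Sketch: approximate the new CI jobs from a Spark checkout (assumptions above).

# PySpark modules, mirroring the `pyspark` job's run steps:
python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
./dev/run-tests --parallelism 2 --modules "pyspark-sql,pyspark-mllib,pyspark-resource"

# Scala 2.13 compile check, mirroring the `scala-213` job (now SBT instead of Maven):
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver \
  -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile
```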
1 change: 1 addition & 0 deletions .gitignore
@@ -68,6 +68,7 @@ python/docs/source/reference/api/
 python/test_coverage/coverage_data
 python/test_coverage/htmlcov
 python/pyspark/python
+.mypy_cache/
 reports/
 scalastyle-on-compile.generated.xml
 scalastyle-output.xml
17 changes: 17 additions & 0 deletions .sbtopts
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+-J-Xmx4G
+-J-Xss4m
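
For context on the new file: sbt reads `.sbtopts` from the project root at startup, and arguments prefixed with `-J` are forwarded to the JVM, so these two lines raise the heap and thread-stack limits for every sbt invocation (deep recursion in the Scala compiler is a common reason to need a larger `-Xss`). A sketch of the equivalent one-off invocation, assuming Spark's `build/sbt` forwards `-J` flags the same way (it is based on `sbt-launch-lib.bash`, which does):

```bash
# Equivalent effect without .sbtopts, passing the JVM flags explicitly:
#   -J-Xmx4G -> java -Xmx4G (4 GiB max heap)
#   -J-Xss4m -> java -Xss4m (4 MiB per-thread stack)
./build/sbt -J-Xmx4G -J-Xss4m compile
```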
8 changes: 8 additions & 0 deletions R/pkg/NAMESPACE
@@ -228,8 +228,11 @@ exportMethods("%<=>%",
               "arrays_zip",
               "arrays_zip_with",
               "asc",
+              "asc_nulls_first",
+              "asc_nulls_last",
               "ascii",
               "asin",
+              "assert_true",
               "atan",
               "atan2",
               "avg",
@@ -272,6 +275,9 @@ exportMethods("%<=>%",
               "degrees",
               "dense_rank",
               "desc",
+              "desc_nulls_first",
+              "desc_nulls_last",
+              "dropFields",
               "element_at",
               "encode",
               "endsWith",
@@ -360,6 +366,7 @@ exportMethods("%<=>%",
               "posexplode_outer",
               "quarter",
               "radians",
+              "raise_error",
               "rand",
               "randn",
               "rank",
@@ -427,6 +434,7 @@ exportMethods("%<=>%",
               "variance",
               "var_pop",
               "var_samp",
+              "vector_to_array",
               "weekofyear",
               "when",
               "window",
[Diff truncated: the remaining changed files in this commit are not shown.]
