From 32832af093abc53fe1fb6ce9784fc9905edc4ffb Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Mon, 3 Jun 2024 12:37:24 +0900
Subject: [PATCH] [SPARK-48204][INFRA][FOLLOW] fix release scripts for the "finalize" step

### What changes were proposed in this pull request?

Necessary fixes to finalize the Spark 4.0 preview release. The major one is that PyPI now requires an API token instead of username/password for authentication.

### Why are the changes needed?

release

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

manual

### Was this patch authored or co-authored using generative AI tooling?

no

Closes #46840 from cloud-fan/script.

Authored-by: Wenchen Fan
Signed-off-by: Hyukjin Kwon
---
Notes (free-form text between the "---" line and the diff is not part of
the commit message):
- The "finalize" step now requires PYPI_API_TOKEN instead of PYPI_PASSWORD;
  do-release-docker.sh prompts for it when it is unset and forwards it to
  the container environment.
- The token is handed to twine through TWINE_USERNAME/TWINE_PASSWORD (user
  "__token__") rather than -u/-p, so the secret is neither placed on the
  command line nor subject to word splitting.
- PYSPARK_VERSION is RELEASE_VERSION with its first "-" replaced by "." and
  "preview" replaced by "dev" (e.g. 4.0.0-preview1 -> 4.0.0.dev1); it names
  the pyspark / pyspark_connect tarballs fetched from svn and uploaded.
- Tag creation is skipped when check_for_tag reports that v$RELEASE_VERSION
  already exists.

 dev/create-release/do-release-docker.sh |  6 ++--
 dev/create-release/release-build.sh     | 45 ++++++++++++++-----------
 dev/create-release/spark-rm/Dockerfile  |  2 +-
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh
index c44d0193069b8..132f6b78c3db6 100755
--- a/dev/create-release/do-release-docker.sh
+++ b/dev/create-release/do-release-docker.sh
@@ -84,8 +84,8 @@ if [ ! -z "$RELEASE_STEP" ] && [ "$RELEASE_STEP" = "finalize" ]; then
     error "Exiting."
   fi
 
-  if [ -z "$PYPI_PASSWORD" ]; then
-    stty -echo && printf "PyPi password: " && read PYPI_PASSWORD && printf '\n' && stty echo
+  if [ -z "$PYPI_API_TOKEN" ]; then
+    stty -echo && printf "PyPi API token: " && read -r PYPI_API_TOKEN && printf '\n' && stty echo
   fi
 fi
 
@@ -142,7 +142,7 @@ GIT_NAME=$GIT_NAME
 GIT_EMAIL=$GIT_EMAIL
 GPG_KEY=$GPG_KEY
 ASF_PASSWORD=$ASF_PASSWORD
-PYPI_PASSWORD=$PYPI_PASSWORD
+PYPI_API_TOKEN=$PYPI_API_TOKEN
 GPG_PASSPHRASE=$GPG_PASSPHRASE
 RELEASE_STEP=$RELEASE_STEP
 USER=$USER
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index 0435960c93cd0..19589b951a6e1 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -98,8 +98,8 @@ init_java
 init_maven_sbt
 
 if [[ "$1" == "finalize" ]]; then
-  if [[ -z "$PYPI_PASSWORD" ]]; then
-    error 'The environment variable PYPI_PASSWORD is not set. Exiting.'
+  if [[ -z "$PYPI_API_TOKEN" ]]; then
+    error 'The environment variable PYPI_API_TOKEN is not set. Exiting.'
   fi
 
   git config --global user.name "$GIT_NAME"
@@ -107,31 +107,36 @@ if [[ "$1" == "finalize" ]]; then
 
   # Create the git tag for the new release
   echo "Creating the git tag for the new release"
-  rm -rf spark
-  git clone "https://$ASF_USERNAME:$ASF_PASSWORD@$ASF_SPARK_REPO" -b master
-  cd spark
-  git tag "v$RELEASE_VERSION" "$RELEASE_TAG"
-  git push origin "v$RELEASE_VERSION"
-  cd ..
-  rm -rf spark
-  echo "git tag v$RELEASE_VERSION created"
+  if check_for_tag "v$RELEASE_VERSION"; then
+    echo "v$RELEASE_VERSION already exists. Skip creating it."
+  else
+    rm -rf spark
+    git clone "https://$ASF_USERNAME:$ASF_PASSWORD@$ASF_SPARK_REPO" -b master
+    cd spark
+    git tag "v$RELEASE_VERSION" "$RELEASE_TAG"
+    git push origin "v$RELEASE_VERSION"
+    cd ..
+    rm -rf spark
+    echo "git tag v$RELEASE_VERSION created"
+  fi
 
   # download PySpark binary from the dev directory and upload to PyPi.
   echo "Uploading PySpark to PyPi"
   svn co --depth=empty "$RELEASE_STAGING_LOCATION/$RELEASE_TAG-bin" svn-spark
   cd svn-spark
-  svn update "pyspark-$RELEASE_VERSION.tar.gz"
-  svn update "pyspark-$RELEASE_VERSION.tar.gz.asc"
-  TWINE_USERNAME=spark-upload TWINE_PASSWORD="$PYPI_PASSWORD" twine upload \
+  PYSPARK_VERSION=$(echo "$RELEASE_VERSION" | sed -e "s/-/./" -e "s/preview/dev/")
+  svn update "pyspark-$PYSPARK_VERSION.tar.gz"
+  svn update "pyspark-$PYSPARK_VERSION.tar.gz.asc"
+  TWINE_USERNAME=__token__ TWINE_PASSWORD="$PYPI_API_TOKEN" twine upload \
     --repository-url https://upload.pypi.org/legacy/ \
-    "pyspark-$RELEASE_VERSION.tar.gz" \
-    "pyspark-$RELEASE_VERSION.tar.gz.asc"
-  svn update "pyspark_connect-$RELEASE_VERSION.tar.gz"
-  svn update "pyspark_connect-$RELEASE_VERSION.tar.gz.asc"
-  TWINE_USERNAME=spark-upload TWINE_PASSWORD="$PYPI_PASSWORD" twine upload \
+    "pyspark-$PYSPARK_VERSION.tar.gz" \
+    "pyspark-$PYSPARK_VERSION.tar.gz.asc"
+  svn update "pyspark_connect-$PYSPARK_VERSION.tar.gz"
+  svn update "pyspark_connect-$PYSPARK_VERSION.tar.gz.asc"
+  TWINE_USERNAME=__token__ TWINE_PASSWORD="$PYPI_API_TOKEN" twine upload \
     --repository-url https://upload.pypi.org/legacy/ \
-    "pyspark_connect-$RELEASE_VERSION.tar.gz" \
-    "pyspark_connect-$RELEASE_VERSION.tar.gz.asc"
+    "pyspark_connect-$PYSPARK_VERSION.tar.gz" \
+    "pyspark_connect-$PYSPARK_VERSION.tar.gz.asc"
   cd ..
   rm -rf svn-spark
   echo "PySpark uploaded"
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index 5fdaf58feee2e..ca9e10bebfc53 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -95,7 +95,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3
 RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.2' scipy coverage matplotlib lxml
 
 
-ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2"
+ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1"
 # Python deps for Spark Connect
 ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.56.4"
 