Skip to content

Limit the size of bundles of elements emitted by SDK into the data output stream. #28283

Limit the size of bundles of elements emitted by SDK into the data output stream.

Limit the size of bundles of elements emitted by SDK into the data output stream. #28283

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# To learn more about GitHub Actions in Apache Beam check the CI.md
name: Build python source distribution and wheels
on:
schedule:
- cron: '10 2 * * *'
push:
branches: ['master', 'release-*']
tags: 'v*'
pull_request:
branches: ['master', 'release-*']
tags: 'v*'
paths: ['sdks/python/**', 'model/**', 'release/**']
workflow_dispatch:
# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login}}'
cancel-in-progress: true
env:
GCP_PATH: "gs://${{ secrets.GCP_PYTHON_WHEELS_BUCKET }}/${GITHUB_REF##*/}/${GITHUB_SHA}-${GITHUB_RUN_ID}/"
jobs:
check_env_variables:
timeout-minutes: 5
name: "Check environment variables"
runs-on: ubuntu-latest
env:
EVENT_NAME: ${{ github.event_name }}
PY_VERSIONS_FULL: "cp38-* cp39-* cp310-* cp311-* cp312-*"
outputs:
gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }}
py-versions-full: ${{ steps.set-py-versions.outputs.py-versions-full }}
py-versions-test: ${{ steps.set-py-versions.outputs.py-versions-test }}
steps:
- uses: actions/checkout@v4
- name: "Check are GCP variables set"
run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
id: check_gcp_variables
env:
GCP_SA_EMAIL: ${{ secrets.GCP_SA_EMAIL }}
GCP_SA_KEY: ${{ secrets.GCP_SA_KEY }}
GCP_PYTHON_WHEELS_BUCKET: ${{ secrets.GCP_PYTHON_WHEELS_BUCKET }}
GCP_PROJECT_ID: "not-needed-here"
GCP_REGION: "not-needed-here"
GCP_TESTING_BUCKET: "not-needed-here"
- name: Set Python Versions for different environments
id: set-py-versions
run: |
set -xeu
if [ $EVENT_NAME == "pull_request" ]; then
# run highest supported version on pull request.
echo "py-versions-test=${PY_VERSIONS_FULL##* }" >> $GITHUB_OUTPUT
else
# run full version for push and cron jobs.
echo "py-versions-test=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT
fi
# Output full set of versions so that we can test all languages on pull requests for certain platforms.
echo "py-versions-full=$PY_VERSIONS_FULL" >> $GITHUB_OUTPUT
build_source:
runs-on: ubuntu-latest
name: Build python source distribution
outputs:
is_rc: ${{ steps.is_rc.outputs.is_rc }}
rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install python
uses: actions/setup-python@v5
with:
python-version: 3.8
- name: Get tag
id: get_tag
run: |
echo "TAG=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT
- name: Check whether an -RC tag was applied to the commit.
id: is_rc
run: |
echo ${{ steps.get_tag.outputs.TAG }} > temp
OUTPUT=$( if grep -e '-RC.' -q temp; then echo 1; else echo 0; fi)
echo "is_rc=$OUTPUT" >> $GITHUB_OUTPUT
- name: Get RELEASE_VERSION and RC_NUM
if: steps.is_rc.outputs.is_rc == 1
id: get_rc_version
run: |
RC_NUM=$(sed -n "s/^.*-RC\([0-9]*\)/\1/p" temp)
RELEASE_VERSION=$(sed -n "s/^v\(.*\)-RC[0-9]/\1/p" temp)
echo "RC_NUM=$RC_NUM" >> $GITHUB_OUTPUT
echo "RELEASE_VERSION=$RELEASE_VERSION" >> $GITHUB_OUTPUT
- name: Build source
working-directory: ./sdks/python
run: pip install -U build && python -m build --sdist
- name: Add checksums
working-directory: ./sdks/python/dist
run: |
file=$(ls | grep .tar.gz | head -n 1)
sha512sum $file > ${file}.sha512
- name: Unzip source
working-directory: ./sdks/python
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
- name: Rename source directory
working-directory: ./sdks/python
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names.
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source
- name: Upload source as artifact
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: source
path: sdks/python/apache-beam-source
- name: Upload compressed sources as artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: source_zip
path: sdks/python/dist
- name: Clear dist
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: |
rm -r ./dist
rm -rd apache-beam-source
- name: Rewrite SDK version to include RC number
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: |
RELEASE_VERSION=${{ steps.get_rc_version.outputs.RELEASE_VERSION }}
RC_NUM=${{ steps.get_rc_version.outputs.RC_NUM }}
sed -i -e "s/${RELEASE_VERSION}/${RELEASE_VERSION}rc${RC_NUM}/g" apache_beam/version.py
- name: Build RC source
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: pip install -U build && python -m build --sdist
- name: Add RC checksums
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python/dist
run: |
file=$(ls | grep .tar.gz | head -n 1)
sha512sum $file > ${file}.sha512
- name: Unzip RC source
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
run: tar -xzvf dist/$(ls dist | grep .tar.gz | head -n 1)
- name: Rename RC source directory
if: steps.is_rc.outputs.is_rc == 1
working-directory: ./sdks/python
# https://github.com/pypa/setuptools/issues/4300 changed naming. Match both old and new names.
run: mv $(ls | grep "apache-beam-\|apache_beam-") apache-beam-source-rc
- name: Upload RC source as artifact
if: steps.is_rc.outputs.is_rc == 1
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: source_rc${{ steps.get_rc_version.outputs.RC_NUM }}
path: sdks/python/apache-beam-source-rc
- name: Upload compressed RC sources as artifacts
if: steps.is_rc.outputs.is_rc == 1
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: source_zip_rc${{ steps.get_rc_version.outputs.RC_NUM }}
path: sdks/python/dist
prepare_gcs:
name: Prepare GCS
needs:
- build_source
- check_env_variables
runs-on: ubuntu-latest
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
steps:
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
- name: Remove existing files on GCS bucket
run: gsutil rm -r ${{ env.GCP_PATH }} || true
upload_source_to_gcs:
name: Upload python source distribution to GCS bucket
needs:
- prepare_gcs
- check_env_variables
runs-on: ubuntu-latest
if: needs.check_env_variables.outputs.gcp-variables-set == 'true'
steps:
- name: Download compressed sources from artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/download-artifact@v3
with:
name: source_zip
path: source/
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
- name: Copy sources to GCS bucket
run: gsutil cp -r -a public-read source/* ${{ env.GCP_PATH }}
build_wheels:
name: Build python wheels on ${{matrix.arch}} for ${{ matrix.os_python.os }}
needs:
- check_env_variables
- build_source
env:
CIBW_ARCHS_LINUX: ${{matrix.arch}}
runs-on: ${{ matrix.os_python.os }}
strategy:
matrix:
os_python: [
{"os": "ubuntu-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-full }}" },
# Temporarily pin to macos-13 because macos-latest breaks this build
# TODO(https://github.com/apache/beam/issues/31114)
{"os": "macos-13", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}" },
{"os": "windows-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}" },
]
arch: [auto]
include:
- os_python: {"os": "ubuntu-latest", "python": "${{ needs.check_env_variables.outputs.py-versions-test }}" }
arch: aarch64
steps:
- name: Download python source distribution from artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/download-artifact@v3
with:
name: source
path: apache-beam-source
- name: Download Python SDK RC source distribution from artifacts
if: ${{ needs.build_source.outputs.is_rc == 1 }}
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/download-artifact@v3
with:
name: source_rc${{ needs.build_source.outputs.rc_num }}
path: apache-beam-source-rc
- name: Install Python
uses: actions/setup-python@v5
with:
python-version: 3.8
- uses: docker/setup-qemu-action@v1
if: ${{matrix.arch == 'aarch64'}}
name: Set up QEMU
- name: Install cibuildwheel
# note: sync cibuildwheel version with gradle task sdks:python:bdistPy* steps
run: pip install cibuildwheel==2.17.0 setuptools
- name: Build wheel
working-directory: apache-beam-source
env:
CIBW_BUILD: ${{ matrix.os_python.python }}
# TODO: https://github.com/apache/beam/issues/23048
CIBW_SKIP: "*-musllinux_*"
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
shell: bash
- name: install sha512sum on MacOS
if: startsWith(matrix.os_python.os, 'macos')
run: brew install coreutils
- name: Add checksums
working-directory: apache-beam-source/wheelhouse/
run: |
for file in *.whl; do
sha512sum $file > ${file}.sha512
done
shell: bash
- name: Upload wheels as artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: wheelhouse-${{ matrix.os_python.os }}${{ (matrix.arch == 'aarch64' && '-aarch64') || '' }}
path: apache-beam-source/wheelhouse/
- name: Build RC wheels
if: ${{ needs.build_source.outputs.is_rc == 1 }}
working-directory: apache-beam-source-rc
env:
CIBW_BUILD: ${{ matrix.os_python.python }}
# TODO: https://github.com/apache/beam/issues/23048
CIBW_SKIP: "*-musllinux_*"
CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
shell: bash
- name: Add RC checksums
if: ${{ needs.build_source.outputs.is_rc == 1 }}
working-directory: apache-beam-source-rc/wheelhouse/
run: |
for file in *.whl; do
sha512sum $file > ${file}.sha512
done
shell: bash
- name: Upload RC wheels as artifacts
if: ${{ needs.build_source.outputs.is_rc == 1 }}
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/upload-artifact@v3
with:
name: wheelhouse-rc${{ needs.build_source.outputs.rc_num }}-${{ matrix.os_python.os }}${{ (matrix.arch == 'aarch64' && '-aarch64') || '' }}
path: apache-beam-source-rc/wheelhouse/
upload_wheels_to_gcs:
name: Upload wheels to GCS bucket
needs:
- build_wheels
- check_env_variables
runs-on: ubuntu-latest
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
strategy:
matrix:
# Temporarily pin to macos-13 because macos-latest breaks this build
# TODO(https://github.com/apache/beam/issues/31114)
os : [ubuntu-latest, macos-13, windows-latest]
arch: [auto]
include:
- os: "ubuntu-latest"
arch: aarch64
steps:
- name: Download wheels from artifacts
# Pinned to v3 because of https://github.com/actions/download-artifact/issues/249
uses: actions/download-artifact@v3
with:
name: wheelhouse-${{ matrix.os }}${{ (matrix.arch == 'aarch64' && '-aarch64') || '' }}
path: wheelhouse/
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
- name: Copy wheels to GCS bucket
run: gsutil cp -r -a public-read wheelhouse/* ${{ env.GCP_PATH }}
- name: Create github action information file on GCS bucket
run: |
cat > github_action_info <<EOF
GITHUB_WORKFLOW=$GITHUB_WORKFLOW
GITHUB_RUN_ID=$GITHUB_RUN_ID
GITHUB_RUN_NUMBER=$GITHUB_RUN_NUMBER
GITHUB_ACTION=$GITHUB_ACTION
GITHUB_ACTOR=$GITHUB_ACTOR
GITHUB_REPOSITORY=$GITHUB_REPOSITORY
GITHUB_EVENT_NAME=$GITHUB_EVENT_NAME
GITHUB_SHA=$GITHUB_SHA
GITHUB_REF=$GITHUB_REF
# only for forked repositiories
GITHUB_HEAD_REF=$GITHUB_HEAD_REF
GITHUB_BASE_REF=$GITHUB_BASE_REF
EOF
echo $(cat github_action_info)
gsutil cp -a public-read github_action_info ${{ env.GCP_PATH }}
- name: Upload GitHub event file to GCS bucket
run: gsutil cp -a public-read ${GITHUB_EVENT_PATH} ${{ env.GCP_PATH }}
list_files_on_gcs:
name: List files on Google Cloud Storage Bucket
needs:
- upload_wheels_to_gcs
- check_env_variables
runs-on: ubuntu-latest
if: needs.check_env_variables.outputs.gcp-variables-set == 'true' && github.event_name != 'pull_request'
steps:
- name: Authenticate on GCP
uses: google-github-actions/setup-gcloud@v0
with:
service_account_email: ${{ secrets.GCP_SA_EMAIL }}
service_account_key: ${{ secrets.GCP_SA_KEY }}
- name: List file on Google Cloud Storage Bucket
run: gsutil ls "${{ env.GCP_PATH }}*"
branch_repo_nightly:
permissions:
contents: write
name: Branch repo nightly
needs:
- build_source
- build_wheels
runs-on: ubuntu-latest
timeout-minutes: 60
if: github.repository_owner == 'apache' && github.event_name == 'schedule'
steps:
- name: Checkout code on master branch
uses: actions/checkout@v4
with:
persist-credentials: false
submodules: recursive
- name: Branch commit
run: |
BRANCH_NAME=${GITHUB_REF##*/}
echo "Updating nightly-${BRANCH_NAME}"
git branch -f nightly-${BRANCH_NAME} HEAD
- name: Push branch
uses: ./.github/actions/github-push-action
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
force: true
branch: nightly-${{ github.ref }}