Skip to content

Fix for inability to read some parquet files (issue #816) #218

Fix for inability to read some parquet files (issue #816)

Fix for inability to read some parquet files (issue #816) #218

name: Test Data Processing libs and (Optionally) Push base DPK images
on:
workflow_dispatch:
push:
branches:
- "dev"
- "releases/**"
tags:
- "*"
paths:
# Note: the transform workflows are expected to trigger when data-processing-lib/** changes
- ".make.*"
- "data-processing-lib/**"
- "!data-processing-lib/**.md"
- "!data-processing-lib/**/doc/**"
- "!data-processing-lib/**/.gitignore"
pull_request:
branches:
- "dev"
- "releases/**"
paths:
# Note: the transform workflows are expected to trigger when data-processing-lib/** changes
- ".make.*"
- "data-processing-lib/**"
- "!data-processing-lib/**.md"
- "!data-processing-lib/**/doc/**"
- "!data-processing-lib/**/.gitignore"
# taken from https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
check_if_push_images:
# check whether the Docker images should be pushed to the remote repository
# The images are pushed if it is a merge to dev branch or a new tag is created.
# The latter being part of the release process.
# The images tag is derived from the value of the DOCKER_IMAGE_VERSION variable set in the .make.versions file.
runs-on: ubuntu-22.04
outputs:
publish_images: ${{ steps.version.outputs.publish_images }}
steps:
- id: version
run: |
publish_images='false'
if [[ ${GITHUB_REF} == refs/heads/dev && ${GITHUB_EVENT_NAME} != 'pull_request' && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
if [[ ${GITHUB_REF} == refs/tags/* && ${GITHUB_REPOSITORY} == IBM/data-prep-kit ]] ;
then
publish_images='true'
fi
echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT"
test-python-lib:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Test data-processing-lib/python
run: |
make -C data-processing-lib/python DOCKER=docker venv test
test-ray-lib:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Test data-processing-lib/ray
run: |
make -C data-processing-lib/ray DOCKER=docker venv test
test-spark-lib:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Test data-processing-lib/spark
run: |
make -C data-processing-lib/spark DOCKER=docker venv test
test-data-processing-lib-images:
needs: [check_if_push_images]
if: needs.check_if_push_images.outputs.publish_images == 'true'
runs-on: ubuntu-22.04
env:
DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }}
DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }}
timeout-minutes: 30
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Free up space in github runner
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/lib/jvm /usr/local/.ghcup
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
df -h
- name: Test Code Transform Images
run: |
make -C data-processing-lib/spark DOCKER=docker image
- name:
Print space
# Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
run: |
df -h
docker images
- name: Publish images
if: needs.check_if_push_images.outputs.publish_images == 'true'
run: |
make -C data-processing-lib/spark publish-image