# Page title captured with this file:
# "ci: refactor Docker jobs for specific build digest handling" (#105)

name: docker_publish

# Events that start this workflow.
on:
  # Pushes to the master branch, and pushes of tags matching "*".
  push:
    branches: [master]
    tags: ["*"]
  # Manual runs started from the Actions tab.
  workflow_dispatch:

# Jobs of this workflow; ordering between them is expressed with `needs`.
jobs:
# == docker-ubi-no_model =======================================================
  # Builds the `no_model` target of ./ubi.Dockerfile for amd64 and arm64 with
  # push enabled, then smoke-tests the image referenced by the build digest.
  # This build configures neither cache-from nor cache-to.
  docker-ubi-no_model:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      # Repository-local setup steps; its `tags` and `labels` outputs are
      # passed to the build step below.
      - id: setup
        name: Setup docker
        uses: ./.github/workflows/docker-reused-setup-steps
        with:
          token: ${{ secrets.CR_PAT }}
          tag: ubi-no_model

      # NOTE(review): push and load are both enabled for a two-platform build;
      # presumably the builder prepared by the setup step supports that --
      # confirm.
      - id: build
        name: "Build and push:ubi-no_model"
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./ubi.Dockerfile
          target: no_model
          platforms: linux/amd64, linux/arm64
          push: true
          load: true
          tags: ${{ steps.setup.outputs.tags }}
          labels: ${{ steps.setup.outputs.labels }}

      # Runs the image on the English sample clip and fails unless en.srt is
      # produced and contains "no" (case-insensitive match).
      - name: Test ubi-no_model docker image
        run: |
          docker run --group-add 0 -v ".:/app" ghcr.io/jim60105/whisperx:ubi-no_model@${{ steps.build.outputs.digest }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
          if [ ! -f en.srt ]; then
            echo "The en.srt file does not exist"
            exit 1
          fi
          echo "cat en.srt:";
          cat en.srt;
          if ! grep -qi 'no' en.srt; then
            echo "The en.srt file does not contain the word 'no'"
            exit 1
          fi
          echo "Test passed."
# == docker-no_model ===========================================================
  # First build of the pipeline: the `no_model` target of ./Dockerfile. The
  # docker-cache and docker jobs wait for it, so a broken build stops the
  # workflow before the matrix builds start.
  docker-no_model:
    runs-on: ubuntu-latest
    # Digest reported by the build step; docker-cache and docker read it to
    # reference this image as `no_model@<digest>`.
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      # Repository-local setup steps; no `tag` input is passed here, unlike
      # the other build jobs.
      - id: setup
        name: Setup docker
        uses: ./.github/workflows/docker-reused-setup-steps
        with:
          token: ${{ secrets.CR_PAT }}

      # Reads from and writes to the GitHub Actions build cache (mode=max).
      # NOTE(review): push and load are both enabled for a two-platform build;
      # presumably the builder prepared by the setup step supports that --
      # confirm.
      - id: build
        name: "Build and push:no_model"
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          target: no_model
          platforms: linux/amd64, linux/arm64
          push: true
          load: true
          tags: ${{ steps.setup.outputs.tags }}
          labels: ${{ steps.setup.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max

      # Runs the image on the English sample clip and fails unless en.srt is
      # produced and contains "no" (case-insensitive match).
      - name: Test no_model docker image
        run: |
          docker run --group-add 0 -v ".:/app" ghcr.io/jim60105/whisperx:no_model@${{ steps.build.outputs.digest }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
          if [ ! -f en.srt ]; then
            echo "The en.srt file does not exist"
            exit 1
          fi
          echo "cat en.srt:";
          cat en.srt;
          if ! grep -qi 'no' en.srt; then
            echo "The en.srt file does not contain the word 'no'"
            exit 1
          fi
          echo "Test passed."
# == docker-cache ==============================================================
  # Builds the `load_whisper` target of ./Dockerfile once per Whisper model,
  # with push enabled and `cache-<model>` passed as the tag to the setup step.
  docker-cache:
    runs-on: ubuntu-latest
    # Starts only after docker-no_model has finished.
    needs: [docker-no_model]
    strategy:
      # The first failing model cancels the remaining matrix entries.
      fail-fast: true
      matrix:
        model: [tiny, base, small, medium, large-v2, large-v3]
    # NOTE(review): this is a matrix job, so a single `digest` output cannot
    # represent all six model builds; nothing in this file reads it. Confirm
    # which build's value is exposed before consuming it.
    outputs:
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      - id: setup
        name: Setup docker
        uses: ./.github/workflows/docker-reused-setup-steps
        with:
          token: ${{ secrets.CR_PAT }}
          tag: cache-${{ matrix.model }}

      # NO_MODEL_STAGE references the image built by docker-no_model by its
      # digest. The GitHub Actions cache is read here but not written.
      - id: build
        name: "Build cache:${{ matrix.model }}"
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          target: load_whisper
          platforms: linux/amd64, linux/arm64
          push: true
          tags: ${{ steps.setup.outputs.tags }}
          labels: ${{ steps.setup.outputs.labels }}
          build-args: |
            WHISPER_MODEL=${{ matrix.model }}
            NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
          cache-from: type=gha
# == docker ====================================================================
  # Builds the `final` target of ./Dockerfile once per (language, model) pair
  # with push enabled, passing `<model>-<lang>` as the tag to the setup step.
  docker:
    runs-on: ubuntu-latest
    needs:
      - docker-no_model # provides the digest used in NO_MODEL_STAGE
      - docker-cache # must finish before the cache-<model> images are used
    strategy:
      # A failing combination does not cancel the other combinations.
      fail-fast: false
      # max-parallel: 6
      matrix:
        lang:
          - en
          - fr
          - de
          - es
          - it
          - ja
          - zh
          - nl
          - uk
          - pt
          - ar
          - cs
          - ru
          - pl
          - hu
          - fi
          - fa
          - el
          - tr
          - da
          - he
          - vi
          - ko
          - ur
          - te
          - hi
          - ca
          - ml
          # Quoted on purpose: YAML 1.1 loaders read a bare `no` as the
          # boolean false instead of the Norwegian language code.
          - "no"
          - nn
        model:
          - tiny
          - base
          - small
          - medium
          - large-v2
          - large-v3
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true

      - id: setup
        name: Setup docker
        uses: ./.github/workflows/docker-reused-setup-steps
        with:
          token: ${{ secrets.CR_PAT }}
          tag: ${{ matrix.model }}-${{ matrix.lang }}

      # LOAD_WHISPER_STAGE is referenced by tag (cache-<model>), while
      # NO_MODEL_STAGE is referenced by the digest from docker-no_model.
      # The GitHub Actions cache is read here but not written.
      - name: "Build and push:${{ matrix.model }}-${{ matrix.lang }}"
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          target: final
          platforms: linux/amd64, linux/arm64
          push: true
          tags: ${{ steps.setup.outputs.tags }}
          labels: ${{ steps.setup.outputs.labels }}
          build-args: |
            WHISPER_MODEL=${{ matrix.model }}
            LANG=${{ matrix.lang }}
            LOAD_WHISPER_STAGE=ghcr.io/jim60105/whisperx:cache-${{ matrix.model }}
            NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
          cache-from: type=gha
# == test-large-v3-zh ==========================================================
  # Smoke test of the `large-v3-zh` image tag; runs after the `docker` matrix
  # job. Disk space is freed on the runner before the image is used.
  test-large-v3-zh:
    name: Test large-v3-zh docker image
    runs-on: ubuntu-latest
    needs: docker
    steps:
      # Pinned to a release tag rather than the moving `main` branch, so the
      # code this third-party action runs cannot change between workflow runs
      # without an edit to this file.
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # Setting this to true frees about 6 GB but may remove tools that
          # are actually needed.
          tool-cache: true
          # The options below all default to true; set any of them to false
          # if the workflow needs what they remove.
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: false

      # Only the test fixtures are checked out.
      - name: Checkout
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github/workflows/test/**
          sparse-checkout-cone-mode: false

      # Runs the image on the Chinese sample clip and fails unless zh.srt is
      # produced and contains the expected word in simplified or traditional
      # characters.
      - name: Test large-v3-zh docker image
        run: |
          docker run --group-add 0 -v ".:/app" ghcr.io/jim60105/whisperx:large-v3-zh -- --device cpu --compute_type int8 --output_format srt .github/workflows/test/zh.webm;
          if [ ! -f zh.srt ]; then
            echo "The zh.srt file does not exist"
            exit 1
          fi
          echo "cat zh.srt:";
          cat zh.srt;
          if ! grep -qi -e '充满' -e '充滿' zh.srt; then
            echo "The zh.srt file does not contain the word '充满' or '充滿'"
            exit 1
          fi
          echo "Test passed."