ci: use GitHub token instead of PAT and push sbom, provenance to regi… #129

Workflow file for this run

.github/workflows/docker_publish.yml at a37e667

	# Check this guide for more information about publishing to ghcr.io with GitHub Actions:
	# https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio

	# Build the Docker image and push it to the registry
	name: docker_publish

	on:
	push:
	branches:
	- "master"
	tags:
	- "*"

	# Allows you to run this workflow manually from the Actions tab
	workflow_dispatch:

	# Sets the permissions granted to the GITHUB_TOKEN for the actions in this job.
	permissions:
	contents: read
	packages: write

	jobs:
	# Build the ubi-no_model without cache export
	docker-ubi-no_model:
	runs-on: ubuntu-latest
	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: true

	- name: Setup docker
	id: setup
	uses: ./.github/workflows/docker-reused-steps
	with:
	token: ${{ secrets.CR_PAT }}
	tag: ubi-no_model

	- name: Build:ubi-no_model
	uses: docker/build-push-action@v5
	id: build
	with:
	context: .
	file: ./ubi.Dockerfile
	load: true
	target: no_model
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max

	- name: Test ubi-no_model docker image
	run: \|
	docker run --group-add 0 -v ".:/app" ${{ steps.build.outputs.imageid }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
	if [ ! -f en.srt ]; then
	echo "The en.srt file does not exist"
	exit 1
	fi
	echo "cat en.srt:";
	cat en.srt;
	if ! grep -qi 'no' en.srt; then
	echo "The en.srt file does not contain the word 'no'"
	exit 1
	fi
	echo "Test passed."

	- name: Build and push:ubi-no_model
	uses: docker/build-push-action@v5
	with:
	context: .
	file: ./ubi.Dockerfile
	push: true
	target: no_model
	tags: ${{ steps.setup.outputs.tags }}
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64, linux/arm64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
	sbom: true
	provenance: true

	# Run the no_model build first ensure that the code at least builds
	docker-no_model:
	runs-on: ubuntu-latest
	outputs:
	digest: ${{ steps.publish.outputs.digest }}
	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: true

	- name: Setup docker
	id: setup
	uses: ./.github/workflows/docker-reused-steps

	- name: Build:no_model
	uses: docker/build-push-action@v5
	id: build
	with:
	context: .
	file: ./Dockerfile
	load: true
	target: no_model
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max

	- name: Test no_model docker image
	run: \|
	docker run --group-add 0 -v ".:/app" ${{ steps.build.outputs.imageid }} -- --model base --language en --device cpu --compute_type int8 --output_format srt .github/workflows/test/en.webm;
	if [ ! -f en.srt ]; then
	echo "The en.srt file does not exist"
	exit 1
	fi
	echo "cat en.srt:";
	cat en.srt;
	if ! grep -qi 'no' en.srt; then
	echo "The en.srt file does not contain the word 'no'"
	exit 1
	fi
	echo "Test passed."

	- name: Build and push:no_model
	uses: docker/build-push-action@v5
	id: publish
	with:
	context: .
	file: ./Dockerfile
	push: true
	target: no_model
	tags: ${{ steps.setup.outputs.tags }}
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64, linux/arm64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
	sbom: true
	provenance: true

	# Download whisper model cache
	docker-cache:
	runs-on: ubuntu-latest
	strategy:
	fail-fast: true
	matrix:
	model:
	- tiny
	- base
	- small
	- medium
	- large-v2
	- large-v3
	needs: docker-no_model # wait for docker-no_model to finish

	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: true

	- name: Setup docker
	id: setup
	uses: ./.github/workflows/docker-reused-steps
	with:
	tag: cache-${{ matrix.model }}

	- name: Build cache:${{ matrix.model }}
	uses: docker/build-push-action@v5
	id: build
	with:
	context: .
	file: ./Dockerfile
	push: true
	target: load_whisper
	tags: ${{ steps.setup.outputs.tags }}
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	WHISPER_MODEL=${{ matrix.model }}
	NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64, linux/arm64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
	sbom: true
	provenance: true

	# Run the rest of the builds in parallel
	docker:
	# The type of runner that the job will run on
	runs-on: ubuntu-latest
	strategy:
	fail-fast: false
	# max-parallel: 6
	matrix:
	lang:
	- en
	- fr
	- de
	- es
	- it
	- ja
	- zh
	- nl
	- uk
	- pt
	- ar
	- cs
	- ru
	- pl
	- hu
	- fi
	- fa
	- el
	- tr
	- da
	- he
	- vi
	- ko
	- ur
	- te
	- hi
	- ca
	- ml
	# Disable language 'no' because this transcribe model is too large to build on the GitHub Actions
	# https://github.com/jim60105/docker-whisperX/actions/runs/8405597972
	# - no
	- nn
	model:
	- tiny
	- base
	- small
	- medium
	- large-v2
	- large-v3
	needs:
	- docker-no_model # wait for docker-no_model to finish
	- docker-cache # wait for docker-cache to finish

	# Steps represent a sequence of tasks that will be executed as part of the job
	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: true

	- name: Setup docker
	id: setup
	uses: ./.github/workflows/docker-reused-steps
	with:
	tag: ${{ matrix.model }}-${{ matrix.lang }}

	- name: Get short SHA
	id: get-sha
	run: \|
	id=$(echo ${{ github.sha }} \| cut -c 1-7)
	echo "id=$id" >> $GITHUB_OUTPUT

	- name: Build and push:${{ matrix.model }}-${{ matrix.lang }}
	uses: docker/build-push-action@v5
	with:
	context: .
	file: ./Dockerfile
	target: final
	push: true
	tags: ${{ steps.setup.outputs.tags }}
	labels: ${{ steps.setup.outputs.labels }}
	build-args: \|
	WHISPER_MODEL=${{ matrix.model }}
	LANG=${{ matrix.lang }}
	LOAD_WHISPER_STAGE=ghcr.io/jim60105/whisperx:cache-${{ matrix.model }}-${{ steps.get-sha.outputs.id }}
	NO_MODEL_STAGE=ghcr.io/jim60105/whisperx:no_model@${{ needs.docker-no_model.outputs.digest }}
	VERSION=${{ github.ref_name }}
	RELEASE=${{ github.run_number }}
	platforms: linux/amd64, linux/arm64
	# Cache to regietry instead of gha to avoid the capacity limit.
	cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache
	cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/whisperx:cache,mode=max
	sbom: true
	provenance: true

	test-large-v3-zh:
	name: Test large-v3-zh docker image
	runs-on: ubuntu-latest
	needs: docker
	steps:
	# We require additional space due to the large size of our image. (~10GB)
	- name: Free Disk Space (Ubuntu)
	uses: jlumbroso/free-disk-space@main
	with:
	tool-cache: true
	android: true
	dotnet: true
	haskell: true
	large-packages: true
	docker-images: true
	swap-storage: false

	- name: Checkout
	uses: actions/checkout@v4
	with:
	sparse-checkout: \|
	.github/workflows/test/**
	sparse-checkout-cone-mode: false

	- name: Get short SHA
	id: get-sha
	run: \|
	id=$(echo ${{ github.sha }} \| cut -c 1-7)
	echo "id=$id" >> $GITHUB_OUTPUT

	- name: Test large-v3-zh docker image
	run: \|
	docker run --group-add 0 -v ".:/app" ghcr.io/jim60105/whisperx:large-v3-zh-${{ steps.get-sha.outputs.id }} -- --device cpu --compute_type int8 --output_format srt .github/workflows/test/zh.webm;
	if [ ! -f zh.srt ]; then
	echo "The zh.srt file does not exist"
	exit 1
	fi
	echo "cat zh.srt:";
	cat zh.srt;
	if ! grep -qi -e '充满' -e '充滿' zh.srt; then
	echo "The zh.srt file does not contain the word '充满' or '充滿'"
	exit 1
	fi
	echo "Test passed."

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci: use GitHub token instead of PAT and push sbom, provenance to regi… #129

Workflow file

ci: use GitHub token instead of PAT and push sbom, provenance to regi… #129

Jobs

Run details

Workflow file for this run