Skip to content

Commit

Permalink
working on workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
mmguero committed Feb 24, 2024
1 parent 29336fa commit b72a6c1
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 52 deletions.
121 changes: 121 additions & 0 deletions .github/workflows/monkeyplug-build-push-vosk-ghcr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
---
# Builds the VOSK target of docker/Dockerfile once per VOSK model, scans each
# image with Trivy, and pushes it to ghcr.io tagged "vosk-<size>".
name: monkeyplug-build-push-vosk-ghcr

on:
  push:
    branches:
      - 'main'
  pull_request:
  workflow_dispatch:
  repository_dispatch:
  schedule:
    - cron: '0 12 15 * *'

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/monkeyplug
  IMAGE_ARCH: amd64
  REPO_CONTEXT: .
  REPO_CONTAINERFILE: ./docker/Dockerfile

jobs:
  # Cancels superseded runs and publishes the build matrix as a JSON array.
  setup:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.matrix.outputs.value }}
    steps:
      -
        name: Cancel previous run in progress
        id: cancel-previous-runs
        uses: styfle/cancel-workflow-action@0.11.0
        with:
          ignore_sha: true
          all_but_latest: true
          access_token: ${{ secrets.GITHUB_TOKEN }}
      -
        name: Enumerate VOSK URLs
        id: matrix
        # Each matrix entry is "<tag suffix>|<model URL>"; the buildah job
        # splits it on '|'. Single quotes keep the JSON literal, so no
        # backslash escaping is needed. Both URLs use https.
        run: |
          echo 'value=["small|https://alphacephei.com/kaldi/models/vosk-model-small-en-us-0.15.zip", "large|https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip"]' >> "$GITHUB_OUTPUT"
      -
        name: Show matrix
        # The value is handed over through the environment rather than being
        # expanded into the script text: it contains '"' and '|', which the
        # shell would otherwise parse as quoting and pipeline syntax.
        env:
          MATRIX_JSON: ${{ steps.matrix.outputs.value }}
        run: |
          echo "$MATRIX_JSON"

  # Builds, scans and pushes one image per matrix entry.
  buildah:
    needs: [setup]
    runs-on: ubuntu-latest
    permissions:
      actions: write
      packages: write
      contents: read
      security-events: write
    strategy:
      matrix:
        value: ${{ fromJSON(needs.setup.outputs.matrix) }}
    steps:
      -
        name: Checkout
        id: repo-checkout
        uses: actions/checkout@v3
      -
        name: Set up QEMU
        id: setup-qemu
        uses: docker/setup-qemu-action@v2
      -
        name: Log in to registry
        id: registry-login
        uses: redhat-actions/podman-login@v1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}
      -
        name: Extract tag name
        id: extract_tag
        shell: bash
        # matrix.value contains '|'. Expanded directly into the script it
        # would be read as a shell pipe and the tag would come out empty,
        # so it is passed via the environment and quoted.
        env:
          MATRIX_VALUE: ${{ matrix.value }}
        run: |
          echo "tag=$(printf '%s\n' "$MATRIX_VALUE" | cut -d'|' -f1)" >> "$GITHUB_OUTPUT"
      -
        name: Extract model URL
        id: extract_url
        shell: bash
        # Same quoting concern as the tag extraction step.
        env:
          MATRIX_VALUE: ${{ matrix.value }}
        run: |
          echo "url=$(printf '%s\n' "$MATRIX_VALUE" | cut -d'|' -f2)" >> "$GITHUB_OUTPUT"
      -
        name: Build (VOSK)
        id: build-vosk-image
        uses: redhat-actions/buildah-build@v2
        with:
          image: ${{ env.IMAGE_NAME }}
          tags: vosk-${{ steps.extract_tag.outputs.tag }}
          context: ${{ env.REPO_CONTEXT }}
          containerfiles: ${{ env.REPO_CONTAINERFILE }}
          archs: ${{ env.IMAGE_ARCH }}
          target: vosk
          build-args: |
            VOSK_MODEL_URL=${{ steps.extract_url.outputs.url }}
      -
        name: Run Trivy vulnerability scanner (VOSK)
        id: trivy-scan
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'image'
          image-ref: ${{ steps.build-vosk-image.outputs.image }}:${{ steps.build-vosk-image.outputs.tags }}
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'HIGH,CRITICAL'
          vuln-type: 'os,library'
          hide-progress: true
          ignore-unfixed: true
          # Findings are reported through the SARIF upload below, not by
          # failing the job.
          exit-code: '0'
      -
        name: Upload Trivy scan results to GitHub Security tab (VOSK)
        uses: github/codeql-action/upload-sarif@v2
        if: always()
        with:
          sarif_file: 'trivy-results.sarif'
      -
        name: Push (VOSK)
        id: push-vosk-image
        uses: redhat-actions/push-to-registry@v2
        with:
          image: ${{ steps.build-vosk-image.outputs.image }}
          tags: ${{ steps.build-vosk-image.outputs.tags }}
          registry: ${{ env.REGISTRY }}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
name: monkeyplug-build-push-ghcr
name: monkeyplug-build-push-whisper-ghcr

on:
push:
Expand All @@ -19,13 +19,10 @@ env:
REPO_CONTAINERFILE: ./docker/Dockerfile

jobs:
buildah:
setup:
runs-on: ubuntu-latest
permissions:
actions: write
packages: write
contents: read
security-events: write
outputs:
matrix: ${{ steps.matrix.outputs.value }}
steps:
-
name: Cancel previous run in progress
Expand All @@ -35,6 +32,26 @@ jobs:
ignore_sha: true
all_but_latest: true
access_token: ${{ secrets.GITHUB_TOKEN }}
-
name: Enumerate Whisper models
id: matrix
run: |
echo "value=[\"tiny.en\", \"tiny\", \"base.en\", \"base\", \"small.en\", \"small\", \"medium.en\", \"medium\", \"large-v1\", \"large-v2\", \"large-v3\", \"large\"]" >> $GITHUB_OUTPUT
-
run: |
echo "${{ steps.matrix.outputs.value }}"
buildah:
needs: [ setup ]
runs-on: ubuntu-latest
permissions:
actions: write
packages: write
contents: read
security-events: write
strategy:
matrix:
value: ${{fromJSON(needs.setup.outputs.matrix)}}
steps:
-
name: Checkout
id: repo-checkout
Expand All @@ -52,34 +69,25 @@ jobs:
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
-
name: Build Small
id: build-small-image
name: Build (Whisper)
id: build-whisper-image
uses: redhat-actions/buildah-build@v2
with:
image: ${{ env.IMAGE_NAME }}
tags: small
context: ${{ env.REPO_CONTEXT }}
containerfiles: ${{ env.REPO_CONTAINERFILE }}
archs: ${{ env.IMAGE_ARCH }}
-
name: Build Large
id: build-large-image
uses: redhat-actions/buildah-build@v2
with:
image: ${{ env.IMAGE_NAME }}
tags: large
tags: whisper-${{ matrix.value }}
context: ${{ env.REPO_CONTEXT }}
containerfiles: ${{ env.REPO_CONTAINERFILE }}
archs: ${{ env.IMAGE_ARCH }}
target: whisper
build-args: |
VOSK_MODEL_URL=http://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip
WHISPER_MODEL_NAME=${{ matrix.value }}
-
name: Run Trivy vulnerability scanner
name: Run Trivy vulnerability scanner (Whisper)
id: trivy-scan
uses: aquasecurity/trivy-action@master
with:
scan-type: 'image'
image-ref: ${{ env.IMAGE_NAME }}:small
# Scan the image produced by this workflow's own build step (id: build-whisper-image).
image-ref: ${{ steps.build-whisper-image.outputs.image }}:${{ steps.build-whisper-image.outputs.tags }}
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'HIGH,CRITICAL'
Expand All @@ -88,24 +96,16 @@ jobs:
ignore-unfixed: true
exit-code: '0'
-
name: Upload Trivy scan results to GitHub Security tab
name: Upload Trivy scan results to GitHub Security tab (Whisper)
uses: github/codeql-action/upload-sarif@v2
if: always()
with:
sarif_file: 'trivy-results.sarif'
-
name: Push Small
id: push-small-to-ghcr
uses: redhat-actions/push-to-registry@v2
with:
image: ${{ steps.build-small-image.outputs.image }}
tags: ${{ steps.build-small-image.outputs.tags }}
registry: ${{ env.REGISTRY }}
-
name: Push Large
id: push-large-to-ghcr
name: Push (Whisper)
id: push-whisper-image
uses: redhat-actions/push-to-registry@v2
with:
image: ${{ steps.build-large-image.outputs.image }}
tags: ${{ steps.build-large-image.outputs.tags }}
image: ${{ steps.build-whisper-image.outputs.image }}
tags: ${{ steps.build-whisper-image.outputs.tags }}
registry: ${{ env.REGISTRY }}
21 changes: 14 additions & 7 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
########################################################################################################################
FROM python:3-slim as base

LABEL maintainer="mero.mero.guero@gmail.com"
LABEL org.opencontainers.image.authors='mero.mero.guero@gmail.com'
LABEL org.opencontainers.image.url='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.source='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.title='mmguero/monkeyplug'
LABEL org.opencontainers.image.description='Dockerized monkeyplug'

ENV DEBIAN_FRONTEND noninteractive
ENV TERM xterm
ENV PYTHONUNBUFFERED 1
Expand All @@ -32,6 +25,13 @@ CMD []
########################################################################################################################
FROM base as vosk

LABEL maintainer="mero.mero.guero@gmail.com"
LABEL org.opencontainers.image.authors='mero.mero.guero@gmail.com'
LABEL org.opencontainers.image.url='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.source='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.title='mmguero/monkeyplug'
LABEL org.opencontainers.image.description='Dockerized monkeyplug (VOSK-based)'

ENV MONKEYPLUG_MODE vosk
ENV VOSK_MODEL_DIR /opt/vosk_model

Expand All @@ -51,6 +51,13 @@ RUN python3 -m pip install --no-cache vosk && \
########################################################################################################################
FROM base as whisper

LABEL maintainer="mero.mero.guero@gmail.com"
LABEL org.opencontainers.image.authors='mero.mero.guero@gmail.com'
LABEL org.opencontainers.image.url='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.source='https://github.com/mmguero/monkeyplug'
LABEL org.opencontainers.image.title='mmguero/monkeyplug'
LABEL org.opencontainers.image.description='Dockerized monkeyplug (Whisper-based)'

ENV MONKEYPLUG_MODE whisper
ENV WHISPER_MODEL_DIR /opt/whisper_model

Expand Down
23 changes: 14 additions & 9 deletions src/monkeyplug/monkeyplug.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def pairwise(iterable):
return zip(a, b)


def scrubword(value):
    """Return ``value`` normalized for lookup in the swears map.

    The value is converted to ``str``, lower-cased and stripped of
    leading/trailing whitespace, then every ASCII punctuation character
    (``string.punctuation``) is removed. Whitespace that only becomes
    leading/trailing once punctuation is removed is left in place.
    """
    normalized = str(value).lower().strip()
    return ''.join(ch for ch in normalized if ch not in string.punctuation)


###################################################################################################
# download to file
def DownloadToFile(url, local_filename=None, chunk_bytes=4096, debug=False):
Expand Down Expand Up @@ -318,13 +322,11 @@ def __init__(
raise IOError(errno.ENOENT, os.strerror(errno.ENOENT), iSwearsFileSpec)
lines = []
with open(self.swearsFileSpec) as f:
lines = [line.rstrip("\n").lower() for line in f]
lines = [line.rstrip("\n") for line in f]
for line in lines:
lineMap = line.split("|")
if len(lineMap) > 1:
self.swearsMap[lineMap[0]] = lineMap[1]
else:
self.swearsMap[lineMap[0]] = "*****"
self.swearsMap[scrubword(lineMap[0])] = lineMap[1] if len(lineMap) > 1 else "*****"
# Only dump the parsed swears map when debugging; printing it unconditionally
# writes the whole word list to stderr on every run.
if self.debug:
    mmguero.eprint(self.swearsMap)

if self.debug:
mmguero.eprint(f'Input: {self.inputFileSpec}')
Expand Down Expand Up @@ -575,14 +577,17 @@ def RecognizeSpeech(self):
if "result" in res:
self.wordList.extend(
[
dict(r, **{'scrub': mmguero.DeepGet(r, ["word"]) in self.swearsMap})
dict(r, **{'scrub': scrubword(mmguero.DeepGet(r, ["word"])) in self.swearsMap})
for r in res["result"]
]
)
res = json.loads(rec.FinalResult())
if "result" in res:
self.wordList.extend(
[dict(r, **{'scrub': mmguero.DeepGet(r, ["word"]) in self.swearsMap}) for r in res["result"]]
[
dict(r, **{'scrub': scrubword(mmguero.DeepGet(r, ["word"])) in self.swearsMap})
for r in res["result"]
]
)

if self.debug:
Expand Down Expand Up @@ -658,8 +663,8 @@ def RecognizeSpeech(self):
for segment in self.transcript['segments']:
if 'words' in segment:
for word in segment['words']:
word['word'] = word['word'].lower().strip().translate(str.maketrans('', '', string.punctuation))
word['scrub'] = word['word'] in self.swearsMap
word['word'] = word['word'].strip()
word['scrub'] = scrubword(word['word']) in self.swearsMap
self.wordList.append(word)

if self.debug:
Expand Down

0 comments on commit b72a6c1

Please sign in to comment.