Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/NVIDIA/NeMo into aot/nemo-u…
Browse files Browse the repository at this point in the history
…x-nemotron
  • Loading branch information
suiyoubi committed Aug 19, 2024
2 parents 7fff1f0 + 72a39f4 commit 374c584
Show file tree
Hide file tree
Showing 79 changed files with 4,466 additions and 670 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/changelog-build.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
name: 'Changelog Build (Release)'

on:
workflow_dispatch:
push:
tags:
- '*'

jobs:
changelog:
if: startsWith(github.ref, 'refs/tags/')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -39,7 +39,7 @@ jobs:
ignorePreReleases: "false"
failOnError: "false"
fromTag: ${{ steps.previous_tag.outputs.tag_name }}
toTag: ${{ github.ref_name }}
toTag: ${{ github.ref_name || github.sha }}

- name: Print Changelog
run: |
Expand Down
82 changes: 81 additions & 1 deletion .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,32 @@ on:
- 'main'
- 'r**'
types: [ labeled ]
workflow_dispatch:
inputs:
test_to_run:
required: false
default: all
type: string
description: Comma-separated list of tests to run. Use "all" to run the full test suite.

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
# Pre-flight job: converts the optional `test_to_run` workflow_dispatch input
# (a comma-separated string) into a compact JSON array and publishes it as the
# job output `test_to_run` for jobs that list `pre-flight` in their `needs`.
pre-flight:
  runs-on: ubuntu-latest
  outputs:
    test_to_run: ${{ steps.main.outputs.test_to_run }}
  steps:
    - name: Parse test_to_run
      id: main
      env:
        # Hand the input over via the environment instead of interpolating it
        # into the script text, so its content is never parsed as shell code.
        TEST_TO_RUN: ${{ inputs.test_to_run }}
      run: |
        parsed_string=$(echo "$TEST_TO_RUN" | jq -c --raw-input 'split(",")')
        # Step outputs are collected from $GITHUB_OUTPUT; a line written to
        # $GITHUB_ENV would leave steps.main.outputs.test_to_run empty.
        echo "test_to_run=${parsed_string}" >> "$GITHUB_OUTPUT"
gpu-test:
needs: [pre-flight]
runs-on: self-hosted-azure
if: ${{ github.event.label.name == 'Run CICD' }}
steps:
Expand All @@ -36,6 +55,7 @@ jobs:
cicd-cluster-clean:
runs-on: self-hosted-azure-builder
needs: [pre-flight]
if: ${{ github.event.label.name == 'Run CICD' }}
steps:
- name: Clean server from old files
Expand Down Expand Up @@ -94,7 +114,6 @@ jobs:
'
### \'\'

L0_Unit_Tests_GPU:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
Expand Down Expand Up @@ -822,6 +841,67 @@ jobs:
AFTER_SCRIPT: |
rm -rf examples/asr/speech_to_text_adapters_mha_results
# L2: Duration bins estimation
# Runs the 1D (estimate_duration_bins.py) and 2D (estimate_duration_bins_2d.py)
# bucket estimation scripts on the an4 manifests through the shared
# _test_template.yml reusable workflow, after cicd-test-container-setup.
# The prompt-format variant is kept commented out inside the script (see TODO).
L2_Speech_Estimate_Duration_Bins:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
with:
RUNNER: self-hosted-azure
SCRIPT: |
set -x
# 1D buckets [SSL, CTC]
python scripts/speech_recognition/estimate_duration_bins.py \
/home/TestData/an4_dataset/an4_train.json \
--buckets 5
# 2D buckets [CTC, RNNT, TDT] / with tokenizer
python scripts/speech_recognition/estimate_duration_bins_2d.py \
/home/TestData/an4_dataset/an4_train_lang.json \
--tokenizer /home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
--buckets 5 \
--sub-buckets 2
# TODO(pzelasko): Figure out how to quote the value in the test properly for CI to accept it...
# 2D buckets with prompt [AED/Canary, SpeechLM] / with aggregate tokenizer + prompt format
# python scripts/speech_recognition/estimate_duration_bins_2d.py \
# /home/TestData/an4_dataset/an4_train_lang.json \
# --tokenizer /home/TestData/asr_tokenizers/canary/canary_spl_tokenizer_v32/tokenizer.model \
# /home/TestData/asr_tokenizers/canary/en/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
# /home/TestData/asr_tokenizers/canary/es/tokenizer_spe_bpe_v1024_max_4/tokenizer.model \
# --langs spl_tokens en es \
# --prompt-format canary \
# --prompt '[{"role":"user","slots":{"source_lang":"en","target_lang":"en","task":"asr","pnc":"yes"}}]' \
# --buckets 5 \
# --sub-buckets 2
# L2: OOMptimizer
# Invokes scripts/speech_recognition/oomptimizer.py twice against the
# fast-conformer_ctc_bpe.yaml config with the EncDecCTCModelBPE model class:
# once with a 1D bucket list and once with a 2D bucket list passed via -b.
L2_Speech_Batch_Size_OOMptimizer:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
with:
RUNNER: self-hosted-azure
SCRIPT: |
# 1D bucketing
python scripts/speech_recognition/oomptimizer.py \
-c /home/TestData/oomptimizer/fast-conformer_ctc_bpe.yaml \
-m nemo.collections.asr.models.EncDecCTCModelBPE \
-b "[5.0,10.0]"
# 2D bucketing
python scripts/speech_recognition/oomptimizer.py \
-c /home/TestData/oomptimizer/fast-conformer_ctc_bpe.yaml \
-m nemo.collections.asr.models.EncDecCTCModelBPE \
-b "[[5.0,30],[5.0,45],[10.0,57],[10.0,71]]"
# L2: OOMptimizer Canary (has a different batch schema)
# Invokes scripts/speech_recognition/oomptimizer.py once against the
# fast-conformer_aed.yaml config with the EncDecMultiTaskModel model class,
# using a 2D bucket list passed via -b.
L2_Speech_Batch_Size_OOMptimizer_Canary:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
with:
RUNNER: self-hosted-azure
SCRIPT: |
python scripts/speech_recognition/oomptimizer.py \
-c /home/TestData/oomptimizer/fast-conformer_aed.yaml \
-m nemo.collections.asr.models.EncDecMultiTaskModel \
-b "[[5.0,30],[5.0,45],[10.0,57],[10.0,71]]"
# L2: Speech Transcription
L2_Speech_Transcription_Speech_to_Text_Transcribe:
needs: [cicd-test-container-setup]
Expand Down
155 changes: 38 additions & 117 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -1,119 +1,69 @@
name: "NeMo Code release"

on:
issue_comment:
types: [created]

workflow_dispatch:
inputs:
branch:
description: Branch to release
required: true
type: string
jobs:
main:
if: >
github.event_name == 'issue_comment' &&
github.event.issue.pull_request &&
startsWith(github.event.comment.body, '/release-please') &&
contains(fromJSON('["ko3n1g"]'), github.actor)
if: contains(fromJSON('["ko3n1g"]'), github.actor)
runs-on: ubuntu-latest
environment:
name: main
steps:
- name: Update PR issue comment
shell: bash
env:
message: ${{ github.event.comment.body }}
run: |
message="$message
---
Releasebot 🤖: Release processes started...
"
message="${message//$'\n'/<br>}"
curl -L \
-X PATCH \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }} \
-d '{"body":"'"$message"'"}'
- name: Get PR number
shell: bash
id: get-pr-num
run: |
PR_URL="${{ github.event.issue.pull_request.url }}"
PR_NUM=${PR_URL##*/}
echo "pr_number=$PR_NUM" >> $GITHUB_OUTPUT
- name: Get Pull Request Information
uses: actions/github-script@v6
id: get-pr-branch
with:
result-encoding: string
script: |
const pr = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: ${{ steps.get-pr-num.outputs.pr_number }}
});
console.log('Pull Request Information:', pr.data);
return pr.data.head.ref;

- name: Checkout repository
uses: actions/checkout@v4
with:
path: ${{ github.run_id }}
ref: ${{ steps.get-pr-branch.outputs.result }}
ref: ${{ inputs.branch }}

- name: Get version number
- name: Create release
id: version-number
run: |
cd ${{ github.run_id }}
VERSION=$(python -c "import nemo; print(nemo.__version__)")
echo "VERSION=$VERSION" >> "$GITHUB_OUTPUT"
- name: Extract changelog
id: extract-changelog
uses: peter-evans/find-comment@v3
with:
issue-number: ${{ steps.get-pr-num.outputs.pr_number }}
body-includes: '# Detailed Changelogs'

- name: Extract summary
id: extract-summary
uses: peter-evans/find-comment@v3
with:
issue-number: ${{ steps.get-pr-num.outputs.pr_number }}
body-includes: '# Highlights'

- name: Create Release doc
id: create-release-doc
env:
SUMMARY: ${{ steps.extract-summary.outputs.comment-body }}
CHANGELOG: ${{ steps.extract-changelog.outputs.comment-body }}
run: |
echo "TITLE<<EOF" >> $GITHUB_ENV
echo "NVIDIA Neural Modules ${{ steps.version-number.outputs.VERSION }}" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
NAME="NVIDIA Neural Modules ${VERSION}"
CHANGELOG=$(awk '/^## '"$NAME"'/{flag=1; next} /^## /{flag=0} flag' CHANGELOG.md)
CHANGELOG=$(echo "$CHANGELOG" | sed '/./,$!d' | sed ':a;N;$!ba;s/\n$//')
PAYLOAD=$(jq \
-n \
-c \
--arg CI_COMMIT_BRANCH "${{ inputs.branch }}" \
--arg NAME "$NAME" \
--arg BODY "$CHANGELOG" \
'{
"tag_name": $CI_COMMIT_BRANCH,
"target_commitish": $CI_COMMIT_BRANCH,
"name": $NAME,
"body": $BODY,
"draft": false,
"prerelease": false,
"generate_release_notes": false
}'
)
echo "BODY<<EOF" >> $GITHUB_ENV
echo "$SUMMARY" >> $GITHUB_ENV
echo "$CHANGELOG" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
- name: Create Release
uses: softprops/action-gh-release@v2
with:
name: ${{ env.TITLE }}
tag_name: ${{ steps.version-number.outputs.VERSION }}
body: ${{ env.BODY }}
curl -L \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.PAT }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/NVIDIA/NeMo/releases \
-d "$PAYLOAD"
- name: Build, test, and release wheel
env:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
run: |
cd ${{ github.run_id }}
EXPECTED_VERSION=$(python -c 'import nemo; print(nemo.__version__)')
python3 -m pip install --upgrade build
python3 -m build
Expand All @@ -122,7 +72,6 @@ jobs:
cd ../
INSTALLED_VERSION=$(python -c 'import nemo; print(nemo.__version__)')
EXPECTED_VERSION=${{ steps.version-number.outputs.VERSION }}
if [[ "$INSTALLED_VERSION" != "$EXPECTED_VERSION" ]]; then
echo 'Wheel has an outdated version, mission abort immediately!'
Expand All @@ -134,34 +83,6 @@ jobs:
python3 -m pip install --upgrade twine
python3 -m twine upload --repository pypi dist/*
- name: Update PR issue comment
shell: bash
env:
message: ${{ github.event.comment.body }}
run: |
message="$message
---
Releasebot 🤖: Release done 🎉
"
message="${message//$'\n'/<br>}"
curl -L \
-X PATCH \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }} \
-d '{"body":"'"$message"'"}'
- name: Close Pull
run: |
cd ${{ github.run_id }}
gh pr close --comment "Releasebot 🤖: Closing PR" "${{ steps.get-pr-num.outputs.pr_number }}"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: notify
run: |
MESSAGE='{
Expand Down
Loading

0 comments on commit 374c584

Please sign in to comment.