Skip to content

Commit

Permalink
#7502: Consolidate all FD nightly tests into one job
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-rkim authored and dimitri-tenstorrent committed Apr 30, 2024
1 parent dd708bb commit 819b8d2
Showing 1 changed file with 13 additions and 120 deletions.
133 changes: 13 additions & 120 deletions .github/workflows/fast-dispatch-full-regressions-and-models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,23 @@ jobs:
# Job: delegates to the reusable workflow in build-artifact.yaml and passes the
# calling workflow's secrets through to it. Other jobs in this file list it
# under `needs`.
build-artifact:
uses: ./.github/workflows/build-artifact.yaml
secrets: inherit
fd-models-common:
fd-nightly:
needs: build-artifact
name: Common Models - ${{ matrix.arch }}
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
arch: [grayskull, wormhole_b0]
# One matrix entry per nightly suite. Each entry carries the label used in the
# job name (`name`), the architecture used for ARCH_NAME and the build tarball
# (`arch`), and the command executed by the test step (`cmd`).
test-group:
  - name: "Common models GS"
    arch: grayskull
    cmd: tests/scripts/nightly/run_models_2.sh
  - name: "Common models WH B0"
    arch: wormhole_b0
    cmd: tests/scripts/nightly/run_models_2.sh
  - name: "GS-only models and ttnn nightly"
    arch: grayskull
    cmd: tests/scripts/nightly/run_models_1.sh
  - name: "WH-only models"
    arch: wormhole_b0
    cmd: tests/scripts/nightly/run_models_3.sh
  - name: "API tests GS"
    arch: grayskull
    cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast
  - name: "API tests WH B0"
    arch: wormhole_b0
    cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast
name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
Expand All @@ -42,125 +50,10 @@ jobs:
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run frequent reg tests scripts
timeout-minutes: 40
timeout-minutes: 90
run: |
source ${{ github.workspace }}/python_env/bin/activate
pip install tox
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
VENV_DIR=${{ github.workspace }}/python_env TT_ARCH=${{ matrix.arch }} FREQUENT_TYPE=${{ matrix.frequent-type }} tox -e nightly_fast_dispatch
# Job: runs tests/scripts/nightly/run_models_1.sh on a grayskull model runner
# after the build-artifact job has finished.
# NOTE(review): the fd-nightly test-group matrix has an entry that runs the same
# script on grayskull, so this standalone job looks superseded by it — confirm.
# NOTE(review): the test step here allows 165 minutes, while the fd-nightly test
# step lists timeout-minutes of 40 and 90, both shorter — confirm the limit is
# still sufficient for this suite.
# NOTE(review): "dyanmic" in a step name below is a typo for "dynamic".
fd-models-ttnn-gs-only:
needs: build-artifact
name: Grayskull only models and ttnn integration tests
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
arch: [grayskull]
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.arch }}
CONFIG: ci
LOGURU_LEVEL: INFO
# Runner label is derived from the matrix arch.
runs-on: model-runner-${{ matrix.arch }}
steps:
- uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
# Restarts the mnt-MLPerf mount unit, runs /etc/rc.local, then lists a
# directory under /mnt/MLPerf.
- name: Ensure weka mount is active
run: |
sudo systemctl restart mnt-MLPerf.mount
sudo /etc/rc.local
ls -al /mnt/MLPerf/bit_error_tests
- name: Set up dyanmic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run frequent reg tests scripts
# Step-level limit; applies to the script below only.
timeout-minutes: 165
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
tests/scripts/nightly/run_models_1.sh
# Job: runs tests/scripts/nightly/run_models_3.sh on a wormhole_b0 model runner
# after the build-artifact job has finished.
# NOTE(review): the fd-nightly test-group matrix has an entry that runs the same
# script on wormhole_b0, so this standalone job looks superseded by it — confirm.
# NOTE(review): the test step here allows 165 minutes, while the fd-nightly test
# step lists timeout-minutes of 40 and 90, both shorter — confirm the limit is
# still sufficient for this suite.
# NOTE(review): "dyanmic" in a step name below is a typo for "dynamic".
models-whb0-only:
needs: build-artifact
name: WH B0 only models
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
arch: [wormhole_b0]
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.arch }}
CONFIG: ci
LOGURU_LEVEL: INFO
# Runner label is derived from the matrix arch.
runs-on: model-runner-${{ matrix.arch }}
steps:
- uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
# Restarts the mnt-MLPerf mount unit, runs /etc/rc.local, then lists a
# directory under /mnt/MLPerf.
- name: Ensure weka mount is active
run: |
sudo systemctl restart mnt-MLPerf.mount
sudo /etc/rc.local
ls -al /mnt/MLPerf/bit_error_tests
- name: Set up dyanmic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run frequent reg tests scripts
# Step-level limit; applies to the script below only.
timeout-minutes: 165
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
tests/scripts/nightly/run_models_3.sh
# Job: runs tests/scripts/run_tests.sh with the frequent_<frequent-type>
# pipeline in fast dispatch mode, once per matrix arch, after the
# build-artifact job has finished. The matrix is grayskull and wormhole_b0
# crossed with a single frequent-type, api.
# NOTE(review): the fd-nightly test-group matrix has "API tests" entries that
# run the same command for both archs, so this standalone job looks superseded
# by them — confirm.
# NOTE(review): "dyanmic" in a step name below is a typo for "dynamic".
fd-frequent-api-tests:
needs: build-artifact
name: ${{ matrix.frequent-type }} ${{ matrix.arch }} tests
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
arch: [grayskull, wormhole_b0]
frequent-type: [api]
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.arch }}
CONFIG: ci
LOGURU_LEVEL: INFO
# Runner label is derived from the matrix arch.
runs-on: model-runner-${{ matrix.arch }}
steps:
- uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
# Restarts the mnt-MLPerf mount unit, runs /etc/rc.local, then lists a
# directory under /mnt/MLPerf.
- name: Ensure weka mount is active
run: |
sudo systemctl restart mnt-MLPerf.mount
sudo /etc/rc.local
ls -al /mnt/MLPerf/bit_error_tests
- name: Set up dyanmic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run frequent reg tests scripts
# Step-level limit; applies to the script below only.
timeout-minutes: 75
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
./tests/scripts/run_tests.sh --tt-arch ${{ matrix.arch }} --pipeline-type frequent_${{ matrix.frequent-type }} --dispatch-mode fast
${{ matrix.test-group.cmd }}

0 comments on commit 819b8d2

Please sign in to comment.