(T3K) T3000 model perf tests #764
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: "(T3K) T3000 model perf tests"

# Triggers: manual dispatch, plus a recurring cron schedule.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 */12 * * *"  # This cron schedule runs the workflow every 12 hours
jobs:
  # Calls the reusable build workflow for wormhole_b0.
  # NOTE(review): presumably this produces the TTMetal_build_<arch> artifact
  # that the perf job downloads - confirm in build-artifact.yaml.
  build-artifact:
    uses: ./.github/workflows/build-artifact.yaml
    with:
      arch: '["wormhole_b0"]'
    secrets: inherit

  # Runs one perf-test group per matrix entry on a T3000 runner, uploads the
  # resulting perf report CSV, and notifies the group's owner on failure.
  t3000-model-perf-tests:
    needs: build-artifact
    strategy:
      # Let the remaining model groups finish even if one of them fails.
      fail-fast: false
      matrix:
        # Per-entry fields:
        #   name       - job display name
        #   model      - used in the uploaded artifact name
        #   model-type - used in the uploaded artifact name
        #   arch       - sets ARCH_NAME and selects the build artifact / tarball
        #   cmd        - command run after sourcing run_t3000_model_perf_tests.sh
        #   timeout    - timeout-minutes for the test step
        #   owner_id   - passed to the slack-report action as `owner`
        test-group:
          - name: "t3k LLM falcon7b model perf tests"
            model: "falcon7b"
            model-type: "LLM"
            arch: wormhole_b0
            cmd: run_t3000_falcon7b_tests
            timeout: 75
            owner_id: U053W15B6JF  # Djordje Ivanovic
          - name: "t3k LLM mixtral model perf tests"
            model: "mixtral"
            model-type: "LLM"
            arch: wormhole_b0
            cmd: run_t3000_mixtral_tests
            timeout: 75
            owner_id: U03PUAKE719  # Miguel Tairum
          - name: "t3k LLM llama2 model perf tests"
            model: "llama2"
            model-type: "LLM"
            arch: wormhole_b0
            cmd: run_t3000_llama2_70b_tests
            timeout: 75
            owner_id: U03FJB5TM5Y  # Colman Glagovich
          - name: "t3k LLM falcon40b model perf tests"
            model: "falcon40b"
            model-type: "LLM"
            arch: wormhole_b0
            cmd: run_t3000_falcon40b_tests
            timeout: 75
            owner_id: U053W15B6JF  # Djordje Ivanovic
          - name: "t3k CNN resnet50 model perf tests"
            model: "resnet50"
            model-type: "CNN"
            arch: wormhole_b0
            cmd: run_t3000_resnet50_tests
            timeout: 75
            owner_id: U013121KDH9  # Austin Ho
          # Disabled entry (no model or owner_id assigned). No tests are being run?
          # - name: "t3k CNN model perf tests "
          #   model-type: "CNN"
          #   arch: wormhole_b0
          #   cmd: run_t3000_cnn_tests
          #   timeout: 120
          #   owner_id:
    name: ${{ matrix.test-group.name }}
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    environment: dev
    runs-on: ["arch-wormhole_b0", "config-t3000", "in-service", "pipeline-perf"]
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      # Switch the CPU frequency governor to `performance` for the run; the
      # final step switches it to `ondemand`.
      - name: Enable performance mode
        run: |
          sudo cpupower frequency-set -g performance
      - name: Ensure weka mount is active
        run: |
          sudo systemctl restart mnt-MLPerf.mount
          sudo /etc/rc.local
          ls -al /mnt/MLPerf/bit_error_tests
      # Export the checkout directory to later steps as TT_METAL_HOME and
      # PYTHONPATH.
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
          echo "PYTHONPATH=$(pwd)" >> "$GITHUB_ENV"
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_${{ matrix.test-group.arch }}
      - name: Extract files
        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run model perf regression tests
        # Custom shell template: the script runs under plain `bash`, without the
        # `-eo pipefail` flags GitHub adds for `shell: bash`.
        # NOTE(review): unless the sourced script enables `set -e`, a failing
        # test command does not stop the script, and the step's result is the
        # exit status of the final merge command - confirm this is intended.
        shell: bash {0}
        timeout-minutes: ${{ matrix.test-group.timeout }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd $TT_METAL_HOME
          export PYTHONPATH=$TT_METAL_HOME
          source ${{ github.workspace }}/tests/scripts/t3000/run_t3000_model_perf_tests.sh
          ${{ matrix.test-group.cmd }}
          env python models/perf/merge_perf_results.py
      # Runs unless the workflow was cancelled. `ls` fails the step when the
      # date-stamped CSV is missing, which in turn skips the upload step.
      # NOTE(review): presumably merge_perf_results.py writes this file - confirm.
      - name: Check perf report exists
        id: check-perf-report
        if: ${{ !cancelled() }}
        run: |
          ls -hal
          export PERF_REPORT_FILENAME="Models_Perf_$(date +%Y_%m_%d).csv"
          ls -hal $PERF_REPORT_FILENAME
          echo "perf_report_filename=$PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"
      - name: Upload perf report
        if: ${{ !cancelled() && steps.check-perf-report.conclusion == 'success' }}
        uses: actions/upload-artifact@v4
        with:
          name: perf-report-csv-${{ matrix.test-group.model-type }}-${{ matrix.test-group.arch }}-${{ matrix.test-group.model }}-bare-metal
          path: "${{ steps.check-perf-report.outputs.perf_report_filename }}"
      # Notify the matrix entry's owner when any earlier step failed.
      - uses: ./.github/actions/slack-report
        if: ${{ failure() }}
        with:
          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
          owner: ${{ matrix.test-group.owner_id }}
      # Always runs, regardless of the outcome of the steps above.
      - name: Disable performance mode
        if: always()
        run: |
          sudo cpupower frequency-set -g ondemand