Skip to content

Commit

Permalink
Merge branch 'main' into nkpatel/upsample2d-generalization-remove-restrictions-input_width
Browse files Browse the repository at this point in the history
  • Loading branch information
nkpatel-tt authored Dec 13, 2024
2 parents fdb514b + f47602f commit b677eea
Show file tree
Hide file tree
Showing 87 changed files with 3,098 additions and 1,362 deletions.
29 changes: 25 additions & 4 deletions .github/workflows/build-and-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,29 @@ on:
default: "ubuntu-20.04"

jobs:
unit-tests-slow-dispatch:
name: ${{ inputs.arch }} ${{ inputs.runner-label }}
cpp-unit-tests-slow-dispatch:
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
test-group: [
{name: All C++, cmd: ./tests/scripts/run_cpp_unit_tests.sh},
{name: tools, cmd: ./tests/scripts/run_tools_tests.sh},

{name: user kernel path, cmd: "rm -rf /tmp/kernels && TT_METAL_KERNEL_PATH=/tmp/kernels ./build/test/tt_metal/unit_tests_api --gtest_filter=CompileProgramWithKernelPathEnvVarFixture.*"},
{name: api, cmd: "./build/test/tt_metal/unit_tests_api"},
{name: debug_tools, cmd: "./build/test/tt_metal/unit_tests_debug_tools"},
{name: device, cmd: "./build/test/tt_metal/unit_tests_device"},
{name: dispatch, cmd: "./build/test/tt_metal/unit_tests_dispatch"},
{name: eth, cmd: "./build/test/tt_metal/unit_tests_eth"},
{name: llk, cmd: "./build/test/tt_metal/unit_tests_llk"},
{name: stl, cmd: "./build/test/tt_metal/unit_tests_stl"},
{name: distributed, cmd: "./build/test/tt_metal/distributed/distributed_unit_tests --gtest_filter=MeshDeviceSuite.*"},

{name: FD2, cmd: ./tests/scripts/run_cpp_fd2_tests.sh},
]
name: ${{ inputs.arch }} ${{ inputs.runner-label }} ${{ matrix.test-group.name }}
runs-on:
- ${{ inputs.runner-label }}
- cloud-virtual-machine
Expand All @@ -58,7 +79,7 @@ jobs:
- uses: ./.github/actions/prepare-metal-run
with:
arch: ${{ inputs.arch }}
- name: Run pre/post regression tests
- name: ${{ matrix.test-group.name }} tests
timeout-minutes: ${{ inputs.timeout }}
uses: ./.github/actions/docker-run
with:
Expand All @@ -73,7 +94,7 @@ jobs:
pip install --force-reinstall pip==21.2.4
pip install -r tt_metal/python_env/requirements-dev.txt
pip install -e .
./tests/scripts/run_tests.sh --tt-arch ${{ inputs.arch }} --pipeline-type post_commit --dispatch-mode slow
${{ matrix.test-group.cmd }}
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
Expand Down
27 changes: 18 additions & 9 deletions .github/workflows/cpp-post-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
timeout:
required: false
type: number
default: 80
default: 35
os:
required: false
type: string
Expand All @@ -37,39 +37,48 @@ on:
timeout:
required: false
type: number
default: 60
default: 35
os:
required: false
type: string
default: "ubuntu-20.04"

jobs:
models:
cpp-unit-tests:
strategy:
# Do not fail-fast because we need to ensure all tests go to completion
# so we try not to get hanging machines
fail-fast: false
matrix:
test-group: [
{name: C++, cmd: ./tests/scripts/run_cpp_unit_tests.sh},
{name: All C++, cmd: ./tests/scripts/run_cpp_unit_tests.sh},
{name: tools, cmd: ./tests/scripts/run_tools_tests.sh},

{name: user kernel path, cmd: "rm -rf /tmp/kernels && TT_METAL_KERNEL_PATH=/tmp/kernels ./build/test/tt_metal/unit_tests_api --gtest_filter=CompileProgramWithKernelPathEnvVarFixture.*"},
{name: api, cmd: "./build/test/tt_metal/unit_tests_api"},
{name: debug_tools, cmd: "./build/test/tt_metal/unit_tests_debug_tools"},
{name: device, cmd: "./build/test/tt_metal/unit_tests_device"},
{name: dispatch, cmd: "./build/test/tt_metal/unit_tests_dispatch"},
{name: eth, cmd: "./build/test/tt_metal/unit_tests_eth"},
{name: llk, cmd: "./build/test/tt_metal/unit_tests_llk"},
{name: stl, cmd: "./build/test/tt_metal/unit_tests_stl"},
{name: distributed, cmd: "./build/test/tt_metal/distributed/distributed_unit_tests --gtest_filter=MeshDeviceSuite.*"},

{name: dispatch multicmd queue, cmd: "TT_METAL_GTEST_NUM_HW_CQS=2 ./build/test/tt_metal/unit_tests_dispatch --gtest_filter=MultiCommandQueue*Fixture.*"},

{name: ttnn cpp tests, cmd: ./build/test/ttnn/unit_tests_ttnn},
{name: ttnn ccl cpp unit tests, cmd: ./build/test/ttnn/unit_tests_ttnn_ccl},
]
name: ${{ matrix.test-group.name }} ${{ inputs.arch }} ${{ inputs.runner-label }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ inputs.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
runs-on:
- ${{ inputs.runner-label }}
- cloud-virtual-machine
- in-service
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: ./.github/actions/prepare-metal-run
with:
arch: ${{ inputs.arch }}
Expand Down
59 changes: 59 additions & 0 deletions .github/workflows/fast-dispatch-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Reusable (workflow_call) implementation of the fast dispatch frequent tests.
# Every entry of the `test-group` matrix describes one test run: its display
# name, target architecture, runner labels, command and step timeout.
name: "[internal] Fast dispatch frequent tests impl"

on:
  workflow_call:
    inputs:
      # Interpolated into the `docker_os_arch` value handed to the docker-run
      # action below.
      os:
        type: string
        required: false
        default: "ubuntu-20.04"

jobs:
  fd-frequent:
    name: ${{ matrix.test-group.name }}
    runs-on: ${{ matrix.test-group.runs-on }}
    env:
      LOGURU_LEVEL: INFO
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        test-group:
          - name: WH N300 pgm dispatch nightly
            arch: wormhole_b0
            runs-on:
              - cloud-virtual-machine
              - N300
              - in-service
            cmd: ./tests/tt_metal/tt_metal/perf_microbenchmark/dispatch/compare_pgm_dispatch_perf_ci.sh
            # Minutes; consumed by `timeout-minutes` on the test step.
            timeout: 10
    steps:
      - uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main

      - uses: ./.github/actions/prepare-metal-run
        with:
          arch: ${{ matrix.test-group.arch }}

      # Runs the matrix entry's command through the docker-run action.
      - name: ${{ matrix.test-group.name }} tests
        uses: ./.github/actions/docker-run
        timeout-minutes: ${{ matrix.test-group.timeout }}
        with:
          docker_os_arch: tt-metalium/${{ inputs.os }}-amd64
          docker_password: ${{ secrets.GITHUB_TOKEN }}
          docker_opts: |
            -e TT_METAL_HOME=${{ github.workspace }}
            -e ARCH_NAME=${{ matrix.test-group.arch }}
            -e LD_LIBRARY_PATH=${{ github.workspace }}/build/lib
          run_args: |
            ${{ matrix.test-group.cmd }}

      # Only reports when an earlier step in this job has failed.
      - if: ${{ failure() }}
        uses: ./.github/actions/slack-report
        with:
          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
          owner: U01Q0T3J3D0  # Paul Keller

      # Uploads the report directory unless the run was cancelled.
      - if: ${{ !cancelled() }}
        uses: ./.github/actions/upload-artifact-with-job-uuid
        with:
          path: |
            generated/test_reports/
          prefix: "test_reports_"
19 changes: 19 additions & 0 deletions .github/workflows/fast-dispatch-frequent-tests.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Entry-point workflow for the single-card fast dispatch frequent tests.
# It runs the build-artifact workflow and then hands off to the reusable
# fast-dispatch-frequent-tests-impl workflow.
name: "(Single-card) Fast dispatch frequent tests"

on:
  # Cron: minute 0 of every fourth hour.
  schedule:
    - cron: "0 */4 * * *"
  workflow_call:
  workflow_dispatch:
  # NOTE(review): this looks like a personal development branch; confirm
  # whether the push trigger is still wanted.
  push:
    branches:
      - "rkim/0-new-fd-frequent"

jobs:
  build-artifact:
    secrets: inherit
    uses: ./.github/workflows/build-artifact.yaml

  # Waits for build-artifact before calling the impl workflow.
  fd-nightly:
    needs: build-artifact
    secrets: inherit
    uses: ./.github/workflows/fast-dispatch-frequent-tests-impl.yaml
48 changes: 48 additions & 0 deletions .github/workflows/tg-demo-tests-impl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Reusable (workflow_call) implementation of the TG demo tests.
# Each `test-group` matrix entry selects one model to run through
# run_tests.sh with the demos_tg_device pipeline, together with its own
# step timeout (minutes) and the Slack owner id notified on failure.
name: "[internal] TG demo tests impl"

on:
  workflow_call:

jobs:
  tg-demo-tests:
    strategy:
      fail-fast: false
      matrix:
        test-group: [
          { name: "TG Llama3 demo tests", arch: wormhole_b0, model: llama3, timeout: 180, owner_id: U06F3ER8X9A}, # Stuti Raizada
          { name: "TG Falcon7b demo tests", arch: wormhole_b0, model: falcon7b, timeout: 120, owner_id: U05RWH3QUPM}, # Salar Hosseini
        ]
    # Use the matrix entry's name as the job title so the matrix `name` field
    # is actually consumed and the job is not titled from the full matrix
    # value list.
    name: ${{ matrix.test-group.name }}
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      LOGURU_LEVEL: INFO
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    runs-on:
      - arch-wormhole_b0
      - config-tg
      - in-service
      - bare-metal
      - pipeline-functional
    steps:
      - uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
      # Export the checkout directory as TT_METAL_HOME for all later steps.
      - name: Set up dynamic env vars for build
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
      # Download and unpack the build artifact for this architecture.
      - uses: actions/download-artifact@v4
        with:
          name: TTMetal_build_${{ matrix.test-group.arch }}
      - name: Extract files
        run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
      - uses: ./.github/actions/install-python-deps
      - name: Run demo regression tests
        timeout-minutes: ${{ matrix.test-group.timeout }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd $TT_METAL_HOME
          export PYTHONPATH=$TT_METAL_HOME
          ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type demos_tg_device --dispatch-mode "" --model ${{ matrix.test-group.model }}
      # Notify the matrix entry's owner only when an earlier step failed.
      - uses: ./.github/actions/slack-report
        if: ${{ failure() }}
        with:
          slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
          owner: ${{ matrix.test-group.owner_id }}
38 changes: 2 additions & 36 deletions .github/workflows/tg-demo-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,5 @@ jobs:
secrets: inherit
tg-demo-tests:
needs: build-artifact
strategy:
fail-fast: false
matrix:
test-group: [
{
name: "TG demo tests",
arch: wormhole_b0,
runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-functional"],
cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type demos_tg_device --dispatch-mode ""'
},
]
name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
runs-on: ${{ matrix.test-group.runs-on }}
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
run: |
echo "TT_METAL_HOME=$(pwd)" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
name: TTMetal_build_${{ matrix.test-group.arch }}
- name: Extract files
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run demo regression tests
timeout-minutes: 180
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
${{ matrix.test-group.cmd }}
secrets: inherit
uses: ./.github/workflows/tg-demo-tests-impl.yaml
27 changes: 17 additions & 10 deletions .github/workflows/tg-frequent-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,22 @@ jobs:
fail-fast: false
matrix:
test-group: [
{
name: "TG frequent tests",
arch: wormhole_b0,
runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-functional"],
cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_tg_device --dispatch-mode ""'
},
{ name: "TG Llama3 frequent tests", arch: wormhole_b0, model: llama3, timeout: 90, owner_id: U06F3ER8X9A}, # Stuti Raizada
{ name: "TG Llama3-70B (old) frequent tests", arch: wormhole_b0, model: llama3-70b-old, timeout: 90, owner_id: U03FJB5TM5Y}, #Colman Glagovich
{ name: "TG resnet50 frequent tests", arch: wormhole_b0, model: resnet50, timeout: 90, owner_id: U013121KDH9}, # Austin Ho
{ name: "TG unit/distributed frequent tests", arch: wormhole_b0, model: unit, timeout: 90, owner_id: XXXXX}, # Add owner
]
name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
runs-on: ${{ matrix.test-group.runs-on }}
runs-on:
- arch-wormhole_b0
- config-tg
- in-service
- bare-metal
- pipeline-functional
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
Expand All @@ -35,9 +37,14 @@ jobs:
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run frequent regression tests
timeout-minutes: 90
timeout-minutes: ${{ matrix.test-group.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
${{ matrix.test-group.cmd }}
./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_tg_device --dispatch-mode "" --model ${{ matrix.test-group.model }}
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: ${{ matrix.test-group.owner_id }}
26 changes: 16 additions & 10 deletions .github/workflows/tg-unit-tests-impl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ jobs:
cmd: "./build/test/umd/galaxy/unit_tests_glx"
},
]
name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
Expand All @@ -41,20 +40,22 @@ jobs:
fail-fast: false
matrix:
test-group: [
{
name: "TG unit tests",
arch: wormhole_b0,
runs-on: ["arch-wormhole_b0", "config-tg", "in-service", "bare-metal", "pipeline-functional"],
cmd: './tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tg_device --dispatch-mode ""'
},
{ name: "TG unit tests", arch: wormhole_b0, model: unit, timeout: 30, owner_id: XXXXX}, # Add owner
{ name: "TG Llama3-small unit tests", arch: wormhole_b0, model: llama3-small, timeout: 45, owner_id: U06F3ER8X9A}, # Stuti Raizada
{ name: "TG Llama3-70b unit tests", arch: wormhole_b0, model: llama3-70b, timeout: 45, owner_id: U06F3ER8X9A}, # Stuti Raizada
]
name: ${{ matrix.test-group.name }}
env:
TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
ARCH_NAME: ${{ matrix.test-group.arch }}
LOGURU_LEVEL: INFO
LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
runs-on: ${{ matrix.test-group.runs-on }}
runs-on:
- arch-wormhole_b0
- config-tg
- in-service
- bare-metal
- pipeline-functional
steps:
- uses: tenstorrent/tt-metal/.github/actions/checkout-with-submodule-lfs@main
- name: Set up dynamic env vars for build
Expand All @@ -67,9 +68,14 @@ jobs:
run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
- uses: ./.github/actions/install-python-deps
- name: Run unit regression tests
timeout-minutes: 45
timeout-minutes: ${{ matrix.test-group.timeout }}
run: |
source ${{ github.workspace }}/python_env/bin/activate
cd $TT_METAL_HOME
export PYTHONPATH=$TT_METAL_HOME
${{ matrix.test-group.cmd }}
./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type unit_tg_device --dispatch-mode "" --model ${{ matrix.test-group.model }}
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: ${{ matrix.test-group.owner_id }}
2 changes: 1 addition & 1 deletion .github/workflows/tg-unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
with:
arch: '["wormhole_b0"]'
secrets: inherit
TG-UMD-tests:
TG-Unit-tests:
needs: build-artifact
secrets: inherit
uses: ./.github/workflows/tg-unit-tests-impl.yaml
Loading

0 comments on commit b677eea

Please sign in to comment.