# ttnn - Run sweeps #1690
# Workflow display name.
name: 'ttnn - Run sweeps'

# Triggers:
#   * workflow_dispatch - manual run; the caller picks one sweep module, or the
#     'ALL SWEEPS (Nightly)' sentinel (the default) to run everything.
#   * schedule          - unattended run once a day.
on:
  workflow_dispatch:
    inputs:
      sweep_name:
        description: 'Which sweep module to run?'
        type: choice
        required: true
        # Job steps compare the input against this exact string to choose
        # between the single-sweep and all-sweeps code paths.
        default: 'ALL SWEEPS (Nightly)'
        # Every other entry is passed verbatim as the --module-name argument
        # of the sweep scripts; order here is the order of the dropdown.
        options:
          - ALL SWEEPS (Nightly)
          - add
          - tilize
          - untilize
          - ccl.line_all_gather
          - ccl.all_gather_n300
          - ccl.all_gather_n300_focused
          - creation.zeros.zeros
          - creation.empty.empty
          - creation.zeros_like.zeros_like
          - eltwise.unary.abs.abs_pytorch2
          - eltwise.unary.relu.relu
          - eltwise.unary.relu.relu_pytorch2
          - eltwise.unary.gelu.gelu
          - eltwise.unary.gelu.gelu_pytorch2
          - eltwise.unary.hardsigmoid.hardsigmoid_pytorch2
          - eltwise.unary.leaky_relu.leaky_relu_pytorch2
          - eltwise.unary.abs.abs
          - eltwise.unary.cos.cos
          - eltwise.unary.cos.cos_pytorch2
          - eltwise.unary.sin.sin
          - eltwise.unary.sin.sin_pytorch2
          - eltwise.unary.tril.tril_pytorch2
          - eltwise.unary.clamp.clamp
          - eltwise.unary.clamp.clamp_pytorch2
          - eltwise.unary.clamp.clamp_min_pytorch2
          - eltwise.unary.clip.clip
          - eltwise.unary.cbrt.cbrt
          - eltwise.unary.rsub.rsub
          - eltwise.unary.rsub.rsub_pytorch2
          - eltwise.unary.rsqrt.rsqrt_pytorch2
          - eltwise.unary.rdiv.rdiv
          - eltwise.unary.frac.frac
          - eltwise.unary.ceil.ceil
          - eltwise.unary.ceil.ceil_pytorch2
          - eltwise.unary.trunc.trunc
          - eltwise.unary.floor.floor
          - eltwise.unary.floor.floor_pytorch2
          - eltwise.unary.clone.clone
          - eltwise.unary.elu.elu
          - eltwise.unary.elu.elu_pytorch2
          - eltwise.unary.erfc.erfc
          - eltwise.unary.exp.exp
          - eltwise.unary.exp.exp_pytorch2
          - eltwise.unary.exp2.exp2
          - eltwise.unary.expm1.expm1
          - eltwise.unary.tanh.tanh
          - eltwise.unary.tanh.tanh_pytorch2
          - eltwise.unary.atanh.atanh
          - eltwise.unary.atan.atan
          - eltwise.unary.sign.sign
          - eltwise.unary.rad2deg.rad2deg
          - eltwise.unary.deg2rad.deg2rad
          - eltwise.unary.relu6.relu6
          - eltwise.unary.log.log
          - eltwise.unary.log.log_pytorch2
          - eltwise.unary.log1p.log1p
          - eltwise.unary.log2.log2
          - eltwise.unary.log10.log10
          - eltwise.unary.bitwise.bitwise_and
          - eltwise.unary.bitwise.bitwise_left_shift
          - eltwise.unary.bitwise.bitwise_not
          - eltwise.unary.bitwise.bitwise_not_pytorch2
          - eltwise.unary.bitwise.bitwise_or
          - eltwise.unary.bitwise.bitwise_right_shift
          - eltwise.unary.bitwise.bitwise_xor
          - eltwise.unary.log_sigmoid.log_sigmoid
          - eltwise.unary.logical_not.logical_not_
          - eltwise.unary.logical_not.logical_not
          - eltwise.unary.logical_not.logical_not_output
          - eltwise.unary.logical_not.logical_not_pytorch2
          - eltwise.unary.neg.neg_pytorch2
          - eltwise.unary.erf.erf
          - eltwise.unary.erfinv.erfinv
          - eltwise.unary.i0.i0
          - eltwise.unary.silu.silu
          - eltwise.unary.silu.silu_pytorch2
          - eltwise.unary.glu.glu
          - eltwise.unary.geglu.geglu
          - eltwise.unary.swiglu.swiglu
          - eltwise.unary.sigmoid.sigmoid
          - eltwise.unary.sigmoid.sigmoid_pytorch2
          - eltwise.unary.sigmoid_accurate.sigmoid_accurate
          - eltwise.unary.tril.tril
          - eltwise.unary.triu.triu
          - eltwise.unary.normalize_hw.normalize_hw
          - eltwise.unary.normalize_global.normalize_global
          - eltwise.unary.heaviside.heaviside
          - eltwise.unary.hardtanh.hardtanh
          - eltwise.unary.hardswish.hardswish
          - eltwise.unary.hardsigmoid.hardsigmoid
          - eltwise.unary.hardshrink.hardshrink
          - eltwise.unary.softmax.softmax
          - eltwise.unary.identity.identity
          - eltwise.unary.neg.neg
          - eltwise.unary.sinh.sinh
          - eltwise.unary.asinh.asinh
          - eltwise.unary.cosh.cosh
          - eltwise.unary.relu_min.relu_min
          - eltwise.unary.relu_max.relu_max
          - eltwise.unary.softplus.softplus
          - eltwise.unary_backward.fill_zero_bw
          - eltwise.unary_backward.log_sigmoid_bw
          - eltwise.unary_backward.logit_bw
          - eltwise.unary_backward.neg_bw
          - eltwise.unary_backward.hardshrink_bw
          - eltwise.unary_backward.softshrink_bw
          - eltwise.unary_backward.acos_bw.acos_bw
          - eltwise.unary_backward.acosh_bw.acosh_bw
          - eltwise.unary_backward.atan_bw.atan_bw
          - eltwise.unary_backward.cos_bw.cos_bw
          - eltwise.unary_backward.frac_bw.frac_bw
          - eltwise.unary_backward.i0_bw.i0_bw
          - eltwise.unary_backward.rad2deg_bw.rad2deg_bw
          - eltwise.unary_backward.relu_bw.relu_bw
          - eltwise.unary_backward.rsqrt_bw.rsqrt_bw
          - eltwise.unary_backward.sigmoid_bw.sigmoid_bw
          - eltwise.unary_backward.tan_bw.tan_bw
          - eltwise.unary_backward.trunc_bw.trunc_bw
          - eltwise.unary_backward.clamp_bw.clamp_bw
          - eltwise.unary_backward.hardtanh_bw.hardtanh_bw
          - eltwise.unary_backward.mul_bw.mul_bw
          - eltwise.unary_backward.softplus_bw.softplus_bw
          - eltwise.unary_backward.threshold_bw.threshold_bw
          - eltwise.unary_backward.div_bw.div_bw
          - eltwise.unary_backward.log_bw.log_bw
          - eltwise.unary_backward.relu6_bw.relu6_bw
          - eltwise.unary_backward.log10_bw.log10_bw
          - eltwise.unary_backward.abs_bw.abs_bw
          - eltwise.unary_backward.sinh_bw.sinh_bw
          - eltwise.unary_backward.sin_bw.sin_bw
          - eltwise.unary_backward.square_bw.square_bw
          - eltwise.unary_backward.rdiv_bw.rdiv_bw
          - eltwise.unary_backward.bias_gelu_bw.bias_gelu_bw
          - eltwise.unary_backward.pow_bw.pow_bw
          - eltwise.unary_backward.exp_bw.exp_bw
          - eltwise.unary_backward.tanh_bw.tanh_bw
          - eltwise.unary_backward.sqrt_bw.sqrt_bw
          - eltwise.unary_backward.add_bw.add_bw
          - eltwise.unary_backward.assign_bw.assign_bw
          - eltwise.unary_backward.fill_bw.fill_bw
          - eltwise.unary_backward.hardsigmoid_bw.hardsigmoid_bw
          - eltwise.unary_backward.lgamma_bw.lgamma_bw
          - eltwise.unary_backward.multigammaln_bw.multigammaln_bw
          - eltwise.unary_backward.leaky_relu_bw.leaky_relu_bw
          - eltwise.unary_backward.elu_bw.elu_bw
          - eltwise.unary_backward.celu_bw.celu_bw
          - eltwise.unary_backward.selu_bw.selu_bw
          - eltwise.unary_backward.silu_bw.silu_bw
          - eltwise.unary_backward.floor_bw.floor_bw
          - eltwise.unary_backward.round_bw.round_bw
          - eltwise.unary_backward.tanhshrink_bw.tanhshrink_bw
          - eltwise.unary_backward.hardswish_bw.hardswish_bw
          - eltwise.unary_backward.rpow_bw.rpow_bw
          - eltwise.unary_complex.conj
          - eltwise.unary_complex.reciprocal
          - eltwise.unary_complex.reciprocal_bw
          - eltwise.binary_complex.div_bw.div_bw
          - eltwise.binary_complex.add_bw.add_bw
          - eltwise.unary.lgamma
          - eltwise.unary.logit
          - eltwise.unary.mish
          - eltwise.unary.multigammaln
          - eltwise.unary.isfinite
          - eltwise.unary.isinf
          - eltwise.unary.isnan
          - eltwise.unary.isneginf
          - eltwise.unary.isposinf
          - eltwise.binary.add.add_all_pytorch2
          - eltwise.binary.add.add_different_memory_configs
          - eltwise.unary.gtz.gtz
          - eltwise.unary.ltz.ltz
          - eltwise.unary.gez.gez
          - eltwise.unary.lez.lez
          - eltwise.unary.nez.nez
          - eltwise.unary.prelu.prelu
          - eltwise.unary.hardswish.hardswish_pytorch2
          - eltwise.unary.hardtanh.hardtanh_pytorch2
          - eltwise.unary.leaky_relu.leaky_relu
          - eltwise.unary.reglu.reglu
          - eltwise.unary_complex.polar.polar
          - eltwise.unary_complex.angle.angle
          - eltwise.unary_complex.polar_bw.polar_bw
          - eltwise.unary_complex.angle_bw.angle_bw
          - eltwise.binary.subtract.subtract
          - eltwise.binary.subtract.subtract_tensor_pytorch2
          - eltwise.binary.multiply.multiply
          - eltwise.binary.multiply.mul_tensor_pytorch2
          - eltwise.binary.multiply.multiply_scalar_pytorch2
          - eltwise.binary.div.div
          - eltwise.binary.div.div_tensor_pytorch2
          - eltwise.binary.div_no_nan.div_no_nan
          - eltwise.binary.logical_or.logical_or_
          - eltwise.binary.logical_or.logical_or
          - eltwise.binary.logical_or.logical_or_output
          - eltwise.binary.logical_xor.logical_xor_
          - eltwise.binary.logical_xor.logical_xor
          - eltwise.binary.logical_and.logical_and_
          - eltwise.binary.logical_and.logical_and
          - eltwise.binary.logical_and.logical_and_output
          - eltwise.binary.polyval.polyval
          - eltwise.binary.remainder.remainder
          - eltwise.binary.squared_difference.squared_difference
          - eltwise.binary.squared_difference_output.squared_difference_output
          - eltwise.binary.remainder.remainder_scalar_pytorch2
          - eltwise.binary.bcast.bcast_h_sharded
          - eltwise.binary.bcast.bcast
          - eltwise.binary.eq.eq_scalar_pytorch2
          - eltwise.binary.gt.gt_scalar_pytorch2
          - eltwise.binary.le.le_tensor_pytorch2
          - eltwise.binary.fmod.fmod
          - eltwise.binary.floor_divide.floor_divide_pytorch2
          - eltwise.binary.logaddexp.logaddexp
          - eltwise.binary.logaddexp2.logaddexp2
          - eltwise.binary.ldexp.ldexp
          - eltwise.binary.lt.lt_tensor_pytorch2
          - eltwise.binary.lt.lt_scalar_pytorch2
          - eltwise.binary.ne.ne_scalar_pytorch2
          - eltwise.binary.hypot.hypot
          - eltwise.binary.xlogy.xlogy
          - eltwise.binary_backward.ldexp_bw
          - eltwise.binary_backward.logaddexp_bw
          - eltwise.binary_backward.logaddexp2_bw
          - eltwise.binary_backward.embedding_bw.embedding_bw
          - eltwise.binary_backward.addalpha_bw.addalpha_bw
          - eltwise.binary_backward.subalpha_bw.subalpha_bw
          - eltwise.binary_backward.xlogy_bw.xlogy_bw
          - eltwise.binary_backward.hypot_bw.hypot_bw
          - eltwise.binary_backward.add_bw.add_bw
          - eltwise.binary_backward.sub_bw.sub_bw
          - eltwise.binary_backward.mul_bw.mul_bw
          - eltwise.binary_backward.div_bw.div_bw
          - eltwise.binary_backward.fmod_bw.fmod_bw
          - eltwise.binary_backward.remainder_bw.remainder_bw
          - eltwise.binary_backward.rsub_bw.rsub_bw
          - eltwise.binary_backward.squared_difference_bw.squared_difference_bw
          - eltwise.composite.binary.addalpha.addalpha
          - eltwise.composite.binary.subalpha.subalpha
          - eltwise.composite.binary.minimum.minimum
          - eltwise.composite.binary.minimum.minimum_pytorch2
          - eltwise.composite.binary.maximum.maximum
          - eltwise.composite.binary.maximum.maximum_pytorch2
          - eltwise.composite.binary.pow.pow_pytorch2
          - eltwise.composite.binary.pow.pow_scalar_pytorch2
          - eltwise.composite.binary.pow.pow_tensor_pytorch2
          - eltwise.ternary.addcmul.addcmul
          - eltwise.ternary.addcdiv.addcdiv
          - eltwise.ternary.mac.mac
          - eltwise.ternary.lerp.lerp
          - eltwise.ternary.where.where
          - eltwise.ternary.where.where_pytorch2
          - eltwise.ternary_backward.addcmul_bw
          - eltwise.ternary_backward.addcdiv_bw
          - embedding.embedding
          - reduction.backward.prod_bw.prod_bw
          - reduction.topk.topk
          - reduction.argmax.argmax
          - reduction.prod
          - reduction.sum
          - reduction.var.var
          - reduction.std.std
          - reduction.mean.mean
          - matmul.full.matmul_default_block_sharded
          - matmul.full.matmul_default_height_sharded
          - matmul.full.matmul_default_interleaved
          - matmul.full.matmul_default_width_sharded
          - matmul.short.matmul_create_program_config
          - matmul.short.matmul_default_sharded
          - matmul.short.matmul_default
          - matmul.short.matmul_user_program_config_mcast_1d
          - matmul.short.matmul_user_program_config_mcast_2d
          - matmul.short.matmul_user_program_config
          - matmul.short.matmul
          - losses.l1_loss
          - losses.mse_loss
          - data_movement.concat.concat_interleaved_n_tensors
          - data_movement.concat.concat_interleaved
          - data_movement.concat.concat_sharded
          - data_movement.concat.concat_pytorch2
          - data_movement.slice.slice_pytorch2_rm
          - data_movement.slice.slice_pytorch2_tiled
          - data_movement.permute.permute
          - data_movement.permute.permute_pytorch2_tiled
          - data_movement.permute.permute_pytorch2_rm
          - data_movement.transpose.transpose_pytorch2
          - data_movement.transpose.transpose_interleaved
          - data_movement.transpose.t_pytorch2
          - data_movement.copy.copy
          - data_movement.expand.expand_pytorch2
          - data_movement.fill.fill_pytorch2
          - data_movement.index_select.index_select_pytorch2
          - data_movement.split.split_with_sizes_pytorch2
          - data_movement.repeat.repeat
          - data_movement.reshape.reshape
          - data_movement.repeat_interleave.repeat_interleave
          - data_movement.nonzero.nonzero
          - conv2d.full.conv2d_misc
          - conv2d.full.conv2d_sharding
          - conv2d.full.conv2d_sliding_window
          - conv2d.short.conv2d_short_sweep
          - max_pool2d.short.max_pool2d_short_sweep
          - transformer.concatenate_heads.concatenate_heads
          - transformer.split_query_key_value_and_split_heads.split_query_key_value_and_split_heads
          - transformer.split_query_key_value_and_split_heads.split_query_key_value_and_split_heads_kv_input
          - data_movement.stack.stack_pytorch2
          - data_movement.repeat.repeat_pytorch2
          - data_movement.split.split_pytorch2
          - data_movement.unsqueeze.unsqueeze_pytorch2
          - data_movement.squeeze.squeeze_pytorch2
          - data_movement.embedding.embedding_pytorch2
          - data_movement.view.view_pytorch2
  schedule:
    # Nightly run: minute 0 of hour 21, every day (9:00pm UTC).
    - cron: '0 21 * * *'
# Pipeline: build-artifact -> ttnn-generate-sweeps -> ttnn-run-sweeps (one
# matrix leg per hardware target). Each stage waits on the previous via `needs`.
jobs:
  # Delegates to the repository's reusable build workflow, forwarding all of
  # the caller's secrets.
  build-artifact:
    uses: ./.github/workflows/build-artifact.yaml
    secrets: inherit

  # Runs tests/sweep_framework/sweeps_parameter_generator.py once, either for
  # the single module chosen at dispatch time or for every module.
  ttnn-generate-sweeps:
    needs: build-artifact
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: wormhole_b0
      ELASTIC_USERNAME: ${{ secrets.SWEEPS_ELASTIC_USERNAME }}
      ELASTIC_PASSWORD: ${{ secrets.SWEEPS_ELASTIC_PASSWORD }}
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    timeout-minutes: 30
    runs-on: [build, in-service]
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Set up dynamic env vars for build
        # Exports the checkout directory as TT_METAL_HOME for all later steps.
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      # NOTE(review): presumably unpacks the output of build-artifact; the
      # action is defined outside this file - confirm.
      - uses: ./.github/actions/prepare-metal-run
        with:
          arch: wormhole_b0
      - name: Run ttnn sweeps generation (single sweep)
        # Manual dispatch with a specific module selected.
        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.sweep_name != 'ALL SWEEPS (Nightly)' }}
        env:
          # Hand the input to the shell as an environment variable instead of
          # templating it into the script text, so the value can never be
          # interpreted as shell syntax.
          SWEEP_NAME: ${{ github.event.inputs.sweep_name }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/sweeps_parameter_generator.py --module-name "$SWEEP_NAME" --elastic cloud --tag ci-main --explicit
      - name: Run ttnn sweeps generation (all sweeps)
        # Scheduled runs, and manual runs that kept the 'ALL SWEEPS (Nightly)'
        # sentinel: no --module-name is passed.
        if: ${{ github.event_name == 'schedule' || github.event.inputs.sweep_name == 'ALL SWEEPS (Nightly)' }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/sweeps_parameter_generator.py --elastic cloud --tag ci-main --explicit

  # Runs tests/sweep_framework/sweeps_runner.py on every hardware target in
  # the matrix after generation has finished.
  ttnn-run-sweeps:
    needs: ttnn-generate-sweeps
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # One entry per target. `arch` feeds ARCH_NAME and prepare-metal-run,
        # `runs-on` selects the runner labels, `tt-smi-cmd` feeds
        # TT_SMI_RESET_COMMAND. `name` is not referenced by any expression in
        # this workflow.
        test-group:
          - name: "Grayskull E150 Sweeps"
            arch: grayskull
            runs-on: ["cloud-virtual-machine", "E150", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
          - name: "Wormhole N150 Sweeps"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N150", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
          - name: "Wormhole N300 Sweeps"
            arch: wormhole_b0
            runs-on: ["cloud-virtual-machine", "N300", "in-service"]
            tt-smi-cmd: "tt-smi-metal -r 0"
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.test-group.arch }}
      ELASTIC_USERNAME: ${{ secrets.SWEEPS_ELASTIC_USERNAME }}
      ELASTIC_PASSWORD: ${{ secrets.SWEEPS_ELASTIC_PASSWORD }}
      # NOTE(review): presumably read by the sweep runner to reset the device;
      # nothing in this file consumes it directly - confirm.
      TT_SMI_RESET_COMMAND: ${{ matrix.test-group.tt-smi-cmd }}
      LD_LIBRARY_PATH: ${{ github.workspace }}/build/lib
    timeout-minutes: 720
    runs-on: ${{ matrix.test-group.runs-on }}
    steps:
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@v2.0.0
      - name: Set up dynamic env vars for build
        # Exports the checkout directory as TT_METAL_HOME for all later steps.
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"
      - uses: ./.github/actions/prepare-metal-run
        with:
          arch: ${{ matrix.test-group.arch }}
      - name: Run ttnn sweeps (single sweep)
        # Manual dispatch with a specific module selected.
        if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.sweep_name != 'ALL SWEEPS (Nightly)' }}
        env:
          # Passed via the environment rather than templated into the script,
          # so the value can never be interpreted as shell syntax.
          SWEEP_NAME: ${{ github.event.inputs.sweep_name }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/sweeps_runner.py --module-name "$SWEEP_NAME" --elastic cloud --tag ci-main
      - name: Run ttnn sweeps (all sweeps, nightly)
        # Scheduled runs, and manual runs that kept the 'ALL SWEEPS (Nightly)'
        # sentinel; both use the `nightly` suite.
        if: ${{ github.event_name == 'schedule' || github.event.inputs.sweep_name == 'ALL SWEEPS (Nightly)' }}
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          cd "$TT_METAL_HOME"
          export PYTHONPATH="$TT_METAL_HOME"
          python tests/sweep_framework/sweeps_runner.py --elastic cloud --tag ci-main --suite-name nightly