From 89e031dcde81e4a104c31c9e332fc70b4e5f58d3 Mon Sep 17 00:00:00 2001 From: "Alexey Kondratiev(AMD)" <143633163+alexeykondrat@users.noreply.github.com> Date: Tue, 10 Sep 2024 14:51:15 -0400 Subject: [PATCH] [CI/Build] Enabling kernels tests for AMD, ignoring some of then that fail (#8130) Signed-off-by: Alvant --- .buildkite/run-amd-test.sh | 24 +++++++++++++++++++++++- .buildkite/test-pipeline.yaml | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh index 972c62a091aea..c9b72a3264e82 100755 --- a/.buildkite/run-amd-test.sh +++ b/.buildkite/run-amd-test.sh @@ -71,13 +71,35 @@ mkdir -p ${HF_CACHE} HF_MOUNT="/root/.cache/huggingface" commands=$@ +echo "Commands:$commands" +#ignore certain kernels tests +if [[ $commands == *" kernels "* ]]; then + commands="${commands} \ + --ignore=kernels/test_attention.py \ + --ignore=kernels/test_attention_selector.py \ + --ignore=kernels/test_blocksparse_attention.py \ + --ignore=kernels/test_causal_conv1d.py \ + --ignore=kernels/test_cutlass.py \ + --ignore=kernels/test_encoder_decoder_attn.py \ + --ignore=kernels/test_flash_attn.py \ + --ignore=kernels/test_flashinfer.py \ + --ignore=kernels/test_int8_quant.py \ + --ignore=kernels/test_machete_gemm.py \ + --ignore=kernels/test_mamba_ssm.py \ + --ignore=kernels/test_marlin_gemm.py \ + --ignore=kernels/test_prefix_prefill.py \ + --ignore=kernels/test_rand.py \ + --ignore=kernels/test_sampler.py" +fi + PARALLEL_JOB_COUNT=8 # check if the command contains shard flag, we will run all shards in parallel because the host have 8 GPUs. if [[ $commands == *"--shard-id="* ]]; then for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do #replace shard arguments - commands=${@//"--shard-id= "/"--shard-id=${GPU} "} + commands=${commands//"--shard-id= "/"--shard-id=${GPU} "} commands=${commands//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "} + echo "Shard ${GPU} commands:$commands" docker run \ --device /dev/kfd --device /dev/dri \ --network host \ diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a0c7b7442b3b3..e4f70c5d4920a 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -228,6 +228,7 @@ steps: parallelism: 4 - label: Kernels Test %N # 30min each + mirror_hardwares: [amd] source_file_dependencies: - csrc/ - vllm/attention