diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml index a637c4138..43f0f27a0 100644 --- a/.github/workflows/ci-llama-large-tests.yaml +++ b/.github/workflows/ci-llama-large-tests.yaml @@ -28,7 +28,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: llama-mi300x-1 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash diff --git a/.github/workflows/ci-sdxl.yaml b/.github/workflows/ci-sdxl.yaml index 9fff66717..b5b50e3c4 100644 --- a/.github/workflows/ci-sdxl.yaml +++ b/.github/workflows/ci-sdxl.yaml @@ -37,7 +37,7 @@ env: jobs: install-and-test: name: Install and test - runs-on: mi300x-3 + runs-on: linux-mi300-1gpu-ossci steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml index 3e6f1f9e1..c1b9502aa 100644 --- a/.github/workflows/ci-sglang-benchmark.yml +++ b/.github/workflows/ci-sglang-benchmark.yml @@ -40,7 +40,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: mi300x-3 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash @@ -82,7 +82,9 @@ jobs: - name: Login to huggingface continue-on-error: true - run: huggingface-cli login --token ${{ secrets.HF_TOKEN }} + run: | + pip install -U "huggingface_hub[cli]" + huggingface-cli login --token ${{ secrets.HF_TOKEN }} - name: Run Shortfin Benchmark Tests run: | @@ -101,7 +103,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: mi300x-3 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash @@ -187,15 +189,11 @@ jobs: needs: benchmark_sglang name: "Docker Cleanup" if: always() - runs-on: mi300x-3 + runs-on: linux-mi300-1gpu-ossci steps: - name: Stop sglang-server run: docker stop sglang-server || true # Stop container if it's running - # Deleting image after run due to large disk space requirement (83 GB) - - name: Cleanup SGLang Image - run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620 - merge_and_upload_reports: name: "Merge and upload benchmark reports" needs: [benchmark_shortfin, benchmark_sglang] diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml index d00269220..44f8eac0c 100644 --- a/.github/workflows/ci-sglang-integration-tests.yml +++ b/.github/workflows/ci-sglang-integration-tests.yml @@ -29,7 +29,7 @@ jobs: matrix: version: [3.11] fail-fast: false - runs-on: mi300x-3 + runs-on: linux-mi300-1gpu-ossci defaults: run: shell: bash @@ -69,7 +69,6 @@ jobs: pip install sentence_transformers pip freeze - - name: Run Integration Tests run: | source ${VENV_DIR}/bin/activate diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py index f872f9d4a..1f6bd356a 100644 --- a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py +++ b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py @@ -60,7 +60,7 @@ def test_shortfin_benchmark( request, ): # TODO: Remove when multi-device is fixed - os.environ["ROCR_VISIBLE_DEVICES"] = "1" + os.environ["ROCR_VISIBLE_DEVICES"] = "0" process, port = server diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py index b1741fb7d..9f2662639 100644 --- a/app_tests/integration_tests/llm/sglang/conftest.py +++ b/app_tests/integration_tests/llm/sglang/conftest.py @@ -54,7 +54,7 @@ def model_artifacts(request, tmp_path_factory): @pytest.fixture(scope="module") def start_server(request, model_artifacts): - os.environ["ROCR_VISIBLE_DEVICES"] = "1" + os.environ["ROCR_VISIBLE_DEVICES"] = "0" device_settings = request.param["device_settings"] server_config = ServerConfig(