From 6f191e9e8d7d852786f2735fc00595afbac34922 Mon Sep 17 00:00:00 2001
From: Jane Xu <janeyx@meta.com>
Date: Fri, 13 Dec 2024 12:23:36 -0800
Subject: [PATCH] Migrate optimizer userbenchmarks to linux aws a100 runners
 (#2557)

Summary:
The migration turned out to be larger than I expected, so this time I tested the workflow to make sure it is correct.

Tested here: https://github.com/pytorch/benchmark/actions/runs/12321260782/job/34392279551

Pull Request resolved: https://github.com/pytorch/benchmark/pull/2557

Reviewed By: kit1980

Differential Revision: D67211361

Pulled By: janeyx99

fbshipit-source-id: 686e07aab132c18c1fe1a0ffd444ec66f29802ef
---
 .../userbenchmark-regression-detector.yml     | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/userbenchmark-regression-detector.yml b/.github/workflows/userbenchmark-regression-detector.yml
index e70b20073..5e1f7fcc0 100644
--- a/.github/workflows/userbenchmark-regression-detector.yml
+++ b/.github/workflows/userbenchmark-regression-detector.yml
@@ -15,44 +15,41 @@ jobs:
     timeout-minutes: 1440 # 24 hours
     environment: docker-s3-upload
     env:
-      BASE_CONDA_ENV: "torchbench"
       CONDA_ENV: "optim"
       PLATFORM_NAME: "gcp_a100"
       TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
       AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
       AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-      SETUP_SCRIPT: "/workspace/setup_instance.sh"
       HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
     steps:
       - name: Checkout TorchBench
         uses: actions/checkout@v3
         with:
           path: benchmark
-      - name: Tune Nvidia GPU
+      - name: Install Conda
         run: |
-          sudo nvidia-smi -pm 1
-          sudo nvidia-smi -ac 1215,1410
-          nvidia-smi
-      - name: Clone and setup Conda env
-        run: |
-          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
-          conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
+          set -x
+          pushd benchmark
+          bash ./.ci/torchbench/install-conda.sh
       - name: Install TorchBench
         run: |
           set -x
-          . "${SETUP_SCRIPT}"
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
           pushd benchmark
           # only install the subset of models currently running.
           python install.py BERT_pytorch DALLE2_pytorch hf_GPT2_large hf_T5_large resnet50 timm_vision_transformer_large yolov3
       - name: Print torch.version.git_version
         run: |
           set -x
-          . "${SETUP_SCRIPT}"
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
           python -c "import torch; print(torch.version.git_version)"
       - name: Run optim user benchmark
         run: |
           set -x
-          . "${SETUP_SCRIPT}"
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
           # remove old results
           if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
           pushd benchmark
@@ -65,7 +62,9 @@ jobs:
       - name: Detect potential regressions
         continue-on-error: true
         run: |
-          . "${SETUP_SCRIPT}"
+          set -x
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
           pushd benchmark
           RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
           # TODO: the following assumes only one metrics-*.json is found. It will keep
@@ -86,7 +85,8 @@ jobs:
             torchbench-perf-report
       - name: Upload result jsons to Scribe and S3
         run: |
-          . "${SETUP_SCRIPT}"
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
+          conda activate "${CONDA_ENV}"
           pushd benchmark
           RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
           echo "Uploading result jsons: ${RESULTS}"
@@ -102,6 +102,7 @@ jobs:
       - name: Finally, error if errors.txt exists
         if: always()
         run: |
+          set -x
           # Do not error earlier as we want all artifacts and regressions to be reported first
           # TODO: potentially move errors.txt to benchmark-output so it gets uploaded to S3
           pushd benchmark
@@ -109,6 +110,5 @@ jobs:
       - name: Remove conda environment
         if: always()
         run: |
-          . "${SETUP_SCRIPT}"
-          conda deactivate && conda deactivate
+          . "${HOME}"/miniconda3/etc/profile.d/conda.sh
           conda remove -n "${CONDA_ENV}" --all