Merge pull request #331 from mlcommons/mlperf-inference

mlperf-inference to main (to run tests)
mlcommons · Oct 4, 2024 · b10d07e · b10d07e
2 parents 9be5704 + 238325f
commit b10d07e
Show file tree

Hide file tree

Showing 34 changed files with 385 additions and 131 deletions.
diff --git a/.github/workflows/check-broken-links.md b/.github/workflows/check-broken-links.yml
@@ -1,13 +1,16 @@
-name: Check .md README files for broken links
+name: "Check .md README files for broken links"
 
-on: [pull_request]
+on:
+  push: 
+    branches: 
+    - master
 
 jobs:
   markdown-link-check:
     runs-on: ubuntu-latest
     # check out the latest version of the code
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     # Checks the status of hyperlinks in .md files in verbose mode
     - name: Check links

diff --git a/.github/workflows/code-review.yml b/.github/workflows/code-review.yml
@@ -2,7 +2,7 @@ name: OpenAI Code Review
 
 on:
   pull_request_target:
-    types: [opened, synchronize]
+    types: [opened]
     paths:
       - 'automation/**'
       - 'script/**'
@@ -15,7 +15,7 @@ permissions:
 jobs:
   code_review:
     runs-on: ubuntu-latest
-    if: github.repository_owner == 'gateoverflow' && github.event.pull_request.changed_files > 0
+    if: github.repository_owner == 'gateoverflow_off' && github.event.pull_request.changed_files > 0
     steps:
       # Run code review via OpenAI 
       # Step to run the OpenAI Code Review using the GATEOverflow action

diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml
@@ -5,12 +5,12 @@ name: MLPerf inference GPT-J
 
 on:
   schedule:
-    - cron: "1 1 * * */3"
+    - cron: "1 2 * * *"
 
 jobs:
   build:
     if: github.repository_owner == 'gateoverflow'
-    runs-on: [ self-hosted, linux, x64 ]
+    runs-on: [ self-hosted, linux, x64, GO-spr ]
     strategy:
       fail-fast: false
       matrix:
@@ -24,7 +24,10 @@ jobs:
         source gh_action/bin/deactivate || python3 -m venv gh_action
         source gh_action/bin/activate
         export CM_REPOS=$HOME/GH_CM
-        cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
+        python3 -m pip install cm4mlops
+        cm pull repo
     - name: Test MLPerf Inference GPTJ
       run: |
         cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
+
diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml
@@ -0,0 +1,33 @@
+# This workflow installs the CM tooling in a Python virtual environment and runs a scheduled short MLPerf inference test of the LLAMA 2 70B reference implementation on a self-hosted runner
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: MLPerf inference LLAMA 2 70B
+
+on:
+  schedule:
+    - cron: "30 19 * * 4"
+
+jobs:
+  build_reference:
+    if: github.repository_owner == 'gateoverflow'
+    runs-on: [ self-hosted, GO-i9, linux, x64 ]
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [ "3.12" ]
+        backend: [ "pytorch" ]
+        device: [ "cpu" ]
+
+    steps:
+    - name: Install dependencies
+      run: |
+        source gh_action/bin/deactivate || python3 -m venv gh_action
+        source gh_action/bin/activate
+        export CM_REPOS=$HOME/GH_CM
+        python3 -m pip install cm4mlops
+        cm pull repo
+        python3 -m pip install "huggingface_hub[cli]"
+        huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
+    - name: Test MLPerf Inference LLAMA 2 70B reference implementation
+      run: |
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }}  --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --adr.inference-src.tags=_repo.https://github.com/anandhu-eng/inference.git --clean
diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml
@@ -4,7 +4,7 @@
 name: MLPerf inference ResNet50
 
 on:
-  pull_request:
+  pull_request_target:
     branches: [ "main", "dev", "mlperf-inference" ]
     paths:
       - '.github/workflows/test-mlperf-inference-resnet50.yml'
@@ -28,9 +28,7 @@ jobs:
           - os: macos-latest
             backend: tf
           - os: windows-latest
-#               MLPerf requires interaction when installing LLVM on Windows - that's why we excluded it here
-
-
+            implementation: cpp
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
@@ -41,6 +39,26 @@ jobs:
       run: |
         python3 -m pip install cmind
         cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-    - name: Test MLPerf Inference ResNet50
+    - name: Test MLPerf Inference ResNet50 (Windows)
+      if: matrix.os == 'windows-latest'
       run: |
-        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="cTuning" --hw_name=default --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet 
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_windows --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet 
+    - name: Test MLPerf Inference ResNet50 (Linux/macOS)
+      if: matrix.os != 'windows-latest'
+      run: |
+        cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet 
+    - name: Push Results
+      if: github.repository_owner == 'gateoverflow'
+      env:
+          USER: "GitHub Action"
+          EMAIL: "admin@gateoverflow.com"
+      run: |
+        git config --global user.name "$USER"
+        git config --global user.email "$EMAIL"
+        git config --global credential.https://github.com.helper ""
+        git config --global credential.https://github.com.helper "!gh auth git-credential"
+        git config --global credential.https://gist.github.com.helper ""
+        git config --global credential.https://gist.github.com.helper "!gh auth git-credential"
+
+        cm run script --tags=auth,gh,cli --with_token="${{ secrets.TEST_RESULTS_GITHUB_TOKEN }}"
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action" --quiet
diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml
@@ -1,12 +1,12 @@
 name: MLPerf inference SDXL
-
+# Disabled for now because the SCC24 test workflow already runs the same SDXL test
 on:
   schedule:
     - cron: "1 2 * * *"
 
 jobs:
   build_reference:
-    if: github.repository_owner == 'gateoverflow'
+    if: github.repository_owner == 'gateoverflow_off'
     runs-on: [ self-hosted, linux, x64 ]
     strategy:
       fail-fast: false
@@ -15,18 +15,17 @@ jobs:
         backend: [ "pytorch" ]
         precision: [ "float16" ]
     steps:
-    - name: Install dependencies
+    - name: Test MLPerf Inference SDXL Reference
       run: |
         source gh_action/bin/deactivate || python3 -m venv gh_action
         source gh_action/bin/activate
         export CM_REPOS=$HOME/GH_CM
-        cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-    - name: Test MLPerf Inference SDXL
-      run: |
+        python3 -m pip install cm4mlops
+        cm pull repo
         cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
 
   build_nvidia:
-      if: github.repository_owner == 'gateoverflow'
+      if: github.repository_owner == 'gateoverflow_off'
       runs-on: [ self-hosted, linux, x64 ]
       strategy:
         fail-fast: false
@@ -36,12 +35,10 @@ jobs:
           precision: [ "float16" ]
           implementation: [ "nvidia" ]
       steps:
-      - name: Install dependencies
+      - name: Test MLPerf Inference SDXL Nvidia
         run: |
           source gh_action/bin/deactivate || python3 -m venv gh_action
           source gh_action/bin/activate
           export CM_REPOS=$HOME/GH_CM
-          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-      - name: Test MLPerf Inference SDXL
-        run: |
+          cm pull repo
           cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml
@@ -1,13 +1,15 @@
-name: MLPerf inference SDXL
+name: MLPerf inference SDXL (SCC)
 
 on:
   schedule:
-    - cron: "43 1 * * *"
+    - cron: "1 3 * * *"
 
 jobs:
   build_reference:
     if: github.repository_owner == 'gateoverflow'
-    runs-on: [ self-hosted, linux, x64 ]
+    runs-on: [ self-hosted, linux, x64, GO-spr ]
+    env:
+      CM_REPOS: $HOME/GH_CM
     strategy:
       fail-fast: false
       matrix:
@@ -16,23 +18,23 @@ jobs:
         precision: [ "float16" ]
         device: [ "cuda" ]
     steps:
-    - name: Install dependencies
+    - name: Test MLPerf Inference reference SDXL SCC 
       run: |
-        source gh_action/bin/deactivate || python3 -m venv gh_action
+        if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
+        python3 -m venv gh_action
         source gh_action/bin/activate
         export CM_REPOS=$HOME/GH_CM
-        cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-    - name: Test MLPerf Inference reference SDXL SCC 
-      env:
-        GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
-      run: |
-        cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
-        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
-        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
+        pip install --upgrade cm4mlops
+        pip install tabulate
+        cm pull repo
+        cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
+        cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
+        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
+        cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
         
   build_nvidia:
       if: github.repository_owner == 'gateoverflow'
-      runs-on: [ self-hosted, linux, x64 ]
+      runs-on: [ self-hosted, linux, x64, GO-spr]
       strategy:
         fail-fast: false
         matrix:
@@ -41,16 +43,16 @@ jobs:
           precision: [ "float16" ]
           implementation: [ "nvidia" ]
       steps:
-      - name: Install dependencies
+      - name: Test MLPerf Inference NVIDIA SDXL SCC
         run: |
-          source gh_action/bin/deactivate || python3 -m venv gh_action
+          if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
+          python3 -m venv gh_action
           source gh_action/bin/activate
           export CM_REPOS=$HOME/GH_CM
-          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
-      - name: Test MLPerf Inference NVIDIA SDXL SCC
-        env:
-          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
-        run: |
-          cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --precision=float16 --clean |
-          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons |
-          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet
+          pip install --upgrade cm4mlops
+          pip install tabulate
+          cm pull repo
+          cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --clean
+          cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
+          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
+          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
diff --git a/README.md b/README.md
@@ -1,6 +1,5 @@
 ## Unified and cross-platform CM interface for DevOps, MLOps and MLPerf
 
-[![arXiv](https://img.shields.io/badge/arXiv-2406.16791-b31b1b.svg)](https://arxiv.org/abs/2406.16791)
 [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md)
 [![Python Version](https://img.shields.io/badge/python-3+-blue.svg)](https://github.com/mlcommons/ck/tree/master/cm/cmind)
 [![Powered by CM](https://img.shields.io/badge/Powered_by-MLCommons%20CM-blue)](https://github.com/mlcommons/ck).
@@ -141,8 +140,7 @@ cm run script \
 
 ## CM concepts
 
-* https://doi.org/10.5281/zenodo.8105339
-* https://arxiv.org/abs/2406.16791
+Check our [ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339).
 
 ## Authors
 

diff --git a/script/app-image-corner-detection/_cm.json b/script/app-image-corner-detection/_cm.json
diff --git a/script/app-mlperf-inference-ctuning-cpp-tflite/_cm.json b/script/app-mlperf-inference-ctuning-cpp-tflite/_cm.json
@@ -120,7 +120,7 @@
     {
       "names": [
         "tensorflow",
-	"tflite"
+        "tflite"
       ],
       "tags": "get,tensorflow,lib,_tflite"
     },