
Commit

Merge branch 'mlperf-inference' into sdxl_accuracy_fixes
anandhu-eng authored Oct 17, 2024
2 parents 372c633 + f414de4 commit 73f9435
Showing 4 changed files with 105 additions and 11 deletions.
86 changes: 86 additions & 0 deletions .github/workflows/build_wheels.yml
@@ -0,0 +1,86 @@
name: Build wheels and release them into PYPI

on:
  release:
    types: [published]
  push:
    branches:
      - main
      - mlperf_inference
    paths:
      - VERSION
      - setup.py

jobs:
  build_wheels:
    name: Build wheel
    runs-on: ubuntu-latest
    environment: release
    permissions:
      id-token: write
    strategy:
      fail-fast: false
    steps:
      # Step 1: Checkout the code
      - uses: actions/checkout@v3

      # Step 2: Set up Python
      - uses: actions/setup-python@v3

      # Step 3: Check if VERSION file has changed in this push
      - name: Check if VERSION file has changed
        id: version_changed
        run: |
          if git diff --name-only HEAD~1 | grep -q "VERSION"; then
            echo "VERSION file has been modified"
            echo "::set-output name=version_changed::true"
            new_version=$(cat VERSION)
          else
            echo "VERSION file has NOT been modified"
            echo "::set-output name=version_changed::false"
          fi
          echo "::set-output name=new_version::$new_version"
      # Step 4: Increment version if VERSION was not changed
      - name: Increment version if necessary
        if: steps.version_changed.outputs.version_changed == 'false'
        run: |
          # Check if VERSION file exists, else initialize it
          if [ ! -f VERSION ]; then
            echo "0.0.0" > VERSION
          fi
          version=$(cat VERSION)
          IFS='.' read -r major minor patch <<< "$version"
          patch=$((patch + 1))
          new_version="$major.$minor.$patch"
          echo $new_version > VERSION
          echo "New version: $new_version"
          echo "::set-output name=new_version::$new_version"
      # Step 5: Commit the updated version to the repository
      - name: Commit updated version
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
          git add VERSION
          git commit -m "Increment version to ${{ steps.version_changed.outputs.new_version }}"
          git push
      # Step 6: Install required dependencies
      - name: Install requirements
        run: python3 -m pip install setuptools wheel

      # Step 7: Build the Python wheel
      - name: Build wheels
        run: python3 setup.py bdist_wheel

      # Step 8: Publish to PyPI
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verify-metadata: true
          skip-existing: true
          packages-dir: dist
          repository-url: https://upload.pypi.org/legacy/
          verbose: true
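
Note: the workflow above passes step outputs via the "::set-output" workflow command, which GitHub Actions deprecated in late 2022 in favor of writing to the GITHUB_OUTPUT environment file. A minimal sketch (not part of this commit) of the same version-check step using the newer mechanism:

      # Step 3 (alternative sketch): write outputs to $GITHUB_OUTPUT
      - name: Check if VERSION file has changed
        id: version_changed
        run: |
          if git diff --name-only HEAD~1 | grep -q "VERSION"; then
            echo "version_changed=true" >> "$GITHUB_OUTPUT"
            echo "new_version=$(cat VERSION)" >> "$GITHUB_OUTPUT"
          else
            echo "version_changed=false" >> "$GITHUB_OUTPUT"
          fi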
1 change: 1 addition & 0 deletions VERSION
@@ -0,0 +1 @@
0.3.0
9 changes: 8 additions & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -89,7 +89,6 @@ input_mapping:
  embedding_weights_on_gpu_part: CM_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART
  sdxl_batcher_time_limit: CM_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT


# Dependencies on other CM scripts

deps:
@@ -1107,6 +1106,14 @@ variations:
    env:
      SKIP_POLICIES: '1'

+  server,resnet50:
+    env:
+      CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000
+      CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True
+      CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: True
+      CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: 9
+      CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: 2
+
  multistream,resnet50:
    env:
      SKIP_POLICIES: '1'
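
Each CM_MLPERF_NVIDIA_HARNESS_* value set in the new server,resnet50 variation is consumed by script/app-mlperf-inference-nvidia/customize.py (the next file in this commit), which turns it into a flag on the harness command line. A short illustrative sketch of that mapping for two of the new values, mirroring the pattern in customize.py (not part of the diff):

    # env block of the server,resnet50 variation, as customize.py would see it
    env = {
        "CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC": 2000,
        "CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT": True,
    }

    run_config = ""

    # numeric values become --flag=value
    deque_timeout_usec = env.get("CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC")
    if deque_timeout_usec:
        run_config += f" --deque_timeout_usec={deque_timeout_usec}"

    # boolean values become bare --flag switches
    use_deque_limit = str(env.get("CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT"))
    if use_deque_limit and use_deque_limit.lower() not in ["no", "false"]:
        run_config += " --use_deque_limit"

    print(run_config)  # " --deque_timeout_usec=2000 --use_deque_limit"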
20 changes: 10 additions & 10 deletions script/app-mlperf-inference-nvidia/customize.py
@@ -378,7 +378,7 @@ def preprocess(i):
        if audio_batch_size:
            run_config += f" --audio_batch_size={audio_batch_size}"

-        disable_encoder_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN')
+        disable_encoder_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN'))
        if disable_encoder_plugin and disable_encoder_plugin.lower() not in [ "no", "false" ]:
            run_config += " --disable_encoder_plugin"
@@ -393,31 +393,31 @@ def preprocess(i):
        if log_dir:
            run_config += f" --log_dir={log_dir}"

-        use_graphs = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS')
+        use_graphs = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS'))
        if use_graphs and use_graphs.lower() not in [ "no", "false" ]:
            run_config += " --use_graphs"

-        use_deque_limit = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT')
+        use_deque_limit = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT'))
        if use_deque_limit and use_deque_limit.lower() not in [ "no", "false" ]:
            run_config += " --use_deque_limit"

        deque_timeout_usec = env.get('CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC')
        if deque_timeout_usec:
            run_config += f" --deque_timeout_usec={deque_timeout_usec}"

-        use_cuda_thread_per_device = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE')
+        use_cuda_thread_per_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE'))
        if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [ "no", "false" ]:
            run_config += " --use_cuda_thread_per_device"

-        run_infer_on_copy_streams = env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS')
+        run_infer_on_copy_streams = str(env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS'))
        if run_infer_on_copy_streams and run_infer_on_copy_streams.lower() not in [ "no", "false" ]:
            run_config += " --run_infer_on_copy_streams"

-        start_from_device = env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE')
+        start_from_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE'))
        if start_from_device and start_from_device.lower() not in [ "no", "false" ]:
            run_config += " --start_from_device"

-        end_on_device = env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE')
+        end_on_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE'))
        if end_on_device and end_on_device.lower() not in [ "no", "false" ]:
            run_config += " --end_on_device"
Expand All @@ -437,15 +437,15 @@ def preprocess(i):
if soft_drop:
run_config += f" --soft_drop={soft_drop}"

use_small_tile_gemm_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN')
use_small_tile_gemm_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN'))
if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [ "no", "false" ]:
run_config += f" --use_small_tile_gemm_plugin"

audio_buffer_num_lines = env.get('CM_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES')
if audio_buffer_num_lines:
run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}"

use_fp8 = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8')
use_fp8 = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8'))
if use_fp8 and use_fp8.lower() not in [ "no", "false" ]:
run_config += f" --use_fp8"

@@ -473,7 +473,7 @@ def preprocess(i):
        if num_warmups != '':
            run_config += f" --num_warmups={num_warmups}"

-        skip_postprocess = env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS')
+        skip_postprocess = str(env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS'))
        if skip_postprocess and skip_postprocess.lower() not in [ "no", "false" ]:
            run_config += f" --skip_postprocess"
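
The str() wrappers are the substance of this file's change: values such as CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True arrive from _cm.yaml as Python booleans, and bool has no .lower() method, so the old env.get(...).lower() calls raised AttributeError for those variations. A small Python illustration of the fix and one caveat of the approach (an observation, not part of the diff):

    flag = True                  # what YAML "True" becomes in Python
    # flag.lower()               # AttributeError: 'bool' object has no attribute 'lower'
    print(str(flag).lower())     # "true" -> passes the not-in-["no", "false"] test

    # Caveat: when a variable is unset, str(env.get(KEY)) is the string "None",
    # which is truthy and not in ["no", "false"], so the flag would be appended
    # anyway. Supplying a default keeps unset variables falsy:
    env = {}
    use_fp8 = str(env.get("CM_MLPERF_NVIDIA_HARNESS_USE_FP8", ""))
    if use_fp8 and use_fp8.lower() not in ["no", "false"]:
        print(" --use_fp8")      # not printed: use_fp8 is "" for an unset variable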
