
Commit

Merge branch 'mlperf-inference' into sdxl_accuracy_fixes
anandhu-eng authored Oct 17, 2024
2 parents 372c633 + f414de4 commit 73f9435
Showing 4 changed files with 105 additions and 11 deletions.
86 changes: 86 additions & 0 deletions .github/workflows/build_wheels.yml
@@ -0,0 +1,86 @@
name: Build wheels and release them into PYPI

on:
  release:
    types: [published]
  push:
    branches:
      - main
      - mlperf_inference
    paths:
      - VERSION
      - setup.py

jobs:
  build_wheels:
    name: Build wheel
    runs-on: ubuntu-latest
    environment: release
    permissions:
      id-token: write
    strategy:
      fail-fast: false
    steps:
      # Step 1: Checkout the code
      - uses: actions/checkout@v3

      # Step 2: Set up Python
      - uses: actions/setup-python@v3

      # Step 3: Check if VERSION file has changed in this push
      - name: Check if VERSION file has changed
        id: version_changed
        run: |
          if git diff --name-only HEAD~1 | grep -q "VERSION"; then
            echo "VERSION file has been modified"
            echo "::set-output name=version_changed::true"
            new_version=$(cat VERSION)
          else
            echo "VERSION file has NOT been modified"
            echo "::set-output name=version_changed::false"
          fi
          echo "::set-output name=new_version::$new_version"
      # Step 4: Increment version if VERSION was not changed
      - name: Increment version if necessary
        if: steps.version_changed.outputs.version_changed == 'false'
        run: |
          # Check if VERSION file exists, else initialize it
          if [ ! -f VERSION ]; then
            echo "0.0.0" > VERSION
          fi
          version=$(cat VERSION)
          IFS='.' read -r major minor patch <<< "$version"
          patch=$((patch + 1))
          new_version="$major.$minor.$patch"
          echo $new_version > VERSION
          echo "New version: $new_version"
          echo "::set-output name=new_version::$new_version"
      # Step 5: Commit the updated version to the repository
      - name: Commit updated version
        run: |
          git config --global user.name "${{ github.actor }}"
          git config --global user.email "${{ github.actor }}@users.noreply.github.com"
          git add VERSION
          git commit -m "Increment version to ${{ steps.version_changed.outputs.new_version }}"
          git push
      # Step 6: Install required dependencies
      - name: Install requirements
        run: python3 -m pip install setuptools wheel

      # Step 7: Build the Python wheel
      - name: Build wheels
        run: python3 setup.py bdist_wheel

      # Step 8: Publish to PyPI
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verify-metadata: true
          skip-existing: true
          packages-dir: dist
          repository-url: https://upload.pypi.org/legacy/
          verbose: true
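
Note: the workflow above passes step outputs via the "::set-output" workflow command, which GitHub Actions deprecated in late 2022 in favor of writing to the GITHUB_OUTPUT environment file. A minimal sketch (not part of this commit) of the same version-check step using the newer mechanism:

      # Step 3 (alternative sketch): write outputs to $GITHUB_OUTPUT
      - name: Check if VERSION file has changed
        id: version_changed
        run: |
          if git diff --name-only HEAD~1 | grep -q "VERSION"; then
            echo "version_changed=true" >> "$GITHUB_OUTPUT"
            echo "new_version=$(cat VERSION)" >> "$GITHUB_OUTPUT"
          else
            echo "version_changed=false" >> "$GITHUB_OUTPUT"
          fi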
1 change: 1 addition & 0 deletions VERSION
@@ -0,0 +1 @@
0.3.0
9 changes: 8 additions & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -89,7 +89,6 @@ input_mapping:
  embedding_weights_on_gpu_part: CM_MLPERF_NVIDIA_HARNESS_EMBEDDING_WEIGHTS_ON_GPU_PART
  sdxl_batcher_time_limit: CM_MLPERF_NVIDIA_HARNESS_SDXL_SERVER_BATCHER_TIME_LIMIT


# Dependencies on other CM scripts

deps:
@@ -1107,6 +1106,14 @@ variations:
    env:
      SKIP_POLICIES: '1'

+  server,resnet50:
+    env:
+      CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC: 2000
+      CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True
+      CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE: True
+      CM_MLPERF_NVIDIA_HARNESS_GPU_COPY_STREAMS: 9
+      CM_MLPERF_NVIDIA_HARNESS_GPU_INFERENCE_STREAMS: 2
+
  multistream,resnet50:
    env:
      SKIP_POLICIES: '1'
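
Each CM_MLPERF_NVIDIA_HARNESS_* value set in the new server,resnet50 variation is consumed by script/app-mlperf-inference-nvidia/customize.py (the next file in this commit), which turns it into a flag on the harness command line. A short illustrative sketch of that mapping for two of the new values, mirroring the pattern in customize.py (not part of the diff):

    # env block of the server,resnet50 variation, as customize.py would see it
    env = {
        "CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC": 2000,
        "CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT": True,
    }

    run_config = ""

    # numeric values become --flag=value
    deque_timeout_usec = env.get("CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC")
    if deque_timeout_usec:
        run_config += f" --deque_timeout_usec={deque_timeout_usec}"

    # boolean values become bare --flag switches
    use_deque_limit = str(env.get("CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT"))
    if use_deque_limit and use_deque_limit.lower() not in ["no", "false"]:
        run_config += " --use_deque_limit"

    print(run_config)  # " --deque_timeout_usec=2000 --use_deque_limit"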
20 changes: 10 additions & 10 deletions script/app-mlperf-inference-nvidia/customize.py
@@ -378,7 +378,7 @@ def preprocess(i):
        if audio_batch_size:
            run_config += f" --audio_batch_size={audio_batch_size}"

-        disable_encoder_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN')
+        disable_encoder_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_DISABLE_ENCODER_PLUGIN'))
        if disable_encoder_plugin and disable_encoder_plugin.lower() not in [ "no", "false" ]:
            run_config += " --disable_encoder_plugin"
@@ -393,31 +393,31 @@ def preprocess(i):
        if log_dir:
            run_config += f" --log_dir={log_dir}"

-        use_graphs = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS')
+        use_graphs = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_GRAPHS'))
        if use_graphs and use_graphs.lower() not in [ "no", "false" ]:
            run_config += " --use_graphs"

-        use_deque_limit = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT')
+        use_deque_limit = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT'))
        if use_deque_limit and use_deque_limit.lower() not in [ "no", "false" ]:
            run_config += " --use_deque_limit"

        deque_timeout_usec = env.get('CM_MLPERF_NVIDIA_HARNESS_DEQUE_TIMEOUT_USEC')
        if deque_timeout_usec:
            run_config += f" --deque_timeout_usec={deque_timeout_usec}"

-        use_cuda_thread_per_device = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE')
+        use_cuda_thread_per_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_CUDA_THREAD_PER_DEVICE'))
        if use_cuda_thread_per_device and use_cuda_thread_per_device.lower() not in [ "no", "false" ]:
            run_config += " --use_cuda_thread_per_device"

-        run_infer_on_copy_streams = env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS')
+        run_infer_on_copy_streams = str(env.get('CM_MLPERF_NVIDIA_HARNESS_RUN_INFER_ON_COPY_STREAMS'))
        if run_infer_on_copy_streams and run_infer_on_copy_streams.lower() not in [ "no", "false" ]:
            run_config += " --run_infer_on_copy_streams"

-        start_from_device = env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE')
+        start_from_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_START_FROM_DEVICE'))
        if start_from_device and start_from_device.lower() not in [ "no", "false" ]:
            run_config += " --start_from_device"

-        end_on_device = env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE')
+        end_on_device = str(env.get('CM_MLPERF_NVIDIA_HARNESS_END_ON_DEVICE'))
        if end_on_device and end_on_device.lower() not in [ "no", "false" ]:
            run_config += " --end_on_device"
Expand All @@ -437,15 +437,15 @@ def preprocess(i):
if soft_drop:
run_config += f" --soft_drop={soft_drop}"

use_small_tile_gemm_plugin = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN')
use_small_tile_gemm_plugin = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_SMALL_TILE_GEMM_PLUGIN'))
if use_small_tile_gemm_plugin and use_small_tile_gemm_plugin.lower() not in [ "no", "false" ]:
run_config += f" --use_small_tile_gemm_plugin"

audio_buffer_num_lines = env.get('CM_MLPERF_NVIDIA_HARNESS_AUDIO_BUFFER_NUM_LINES')
if audio_buffer_num_lines:
run_config += f" --audio_buffer_num_lines={audio_buffer_num_lines}"

use_fp8 = env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8')
use_fp8 = str(env.get('CM_MLPERF_NVIDIA_HARNESS_USE_FP8'))
if use_fp8 and use_fp8.lower() not in [ "no", "false" ]:
run_config += f" --use_fp8"

@@ -473,7 +473,7 @@ def preprocess(i):
        if num_warmups != '':
            run_config += f" --num_warmups={num_warmups}"

-        skip_postprocess = env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS')
+        skip_postprocess = str(env.get('CM_MLPERF_NVIDIA_HARNESS_SKIP_POSTPROCESS'))
        if skip_postprocess and skip_postprocess.lower() not in [ "no", "false" ]:
            run_config += f" --skip_postprocess"
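
The str() wrappers are the substance of this file's change: values such as CM_MLPERF_NVIDIA_HARNESS_USE_DEQUE_LIMIT: True arrive from _cm.yaml as Python booleans, and bool has no .lower() method, so the old env.get(...).lower() calls raised AttributeError for those variations. A small Python illustration of the fix and one caveat of the approach (an observation, not part of the diff):

    flag = True                  # what YAML "True" becomes in Python
    # flag.lower()               # AttributeError: 'bool' object has no attribute 'lower'
    print(str(flag).lower())     # "true" -> passes the not-in-["no", "false"] test

    # Caveat: when a variable is unset, str(env.get(KEY)) is the string "None",
    # which is truthy and not in ["no", "false"], so the flag would be appended
    # anyway. Supplying a default keeps unset variables falsy:
    env = {}
    use_fp8 = str(env.get("CM_MLPERF_NVIDIA_HARNESS_USE_FP8", ""))
    if use_fp8 and use_fp8.lower() not in ["no", "false"]:
        print(" --use_fp8")      # not printed: use_fp8 is "" for an unset variable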
