Skip to content

Commit

Permalink
Merge pull request #19 from arjunsuresh/mlperf-inference
Browse files (browse the repository at this point in the history)
Fixes for MLPerf inference v4.0
  • Loading branch information
arjunsuresh authored May 21, 2024
2 parents 03eaf74 + 31f31c3 commit e6e1d9a
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 11 deletions.
2 changes: 1 addition & 1 deletion automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def _run(self, i):
if str(state['docker'].get('run', True)).lower() in ['false', '0', 'no']:
print (recursion_spaces+' - Skipping script::{} run as we are inside docker'.format(found_script_artifact))
return {'return': 0}
elif str(state['docker'].get('docker_real_run', True)).lower() in ['false', '0', 'no']:
elif str(state['docker'].get('real_run', True)).lower() in ['false', '0', 'no']:
print (recursion_spaces+' - Doing fake run for script::{} as we are inside docker'.format(found_script_artifact))
fake_run = True
env['CM_TMP_FAKE_RUN']='yes'
Expand Down
15 changes: 14 additions & 1 deletion script/add-custom-nvidia-system/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ automation_alias: script
automation_uid: 5b4e0237da074764

category: "MLPerf benchmark support"

docker:
real_run: False

# User-friendly tags to find this CM script
tags:
Expand Down Expand Up @@ -94,6 +95,13 @@ variations:
add_deps_recursive:
nvidia-inference-common-code:
tags: _ctuning
go:
group: code
add_deps_recursive:
nvidia-inference-common-code:
tags: _go




versions:
Expand All @@ -111,3 +119,8 @@ versions:
add_deps_recursive:
nvidia-inference-common-code:
version: r3.1

r4.0:
add_deps_recursive:
nvidia-inference-common-code:
version: r4.0
3 changes: 3 additions & 0 deletions script/app-mlperf-inference-nvidia/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ def preprocess(i):
return {'return':1, 'error': 'Windows is not supported in this script yet'}
env = i['env']

if str(env.get('CM_RUN_STATE_DOCKER', '')).lower() in ['1', 'true', 'yes']:
return {'return': 0}

if env.get('CM_MODEL', '') == '':
return {'return': 1, 'error': 'Please select a variation specifying the model to run'}

Expand Down
13 changes: 7 additions & 6 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -280,12 +280,6 @@ variations:
CM_IMAGENET_ACCURACY_DTYPE: int32
CM_CNNDM_ACCURACY_DTYPE: int32
CM_LIBRISPEECH_ACCURACY_DTYPE: int8
deps:
- tags: get,cuda-devices
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
- "on"
prehook_deps:
- names:
- nvidia-original-mlperf-inference
Expand Down Expand Up @@ -911,6 +905,12 @@ variations:
add_deps_recursive:
mlperf-inference-implementation:
tags: _cuda
deps:
- tags: get,cuda-devices
skip_if_env:
CM_CUDA_DEVICE_PROP_GLOBAL_MEMORY:
- "yes"
- "on"
rocm:
docker:
all_gpus: 'yes'
Expand Down Expand Up @@ -1266,6 +1266,7 @@ docker:
- tags: get,mlperf,inference,results,dir
- tags: get,mlperf,inference,submission,dir
pre_run_cmds:
#- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update
- cm pull repo
mounts:
- "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}"
Expand Down
2 changes: 0 additions & 2 deletions script/build-mlperf-inference-server-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ variations:
names:
- pytorchvision
- torchvision
- tags: install,nccl,libs,_cuda

versions:
r2.1:
Expand Down Expand Up @@ -250,7 +249,6 @@ versions:
names:
- pytorchvision
- torchvision
- tags: install,nccl,libs,_cuda

docker:
skip_run_cmd: 'no'
Expand Down
1 change: 1 addition & 0 deletions script/get-git-repo/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"folder": "CM_GIT_CHECKOUT_FOLDER",
"patch": "CM_GIT_PATCH",
"update": "CM_GIT_REPO_PULL",
"pull": "CM_GIT_REPO_PULL",
"env_key": "CM_GIT_ENV_KEY",
"submodules": "CM_GIT_RECURSE_SUBMODULES"
},
Expand Down
12 changes: 11 additions & 1 deletion script/install-torchvision-from-src/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,17 @@
}
},
"for-nvidia-mlperf-inference-v4.0": {
"alias": "for-nvidia-mlperf-inference-v3.1"
"base": [
"sha.657027f3",
"cuda"
],
"deps": [
{
"tags": "install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0"
}
],
"env": {
}
},
"for-nvidia-mlperf-inference-v3.1": {
"base": [
Expand Down

0 comments on commit e6e1d9a

Please sign in to comment.