diff --git a/automation/script/README-specs.md b/automation/script/README-specs.md
index 58526d168..4b40feeba 100644
--- a/automation/script/README-specs.md
+++ b/automation/script/README-specs.md
@@ -27,7 +27,7 @@ See the [automatically generated catalog](https://github.com/mlcommons/ck/blob/m
When we run a CM script we can also pass inputs to it and any input added in `input_mapping` dictionary inside `_cm.json` gets converted to the corresponding `ENV` variable.
### Conditional execution of any `deps`, `post_deps`
-We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its executional conditional
+We can use the `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its execution conditional.
### Versions
We can specify any specific version of a script using `version`. `version_max` and `version_min` are also possible options.
@@ -73,9 +73,7 @@ Sometimes it is difficult to add all variations needed for a script like say `ba
### Script workflow (env, deps, native scripts)
-![](assets/scripts-workflow.png)
+
-
-
-© 2022-23 [MLCommons](https://mlcommons.org)
+© 2022-24 [MLCommons](https://mlcommons.org)
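
Note: as a rough illustration of the `input_mapping` behaviour described in this README hunk, here is a simplified Python sketch (not the actual `module.py` implementation). The `beam_size -> GPTJ_BEAM_SIZE` mapping is taken from the `run-mlperf-inference-app/_cm.yaml` change later in this patch.

```python
# Simplified sketch: each input listed in input_mapping is copied into its
# mapped ENV variable before the script's native code runs.
def apply_input_mapping(inputs, input_mapping, env):
    for input_key, env_key in input_mapping.items():
        if input_key in inputs:
            env[env_key] = inputs[input_key]
    return env

env = apply_input_mapping({'beam_size': 4},
                          {'beam_size': 'GPTJ_BEAM_SIZE'},
                          {})
print(env)  # {'GPTJ_BEAM_SIZE': 4}
```
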
diff --git a/automation/script/module.py b/automation/script/module.py
index eee99803c..e79b33a3e 100644
--- a/automation/script/module.py
+++ b/automation/script/module.py
@@ -4314,12 +4314,12 @@ def enable_or_skip_script(meta, env):
(AND function)
"""
for key in meta:
+ meta_key = [str(v).lower() for v in meta[key]]
if key in env:
value = str(env[key]).lower()
- meta_key = [str(v).lower() for v in meta[key]]
-
if set(meta_key) & set(["yes", "on", "true", "1"]):
+ # Any set value other than false is taken as set
if value not in ["no", "off", "false", "0"]:
continue
elif set(meta_key) & set(["no", "off", "false", "0"]):
@@ -4327,6 +4327,11 @@ def enable_or_skip_script(meta, env):
continue
elif value in meta_key:
continue
+ else:
+ if set(meta_key) & set(["no", "off", "false", "0"]):
+ # If key is missing in env, and if the expected value is False, consider it a match
+ continue
+
return False
return True
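
Note: a standalone sketch of the matching semantics after this change (mirroring, not importing, `enable_or_skip_script`). With the new `else` branch, an `enable_if_env` entry that expects a false-like value now also matches when the key is absent from `env`.

```python
# Mirrors the updated enable_or_skip_script logic above (AND across keys).
def matches(meta, env):
    for key in meta:
        expected = [str(v).lower() for v in meta[key]]
        if key in env:
            value = str(env[key]).lower()
            if set(expected) & {"yes", "on", "true", "1"}:
                if value not in ["no", "off", "false", "0"]:
                    continue
            elif set(expected) & {"no", "off", "false", "0"}:
                if value in ["no", "off", "false", "0"]:
                    continue
            elif value in expected:
                continue
        else:
            if set(expected) & {"no", "off", "false", "0"}:
                # New behaviour: a missing key satisfies a false-like expectation
                continue
        return False
    return True

print(matches({"CM_RUN_STATE_DOCKER": ["no"]}, {}))                         # True  (key absent, 'no' expected)
print(matches({"CM_RUN_STATE_DOCKER": ["yes"]}, {}))                        # False (key absent, 'yes' expected)
print(matches({"CM_MLPERF_DEVICE": ["gpu"]}, {"CM_MLPERF_DEVICE": "gpu"}))  # True
```
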
diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py
index 91baf5cb6..e76a68fed 100644
--- a/automation/script/module_misc.py
+++ b/automation/script/module_misc.py
@@ -1873,7 +1873,7 @@ def docker(i):
dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1]
- cm_repo=i.get('docker_cm_repo', 'mlcommons@cm4mlops')
+ cm_repo=i.get('docker_cm_repo', docker_settings.get('cm_repo', 'mlcommons@cm4mlops'))
docker_path = i.get('docker_path', '').strip()
if docker_path == '':
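
Note: a small sketch of what this one-line change does. An explicit `docker_cm_repo` input still wins, but the script's own `docker` settings are now consulted before the hard-coded default; the `myfork@cm4mlops` value below is purely illustrative.

```python
# Illustration only: lookup precedence after the change above.
def pick_cm_repo(i, docker_settings):
    return i.get('docker_cm_repo',
                 docker_settings.get('cm_repo', 'mlcommons@cm4mlops'))

print(pick_cm_repo({'docker_cm_repo': 'myfork@cm4mlops'}, {}))   # explicit input wins (illustrative value)
print(pick_cm_repo({}, {'cm_repo': 'gateoverflow@cm4mlops'}))    # script's docker settings
print(pick_cm_repo({}, {}))                                      # hard-coded fallback
```
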
diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml
index b33c0e8b5..ce70f07d6 100644
--- a/script/app-mlperf-inference-intel/_cm.yaml
+++ b/script/app-mlperf-inference-intel/_cm.yaml
@@ -293,7 +293,7 @@ variations:
adr:
conda-python:
version: "3.9"
- - tags: install,llvm,src,_tag.llvmorg-16.0.6,_clang,_release,_for-intel-mlperf-inference-v3.1-gptj
+ - tags: install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj
- names:
- conda-package
- ncurses
diff --git a/script/app-mlperf-inference-intel/run_gptj_harness.sh b/script/app-mlperf-inference-intel/run_gptj_harness.sh
index f006f673b..43e57bbb0 100644
--- a/script/app-mlperf-inference-intel/run_gptj_harness.sh
+++ b/script/app-mlperf-inference-intel/run_gptj_harness.sh
@@ -1,6 +1,8 @@
#!/bin/bash
export PATH=${CM_CONDA_BIN_PATH}:$PATH
+KMP_BLOCKTIME=${KMP_BLOCKTIME:-10}
+
export KMP_BLOCKTIME=${KMP_BLOCKTIME}
export KMP_AFFINITY=granularity=fine,compact,1,0
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
@@ -9,11 +11,11 @@ export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l`
num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }')
-NUM_PROC=${NUM_PROC:-num_numa}
+NUM_PROC=${NUM_PROC:-$num_numa}
CPUS_PER_PROC=$((num_physical_cores/num_numa))
-WORKERS_PER_PROC=${WORKERS_PER_PROC}
+WORKERS_PER_PROC=${WORKERS_PER_PROC:-1}
TOTAL_SAMPLE_COUNT=13368
-BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE}
+BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8}
TIMESTAMP=$(date +%m-%d-%H-%M)
HOSTNAME=$(hostname)
#OUTPUT_DIR=offline-output-${HOSTNAME}-batch-${BATCH_SIZE}-procs-${NUM_PROC}-ins-per-proc-${WORKERS_PER_PROC}-${TIMESTAMP}
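
Note: for readers less familiar with the `${VAR:-default}` form, a rough Python analogue of the fallbacks introduced above (illustration only; the real values come from the shell environment CM sets up).

```python
# Rough analogue of:
#   NUM_PROC=${NUM_PROC:-$num_numa}
#   WORKERS_PER_PROC=${WORKERS_PER_PROC:-1}
#   BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8}
# ${VAR:-default} falls back when the variable is unset *or* empty, which `or` reproduces here.
import os

num_numa = 2  # assumed value; the script derives it from `numactl --hardware`
num_proc = os.environ.get('NUM_PROC') or num_numa
workers_per_proc = os.environ.get('WORKERS_PER_PROC') or 1
batch_size = os.environ.get('CM_MLPERF_LOADGEN_BATCH_SIZE') or 8
print(num_proc, workers_per_proc, batch_size)
```
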
@@ -28,7 +30,7 @@ USER_CONF="${CM_MLPERF_USER_CONF}"
cmd="python runner.py --workload-name gptj \
- --scenario ${${CM_MLPERF_LOADGEN_SCENARIO}} \
+ --scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--mode ${LOADGEN_MODE} \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
index 67e60b166..b9fc789f8 100644
--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml
+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -104,6 +104,8 @@ deps:
# Detect CUDA if required
- tags: get,cuda,_cudnn
+ names:
+ - cuda
enable_if_env:
CM_MLPERF_DEVICE:
- gpu
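
Note: naming the CUDA dependency matters because `adr`/`add_deps_recursive` entries are keyed by dependency names. A conceptual sketch (not the real CM resolver) of what the new `cuda` name enables:

```python
# Conceptual sketch: entries keyed by a dependency's name can now reach the
# CUDA dep and adjust it before it runs.
def apply_adr(deps, adr):
    for dep in deps:
        for name in dep.get('names', []):
            if name in adr:
                dep.update(adr[name])
    return deps

deps = [{'tags': 'get,cuda,_cudnn', 'names': ['cuda']}]
print(apply_adr(deps, {'cuda': {'version': '12.2'}}))  # version value is illustrative
```
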
diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py
index 23a738453..ee97038ff 100644
--- a/script/app-mlperf-inference-mlcommons-python/customize.py
+++ b/script/app-mlperf-inference-mlcommons-python/customize.py
@@ -177,7 +177,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j")
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \
- " main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
+ " run.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
scenario_extra_options + mode_extra_options + dataset_options
cmd = cmd.replace("--count", "--max_examples")
@@ -188,7 +188,6 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio
gpu_options = ""
cmd = cmd + gpu_options
env['LOG_PATH'] = env['CM_MLPERF_OUTPUT_DIR']
- return cmd, env['RUN_DIR']
if env['CM_MODEL'] in [ "resnet50", "retinanet" ]:
diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml
index 19e789ae0..337c64375 100644
--- a/script/app-mlperf-inference-nvidia/_cm.yaml
+++ b/script/app-mlperf-inference-nvidia/_cm.yaml
@@ -261,7 +261,10 @@ deps:
CM_MLPERF_NVIDIA_HARNESS_RUN_MODE:
- run_harness
- - tags: get,generic-python-lib,_package.nvmitten,_path./opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl
+ - tags: get,generic-python-lib,_package.nvmitten
+ update_tags_from_env_with_prefix:
+ _path.:
+ - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH
enable_if_env:
CM_RUN_STATE_DOCKER:
- 'yes'
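
Note: a conceptual sketch (not the actual implementation) of how `update_tags_from_env_with_prefix` turns the env value into a `_path.<...>` tag, so the nvmitten wheel location is no longer hard-coded in the dependency.

```python
# For each prefix, append "<prefix><env value>" to the dependency tags when the
# listed env variable is set.
def expand_tags(tags, mapping, env):
    tag_list = tags.split(',')
    for prefix, env_keys in mapping.items():
        for key in env_keys:
            if env.get(key, '') != '':
                tag_list.append(prefix + env[key])
    return ','.join(tag_list)

env = {'CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH': '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'}
print(expand_tags('get,generic-python-lib,_package.nvmitten',
                  {'_path.': ['CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH']},
                  env))
# get,generic-python-lib,_package.nvmitten,_path./opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl
```
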
@@ -338,6 +341,7 @@ variations:
CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8
deps:
- tags: get,generic-python-lib,_onnx-graphsurgeon
+ version: 0.3.27
- tags: get,generic-python-lib,_package.onnx
version: 1.13.1
diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index 681620c2f..a4bb7b6a3 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -182,6 +182,8 @@ variations:
tags: _float32
librispeech-accuracy-script:
tags: _int32
+ cnndm-accuracy-script:
+ tags: _int32
env:
CM_MLPERF_PYTHON: 'yes'
CM_MLPERF_IMPLEMENTATION: mlcommons_python
@@ -189,6 +191,7 @@ variations:
CM_IMAGENET_ACCURACY_DTYPE: float32
CM_OPENIMAGES_ACCURACY_DTYPE: float32
CM_LIBRISPEECH_ACCURACY_DTYPE: float32
+ CM_CNNDM_ACCURACY_DTYPE: int32
prehook_deps:
- names:
- python-reference-mlperf-inference
@@ -235,6 +238,10 @@ variations:
default_variations:
backend: onnxruntime
+ nvidia-original,r4.1_default:
+ docker:
+ base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
+
nvidia-original:
docker:
interactive: True
@@ -430,7 +437,7 @@ variations:
tags: run,accuracy,mlperf,_imagenet
docker:
deps:
- - tags: get,dataset,imagenet,original
+ - tags: get,dataset,imagenet,validation,original
names:
- imagenet-original
- dataset-original
@@ -1142,6 +1149,25 @@ variations:
default_env:
CM_SKIP_SYS_UTILS: 'yes'
CM_REGENERATE_MEASURE_FILES: 'yes'
+ env:
+ CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl'
+
+ r4.1_default:
+ group:
+ reproducibility
+ add_deps_recursive:
+ nvidia-inference-common-code:
+ version: r4.0
+ tags: _go
+ nvidia-inference-server:
+ version: r4.0
+ tags: _go
+ default_env:
+ CM_SKIP_SYS_UTILS: 'yes'
+ CM_REGENERATE_MEASURE_FILES: 'yes'
+ env:
+ CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl'
+
invalid_variation_combinations:
-
@@ -1250,10 +1276,10 @@ docker:
shm_size: '32gb'
interactive: True
extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
- docker_os: ubuntu
- docker_cm_repo: gateoverflow@cm4mlops
- docker_real_run: False
- docker_os_version: '22.04'
+ os: ubuntu
+ cm_repo: gateoverflow@cm4mlops
+ real_run: False
+ os_version: '22.04'
docker_input_mapping:
imagenet_path: IMAGENET_PATH
gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH
diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py
index 23a7f75ce..183290828 100644
--- a/script/app-mlperf-inference/customize.py
+++ b/script/app-mlperf-inference/customize.py
@@ -46,8 +46,8 @@ def postprocess(i):
env['CMD'] = ''
state = i['state']
- if env.get('CM_MLPERF_USER_CONF', '') == '':
- return {'return': 0}
+ #if env.get('CM_MLPERF_USER_CONF', '') == '':
+ # return {'return': 0}
output_dir = env['CM_MLPERF_OUTPUT_DIR']
mode = env['CM_MLPERF_LOADGEN_MODE']
@@ -254,16 +254,16 @@ def postprocess(i):
if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME']
# Check CM automation repository
- repo_name = 'mlcommons@ck'
+ repo_name = 'mlcommons@cm4mlops'
repo_hash = ''
- r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'})
+ r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@cm4mlops,9e97bb72b0474657'})
if r['return']==0 and len(r['list'])==1:
repo_path = r['list'][0].path
if os.path.isdir(repo_path):
repo_name = os.path.basename(repo_path)
- # Check Grigori's dev
- if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck'
+ # Check dev
+ if repo_name == 'cm4mlops': repo_name = 'gateoverflow@cm4mlops'
r = cm.access({'action':'system',
'automation':'utils',
@@ -275,54 +275,6 @@ def postprocess(i):
host_info['cm_repo_name'] = repo_name
host_info['cm_repo_git_hash'] = repo_hash
- # Check a few important MLCommons repos
- xhashes = []
- md_xhashes = ''
-
- for x in [('get,git,inference', ['inference']),
- ('get,git,mlperf,power', ['power-dev'])]:
- xtags = x[0]
- xdirs = x[1]
-
- rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags})
- if rx['return']>0: return rx
- for cache in rx['list']:
- xurl = ''
- xhash = ''
-
- for xd in xdirs:
- xpath = os.path.join(cache.path, xd)
- if os.path.isdir(xpath):
- r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'})
- if r['return'] == 0 and r['ret'] == 0:
- xhash = r['stdout']
-
- r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'})
- if r['return'] == 0 and r['ret'] == 0:
- xurl = r['stdout']
-
- if xurl!='' and xhash!='':
- break
-
- if xurl!='' and xhash!='':
- # Check if doesn't exist
- found = False
-
- for xh in xhashes:
- if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash:
- found = True
- break
-
- if not found:
- xhashes.append({'mlcommons_git_url': xurl,
- 'mlcommons_git_hash': xhash,
- 'cm_cache_tags':cache.meta['tags']})
-
- md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash)
-
- if len(xhashes)>0:
- host_info['mlcommons_repos'] = xhashes
-
with open ("cm-host-info.json", "w") as fp:
fp.write(json.dumps(host_info, indent=2)+'\n')
@@ -336,10 +288,10 @@ def postprocess(i):
readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n"
- readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n"
+ readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n"
- readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n{}\n\n".format(platform.platform(),
- platform.processor(), sys.version, cm.__version__, md_xhashes)
+ readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(),
+ platform.processor(), sys.version, cm.__version__)
x = repo_name
if repo_hash!='': x+=' --checkout='+str(repo_hash)
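
Note: the repo lookup pattern above can be exercised on its own; a minimal example using the same artifact reference (assumes the `cmind` package is installed and the `mlcommons@cm4mlops` repo has been pulled).

```python
import cmind as cm

# Same query used in postprocess() above to locate the registered CM repository.
r = cm.access({'action': 'find', 'automation': 'repo',
               'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'})
if r['return'] == 0 and len(r['list']) == 1:
    print(r['list'][0].path)
```
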
diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml
index 149941282..359614e34 100644
--- a/script/build-mlperf-inference-server-nvidia/_cm.yaml
+++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -164,6 +164,11 @@ variations:
add_deps_recursive:
nvidia-inference-common-code:
tags: _ctuning
+ go:
+ group: code
+ add_deps_recursive:
+ nvidia-inference-common-code:
+ tags: _go
nvidia-only:
group: code
add_deps_recursive:
@@ -180,6 +185,23 @@ variations:
nvidia-inference-common-code:
tags: _mlcommons
+ r4.0:
+ group: version
+ add_deps_recursive:
+ nvidia-inference-common-code:
+ version: r4.0
+ nvidia-scratch-space:
+ tags: _version.4_1
+ deps:
+ - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
+ names:
+ - pytorch
+ - torch
+ - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0
+ names:
+ - pytorchvision
+ - torchvision
+ - tags: install,nccl,libs,_cuda
versions:
r2.1:
@@ -213,16 +235,33 @@ versions:
- torchvision
- tags: install,nccl,libs,_cuda
+ r4.0:
+ add_deps_recursive:
+ nvidia-inference-common-code:
+ version: r4.0
+ nvidia-scratch-space:
+ tags: _version.4_1
+ deps:
+ - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0
+ names:
+ - pytorch
+ - torch
+ - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0
+ names:
+ - pytorchvision
+ - torchvision
+ - tags: install,nccl,libs,_cuda
+
docker:
skip_run_cmd: 'no'
all_gpus: 'yes'
shm_size: '32gb'
extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
- docker_os: ubuntu
+ os: ubuntu
cm_repo_flags1: ' --branch=mlperf-inference'
- docker_real_run: False
+ real_run: False
interactive: True
- docker_os_version: '20.04'
+ os_version: '20.04'
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
docker_input_mapping:
imagenet_path: IMAGENET_PATH
diff --git a/script/get-cache-dir/_cm.json b/script/get-cache-dir/_cm.json
new file mode 100644
index 000000000..b5fd30277
--- /dev/null
+++ b/script/get-cache-dir/_cm.json
@@ -0,0 +1,36 @@
+{
+ "alias": "get-cache-dir",
+ "automation_alias": "script",
+ "automation_uid": "5b4e0237da074764",
+ "cache": true,
+ "category": "CM Interface",
+ "deps": [],
+ "docker": {
+ "run": false
+ },
+ "input_description": {},
+ "new_env_keys": [
+ "CM_CACHE_DIR",
+    "<<<CM_CACHE_DIR_ENV_NAME>>>"
+ ],
+ "new_state_keys": [],
+ "post_deps": [],
+ "posthook_deps": [],
+ "prehook_deps": [],
+ "tags": [
+ "get",
+ "cache",
+ "dir",
+ "directory"
+ ],
+ "uid": "48f4622e059b45ce",
+ "variations": {
+ "name.#": {
+ "env": {
+ "CM_CACHE_DIR_NAME": "#"
+ }
+ }
+ },
+ "versions": {
+ }
+}
diff --git a/script/get-cache-dir/customize.py b/script/get-cache-dir/customize.py
new file mode 100644
index 000000000..6e8a76460
--- /dev/null
+++ b/script/get-cache-dir/customize.py
@@ -0,0 +1,29 @@
+from cmind import utils
+import os
+
+def preprocess(i):
+
+ os_info = i['os_info']
+
+ env = i['env']
+
+ meta = i['meta']
+
+ automation = i['automation']
+
+ quiet = (env.get('CM_QUIET', False) == 'yes')
+
+ return {'return':0}
+
+def postprocess(i):
+
+ env = i['env']
+
+ cache_dir = os.getcwd()
+ if env.get('CM_CACHE_DIR_ENV_NAME', '') != '':
+ env[env['CM_CACHE_DIR_ENV_NAME']] = cache_dir
+
+ env['CM_CACHE_DIR'] = cache_dir
+ env['CM_GET_DEPENDENT_CACHED_PATH'] = cache_dir
+
+ return {'return':0}
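
Note: a hedged usage sketch for the new `get-cache-dir` script through the CM Python API. The `_name.my-experiments` variation value and the `MY_CACHE_PATH` key are illustrative, and the assumption is that the run result exposes exported variables under `new_env`.

```python
import cmind as cm

# Request a named, cached directory and have it exported under a caller-chosen env key.
r = cm.access({'action': 'run', 'automation': 'script',
               'tags': 'get,cache,dir,_name.my-experiments',   # '_name.my-experiments' is illustrative
               'env': {'CM_CACHE_DIR_ENV_NAME': 'MY_CACHE_PATH'},
               'quiet': True})
if r['return'] == 0:
    new_env = r.get('new_env', {})
    print(new_env.get('MY_CACHE_PATH', new_env.get('CM_CACHE_DIR')))
```
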
diff --git a/script/get-mlperf-inference-nvidia-common-code/_cm.json b/script/get-mlperf-inference-nvidia-common-code/_cm.json
index 46f12477e..7b47209ca 100644
--- a/script/get-mlperf-inference-nvidia-common-code/_cm.json
+++ b/script/get-mlperf-inference-nvidia-common-code/_cm.json
@@ -39,6 +39,9 @@
},
"nvidia-only": {
"group": "repo-owner"
+ },
+ "go": {
+ "group": "repo-owner"
}
},
"versions": {
@@ -62,6 +65,13 @@
"version": "v3.1"
}
}
+ },
+ "r4.0": {
+ "add_deps_recursive": {
+ "mlperf-inference-results": {
+ "version": "v4.0"
+ }
+ }
}
}
}
diff --git a/script/get-mlperf-inference-results/_cm.json b/script/get-mlperf-inference-results/_cm.json
index 46feecd04..f4870d57f 100644
--- a/script/get-mlperf-inference-results/_cm.json
+++ b/script/get-mlperf-inference-results/_cm.json
@@ -57,6 +57,12 @@
"CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v3.1",
"CM_GIT_URL": "https://github.com/<<>>/inference_results_v3.1.git"
}
+ },
+ "v4.0": {
+ "env": {
+ "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v4.0",
+        "CM_GIT_URL": "https://github.com/<<<GITHUB_REPO_OWNER>>>/inference_results_v4.0.git"
+ }
}
},
"variations": {
@@ -85,6 +91,12 @@
"GITHUB_REPO_OWNER": "GATEOverflow",
"NVIDIA_ONLY": "yes"
}
+ },
+ "go": {
+ "group": "source-repo",
+ "env": {
+ "GITHUB_REPO_OWNER": "GATEOverflow"
+ }
}
}
}
diff --git a/script/get-mlperf-inference-src/_cm.json b/script/get-mlperf-inference-src/_cm.json
index 4e4c4806d..0c1bac0c8 100644
--- a/script/get-mlperf-inference-src/_cm.json
+++ b/script/get-mlperf-inference-src/_cm.json
@@ -68,7 +68,7 @@
}
}
],
- "print_env_at_the_end": {
+ "print_env_at_the_end_disabled": {
"CM_MLPERF_INFERENCE_CONF_PATH": "Path to the MLPerf inference benchmark configuration file",
"CM_MLPERF_INFERENCE_SOURCE": "Path to MLPerf inference benchmark sources"
},
diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py
index 7916a1bde..c42db0263 100644
--- a/script/get-mlperf-inference-src/customize.py
+++ b/script/get-mlperf-inference-src/customize.py
@@ -82,7 +82,7 @@ def postprocess(i):
if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']
- return {'return':0}
+ return {'return':0, 'version': env['CM_VERSION']}
def get_valid_models(mlperf_version, mlperf_path):
diff --git a/script/get-mlperf-inference-sut-configs/_cm.json b/script/get-mlperf-inference-sut-configs/_cm.json
index 7ad8376f3..012aca664 100644
--- a/script/get-mlperf-inference-sut-configs/_cm.json
+++ b/script/get-mlperf-inference-sut-configs/_cm.json
@@ -2,7 +2,7 @@
"alias": "get-mlperf-inference-sut-configs",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
- "cache": true,
+ "cache": false,
"category": "MLPerf benchmark support",
"new_env_keys": [
"CM_HW_*",
@@ -28,23 +28,16 @@
"configs",
"sut-configs"
],
- "uid": "c2fbf72009e2445b",
- "variations": {
- "octoml": {
+ "deps": [
+ {
+ "tags": "get,cache,dir,_name.mlperf-inference-sut-configs",
"env": {
- "CM_SUT_USE_EXTERNAL_CONFIG_REPO": "yes",
- "CM_GIT_CHECKOUT_FOLDER": "configs",
- "CM_GIT_URL": "https://github.com/arjunsuresh/mlperf-inference-configs"
+ "CM_CACHE_DIR_ENV_NAME": "CM_SUT_CONFIGS_PATH"
},
- "prehook_deps": [
- {
- "force_env_keys": [
- "CM_GIT_URL",
- "CM_GIT_CHECKOUT_*"
- ],
- "tags": "get,git,repo,_repo.mlperf_inference_configs_octoml"
- }
- ]
+ "extra_cache_tags": "mlperf,inference,sut,configs"
}
+ ],
+ "uid": "c2fbf72009e2445b",
+ "variations": {
}
}
diff --git a/script/get-mlperf-inference-sut-configs/customize.py b/script/get-mlperf-inference-sut-configs/customize.py
index f074ed30b..8bca2a401 100644
--- a/script/get-mlperf-inference-sut-configs/customize.py
+++ b/script/get-mlperf-inference-sut-configs/customize.py
@@ -36,7 +36,7 @@ def postprocess(i):
if env.get('CM_SUT_NAME', '') == '':
env['CM_SUT_NAME'] = env['CM_HW_NAME'] + "-" + implementation_string + "-" + device + "-" + backend + "-" + backend_version + "-" + run_config_string
- if env.get('CM_SUT_CONFIGS_PATH',''):
+ if env.get('CM_SUT_CONFIGS_PATH','') != '':
path = env['CM_SUT_CONFIGS_PATH']
elif env.get('CM_SUT_USE_EXTERNAL_CONFIG_REPO', '') == "yes":
path = env.get('CM_GIT_CHECKOUT_PATH')
diff --git a/script/get-mlperf-inference-sut-description/_cm.json b/script/get-mlperf-inference-sut-description/_cm.json
index 4c3f998e5..a160722c2 100644
--- a/script/get-mlperf-inference-sut-description/_cm.json
+++ b/script/get-mlperf-inference-sut-description/_cm.json
@@ -2,7 +2,7 @@
"alias": "get-mlperf-inference-sut-description",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
- "cache": true,
+ "cache": false,
"category": "MLPerf benchmark support",
"deps": [
{
@@ -46,6 +46,13 @@
},
{
"tags": "get,generic-python-lib,_package.dmiparser"
+ },
+ {
+ "tags": "get,cache,dir,_name.mlperf-inference-sut-descriptions",
+ "extra_cache_tags": "mlperf,inference,sut,descriptions",
+ "env": {
+ "CM_CACHE_DIR_ENV_NAME": "CM_MLPERF_INFERENCE_SUT_DESC_PATH"
+ }
}
],
"default_env": {
diff --git a/script/get-mlperf-inference-sut-description/customize.py b/script/get-mlperf-inference-sut-description/customize.py
index cd0c2f754..71636941f 100644
--- a/script/get-mlperf-inference-sut-description/customize.py
+++ b/script/get-mlperf-inference-sut-description/customize.py
@@ -33,7 +33,9 @@ def preprocess(i):
sut = hw_name + sut_suffix
script_path = i['run_script_input']['path']
- sut_path = os.path.join(os.getcwd(), "suts", sut + ".json")
+    sut_desc_path = env['CM_MLPERF_INFERENCE_SUT_DESC_PATH']
+
+ sut_path = os.path.join(sut_desc_path, "suts", sut + ".json")
if os.path.exists(sut_path) and env.get('CM_SUT_DESC_CACHE', '') == "yes":
print(f"Reusing SUT description file {sut}")
state['CM_SUT_META'] = json.load(open(sut_path))
diff --git a/script/get-mlperf-power-dev/customize.py b/script/get-mlperf-power-dev/customize.py
index 2af085d74..50afb3ba4 100644
--- a/script/get-mlperf-power-dev/customize.py
+++ b/script/get-mlperf-power-dev/customize.py
@@ -18,4 +18,4 @@ def postprocess(i):
if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']
- return {'return':0}
+ return {'return':0, 'version': env['CM_VERSION']}
diff --git a/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh
new file mode 100644
index 000000000..058cd76ee
--- /dev/null
+++ b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+cd ${CM_MLPERF_TRAINING_SOURCE}/recommendation_v2_torchrec_dlrm/
+${CM_PYTHON_BIN_WITH_PATH} materialize_synthetic_multihot_dataset.py \
+ --in_memory_binary_criteo_path $PREPROCESSED_CRITEO_1TB_CLICK_LOGS_DATASET_PATH \
+ --output_path $MATERIALIZED_DATASET_PATH \
+ --num_embeddings_per_feature 40000000,39060,17295,7424,20265,3,7122,1543,63,40000000,3067956,405282,10,2209,11938,155,4,976,14,40000000,40000000,40000000,590152,12973,108,36 \
+ --multi_hot_sizes 3,2,1,2,6,1,1,1,1,7,3,8,1,6,9,5,1,1,1,12,100,27,10,3,1,1 \
+ --multi_hot_distribution_type uniform
+test $? -eq 0 || exit $?
diff --git a/script/install-ipex-from-src/_cm.json b/script/install-ipex-from-src/_cm.json
index f9774e143..f7bb3d675 100644
--- a/script/install-ipex-from-src/_cm.json
+++ b/script/install-ipex-from-src/_cm.json
@@ -34,6 +34,9 @@
},
"tags": "get,pytorch,from.src"
},
+ {
+ "tags": "get,generic,conda-package,_package.ninja"
+ },
{
"env": {
"CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_IPEX_SRC_REPO_PATH"
@@ -123,6 +126,20 @@
],
"tags": "get,generic,conda-package,_package.setuptools,_source.conda-forge"
},
+ {
+ "names": [
+ "conda-package",
+ "typing-extensions"
+ ],
+ "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge"
+ },
+ {
+ "names": [
+ "conda-package",
+ "sympy"
+ ],
+ "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge"
+ },
{
"tags": "install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj"
}
diff --git a/script/install-llvm-src/_cm.json b/script/install-llvm-src/_cm.json
index 3ae795695..42e7e20df 100644
--- a/script/install-llvm-src/_cm.json
+++ b/script/install-llvm-src/_cm.json
@@ -281,6 +281,20 @@
"+ CXXFLAGS": [ "-Wno-nonnull", "-Wno-maybe-uninitialized", "-Wno-uninitialized", "-Wno-free-nonheap-object" ]
}
},
+ {
+ "names": [
+ "conda-package",
+ "typing-extensions"
+ ],
+ "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge"
+ },
+ {
+ "names": [
+ "conda-package",
+ "sympy"
+ ],
+ "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge"
+ },
{
"tags": "get,generic-python-lib,_custom-python,_package.setuptools",
"env": {
diff --git a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh
index df23aa3e3..30b612b2b 100644
--- a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh
+++ b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh
@@ -1,11 +1,15 @@
#!/bin/bash
export PATH=${CM_CONDA_BIN_PATH}:${PATH}
-export ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
+#export LD_LIBRARY_PATH=${CM_CONDA_LIB_PATH}:${LD_LIBRARY_PATH}
+ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))")
+test $? -eq 0 || exit $?
+export ABI=$ABI
mkdir -p llvm-project && cd llvm-project
wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/cmake-16.0.6.src.tar.xz
wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/llvm-16.0.6.src.tar.xz
tar -xf cmake-16.0.6.src.tar.xz
+test $? -eq 0 || exit $?
mv cmake-16.0.6.src cmake
tar -xf llvm-16.0.6.src.tar.xz
mv llvm-16.0.6.src llvm
@@ -13,9 +17,15 @@ rm -rf build
mkdir -p build
cd build
export DEB_BUILD_MAINT_OPTIONS=hardening=-format
+export CC=${CM_C_COMPILER_WITH_PATH}
+export CXX=${CM_CXX_COMPILER_WITH_PATH}
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=${ABI}" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_INCLUDE_BENCHMARKS=OFF ../llvm/
+test $? -eq 0 || exit $?
cmake --build . -j $(nproc)
+test $? -eq 0 || exit $?
export LLVM_ROOT=$CONDA_PREFIX
cmake -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_SHARED_LINKER_FLAGS="-L$CONDA_PREFIX -Wl,-rpath,$CONDA_PREFIX" -P cmake_install.cmake
-ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13
+test $? -eq 0 || exit $?
+ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13
+test $? -eq 0 || exit $?
diff --git a/script/install-pytorch-from-src/_cm.json b/script/install-pytorch-from-src/_cm.json
index 057dbe681..2bdd46e68 100644
--- a/script/install-pytorch-from-src/_cm.json
+++ b/script/install-pytorch-from-src/_cm.json
@@ -220,6 +220,23 @@
}
]
},
+ "for-nvidia-mlperf-inference-v4.0": {
+ "base": [
+ "sha.32f93b1",
+ "cuda"
+ ],
+ "deps": [
+ {
+ "tags": "get,cmake",
+ "version_min": "3.25.0"
+ }
+ ],
+ "ad": {
+ "pytorch-src-repo": {
+ "tags": "_no-recurse-submodules,_full-history"
+ }
+ }
+ },
"for-nvidia-mlperf-inference-v3.1": {
"base": [
"sha.b5021ba9",
diff --git a/script/install-torchvision-from-src/_cm.json b/script/install-torchvision-from-src/_cm.json
index a474bb484..34965bc60 100644
--- a/script/install-torchvision-from-src/_cm.json
+++ b/script/install-torchvision-from-src/_cm.json
@@ -94,6 +94,9 @@
"TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1"
}
},
+ "for-nvidia-mlperf-inference-v4.0": {
+ "alias": "for-nvidia-mlperf-inference-v3.1"
+ },
"for-nvidia-mlperf-inference-v3.1": {
"base": [
"sha.657027f3",
@@ -102,7 +105,7 @@
"deps": [
{
"tags": "install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1"
- }
+ }
],
"env": {
}
diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py
index f7b13c16d..895227b44 100644
--- a/script/process-mlperf-accuracy/customize.py
+++ b/script/process-mlperf-accuracy/customize.py
@@ -25,7 +25,7 @@ def preprocess(i):
results_dir_split = results_dir.split(xsep)
dataset = env['CM_DATASET']
- regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', False)
+ regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', env.get('CM_RERUN', False))
for result_dir in results_dir_split:
@@ -105,9 +105,7 @@ def preprocess(i):
else:
return {'return': 1, 'error': 'Unsupported dataset'}
- outfile = os.path.join(result_dir, "accuracy.txt")
- if not os.path.exists(outfile) or (os.stat(outfile).st_size == 0) or env.get("CM_REGENERATE_MEASURE_FILES", False):
- run_cmds.append(CMD)
+ run_cmds.append(CMD)
if os_info['platform'] == 'windows':
diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml
index 7e19560e2..be61dae5c 100644
--- a/script/run-mlperf-inference-app/_cm.yaml
+++ b/script/run-mlperf-inference-app/_cm.yaml
@@ -37,6 +37,7 @@ default_env:
input_mapping:
backend: CM_MLPERF_BACKEND
+ beam_size: GPTJ_BEAM_SIZE
category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
clean: CM_MLPERF_CLEAN_ALL
compliance: CM_MLPERF_LOADGEN_COMPLIANCE
@@ -179,7 +180,6 @@ variations:
CM_MLPERF_DASHBOARD: 'on'
find-performance:
- default: true
env:
CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes'
CM_MLPERF_LOADGEN_ALL_MODES: 'no'
@@ -254,6 +254,12 @@ variations:
CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default
group: benchmark-version
+ r4.1:
+ env:
+ CM_MLPERF_INFERENCE_VERSION: '4.1'
+ CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default
+ group: benchmark-version
+
short:
add_deps_recursive:
submission-checker:
@@ -264,6 +270,7 @@ variations:
group: submission-generation-style
performance-and-accuracy:
+ default: true
base:
- all-modes
default_variations:
diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py
index 1e7b13761..861207c29 100644
--- a/script/run-mlperf-inference-app/customize.py
+++ b/script/run-mlperf-inference-app/customize.py
@@ -14,6 +14,7 @@ def preprocess(i):
os_info = i['os_info']
env = i['env']
+
inp = i['input']
state = i['state']
script_path = i['run_script_input']['path']
@@ -176,6 +177,9 @@ def preprocess(i):
del(env['OUTPUT_BASE_DIR'])
state = {}
docker_extra_input = {}
+
+ del(env['CM_HW_NAME'])
+
for k in inp:
if k.startswith("docker_"):
docker_extra_input[k] = inp[k]
@@ -215,6 +219,9 @@ def preprocess(i):
r = cm.access(ii)
if r['return'] > 0:
return r
+ if action == "docker":
+ return {'return': 0} # We run commands interactively inside the docker container
+
if state.get('docker', {}):
del(state['docker'])
@@ -237,8 +244,7 @@ def preprocess(i):
if state.get("cm-mlperf-inference-results"):
#print(state["cm-mlperf-inference-results"])
for sut in state["cm-mlperf-inference-results"]:#only one sut will be there
- # Grigori: that may not work properly since customize may have another Python than MLPerf
- # (for example, if we use virtual env)
+            # Better to do this in a standalone CM script with proper deps; for now we manage it by modifying the sys.path of the Python process running CM
import mlperf_utils
print(sut)
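
Note: as the comment above says, `mlperf_utils` becomes importable because a directory is appended to `sys.path` of the Python process running CM. A minimal sketch of that pattern; the directory shown is a placeholder, not the path the script actually uses.

```python
import os
import sys

# Placeholder: wherever mlperf_utils.py lives in the local CM/MLPerf setup.
mlperf_utils_dir = '/path/to/dir/containing/mlperf_utils'
if mlperf_utils_dir not in sys.path:
    sys.path.insert(0, mlperf_utils_dir)

import mlperf_utils  # now resolvable by the Python process running CM
```
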