From a091782dc693df96a98242c1ac484fd503422dfc Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 02:49:02 +0530 Subject: [PATCH 01/15] Fix skip_if_env for empty env key --- automation/script/module.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/automation/script/module.py b/automation/script/module.py index 602439123..f6f7fac05 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4319,14 +4319,13 @@ def enable_or_skip_script(meta, env): for key in meta: meta_key = [str(v).lower() for v in meta[key]] if key in env: - value = str(env[key]).lower() - + value = str(env[key]).lower().strip() if set(meta_key) & set(["yes", "on", "true", "1"]): # Any set value other than false is taken as set - if value not in ["no", "off", "false", "0"]: + if value not in ["no", "off", "false", "0", ""]: continue elif set(meta_key) & set(["no", "off", "false", "0"]): - if value in ["no", "off", "false", "0"]: + if value in ["no", "off", "false", "0", ""]: continue elif value in meta_key: continue From c90c545b4aa926353035a1df1e7c8f494225ca92 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 02:50:32 +0530 Subject: [PATCH 02/15] Fix skip_if_env for empty env key --- automation/script/module.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/automation/script/module.py b/automation/script/module.py index f6f7fac05..eb3bf4d80 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4330,7 +4330,7 @@ def enable_or_skip_script(meta, env): elif value in meta_key: continue else: - if set(meta_key) & set(["no", "off", "false", "0"]): + if set(meta_key) & set(["no", "off", "false", "0", ""]): # If key is missing in env, and if the expected value is False, consider it a match continue @@ -4347,15 +4347,15 @@ def any_enable_or_skip_script(meta, env): for key in meta: found = False if key in env: - value = str(env[key]).lower() + value = str(env[key]).lower().strip() meta_key = [str(v).lower() for v in meta[key]] if set(meta_key) & set(["yes", "on", "true", "1"]): - if value not in ["no", "off", "false", "0"]: + if value not in ["no", "off", "false", "0", ""]: found = True - elif set(meta_key) & set(["no", "off", "false", "0"]): - if value in ["no", "off", "false", "0"]: + elif set(meta_key) & set(["no", "off", "false", "0", ""]): + if value in ["no", "off", "false", "0", ""]: found = True elif value in meta_key: found = True From 6656410f648efb89ab099128e768d1f7cca00494 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 02:34:01 -0700 Subject: [PATCH 03/15] int8 datatype alias added for intel mlperf inference --- script/app-mlperf-inference-intel/_cm.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index 9a5848326..ae0dbcade 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -615,6 +615,9 @@ variations: dataset-preprocessed: tags: _uint8,_rgb8 + int8: + alias: uint8 + int4,gptj_: env: INTEL_GPTJ_INT4: 'yes' From 1a449a8d3c6b997c4e566118021a3c4cef7e446c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 16:16:27 +0530 Subject: [PATCH 04/15] don't output mlperf inference power efficiency when none --- script/get-mlperf-inference-utils/mlperf_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/script/get-mlperf-inference-utils/mlperf_utils.py b/script/get-mlperf-inference-utils/mlperf_utils.py index 8b5b967a3..4a61d1c4a 100644 --- a/script/get-mlperf-inference-utils/mlperf_utils.py +++ b/script/get-mlperf-inference-utils/mlperf_utils.py @@ -262,6 +262,10 @@ def get_result_table(results): row.append(val) row.append("-") + val1 = results[model][scenario].get('TEST01') + val2 = results[model][scenario].get('TEST05') + val3 = results[model][scenario].get('TEST04') + #if results[model][scenario].get('power','') != '': # row.append(results[model][scenario]['power']) if results[model][scenario].get('power_efficiency','') != '': @@ -269,12 +273,9 @@ def get_result_table(results): if not results[model][scenario].get('power_valid', True): val = "X "+val row.append(val) - else: + elif val1 or val2 or val3: #Don't output unless there are any further column data row.append(None) - val1 = results[model][scenario].get('TEST01') - val2 = results[model][scenario].get('TEST05') - val3 = results[model][scenario].get('TEST04') if val1: row.append(val1) if val2: From 56fb30b9a0cead7fe1b4aff17ab2ef49ee506580 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 16:00:58 +0100 Subject: [PATCH 05/15] Clean TMP variables from docker env in the run command --- automation/script/module_misc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 0221602e7..bcb6646bb 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1427,6 +1427,11 @@ def dockerfile(i): continue ''' + d_env = i_run_cmd_arc.get('env', {}) + for key in list(d_env.keys()): + if key.startswith("CM_TMP_"): + del(d_env[key]) + # Check if need to update/map/mount inputs and env r = process_inputs({'run_cmd_arc': i_run_cmd_arc, 'docker_settings': docker_settings, @@ -1949,7 +1954,6 @@ def docker(i): 'docker_settings':docker_settings, 'docker_run_cmd_prefix':i.get('docker_run_cmd_prefix','')}) if r['return']>0: return r - run_cmd = r['run_cmd_string'] + ' ' + container_env_string + ' --docker_run_deps ' env['CM_RUN_STATE_DOCKER'] = True From dc14736f3c7b78b8e9935a68361f02d9dcc5e84a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 20:40:49 +0530 Subject: [PATCH 06/15] Typo fix --- script/app-mlperf-inference/_cm.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index f4434961a..583285139 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -252,7 +252,8 @@ variations: interactive: True extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public - docker:os_version: "20.04" + os: "ubuntu" + os_version: "20.04" deps: - tags: get,mlperf,inference,nvidia,scratch,space - tags: get,nvidia-docker From cb5bfa70aad0f5cfc52c8e5b0425e2a98fa03608 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 29 May 2024 16:48:04 +0100 Subject: [PATCH 07/15] Not execute postdeps for fake_runs --- automation/script/module.py | 25 +++++++++++----------- script/get-mlperf-inference-loadgen/run.sh | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/automation/script/module.py b/automation/script/module.py index eb3bf4d80..aafdbb4fa 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1165,26 +1165,27 @@ def _run(self, i): - # Check chain of posthook dependencies on other CM scripts. We consider them same as postdeps when - # script is in cache - if verbose: - print (recursion_spaces + ' - Checking posthook dependencies on other CM scripts:') + if not fake_run: + # Check chain of posthook dependencies on other CM scripts. We consider them same as postdeps when + # script is in cache + if verbose: + print (recursion_spaces + ' - Checking posthook dependencies on other CM scripts:') - clean_env_keys_post_deps = meta.get('clean_env_keys_post_deps',[]) + clean_env_keys_post_deps = meta.get('clean_env_keys_post_deps',[]) - r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, + r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) - if r['return']>0: return r + if r['return']>0: return r - if verbose: - print (recursion_spaces + ' - Checking post dependencies on other CM scripts:') + if verbose: + print (recursion_spaces + ' - Checking post dependencies on other CM scripts:') - # Check chain of post dependencies on other CM scripts - r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, + # Check chain of post dependencies on other CM scripts + r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) - if r['return']>0: return r + if r['return']>0: return r diff --git a/script/get-mlperf-inference-loadgen/run.sh b/script/get-mlperf-inference-loadgen/run.sh index c35ce4bdd..372fddfb7 100644 --- a/script/get-mlperf-inference-loadgen/run.sh +++ b/script/get-mlperf-inference-loadgen/run.sh @@ -45,7 +45,7 @@ MLPERF_INFERENCE_PYTHON_SITE_BASE=${INSTALL_DIR}"/python" cd "${CM_MLPERF_INFERENCE_SOURCE}/loadgen" CFLAGS="-std=c++14 -O3" ${CM_PYTHON_BIN_WITH_PATH} setup.py bdist_wheel -${CM_PYTHON_BIN_WITH_PATH} -m pip install --force-reinstall `ls dist/mlperf_loadgen-*cp3${PYTHON_MINOR_VERSION}*.whl` --target=${MLPERF_INFERENCE_PYTHON_SITE_BASE} +${CM_PYTHON_BIN_WITH_PATH} -m pip install --force-reinstall `ls dist/mlperf_loadgen-*cp3${PYTHON_MINOR_VERSION}*.whl` --target="${MLPERF_INFERENCE_PYTHON_SITE_BASE}" if [ "${?}" != "0" ]; then exit 1; fi # Clean the built wheel From cfbcc12af519aed9403a355be92d3a133e3d6509 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 May 2024 03:33:55 +0530 Subject: [PATCH 08/15] Fixes the code version for Intel mlperf inference v3.1 --- script/app-mlperf-inference-intel/_cm.yaml | 1 + script/app-mlperf-inference/_cm.yaml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index ae0dbcade..d164a3378 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -173,6 +173,7 @@ variations: inference-results version: v4.0 v3.1: + group: version env: CM_MLPERF_INFERENCE_CODE_VERSION: "v3.1" adr: diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 583285139..c79967b87 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1152,6 +1152,8 @@ variations: nvidia-inference-server: version: r3.1 tags: _ctuning + intel-harness: + tags: _v3.1 default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' From f0d54aed4e129cce785063e215e0410881ac4067 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 30 May 2024 15:06:30 +0530 Subject: [PATCH 09/15] Fixes for intel mlperf inference bert --- script/app-mlperf-inference-intel/_cm.yaml | 12 ++++++++++-- .../app-mlperf-inference-intel/run_bert_harness.sh | 3 ++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index d164a3378..7860e1123 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -185,6 +185,7 @@ variations: inference-results version: v3.1 + # Target devices cpu: group: device @@ -686,9 +687,16 @@ variations: default_env: CM_MLPERF_LOADGEN_BATCH_SIZE: 1 - sapphire-rapids.24c,bert-99: + sapphire-rapids.24c,bert_: env: WORKERS_PER_PROC: 1 + sapphire-rapids.112c,bert_,offline: + env: + WORKERS_PER_PROC: 4 + sapphire-rapids.112c,bert_,server: + env: + WORKERS_PER_PROC: 8 + docker: - docker_real_run: False + real_run: False diff --git a/script/app-mlperf-inference-intel/run_bert_harness.sh b/script/app-mlperf-inference-intel/run_bert_harness.sh index 2875fac72..b49783c6f 100644 --- a/script/app-mlperf-inference-intel/run_bert_harness.sh +++ b/script/app-mlperf-inference-intel/run_bert_harness.sh @@ -1,6 +1,7 @@ #!/bin/bash -THREADS_PER_INSTANCE=$(((4 * ${CM_HOST_CPU_THREADS_PER_CORE}) / ${CM_HOST_CPU_SOCKETS})) +WORKERS_PER_PROC=${WORKERS_PER_PROC:-4} +THREADS_PER_INSTANCE=$((( ${WORKERS_PER_PROC} * ${CM_HOST_CPU_THREADS_PER_CORE}) / ${CM_HOST_CPU_SOCKETS})) export LD_PRELOAD=${CONDA_PREFIX}/lib/libjemalloc.so export MALLOC_CONF="oversize_threshold:1,background_thread:true,percpu_arena:percpu,metadata_thp:always,dirty_decay_ms:9000000000,muzzy_decay_ms:9000000000"; From cacf36c357ac4756320f570b562b4cb4a12066c0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 May 2024 20:37:38 +0530 Subject: [PATCH 10/15] Added pytorch base image for reference implementation and cuda device --- script/app-mlperf-inference/_cm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index c79967b87..a5b4af572 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -900,6 +900,10 @@ variations: add_deps_recursive: mlperf-inference-implementation: tags: _cpu + + cuda,reference: + docker: + base_image: nvcr.io/nvidia/pytorch:24.03-py3 cuda: docker: all_gpus: 'yes' From d47420493e88444784e612e2f13bc573b81fd2ce Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 1 Jun 2024 02:42:52 +0530 Subject: [PATCH 11/15] Added get,docker deps for cm docker script --- automation/script/module_misc.py | 7 ++ script/get-docker/customize.py | 67 +++++++++++++++++++ .../{run-ubuntu.sh => install-ubuntu.sh} | 3 + script/get-docker/run.sh | 3 + 4 files changed, 80 insertions(+) create mode 100644 script/get-docker/customize.py rename script/get-docker/{run-ubuntu.sh => install-ubuntu.sh} (93%) create mode 100644 script/get-docker/run.sh diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index bcb6646bb..577f706d1 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1721,6 +1721,13 @@ def docker(i): if image_repo == '': image_repo = 'cknowledge' + # Host system needs to have docker + r = self_module.cmind.access({'action':'run', + 'automation':'script', + 'tags': "get,docker"}) + if r['return'] > 0: + return r + for artifact in sorted(lst, key = lambda x: x.meta.get('alias','')): meta = artifact.meta diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py new file mode 100644 index 000000000..383eca7cd --- /dev/null +++ b/script/get-docker/customize.py @@ -0,0 +1,67 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + automation = i['automation'] + + recursion_spaces = i['recursion_spaces'] + + file_name = 'docker.exe' if os_info['platform'] == 'windows' else 'docker' + env['FILE_NAME'] = file_name + + if 'CM_DOCKER_BIN_WITH_PATH' not in env: + r = i['automation'].find_artifact({'file_name': file_name, + 'env': env, + 'os_info':os_info, + 'default_path_env_key': 'PATH', + 'detect_version':True, + 'env_path_key':'CM_DOCKER_BIN_WITH_PATH', + 'run_script_input':i['run_script_input'], + 'recursion_spaces':recursion_spaces}) + if r['return'] >0 : + if r['return'] == 16: + if env['CM_HOST_OS_FLAVOR'] == "ubuntu": + run_file_name = "install-ubuntu.sh" + r = automation.run_native_script({'run_script_input':i['run_script_input'], 'env':env, 'script_name':run_file_name}) + if r['return'] >0: return r + else: + return {'return': 1, 'error': 'Please install docker to continue. Once installed you might need to relogin to get permission to run docker'} + else: + return r + + return {'return':0} + +def detect_version(i): + r = i['automation'].parse_version({'match_text': r'Docker version\s*([\d.]+)', + 'group_number': 1, + 'env_key':'CM_DOCKER_VERSION', + 'which_env':i['env']}) + if r['return'] >0: return r + + version = r['version'] + + print (i['recursion_spaces'] + ' Detected version: {}'.format(version)) + return {'return':0, 'version':version} + +def postprocess(i): + env = i['env'] + + r = detect_version(i) + + if r['return'] >0: return r + + version = r['version'] + found_file_path = env['CM_DOCKER_BIN_WITH_PATH'] + + found_path = os.path.dirname(found_file_path) + env['CM_DOCKER_INSTALLED_PATH'] = found_path + env['+PATH'] = [ found_path ] + + env['CM_DOCKER_CACHE_TAGS'] = 'version-'+version + + return {'return':0, 'version': version} diff --git a/script/get-docker/run-ubuntu.sh b/script/get-docker/install-ubuntu.sh similarity index 93% rename from script/get-docker/run-ubuntu.sh rename to script/get-docker/install-ubuntu.sh index 6aafc26aa..b0b6eb3a6 100644 --- a/script/get-docker/run-ubuntu.sh +++ b/script/get-docker/install-ubuntu.sh @@ -35,6 +35,9 @@ cmd="sudo usermod -aG docker $USER" echo "$cmd" eval "$cmd" test $? -eq 0 || exit $? + +echo "Please relogin to the shell so that the new group is effective" +exit 1 #exec newgrp docker #sudo su - $USER diff --git a/script/get-docker/run.sh b/script/get-docker/run.sh new file mode 100644 index 000000000..f7f946a7f --- /dev/null +++ b/script/get-docker/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash +docker --version > tmp-ver.out +test $? -eq 0 || exit 1 From 1b4cc185ff2dec469816d5b44f6e58294ec2cbc3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 May 2024 22:26:44 +0100 Subject: [PATCH 12/15] Cleanup of docker install scripts --- script/get-docker/customize.py | 9 +++------ script/get-docker/install-centos.sh | 13 +++++++++++++ script/get-docker/install.bat | 2 ++ script/get-docker/install.sh | 2 ++ 4 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 script/get-docker/install-centos.sh create mode 100644 script/get-docker/install.bat create mode 100644 script/get-docker/install.sh diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 383eca7cd..c4a99f1a2 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -25,12 +25,9 @@ def preprocess(i): 'recursion_spaces':recursion_spaces}) if r['return'] >0 : if r['return'] == 16: - if env['CM_HOST_OS_FLAVOR'] == "ubuntu": - run_file_name = "install-ubuntu.sh" - r = automation.run_native_script({'run_script_input':i['run_script_input'], 'env':env, 'script_name':run_file_name}) - if r['return'] >0: return r - else: - return {'return': 1, 'error': 'Please install docker to continue. Once installed you might need to relogin to get permission to run docker'} + run_file_name = "install" + r = automation.run_native_script({'run_script_input':i['run_script_input'], 'env':env, 'script_name':run_file_name}) + if r['return'] >0: return r else: return r diff --git a/script/get-docker/install-centos.sh b/script/get-docker/install-centos.sh new file mode 100644 index 000000000..46cbbc166 --- /dev/null +++ b/script/get-docker/install-centos.sh @@ -0,0 +1,13 @@ +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo +sudo yum install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +cmd="sudo usermod -aG docker $USER" +echo "$cmd" +eval "$cmd" +test $? -eq 0 || exit $? + +echo "Please relogin to the shell so that the new group is effective" +exit 1 +#exec newgrp docker +#sudo su - $USER diff --git a/script/get-docker/install.bat b/script/get-docker/install.bat new file mode 100644 index 000000000..d6bdb8295 --- /dev/null +++ b/script/get-docker/install.bat @@ -0,0 +1,2 @@ +echo "Please install docker to continue" +exit 1 diff --git a/script/get-docker/install.sh b/script/get-docker/install.sh new file mode 100644 index 000000000..d6bdb8295 --- /dev/null +++ b/script/get-docker/install.sh @@ -0,0 +1,2 @@ +echo "Please install docker to continue" +exit 1 From a8f4c0e6ef98bb1224a2f407851bf0c9fec25bfd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 1 Jun 2024 03:29:04 +0530 Subject: [PATCH 13/15] Docker run cleanups --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 3 +++ script/app-mlperf-inference/_cm.yaml | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 2d7cdca58..257225f01 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -31,6 +31,9 @@ default_env: CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' +docker: + real_run: False + # Map script inputs to environment variables input_mapping: count: CM_MLPERF_LOADGEN_QUERY_COUNT diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index a5b4af572..dddbac175 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -319,8 +319,6 @@ variations: real_run: false run: true docker_input_mapping: - imagenet_path: IMAGENET_PATH - gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH dlrm_data_path: DLRM_DATA_PATH intel_gptj_int8_model_path: CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH From c67122a023ef51f39ac74b48a5ae86d003896f64 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 1 Jun 2024 03:42:41 +0530 Subject: [PATCH 14/15] Add nvidia,docker deps for docker run --- script/app-mlperf-inference/_cm.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index dddbac175..22e545e65 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -902,9 +902,12 @@ variations: cuda,reference: docker: base_image: nvcr.io/nvidia/pytorch:24.03-py3 + cuda: docker: all_gpus: 'yes' + deps: + - tags: get,nvidia-docker group: device env: From 4ef87a4568d8e440dd2b005f1b65b828963e3904 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 31 May 2024 23:37:43 +0100 Subject: [PATCH 15/15] Cleanups for mobilenet runs --- .../_cm.json | 25 +++++++++- .../customize.py | 50 +++++++++++++------ 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/script/run-mlperf-inference-mobilenet-models/_cm.json b/script/run-mlperf-inference-mobilenet-models/_cm.json index 29a440f19..d0f578087 100644 --- a/script/run-mlperf-inference-mobilenet-models/_cm.json +++ b/script/run-mlperf-inference-mobilenet-models/_cm.json @@ -65,11 +65,14 @@ "CM_MLPERF_SUBMISSION_MODE": "yes" } }, - "populate-readme": { + "performance-and-accuracy": { "group": "run-mode", + "default": "true", "env": { "CM_MLPERF_FIND_PERFORMANCE_MODE": "no", - "CM_MLPERF_POPULATE_README": "yes" + "CM_MLPERF_PERFORMANCE_MODE": "yes", + "CM_MLPERF_ACCURACY_MODE": "yes", + "CM_MLPERF_SUBMISSION_MODE": "no" } }, "all-models": { @@ -86,6 +89,24 @@ "CM_MLPERF_RUN_MOBILENETS": "yes" } }, + "mobilenet-v1": { + "group": "model-selection", + "env": { + "CM_MLPERF_RUN_MOBILENET_V1": "yes" + } + }, + "mobilenet-v2": { + "group": "model-selection", + "env": { + "CM_MLPERF_RUN_MOBILENET_V2": "yes" + } + }, + "mobilenet-v3": { + "group": "model-selection", + "env": { + "CM_MLPERF_RUN_MOBILENET_V3": "yes" + } + }, "efficientnet": { "group": "model-selection", "env": { diff --git a/script/run-mlperf-inference-mobilenet-models/customize.py b/script/run-mlperf-inference-mobilenet-models/customize.py index e14e660d5..ace19a6fd 100644 --- a/script/run-mlperf-inference-mobilenet-models/customize.py +++ b/script/run-mlperf-inference-mobilenet-models/customize.py @@ -21,7 +21,7 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') verbose = (env.get('CM_VERBOSE', False) == 'yes') - models = { + models_all = { "mobilenet": { "v1": { "multiplier": [ "multiplier-1.0", "multiplier-0.75", "multiplier-0.5", "multiplier-0.25" ], @@ -47,6 +47,22 @@ def preprocess(i): } } } + + models = {} + if env.get('CM_MLPERF_RUN_MOBILENET_V1', '') == "yes": + models['mobilenet'] = {} + models['mobilenet']['v1'] = models_all['mobilenet']['v1'] + elif env.get('CM_MLPERF_RUN_MOBILENET_V2', '') == "yes": + models['mobilenet'] = {} + models['mobilenet']['v2'] = models_all['mobilenet']['v2'] + elif env.get('CM_MLPERF_RUN_MOBILENET_V3', '') == "yes": + models['mobilenet'] = {} + models['mobilenet']['v3'] = models_all['mobilenet']['v3'] + elif env.get('CM_MLPERF_RUN_MOBILENETS', '') == "yes": + models['mobilenet'] = models_all['mobilenet'] + elif env.get('CM_MLPERF_RUN_EFFICIENTNETS', '') == "yes": + models['efficientnet'] = models_all['efficientnet'] + variation_strings = {} for t1 in models: variation_strings[t1] = [] @@ -73,12 +89,12 @@ def preprocess(i): variation_list.append("_"+k3) variation_strings[t1].append(",".join(variation_list)) - if env.get('CM_MLPERF_POPULATE_README','') == "yes": - var="_populate-readme" - execution_mode="valid" - elif env.get('CM_MLPERF_SUBMISSION_MODE','') == "yes": + if env.get('CM_MLPERF_SUBMISSION_MODE','') == "yes": var="_submission" execution_mode="valid" + elif env.get('CM_MLPERF_ACCURACY_MODE','') == "yes" and env.get('CM_MLPERF_PERFORMANCE_MODE','') == "yes": + var="_full,_performance-and-accuracy" + execution_mode="valid" elif env.get('CM_MLPERF_ACCURACY_MODE','') == "yes": var="_full,_accuracy-only" execution_mode="valid" @@ -151,22 +167,26 @@ def preprocess(i): if env.get('CM_MLPERF_INFERENCE_SUBMISSION_DIR', '') != '': cm_input['submission_dir'] = env['CM_MLPERF_INFERENCE_SUBMISSION_DIR'] - if env.get('CM_MLPERF_ACCURACY_MODE','') == "yes": - cm_input['mode'] = 'accuracy' - - if env.get('CM_MLPERF_PERFORMANCE_MODE','') == "yes": - cm_input['mode'] = 'performance' - if env.get('CM_MLPERF_FIND_PERFORMANCE_MODE','') == "yes" and env.get('CM_MLPERF_NO_RERUN','') != 'yes': cm_input['rerun'] = True if env.get('CM_MLPERF_POWER','') == "yes": cm_input['power'] = 'yes' - print(cm_input) - r = cmind.access(cm_input) - if r['return'] > 0: - return r + if env.get('CM_MLPERF_ACCURACY_MODE','') == "yes": + cm_input['mode'] = 'accuracy' + print(cm_input) + r = cmind.access(cm_input) + if r['return'] > 0: + return r + + if env.get('CM_MLPERF_PERFORMANCE_MODE','') == "yes": + cm_input['mode'] = 'performance' + + print(cm_input) + r = cmind.access(cm_input) + if r['return'] > 0: + return r if env.get('CM_TEST_ONE_RUN', '') == "yes": return {'return':0}