diff --git a/automation/script/README-specs.md b/automation/script/README-specs.md
index 58526d168..4b40feeba 100644
--- a/automation/script/README-specs.md
+++ b/automation/script/README-specs.md
@@ -27,7 +27,7 @@ See the [automatically generated catalog](https://github.com/mlcommons/ck/blob/m
 When we run a CM script we can also pass inputs to it and any input added in `input_mapping` dictionary inside `_cm.json` gets converted to the corresponding `ENV` variable.
 
 ### Conditional execution of any `deps`, `post_deps`
-We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its executional conditional
+We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its execution conditional
 
 ### Versions
 We can specify any specific version of a script using `version`. `version_max` and `version_min` are also possible options.
@@ -73,9 +73,7 @@ Sometimes it is difficult to add all variations needed for a script like say `ba
 
 
 ### Script workflow (env, deps, native scripts)
 
-![](assets/scripts-workflow.png)
+
 
-
-
-© 2022-23 [MLCommons](https://mlcommons.org)
+© 2022-24 [MLCommons](https://mlcommons.org)
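For illustration, a minimal hypothetical `_cm.json` fragment combining the features described in the README hunk above might look as follows; the script tags and environment variable names are invented for this example, and only the structure mirrors real CM metadata:

```json
{
  "input_mapping": {
    "skip_model_download": "CM_EXAMPLE_SKIP_MODEL_DOWNLOAD"
  },
  "deps": [
    {
      "tags": "get,example,ml-model",
      "version_min": "2.0",
      "skip_if_env": {
        "CM_EXAMPLE_SKIP_MODEL_DOWNLOAD": [ "yes" ]
      }
    }
  ]
}
```

With such metadata, running the script with `--skip_model_download=yes` would export `CM_EXAMPLE_SKIP_MODEL_DOWNLOAD=yes`, so the model dependency would be skipped, while `version_min` constrains which version of that dependency is acceptable when it does run.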
diff --git a/automation/script/module.py b/automation/script/module.py index eee99803c..e79b33a3e 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4314,12 +4314,12 @@ def enable_or_skip_script(meta, env): (AND function) """ for key in meta: + meta_key = [str(v).lower() for v in meta[key]] if key in env: value = str(env[key]).lower() - meta_key = [str(v).lower() for v in meta[key]] - if set(meta_key) & set(["yes", "on", "true", "1"]): + # Any set value other than false is taken as set if value not in ["no", "off", "false", "0"]: continue elif set(meta_key) & set(["no", "off", "false", "0"]): @@ -4327,6 +4327,11 @@ def enable_or_skip_script(meta, env): continue elif value in meta_key: continue + else: + if set(meta_key) & set(["no", "off", "false", "0"]): + # If key is missing in env, and if the expected value is False, consider it a match + continue + return False return True diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 91baf5cb6..e76a68fed 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1873,7 +1873,7 @@ def docker(i): dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1] - cm_repo=i.get('docker_cm_repo', 'mlcommons@cm4mlops') + cm_repo=i.get('docker_cm_repo', docker_settings.get('cm_repo', 'mlcommons@cm4mlops')) docker_path = i.get('docker_path', '').strip() if docker_path == '': diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index b33c0e8b5..ce70f07d6 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -293,7 +293,7 @@ variations: adr: conda-python: version: "3.9" - - tags: install,llvm,src,_tag.llvmorg-16.0.6,_clang,_release,_for-intel-mlperf-inference-v3.1-gptj + - tags: install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj - names: - conda-package - ncurses diff --git a/script/app-mlperf-inference-intel/run_gptj_harness.sh b/script/app-mlperf-inference-intel/run_gptj_harness.sh index f006f673b..43e57bbb0 100644 --- a/script/app-mlperf-inference-intel/run_gptj_harness.sh +++ b/script/app-mlperf-inference-intel/run_gptj_harness.sh @@ -1,6 +1,8 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:$PATH +KMP_BLOCKTIME=${KMP_BLOCKTIME:-10} + export KMP_BLOCKTIME=${KMP_BLOCKTIME} export KMP_AFFINITY=granularity=fine,compact,1,0 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so @@ -9,11 +11,11 @@ export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l` num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }') -NUM_PROC=${NUM_PROC:-num_numa} +NUM_PROC=${NUM_PROC:-$num_numa} CPUS_PER_PROC=$((num_physical_cores/num_numa)) -WORKERS_PER_PROC=${WORKERS_PER_PROC} +WORKERS_PER_PROC=${WORKERS_PER_PROC:-1} TOTAL_SAMPLE_COUNT=13368 -BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE} +BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8} TIMESTAMP=$(date +%m-%d-%H-%M) HOSTNAME=$(hostname) #OUTPUT_DIR=offline-output-${HOSTNAME}-batch-${BATCH_SIZE}-procs-${NUM_PROC}-ins-per-proc-${WORKERS_PER_PROC}-${TIMESTAMP} @@ -28,7 +30,7 @@ USER_CONF="${CM_MLPERF_USER_CONF}" cmd="python runner.py --workload-name gptj \ - --scenario ${${CM_MLPERF_LOADGEN_SCENARIO}} \ + --scenario ${CM_MLPERF_LOADGEN_SCENARIO} \ --mode ${LOADGEN_MODE} \ --num-proc ${NUM_PROC} \ --cpus-per-proc ${CPUS_PER_PROC} \ diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml 
b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 67e60b166..b9fc789f8 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -104,6 +104,8 @@ deps: # Detect CUDA if required - tags: get,cuda,_cudnn + names: + - cuda enable_if_env: CM_MLPERF_DEVICE: - gpu diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 23a738453..ee97038ff 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -177,7 +177,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j") cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ - " main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + " run.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ scenario_extra_options + mode_extra_options + dataset_options cmd = cmd.replace("--count", "--max_examples") @@ -188,7 +188,6 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio gpu_options = "" cmd = cmd + gpu_options env['LOG_PATH'] = env['CM_MLPERF_OUTPUT_DIR'] - return cmd, env['RUN_DIR'] if env['CM_MODEL'] in [ "resnet50", "retinanet" ]: diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml index 19e789ae0..337c64375 100644 --- a/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/script/app-mlperf-inference-nvidia/_cm.yaml @@ -261,7 +261,10 @@ deps: CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: - run_harness - - tags: get,generic-python-lib,_package.nvmitten,_path./opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl + - tags: get,generic-python-lib,_package.nvmitten + update_tags_from_env_with_prefix: + _path.: + - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH enable_if_env: CM_RUN_STATE_DOCKER: - 'yes' @@ -338,6 +341,7 @@ variations: CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8 deps: - tags: get,generic-python-lib,_onnx-graphsurgeon + version: 0.3.27 - tags: get,generic-python-lib,_package.onnx version: 1.13.1 diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 681620c2f..a4bb7b6a3 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -182,6 +182,8 @@ variations: tags: _float32 librispeech-accuracy-script: tags: _int32 + cnndm-accuracy-script: + tags: _int32 env: CM_MLPERF_PYTHON: 'yes' CM_MLPERF_IMPLEMENTATION: mlcommons_python @@ -189,6 +191,7 @@ variations: CM_IMAGENET_ACCURACY_DTYPE: float32 CM_OPENIMAGES_ACCURACY_DTYPE: float32 CM_LIBRISPEECH_ACCURACY_DTYPE: float32 + CM_CNNDM_ACCURACY_DTYPE: int32 prehook_deps: - names: - python-reference-mlperf-inference @@ -235,6 +238,10 @@ variations: default_variations: backend: onnxruntime + nvidia-original,r4.1_default: + docker: + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + nvidia-original: docker: interactive: True @@ -430,7 +437,7 @@ variations: tags: run,accuracy,mlperf,_imagenet docker: deps: - - tags: 
get,dataset,imagenet,original + - tags: get,dataset,imagenet,validation,original names: - imagenet-original - dataset-original @@ -1142,6 +1149,25 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + + r4.1_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + tags: _go + nvidia-inference-server: + version: r4.0 + tags: _go + default_env: + CM_SKIP_SYS_UTILS: 'yes' + CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + invalid_variation_combinations: - @@ -1250,10 +1276,10 @@ docker: shm_size: '32gb' interactive: True extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' - docker_os: ubuntu - docker_cm_repo: gateoverflow@cm4mlops - docker_real_run: False - docker_os_version: '22.04' + os: ubuntu + cm_repo: gateoverflow@cm4mlops + real_run: False + os_version: '22.04' docker_input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 23a7f75ce..183290828 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -46,8 +46,8 @@ def postprocess(i): env['CMD'] = '' state = i['state'] - if env.get('CM_MLPERF_USER_CONF', '') == '': - return {'return': 0} + #if env.get('CM_MLPERF_USER_CONF', '') == '': + # return {'return': 0} output_dir = env['CM_MLPERF_OUTPUT_DIR'] mode = env['CM_MLPERF_LOADGEN_MODE'] @@ -254,16 +254,16 @@ def postprocess(i): if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@ck' + repo_name = 'mlcommons@cm4mlops' repo_hash = '' - r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'}) + r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@cm4mlops,9e97bb72b0474657'}) if r['return']==0 and len(r['list'])==1: repo_path = r['list'][0].path if os.path.isdir(repo_path): repo_name = os.path.basename(repo_path) - # Check Grigori's dev - if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck' + # Check dev + if repo_name == 'cm4mlops': repo_name = 'gateoverflow@cm4mlops' r = cm.access({'action':'system', 'automation':'utils', @@ -275,54 +275,6 @@ def postprocess(i): host_info['cm_repo_name'] = repo_name host_info['cm_repo_git_hash'] = repo_hash - # Check a few important MLCommons repos - xhashes = [] - md_xhashes = '' - - for x in [('get,git,inference', ['inference']), - ('get,git,mlperf,power', ['power-dev'])]: - xtags = x[0] - xdirs = x[1] - - rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags}) - if rx['return']>0: return rx - for cache in rx['list']: - xurl = '' - xhash = '' - - for xd in xdirs: - xpath = os.path.join(cache.path, xd) - if os.path.isdir(xpath): - r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'}) - if r['return'] == 0 and r['ret'] == 0: - xhash = r['stdout'] - - r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'}) - if r['return'] == 0 and r['ret'] == 0: - xurl = r['stdout'] - - if xurl!='' and xhash!='': - break - - if xurl!='' and xhash!='': - # Check if 
doesn't exist - found = False - - for xh in xhashes: - if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash: - found = True - break - - if not found: - xhashes.append({'mlcommons_git_url': xurl, - 'mlcommons_git_hash': xhash, - 'cm_cache_tags':cache.meta['tags']}) - - md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash) - - if len(xhashes)>0: - host_info['mlcommons_repos'] = xhashes - with open ("cm-host-info.json", "w") as fp: fp.write(json.dumps(host_info, indent=2)+'\n') @@ -336,10 +288,10 @@ def postprocess(i): readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n" - readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n" + readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n" - readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n{}\n\n".format(platform.platform(), - platform.processor(), sys.version, cm.__version__, md_xhashes) + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), + platform.processor(), sys.version, cm.__version__) x = repo_name if repo_hash!='': x+=' --checkout='+str(repo_hash) diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml index 149941282..359614e34 100644 --- a/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -164,6 +164,11 @@ variations: add_deps_recursive: nvidia-inference-common-code: tags: _ctuning + go: + group: code + add_deps_recursive: + nvidia-inference-common-code: + tags: _go nvidia-only: group: code add_deps_recursive: @@ -180,6 +185,23 @@ variations: nvidia-inference-common-code: tags: _mlcommons + r4.0: + group: version + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + nvidia-scratch-space: + tags: _version.4_1 + deps: + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorch + - torch + - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorchvision + - torchvision + - tags: install,nccl,libs,_cuda versions: r2.1: @@ -213,16 +235,33 @@ versions: - torchvision - tags: install,nccl,libs,_cuda + r4.0: + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + nvidia-scratch-space: + tags: _version.4_1 + deps: + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorch + - torch + - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorchvision + - torchvision + - tags: install,nccl,libs,_cuda + docker: skip_run_cmd: 'no' all_gpus: 'yes' shm_size: '32gb' extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' - docker_os: ubuntu + os: ubuntu cm_repo_flags1: ' --branch=mlperf-inference' - docker_real_run: False + real_run: False interactive: True - docker_os_version: '20.04' + os_version: '20.04' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public docker_input_mapping: imagenet_path: IMAGENET_PATH diff --git a/script/get-cache-dir/_cm.json b/script/get-cache-dir/_cm.json new file mode 100644 index 
000000000..b5fd30277 --- /dev/null +++ b/script/get-cache-dir/_cm.json @@ -0,0 +1,36 @@ +{ + "alias": "get-cache-dir", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "CM Interface", + "deps": [], + "docker": { + "run": false + }, + "input_description": {}, + "new_env_keys": [ + "CM_CACHE_DIR", + "<<>>" + ], + "new_state_keys": [], + "post_deps": [], + "posthook_deps": [], + "prehook_deps": [], + "tags": [ + "get", + "cache", + "dir", + "directory" + ], + "uid": "48f4622e059b45ce", + "variations": { + "name.#": { + "env": { + "CM_CACHE_DIR_NAME": "#" + } + } + }, + "versions": { + } +} diff --git a/script/get-cache-dir/customize.py b/script/get-cache-dir/customize.py new file mode 100644 index 000000000..6e8a76460 --- /dev/null +++ b/script/get-cache-dir/customize.py @@ -0,0 +1,29 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + cache_dir = os.getcwd() + if env.get('CM_CACHE_DIR_ENV_NAME', '') != '': + env[env['CM_CACHE_DIR_ENV_NAME']] = cache_dir + + env['CM_CACHE_DIR'] = cache_dir + env['CM_GET_DEPENDENT_CACHED_PATH'] = cache_dir + + return {'return':0} diff --git a/script/get-mlperf-inference-nvidia-common-code/_cm.json b/script/get-mlperf-inference-nvidia-common-code/_cm.json index 46f12477e..7b47209ca 100644 --- a/script/get-mlperf-inference-nvidia-common-code/_cm.json +++ b/script/get-mlperf-inference-nvidia-common-code/_cm.json @@ -39,6 +39,9 @@ }, "nvidia-only": { "group": "repo-owner" + }, + "go": { + "group": "repo-owner" } }, "versions": { @@ -62,6 +65,13 @@ "version": "v3.1" } } + }, + "r4.0": { + "add_deps_recursive": { + "mlperf-inference-results": { + "version": "v4.0" + } + } } } } diff --git a/script/get-mlperf-inference-results/_cm.json b/script/get-mlperf-inference-results/_cm.json index 46feecd04..f4870d57f 100644 --- a/script/get-mlperf-inference-results/_cm.json +++ b/script/get-mlperf-inference-results/_cm.json @@ -57,6 +57,12 @@ "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v3.1", "CM_GIT_URL": "https://github.com/<<>>/inference_results_v3.1.git" } + }, + "v4.0": { + "env": { + "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v4.0", + "CM_GIT_URL": "https://github.com/<<>>/inference_results_v4.0.git" + } } }, "variations": { @@ -85,6 +91,12 @@ "GITHUB_REPO_OWNER": "GATEOverflow", "NVIDIA_ONLY": "yes" } + }, + "go": { + "group": "source-repo", + "env": { + "GITHUB_REPO_OWNER": "GATEOverflow" + } } } } diff --git a/script/get-mlperf-inference-src/_cm.json b/script/get-mlperf-inference-src/_cm.json index 4e4c4806d..0c1bac0c8 100644 --- a/script/get-mlperf-inference-src/_cm.json +++ b/script/get-mlperf-inference-src/_cm.json @@ -68,7 +68,7 @@ } } ], - "print_env_at_the_end": { + "print_env_at_the_end_disabled": { "CM_MLPERF_INFERENCE_CONF_PATH": "Path to the MLPerf inference benchmark configuration file", "CM_MLPERF_INFERENCE_SOURCE": "Path to MLPerf inference benchmark sources" }, diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index 7916a1bde..c42db0263 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -82,7 +82,7 @@ def postprocess(i): if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] - return {'return':0} + 
return {'return':0, 'version': env['CM_VERSION']} def get_valid_models(mlperf_version, mlperf_path): diff --git a/script/get-mlperf-inference-sut-configs/_cm.json b/script/get-mlperf-inference-sut-configs/_cm.json index 7ad8376f3..012aca664 100644 --- a/script/get-mlperf-inference-sut-configs/_cm.json +++ b/script/get-mlperf-inference-sut-configs/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-configs", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": true, + "cache": false, "category": "MLPerf benchmark support", "new_env_keys": [ "CM_HW_*", @@ -28,23 +28,16 @@ "configs", "sut-configs" ], - "uid": "c2fbf72009e2445b", - "variations": { - "octoml": { + "deps": [ + { + "tags": "get,cache,dir,_name.mlperf-inference-sut-configs", "env": { - "CM_SUT_USE_EXTERNAL_CONFIG_REPO": "yes", - "CM_GIT_CHECKOUT_FOLDER": "configs", - "CM_GIT_URL": "https://github.com/arjunsuresh/mlperf-inference-configs" + "CM_CACHE_DIR_ENV_NAME": "CM_SUT_CONFIGS_PATH" }, - "prehook_deps": [ - { - "force_env_keys": [ - "CM_GIT_URL", - "CM_GIT_CHECKOUT_*" - ], - "tags": "get,git,repo,_repo.mlperf_inference_configs_octoml" - } - ] + "extra_cache_tags": "mlperf,inference,sut,configs" } + ], + "uid": "c2fbf72009e2445b", + "variations": { } } diff --git a/script/get-mlperf-inference-sut-configs/customize.py b/script/get-mlperf-inference-sut-configs/customize.py index f074ed30b..8bca2a401 100644 --- a/script/get-mlperf-inference-sut-configs/customize.py +++ b/script/get-mlperf-inference-sut-configs/customize.py @@ -36,7 +36,7 @@ def postprocess(i): if env.get('CM_SUT_NAME', '') == '': env['CM_SUT_NAME'] = env['CM_HW_NAME'] + "-" + implementation_string + "-" + device + "-" + backend + "-" + backend_version + "-" + run_config_string - if env.get('CM_SUT_CONFIGS_PATH',''): + if env.get('CM_SUT_CONFIGS_PATH','') != '': path = env['CM_SUT_CONFIGS_PATH'] elif env.get('CM_SUT_USE_EXTERNAL_CONFIG_REPO', '') == "yes": path = env.get('CM_GIT_CHECKOUT_PATH') diff --git a/script/get-mlperf-inference-sut-description/_cm.json b/script/get-mlperf-inference-sut-description/_cm.json index 4c3f998e5..a160722c2 100644 --- a/script/get-mlperf-inference-sut-description/_cm.json +++ b/script/get-mlperf-inference-sut-description/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-description", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": true, + "cache": false, "category": "MLPerf benchmark support", "deps": [ { @@ -46,6 +46,13 @@ }, { "tags": "get,generic-python-lib,_package.dmiparser" + }, + { + "tags": "get,cache,dir,_name.mlperf-inference-sut-descriptions", + "extra_cache_tags": "mlperf,inference,sut,descriptions", + "env": { + "CM_CACHE_DIR_ENV_NAME": "CM_MLPERF_INFERENCE_SUT_DESC_PATH" + } } ], "default_env": { diff --git a/script/get-mlperf-inference-sut-description/customize.py b/script/get-mlperf-inference-sut-description/customize.py index cd0c2f754..71636941f 100644 --- a/script/get-mlperf-inference-sut-description/customize.py +++ b/script/get-mlperf-inference-sut-description/customize.py @@ -33,7 +33,9 @@ def preprocess(i): sut = hw_name + sut_suffix script_path = i['run_script_input']['path'] - sut_path = os.path.join(os.getcwd(), "suts", sut + ".json") + sut_desc_path=env['CM_MLPERF_INFERENCE_SUT_DESC_PATH'] + + sut_path = os.path.join(sut_desc_path, "suts", sut + ".json") if os.path.exists(sut_path) and env.get('CM_SUT_DESC_CACHE', '') == "yes": print(f"Reusing SUT description file {sut}") state['CM_SUT_META'] = json.load(open(sut_path)) diff --git 
a/script/get-mlperf-power-dev/customize.py b/script/get-mlperf-power-dev/customize.py index 2af085d74..50afb3ba4 100644 --- a/script/get-mlperf-power-dev/customize.py +++ b/script/get-mlperf-power-dev/customize.py @@ -18,4 +18,4 @@ def postprocess(i): if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] - return {'return':0} + return {'return':0, 'version': env['CM_VERSION']} diff --git a/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh new file mode 100644 index 000000000..058cd76ee --- /dev/null +++ b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh @@ -0,0 +1,9 @@ +#!/bin/bash +cd ${CM_MLPERF_TRAINING_SOURCE}/recommendation_v2_torchrec_dlrm/ +${CM_PYTHON_BIN_WITH_PATH} materialize_synthetic_multihot_dataset.py \ + --in_memory_binary_criteo_path $PREPROCESSED_CRITEO_1TB_CLICK_LOGS_DATASET_PATH \ + --output_path $MATERIALIZED_DATASET_PATH \ + --num_embeddings_per_feature 40000000,39060,17295,7424,20265,3,7122,1543,63,40000000,3067956,405282,10,2209,11938,155,4,976,14,40000000,40000000,40000000,590152,12973,108,36 \ + --multi_hot_sizes 3,2,1,2,6,1,1,1,1,7,3,8,1,6,9,5,1,1,1,12,100,27,10,3,1,1 \ + --multi_hot_distribution_type uniform +test $? -eq 0 || exit $? diff --git a/script/install-ipex-from-src/_cm.json b/script/install-ipex-from-src/_cm.json index f9774e143..f7bb3d675 100644 --- a/script/install-ipex-from-src/_cm.json +++ b/script/install-ipex-from-src/_cm.json @@ -34,6 +34,9 @@ }, "tags": "get,pytorch,from.src" }, + { + "tags": "get,generic,conda-package,_package.ninja" + }, { "env": { "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_IPEX_SRC_REPO_PATH" @@ -123,6 +126,20 @@ ], "tags": "get,generic,conda-package,_package.setuptools,_source.conda-forge" }, + { + "names": [ + "conda-package", + "typing-extensions" + ], + "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge" + }, + { + "names": [ + "conda-package", + "sympy" + ], + "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge" + }, { "tags": "install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj" } diff --git a/script/install-llvm-src/_cm.json b/script/install-llvm-src/_cm.json index 3ae795695..42e7e20df 100644 --- a/script/install-llvm-src/_cm.json +++ b/script/install-llvm-src/_cm.json @@ -281,6 +281,20 @@ "+ CXXFLAGS": [ "-Wno-nonnull", "-Wno-maybe-uninitialized", "-Wno-uninitialized", "-Wno-free-nonheap-object" ] } }, + { + "names": [ + "conda-package", + "typing-extensions" + ], + "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge" + }, + { + "names": [ + "conda-package", + "sympy" + ], + "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge" + }, { "tags": "get,generic-python-lib,_custom-python,_package.setuptools", "env": { diff --git a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh index df23aa3e3..30b612b2b 100644 --- a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh +++ b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh @@ -1,11 +1,15 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:${PATH} -export ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") +#export LD_LIBRARY_PATH=${CM_CONDA_LIB_PATH}:${LD_LIBRARY_PATH} +ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") +test $? -eq 0 || exit $? 
+export ABI=$ABI mkdir -p llvm-project && cd llvm-project wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/cmake-16.0.6.src.tar.xz wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/llvm-16.0.6.src.tar.xz tar -xf cmake-16.0.6.src.tar.xz +test $? -eq 0 || exit $? mv cmake-16.0.6.src cmake tar -xf llvm-16.0.6.src.tar.xz mv llvm-16.0.6.src llvm @@ -13,9 +17,15 @@ rm -rf build mkdir -p build cd build export DEB_BUILD_MAINT_OPTIONS=hardening=-format +export CC=${CM_C_COMPILER_WITH_PATH} +export CXX=${CM_CXX_COMPILER_WITH_PATH} cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=${ABI}" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_INCLUDE_BENCHMARKS=OFF ../llvm/ +test $? -eq 0 || exit $? cmake --build . -j $(nproc) +test $? -eq 0 || exit $? export LLVM_ROOT=$CONDA_PREFIX cmake -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_SHARED_LINKER_FLAGS="-L$CONDA_PREFIX -Wl,-rpath,$CONDA_PREFIX" -P cmake_install.cmake -ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +test $? -eq 0 || exit $? +ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +test $? -eq 0 || exit $? diff --git a/script/install-pytorch-from-src/_cm.json b/script/install-pytorch-from-src/_cm.json index 057dbe681..2bdd46e68 100644 --- a/script/install-pytorch-from-src/_cm.json +++ b/script/install-pytorch-from-src/_cm.json @@ -220,6 +220,23 @@ } ] }, + "for-nvidia-mlperf-inference-v4.0": { + "base": [ + "sha.32f93b1", + "cuda" + ], + "deps": [ + { + "tags": "get,cmake", + "version_min": "3.25.0" + } + ], + "ad": { + "pytorch-src-repo": { + "tags": "_no-recurse-submodules,_full-history" + } + } + }, "for-nvidia-mlperf-inference-v3.1": { "base": [ "sha.b5021ba9", diff --git a/script/install-torchvision-from-src/_cm.json b/script/install-torchvision-from-src/_cm.json index a474bb484..34965bc60 100644 --- a/script/install-torchvision-from-src/_cm.json +++ b/script/install-torchvision-from-src/_cm.json @@ -94,6 +94,9 @@ "TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1" } }, + "for-nvidia-mlperf-inference-v4.0": { + "alias": "for-nvidia-mlperf-inference-v3.1" + }, "for-nvidia-mlperf-inference-v3.1": { "base": [ "sha.657027f3", @@ -102,7 +105,7 @@ "deps": [ { "tags": "install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1" - } + } ], "env": { } diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index f7b13c16d..895227b44 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -25,7 +25,7 @@ def preprocess(i): results_dir_split = results_dir.split(xsep) dataset = env['CM_DATASET'] - regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', False) + regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', env.get('CM_RERUN', False)) for result_dir in results_dir_split: @@ -105,9 +105,7 @@ def preprocess(i): else: return {'return': 1, 'error': 'Unsupported dataset'} - outfile = os.path.join(result_dir, "accuracy.txt") - if not os.path.exists(outfile) or (os.stat(outfile).st_size == 0) or env.get("CM_REGENERATE_MEASURE_FILES", False): - run_cmds.append(CMD) + run_cmds.append(CMD) if os_info['platform'] == 'windows': diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 7e19560e2..be61dae5c 100644 --- 
a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -37,6 +37,7 @@ default_env: input_mapping: backend: CM_MLPERF_BACKEND + beam_size: GPTJ_BEAM_SIZE category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE clean: CM_MLPERF_CLEAN_ALL compliance: CM_MLPERF_LOADGEN_COMPLIANCE @@ -179,7 +180,6 @@ variations: CM_MLPERF_DASHBOARD: 'on' find-performance: - default: true env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' CM_MLPERF_LOADGEN_ALL_MODES: 'no' @@ -254,6 +254,12 @@ variations: CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default group: benchmark-version + r4.1: + env: + CM_MLPERF_INFERENCE_VERSION: '4.1' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + group: benchmark-version + short: add_deps_recursive: submission-checker: @@ -264,6 +270,7 @@ variations: group: submission-generation-style performance-and-accuracy: + default: true base: - all-modes default_variations: diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 1e7b13761..861207c29 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -14,6 +14,7 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + inp = i['input'] state = i['state'] script_path = i['run_script_input']['path'] @@ -176,6 +177,9 @@ def preprocess(i): del(env['OUTPUT_BASE_DIR']) state = {} docker_extra_input = {} + + del(env['CM_HW_NAME']) + for k in inp: if k.startswith("docker_"): docker_extra_input[k] = inp[k] @@ -215,6 +219,9 @@ def preprocess(i): r = cm.access(ii) if r['return'] > 0: return r + if action == "docker": + return {'return': 0} # We run commands interactively inside the docker container + if state.get('docker', {}): del(state['docker']) @@ -237,8 +244,7 @@ def preprocess(i): if state.get("cm-mlperf-inference-results"): #print(state["cm-mlperf-inference-results"]) for sut in state["cm-mlperf-inference-results"]:#only one sut will be there - # Grigori: that may not work properly since customize may have another Python than MLPerf - # (for example, if we use virtual env) + # Better to do this in a stand alone CM script with proper deps but currently we manage this by modifying the sys path of the python executing CM import mlperf_utils print(sut)
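The `enable_or_skip_script` change in `automation/script/module.py` near the top of this patch is easier to reason about in isolation. The sketch below is not the module code itself; it is a simplified, standalone paraphrase of the AND-style matching that the patched function appears to implement for conditional dependencies (for example `enable_if_env`), assuming `meta` maps environment variable names to lists of expected values:

```python
def condition_matches(meta, env):
    """Simplified sketch of the AND matching used for conditional deps.

    Returns True only if every key in `meta` is satisfied by `env`.
    """
    false_values = {"no", "off", "false", "0"}

    for key, expected in meta.items():
        expected = {str(v).lower() for v in expected}

        if key in env:
            value = str(env[key]).lower()
            # Per the patch comment: any set value other than a false-like
            # value is taken as "set" and therefore satisfies the condition.
            if value not in false_values:
                continue
            # A false-like value satisfies a condition that expects false.
            if expected & false_values or value in expected:
                continue
        else:
            # New in this patch: a key missing from env satisfies a
            # condition whose expected value is false-like.
            if expected & false_values:
                continue

        return False

    return True


# Illustrative checks (variable names for illustration only):
assert condition_matches({"CM_RUN_STATE_DOCKER": ["yes"]}, {"CM_RUN_STATE_DOCKER": "yes"})
assert not condition_matches({"CM_RUN_STATE_DOCKER": ["yes"]}, {})
assert condition_matches({"CM_USE_GPU": ["no"]}, {})  # missing key matches a false expectation
```

The third check is the behavioral change introduced by the new `else` branch: a condition expecting a false-like value is now considered satisfied when the key is absent from the environment, instead of failing the whole dependency check.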