diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 2db29879b..5bc3466b8 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1889,6 +1889,8 @@ def docker(i): all_gpus = i.get('docker_all_gpus', docker_settings.get('all_gpus')) + num_gpus = i.get('docker_num_gpus', docker_settings.get('num_gpus')) + device = i.get('docker_device', docker_settings.get('device')) r = check_gh_token(i, docker_settings, quiet) @@ -1983,6 +1985,9 @@ def docker(i): if all_gpus: cm_docker_input['all_gpus'] = True + if num_gpus: + cm_docker_input['num_gpus'] = str(num_gpus) + if device: cm_docker_input['device'] = device diff --git a/automation/utils/module_cfg.py b/automation/utils/module_cfg.py index 94636bdb6..145c388f2 100644 --- a/automation/utils/module_cfg.py +++ b/automation/utils/module_cfg.py @@ -282,14 +282,19 @@ def select_cfg(i): meta = r['meta'] ss['meta'] = meta + selector = sorted(selector, key = lambda x: x['meta'].get('name','')) + s = 0 + for ss in selector: alias = ss['alias'] - name = meta.get('name','') + name = ss['meta'].get('name','') x = name if x!='': x+=' ' x += '('+alias+')' print ('{}) {}'.format(s, x)) + + s+=1 print ('') select = input ('Enter configuration number or press Enter for 0: ') diff --git a/cfg/docker-basic-configurations/nvidia-ubuntu-20.04-cuda-11.8-cudnn-8.6.0-pytorch-1.13.0.yaml new file mode 100644 index 000000000..1e71c67ce --- /dev/null +++ b/cfg/docker-basic-configurations/nvidia-ubuntu-20.04-cuda-11.8-cudnn-8.6.0-pytorch-1.13.0.yaml @@ -0,0 +1,9 @@ +uid: 854e65fb31584d63 + +name: "Nvidia Ubuntu 20.04 CUDA 11.8 cuDNN 8.6.0 PyTorch 1.13.0" + +input: + docker_base_image: 'nvcr.io/nvidia/pytorch:22.10-py3' + docker_os: ubuntu + docker_os_version: '20.04' + \ No newline at end of file diff --git a/cfg/docker-basic-configurations/nvidia-ubuntu-22.04-cuda-12.1-cudnn-8.9.1-pytorch-2.0.0.yaml new file mode 100644 index 000000000..4e146ca66 --- /dev/null +++ b/cfg/docker-basic-configurations/nvidia-ubuntu-22.04-cuda-12.1-cudnn-8.9.1-pytorch-2.0.0.yaml @@ -0,0 +1,9 @@ +uid: e0e7167139a74e36 + +name: "Nvidia Ubuntu 22.04 CUDA 12.1 cuDNN 8.9.1 PyTorch 2.0.0" + +input: + docker_base_image: 'nvcr.io/nvidia/pytorch:23.05-py3' + docker_os: ubuntu + docker_os_version: '22.04' + \ No newline at end of file diff --git a/cfg/docker-basic-configurations/nvidia-ubuntu-22.04-cuda-12.4-cudnn-9.0.0-pytorch-2.3.0.yaml new file mode 100644 index 000000000..a671d699a --- /dev/null +++ b/cfg/docker-basic-configurations/nvidia-ubuntu-22.04-cuda-12.4-cudnn-9.0.0-pytorch-2.3.0.yaml @@ -0,0 +1,9 @@ +uid: 49fc51f2999b4545 + +name: "Nvidia Ubuntu 22.04 CUDA 12.4 cuDNN 9.0.0 PyTorch 2.3.0" + +input: + docker_base_image: 'nvcr.io/nvidia/pytorch:24.03-py3' + docker_os: ubuntu + docker_os_version: '22.04' + \ No newline at end of file diff --git a/script/app-image-classification-onnx-py/_cm.yaml b/script/app-image-classification-onnx-py/_cm.yaml index 2fbf9f74f..b2b155881 100644 --- a/script/app-image-classification-onnx-py/_cm.yaml +++ b/script/app-image-classification-onnx-py/_cm.yaml @@ -117,5 +117,3 @@ docker: - env.CM_IMAGE - output - j - pre_run_cmds: - - echo \"CM pre run commands\"
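Note on the `select_cfg()` hunk above: sorting `selector` by name before printing makes the numbering of the configuration menu deterministic, and the explicit `s` counter keeps the printed index in step with the sorted order. A minimal standalone sketch of the resulting behavior (the sample entries reuse names from the new cfg files; `enumerate` is an equivalent idiom for the manual counter):

```python
# Sketch of the selection menu logic added in module_cfg.py (sample data is illustrative).
selector = [
    {'alias': 'cfg-b', 'meta': {'name': 'Nvidia Ubuntu 22.04 CUDA 12.4 cuDNN 9.0.0 PyTorch 2.3.0'}},
    {'alias': 'cfg-a', 'meta': {'name': 'Nvidia Ubuntu 20.04 CUDA 11.8 cuDNN 8.6.0 PyTorch 1.13.0'}},
]

# Sort by name so the menu order is stable across runs
selector = sorted(selector, key=lambda x: x['meta'].get('name', ''))

# enumerate() replaces the manual s = 0 ... s += 1 counter from the patch
for s, ss in enumerate(selector):
    name = ss['meta'].get('name', '')
    x = name + ' ' if name != '' else ''
    x += '(' + ss['alias'] + ')'
    print('{}) {}'.format(s, x))
```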
diff --git a/script/app-loadgen-generic-python/README-extra.md b/script/app-loadgen-generic-python/README-extra.md index 1bcdabddf..cdd08ef41 100644 --- a/script/app-loadgen-generic-python/README-extra.md +++ b/script/app-loadgen-generic-python/README-extra.md @@ -18,7 +18,7 @@ and pull CM repository with portable automation scripts to benchmark ML Systems: ```bash pip install cmind -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` ### Clean CM cache diff --git a/script/app-mlperf-inference-nvidia/README-about.md b/script/app-mlperf-inference-nvidia/README-about.md index 8c353e9ac..b78d64b62 100644 --- a/script/app-mlperf-inference-nvidia/README-about.md +++ b/script/app-mlperf-inference-nvidia/README-about.md @@ -52,13 +52,13 @@ Assuming all the downloaded files are to the user home directory please do the f --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ --imagenet_path=$HOME/imagenet-2012-val \ --scratch_path=$HOME/mlperf_scratch \ - --docker_cm_repo=mlcommons@ck \ + --docker_cm_repo=mlcommons@cm4mlops \ --results_dir=$HOME/results_dir \ --submission_dir=$HOME/submission_dir \ --adr.compiler.tags=gcc ``` * Use `--docker_cache=no` to turn off docker caching - * Use `--docker_run_cmd_prefix="cm pull repo mlcommons@ck"` to update the CK repository when docker caching is used + * Use `--docker_run_cmd_prefix="cm pull repo mlcommons@cm4mlops --checkout=dev"` to update the CM repository when docker caching is used * Use `--custom_system=no` if you are using a similar system to the [Nvidia submission systems for MLPerf inference 3.0](https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/NVIDIA/systems). 6. At the end of the build you'll get the following prompt unless you have chosen `--custom_system=no`. Please give a system name and say yes to generating the configuration files diff --git a/script/app-mlperf-inference-nvidia/README.md b/script/app-mlperf-inference-nvidia/README.md index 53b6220a7..c7f83ff09 100644 --- a/script/app-mlperf-inference-nvidia/README.md +++ b/script/app-mlperf-inference-nvidia/README.md @@ -65,13 +65,13 @@ Assuming all the downloaded files are to the user home directory please do the f --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ --imagenet_path=$HOME/imagenet-2012-val \ --scratch_path=$HOME/mlperf_scratch \ - --docker_cm_repo=mlcommons@ck \ + --docker_cm_repo=mlcommons@cm4mlops \ --results_dir=$HOME/results_dir \ --submission_dir=$HOME/submission_dir \ --adr.compiler.tags=gcc ``` * Use `--docker_cache=no` to turn off docker caching - * Use `--docker_run_cmd_prefix="cm pull repo mlcommons@ck"` to update the CK repository when docker caching is used + * Use `--docker_run_cmd_prefix="cm pull repo mlcommons@cm4mlops"` to update the CM repository when docker caching is used * Use `--custom_system=no` if you are using a similar system to the [Nvidia submission systems for MLPerf inference 3.0](https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/NVIDIA/systems). 6. At the end of the build you'll get the following prompt unless you have chosen `--custom_system=no`.
Please give a system name and say yes to generating the configuration files diff --git a/script/app-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md b/script/app-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md index 7dde06647..311b3b182 100644 --- a/script/app-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md +++ b/script/app-mlperf-inference-qualcomm/README_aws_dl2q.24xlarge.md @@ -12,7 +12,7 @@ image from the Community AMIs is the recommended OS image as it comes with the Q ``` sudo yum install -y python38-devel git python3.8 -m pip install cmind -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops cm run script --tags=get,python --version_min=3.8.1 ``` diff --git a/script/app-mlperf-inference/README-extra.md b/script/app-mlperf-inference/README-extra.md index 90f17dffb..e661f3e53 100644 --- a/script/app-mlperf-inference/README-extra.md +++ b/script/app-mlperf-inference/README-extra.md @@ -18,16 +18,16 @@ source .profile Next you need to install a CM repository with [cross-platform CM scripts](https://github.com/mlcommons/cm4mlops/tree/main/script) for ML Systems: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` -Note that you can fork [this repository](https://github.com/mlcommons/ck) and use it instead of mlcommons@ck +Note that you can fork [this repository](https://github.com/mlcommons/cm4mlops) and use it instead of mlcommons@cm4mlops to add CM scripts for your own public and private ML models, data sets, software and hardware. -In such case, just change mlcommons@ck to your own fork in the above command. +In that case, just change mlcommons@cm4mlops to your own fork in the above command. You can find the location of this repository on your system as follows: ```bash -cm find repo mlcommons@ck +cm find repo mlcommons@cm4mlops ``` Now we suggest you to set up a virtual python via CM to avoid mixing up your native Python installation: diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index a8f363c23..8a18e84ff 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -244,6 +244,10 @@ variations: deps: - tags: get,mlperf,inference,nvidia,scratch,space - tags: get,nvidia-docker + skip_if_env: + CM_SKIP_GET_NVIDIA_DOCKER: + - 'yes' + mounts: - "${{ CM_CUDNN_TAR_FILE_PATH }}:${{ CM_CUDNN_TAR_FILE_PATH }}" - "${{ CM_TENSORRT_TAR_FILE_PATH }}:${{ CM_TENSORRT_TAR_FILE_PATH }}" diff --git a/script/build-docker-image/_cm.json b/script/build-docker-image/_cm.json deleted file mode 100644 index a39d6d379..000000000 --- a/script/build-docker-image/_cm.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "alias": "build-docker-image", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "category": "Docker automation", - "cache": false, - "input_mapping": { - "cache": "CM_DOCKER_CACHE", - "cm_repo": "CM_MLOPS_REPO", - "docker_os": "CM_DOCKER_OS", - "docker_os_version": "CM_DOCKER_OS_VERSION", - "dockerfile": "CM_DOCKERFILE_WITH_PATH", - "gh_token": "CM_GH_TOKEN", - "image_repo": "CM_DOCKER_IMAGE_REPO", - "image_name": "CM_DOCKER_IMAGE_NAME", - "image_tag": "CM_DOCKER_IMAGE_TAG", - "script_tags": "CM_DOCKER_RUN_SCRIPT_TAGS", - "real_run": "CM_REAL_RUN", - "pre_run_cmds": "CM_DOCKER_PRE_RUN_COMMANDS", - "post_run_cmds": "CM_DOCKER_POST_RUN_COMMANDS" - }, - "default_env": { - "CM_DOCKER_IMAGE_REPO": "local", - "CM_DOCKER_IMAGE_TAG": "latest" - }, - "new_env_keys": [ - "CM_DOCKER_*" - ], - "prehook_deps": [ - { - "enable_if_env": { - "CM_BUILD_DOCKERFILE": [
- "yes", - "1" - ] - }, - "tags": "build,dockerfile" - } - ], - "tags": [ - "build", - "docker", - "image", - "docker-image", - "dockerimage" - ], - "uid": "2c3c4ba2413442e7" -} diff --git a/script/build-dockerfile/_cm.json b/script/build-dockerfile/_cm.json deleted file mode 100644 index 0a438e8fd..000000000 --- a/script/build-dockerfile/_cm.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "alias": "build-dockerfile", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "category": "Docker automation", - "cache": false, - "input_mapping": { - "extra_sys_deps": "CM_DOCKER_EXTRA_SYS_DEPS", - "build": "CM_BUILD_DOCKER_IMAGE", - "cache": "CM_DOCKER_CACHE", - "cm_repo": "CM_MLOPS_REPO", - "cm_repo_flags": "CM_DOCKER_ADD_FLAG_TO_CM_MLOPS_REPO", - "cm_repos": "CM_DOCKER_EXTRA_CM_REPOS", - "docker_os": "CM_DOCKER_OS", - "docker_os_version": "CM_DOCKER_OS_VERSION", - "docker_base_image": "CM_DOCKER_IMAGE_BASE", - "fake_run_option": "CM_DOCKER_FAKE_RUN_OPTION", - "file_path": "CM_DOCKERFILE_WITH_PATH", - "gh_token": "CM_GH_TOKEN", - "fake_docker_deps": "CM_DOCKER_FAKE_DEPS", - "image_repo": "CM_DOCKER_IMAGE_REPO", - "image_tag": "CM_DOCKER_IMAGE_TAG", - "real_run": "CM_REAL_RUN", - "run_cmd": "CM_DOCKER_RUN_CMD", - "script_tags": "CM_DOCKER_RUN_SCRIPT_TAGS", - "comments": "CM_DOCKER_RUN_COMMENTS", - "run_cmd_extra": "CM_DOCKER_RUN_CMD_EXTRA", - "pre_run_cmds": "CM_DOCKER_PRE_RUN_COMMANDS", - "post_run_cmds": "CM_DOCKER_POST_RUN_COMMANDS", - "post_file": "DOCKER_IMAGE_POST_FILE", - "copy_files": "CM_DOCKER_COPY_FILES", - "pip_extra_flags": "CM_DOCKER_PIP_INSTALL_EXTRA_FLAGS", - "package_manager_update_cmd": "CM_PACKAGE_MANAGER_UPDATE_CMD" - }, - "default_env": { - "CM_DOCKER_BUILD_SLIM": "no", - "CM_DOCKER_OS": "ubuntu", - "CM_DOCKER_IMAGE_EOL": "\n" - }, - "new_env_keys": [ - "CM_DOCKERFILE_*" - ], - "post_deps": [ - { - "enable_if_env": { - "CM_BUILD_DOCKER_IMAGE": [ - "yes", - "1" - ] - }, - "names": [ - "build-docker-image" - ], - "tags": "build,docker,image" - } - ], - "tags": [ - "build", - "dockerfile" - ], - "uid": "e66a7483230d4641", - "variations": { - "slim": { - "env": { - "CM_DOCKER_BUILD_SLIM": "yes" - } - } - } -} diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 3285224d4..2e35d831f 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -41,7 +41,7 @@ def preprocess(i): continue arg=env_ if env_ in default_env: #other inputs to be done later - arg=arg+"="+default_env[env_] + arg=arg+"="+str(default_env[env_]) #build_args.append(arg) #input_args.append("--"+input_+"="+"$"+env_) diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml index bb6f60a68..c673111c5 100644 --- a/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -240,6 +240,10 @@ docker: - tags: get,mlperf,inference,results,dir - tags: get,mlperf,inference,submission,dir - tags: get,nvidia-docker + skip_if_env: + CM_SKIP_GET_NVIDIA_DOCKER: + - 'yes' + pre_run_cmds: - cm pull repo run_cmd_prefix: sudo apt remove -y cmake diff --git a/script/generate-mlperf-tiny-report/README-extra.md b/script/generate-mlperf-tiny-report/README-extra.md index cf2e3366a..36a0c58fc 100644 --- a/script/generate-mlperf-tiny-report/README-extra.md +++ b/script/generate-mlperf-tiny-report/README-extra.md @@ -11,7 +11,7 @@ Install [MLCommons CM framework](https://github.com/mlcommons/ck/blob/master/doc Pull the MLCommons CK
repository with automation recipes for interoperable MLOps: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` Install repositories with raw MLPerf inference benchmark results: diff --git a/script/get-cuda-devices/_cm.json b/script/get-cuda-devices/_cm.json deleted file mode 100644 index 9b41c8cfe..000000000 --- a/script/get-cuda-devices/_cm.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "alias": "get-cuda-devices", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "can_force_cache": true, - "cache": false, - "category": "CUDA automation", - "clean_files": [ - "tmp-run.out" - ], - "new_env_keys": [ - "CM_CUDA_DEVICE_*" - ], - "new_state_keys": [ - "cm_cuda_device_prop" - ], - "deps": [ - { - "names": [ - "cuda" - ], - "tags": "get,cuda,_toolkit" - } - ], - "print_files_if_script_error": [ - "tmp-run.out" - ], - "tags": [ - "get", - "cuda-devices" - ], - "docker": { - "run": false - }, - "uid": "7a3ede4d3558427a" -} diff --git a/script/get-cuda/_cm.json b/script/get-cuda/_cm.json deleted file mode 100644 index fa5dac580..000000000 --- a/script/get-cuda/_cm.json +++ /dev/null @@ -1,130 +0,0 @@ -{ - "alias": "get-cuda", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "cache": true, - "category": "CUDA automation", - "clean_files": [], - "default_env": { - "CM_CUDA_PATH_LIB_CUDNN_EXISTS": "no", - "CM_REQUIRE_INSTALL": "no" - }, - "docker": { - }, - "input_mapping": { - "cudnn_tar_path": "CM_CUDNN_TAR_FILE_PATH", - "cudnn_tar_file": "CM_CUDNN_TAR_FILE_PATH" - }, - "new_env_keys": [ - "CUDA_HOME", - "CUDA_PATH", - "CM_CUDA_*", - "CM_NVCC_*", - "+PATH", - "+C_INCLUDE_PATH", - "+CPLUS_INCLUDE_PATH", - "+LD_LIBRARY_PATH", - "+DYLD_FALLBACK_LIBRARY_PATH", - "+ LDFLAGS" - ], - "deps": [ - { - "tags": "detect,os" - }, - { - "enable_if_env": { - "CM_CUDA_FULL_TOOLKIT_INSTALL": [ - "yes" - ], - "CM_HOST_OS_TYPE": [ - "windows" - ] - }, - "names": [ - "compiler" - ], - "tags": "get,cl" - } - ], - "prehook_deps": [ - { - "enable_if_env": { - "CM_REQUIRE_INSTALL": [ - "yes" - ] - }, - "names": [ - "install-cuda-prebuilt" - ], - "reuse_version": true, - "tags": "install,cuda,prebuilt" - }, - { - "enable_if_env": { - "CM_CUDA_PACKAGE_MANAGER_INSTALL": [ - "yes" - ] - }, - "tags": "get,generic-sys-util,_nvidia-cuda-toolkit" - } - ], - "print_files_if_script_error": [ - "tmp-ver.out" - ], - "tags": [ - "get", - "cuda", - "cuda-compiler", - "cuda-lib", - "toolkit", - "lib", - "nvcc", - "get-nvcc", - "get-cuda" - ], - "uid": "46d133d9ef92422d", - "variations": { - "cudnn": { - "env": { - "CM_CUDA_NEEDS_CUDNN": "yes" - }, - "post_deps": [ - { - "names": [ - "cudnn" - ], - "tags": "get,nvidia,cudnn" - } - ] - }, - "lib-only": { - "env": { - "CM_CUDA_FULL_TOOLKIT_INSTALL": "no", - "CM_TMP_FILE_TO_CHECK_UNIX": "libcudart.so", - "CM_TMP_FILE_TO_CHECK_WINDOWS": "libcudart.dll" - }, - "group": "installation-mode" - }, - "package-manager": { - "env": { - "CM_CUDA_PACKAGE_MANAGER_INSTALL": "yes" - } - }, - "toolkit": { - "default": true, - "env": { - "CM_CUDA_FULL_TOOLKIT_INSTALL": "yes", - "CM_TMP_FILE_TO_CHECK_UNIX": "nvcc", - "CM_TMP_FILE_TO_CHECK_WINDOWS": "nvcc.exe" - }, - "group": "installation-mode" - } - }, - "print_env_at_the_end" : { - "CM_CUDA_PATH_LIB_CUDNN_EXISTS": "", - "CM_CUDA_VERSION": "", - "CM_CUDA_VERSION_STRING": "", - "CM_NVCC_BIN_WITH_PATH": "", - "CUDA_HOME": "" - } -} diff --git a/script/get-cuda/_cm.yaml b/script/get-cuda/_cm.yaml new file mode 100644 index 000000000..5c0d6cb34 --- /dev/null +++ 
b/script/get-cuda/_cm.yaml @@ -0,0 +1,100 @@ +alias: get-cuda +uid: 46d133d9ef92422d + +automation_alias: script +automation_uid: 5b4e0237da074764 + +tags: +- get +- cuda +- cuda-compiler +- cuda-lib +- toolkit +- lib +- nvcc +- get-nvcc +- get-cuda +- 46d133d9ef92422d + +cache: true + +category: CUDA automation + +default_env: + CM_CUDA_PATH_LIB_CUDNN_EXISTS: 'no' + CM_REQUIRE_INSTALL: 'no' + +deps: +- tags: detect,os +- enable_if_env: + CM_CUDA_FULL_TOOLKIT_INSTALL: + - 'yes' + CM_HOST_OS_TYPE: + - windows + names: + - compiler + tags: get,cl + +input_mapping: + cudnn_tar_file: CM_CUDNN_TAR_FILE_PATH + cudnn_tar_path: CM_CUDNN_TAR_FILE_PATH + +new_env_keys: +- CUDA_HOME +- CUDA_PATH +- CM_CUDA_* +- CM_NVCC_* +- +PATH +- +C_INCLUDE_PATH +- +CPLUS_INCLUDE_PATH +- +LD_LIBRARY_PATH +- +DYLD_FALLBACK_LIBRARY_PATH +- + LDFLAGS + +prehook_deps: +- enable_if_env: + CM_REQUIRE_INSTALL: + - 'yes' + names: + - install-cuda-prebuilt + reuse_version: true + tags: install,cuda,prebuilt +- enable_if_env: + CM_CUDA_PACKAGE_MANAGER_INSTALL: + - 'yes' + tags: get,generic-sys-util,_nvidia-cuda-toolkit + +print_env_at_the_end: + CM_CUDA_PATH_LIB_CUDNN_EXISTS: '' + CM_CUDA_VERSION: '' + CM_CUDA_VERSION_STRING: '' + CM_NVCC_BIN_WITH_PATH: '' + CUDA_HOME: '' + +print_files_if_script_error: +- tmp-ver.out + +variations: + cudnn: + env: + CM_CUDA_NEEDS_CUDNN: 'yes' + post_deps: + - names: + - cudnn + tags: get,nvidia,cudnn + lib-only: + env: + CM_CUDA_FULL_TOOLKIT_INSTALL: 'no' + CM_TMP_FILE_TO_CHECK_UNIX: libcudart.so + CM_TMP_FILE_TO_CHECK_WINDOWS: libcudart.dll + group: installation-mode + package-manager: + env: + CM_CUDA_PACKAGE_MANAGER_INSTALL: 'yes' + toolkit: + default: true + env: + CM_CUDA_FULL_TOOLKIT_INSTALL: 'yes' + CM_TMP_FILE_TO_CHECK_UNIX: nvcc + CM_TMP_FILE_TO_CHECK_WINDOWS: nvcc.exe + group: installation-mode diff --git a/script/get-cudnn/_cm.json b/script/get-cudnn/_cm.json deleted file mode 100644 index 15fe03988..000000000 --- a/script/get-cudnn/_cm.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "alias": "get-cudnn", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "cache": true, - "category": "CUDA automation", - "clean_files": [], - "default_env": { - "CM_SUDO": "sudo", - "CM_INPUT": "" - }, - "input_mapping": { - "input": "CM_INPUT", - "tar_file": "CM_CUDNN_TAR_FILE_PATH" - }, - "input_description": { - "input": {"desc":"Full path to the installed cuDNN library"}, - "tar_file": {"desc":"Full path to the cuDNN Tar file downloaded from Nvidia website (https://developer.nvidia.com/cudnn)"} - }, - "deps": [ - { - "tags": "detect,os" - }, - { - "names": [ "cuda" ], - "tags": "get,cuda", - "skip_if_env": { - "CM_CUDA_PATH_LIB": [ "on" ], - "CM_CUDA_PATH_INCLUDE": [ "on" ] - } - } - ], - "new_env_keys": [ - "CM_CUDNN_*", - "CM_CUDA_PATH_LIB_CUDNN", - "CM_CUDA_PATH_INCLUDE_CUDNN", - "CM_CUDA_PATH_LIB_CUDNN_EXISTS", - "+PATH", - "+C_INCLUDE_PATH", - "+CPLUS_INCLUDE_PATH", - "+LD_LIBRARY_PATH", - "+DYLD_FALLBACK_LIBRARY_PATH" - ], - "tags": [ - "get", - "cudnn", - "nvidia" - ], - "uid": "d73ee19baee14df8", - "docker": { - } -} diff --git a/script/get-cudnn/_cm.yaml b/script/get-cudnn/_cm.yaml new file mode 100644 index 000000000..b01506f6d --- /dev/null +++ b/script/get-cudnn/_cm.yaml @@ -0,0 +1,55 @@ +alias: get-cudnn +uid: d73ee19baee14df8 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +tags: +- get +- cudnn +- nvidia + +cache: true + +category: CUDA automation + +default_env: + CM_INPUT: '' + CM_SUDO: sudo + +deps: +- tags: detect,os +- names: + - cuda + 
skip_if_env: + CM_CUDA_PATH_INCLUDE: + - 'on' + CM_CUDA_PATH_LIB: + - 'on' + tags: get,cuda + +input_description: + input: + desc: Full path to the installed cuDNN library + tar_file: + desc: Full path to the cuDNN Tar file downloaded from Nvidia website (https://developer.nvidia.com/cudnn) + +input_mapping: + input: CM_INPUT + tar_file: CM_CUDNN_TAR_FILE_PATH + +new_env_keys: +- CM_CUDNN_* +- CM_CUDA_PATH_LIB_CUDNN +- CM_CUDA_PATH_INCLUDE_CUDNN +- CM_CUDA_PATH_LIB_CUDNN_EXISTS +- +PATH +- +C_INCLUDE_PATH +- +CPLUS_INCLUDE_PATH +- +LD_LIBRARY_PATH +- +DYLD_FALLBACK_LIBRARY_PATH + +print_env_at_the_end: + CM_CUDA_PATH_LIB_CUDNN: '' + CM_CUDA_PATH_INCLUDE_CUDNN: '' + CM_CUDNN_VERSION: '' diff --git a/script/get-cudnn/customize.py b/script/get-cudnn/customize.py index 12c45e5a0..db43d93d3 100644 --- a/script/get-cudnn/customize.py +++ b/script/get-cudnn/customize.py @@ -91,6 +91,10 @@ def preprocess(i): else: return r else: + # On Linux we may have detected a file instead of the path to cuDNN + if os.path.isfile(env['CM_CUDA_PATH_LIB_CUDNN']): + env['CM_CUDA_PATH_LIB_CUDNN'] = os.path.dirname(env['CM_CUDA_PATH_LIB_CUDNN']) + return {'return':0} if env.get('CM_CUDNN_TAR_FILE_PATH','')=='': @@ -136,7 +140,54 @@ def postprocess(i): os_info = i['os_info'] env = i['env'] + version = env['CM_CUDNN_VERSION'] + + if version == 'vdetected': + path_to_cudnn = env.get('CM_CUDA_PATH_LIB_CUDNN','') + if os.path.isdir(path_to_cudnn): + path_to_include = path_to_cudnn + path_to_include_file = '' + for j in range(0,2): + path_to_include = os.path.dirname(path_to_include) + x = os.path.join(path_to_include, 'include', 'cudnn_version.h') + if os.path.isfile(x): + path_to_include_file = x + break + + if path_to_include_file == '' and path_to_cudnn.startswith('/lib'): + x = os.path.join('/usr','include','cudnn_version.h') + if os.path.isfile(x): + path_to_include_file = x + + if path_to_include_file != '': + env['CM_CUDA_PATH_INCLUDE_CUDNN'] = os.path.dirname(path_to_include_file) + + r = utils.load_txt(path_to_include_file, split=True) + if r['return'] == 0: + lst = r['list'] + + xversion = '' + + for l in lst: + l=l.strip() + + x = '#define CUDNN_MAJOR ' + if l.startswith(x): + xversion=l[len(x):] + + x = '#define CUDNN_MINOR ' + if l.startswith(x): + xversion+='.'+l[len(x):] + + x = '#define CUDNN_PATCHLEVEL ' + if l.startswith(x): + xversion+='.'+l[len(x):] + + if xversion != '': + version = xversion + env['CM_CUDNN_VERSION'] = xversion + env['CM_CUDA_PATH_LIB_CUDNN_EXISTS']='yes' return {'return':0, 'version': version}
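Note on the `postprocess()` hunk above: when the detected version is the placeholder `vdetected`, the code walks up to two directory levels from the detected library path looking for `include/cudnn_version.h` (falling back to `/usr/include` for `/lib...` paths) and assembles the real version from the `CUDNN_MAJOR`, `CUDNN_MINOR` and `CUDNN_PATCHLEVEL` defines. A condensed sketch of just the parsing step (standalone; the header path in the usage comment is an assumption):

```python
# Condensed sketch of the cudnn_version.h parsing added in get-cudnn/customize.py.
def parse_cudnn_version(path_to_include_file):
    prefixes = ['#define CUDNN_MAJOR ', '#define CUDNN_MINOR ', '#define CUDNN_PATCHLEVEL ']
    parts = {}
    with open(path_to_include_file) as f:
        for line in f:
            line = line.strip()
            for p in prefixes:
                if line.startswith(p):
                    parts[p] = line[len(p):]
    # Join major.minor.patch in order, skipping any define that was not found
    return '.'.join(parts[p] for p in prefixes if p in parts)

# Hypothetical usage; the real script derives the path from CM_CUDA_PATH_LIB_CUDNN:
# parse_cudnn_version('/usr/include/cudnn_version.h')  ->  e.g. '8.9.2'
```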
diff --git a/script/get-dataset-imagenet-val/README-extra.md b/script/get-dataset-imagenet-val/README-extra.md index 06d67c949..75b310b29 100644 --- a/script/get-dataset-imagenet-val/README-extra.md +++ b/script/get-dataset-imagenet-val/README-extra.md @@ -6,7 +6,7 @@ However, it seems that you can still download it via [Academic Torrents](https:/ You can then register in the MLCommons CM using this portable CM script as follows: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` ```bash diff --git a/script/import-experiment-to-sqlite/README.md b/script/import-experiment-to-sqlite/README.md index 33b3324ce..cf987d9cb 100644 --- a/script/import-experiment-to-sqlite/README.md +++ b/script/import-experiment-to-sqlite/README.md @@ -28,7 +28,7 @@ #### Summary * Category: *DevOps automation.* -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* +* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* * GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/import-experiment-to-sqlite)* * CM meta description for this script: *[_cm.yaml](_cm.yaml)* * CM "database" tags to find this script: *import,experiment2sqlite* @@ -43,7 +43,7 @@ ___ #### Pull CM repository with this automation -```cm pull repo mlcommons@ck``` +```cm pull repo mlcommons@cm4mlops --checkout=dev``` #### Run this script from command line diff --git a/script/import-mlperf-inference-to-experiment/README-extra.md b/script/import-mlperf-inference-to-experiment/README-extra.md index 64b604d16..968c63d2d 100644 --- a/script/import-mlperf-inference-to-experiment/README-extra.md +++ b/script/import-mlperf-inference-to-experiment/README-extra.md @@ -21,7 +21,7 @@ Install [MLCommons CM framework](https://github.com/mlcommons/ck/blob/master/doc Pull the MLCommons CK repository with automation recipes for interoperable MLOps: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` Pull already imported results (v2.0, v2.1, v3.0, v3.1) from this [mlcommons@cm4mlperf-results repo](https://github.com/mlcommons/cm4mlperf-results): diff --git a/script/import-mlperf-tiny-to-experiment/README-extra.md b/script/import-mlperf-tiny-to-experiment/README-extra.md index 105e7ea4a..6d3e51d2a 100644 --- a/script/import-mlperf-tiny-to-experiment/README-extra.md +++ b/script/import-mlperf-tiny-to-experiment/README-extra.md @@ -21,7 +21,7 @@ Install [MLCommons CM framework](https://github.com/mlcommons/ck/blob/master/doc Pull the MLCommons CK repository with automation recipes for interoperable MLOps: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` Install repositories with raw MLPerf inference benchmark results: diff --git a/script/import-mlperf-training-to-experiment/README-extra.md b/script/import-mlperf-training-to-experiment/README-extra.md index abfc76423..05b4f592d 100644 --- a/script/import-mlperf-training-to-experiment/README-extra.md +++ b/script/import-mlperf-training-to-experiment/README-extra.md @@ -19,7 +19,7 @@ Install [MLCommons CM automation language](https://github.com/mlcommons/ck/blob/ Pull the MLCommons CK repository with automation recipes for interoperable MLOps: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` Install repositories with raw MLPerf training benchmark results: diff --git a/script/reproduce-ipol-paper-2022-439/README-extra.md b/script/reproduce-ipol-paper-2022-439/README-extra.md index 0434ba49d..28afd8094 100644 --- a/script/reproduce-ipol-paper-2022-439/README-extra.md +++ b/script/reproduce-ipol-paper-2022-439/README-extra.md @@ -39,7 +39,7 @@ CM scripts are implemented for a demo on Ubuntu and must be tested across differ 2. Install MLCommons repository with CM automation scripts: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` 3.
Install src from IPOL 2022 439 paper: diff --git a/script/reproduce-micro-paper-2023-victima/README-extra.md b/script/reproduce-micro-paper-2023-victima/README-extra.md index 2aca8543f..b4c01e133 100644 --- a/script/reproduce-micro-paper-2023-victima/README-extra.md +++ b/script/reproduce-micro-paper-2023-victima/README-extra.md @@ -10,12 +10,12 @@ Install MLCommons CM using [this guide](https://github.com/mlcommons/ck/blob/ma Install reusable MLCommons automations: ```bash -cm pull repo mlcommons@ck +cm pull repo mlcommons@cm4mlops --checkout=dev ``` ### Run Victima via CM interface -The core CM script for Victima will be available under ```/CM/repos/mlcommons@ck/script/reproduce-micro-2023-paper-victima``` +The core CM script for Victima will be available under ```/CM/repos/mlcommons@cm4mlops/script/reproduce-micro-paper-2023-victima``` It is described by `_cm.yaml` and several native scripts. diff --git a/script/reproduce-micro-paper-2023-victima/README.md b/script/reproduce-micro-paper-2023-victima/README.md index 53ff934c4..41b899e1e 100644 --- a/script/reproduce-micro-paper-2023-victima/README.md +++ b/script/reproduce-micro-paper-2023-victima/README.md @@ -32,7 +32,7 @@ See extra [notes](README-extra.md) from the authors and contributors. #### Summary * Category: *Reproducibility and artifact evaluation.* -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* +* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* * GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-victima)* * CM meta description for this script: *[_cm.yaml](_cm.yaml)* * CM "database" tags to find this script: *reproduce,project,paper,micro,micro-2023,victima* @@ -47,7 +47,7 @@ ___ #### Pull CM repository with this automation -```cm pull repo mlcommons@ck``` +```cm pull repo mlcommons@cm4mlops --checkout=dev``` #### Run this script from command line
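Note on the repeated `cm pull repo mlcommons@cm4mlops --checkout=dev` updates in these READMEs: the same pull can be scripted through the `cmind` Python API instead of the CLI. A sketch, assuming the CLI's `--checkout` flag maps onto a `checkout` key in the `access()` input dictionary:

```python
import cmind

# Equivalent of: cm pull repo mlcommons@cm4mlops --checkout=dev
r = cmind.access({'action': 'pull',
                  'automation': 'repo',
                  'artifact': 'mlcommons@cm4mlops',
                  'checkout': 'dev'})
if r['return'] > 0:
    cmind.error(r)  # standard CM convention for propagating errors
```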
diff --git a/script/reproduce-micro-paper-2023-xyz/README.md b/script/reproduce-micro-paper-2023-xyz/README.md index b593da18a..0b4f7dcaa 100644 --- a/script/reproduce-micro-paper-2023-xyz/README.md +++ b/script/reproduce-micro-paper-2023-xyz/README.md @@ -31,7 +31,7 @@ See extra [notes](README-extra.md) from the authors and contributors. #### Summary -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* +* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* * GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-micro-paper-2023-xyz)* * CM meta description for this script: *[_cm.yaml](_cm.yaml)* * CM "database" tags to find this script: *reproduce,paper,micro,micro-2023,victima* @@ -46,7 +46,7 @@ ___ #### Pull CM repository with this automation -```cm pull repo mlcommons@ck``` +```cm pull repo mlcommons@cm4mlops --checkout=dev``` #### Run this script from command line diff --git a/script/reproduce-mlperf-inference-dummy/README.md b/script/reproduce-mlperf-inference-dummy/README.md index 1009b9e4d..36f245ef7 100644 --- a/script/reproduce-mlperf-inference-dummy/README.md +++ b/script/reproduce-mlperf-inference-dummy/README.md @@ -29,7 +29,7 @@ #### Summary * Category: *Modular MLPerf benchmarks.* -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* +* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* * GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-dummy)* * CM meta description for this script: *[_cm.yaml](_cm.yaml)* * CM "database" tags to find this script: *reproduce,mlcommons,mlperf,inference,harness,dummy-harness,dummy,dummy-harness,dummy* diff --git a/script/reproduce-mlperf-training-nvidia/_cm.yaml b/script/reproduce-mlperf-training-nvidia/_cm.yaml index b61b49555..a118ee3f7 100644 --- a/script/reproduce-mlperf-training-nvidia/_cm.yaml +++ b/script/reproduce-mlperf-training-nvidia/_cm.yaml @@ -41,6 +41,9 @@ deps: # Install system dependencies on a given host - tags: get,nvidia-docker + skip_if_env: + CM_SKIP_GET_NVIDIA_DOCKER: + - 'yes' # Detect CUDA - names: diff --git a/script/run-all-mlperf-models/README.md b/script/run-all-mlperf-models/README.md index a1e9d51bd..01f5427b1 100644 --- a/script/run-all-mlperf-models/README.md +++ b/script/run-all-mlperf-models/README.md @@ -28,7 +28,7 @@ #### Summary * Category: *MLPerf benchmark support.* -* CM GitHub repository: *[mlcommons@ck](https://github.com/mlcommons/ck/tree/master/cm-mlops)* +* CM GitHub repository: *[mlcommons@cm4mlops](https://github.com/mlcommons/cm4mlops)* * GitHub directory for this script: *[GitHub](https://github.com/mlcommons/cm4mlops/tree/main/script/run-all-mlperf-models)* * CM meta description for this script: *[_cm.yaml](_cm.yaml)* * CM "database" tags to find this script: *run,natively,all,mlperf-models* @@ -43,7 +43,7 @@ ___ #### Pull CM repository with this automation -```cm pull repo mlcommons@ck``` +```cm pull repo mlcommons@cm4mlops --checkout=dev``` #### Run this script from command line diff --git a/script/run-docker-container/README-extra.md b/script/run-docker-container/README-extra.md index 8fd260472..b930ef964 100644 --- a/script/run-docker-container/README-extra.md +++ b/script/run-docker-container/README-extra.md @@ -9,7 +9,7 @@ cm run script \ ### Options 1. `--script_tags="get,gcc"`: Script tags for the CM script to be run inside the docker container. If this is not set the cm command run inside the docker container is `cm version` -2.
`--cm_repo=ctuning@mlcommons-ck`: To use a different repo for CM scripts like "ctuning@mlcommons-ck". Default: `mlcommons@ck` +2. `--cm_repo=ctuning@mlcommons-ck`: To use a different repo for CM scripts like "ctuning@mlcommons-ck". Default: `mlcommons@cm4mlops` 3. `--base="ubuntu:22.04"`: Specify the base image for Dockerfile. Default: "ubuntu:20.04" 4. `--recreate=yes`: To recreate docker image even when existing. Default: "no" 5. `--adr.build-docker-image.tags=_cache`: To use build cache for docker image build. Default: "" (`nocache`) diff --git a/script/run-docker-container/_cm.json b/script/run-docker-container/_cm.json deleted file mode 100644 index 30d490bf5..000000000 --- a/script/run-docker-container/_cm.json +++ /dev/null @@ -1,61 +0,0 @@ -{ - "alias": "run-docker-container", - "automation_alias": "script", - "automation_uid": "5b4e0237da074764", - "category": "Docker automation", - "cache": false, - "clean_files": [], - "default_env": { - "CM_DOCKER_DETACHED_MODE": "yes" - }, - "input_mapping": { - "interactive": "CM_DOCKER_INTERACTIVE_MODE", - "base": "CM_DOCKER_IMAGE_BASE", - "cm_repo": "CM_MLOPS_REPO", - "recreate": "CM_DOCKER_IMAGE_RECREATE", - "gh_token": "CM_GH_TOKEN", - "it":"CM_DOCKER_INTERACTIVE", - "fake_run_option": "CM_DOCKER_FAKE_RUN_OPTION", - "detached": "CM_DOCKER_DETACHED_MODE", - "image_repo": "CM_DOCKER_IMAGE_REPO", - "image_name": "CM_DOCKER_IMAGE_NAME", - "image_tag": "CM_DOCKER_IMAGE_TAG", - "docker_os": "CM_DOCKER_OS", - "docker_os_version": "CM_DOCKER_OS_VERSION", - "docker_image_base": "CM_DOCKER_IMAGE_BASE", - "script_tags": "CM_DOCKER_RUN_SCRIPT_TAGS", - "run_cmd_extra": "CM_DOCKER_RUN_CMD_EXTRA", - "real_run": "CM_REAL_RUN", - "run_cmd": "CM_DOCKER_RUN_CMD", - "pre_run_cmds": "CM_DOCKER_PRE_RUN_COMMANDS", - "post_run_cmds": "CM_DOCKER_POST_RUN_COMMANDS", - "pass_user_group": "CM_DOCKER_PASS_USER_GROUP", - "mounts": "CM_DOCKER_VOLUME_MOUNTS", - "port_maps": "CM_DOCKER_PORT_MAPS", - "shm_size": "CM_DOCKER_SHM_SIZE", - "extra_run_args": "CM_DOCKER_EXTRA_RUN_ARGS", - "device": "CM_DOCKER_ADD_DEVICE", - "cache": "CM_DOCKER_CACHE", - "all_gpus": "CM_DOCKER_ADD_ALL_GPUS", - "save_script": "CM_DOCKER_SAVE_SCRIPT" - }, - "prehook_deps": [ - { - "names": [ - "build-docker-image" - ], - "skip_if_env": { - "CM_DOCKER_IMAGE_EXISTS": [ - "yes" - ] - }, - "tags": "build,docker,image" - } - ], - "tags": [ - "run", - "docker", - "container" - ], - "uid": "1e0c884107514b46" -} diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index 864b5b202..8064f0dea 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -18,6 +18,7 @@ default_env: input_mapping: all_gpus: CM_DOCKER_ADD_ALL_GPUS + num_gpus: CM_DOCKER_ADD_NUM_GPUS base: CM_DOCKER_IMAGE_BASE cache: CM_DOCKER_CACHE cm_repo: CM_MLOPS_REPO diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 754c62a90..4930c960b 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -107,7 +107,9 @@ def postprocess(i): if env.get('CM_DOCKER_ADD_DEVICE', '') != '': run_opts += " --device="+env['CM_DOCKER_ADD_DEVICE'] - if env.get('CM_DOCKER_ADD_ALL_GPUS', '') != '': + if env.get('CM_DOCKER_ADD_NUM_GPUS', '') != '': + run_opts += " --gpus={}".format(env['CM_DOCKER_ADD_NUM_GPUS']) + elif env.get('CM_DOCKER_ADD_ALL_GPUS', '') != '': run_opts += " --gpus=all" if env.get('CM_DOCKER_SHM_SIZE', '') != '':
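Note on the `run-docker-container` hunks above: the new `num_gpus` input maps to `CM_DOCKER_ADD_NUM_GPUS` and takes precedence over `all_gpus`; both reduce to Docker's `--gpus` flag (a count such as `2`, or `all`). A minimal sketch of the option resolution (the helper function name is hypothetical):

```python
# Sketch of the GPU flag logic in run-docker-container/customize.py:
# an explicit GPU count wins over the all-GPUs switch.
def gpu_run_opts(env):
    if env.get('CM_DOCKER_ADD_NUM_GPUS', '') != '':
        return ' --gpus={}'.format(env['CM_DOCKER_ADD_NUM_GPUS'])  # e.g. " --gpus=2"
    elif env.get('CM_DOCKER_ADD_ALL_GPUS', '') != '':
        return ' --gpus=all'
    return ''

assert gpu_run_opts({'CM_DOCKER_ADD_NUM_GPUS': '2'}) == ' --gpus=2'
```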
diff --git a/script/run-mlperf-inference-mobilenet-models/README-about.md b/script/run-mlperf-inference-mobilenet-models/README-about.md index a3d6991b2..beaa467a8 100644 --- a/script/run-mlperf-inference-mobilenet-models/README-about.md +++ b/script/run-mlperf-inference-mobilenet-models/README-about.md @@ -12,7 +12,7 @@ CM commands are expected to run natively but if you prefer not to modify the hos ``` cm docker script --tags=run,mobilenet-models,_tflite,_accuracy-only \ --adr.compiler.tags=gcc \ ---docker_cm_repo=mlcommons@ck \ +--docker_cm_repo=mlcommons@cm4mlops \ --imagenet_path=$HOME/imagenet-2012-val \ --results_dir=$HOME/mobilenet_results \ --submission_dir=$HOME/inference_submission_3.1 \ diff --git a/script/run-mlperf-inference-mobilenet-models/README.md b/script/run-mlperf-inference-mobilenet-models/README.md index 6e835a0d6..a72c5e798 100644 --- a/script/run-mlperf-inference-mobilenet-models/README.md +++ b/script/run-mlperf-inference-mobilenet-models/README.md @@ -25,7 +25,7 @@ CM commands are expected to run natively but if you prefer not to modify the hos ``` cm docker script --tags=run,mobilenet-models,_tflite,_accuracy-only \ --adr.compiler.tags=gcc \ ---docker_cm_repo=mlcommons@ck \ +--docker_cm_repo=mlcommons@cm4mlops \ --imagenet_path=$HOME/imagenet-2012-val \ --results_dir=$HOME/mobilenet_results \ --submission_dir=$HOME/inference_submission_3.1 \