diff --git a/README.md b/README.md
index d447da2cfd..f3a728ec28 100755
--- a/README.md
+++ b/README.md
@@ -77,6 +77,9 @@ cmr "get generic-python-lib _package.torchvision" --version=0.16.2
 cmr "python app image-classification torch" --input=computer_mouse.jpg
 
+cm rm repo mlcommons@ck
+cm pull repo --url=https://zenodo.org/records/10581696/files/cm-mlops-repo-20240129.zip
+
 cmr "install llvm prebuilt" --version=17.0.6
 
 cmr "app image corner-detection"
diff --git a/cm-mlops/automation/script/module_misc.py b/cm-mlops/automation/script/module_misc.py
index 92da5d42d1..ae6bc5e03b 100644
--- a/cm-mlops/automation/script/module_misc.py
+++ b/cm-mlops/automation/script/module_misc.py
@@ -1391,15 +1391,21 @@ def dockerfile(i):
         run_cmd = r['run_cmd_string']
 
-
+        docker_base_image = i.get('docker_base_image', docker_settings.get('base_image'))
         docker_os = i.get('docker_os', docker_settings.get('docker_os', 'ubuntu'))
         docker_os_version = i.get('docker_os_version', docker_settings.get('docker_os_version', '22.04'))
 
+        if not docker_base_image:
+            dockerfilename_suffix = docker_os +'_'+docker_os_version
+        else:
+            dockerfilename_suffix = docker_base_image.split("/")
+            dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1]
+
         fake_run_deps = i.get('fake_run_deps', docker_settings.get('fake_run_deps', False))
         docker_run_final_cmds = docker_settings.get('docker_run_final_cmds', [])
 
         gh_token = i.get('docker_gh_token')
 
-        if i.get('docker_real_run', False):
+        if i.get('docker_real_run', docker_settings.get('docker_real_run',False)):
             fake_run_option = " "
             fake_run_deps = False
         else:
@@ -1409,7 +1415,7 @@ def dockerfile(i):
         env['CM_DOCKER_PRE_RUN_COMMANDS'] = docker_run_final_cmds
 
-        dockerfile_path = os.path.join(script_path,'dockerfiles', docker_os +'_'+docker_os_version +'.Dockerfile')
+        dockerfile_path = os.path.join(script_path,'dockerfiles', dockerfilename_suffix +'.Dockerfile')
 
         if i.get('print_deps'):
             cm_input = {'action': 'run',
                         'automation': 'script',
@@ -1434,6 +1440,7 @@ def dockerfile(i):
                     'automation': 'script',
                     'tags': 'build,dockerfile',
                     'cm_repo': cm_repo,
+                    'docker_base_image': docker_base_image,
                     'docker_os': docker_os,
                     'docker_os_version': docker_os_version,
                     'file_path': dockerfile_path,
@@ -1659,9 +1666,18 @@ def docker(i):
         mount_string = "" if len(mounts)==0 else ",".join(mounts)
 
+        docker_base_image = i.get('docker_base_image', docker_settings.get('base_image'))
+        docker_os = i.get('docker_os', docker_settings.get('docker_os', 'ubuntu'))
+        docker_os_version = i.get('docker_os_version', docker_settings.get('docker_os_version', '22.04'))
+        if not docker_base_image:
+            dockerfilename_suffix = docker_os +'_'+docker_os_version
+        else:
+            dockerfilename_suffix = docker_base_image.split("/")
+            dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1]
+
         cm_repo=i.get('docker_cm_repo', 'mlcommons@ck')
 
-        dockerfile_path = os.path.join(script_path,'dockerfiles', _os +'_'+version +'.Dockerfile')
+        dockerfile_path = os.path.join(script_path,'dockerfiles', dockerfilename_suffix +'.Dockerfile')
 
         docker_skip_run_cmd = i.get('docker_skip_run_cmd', docker_settings.get('skip_run_cmd', False)) #skips docker run cmd and gives an interactive shell to the user
 
@@ -1712,7 +1728,9 @@ def docker(i):
                     'automation': 'script',
                     'tags': 'run,docker,container',
                     'recreate': 'yes',
+                    'docker_base_image': docker_base_image,
                     'docker_os': _os,
+                    'docker_os_version': version,
                     'cm_repo': cm_repo,
                     'env': env,
                     'image_repo': image_repo,
@@ -1720,7 +1738,6 @@
                     'mounts': mounts,
                    'image_name': 'cm-script-'+script_alias,
                    # 'image_tag': script_alias,
-                    'docker_os_version': version,
                     'detached': detached,
                     'script_tags': f'{tag_string}',
                     'run_cmd': run_cmd if docker_skip_run_cmd not in [ 'yes', True, 'True' ] else 'echo "cm version"',
diff --git a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml
index 80f28a0ff3..b1cd539b0c 100644
--- a/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml
+++ b/cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml
@@ -250,7 +250,7 @@ variations:
 
   offline,resnet50:
     default_variations:
-      batch-size: batch-size.8
+      batch-size: batch-size.32
 
   multistream,retinanet:
     default_variations:
diff --git a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
index 020eadfec4..d9162d9211 100644
--- a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
+++ b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/_cm.yaml
@@ -58,7 +58,7 @@ variations:
   nvidia:
     group: implementation
     env:
-      IMPLEMENTATION: nvidia
+      IMPLEMENTATION: nvidia-original
     default_env:
       MODELS: resnet50,retinanet,bert-99,bert-99.9,3d-unet-99,rnnt,gptj-99,gptj-99.9,dlrmv2-99,dlrmv2-99.9
       BACKENDS: tensorrt
diff --git a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
index b62daab258..174bf4c5eb 100644
--- a/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
+++ b/cm-mlops/script/benchmark-any-mlperf-inference-implementation/customize.py
@@ -109,9 +109,9 @@ def preprocess(i):
                     test_query_count = 100
                 else:
                     if model == "resnet50":
-                        test_query_count = 10000
+                        test_query_count = 40000
                     else:
-                        test_query_count = 1000
+                        test_query_count = 2000
                 cmd = f'run_test "{model}" "{backend}" "{test_query_count}" "{implementation}" "{device}" "$find_performance_cmd"'
                 cmds.append(cmd)
                 #second argument is unused for submission_cmd
diff --git a/cm-mlops/script/build-dockerfile/_cm.json b/cm-mlops/script/build-dockerfile/_cm.json
index 8959031639..3737cd40c6 100644
--- a/cm-mlops/script/build-dockerfile/_cm.json
+++ b/cm-mlops/script/build-dockerfile/_cm.json
@@ -10,6 +10,7 @@
     "cm_repo": "CM_MLOPS_REPO",
     "docker_os": "CM_DOCKER_OS",
     "docker_os_version": "CM_DOCKER_OS_VERSION",
+    "docker_base_image": "CM_DOCKER_IMAGE_BASE",
     "fake_run_option": "CM_DOCKER_FAKE_RUN_OPTION",
     "file_path": "CM_DOCKERFILE_WITH_PATH",
     "gh_token": "CM_GH_TOKEN",
diff --git a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
index 9a2a54f713..733bc9ca03 100644
--- a/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
+++ b/cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
@@ -197,6 +197,10 @@ versions:
 docker:
   skip_run_cmd: 'no'
   all_gpus: 'yes'
+  docker_os: ubuntu
+  docker_real_run: True
+  docker_os_version: '20.04'
+  base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
   docker_input_mapping:
     imagenet_path: IMAGENET_PATH
     results_dir: RESULTS_DIR
@@ -213,6 +217,3 @@ docker:
   - "${{ CM_TENSORRT_TAR_FILE_PATH }}:${{ CM_TENSORRT_TAR_FILE_PATH }}"
   - "${{ CUDA_RUN_FILE_LOCAL_PATH }}:${{ CUDA_RUN_FILE_LOCAL_PATH }}"
   - "${{ MLPERF_SCRATCH_PATH }}:${{ MLPERF_SCRATCH_PATH }}"
-  pre_run_cmds:
-  - cm pull repo mlcommons@ck
-  - cm run script --tags=get,dataset,original,imagenet,_full --imagenet_path=/data/imagenet-val
diff --git a/cm-mlops/script/get-cuda/_cm.json b/cm-mlops/script/get-cuda/_cm.json
index d397624bd9..7d5d171010 100644
--- a/cm-mlops/script/get-cuda/_cm.json
+++ b/cm-mlops/script/get-cuda/_cm.json
@@ -10,7 +10,6 @@
     "CM_REQUIRE_INSTALL": "no"
   },
   "docker": {
-    "run": false
   },
   "input_mapping": {
     "cudnn_tar_path": "CM_CUDNN_TAR_FILE_PATH",
diff --git a/cm-mlops/script/get-generic-python-lib/_cm.json b/cm-mlops/script/get-generic-python-lib/_cm.json
index 88ab4ce47c..e1dd645bf7 100644
--- a/cm-mlops/script/get-generic-python-lib/_cm.json
+++ b/cm-mlops/script/get-generic-python-lib/_cm.json
@@ -389,8 +389,8 @@
         }
       ],
       "env": {
-        "CM_GENERIC_PYTHON_PACKAGE_NAME": "nvidia-dali-cuda110",
-        "CM_GENERIC_PYTHON_PIP_EXTRA": " --upgrade",
+        "CM_GENERIC_PYTHON_PACKAGE_NAME": "nvidia-dali-cuda120",
+        "CM_GENERIC_PYTHON_PIP_EXTRA": " --upgrade --default-timeout=900",
         "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://developer.download.nvidia.com/compute/redist"
       },
       "new_env_keys": [
diff --git a/cm-mlops/script/get-ml-model-bert-large-squad/_cm.json b/cm-mlops/script/get-ml-model-bert-large-squad/_cm.json
index 59a845f521..354688e618 100644
--- a/cm-mlops/script/get-ml-model-bert-large-squad/_cm.json
+++ b/cm-mlops/script/get-ml-model-bert-large-squad/_cm.json
@@ -100,7 +100,8 @@
     },
     "onnx,int8": {
       "env": {
-        "CM_ML_MODEL_F1": "90.067"
+        "CM_ML_MODEL_F1": "90.067",
+        "CM_PACKAGE_URL": "https://zenodo.org/record/3750364/files/bert_large_v1_1_fake_quant.onnx"
       }
     },
     "onnx,int8,zenodo": {
diff --git a/cm-mlops/script/get-tensorrt/customize.py b/cm-mlops/script/get-tensorrt/customize.py
index 6a3e5bb638..b18fe35c3a 100644
--- a/cm-mlops/script/get-tensorrt/customize.py
+++ b/cm-mlops/script/get-tensorrt/customize.py
@@ -11,7 +11,8 @@ def preprocess(i):
 
     env = i['env']
 
-    if env.get('CM_TENSORRT_TAR_FILE_PATH','')=='' and env.get('CM_TENSORRT_REQUIRE_DEV', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64':
+    #Not enforcing dev requirement for now
+    if env.get('CM_TENSORRT_TAR_FILE_PATH','')=='' and env.get('CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64':
 
         if os_info['platform'] == 'windows':
             extra_pre=''
diff --git a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml
index d5390c9eb2..6a3f615ae9 100644
--- a/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml
+++ b/cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml
@@ -299,6 +299,10 @@ variations:
       default: true
      env:
         CM_MODEL: resnet50
+      deps:
+      - tags: get,generic-python-lib,_onnx-graphsurgeon
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
 
     retinanet:
       group: model
@@ -312,6 +316,9 @@ variations:
       - tags: get,generic-python-lib,_opencv-python
       - tags: get,generic-python-lib,_numpy
       - tags: get,generic-python-lib,_pycocotools
+      - tags: get,generic-python-lib,_onnx-graphsurgeon
+      - tags: get,generic-python-lib,_package.onnx
+        version: 1.13.1
 
     bert_:
       deps:
diff --git a/cm-mlops/script/run-docker-container/_cm.json b/cm-mlops/script/run-docker-container/_cm.json
index 046873adb0..2b20bdf4eb 100644
--- a/cm-mlops/script/run-docker-container/_cm.json
+++ b/cm-mlops/script/run-docker-container/_cm.json
@@ -22,6 +22,7 @@
     "image_tag": "CM_DOCKER_IMAGE_TAG",
     "docker_os": "CM_DOCKER_OS",
     "docker_os_version": "CM_DOCKER_OS_VERSION",
+    "docker_image_base": "CM_DOCKER_IMAGE_BASE",
     "script_tags": "CM_DOCKER_RUN_SCRIPT_TAGS",
     "run_cmd_extra": "CM_DOCKER_RUN_CMD_EXTRA",
     "real_run": "CM_REAL_RUN",
"CM_REAL_RUN", diff --git a/docs/getting-started.md b/docs/getting-started.md index cc144a3cf1..32b4f10b9c 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -1,160 +1,4 @@ [ [Back to documentation](README.md) ] -*Under development* - -# CM Getting Started Guide - -## Image classification example - -One of the goals of the [MLCommons CM workflow automation framework (CM)](https://github.com/mlcommons/ck?tab=readme-ov-file#about) -is to provide a common, simple and human readable interface to run and manage complex software projects and benchmarks -on any platform with any software stack in a unified and automated way. - -This tutorial explains how CM works and should help you start using it with existing projects -or to modularize and unify your own projects. - -Let us test CM to run image classification from the command line on any platform with Windows, Linux and MacOS. - -### Installing CM - -CM is implemented as a [very small Python library](https://github.com/mlcommons/ck/tree/master/cm/cmind) -with `cm` and `cmr` front-ends and minimal dependencies (Python 3+, git and wget) -that can be installed via PIP: - - -```bash -pip install cmind -``` - -You may need to re-login to update the PATH to `cm` and `cmr` front-ends. - -Note that CM can be also installed from virtual environment (required in Ubuntu 23.04+) and inside containers. -You can check a detailed guide to install CM on different platforms [here](installation.md). - -### Pulling some repository with embedded CM interface - -Let's now pull a Git repository that has embedded CM interface -(note that if your Git repository doesn't have CM interface embedded, -CM will automatically initialize one): - -```bash -cm pull repo mlcommons@ck -``` - -CM will pull GitHub repository from `https://github.com/mlcommons/ck` to the `CM/repos` directory in your local HOME directory. -You can use flag `--url=https://github.com/mlcommons/ck` instead of `mlcommons@ck` to pull any Git repository. - -CM will then check if this repository has a CM interface by checking the [`cmr.yaml`](https://github.com/mlcommons/ck/blob/master/cmr.yaml) -file in the root directory of this repository (abbreviation for `C`ollective `M`ind `R`epository): - -```yaml -git: true -alias: mlcommons@ck -uid: a4705959af8e447a -version: 1.5.4 -prefix: cm-mlops -``` - -Note that this file will be automatically generated if it doesn't exist in your repository. - -While working on modularizing, unifying and automating MLPerf benchmarks, -we decided to embed a CM interface to this development repository -in the [cm-mlops directory](https://github.com/mlcommons/ck/tree/master/cm-mlops) - -The `prefix` in `cmr.yaml` tells CM to search for the CM interface in some sub-directory of a given repository -to avoid altering the original structure of software projects. - -### Using CM interface to run a given software project - -You can now invoke a human-friendly CM command to run your project such as image classification -(we will show how to use Python API later): - -```bash -cm run script "python app image-classification onnx" -``` - -CM will recursively walk through all pulled or downloaded repositories in your home `CM/repos` directory -and search for matching tags `python,app,image-classification,onnx` in all `_cm.yaml` or `_cm.json` -files in a `script` sub-directory of all repositories. 
-
-In our case, CM will find 1 match in
-the [`cm-mlops/script/app-image-classification-onnx-py/_cm.yaml`](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml).
-
-This file tells CM how to prepare environment variables, paths and command lines
-to run a native script or tool on any platform.
-
-
-####
-
-
-
-
-
-
-
-
-
-
-
-
-#### Using inputs and environment variables
-
-env
-
-const
-
-
-default_env
-
-input_mapping
-
-
-#### Using variations
-
-using the same code/script/tool but altering it's behavior and sub-dependencies
-
-CUDA
-
-_cuda
-
-
-#### Reporting issues
-
-The community helped us test this example on many platforms but if you still encounter
-some issues, please report them [here](https://github.com/mlcommons/ck/issues) - CM is not a magic (yet)
-and our concept is to collaboratively extend CM workflows to gradually improve their portability and reproducibility
-across diverse software and hardware.
-
-
-#### Debugging CM interface
-
-
-#### Extending CM interface
-
-
-### Reusing automation recipes
-
-
-
-### Adding CM interface to your own project
-
-
-
-### Using CM with containers
-
-
-### Using CM GUI
-
-
-### Running MLPerf and other projects via CM
-
-Recent examples from MLPerf and ML, compiler and systems conferences
-
-
-### Participating in collaborative developments
-
-This is a community project being developed by the [MLCommons Task Force on Automation and Reproducibility](taskforce.md)
-based on your feedback - please join our [public Discord server](https://discord.gg/JjWNWXKxwT) if you
-would like to help with developments or have questions, suggestions and feature requests.
+*20240130: we are updating this page based on the feedback from the [CM users and MLPerf submitters](https://github.com/mlcommons/ck/issues/1052) -
+ it should be ready within a week - please [stay tuned](https://discord.gg/JjWNWXKxwT)*.
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c3726e8bfe..cfb2e331c3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+cmind>=1.6.0
 pyyaml
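
Reviewer note: the Dockerfile-naming logic that this patch adds in both `dockerfile()` and `docker()` in module_misc.py can be read in isolation as the sketch below. The function wrapper, its name, and the printed sample values are illustrative only (the patch inlines this logic); the second sample assumes the `base_image` value introduced in build-mlperf-inference-server-nvidia/_cm.yaml.

```python
# Hypothetical standalone wrapper around the suffix selection added in
# module_misc.py above; not part of the patch itself.

def get_dockerfilename_suffix(docker_base_image, docker_os='ubuntu', docker_os_version='22.04'):
    if not docker_base_image:
        # No base image configured: keep the previous "<os>_<version>" naming.
        return docker_os + '_' + docker_os_version
    # Base image configured: keep only the last path component ("name:tag"),
    # dropping any registry/namespace prefix such as "nvcr.io/nvidia/mlperf/".
    return docker_base_image.split('/')[-1]

print(get_dockerfilename_suffix(None))
# -> "ubuntu_22.04", i.e. dockerfiles/ubuntu_22.04.Dockerfile as before
print(get_dockerfilename_suffix('nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public'))
# -> "mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public"
```

Note that when a base image is set, the `:` between the image name and tag becomes part of the generated `.Dockerfile` filename; that is legal on Linux filesystems but worth keeping in mind elsewhere.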