Skip to content

Commit

Permalink
cleaning up docs and improving MLPerf inference benchmarks (#1079)
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh authored Jan 30, 2024
2 parents 808ed7a + bf17659 commit 582190e
Show file tree
Hide file tree
Showing 15 changed files with 51 additions and 175 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ cmr "get generic-python-lib _package.torchvision" --version=0.16.2
cmr "python app image-classification torch" --input=computer_mouse.jpg


cm rm repo mlcommons@ck
cm pull repo --url=https://zenodo.org/records/10581696/files/cm-mlops-repo-20240129.zip

cmr "install llvm prebuilt" --version=17.0.6
cmr "app image corner-detection"

Expand Down
27 changes: 22 additions & 5 deletions cm-mlops/automation/script/module_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1391,15 +1391,21 @@ def dockerfile(i):
run_cmd = r['run_cmd_string']



docker_base_image = i.get('docker_base_image', docker_settings.get('base_image'))
docker_os = i.get('docker_os', docker_settings.get('docker_os', 'ubuntu'))
docker_os_version = i.get('docker_os_version', docker_settings.get('docker_os_version', '22.04'))
if not docker_base_image:
dockerfilename_suffix = docker_os +'_'+docker_os_version
else:
dockerfilename_suffix = docker_base_image.split("/")
dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1]

fake_run_deps = i.get('fake_run_deps', docker_settings.get('fake_run_deps', False))
docker_run_final_cmds = docker_settings.get('docker_run_final_cmds', [])

gh_token = i.get('docker_gh_token')

if i.get('docker_real_run', False):
if i.get('docker_real_run', docker_settings.get('docker_real_run',False)):
fake_run_option = " "
fake_run_deps = False
else:
Expand All @@ -1409,7 +1415,7 @@ def dockerfile(i):

env['CM_DOCKER_PRE_RUN_COMMANDS'] = docker_run_final_cmds

dockerfile_path = os.path.join(script_path,'dockerfiles', docker_os +'_'+docker_os_version +'.Dockerfile')
dockerfile_path = os.path.join(script_path,'dockerfiles', dockerfilename_suffix +'.Dockerfile')
if i.get('print_deps'):
cm_input = {'action': 'run',
'automation': 'script',
Expand All @@ -1434,6 +1440,7 @@ def dockerfile(i):
'automation': 'script',
'tags': 'build,dockerfile',
'cm_repo': cm_repo,
'docker_base_image': docker_base_image,
'docker_os': docker_os,
'docker_os_version': docker_os_version,
'file_path': dockerfile_path,
Expand Down Expand Up @@ -1659,9 +1666,18 @@ def docker(i):

mount_string = "" if len(mounts)==0 else ",".join(mounts)

docker_base_image = i.get('docker_base_image', docker_settings.get('base_image'))
docker_os = i.get('docker_os', docker_settings.get('docker_os', 'ubuntu'))
docker_os_version = i.get('docker_os_version', docker_settings.get('docker_os_version', '22.04'))
if not docker_base_image:
dockerfilename_suffix = docker_os +'_'+docker_os_version
else:
dockerfilename_suffix = docker_base_image.split("/")
dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1]

cm_repo=i.get('docker_cm_repo', 'mlcommons@ck')

dockerfile_path = os.path.join(script_path,'dockerfiles', _os +'_'+version +'.Dockerfile')
dockerfile_path = os.path.join(script_path,'dockerfiles', dockerfilename_suffix +'.Dockerfile')

docker_skip_run_cmd = i.get('docker_skip_run_cmd', docker_settings.get('skip_run_cmd', False)) #skips docker run cmd and gives an interactive shell to the user

Expand Down Expand Up @@ -1712,15 +1728,16 @@ def docker(i):
'automation': 'script',
'tags': 'run,docker,container',
'recreate': 'yes',
'docker_base_image': docker_base_image,
'docker_os': _os,
'docker_os_version': version,
'cm_repo': cm_repo,
'env': env,
'image_repo': image_repo,
'interactive': interactive,
'mounts': mounts,
'image_name': 'cm-script-'+script_alias,
# 'image_tag': script_alias,
'docker_os_version': version,
'detached': detached,
'script_tags': f'{tag_string}',
'run_cmd': run_cmd if docker_skip_run_cmd not in [ 'yes', True, 'True' ] else 'echo "cm version"',
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/app-mlperf-inference-cpp/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ variations:

offline,resnet50:
default_variations:
batch-size: batch-size.8
batch-size: batch-size.32

multistream,retinanet:
default_variations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ variations:
nvidia:
group: implementation
env:
IMPLEMENTATION: nvidia
IMPLEMENTATION: nvidia-original
default_env:
MODELS: resnet50,retinanet,bert-99,bert-99.9,3d-unet-99,rnnt,gptj-99,gptj-99.9,dlrmv2-99,dlrmv2-99.9
BACKENDS: tensorrt
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ def preprocess(i):
test_query_count = 100
else:
if model == "resnet50":
test_query_count = 10000
test_query_count = 40000
else:
test_query_count = 1000
test_query_count = 2000
cmd = f'run_test "{model}" "{backend}" "{test_query_count}" "{implementation}" "{device}" "$find_performance_cmd"'
cmds.append(cmd)
#second argument is unused for submission_cmd
Expand Down
1 change: 1 addition & 0 deletions cm-mlops/script/build-dockerfile/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"cm_repo": "CM_MLOPS_REPO",
"docker_os": "CM_DOCKER_OS",
"docker_os_version": "CM_DOCKER_OS_VERSION",
"docker_base_image": "CM_DOCKER_IMAGE_BASE",
"fake_run_option": "CM_DOCKER_FAKE_RUN_OPTION",
"file_path": "CM_DOCKERFILE_WITH_PATH",
"gh_token": "CM_GH_TOKEN",
Expand Down
7 changes: 4 additions & 3 deletions cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ versions:
docker:
skip_run_cmd: 'no'
all_gpus: 'yes'
docker_os: ubuntu
docker_real_run: True
docker_os_version: '20.04'
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public
docker_input_mapping:
imagenet_path: IMAGENET_PATH
results_dir: RESULTS_DIR
Expand All @@ -213,6 +217,3 @@ docker:
- "${{ CM_TENSORRT_TAR_FILE_PATH }}:${{ CM_TENSORRT_TAR_FILE_PATH }}"
- "${{ CUDA_RUN_FILE_LOCAL_PATH }}:${{ CUDA_RUN_FILE_LOCAL_PATH }}"
- "${{ MLPERF_SCRATCH_PATH }}:${{ MLPERF_SCRATCH_PATH }}"
pre_run_cmds:
- cm pull repo mlcommons@ck
- cm run script --tags=get,dataset,original,imagenet,_full --imagenet_path=/data/imagenet-val
1 change: 0 additions & 1 deletion cm-mlops/script/get-cuda/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"CM_REQUIRE_INSTALL": "no"
},
"docker": {
"run": false
},
"input_mapping": {
"cudnn_tar_path": "CM_CUDNN_TAR_FILE_PATH",
Expand Down
4 changes: 2 additions & 2 deletions cm-mlops/script/get-generic-python-lib/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,8 @@
}
],
"env": {
"CM_GENERIC_PYTHON_PACKAGE_NAME": "nvidia-dali-cuda110",
"CM_GENERIC_PYTHON_PIP_EXTRA": " --upgrade",
"CM_GENERIC_PYTHON_PACKAGE_NAME": "nvidia-dali-cuda120",
"CM_GENERIC_PYTHON_PIP_EXTRA": " --upgrade --default-timeout=900",
"CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://developer.download.nvidia.com/compute/redist"
},
"new_env_keys": [
Expand Down
3 changes: 2 additions & 1 deletion cm-mlops/script/get-ml-model-bert-large-squad/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@
},
"onnx,int8": {
"env": {
"CM_ML_MODEL_F1": "90.067"
"CM_ML_MODEL_F1": "90.067",
"CM_PACKAGE_URL": "https://zenodo.org/record/3750364/files/bert_large_v1_1_fake_quant.onnx"
}
},
"onnx,int8,zenodo": {
Expand Down
3 changes: 2 additions & 1 deletion cm-mlops/script/get-tensorrt/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def preprocess(i):
env = i['env']


if env.get('CM_TENSORRT_TAR_FILE_PATH','')=='' and env.get('CM_TENSORRT_REQUIRE_DEV', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64':
#Not enforcing dev requirement for now
if env.get('CM_TENSORRT_TAR_FILE_PATH','')=='' and env.get('CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64':

if os_info['platform'] == 'windows':
extra_pre=''
Expand Down
7 changes: 7 additions & 0 deletions cm-mlops/script/reproduce-mlperf-inference-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,10 @@ variations:
default: true
env:
CM_MODEL: resnet50
deps:
- tags: get,generic-python-lib,_onnx-graphsurgeon
- tags: get,generic-python-lib,_package.onnx
version: 1.13.1

retinanet:
group: model
Expand All @@ -312,6 +316,9 @@ variations:
- tags: get,generic-python-lib,_opencv-python
- tags: get,generic-python-lib,_numpy
- tags: get,generic-python-lib,_pycocotools
- tags: get,generic-python-lib,_onnx-graphsurgeon
- tags: get,generic-python-lib,_package.onnx
version: 1.13.1

bert_:
deps:
Expand Down
1 change: 1 addition & 0 deletions cm-mlops/script/run-docker-container/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"image_tag": "CM_DOCKER_IMAGE_TAG",
"docker_os": "CM_DOCKER_OS",
"docker_os_version": "CM_DOCKER_OS_VERSION",
"docker_image_base": "CM_DOCKER_IMAGE_BASE",
"script_tags": "CM_DOCKER_RUN_SCRIPT_TAGS",
"run_cmd_extra": "CM_DOCKER_RUN_CMD_EXTRA",
"real_run": "CM_REAL_RUN",
Expand Down
160 changes: 2 additions & 158 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
@@ -1,160 +1,4 @@
[ [Back to documentation](README.md) ]

*Under development*

# CM Getting Started Guide

## Image classification example

One of the goals of the [MLCommons CM workflow automation framework (CM)](https://github.com/mlcommons/ck?tab=readme-ov-file#about)
is to provide a common, simple and human readable interface to run and manage complex software projects and benchmarks
on any platform with any software stack in a unified and automated way.

This tutorial explains how CM works and should help you start using it with existing projects
or to modularize and unify your own projects.

Let us use CM to run image classification from the command line on any platform running Windows, Linux or macOS.

### Installing CM

CM is implemented as a [very small Python library](https://github.com/mlcommons/ck/tree/master/cm/cmind)
with `cm` and `cmr` front-ends and minimal dependencies (Python 3+, git and wget)
that can be installed via PIP:


```bash
pip install cmind
```

You may need to re-login to update the PATH to `cm` and `cmr` front-ends.

Note that CM can also be installed inside a virtual environment (required on Ubuntu 23.04+) and inside containers.
You can check a detailed guide to install CM on different platforms [here](installation.md).

### Pulling some repository with embedded CM interface

Let's now pull a Git repository that has an embedded CM interface
(note that if your Git repository doesn't have a CM interface embedded,
CM will automatically initialize one):

```bash
cm pull repo mlcommons@ck
```

CM will pull the GitHub repository from `https://github.com/mlcommons/ck` to the `CM/repos` directory in your local HOME directory.
You can use the flag `--url=https://github.com/mlcommons/ck` instead of `mlcommons@ck` to pull any Git repository.

CM will then check if this repository has a CM interface by checking the [`cmr.yaml`](https://github.com/mlcommons/ck/blob/master/cmr.yaml)
file in the root directory of this repository (abbreviation for `C`ollective `M`ind `R`epository):

```yaml
git: true
alias: mlcommons@ck
uid: a4705959af8e447a
version: 1.5.4
prefix: cm-mlops
```
Note that this file will be automatically generated if it doesn't exist in your repository.
While working on modularizing, unifying and automating MLPerf benchmarks,
we decided to embed a CM interface into this development repository
in the [cm-mlops directory](https://github.com/mlcommons/ck/tree/master/cm-mlops).
The `prefix` in `cmr.yaml` tells CM to search for the CM interface in some sub-directory of a given repository
to avoid altering the original structure of software projects.

### Using CM interface to run a given software project

You can now invoke a human-friendly CM command to run your project such as image classification
(we will show how to use Python API later):

```bash
cm run script "python app image-classification onnx"
```

CM will recursively walk through all pulled or downloaded repositories in your home `CM/repos` directory
and search for matching tags `python,app,image-classification,onnx` in all `_cm.yaml` or `_cm.json`
files in a `script` sub-directory of all repositories.

In our case, CM will find 1 match in
the [`cm-mlops/script/app-image-classification-onnx-py/_cm.yaml`](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-image-classification-onnx-py/_cm.yaml).

This file tells CM how to prepare environment variables, paths and command lines
to run a native script or tool on any platform.


####












#### Using inputs and environment variables

env

const


default_env

input_mapping



#### Using variations

Using the same code/script/tool but altering its behavior and sub-dependencies.

CUDA

_cuda


#### Reporting issues

The community helped us test this example on many platforms but if you still encounter
some issues, please report them [here](https://github.com/mlcommons/ck/issues) - CM is not magic (yet)
and our concept is to collaboratively extend CM workflows to gradually improve their portability and reproducibility
across diverse software and hardware.



#### Debugging CM interface


#### Extending CM interface


### Reusing automation recipes



### Adding CM interface to your own project



### Using CM with containers


### Using CM GUI


### Running MLPerf and other projects via CM

Recent examples from MLPerf and ML, compiler and systems conferences


### Participating in collaborative developments

This is a community project being developed by the [MLCommons Task Force on Automation and Reproducibility](taskforce.md)
based on your feedback - please join our [public Discord server](https://discord.gg/JjWNWXKxwT) if you
would like to help with developments or have questions, suggestions and feature requests.
*20240130: we are updating this page based on the feedback from the [CM users and MLPerf submitters](https://github.com/mlcommons/ck/issues/1052) -
it should be ready within a week - please [stay tuned](https://discord.gg/JjWNWXKxwT)*.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
cmind>=1.6.0
pyyaml

0 comments on commit 582190e

Please sign in to comment.