# _cm.yaml
alias: run-mlperf-inference-app
uid: 4a5d5b13fd7e4ac8

automation_alias: script
automation_uid: 5b4e0237da074764

category: Modular MLPerf inference benchmark pipeline

developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)"

gui:
  title: CM GUI to run MLPerf inference benchmarks and prepare submissions

clean_output_files:
- open.tar.gz
- summary.csv
- summary.json

tags:
- run
- common
- generate-run-cmds
- run-mlperf
- run-mlperf-inference
- vision
- mlcommons
- mlperf
- inference
- reference

tags_help: "run-mlperf,inference"
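# Usage sketch (assumes the "cm" CLI from the cmind package is installed and
# this repository is pulled): the script is selected by the tags above, e.g.
#
#   cm run script --tags=run-mlperf,inference --model=resnet50 --device=cpu
#
# Command-line flags are translated to environment variables through
# input_mapping below.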
default_env:
  CM_MLPERF_IMPLEMENTATION: reference
  CM_MLPERF_MODEL: resnet50
  CM_MLPERF_RUN_STYLE: test
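# Note (descriptive, based on CM default_env semantics): these values apply
# only when the variable is not already set, e.g. --implementation overrides
# CM_MLPERF_IMPLEMENTATION via input_mapping below.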
input_mapping:
  backend: CM_MLPERF_BACKEND
  category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
  clean: CM_MLPERF_CLEAN_ALL
  compliance: CM_MLPERF_LOADGEN_COMPLIANCE
  dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT
  dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER
  debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM
  device: CM_MLPERF_DEVICE
  division: CM_MLPERF_SUBMISSION_DIVISION
  docker: CM_MLPERF_USE_DOCKER
  dump_version_info: CM_DUMP_VERSION_INFO
  save_console_log: CM_SAVE_CONSOLE_LOG
  execution_mode: CM_MLPERF_RUN_STYLE
  find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE
  gpu_name: CM_NVIDIA_GPU_NAME
  hw_name: CM_HW_NAME
  hw_notes_extra: CM_MLPERF_SUT_HW_NOTES_EXTRA
  imagenet_path: IMAGENET_PATH
  implementation: CM_MLPERF_IMPLEMENTATION
  lang: CM_MLPERF_IMPLEMENTATION
  mode: CM_MLPERF_LOADGEN_MODE
  model: CM_MLPERF_MODEL
  multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY
  network: CM_NETWORK_LOADGEN
  offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS
  output_dir: OUTPUT_BASE_DIR
  output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY
  output_tar: MLPERF_INFERENCE_SUBMISSION_TAR_FILE
  performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT
  power: CM_SYSTEM_POWER
  precision: CM_MLPERF_MODEL_PRECISION
  preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR
  push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB
  readme: CM_MLPERF_README
  regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE
  regenerate_files: CM_REGENERATE_MEASURE_FILES
  rerun: CM_RERUN
  results_dir: OUTPUT_BASE_DIR
  results_git_url: CM_MLPERF_RESULTS_GIT_REPO_URL
  run_checker: CM_RUN_SUBMISSION_CHECKER
  run_style: CM_MLPERF_RUN_STYLE
  scenario: CM_MLPERF_LOADGEN_SCENARIO
  server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS
  singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY
  skip_submission_generation: CM_MLPERF_SKIP_SUBMISSION_GENERATION
  skip_truncation: CM_SKIP_TRUNCATE_ACCURACY
  submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR
  submitter: CM_MLPERF_SUBMITTER
  sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS
  sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA
  system_type: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
  target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY
  target_qps: CM_MLPERF_LOADGEN_TARGET_QPS
  test_query_count: CM_TEST_QUERY_COUNT
  threads: CM_NUM_THREADS
  batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE
  sut: CM_MLPERF_INFERENCE_SUT_VARIATION
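# Example of the mapping above (illustrative): passing --scenario=Offline on
# the command line sets CM_MLPERF_LOADGEN_SCENARIO=Offline in the script's
# environment, and --docker=yes sets CM_MLPERF_USE_DOCKER=yes:
#
#   cm run script --tags=run-mlperf,inference --scenario=Offline --docker=yes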
new_state_keys:
- app_mlperf_inference_*
- cm-mlperf-inference-results*
deps:
- tags: detect,os
  skip_if_env:
    CM_MLPERF_USE_DOCKER: [ on ]
- tags: detect,cpu
  skip_if_env:
    CM_MLPERF_USE_DOCKER: [ on ]
- names:
  - python
  - python3
  tags: get,python3
  skip_if_env:
    CM_MLPERF_USE_DOCKER: [ on ]
- names:
  - inference-src
  tags: get,mlcommons,inference,src
- tags: get,sut,description
- tags: get,mlperf,inference,results,dir
  names:
  - get-mlperf-inference-results-dir
  enable_if_env:
    CM_MLPERF_USE_DOCKER: [ off ]
  skip_if_env:
    OUTPUT_BASE_DIR: [ on ]
- tags: install,pip-package,for-cmind-python,_package.tabulate
- tags: get,mlperf,inference,utils
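# Note on gating (descriptive, based on CM dependency semantics): a dep with
# skip_if_env is skipped when the named variable matches a listed value, while
# enable_if_env runs it only on a match. For example, detect,os and detect,cpu
# above are skipped when CM_MLPERF_USE_DOCKER is on.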
docker:
  mounts:
  - ${{ INSTALL_DATA_PATH }}:/install_data
  - ${{ DATA_PATH }}:/data
  - ${{ CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH }}:${{ CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH }}
  - ${{ GPTJ_CHECKPOINT_PATH }}:${{ GPTJ_CHECKPOINT_PATH }}
  skip_run_cmd: 'no'
  shm_size: '32gb'
  extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
  docker_os: ubuntu
  docker_real_run: false
  run: true
  interactive: true
  docker_input_mapping:
    imagenet_path: IMAGENET_PATH
    gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH
    criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH
    results_dir: RESULTS_DIR
    submission_dir: SUBMISSION_DIR
    dlrm_data_path: DLRM_DATA_PATH
    intel_gptj_int8_model_path: CM_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH
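# Docker usage sketch (illustrative): the ${{ VAR }} placeholders in mounts
# are expanded from the environment, and docker_input_mapping fills them from
# command-line flags, so a host dataset path can be mounted into the container:
#
#   cm run script --tags=run-mlperf,inference --docker=yes --imagenet_path=/datasets/imagenet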
variations:
  accuracy-only:
    default_variations:
      submission-generation-style: full
    env:
      CM_MLPERF_LOADGEN_MODE: accuracy
      CM_MLPERF_SUBMISSION_RUN: 'yes'
      CM_RUN_MLPERF_ACCURACY: 'on'
      CM_RUN_SUBMISSION_CHECKER: 'no'
    group: submission-generation
  all-modes:
    env:
      CM_MLPERF_LOADGEN_ALL_MODES: 'yes'
    group: mode
  all-scenarios:
    env:
      CM_MLPERF_LOADGEN_ALL_SCENARIOS: 'yes'
  compliance:
    env:
      CM_MLPERF_LOADGEN_COMPLIANCE: 'yes'
  dashboard:
    default_gui: false
    env:
      CM_MLPERF_DASHBOARD: 'on'
  find-performance:
    default: true
    env:
      CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes'
      CM_MLPERF_LOADGEN_ALL_MODES: 'no'
      CM_MLPERF_LOADGEN_MODE: performance
      CM_MLPERF_RESULT_PUSH_TO_GITHUB: false
    group: submission-generation
  full:
    add_deps_recursive:
      coco2014-original:
        tags: _full
      coco2014-preprocessed:
        tags: _full
      imagenet-original:
        tags: _full
      imagenet-preprocessed:
        tags: _full
      openimages-original:
        tags: _full
      openimages-preprocessed:
        tags: _full
      openorca-original:
        tags: _full
      openorca-preprocessed:
        tags: _full
    env:
      CM_MLPERF_SUBMISSION_GENERATION_STYLE: full
      CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes'
    group: submission-generation-style
  performance-only:
    default_variations:
      submission-generation-style: full
    env:
      CM_MLPERF_LOADGEN_MODE: performance
      CM_MLPERF_SUBMISSION_RUN: 'yes'
      CM_RUN_SUBMISSION_CHECKER: 'no'
    group: submission-generation
  populate-readme:
    base:
    - all-modes
    default_variations:
      submission-generation-style: full
    env:
      CM_MLPERF_README: 'yes'
      CM_MLPERF_SUBMISSION_RUN: 'yes'
      CM_RUN_SUBMISSION_CHECKER: 'no'
    group: submission-generation
  r2.1:
    env:
      CM_MLPERF_INFERENCE_VERSION: '2.1'
      CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r2.1_default
    group: benchmark-version
  r3.0:
    env:
      CM_MLPERF_INFERENCE_VERSION: '3.0'
      CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r3.0_default
    group: benchmark-version
  r3.1:
    env:
      CM_MLPERF_INFERENCE_VERSION: '3.1'
      CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r3.1_default
    group: benchmark-version
  r4.0:
    env:
      CM_MLPERF_INFERENCE_VERSION: '4.0'
      CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default
    group: benchmark-version
  short:
    add_deps_recursive:
      submission-checker:
        tags: _short-run
    default: 'true'
    env:
      CM_MLPERF_SUBMISSION_GENERATION_STYLE: short
    group: submission-generation-style
  submission:
    base:
    - all-modes
    default_gui: true
    default_variations:
      submission-generation-style: full
    env:
      CM_MLPERF_LOADGEN_COMPLIANCE: 'yes'
      CM_MLPERF_SUBMISSION_RUN: 'yes'
      CM_RUN_MLPERF_ACCURACY: 'on'
      CM_RUN_SUBMISSION_CHECKER: 'yes'
      CM_TAR_SUBMISSION_DIR: 'yes'
    group: submission-generation
    post_deps:
    - names:
      - submission-generator
      enable_if_env:
        CM_MLPERF_SKIP_SUBMISSION_GENERATION:
        - 'no'
        - 'false'
        - 'False'
        - '0'
      tags: generate,mlperf,inference,submission
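# Variation selection sketch (illustrative): variations are chosen by adding
# "_<name>" tags, and at most one variation per group can be active. For
# example, to run the full submission flow against inference v4.0:
#
#   cm run script --tags=run-mlperf,inference,_submission,_full,_r4.0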
versions:
  master: {}
  r2.1: {}
input_description:
  division:
    choices:
    - 'open'
    - 'closed'
    default: 'open'
    desc: MLPerf division
    sort: 50
  category:
    choices:
    - 'edge'
    - 'datacenter'
    - 'network'
    default: 'edge'
    desc: MLPerf category
    sort: 60
  device:
    choices:
    - cpu
    - cuda
    - rocm
    - qaic
    default: cpu
    desc: MLPerf device
    sort: 100
  model:
    choices:
    - resnet50
    - retinanet
    - bert-99
    - bert-99.9
    - 3d-unet-99
    - 3d-unet-99.9
    - rnnt
    - dlrm-v2-99
    - dlrm-v2-99.9
    - gptj-99
    - gptj-99.9
    - sdxl
    - llama2-70b-99
    - llama2-70b-99.9
    - mobilenet
    - efficientnet
    default: resnet50
    desc: MLPerf model
    sort: 200
  precision:
    choices:
    - float32
    - float16
    - bfloat16
    - int8
    - uint8
    default: ''
    desc: MLPerf model precision
    sort: 250
  implementation:
    choices:
    - mlcommons-python
    - mlcommons-cpp
    - nvidia
    - intel
    - qualcomm
    - ctuning-cpp-tflite
    default: mlcommons-python
    desc: MLPerf implementation
    sort: 300
  backend:
    choices:
    - onnxruntime
    - tf
    - pytorch
    - deepsparse
    - tensorrt
    - glow
    - tvm-onnx
    default: onnxruntime
    desc: MLPerf framework (backend)
    sort: 400
  scenario:
    choices:
    - Offline
    - Server
    - SingleStream
    - MultiStream
    default: Offline
    desc: MLPerf scenario
    sort: 500
  mode:
    choices:
    - ''
    - accuracy
    - performance
    default: ''
    desc: MLPerf benchmark mode
    sort: 600
  execution_mode:
    choices:
    - test
    - fast
    - valid
    default: test
    desc: MLPerf execution mode
    sort: 700
  sut:
    default: ''
    desc: SUT configuration (if known)
    sort: 750
  submitter:
    default: CTuning
    desc: Submitter name (without spaces)
    sort: 800
  results_dir:
    default: ''
    desc: Folder path to store results (defaults to the current working directory)
    sort: 900
  submission_dir:
    default: ''
    desc: Folder path to store the MLPerf submission tree
    sort: 1000
  adr.compiler.tags:
    desc: Compiler for LoadGen and any C/C++ parts of the implementation
  adr.inference-src-loadgen.env.CM_GIT_URL:
    default: ''
    desc: Git URL for MLPerf inference sources to build LoadGen (to enable non-reference implementations)
  adr.inference-src.env.CM_GIT_URL:
    default: ''
    desc: Git URL for MLPerf inference sources to run benchmarks (to enable non-reference implementations)
  adr.mlperf-inference-implementation.max_batchsize:
    desc: Maximum batch size to be used
  adr.mlperf-inference-implementation.num_threads:
    desc: Number of threads (reference & C++ implementations only)
  adr.python.name:
    desc: Python virtual environment name (optional)
  adr.python.version:
    desc: Force Python version (must have all system deps)
  adr.python.version_min:
    default: '3.8'
    desc: Minimum Python version
  power:
    choices:
    - 'yes'
    - 'no'
    default: 'no'
    desc: Measure power
    sort: 5000
  adr.mlperf-power-client.power_server:
    default: '192.168.0.15'
    desc: MLPerf Power server IP address
    sort: 5005
  adr.mlperf-power-client.port:
    default: 4950
    desc: MLPerf Power server port
    sort: 5010
  clean:
    boolean: true
    default: false
    desc: Clean run
  compliance:
    choices:
    - 'yes'
    - 'no'
    default: 'no'
    desc: Whether to run compliance tests (applicable only to the closed division)
  dashboard_wb_project:
    default: cm-mlperf-dse-testing
    desc: W&B dashboard project
  dashboard_wb_user:
    default: cmind
    desc: W&B dashboard user
  hw_name:
    desc: MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...)
  multistream_target_latency:
    desc: Set MultiStream target latency
  offline_target_qps:
    desc: Set LoadGen Offline target QPS
  quiet:
    boolean: true
    default: true
    desc: Quiet run (select default values for all questions)
  server_target_qps:
    desc: Set Server target QPS
  singlestream_target_latency:
    desc: Set SingleStream target latency
  target_latency:
    desc: Set target latency
  target_qps:
    desc: Set LoadGen target QPS
  j:
    boolean: true
    default: false
    desc: Print the results dictionary to the console at the end of the run
  repro:
    boolean: true
    default: false
    desc: Record input/output/state/info files to make it easier to reproduce results
  time:
    boolean: true
    default: true
    desc: Print script execution time at the end of the run
  debug:
    boolean: true
    default: false
    desc: Debug this script
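# End-to-end sketch combining the documented inputs above (illustrative values;
# assumes the datasets and the chosen implementation are available locally):
#
#   cm run script --tags=run-mlperf,inference,_submission,_full \
#     --division=open --category=edge --device=cuda --model=bert-99 \
#     --implementation=nvidia --backend=tensorrt --scenario=Offline \
#     --execution_mode=valid --submitter=CTuning --quiet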