Commit

Merge branch 'mlperf-inference' into mixtral+gha+selfhosted
arjunsuresh authored Oct 8, 2024
2 parents bcec9ec + 454a92b commit 72cf058
Showing 15 changed files with 206 additions and 30 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/test-mlperf-inference-dlrm.yml
@@ -0,0 +1,49 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: MLPerf inference DLRM-v2

on:
schedule:
- cron: "30 21 * * *"

jobs:
build_reference:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, GO-spr, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "pytorch" ]
device: [ "cpu", "cuda" ]

steps:
- name: Test MLPerf Inference DLRM-v2 reference implementation
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
python3 -m pip install cm4mlops
cm pull repo
cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
build_intel:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, GO-spr, linux, x64 ]
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
backend: [ "pytorch" ]
device: [ "cpu" ]

steps:
- name: Test MLPerf Inference DLRM-v2 INTEL implementation
run: |
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
python3 -m pip install cm4mlops
cm pull repo
cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean
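Both jobs drive the benchmark through the cm run script CLI inside a throwaway virtual environment. For reference, a rough Python equivalent of the reference-implementation invocation via the cmind API is sketched below; the dict keys are assumed to mirror the CLI flags, and the concrete backend/device values stand in for the matrix variables, so treat it as an illustration rather than what the workflow actually executes.

```python
# Sketch only: approximates the CLI call above through the cmind Python API.
# The flag-to-key mapping is an assumption; the workflow itself uses the cm CLI.
import cmind

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'run-mlperf,inference,_submission,_short',
    'submitter': 'MLCommons',
    'model': 'dlrm-v2-99',
    'implementation': 'reference',
    'backend': 'pytorch',        # stands in for ${{ matrix.backend }}
    'device': 'cpu',             # stands in for ${{ matrix.device }}
    'scenario': 'Offline',
    'execution_mode': 'test',
    'test_query_count': '1',
    'target_qps': '1',
    'quiet': True,
    'docker': True,
})
if r['return'] > 0:
    print(r.get('error', 'cm script failed'))
```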
59 changes: 36 additions & 23 deletions automation/script/module.py
@@ -413,10 +413,6 @@ def _run(self, i):

ignore_script_error = i.get('ignore_script_error', False)

# Get constant env and state
const = i.get('const',{})
const_state = i.get('const_state',{})

# Detect current path and record in env for further use in native scripts
current_path = os.path.abspath(os.getcwd())
r = _update_env(env, 'CM_TMP_CURRENT_PATH', current_path)
@@ -838,8 +834,8 @@ def _run(self, i):
script_artifact_env = meta.get('env',{})
env.update(script_artifact_env)



script_artifact_state = meta.get('state',{})
utils.merge_dicts({'dict1':state, 'dict2':script_artifact_state, 'append_lists':True, 'append_unique':True})



@@ -853,7 +849,7 @@


# STEP 700: Overwrite env with keys from the script input (to allow user friendly CLI)
# IT HAS THE PRIORITY OVER meta['default_env'] and meta['env']
# IT HAS THE PRIORITY OVER meta['default_env'] and meta['env'] but not over the meta from versions/variations
# (env OVERWRITE - user enforces it from CLI)
# (it becomes const)
if input_mapping:
@@ -866,7 +862,9 @@
# update_env_from_input_mapping(const, i, docker_input_mapping)



# Update env/state with const
env.update(const)
utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True})



@@ -882,7 +880,7 @@
variations = script_artifact.meta.get('variations', {})
state['docker'] = meta.get('docker', {})

r = self._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose)
r = self._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose)
if r['return'] > 0:
return r

@@ -952,7 +950,7 @@ def _run(self, i):

if version!='' and version in versions:
versions_meta = versions[version]
r = update_state_from_meta(versions_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
r = update_state_from_meta(versions_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
if r['return']>0: return r
adr=get_adr(versions_meta)
if adr:
@@ -1328,7 +1326,7 @@ def _run(self, i):

if default_version in versions:
versions_meta = versions[default_version]
r = update_state_from_meta(versions_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
r = update_state_from_meta(versions_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
if r['return']>0: return r

if "add_deps_recursive" in versions_meta:
@@ -1374,7 +1372,6 @@ def _run(self, i):
r = update_env_with_values(env)
if r['return']>0: return r


# Clean some output files
clean_tmp_files(clean_files, recursion_spaces)

@@ -1451,8 +1448,12 @@ def _run(self, i):
elif pip_version_max != '':
pip_version_string = '<='+pip_version_max

env.update(const)
utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True})

r = _update_env(env, 'CM_TMP_PIP_VERSION_STRING', pip_version_string)
if r['return']>0: return r

if pip_version_string != '':
logging.debug(recursion_spaces+' # potential PIP version string (if needed): '+pip_version_string)

@@ -1462,10 +1463,6 @@

logging.debug(recursion_spaces+' - Running preprocess ...')

# Update env and state with const
utils.merge_dicts({'dict1':env, 'dict2':const, 'append_lists':True, 'append_unique':True})
utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True})

run_script_input['run_state'] = run_state

ii = copy.deepcopy(customize_common_input)
@@ -1916,7 +1913,7 @@ def _dump_version_info_for_script(self, output_dir = os.getcwd(), quiet = False,
return {'return': 0}

######################################################################################
def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose):
def _update_state_from_variations(self, i, meta, variation_tags, variations, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, add_deps_recursive, run_state, recursion_spaces, verbose):

# Save current explicit variations
import copy
@@ -2019,7 +2016,7 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env
if variation_tag_dynamic_suffix:
self._update_variation_meta_with_dynamic_suffix(variation_meta, variation_tag_dynamic_suffix)

r = update_state_from_meta(variation_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
r = update_state_from_meta(variation_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
if r['return']>0: return r

if variation_meta.get('script_name', '')!='':
@@ -2050,7 +2047,7 @@ def _update_state_from_variations(self, i, meta, variation_tags, variations, env

combined_variation_meta = variations[combined_variation]

r = update_state_from_meta(combined_variation_meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
r = update_state_from_meta(combined_variation_meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys_from_meta, new_state_keys_from_meta, i)
if r['return']>0: return r

adr=get_adr(combined_variation_meta)
@@ -3012,8 +3009,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
'remembered_selections': remembered_selections,
'env':env,
'state':state,
'const':const,
'const_state':const_state,
'const':copy.deepcopy(const),
'const_state':copy.deepcopy(const_state),
'add_deps_recursive':add_deps_recursive,
'debug_script_tags':debug_script_tags,
'verbose':verbose,
@@ -3040,6 +3037,11 @@
r = update_env_with_values(env)
if r['return']>0: return r

# Update env/state with const
env.update(const)
utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True})


return {'return': 0}

##############################################################################
@@ -4418,7 +4420,7 @@ def update_env_with_values(env, fail_on_not_found=False, extra_env={}):

# Check cases such as --env.CM_SKIP_COMPILE
if type(value)==bool:
env[key] = str(value)
env[key] = value
continue

tmp_values = re.findall(r'<<<(.*?)>>>', str(value))
@@ -5110,20 +5112,31 @@ def update_env_from_input_mapping(env, inp, input_mapping):
env[input_mapping[key]] = inp[key]

##############################################################################
def update_state_from_meta(meta, env, state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i):
def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps, prehook_deps, posthook_deps, new_env_keys, new_state_keys, i):
"""
Internal: update env and state from meta
"""

default_env = meta.get('default_env',{})
for key in default_env:
env.setdefault(key, default_env[key])

update_env = meta.get('env', {})
env.update(update_env)

update_const = meta.get('const', {})
if update_const:
const.update(update_const)
env.update(const)

update_state = meta.get('state', {})
utils.merge_dicts({'dict1':state, 'dict2':update_state, 'append_lists':True, 'append_unique':True})

update_const_state = meta.get('const_state', {})
if const_state:
utils.merge_dicts({'dict1':const_state, 'dict2':update_const_state, 'append_lists':True, 'append_unique':True})
utils.merge_dicts({'dict1':state, 'dict2':const_state, 'append_lists':True, 'append_unique':True})

new_deps = meta.get('deps', [])
if len(new_deps)>0:
append_deps(deps, new_deps)
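The common thread in the module.py changes is that const and const_state now travel alongside env and state into update_state_from_meta and _update_state_from_variations, so a value pinned under a script's (or a variation's) const meta keeps overriding whatever later env updates would set. A condensed sketch of that precedence is below; merge() is a stand-in for utils.merge_dicts, which additionally handles list appending and uniqueness.

```python
# Condensed sketch of the precedence implied above; not the real implementation.
def merge(dst, src):               # stand-in for utils.merge_dicts
    dst.update(src)

def apply_meta(meta, env, state, const, const_state):
    for k, v in meta.get('default_env', {}).items():
        env.setdefault(k, v)                   # default_env: fallback only
    env.update(meta.get('env', {}))            # env: normal overwrite
    if meta.get('const'):
        const.update(meta['const'])
        env.update(const)                      # const wins over env
    merge(state, meta.get('state', {}))
    if meta.get('const_state'):
        merge(const_state, meta['const_state'])
        merge(state, const_state)              # const_state wins over state

env, state, const, const_state = {}, {}, {}, {}
apply_meta({'env': {'CM_X': 'a'}, 'const': {'CM_X': 'pinned'}},
           env, state, const, const_state)
apply_meta({'env': {'CM_X': 'b'}},             # e.g. a variation trying to override
           env, state, const, const_state)
env.update(const)                              # re-applied in _run()/_run_deps()
print(env['CM_X'])                             # -> 'pinned'
```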
8 changes: 6 additions & 2 deletions automation/script/module_misc.py
@@ -1393,6 +1393,8 @@ def dockerfile(i):

env=i.get('env', {})
state = i.get('state', {})
const=i.get('const', {})
const_state = i.get('const_state', {})
script_automation = i['self_module']

dockerfile_env=i.get('dockerfile_env', {})
@@ -1420,7 +1422,7 @@ def dockerfile(i):
state['docker'] = docker_settings
add_deps_recursive = i.get('add_deps_recursive', {})

r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False)
r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False)
if r['return'] > 0:
return r

@@ -1741,6 +1743,8 @@ def docker(i):
env['CM_RUN_STATE_DOCKER'] = False
script_automation = i['self_module']
state = i.get('state', {})
const = i.get('const', {})
const_state = i.get('const_state', {})

tags_split = i.get('tags', '').split(",")
variation_tags = [ t[1:] for t in tags_split if t.startswith("_") ]
@@ -1793,7 +1797,7 @@ def docker(i):
state['docker'] = docker_settings
add_deps_recursive = i.get('add_deps_recursive', {})

r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False)
r = script_automation._update_state_from_variations(i, meta, variation_tags, variations, env, state, const, const_state, deps = [], post_deps = [], prehook_deps = [], posthook_deps = [], new_env_keys_from_meta = [], new_state_keys_from_meta = [], add_deps_recursive = add_deps_recursive, run_state = {}, recursion_spaces='', verbose = False)
if r['return'] > 0:
return r

5 changes: 5 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
@@ -659,6 +659,7 @@ variations:

3d-unet_,reference:
docker:
image_name: mlperf-inference-mlcommons-python-implementation-3d-unet
deps:
- enable_if_env:
CM_MLPERF_DATASET_3DUNET_DOWNLOAD_TO_HOST:
@@ -698,6 +699,7 @@ variations:

sdxl,reference,float16:
docker:
image_name: mlperf-inference-mlcommons-python-implementation-sdxl-float16
deps:
- enable_if_env:
CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
@@ -706,6 +708,7 @@

sdxl,reference,bfloat16:
docker:
image_name: mlperf-inference-mlcommons-python-implementation-sdxl-bfloat16
deps:
- enable_if_env:
CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
@@ -714,6 +717,7 @@

sdxl,reference,float32:
docker:
image_name: mlperf-inference-mlcommons-python-implementation-sdxl-float32
deps:
- enable_if_env:
CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST:
@@ -765,6 +769,7 @@ variations:

llama2-70b_,reference:
docker:
image_name: mlperf-inference-mlcommons-python-implementation-llama2-70b
deps:
- enable_if_env:
CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST:
1 change: 1 addition & 0 deletions script/get-cuda/_cm.yaml
@@ -46,6 +46,7 @@ new_env_keys:
- CUDA_PATH
- CM_CUDA_*
- CM_NVCC_*
- CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX5
- +PATH
- +C_INCLUDE_PATH
- +CPLUS_INCLUDE_PATH
1 change: 1 addition & 0 deletions script/get-cuda/customize.py
@@ -214,5 +214,6 @@ def postprocess(i):
env['+ LDFLAGS'].append("-L"+x)

env['CM_CUDA_VERSION_STRING'] = "cu"+env['CM_CUDA_VERSION'].replace(".", "")
env['CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX5'] = env['CM_CUDA_VERSION_STRING']

return {'return':0, 'version': version}
2 changes: 1 addition & 1 deletion script/get-mlperf-inference-sut-configs/customize.py
@@ -27,7 +27,7 @@ def postprocess(i):
implementation_string = env['CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX'] if env.get('CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX', '') != '' else env.get('CM_MLPERF_IMPLEMENTATION', 'default')

run_config = []
for i in range(1,5):
for i in range(1,6):
if env.get(f'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX{i}', '') != '':
run_config.append(env.get(f'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX{i}'))

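Taken together, the two customize.py changes feed the CUDA version string (for example cu124, built as "cu" plus the dotted version with the dots removed) into the SUT run-config name as the fifth suffix, and the loop above now scans suffixes 1 through 5. A small sketch of that collection step follows; the example env values and the '-' join are assumptions, since the assembly after the loop is not part of this excerpt.

```python
# Sketch of the suffix collection shown above; example values and the '-' join
# are assumptions (the code after the loop is not included in the diff).
env = {
    'CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX': 'reference',   # example value
    'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4': 'scc24-main',     # example value
    'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX5': 'cu124',          # set by get-cuda
}

run_config = []
for i in range(1, 6):              # now includes SUFFIX5
    suffix = env.get(f'CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX{i}', '')
    if suffix != '':
        run_config.append(suffix)

print('-'.join(run_config))        # e.g. 'scc24-main-cu124'
```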
6 changes: 5 additions & 1 deletion script/run-mlperf-inference-app/_cm.yaml
@@ -43,6 +43,7 @@ input_mapping:
category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE
clean: CM_MLPERF_CLEAN_ALL
compliance: CM_MLPERF_LOADGEN_COMPLIANCE
custom_system_nvidia: CM_CUSTOM_SYSTEM_NVIDIA
dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT
dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER
debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM
@@ -140,7 +141,8 @@ deps:
- tags: install,pip-package,for-cmind-python,_package.tabulate
- tags: get,mlperf,inference,utils

docker:
#We use this script as a command generator to run docker via app-mlperf-inference script
docker_off:
mounts:
- ${{ INSTALL_DATA_PATH }}:/install_data
- ${{ DATA_PATH }}:/data
@@ -248,6 +250,7 @@ variations:
- short
env:
CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-base
CM_DOCKER_IMAGE_NAME: scc24
adr:
coco2014-preprocessed:
tags: _size.50,_with-sample-ids
@@ -271,6 +274,7 @@
extra_cache_tags: "scc24-main"
env:
CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX4: scc24-main
CM_DOCKER_IMAGE_NAME: scc24
deps:
- tags: clean,nvidia,scratch,_sdxl,_downloaded-data
extra_cache_rm_tags: scc24-base
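The new custom_system_nvidia entry relies on the same input_mapping mechanism handled in module.py above: a CLI flag is copied verbatim into the environment under its mapped CM_ key before the script runs. A minimal sketch, mirroring update_env_from_input_mapping:

```python
# Minimal sketch of input_mapping, mirroring update_env_from_input_mapping above.
# The CLI value 'yes' is just an example.
input_mapping = {'custom_system_nvidia': 'CM_CUSTOM_SYSTEM_NVIDIA'}
cli_input = {'custom_system_nvidia': 'yes'}

env = {}
for key in cli_input:
    if key in input_mapping:
        env[input_mapping[key]] = cli_input[key]

print(env)   # {'CM_CUSTOM_SYSTEM_NVIDIA': 'yes'}
```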