Merge pull request #255 from GATEOverflow/mlperf-inference
Merge from go
arjunsuresh authored Sep 18, 2024
2 parents fd363fa + 6c85bc2 commit 3ac6245
Showing 15 changed files with 125 additions and 39 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test-mlperf-inference-abtf-poc.yml
@@ -37,7 +37,7 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on docker
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=5 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
build2:
runs-on: ${{ matrix.os }}
@@ -62,7 +62,7 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
build3:
runs-on: ${{ matrix.os }}
@@ -89,4 +89,4 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --quiet --env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --quiet --env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 -v
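All three jobs above now pass the same --test_query_count=2, and each pins the cocoeval dependency with --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7. A hedged reading of those two flags: accept any detected version up to 1.5.7, and if resolution would otherwise pick something newer, fall back to 1.5.7. A small standalone sketch of that assumed rule:

# Hedged sketch of CM-style version_max / version_max_usable handling
# (assumed semantics; the actual resolution logic lives in automation/script/module.py).
def resolve_version(detected, version_max='', version_max_usable=''):
    def as_tuple(v):
        return tuple(int(x) for x in v.split('.'))
    if version_max and as_tuple(detected) > as_tuple(version_max):
        # Too new: fall back to the newest version known to work.
        return version_max_usable or version_max
    return detected

print(resolve_version('2.0.8', '1.5.7', '1.5.7'))   # -> 1.5.7
print(resolve_version('1.5.7', '1.5.7', '1.5.7'))   # -> 1.5.7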
53 changes: 39 additions & 14 deletions automation/script/module.py
@@ -351,15 +351,16 @@ def _run(self, i):

debug_uid = i.get('debug_uid', '')
if debug_uid!='':
- env['CM_TMP_DEBUG_UID'] = debug_uid
+ r = _update_env(env, 'CM_TMP_DEBUG_UID', debug_uid)
+ if r['return']>0: return r

fake_deps = i.get('fake_deps', False)
if fake_deps: env['CM_TMP_FAKE_DEPS']='yes'

if str(i.get('skip_sys_utils', '')).lower() in ['true', 'yes']:
env['CM_SKIP_SYS_UTILS']='yes'
env['CM_SKIP_SYS_UTILS']='yes'
if str(i.get('skip_sudo', '')).lower() in ['true', 'yes']:
env['CM_TMP_SKIP_SUDO']='yes'
env['CM_TMP_SKIP_SUDO']='yes'

run_state = i.get('run_state', self.run_state)
if not run_state.get('version_info', []):
@@ -387,9 +388,9 @@ def _run(self, i):
elif 'v' in i: verbose=i['v']

if verbose:
env['CM_VERBOSE']='yes'
run_state['tmp_verbose']=True
logging.getLogger().setLevel(logging.DEBUG)
env['CM_VERBOSE']='yes'
run_state['tmp_verbose']=True
logging.getLogger().setLevel(logging.DEBUG)


print_deps = i.get('print_deps', False)
@@ -418,7 +419,8 @@ def _run(self, i):

# Detect current path and record in env for further use in native scripts
current_path = os.path.abspath(os.getcwd())
- env['CM_TMP_CURRENT_PATH'] = current_path
+ r = _update_env(env, 'CM_TMP_CURRENT_PATH', current_path)
+ if r['return']>0: return r

# Check if quiet mode
quiet = i.get('quiet', False) if 'quiet' in i else (env.get('CM_QUIET','').lower() == 'yes')
@@ -472,6 +474,9 @@ def _run(self, i):
if value != '':
env['CM_' + key.upper()] = value

+ r = update_env_with_values(env)
+ if r['return']>0: return r


############################################################################################################
# Check if we want to skip cache (either by skip_cache or by fake_run)
@@ -1317,7 +1322,8 @@ def _run(self, i):

logging.debug(recursion_spaces+' - Version is not specified - use either default_version from meta or min/max/usable: {}'.format(version))

- env['CM_VERSION'] = version
+ r = _update_env(env, 'CM_VERSION', version)
+ if r['return']>0: return r

if 'version-'+version not in cached_tags: cached_tags.append('version-'+version)

@@ -1329,8 +1335,9 @@ def _run(self, i):
if "add_deps_recursive" in versions_meta:
self._merge_dicts_with_tags(add_deps_recursive, versions_meta['add_deps_recursive'])

- env['CM_TMP_CURRENT_SCRIPT_PATH'] = path

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_PATH', path)
+ if r['return']>0: return r

# Run chain of docker dependencies if current run cmd is from inside a docker container
docker_deps = []
if i.get('docker_run_deps'):
@@ -1444,7 +1451,8 @@ def _run(self, i):
elif pip_version_max != '':
pip_version_string = '<='+pip_version_max

- env['CM_TMP_PIP_VERSION_STRING'] = pip_version_string
+ r = _update_env(env, 'CM_TMP_PIP_VERSION_STRING', pip_version_string)
+ if r['return']>0: return r
if pip_version_string != '':
logging.debug(recursion_spaces+' # potential PIP version string (if needed): '+pip_version_string)

@@ -4359,6 +4367,20 @@ def any_enable_or_skip_script(meta, env):

return False

+ ############################################################################################################
+ def _update_env(env, key=None, value=None):
+     if key == None or value == None:
+         return {'return': 1, 'error': 'None value not expected in key and value arguments in _update_env.'}
+     if not isinstance(key, str):
+         return {'return': 1, 'error': 'String value expected inside key argument.'}
+
+     env[key] = value
+
+     r = update_env_with_values(env)
+     if r['return']>0: return r
+
+     return {'return': 0}

############################################################################################################
def update_env_with_values(env, fail_on_not_found=False, extra_env={}):
"""
@@ -4532,9 +4554,12 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"):
path = '"' + path + '"'

cur_dir = os.getcwd()

- env['CM_TMP_CURRENT_SCRIPT_PATH'] = path
- env['CM_TMP_CURRENT_SCRIPT_WORK_PATH'] = cur_dir

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_PATH', path)
+ if r['return']>0: return r

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_WORK_PATH', cur_dir)
+ if r['return']>0: return r

# Record state
if tmp_file_state != '':
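The recurring change in module.py above is the new _update_env() helper: call sites that used to assign env keys directly now set the key and immediately re-expand placeholder values via update_env_with_values(), checking the returned CM-style dict. A self-contained sketch of that pattern; the <<<VAR>>> expansion below is an assumed simplification of what update_env_with_values() actually does:

import re

def update_env_with_values(env):
    # Assumed simplified expansion: replace <<<KEY>>> with env[KEY] when KEY is
    # known, leaving unknown placeholders untouched.
    pattern = re.compile(r'<<<(\w+)>>>')
    for k, v in list(env.items()):
        if isinstance(v, str):
            env[k] = pattern.sub(lambda m: str(env.get(m.group(1), m.group(0))), v)
    return {'return': 0}

def _update_env(env, key=None, value=None):
    # Condensed version of the helper added in this commit: validate arguments,
    # set the key, then re-expand values across the whole env.
    if key is None or value is None:
        return {'return': 1, 'error': 'None value not expected in key and value arguments in _update_env.'}
    if not isinstance(key, str):
        return {'return': 1, 'error': 'String value expected inside key argument.'}
    env[key] = value
    return update_env_with_values(env)

env = {'HOME_DIR': '/home/user'}
r = _update_env(env, 'CM_TMP_CURRENT_PATH', '<<<HOME_DIR>>>/work')
assert r['return'] == 0 and env['CM_TMP_CURRENT_PATH'] == '/home/user/work'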
2 changes: 1 addition & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -783,7 +783,7 @@ variations:
CM_MLPERF_NVIDIA_HARNESS_MAXN: yes

preprocess-data:
- alias: preprocess-data
+ alias: preprocess_data

preprocess_data:
group: run-mode
13 changes: 13 additions & 0 deletions script/download-file/_cm.json
@@ -83,6 +83,19 @@
"deps": [
{
"tags": "get,rclone"
+ },
+ {
+ "tags": "get,rclone-config",
+ "update_tags_from_env_with_prefix": {
+ "_": [
+ "CM_RCLONE_CONFIG_NAME"
+ ]
+ },
+ "enable_if_env": {
+ "CM_RCLONE_CONFIG_NAME": [
+ "on"
+ ]
+ }
}
],
"env": {
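A hedged reading of the new get,rclone-config dependency above: update_tags_from_env_with_prefix appends "_" plus the value of CM_RCLONE_CONFIG_NAME as a variation tag, so CM_RCLONE_CONFIG_NAME=mlc-inference resolves the dependency to get,rclone-config,_mlc-inference; and, assuming the ["on"] entry in enable_if_env is treated as "any non-false value", the dependency is only pulled in when CM_RCLONE_CONFIG_NAME is set at all. A simplified standalone sketch of the tag expansion:

def expand_dep_tags(dep, env):
    # Simplified: for every listed env key with a non-empty value, append
    # prefix + value to the dependency tags (assumed behaviour of
    # update_tags_from_env_with_prefix).
    tags = dep['tags'].split(',')
    for prefix, keys in dep.get('update_tags_from_env_with_prefix', {}).items():
        for key in keys:
            value = str(env.get(key, ''))
            if value != '':
                tags.append(prefix + value)
    return ','.join(tags)

dep = {
    'tags': 'get,rclone-config',
    'update_tags_from_env_with_prefix': {'_': ['CM_RCLONE_CONFIG_NAME']},
}
print(expand_dep_tags(dep, {'CM_RCLONE_CONFIG_NAME': 'mlc-inference'}))
# get,rclone-config,_mlc-inference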
16 changes: 3 additions & 13 deletions script/download-file/customize.py
@@ -159,7 +159,7 @@ def preprocess(i):
env['CM_DOWNLOAD_CMD'] += f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && gdown {extra_download_options} {url})"

elif tool == "rclone":
- if env.get('CM_RCLONE_CONFIG_CMD', '') != '':
+ if env.get('CM_RCLONE_CONFIG_CMD', '') != '': #keeping this for backward compatibility. Ideally should be done via get,rclone-config script
env['CM_DOWNLOAD_CONFIG_CMD'] = env['CM_RCLONE_CONFIG_CMD']
rclone_copy_using = env.get('CM_RCLONE_COPY_USING', 'sync')
if rclone_copy_using == "sync":
@@ -168,19 +168,9 @@ def preprocess(i):
# have to modify the variable from url to temp_url if it is going to be used anywhere after this point
url = url.replace("%", "%%")
temp_download_file = env['CM_DOWNLOAD_FILENAME'].replace("%", "%%")
- env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P"
+ env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P --error-on-no-transfer"
else:
- env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P"
- for i in range(1,5):
- url = env.get('CM_DOWNLOAD_URL'+str(i),'')
- if url == '':
- break
- if env["CM_HOST_OS_TYPE"] == "windows":
- url = url.replace("%", "%%")
- temp_download_file = env['CM_DOWNLOAD_FILENAME'].replace("%", "%%")
- env['CM_DOWNLOAD_CMD'] = f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P)"
- else:
- env['CM_DOWNLOAD_CMD'] = f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P"
+ env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P --error-on-no-transfer"

filename = env['CM_DOWNLOAD_FILENAME']
env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename
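The functional change in customize.py above is twofold: the rclone branch drops the old multi-URL retry loop, and every rclone command now carries --error-on-no-transfer, so rclone exits non-zero when nothing was actually copied (for example a wrong remote path) instead of silently reporting success. A hedged sketch of the command string this now builds, using a hypothetical filename:

import os

q = '"'                        # quote character used by the script
rclone_copy_using = 'sync'     # from CM_RCLONE_COPY_USING; 'sync' is the default
url = 'mlc-inference:mlcommons-inference-wg-public/gpt-j'
filename = 'checkpoint.zip'    # hypothetical CM_DOWNLOAD_FILENAME

cmd = (f"rclone {rclone_copy_using} {q}{url}{q} "
       f"{q}{os.path.join(os.getcwd(), filename)}{q} -P --error-on-no-transfer")
print(cmd)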
6 changes: 4 additions & 2 deletions script/download-file/run.sh
@@ -35,8 +35,10 @@ fi

if [[ ${require_download} == "1" ]]; then
echo ""
- echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
- ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+ if [ -e "${CM_PRE_DOWNLOAD_CLEAN}" ]; then
+   echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+   ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+ fi

echo ""
echo "${CM_DOWNLOAD_CMD}"
2 changes: 1 addition & 1 deletion script/get-ml-model-dlrm-terabyte/_cm.json
@@ -101,7 +101,7 @@
},
"pytorch,fp32,weight_sharded,rclone": {
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_PACKAGE_URL": "mlc-inference:mlcommons-inference-wg-public/model_weights"
}
},
2 changes: 1 addition & 1 deletion script/get-ml-model-gptj/_cm.json
@@ -87,7 +87,7 @@
"CM_UNZIP": "yes",
"CM_DOWNLOAD_CHECKSUM_NOT_USED": "e677e28aaf03da84584bb3073b7ee315",
"CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download",
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/gpt-j"
},
"required_disk_space": 22700
2 changes: 1 addition & 1 deletion script/get-ml-model-stable-diffusion/_cm.json
@@ -160,7 +160,7 @@
"rclone": {
"group": "download-tool",
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_DOWNLOAD_TOOL": "rclone"
},
"adr": {
2 changes: 1 addition & 1 deletion script/get-preprocessed-dataset-criteo/_cm.json
@@ -191,7 +191,7 @@
],
"extra_cache_tags": "criteo,preprocessed,dataset",
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
4 changes: 2 additions & 2 deletions script/get-preprocessed-dataset-openorca/_cm.json
@@ -143,15 +143,15 @@
"mlcommons": {
"env": {
"CM_DATASET_PREPROCESSED_BY_MLC": "yes",
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/open_orca"
},
"deps": [
{
"env": {
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
- "CM_EXTRACT_TO_FOLDER": "openorca-preprocessed"
+ "CM_EXTRACT_TO_FOLDER": "openorca-preprocessed",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference"
},
"tags": "download-and-extract,_rclone",
"update_tags_from_env_with_prefix": {
13 changes: 13 additions & 0 deletions script/get-rclone-config/_cm.yaml
@@ -0,0 +1,13 @@
alias: get-rclone-config
automation_alias: script
automation_uid: 5b4e0237da074764
cache: false #keeping cache off as rerunning the command is safe
can_force_cache: true
tags:
- get
- rclone-config
uid: 6c59ddbc6cd046e3
variations:
  mlc-inference:
    env:
      CM_RCLONE_CONFIG_CMD: 'rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com'
25 changes: 25 additions & 0 deletions script/get-rclone-config/customize.py
@@ -0,0 +1,25 @@
from cmind import utils
import os

def preprocess(i):

    os_info = i['os_info']

    env = i['env']

    meta = i['meta']

    automation = i['automation']

    quiet = (env.get('CM_QUIET', False) == 'yes')

    if env.get('CM_RCLONE_CONFIG_CMD', '') != '':
        env['CM_RUN_CMD'] = env['CM_RCLONE_CONFIG_CMD']

    return {'return':0}

def postprocess(i):

    env = i['env']

    return {'return':0}
1 change: 1 addition & 0 deletions script/get-rclone-config/run.bat
@@ -0,0 +1 @@
rem native script
17 changes: 17 additions & 0 deletions script/get-rclone-config/run.sh
@@ -0,0 +1,17 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

echo "Running: "
echo "${CM_RUN_CMD}"
echo ""

if [[ ${CM_FAKE_RUN} != "yes" ]]; then
  eval "${CM_RUN_CMD}"
  test $? -eq 0 || exit 1
fi
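Taken together, the new get-rclone-config script is a thin wrapper: the _mlc-inference variation supplies CM_RCLONE_CONFIG_CMD, customize.py copies it into CM_RUN_CMD, and run.sh echoes and evals it. It can also be run on its own with something like cm run script --tags=get,rclone-config,_mlc-inference --quiet. A hypothetical standalone equivalent of that chain (credentials elided in this sketch; the full values are in the variation above):

import subprocess

env = {
    # What the _mlc-inference variation sets (key material elided here):
    'CM_RCLONE_CONFIG_CMD': (
        'rclone config create mlc-inference s3 provider=Cloudflare '
        'access_key_id=... secret_access_key=... '
        'endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com'
    ),
}

# customize.py:preprocess() passes the config command through as the run command.
env['CM_RUN_CMD'] = env['CM_RCLONE_CONFIG_CMD']

# run.sh echoes and evals the command, failing on a non-zero exit code.
print(env['CM_RUN_CMD'])
subprocess.run(env['CM_RUN_CMD'], shell=True, check=True)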
