Skip to content

Commit

Permalink
updating TinyMLPerf tutorials (#789)
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsuresh authored Jun 7, 2023
2 parents ab0c4e4 + 240190a commit a974b5b
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 14 deletions.
4 changes: 3 additions & 1 deletion cm-mlops/automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2384,7 +2384,9 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a
}

utils.merge_dicts({'dict1':ii, 'dict2':d, 'append_lists':True, 'append_unique':True})


update_env_with_values(ii['env']) #to update env local to a dependency

r = self.cmind.access(ii)
if r['return']>0: return r

Expand Down
8 changes: 8 additions & 0 deletions cm-mlops/script/download-and-extract/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@
}
}
},
"gdown": {
"group": "download-tool",
"add_deps_recursive": {
"download-script": {
"tags": "_gdown"
}
}
},
"torrent": {
"group": "download-tool",
"prehook_deps": [
Expand Down
14 changes: 13 additions & 1 deletion cm-mlops/script/download-file/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
},
"input_description": {},
"input_mapping": {
"url": "CM_DOWNLOAD_URL"
"url": "CM_DOWNLOAD_URL",
"download_path": "CM_DOWNLOAD_PATH"
},
"new_env_keys": [
"CM_DOWNLOAD_DOWNLOADED_PATH",
Expand Down Expand Up @@ -45,6 +46,17 @@
},
"group": "download-tool"
},
"gdown": {
"env": {
"CM_DOWNLOAD_TOOL": "gdown"
},
"group": "download-tool",
"deps": [
{
"tags": "get,generic-python-lib,_package.gdown"
}
]
},
"cmutil": {
"default": true,
"env": {
Expand Down
14 changes: 13 additions & 1 deletion cm-mlops/script/download-file/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,22 @@ def preprocess(i):

env = i['env']

if env.get('CM_DOWNLOAD_URL','')=='':
return {'return':1, 'error': 'please specify URL using --url={URL} or --env.CM_DOWNLOAD_URL={URL}'}


meta = i['meta']

automation = i['automation']

quiet = (env.get('CM_QUIET', False) == 'yes')

if env.get('CM_DOWNLOAD_PATH', '') != '':
download_path = env['CM_DOWNLOAD_PATH']
if not os.path.exists(download_path):
os.makedirs(download_path, exist_ok = True)
os.chdir(download_path)

if not env.get('CM_DOWNLOAD_FILENAME'):
urltail = os.path.basename(env['CM_DOWNLOAD_URL'])
urlhead = os.path.dirname(env['CM_DOWNLOAD_URL'])
Expand All @@ -38,6 +48,8 @@ def preprocess(i):
env['CM_DOWNLOAD_CMD'] = f"wget -nc {extra_download_options} {url}"
elif env['CM_DOWNLOAD_TOOL'] == "curl":
env['CM_DOWNLOAD_CMD'] = f"curl {extra_download_options} {url}"
elif env['CM_DOWNLOAD_TOOL'] == "gdown":
env['CM_DOWNLOAD_CMD'] = f"gdown -c {extra_download_options} {url}"

filename = env['CM_DOWNLOAD_FILENAME']
env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename
Expand All @@ -63,7 +75,7 @@ def postprocess(i):
if not os.path.exists(filepath):
return {'return':1, 'error': 'CM_DOWNLOAD_FILENAME is not set and CM_DOWNLOAD_URL given is not pointing to a file'}

if env.get('CM_DOWNLOAD_FINAL_ENV_NAME') and env.get(env['CM_DOWNLOAD_FINAL_ENV_NAME'], '') == '':
if env.get('CM_DOWNLOAD_FINAL_ENV_NAME','') != '' and env.get(env['CM_DOWNLOAD_FINAL_ENV_NAME'], '') == '':
env[env['CM_DOWNLOAD_FINAL_ENV_NAME']] = filepath

env['CM_GET_DEPENDENT_CACHED_PATH'] = filepath
Expand Down
7 changes: 3 additions & 4 deletions cm-mlops/script/download-file/run.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#!/bin/bash
if [ ! -f ${CM_DOWNLOAD_DOWNLOADED_FILENAME} ]; then
require_download=1
else
require_download="1"
if [ -e ${CM_DOWNLOAD_DOWNLOADED_PATH} ]; then
CMD=${CM_DOWNLOAD_CHECKSUM_CMD}
echo ${CMD}
eval ${CMD}
test $? -eq 0 || require_download=1
test $? -eq 0 || require_download="1"
fi

if [[ ${require_download} == "1" ]]; then
Expand Down
19 changes: 16 additions & 3 deletions cm-mlops/script/extract-file/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ def preprocess(i):
if 'CM_EXTRACT_FILEPATH' not in env:
return {'return': 1, 'error': 'Extract with no download requested and CM_EXTRACT_FILEPATH is not set'}

if env.get('CM_EXTRACT_PATH', '') != '':
extract_path = env['CM_EXTRACT_PATH']
if not os.path.exists(extract_path):
os.makedirs(extract_path, exist_ok = True)
os.chdir(extract_path)

filename = env['CM_EXTRACT_FILEPATH']
env['CM_EXTRACT_FILENAME'] = filename

Expand All @@ -36,7 +42,7 @@ def preprocess(i):
env['CM_EXTRACT_TOOL_OPTIONS'] = ' -xvf'
env['CM_EXTRACT_TOOL'] = 'tar '
else:
env['CM_EXTRACT_TOOL_OPTIONS'] = ' -xvzf'
env['CM_EXTRACT_TOOL_OPTIONS'] = ' --skip-old-files -xvzf '
env['CM_EXTRACT_TOOL'] = 'tar '
elif filename.endswith(".tar.xz"):
env['CM_EXTRACT_TOOL_OPTIONS'] = ' -xvJf'
Expand Down Expand Up @@ -65,8 +71,15 @@ def preprocess(i):

env['CM_EXTRACT_CMD'] = env['CM_EXTRACT_TOOL'] + ' ' + env.get('CM_EXTRACT_TOOL_EXTRA_OPTIONS', '') + ' ' + env.get('CM_EXTRACT_TOOL_OPTIONS', '')+ ' '+ filename

if env.get('CM_EXTRACT_EXTRACTED_CHECKSUM', '') != '':
env['CM_EXTRACT_EXTRACTED_CHECKSUM_CMD'] = "echo {} {} | md5sum -c".format(env.get('CM_EXTRACT_EXTRACTED_CHECKSUM'), env['CM_EXTRACT_EXTRACTED_FILENAME'])
final_file = env.get('CM_EXTRACT_EXTRACTED_FILENAME')

if final_file:
if env.get('CM_EXTRACT_EXTRACTED_CHECKSUM_FILE', '') != '':
env['CM_EXTRACT_EXTRACTED_CHECKSUM_CMD'] = "cd {} && md5sum -c {}".format(final_file, env.get('CM_EXTRACT_EXTRACTED_CHECKSUM_FILE'))
elif env.get('CM_EXTRACT_EXTRACTED_CHECKSUM', '') != '':
env['CM_EXTRACT_EXTRACTED_CHECKSUM_CMD'] = "echo {} {} | md5sum -c".format(env.get('CM_EXTRACT_EXTRACTED_CHECKSUM'), env['CM_EXTRACT_EXTRACTED_FILENAME'])
else:
env['CM_EXTRACT_EXTRACTED_CHECKSUM_CMD'] = ""
else:
env['CM_EXTRACT_EXTRACTED_CHECKSUM_CMD'] = ""

Expand Down
15 changes: 11 additions & 4 deletions cm-mlops/script/extract-file/run.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
#!/bin/bash
echo $PWD
if [ -e "${CM_EXTRACT_EXTRACTED_FILENAME}" ] ; then
CMD=${CM_EXTRACT_EXTRACTED_CHECKSUM_CMD}
echo "${CMD}"
eval "${CMD}"
test $? -eq 0 && exit 0
fi

CMD=${CM_EXTRACT_CMD}
echo ${CMD}
eval ${CMD}
echo "${CMD}"
eval "${CMD}"
test $? -eq 0 || exit $?

CMD=${CM_EXTRACT_EXTRACTED_CHECKSUM_CMD}
echo ${CMD}
eval ${CMD}
echo "${CMD}"
eval "${CMD}"
test $? -eq 0 || exit $?
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
94 changes: 94 additions & 0 deletions cm-mlops/script/prepare-training-data-bert/_cm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"alias": "prepare-training-data-bert",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"deps": [],
"input_description": {},
"input_mapping": {
"data_dir": "CM_DATA_DIR"
},
"new_env_keys": [],
"new_state_keys": [],
"post_deps": [],
"posthook_deps": [],
"prehook_deps": [],
"tags": [
"prepare",
"training",
"data",
"input",
"bert"
],
"uid": "1e06a7abe23545eb",
"variations": {
"nvidia": {
"group": "implementation",
"default": true,
"deps": [
{
"tags": "get,git,repo,_repo.https://github.com/mlcommons/training_results_v2.1"
}
],
"prehook_deps": [
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1fbGClQMi2CoMv7fwrwTC5YYPooQBdcFW",
"env": {
"CM_DOWNLOAD_FILENAME": "bert_config.json",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CONFIG_DOWNLOAD_DIR>>>"
}
},
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1USK108J6hMM_d27xCHi738qBL8_BT1u1",
"env": {
"CM_DOWNLOAD_FILENAME": "vocab.txt",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_VOCAB_DOWNLOAD_DIR>>>"
}
},
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1tmMgLwoBvbEJEHXh77sqrXYw5RpqT8R_",
"env": {
"CM_DOWNLOAD_FILENAME": "bert_reference_results_text_md5.txt",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_DATA_DOWNLOAD_DIR>>>",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_REFERENCE_RESULTS_TEXT_MD5_FILE_PATH"
}
},
{
"tags": "download-and-extract,file,_gdown,_extract,_url.https://drive.google.com/uc?id=14xV2OUGSQDG_yDBrmbSdcDC-QGeqpfs_",
"env": {
"CM_DOWNLOAD_FILENAME": "results_text.tar.gz",
"CM_EXTRACT_EXTRACTED_FILENAME": "results4",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_DATA_DOWNLOAD_DIR>>>",
"CM_EXTRACT_PATH": "<<<CM_BERT_DATA_DOWNLOAD_DIR>>>",
"CM_EXTRACT_EXTRACTED_CHECKSUM_FILE": "<<<CM_BERT_REFERENCE_RESULTS_TEXT_MD5_FILE_PATH>>>"
}
},
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1chiTBljF0Eh1U5pKs6ureVHgSbtU8OG_",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-28252.data-00000-of-00001",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>"
}
},
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1Q47V3K3jFRkbJ2zGCrKkKk-n0fvMZsa0",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-28252.index",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>"
}
},
{
"tags": "download,file,_gdown,_url.https://drive.google.com/uc?id=1vAcVmXSLsLeQ1q7gvHnQUSth5W_f_pwv",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-28252.meta",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>"
}
}
],
"env": {
"CM_TMP_VARIATION": "nvidia"
}
}
},
"versions": {}
}
31 changes: 31 additions & 0 deletions cm-mlops/script/prepare-training-data-bert/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from cmind import utils
import os

def preprocess(i):
    """Prepare the environment for the BERT training-data preparation script.

    When the ``nvidia`` variation is active (``CM_TMP_VARIATION == "nvidia"``)
    this fills in the per-artifact download directories and the directory the
    run script should execute in. For any other variation the environment is
    left untouched.

    Args:
        i: CM script input dictionary. Only ``i['env']`` (a mutable dict of
            environment values) is read and updated.

    Returns:
        ``{'return': 0}`` on success, or ``{'return': 1, 'error': <message>}``
        if the ``nvidia`` variation is selected but
        ``CM_GIT_REPO_CHECKOUT_PATH`` is not set.
    """
    env = i['env']

    if env.get('CM_TMP_VARIATION', '') != 'nvidia':
        return {'return': 0}

    # Validate before touching env so a failure leaves it unmodified.
    repo_path = env.get('CM_GIT_REPO_CHECKOUT_PATH', '')
    if repo_path == '':
        return {
            'return': 1,
            'error': 'CM_GIT_REPO_CHECKOUT_PATH is not set; the training '
                     'results repository must be checked out first',
        }

    # An empty CM_DATA_DIR is treated like an unset one: joining onto ''
    # would silently produce paths relative to whatever directory a later
    # step happens to run in.
    datadir = env.get('CM_DATA_DIR') or os.getcwd()

    phase1_dir = os.path.join(datadir, 'phase1')
    env['CM_BERT_CONFIG_DOWNLOAD_DIR'] = phase1_dir
    env['CM_BERT_VOCAB_DOWNLOAD_DIR'] = phase1_dir
    env['CM_BERT_CHECKPOINT_DOWNLOAD_DIR'] = phase1_dir
    env['CM_BERT_DATA_DOWNLOAD_DIR'] = os.path.join(datadir, 'download')

    # The run script changes into this directory before building the image.
    env['CM_RUN_DIR'] = os.path.join(
        repo_path,
        'NVIDIA', 'benchmarks', 'bert', 'implementations', 'pytorch-22.09',
    )

    return {'return': 0}

def postprocess(i):
    """Finish the script run; no post-processing is needed.

    Args:
        i: CM script input dictionary (unused).

    Returns:
        ``{'return': 0}``. A non-zero ``return`` is treated as a failure by
        callers, so this must report success.
    """
    return {'return': 0}
34 changes: 34 additions & 0 deletions cm-mlops/script/prepare-training-data-bert/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency



# Exit the script with the previous command's status if that command failed.
# The status is captured first: after `test` runs, $? holds the result of
# `test` itself rather than the original failure code.
function exit_if_error() {
  local status=$?
  if [ "${status}" -ne 0 ]; then
    exit "${status}"
  fi
}

# Print the command given as $1, then evaluate it unless CM_FAKE_RUN is 'yes'.
# A failing command is handed to exit_if_error.
function run() {
  local cmd="$1"

  echo "Running: "
  echo "${cmd}"
  echo ""

  # Fake run: the command is only printed, never executed.
  if [[ ${CM_FAKE_RUN} == 'yes' ]]; then
    return 0
  fi

  eval "${cmd}"
  exit_if_error
}

#Add your run commands here...
# run "$CM_RUN_CMD"

# Host directory mounted into the container as /workspace/bert_data.
CUR=${CM_DATA_DIR:-"$PWD/data"}
run "cd \"${CM_RUN_DIR}\""
run "docker build --pull -t mlperf-nvidia:language_model ."
# The command substitution is escaped so that `docker run` is executed by
# run() itself: it is then logged as a command, skipped when CM_FAKE_RUN is
# 'yes', and a failure to start the container stops the script. ID is set
# in the global scope by the eval inside run().
run "ID=\$(docker run -dt --runtime=nvidia --ipc=host -v \"${CUR}:/workspace/bert_data\" mlperf-nvidia:language_model bash)"
#run "docker exec $ID bash -c 'python3 -m pip install --upgrade gdown && cd /workspace/bert && ./input_preprocessing/prepare_data.sh -s --outputdir /workspace/bert_data'"
run "docker exec ${ID} bash -c 'cd /workspace/bert && ./input_preprocessing/prepare_data.sh -s --outputdir /workspace/bert_data'"

0 comments on commit a974b5b

Please sign in to comment.