Merge pull request #255 from GATEOverflow/mlperf-inference
Merge from go
arjunsuresh authored Sep 18, 2024
2 parents fd363fa + 6c85bc2 commit 3ac6245
Showing 15 changed files with 125 additions and 39 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test-mlperf-inference-abtf-poc.yml
@@ -37,7 +37,7 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on docker
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=5 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
build2:
runs-on: ${{ matrix.os }}
@@ -62,7 +62,7 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --adr.compiler.tags=gcc --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 --quiet -v
build3:
runs-on: ${{ matrix.os }}
@@ -89,4 +89,4 @@ jobs:
cm pull repo mlcommons@cm4abtf --branch=poc
- name: Test MLPerf Inference ABTF POC using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
- cm run script --tags=run-abtf,inference,_poc-demo --quiet --env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 -v
+ cm run script --tags=run-abtf,inference,_poc-demo --test_query_count=2 --quiet --env.CM_MLPERF_LOADGEN_BUILD_FROM_SRC=off --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7 -v
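All three jobs above now pass the same --test_query_count=2, and each pins the cocoeval dependency with --adr.cocoeval.version_max=1.5.7 --adr.cocoeval.version_max_usable=1.5.7. A hedged reading of those two flags: accept any detected version up to 1.5.7, and if resolution would otherwise pick something newer, fall back to 1.5.7. A small standalone sketch of that assumed rule:

# Hedged sketch of CM-style version_max / version_max_usable handling
# (assumed semantics; the actual resolution logic lives in automation/script/module.py).
def resolve_version(detected, version_max='', version_max_usable=''):
    def as_tuple(v):
        return tuple(int(x) for x in v.split('.'))
    if version_max and as_tuple(detected) > as_tuple(version_max):
        # Too new: fall back to the newest version known to work.
        return version_max_usable or version_max
    return detected

print(resolve_version('2.0.8', '1.5.7', '1.5.7'))   # -> 1.5.7
print(resolve_version('1.5.7', '1.5.7', '1.5.7'))   # -> 1.5.7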
53 changes: 39 additions & 14 deletions automation/script/module.py
@@ -351,15 +351,16 @@ def _run(self, i):

debug_uid = i.get('debug_uid', '')
if debug_uid!='':
- env['CM_TMP_DEBUG_UID'] = debug_uid
+ r = _update_env(env, 'CM_TMP_DEBUG_UID', debug_uid)
+ if r['return']>0: return r

fake_deps = i.get('fake_deps', False)
if fake_deps: env['CM_TMP_FAKE_DEPS']='yes'

if str(i.get('skip_sys_utils', '')).lower() in ['true', 'yes']:
env['CM_SKIP_SYS_UTILS']='yes'
env['CM_SKIP_SYS_UTILS']='yes'
if str(i.get('skip_sudo', '')).lower() in ['true', 'yes']:
env['CM_TMP_SKIP_SUDO']='yes'
env['CM_TMP_SKIP_SUDO']='yes'

run_state = i.get('run_state', self.run_state)
if not run_state.get('version_info', []):
@@ -387,9 +388,9 @@ def _run(self, i):
elif 'v' in i: verbose=i['v']

if verbose:
env['CM_VERBOSE']='yes'
run_state['tmp_verbose']=True
logging.getLogger().setLevel(logging.DEBUG)
env['CM_VERBOSE']='yes'
run_state['tmp_verbose']=True
logging.getLogger().setLevel(logging.DEBUG)


print_deps = i.get('print_deps', False)
@@ -418,7 +419,8 @@ def _run(self, i):

# Detect current path and record in env for further use in native scripts
current_path = os.path.abspath(os.getcwd())
- env['CM_TMP_CURRENT_PATH'] = current_path
+ r = _update_env(env, 'CM_TMP_CURRENT_PATH', current_path)
+ if r['return']>0: return r

# Check if quiet mode
quiet = i.get('quiet', False) if 'quiet' in i else (env.get('CM_QUIET','').lower() == 'yes')
@@ -472,6 +474,9 @@ def _run(self, i):
if value != '':
env['CM_' + key.upper()] = value

+ r = update_env_with_values(env)
+ if r['return']>0: return r


############################################################################################################
# Check if we want to skip cache (either by skip_cache or by fake_run)
@@ -1317,7 +1322,8 @@ def _run(self, i):

logging.debug(recursion_spaces+' - Version is not specified - use either default_version from meta or min/max/usable: {}'.format(version))

- env['CM_VERSION'] = version
+ r = _update_env(env, 'CM_VERSION', version)
+ if r['return']>0: return r

if 'version-'+version not in cached_tags: cached_tags.append('version-'+version)

@@ -1329,8 +1335,9 @@ def _run(self, i):
if "add_deps_recursive" in versions_meta:
self._merge_dicts_with_tags(add_deps_recursive, versions_meta['add_deps_recursive'])

- env['CM_TMP_CURRENT_SCRIPT_PATH'] = path

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_PATH', path)
+ if r['return']>0: return r

# Run chain of docker dependencies if current run cmd is from inside a docker container
docker_deps = []
if i.get('docker_run_deps'):
@@ -1444,7 +1451,8 @@ def _run(self, i):
elif pip_version_max != '':
pip_version_string = '<='+pip_version_max

- env['CM_TMP_PIP_VERSION_STRING'] = pip_version_string
+ r = _update_env(env, 'CM_TMP_PIP_VERSION_STRING', pip_version_string)
+ if r['return']>0: return r
if pip_version_string != '':
logging.debug(recursion_spaces+' # potential PIP version string (if needed): '+pip_version_string)

@@ -4359,6 +4367,20 @@ def any_enable_or_skip_script(meta, env):

return False

+ ############################################################################################################
+ def _update_env(env, key=None, value=None):
+     if key == None or value == None:
+         return {'return': 1, 'error': 'None value not expected in key and value arguments in _update_env.'}
+     if not isinstance(key, str):
+         return {'return': 1, 'error': 'String value expected inside key argument.'}
+
+     env[key] = value
+
+     r = update_env_with_values(env)
+     if r['return']>0: return r
+
+     return {'return': 0}

############################################################################################################
def update_env_with_values(env, fail_on_not_found=False, extra_env={}):
"""
@@ -4532,9 +4554,12 @@ def prepare_and_run_script_with_postprocessing(i, postprocess="postprocess"):
path = '"' + path + '"'

cur_dir = os.getcwd()

- env['CM_TMP_CURRENT_SCRIPT_PATH'] = path
- env['CM_TMP_CURRENT_SCRIPT_WORK_PATH'] = cur_dir

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_PATH', path)
+ if r['return']>0: return r

+ r = _update_env(env, 'CM_TMP_CURRENT_SCRIPT_WORK_PATH', cur_dir)
+ if r['return']>0: return r

# Record state
if tmp_file_state != '':
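The recurring change in module.py above is the new _update_env() helper: call sites that used to assign env keys directly now set the key and immediately re-expand placeholder values via update_env_with_values(), checking the returned CM-style dict. A self-contained sketch of that pattern; the <<<VAR>>> expansion below is an assumed simplification of what update_env_with_values() actually does:

import re

def update_env_with_values(env):
    # Assumed simplified expansion: replace <<<KEY>>> with env[KEY] when KEY is
    # known, leaving unknown placeholders untouched.
    pattern = re.compile(r'<<<(\w+)>>>')
    for k, v in list(env.items()):
        if isinstance(v, str):
            env[k] = pattern.sub(lambda m: str(env.get(m.group(1), m.group(0))), v)
    return {'return': 0}

def _update_env(env, key=None, value=None):
    # Condensed version of the helper added in this commit: validate arguments,
    # set the key, then re-expand values across the whole env.
    if key is None or value is None:
        return {'return': 1, 'error': 'None value not expected in key and value arguments in _update_env.'}
    if not isinstance(key, str):
        return {'return': 1, 'error': 'String value expected inside key argument.'}
    env[key] = value
    return update_env_with_values(env)

env = {'HOME_DIR': '/home/user'}
r = _update_env(env, 'CM_TMP_CURRENT_PATH', '<<<HOME_DIR>>>/work')
assert r['return'] == 0 and env['CM_TMP_CURRENT_PATH'] == '/home/user/work'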
2 changes: 1 addition & 1 deletion script/app-mlperf-inference-nvidia/_cm.yaml
@@ -783,7 +783,7 @@ variations:
CM_MLPERF_NVIDIA_HARNESS_MAXN: yes

preprocess-data:
- alias: preprocess-data
+ alias: preprocess_data

preprocess_data:
group: run-mode
13 changes: 13 additions & 0 deletions script/download-file/_cm.json
@@ -83,6 +83,19 @@
"deps": [
{
"tags": "get,rclone"
+ },
+ {
+ "tags": "get,rclone-config",
+ "update_tags_from_env_with_prefix": {
+ "_": [
+ "CM_RCLONE_CONFIG_NAME"
+ ]
+ },
+ "enable_if_env": {
+ "CM_RCLONE_CONFIG_NAME": [
+ "on"
+ ]
+ }
}
],
"env": {
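A hedged reading of the new get,rclone-config dependency above: update_tags_from_env_with_prefix appends "_" plus the value of CM_RCLONE_CONFIG_NAME as a variation tag, so CM_RCLONE_CONFIG_NAME=mlc-inference resolves the dependency to get,rclone-config,_mlc-inference; and, assuming the ["on"] entry in enable_if_env is treated as "any non-false value", the dependency is only pulled in when CM_RCLONE_CONFIG_NAME is set at all. A simplified standalone sketch of the tag expansion:

def expand_dep_tags(dep, env):
    # Simplified: for every listed env key with a non-empty value, append
    # prefix + value to the dependency tags (assumed behaviour of
    # update_tags_from_env_with_prefix).
    tags = dep['tags'].split(',')
    for prefix, keys in dep.get('update_tags_from_env_with_prefix', {}).items():
        for key in keys:
            value = str(env.get(key, ''))
            if value != '':
                tags.append(prefix + value)
    return ','.join(tags)

dep = {
    'tags': 'get,rclone-config',
    'update_tags_from_env_with_prefix': {'_': ['CM_RCLONE_CONFIG_NAME']},
}
print(expand_dep_tags(dep, {'CM_RCLONE_CONFIG_NAME': 'mlc-inference'}))
# get,rclone-config,_mlc-inference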
16 changes: 3 additions & 13 deletions script/download-file/customize.py
@@ -159,7 +159,7 @@ def preprocess(i):
env['CM_DOWNLOAD_CMD'] += f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && gdown {extra_download_options} {url})"

elif tool == "rclone":
- if env.get('CM_RCLONE_CONFIG_CMD', '') != '':
+ if env.get('CM_RCLONE_CONFIG_CMD', '') != '': #keeping this for backward compatibility. Ideally should be done via get,rclone-config script
env['CM_DOWNLOAD_CONFIG_CMD'] = env['CM_RCLONE_CONFIG_CMD']
rclone_copy_using = env.get('CM_RCLONE_COPY_USING', 'sync')
if rclone_copy_using == "sync":
@@ -168,19 +168,9 @@ def preprocess(i):
# have to modify the variable from url to temp_url if it is going to be used anywhere after this point
url = url.replace("%", "%%")
temp_download_file = env['CM_DOWNLOAD_FILENAME'].replace("%", "%%")
- env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P"
+ env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P --error-on-no-transfer"
else:
- env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P"
- for i in range(1,5):
- url = env.get('CM_DOWNLOAD_URL'+str(i),'')
- if url == '':
- break
- if env["CM_HOST_OS_TYPE"] == "windows":
- url = url.replace("%", "%%")
- temp_download_file = env['CM_DOWNLOAD_FILENAME'].replace("%", "%%")
- env['CM_DOWNLOAD_CMD'] = f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), temp_download_file)}{q} -P)"
- else:
- env['CM_DOWNLOAD_CMD'] = f" || ((rm -f {env['CM_DOWNLOAD_FILENAME']} || true) && rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P"
+ env['CM_DOWNLOAD_CMD'] = f"rclone {rclone_copy_using} {q}{url}{q} {q}{os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])}{q} -P --error-on-no-transfer"

filename = env['CM_DOWNLOAD_FILENAME']
env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename
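The functional change in customize.py above is twofold: the rclone branch drops the old multi-URL retry loop, and every rclone command now carries --error-on-no-transfer, so rclone exits non-zero when nothing was actually copied (for example a wrong remote path) instead of silently reporting success. A hedged sketch of the command string this now builds, using a hypothetical filename:

import os

q = '"'                        # quote character used by the script
rclone_copy_using = 'sync'     # from CM_RCLONE_COPY_USING; 'sync' is the default
url = 'mlc-inference:mlcommons-inference-wg-public/gpt-j'
filename = 'checkpoint.zip'    # hypothetical CM_DOWNLOAD_FILENAME

cmd = (f"rclone {rclone_copy_using} {q}{url}{q} "
       f"{q}{os.path.join(os.getcwd(), filename)}{q} -P --error-on-no-transfer")
print(cmd)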
6 changes: 4 additions & 2 deletions script/download-file/run.sh
@@ -35,8 +35,10 @@ fi

if [[ ${require_download} == "1" ]]; then
echo ""
- echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
- ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+ if [ -e "${CM_PRE_DOWNLOAD_CLEAN}" ]; then
+   echo ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+   ${CM_PRE_DOWNLOAD_CLEAN_CMD}
+ fi

echo ""
echo "${CM_DOWNLOAD_CMD}"
2 changes: 1 addition & 1 deletion script/get-ml-model-dlrm-terabyte/_cm.json
@@ -101,7 +101,7 @@
},
"pytorch,fp32,weight_sharded,rclone": {
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_PACKAGE_URL": "mlc-inference:mlcommons-inference-wg-public/model_weights"
}
},
2 changes: 1 addition & 1 deletion script/get-ml-model-gptj/_cm.json
@@ -87,7 +87,7 @@
"CM_UNZIP": "yes",
"CM_DOWNLOAD_CHECKSUM_NOT_USED": "e677e28aaf03da84584bb3073b7ee315",
"CM_PACKAGE_URL": "https://cloud.mlcommons.org/index.php/s/QAZ2oM94MkFtbQx/download",
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/gpt-j"
},
"required_disk_space": 22700
2 changes: 1 addition & 1 deletion script/get-ml-model-stable-diffusion/_cm.json
@@ -160,7 +160,7 @@
"rclone": {
"group": "download-tool",
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_DOWNLOAD_TOOL": "rclone"
},
"adr": {
2 changes: 1 addition & 1 deletion script/get-preprocessed-dataset-criteo/_cm.json
@@ -191,7 +191,7 @@
],
"extra_cache_tags": "criteo,preprocessed,dataset",
"env": {
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_DATASET_PREPROCESSED_PATH",
4 changes: 2 additions & 2 deletions script/get-preprocessed-dataset-openorca/_cm.json
@@ -143,15 +143,15 @@
"mlcommons": {
"env": {
"CM_DATASET_PREPROCESSED_BY_MLC": "yes",
- "CM_RCLONE_CONFIG_CMD": "rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com",
"CM_RCLONE_URL": "mlc-inference:mlcommons-inference-wg-public/open_orca"
},
"deps": [
{
"env": {
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
"CM_EXTRACT_FINAL_ENV_NAME": "CM_OPENORCA_PREPROCESSED_ROOT",
- "CM_EXTRACT_TO_FOLDER": "openorca-preprocessed"
+ "CM_EXTRACT_TO_FOLDER": "openorca-preprocessed",
+ "CM_RCLONE_CONFIG_NAME": "mlc-inference"
},
"tags": "download-and-extract,_rclone",
"update_tags_from_env_with_prefix": {
13 changes: 13 additions & 0 deletions script/get-rclone-config/_cm.yaml
@@ -0,0 +1,13 @@
alias: get-rclone-config
automation_alias: script
automation_uid: 5b4e0237da074764
cache: false #keeping cache off as rerunning the command is safe
can_force_cache: true
tags:
- get
- rclone-config
uid: 6c59ddbc6cd046e3
variations:
  mlc-inference:
    env:
      CM_RCLONE_CONFIG_CMD: 'rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com'
25 changes: 25 additions & 0 deletions script/get-rclone-config/customize.py
@@ -0,0 +1,25 @@
from cmind import utils
import os

def preprocess(i):

    os_info = i['os_info']

    env = i['env']

    meta = i['meta']

    automation = i['automation']

    quiet = (env.get('CM_QUIET', False) == 'yes')

    if env.get('CM_RCLONE_CONFIG_CMD', '') != '':
        env['CM_RUN_CMD'] = env['CM_RCLONE_CONFIG_CMD']

    return {'return':0}

def postprocess(i):

    env = i['env']

    return {'return':0}
1 change: 1 addition & 0 deletions script/get-rclone-config/run.bat
@@ -0,0 +1 @@
rem native script
17 changes: 17 additions & 0 deletions script/get-rclone-config/run.sh
@@ -0,0 +1,17 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

echo "Running: "
echo "${CM_RUN_CMD}"
echo ""

if [[ ${CM_FAKE_RUN} != "yes" ]]; then
  eval "${CM_RUN_CMD}"
  test $? -eq 0 || exit 1
fi
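Taken together, the new get-rclone-config script is a thin wrapper: the _mlc-inference variation supplies CM_RCLONE_CONFIG_CMD, customize.py copies it into CM_RUN_CMD, and run.sh echoes and evals it. It can also be run on its own with something like cm run script --tags=get,rclone-config,_mlc-inference --quiet. A hypothetical standalone equivalent of that chain (credentials elided in this sketch; the full values are in the variation above):

import subprocess

env = {
    # What the _mlc-inference variation sets (key material elided here):
    'CM_RCLONE_CONFIG_CMD': (
        'rclone config create mlc-inference s3 provider=Cloudflare '
        'access_key_id=... secret_access_key=... '
        'endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com'
    ),
}

# customize.py:preprocess() passes the config command through as the run command.
env['CM_RUN_CMD'] = env['CM_RCLONE_CONFIG_CMD']

# run.sh echoes and evals the command, failing on a non-zero exit code.
print(env['CM_RUN_CMD'])
subprocess.run(env['CM_RUN_CMD'], shell=True, check=True)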
