Skip to content

Commit

Permalink
Merge branch 'GATEOverflow:mlperf-inference' into mlperf-inference
Browse files Browse the repository at this point in the history
  • Loading branch information
anandhu-eng authored May 19, 2024
2 parents 8930565 + 8556c62 commit 8d0d5ee
Show file tree
Hide file tree
Showing 16 changed files with 123 additions and 92 deletions.
8 changes: 3 additions & 5 deletions automation/script/README-specs.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ See the [automatically generated catalog](https://github.com/mlcommons/ck/blob/m
When we run a CM script we can also pass inputs to it and any input added in `input_mapping` dictionary inside `_cm.json` gets converted to the corresponding `ENV` variable.

### Conditional execution of any `deps`, `post_deps`
We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its executional conditional
We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its execution conditional

### Versions
We can specify any specific version of a script using `version`. `version_max` and `version_min` are also possible options.
Expand Down Expand Up @@ -73,9 +73,7 @@ Sometimes it is difficult to add all variations needed for a script like say `ba

### Script workflow (env, deps, native scripts)

![](assets/scripts-workflow.png)
<img src="https://github.com/mlcommons/cm4mlops/raw/mlperf-inference/automation/script/assets/scripts-workflow.png" width="248">




&copy; 2022-23 [MLCommons](https://mlcommons.org)<br>
&copy; 2022-24 [MLCommons](https://mlcommons.org)<br>
9 changes: 7 additions & 2 deletions automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -4314,19 +4314,24 @@ def enable_or_skip_script(meta, env):
(AND function)
"""
for key in meta:
meta_key = [str(v).lower() for v in meta[key]]
if key in env:
value = str(env[key]).lower()

meta_key = [str(v).lower() for v in meta[key]]

if set(meta_key) & set(["yes", "on", "true", "1"]):
# Any set value other than false is taken as set
if value not in ["no", "off", "false", "0"]:
continue
elif set(meta_key) & set(["no", "off", "false", "0"]):
if value in ["no", "off", "false", "0"]:
continue
elif value in meta_key:
continue
else:
if set(meta_key) & set(["no", "off", "false", "0"]):
# If key is missing in env, and if the expected value is False, consider it a match
continue

return False

return True
Expand Down
10 changes: 6 additions & 4 deletions script/app-mlperf-inference-intel/run_gptj_harness.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
export PATH=${CM_CONDA_BIN_PATH}:$PATH

KMP_BLOCKTIME=${KMP_BLOCKTIME:-10}

export KMP_BLOCKTIME=${KMP_BLOCKTIME}
export KMP_AFFINITY=granularity=fine,compact,1,0
export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so
Expand All @@ -9,11 +11,11 @@ export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so
export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l`
num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }')

NUM_PROC=${NUM_PROC:-num_numa}
NUM_PROC=${NUM_PROC:-$num_numa}
CPUS_PER_PROC=$((num_physical_cores/num_numa))
WORKERS_PER_PROC=${WORKERS_PER_PROC}
WORKERS_PER_PROC=${WORKERS_PER_PROC:-1}
TOTAL_SAMPLE_COUNT=13368
BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE}
BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8}
TIMESTAMP=$(date +%m-%d-%H-%M)
HOSTNAME=$(hostname)
#OUTPUT_DIR=offline-output-${HOSTNAME}-batch-${BATCH_SIZE}-procs-${NUM_PROC}-ins-per-proc-${WORKERS_PER_PROC}-${TIMESTAMP}
Expand All @@ -28,7 +30,7 @@ USER_CONF="${CM_MLPERF_USER_CONF}"


cmd="python runner.py --workload-name gptj \
--scenario ${${CM_MLPERF_LOADGEN_SCENARIO}} \
--scenario ${CM_MLPERF_LOADGEN_SCENARIO} \
--mode ${LOADGEN_MODE} \
--num-proc ${NUM_PROC} \
--cpus-per-proc ${CPUS_PER_PROC} \
Expand Down
2 changes: 1 addition & 1 deletion script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ variations:
tags: run,accuracy,mlperf,_imagenet
docker:
deps:
- tags: get,dataset,imagenet,original
- tags: get,dataset,imagenet,validation,original
names:
- imagenet-original
- dataset-original
Expand Down
66 changes: 9 additions & 57 deletions script/app-mlperf-inference/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ def postprocess(i):
env['CMD'] = ''
state = i['state']

if env.get('CM_MLPERF_USER_CONF', '') == '':
return {'return': 0}
#if env.get('CM_MLPERF_USER_CONF', '') == '':
# return {'return': 0}

output_dir = env['CM_MLPERF_OUTPUT_DIR']
mode = env['CM_MLPERF_LOADGEN_MODE']
Expand Down Expand Up @@ -254,16 +254,16 @@ def postprocess(i):
if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME']

# Check CM automation repository
repo_name = 'mlcommons@ck'
repo_name = 'mlcommons@cm4mlops'
repo_hash = ''
r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'})
r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@cm4mlops,9e97bb72b0474657'})
if r['return']==0 and len(r['list'])==1:
repo_path = r['list'][0].path
if os.path.isdir(repo_path):
repo_name = os.path.basename(repo_path)

# Check Grigori's dev
if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck'
# Check dev
if repo_name == 'cm4mlops': repo_name = 'gateoverflow@cm4mlops'

r = cm.access({'action':'system',
'automation':'utils',
Expand All @@ -275,54 +275,6 @@ def postprocess(i):
host_info['cm_repo_name'] = repo_name
host_info['cm_repo_git_hash'] = repo_hash

# Check a few important MLCommons repos
xhashes = []
md_xhashes = ''

for x in [('get,git,inference', ['inference']),
('get,git,mlperf,power', ['power-dev'])]:
xtags = x[0]
xdirs = x[1]

rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags})
if rx['return']>0: return rx
for cache in rx['list']:
xurl = ''
xhash = ''

for xd in xdirs:
xpath = os.path.join(cache.path, xd)
if os.path.isdir(xpath):
r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'})
if r['return'] == 0 and r['ret'] == 0:
xhash = r['stdout']

r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'})
if r['return'] == 0 and r['ret'] == 0:
xurl = r['stdout']

if xurl!='' and xhash!='':
break

if xurl!='' and xhash!='':
# Check if doesn't exist
found = False

for xh in xhashes:
if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash:
found = True
break

if not found:
xhashes.append({'mlcommons_git_url': xurl,
'mlcommons_git_hash': xhash,
'cm_cache_tags':cache.meta['tags']})

md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash)

if len(xhashes)>0:
host_info['mlcommons_repos'] = xhashes

with open ("cm-host-info.json", "w") as fp:
fp.write(json.dumps(host_info, indent=2)+'\n')

Expand All @@ -336,10 +288,10 @@ def postprocess(i):

readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n"

readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n"
readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n"

readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n{}\n\n".format(platform.platform(),
platform.processor(), sys.version, cm.__version__, md_xhashes)
readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(),
platform.processor(), sys.version, cm.__version__)

x = repo_name
if repo_hash!='': x+=' --checkout='+str(repo_hash)
Expand Down
36 changes: 36 additions & 0 deletions script/get-cache-dir/_cm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"alias": "get-cache-dir",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"category": "CM Interface",
"deps": [],
"docker": {
"run": false
},
"input_description": {},
"new_env_keys": [
"CM_CACHE_DIR",
"<<<CM_CACHE_DIR_ENV_NAME>>>"
],
"new_state_keys": [],
"post_deps": [],
"posthook_deps": [],
"prehook_deps": [],
"tags": [
"get",
"cache",
"dir",
"directory"
],
"uid": "48f4622e059b45ce",
"variations": {
"name.#": {
"env": {
"CM_CACHE_DIR_NAME": "#"
}
}
},
"versions": {
}
}
29 changes: 29 additions & 0 deletions script/get-cache-dir/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from cmind import utils
import os

def preprocess(i):
    """Standard CM script preprocess hook for get-cache-dir.

    Args:
        i (dict): CM script invocation context. The framework supplies
            keys such as 'os_info', 'env', 'meta' and 'automation';
            none of them are needed by this script.

    Returns:
        dict: {'return': 0} on success (CM convention: a non-zero
        'return' value signals an error).
    """
    # Nothing to do before the run: the cache directory is resolved in
    # postprocess() from the script's working directory.  The original
    # boilerplate bound i['os_info'], i['meta'], i['automation'] and a
    # 'quiet' flag to unused locals; they are intentionally dropped.
    return {'return': 0}

def postprocess(i):
    """Publish the script's working directory as the CM cache directory.

    The current working directory (CM runs the script inside its cache
    entry) is exported through env as:
      * CM_CACHE_DIR
      * CM_GET_DEPENDENT_CACHED_PATH (so dependents pick up this path)
      * an extra, caller-chosen variable when CM_CACHE_DIR_ENV_NAME is set

    Args:
        i (dict): CM script invocation context; 'env' is read and mutated.

    Returns:
        dict: {'return': 0} on success (CM convention).
    """
    env = i['env']

    resolved_dir = os.getcwd()

    # Optionally mirror the path into a custom variable whose name the
    # calling script supplies via CM_CACHE_DIR_ENV_NAME.
    custom_key = env.get('CM_CACHE_DIR_ENV_NAME', '')
    if custom_key != '':
        env[custom_key] = resolved_dir

    env['CM_CACHE_DIR'] = resolved_dir
    env['CM_GET_DEPENDENT_CACHED_PATH'] = resolved_dir

    return {'return': 0}
2 changes: 1 addition & 1 deletion script/get-mlperf-inference-src/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
}
}
],
"print_env_at_the_end": {
"print_env_at_the_end_disabled": {
"CM_MLPERF_INFERENCE_CONF_PATH": "Path to the MLPerf inference benchmark configuration file",
"CM_MLPERF_INFERENCE_SOURCE": "Path to MLPerf inference benchmark sources"
},
Expand Down
2 changes: 1 addition & 1 deletion script/get-mlperf-inference-src/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def postprocess(i):
if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']

return {'return':0}
return {'return':0, 'version': env['CM_VERSION']}


def get_valid_models(mlperf_version, mlperf_path):
Expand Down
25 changes: 9 additions & 16 deletions script/get-mlperf-inference-sut-configs/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"alias": "get-mlperf-inference-sut-configs",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"cache": false,
"category": "MLPerf benchmark support",
"new_env_keys": [
"CM_HW_*",
Expand All @@ -28,23 +28,16 @@
"configs",
"sut-configs"
],
"uid": "c2fbf72009e2445b",
"variations": {
"octoml": {
"deps": [
{
"tags": "get,cache,dir,_name.mlperf-inference-sut-configs",
"env": {
"CM_SUT_USE_EXTERNAL_CONFIG_REPO": "yes",
"CM_GIT_CHECKOUT_FOLDER": "configs",
"CM_GIT_URL": "https://github.com/arjunsuresh/mlperf-inference-configs"
"CM_CACHE_DIR_ENV_NAME": "CM_SUT_CONFIGS_PATH"
},
"prehook_deps": [
{
"force_env_keys": [
"CM_GIT_URL",
"CM_GIT_CHECKOUT_*"
],
"tags": "get,git,repo,_repo.mlperf_inference_configs_octoml"
}
]
"extra_cache_tags": "mlperf,inference,sut,configs"
}
],
"uid": "c2fbf72009e2445b",
"variations": {
}
}
2 changes: 1 addition & 1 deletion script/get-mlperf-inference-sut-configs/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def postprocess(i):
if env.get('CM_SUT_NAME', '') == '':
env['CM_SUT_NAME'] = env['CM_HW_NAME'] + "-" + implementation_string + "-" + device + "-" + backend + "-" + backend_version + "-" + run_config_string

if env.get('CM_SUT_CONFIGS_PATH',''):
if env.get('CM_SUT_CONFIGS_PATH','') != '':
path = env['CM_SUT_CONFIGS_PATH']
elif env.get('CM_SUT_USE_EXTERNAL_CONFIG_REPO', '') == "yes":
path = env.get('CM_GIT_CHECKOUT_PATH')
Expand Down
9 changes: 8 additions & 1 deletion script/get-mlperf-inference-sut-description/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"alias": "get-mlperf-inference-sut-description",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"cache": false,
"category": "MLPerf benchmark support",
"deps": [
{
Expand Down Expand Up @@ -46,6 +46,13 @@
},
{
"tags": "get,generic-python-lib,_package.dmiparser"
},
{
"tags": "get,cache,dir,_name.mlperf-inference-sut-descriptions",
"extra_cache_tags": "mlperf,inference,sut,descriptions",
"env": {
"CM_CACHE_DIR_ENV_NAME": "CM_MLPERF_INFERENCE_SUT_DESC_PATH"
}
}
],
"default_env": {
Expand Down
4 changes: 3 additions & 1 deletion script/get-mlperf-inference-sut-description/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def preprocess(i):

sut = hw_name + sut_suffix
script_path = i['run_script_input']['path']
sut_path = os.path.join(os.getcwd(), "suts", sut + ".json")
sut_desc_path=env['CM_MLPERF_INFERENCE_SUT_DESC_PATH']

sut_path = os.path.join(sut_desc_path, "suts", sut + ".json")
if os.path.exists(sut_path) and env.get('CM_SUT_DESC_CACHE', '') == "yes":
print(f"Reusing SUT description file {sut}")
state['CM_SUT_META'] = json.load(open(sut_path))
Expand Down
2 changes: 1 addition & 1 deletion script/get-mlperf-power-dev/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ def postprocess(i):
if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '':
env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH']

return {'return':0}
return {'return':0, 'version': env['CM_VERSION']}
2 changes: 1 addition & 1 deletion script/run-mlperf-inference-app/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ variations:
CM_MLPERF_DASHBOARD: 'on'

find-performance:
default: true
env:
CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes'
CM_MLPERF_LOADGEN_ALL_MODES: 'no'
Expand Down Expand Up @@ -264,6 +263,7 @@ variations:
group: submission-generation-style

performance-and-accuracy:
default: true
base:
- all-modes
default_variations:
Expand Down
Loading

0 comments on commit 8d0d5ee

Please sign in to comment.