diff --git a/automation/script/README-specs.md b/automation/script/README-specs.md
index 58526d168..4b40feeba 100644
--- a/automation/script/README-specs.md
+++ b/automation/script/README-specs.md
@@ -27,7 +27,7 @@ See the [automatically generated catalog](https://github.com/mlcommons/ck/blob/m
 When we run a CM script we can also pass inputs to it and any input added in `input_mapping` dictionary inside `_cm.json` gets converted to the corresponding `ENV` variable.
 
 ### Conditional execution of any `deps`, `post_deps`
-We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its executional conditional
+We can use `skip_if_env` dictionary inside any `deps`, `prehook_deps`, `posthook_deps` or `post_deps` to make its execution conditional
 
 ### Versions
 We can specify any specific version of a script using `version`. `version_max` and `version_min` are also possible options.
@@ -73,9 +73,7 @@ Sometimes it is difficult to add all variations needed for a script like say `ba
 
 
 ### Script workflow (env, deps, native scripts)
 
-![](assets/scripts-workflow.png)
+
 
-
-
-© 2022-23 [MLCommons](https://mlcommons.org)
+© 2022-24 [MLCommons](https://mlcommons.org)
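For illustration, a minimal hypothetical `_cm.json` fragment combining the features described in the README hunk above might look as follows; the script tags and environment variable names are invented for this example, and only the structure mirrors real CM metadata:

```json
{
  "input_mapping": {
    "skip_model_download": "CM_EXAMPLE_SKIP_MODEL_DOWNLOAD"
  },
  "deps": [
    {
      "tags": "get,example,ml-model",
      "version_min": "2.0",
      "skip_if_env": {
        "CM_EXAMPLE_SKIP_MODEL_DOWNLOAD": [ "yes" ]
      }
    }
  ]
}
```

With such metadata, running the script with `--skip_model_download=yes` would export `CM_EXAMPLE_SKIP_MODEL_DOWNLOAD=yes`, so the model dependency would be skipped, while `version_min` constrains which version of that dependency is acceptable when it does run.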
diff --git a/automation/script/module.py b/automation/script/module.py index eee99803c..e79b33a3e 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4314,12 +4314,12 @@ def enable_or_skip_script(meta, env): (AND function) """ for key in meta: + meta_key = [str(v).lower() for v in meta[key]] if key in env: value = str(env[key]).lower() - meta_key = [str(v).lower() for v in meta[key]] - if set(meta_key) & set(["yes", "on", "true", "1"]): + # Any set value other than false is taken as set if value not in ["no", "off", "false", "0"]: continue elif set(meta_key) & set(["no", "off", "false", "0"]): @@ -4327,6 +4327,11 @@ def enable_or_skip_script(meta, env): continue elif value in meta_key: continue + else: + if set(meta_key) & set(["no", "off", "false", "0"]): + # If key is missing in env, and if the expected value is False, consider it a match + continue + return False return True diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 91baf5cb6..e76a68fed 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1873,7 +1873,7 @@ def docker(i): dockerfilename_suffix = dockerfilename_suffix[len(dockerfilename_suffix) - 1] - cm_repo=i.get('docker_cm_repo', 'mlcommons@cm4mlops') + cm_repo=i.get('docker_cm_repo', docker_settings.get('cm_repo', 'mlcommons@cm4mlops')) docker_path = i.get('docker_path', '').strip() if docker_path == '': diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index b33c0e8b5..ce70f07d6 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -293,7 +293,7 @@ variations: adr: conda-python: version: "3.9" - - tags: install,llvm,src,_tag.llvmorg-16.0.6,_clang,_release,_for-intel-mlperf-inference-v3.1-gptj + - tags: install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj - names: - conda-package - ncurses diff --git a/script/app-mlperf-inference-intel/run_gptj_harness.sh b/script/app-mlperf-inference-intel/run_gptj_harness.sh index f006f673b..43e57bbb0 100644 --- a/script/app-mlperf-inference-intel/run_gptj_harness.sh +++ b/script/app-mlperf-inference-intel/run_gptj_harness.sh @@ -1,6 +1,8 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:$PATH +KMP_BLOCKTIME=${KMP_BLOCKTIME:-10} + export KMP_BLOCKTIME=${KMP_BLOCKTIME} export KMP_AFFINITY=granularity=fine,compact,1,0 export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so @@ -9,11 +11,11 @@ export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so export num_physical_cores=`lscpu -b -p=Core,Socket | grep -v '^#' | sort -u | wc -l` num_numa=$(numactl --hardware|grep available|awk -F' ' '{ print $2 }') -NUM_PROC=${NUM_PROC:-num_numa} +NUM_PROC=${NUM_PROC:-$num_numa} CPUS_PER_PROC=$((num_physical_cores/num_numa)) -WORKERS_PER_PROC=${WORKERS_PER_PROC} +WORKERS_PER_PROC=${WORKERS_PER_PROC:-1} TOTAL_SAMPLE_COUNT=13368 -BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE} +BATCH_SIZE=${CM_MLPERF_LOADGEN_BATCH_SIZE:-8} TIMESTAMP=$(date +%m-%d-%H-%M) HOSTNAME=$(hostname) #OUTPUT_DIR=offline-output-${HOSTNAME}-batch-${BATCH_SIZE}-procs-${NUM_PROC}-ins-per-proc-${WORKERS_PER_PROC}-${TIMESTAMP} @@ -28,7 +30,7 @@ USER_CONF="${CM_MLPERF_USER_CONF}" cmd="python runner.py --workload-name gptj \ - --scenario ${${CM_MLPERF_LOADGEN_SCENARIO}} \ + --scenario ${CM_MLPERF_LOADGEN_SCENARIO} \ --mode ${LOADGEN_MODE} \ --num-proc ${NUM_PROC} \ --cpus-per-proc ${CPUS_PER_PROC} \ diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml 
b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 67e60b166..b9fc789f8 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -104,6 +104,8 @@ deps: # Detect CUDA if required - tags: get,cuda,_cudnn + names: + - cuda enable_if_env: CM_MLPERF_DEVICE: - gpu diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 23a738453..ee97038ff 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -177,7 +177,7 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio env['RUN_DIR'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "gpt-j") cmd = env['CM_PYTHON_BIN_WITH_PATH'] + \ - " main.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + " run.py --model-path=" + env['CM_ML_MODEL_FILE_WITH_PATH'] + ' --dataset-path=' + env['CM_DATASET_EVAL_PATH'] + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ ' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \ scenario_extra_options + mode_extra_options + dataset_options cmd = cmd.replace("--count", "--max_examples") @@ -188,7 +188,6 @@ def get_run_cmd_reference(os_info, env, scenario_extra_options, mode_extra_optio gpu_options = "" cmd = cmd + gpu_options env['LOG_PATH'] = env['CM_MLPERF_OUTPUT_DIR'] - return cmd, env['RUN_DIR'] if env['CM_MODEL'] in [ "resnet50", "retinanet" ]: diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml index 19e789ae0..337c64375 100644 --- a/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/script/app-mlperf-inference-nvidia/_cm.yaml @@ -261,7 +261,10 @@ deps: CM_MLPERF_NVIDIA_HARNESS_RUN_MODE: - run_harness - - tags: get,generic-python-lib,_package.nvmitten,_path./opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl + - tags: get,generic-python-lib,_package.nvmitten + update_tags_from_env_with_prefix: + _path.: + - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH enable_if_env: CM_RUN_STATE_DOCKER: - 'yes' @@ -338,6 +341,7 @@ variations: CM_ML_MODEL_WEIGHTS_DATA_TYPE: int8 deps: - tags: get,generic-python-lib,_onnx-graphsurgeon + version: 0.3.27 - tags: get,generic-python-lib,_package.onnx version: 1.13.1 diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 681620c2f..a4bb7b6a3 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -182,6 +182,8 @@ variations: tags: _float32 librispeech-accuracy-script: tags: _int32 + cnndm-accuracy-script: + tags: _int32 env: CM_MLPERF_PYTHON: 'yes' CM_MLPERF_IMPLEMENTATION: mlcommons_python @@ -189,6 +191,7 @@ variations: CM_IMAGENET_ACCURACY_DTYPE: float32 CM_OPENIMAGES_ACCURACY_DTYPE: float32 CM_LIBRISPEECH_ACCURACY_DTYPE: float32 + CM_CNNDM_ACCURACY_DTYPE: int32 prehook_deps: - names: - python-reference-mlperf-inference @@ -235,6 +238,10 @@ variations: default_variations: backend: onnxruntime + nvidia-original,r4.1_default: + docker: + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + nvidia-original: docker: interactive: True @@ -430,7 +437,7 @@ variations: tags: run,accuracy,mlperf,_imagenet docker: deps: - - tags: 
get,dataset,imagenet,original + - tags: get,dataset,imagenet,validation,original names: - imagenet-original - dataset-original @@ -1142,6 +1149,25 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + + r4.1_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + tags: _go + nvidia-inference-server: + version: r4.0 + tags: _go + default_env: + CM_SKIP_SYS_UTILS: 'yes' + CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + invalid_variation_combinations: - @@ -1250,10 +1276,10 @@ docker: shm_size: '32gb' interactive: True extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' - docker_os: ubuntu - docker_cm_repo: gateoverflow@cm4mlops - docker_real_run: False - docker_os_version: '22.04' + os: ubuntu + cm_repo: gateoverflow@cm4mlops + real_run: False + os_version: '22.04' docker_input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 23a7f75ce..183290828 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -46,8 +46,8 @@ def postprocess(i): env['CMD'] = '' state = i['state'] - if env.get('CM_MLPERF_USER_CONF', '') == '': - return {'return': 0} + #if env.get('CM_MLPERF_USER_CONF', '') == '': + # return {'return': 0} output_dir = env['CM_MLPERF_OUTPUT_DIR'] mode = env['CM_MLPERF_LOADGEN_MODE'] @@ -254,16 +254,16 @@ def postprocess(i): if env.get('CM_HOST_SYSTEM_NAME','')!='': host_info['system_name']=env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@ck' + repo_name = 'mlcommons@cm4mlops' repo_hash = '' - r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@ck,a4705959af8e447a'}) + r = cm.access({'action':'find', 'automation':'repo', 'artifact':'mlcommons@cm4mlops,9e97bb72b0474657'}) if r['return']==0 and len(r['list'])==1: repo_path = r['list'][0].path if os.path.isdir(repo_path): repo_name = os.path.basename(repo_path) - # Check Grigori's dev - if repo_name == 'ck': repo_name = 'ctuning@mlcommons-ck' + # Check dev + if repo_name == 'cm4mlops': repo_name = 'gateoverflow@cm4mlops' r = cm.access({'action':'system', 'automation':'utils', @@ -275,54 +275,6 @@ def postprocess(i): host_info['cm_repo_name'] = repo_name host_info['cm_repo_git_hash'] = repo_hash - # Check a few important MLCommons repos - xhashes = [] - md_xhashes = '' - - for x in [('get,git,inference', ['inference']), - ('get,git,mlperf,power', ['power-dev'])]: - xtags = x[0] - xdirs = x[1] - - rx = cm.access({'action':'find', 'automation':'cache', 'tags':xtags}) - if rx['return']>0: return rx - for cache in rx['list']: - xurl = '' - xhash = '' - - for xd in xdirs: - xpath = os.path.join(cache.path, xd) - if os.path.isdir(xpath): - r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git rev-parse HEAD'}) - if r['return'] == 0 and r['ret'] == 0: - xhash = r['stdout'] - - r = cm.access({'action':'system', 'automation':'utils', 'path':xpath, 'cmd':'git config --get remote.origin.url'}) - if r['return'] == 0 and r['ret'] == 0: - xurl = r['stdout'] - - if xurl!='' and xhash!='': - break - - if xurl!='' and xhash!='': - # Check if 
doesn't exist - found = False - - for xh in xhashes: - if xh['mlcommons_git_url'] == xurl and xh['mlcommons_git_hash'] == xhash: - found = True - break - - if not found: - xhashes.append({'mlcommons_git_url': xurl, - 'mlcommons_git_hash': xhash, - 'cm_cache_tags':cache.meta['tags']}) - - md_xhashes +='* MLCommons Git {} ({})\n'.format(xurl, xhash) - - if len(xhashes)>0: - host_info['mlcommons_repos'] = xhashes - with open ("cm-host-info.json", "w") as fp: fp.write(json.dumps(host_info, indent=2)+'\n') @@ -336,10 +288,10 @@ def postprocess(i): readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/ck).\n\n" - readme_init+= "*Check [CM MLPerf docs](https://github.com/mlcommons/ck/tree/master/docs/mlperf) for more details.*\n\n" + readme_init+= "*Check [CM MLPerf docs](https://mlcommons.github.io/inference) for more details.*\n\n" - readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n{}\n\n".format(platform.platform(), - platform.processor(), sys.version, cm.__version__, md_xhashes) + readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), + platform.processor(), sys.version, cm.__version__) x = repo_name if repo_hash!='': x+=' --checkout='+str(repo_hash) diff --git a/script/build-mlperf-inference-server-nvidia/_cm.yaml b/script/build-mlperf-inference-server-nvidia/_cm.yaml index 149941282..359614e34 100644 --- a/script/build-mlperf-inference-server-nvidia/_cm.yaml +++ b/script/build-mlperf-inference-server-nvidia/_cm.yaml @@ -164,6 +164,11 @@ variations: add_deps_recursive: nvidia-inference-common-code: tags: _ctuning + go: + group: code + add_deps_recursive: + nvidia-inference-common-code: + tags: _go nvidia-only: group: code add_deps_recursive: @@ -180,6 +185,23 @@ variations: nvidia-inference-common-code: tags: _mlcommons + r4.0: + group: version + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + nvidia-scratch-space: + tags: _version.4_1 + deps: + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorch + - torch + - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorchvision + - torchvision + - tags: install,nccl,libs,_cuda versions: r2.1: @@ -213,16 +235,33 @@ versions: - torchvision - tags: install,nccl,libs,_cuda + r4.0: + add_deps_recursive: + nvidia-inference-common-code: + version: r4.0 + nvidia-scratch-space: + tags: _version.4_1 + deps: + - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorch + - torch + - tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v4.0 + names: + - pytorchvision + - torchvision + - tags: install,nccl,libs,_cuda + docker: skip_run_cmd: 'no' all_gpus: 'yes' shm_size: '32gb' extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' - docker_os: ubuntu + os: ubuntu cm_repo_flags1: ' --branch=mlperf-inference' - docker_real_run: False + real_run: False interactive: True - docker_os_version: '20.04' + os_version: '20.04' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public docker_input_mapping: imagenet_path: IMAGENET_PATH diff --git a/script/get-cache-dir/_cm.json b/script/get-cache-dir/_cm.json new file mode 100644 index 
000000000..b5fd30277 --- /dev/null +++ b/script/get-cache-dir/_cm.json @@ -0,0 +1,36 @@ +{ + "alias": "get-cache-dir", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "CM Interface", + "deps": [], + "docker": { + "run": false + }, + "input_description": {}, + "new_env_keys": [ + "CM_CACHE_DIR", + "<<>>" + ], + "new_state_keys": [], + "post_deps": [], + "posthook_deps": [], + "prehook_deps": [], + "tags": [ + "get", + "cache", + "dir", + "directory" + ], + "uid": "48f4622e059b45ce", + "variations": { + "name.#": { + "env": { + "CM_CACHE_DIR_NAME": "#" + } + } + }, + "versions": { + } +} diff --git a/script/get-cache-dir/customize.py b/script/get-cache-dir/customize.py new file mode 100644 index 000000000..6e8a76460 --- /dev/null +++ b/script/get-cache-dir/customize.py @@ -0,0 +1,29 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + return {'return':0} + +def postprocess(i): + + env = i['env'] + + cache_dir = os.getcwd() + if env.get('CM_CACHE_DIR_ENV_NAME', '') != '': + env[env['CM_CACHE_DIR_ENV_NAME']] = cache_dir + + env['CM_CACHE_DIR'] = cache_dir + env['CM_GET_DEPENDENT_CACHED_PATH'] = cache_dir + + return {'return':0} diff --git a/script/get-mlperf-inference-nvidia-common-code/_cm.json b/script/get-mlperf-inference-nvidia-common-code/_cm.json index 46f12477e..7b47209ca 100644 --- a/script/get-mlperf-inference-nvidia-common-code/_cm.json +++ b/script/get-mlperf-inference-nvidia-common-code/_cm.json @@ -39,6 +39,9 @@ }, "nvidia-only": { "group": "repo-owner" + }, + "go": { + "group": "repo-owner" } }, "versions": { @@ -62,6 +65,13 @@ "version": "v3.1" } } + }, + "r4.0": { + "add_deps_recursive": { + "mlperf-inference-results": { + "version": "v4.0" + } + } } } } diff --git a/script/get-mlperf-inference-results/_cm.json b/script/get-mlperf-inference-results/_cm.json index 46feecd04..f4870d57f 100644 --- a/script/get-mlperf-inference-results/_cm.json +++ b/script/get-mlperf-inference-results/_cm.json @@ -57,6 +57,12 @@ "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v3.1", "CM_GIT_URL": "https://github.com/<<>>/inference_results_v3.1.git" } + }, + "v4.0": { + "env": { + "CM_MLPERF_INFERENCE_RESULTS_VERSION_NAME": "v4.0", + "CM_GIT_URL": "https://github.com/<<>>/inference_results_v4.0.git" + } } }, "variations": { @@ -85,6 +91,12 @@ "GITHUB_REPO_OWNER": "GATEOverflow", "NVIDIA_ONLY": "yes" } + }, + "go": { + "group": "source-repo", + "env": { + "GITHUB_REPO_OWNER": "GATEOverflow" + } } } } diff --git a/script/get-mlperf-inference-src/_cm.json b/script/get-mlperf-inference-src/_cm.json index 4e4c4806d..0c1bac0c8 100644 --- a/script/get-mlperf-inference-src/_cm.json +++ b/script/get-mlperf-inference-src/_cm.json @@ -68,7 +68,7 @@ } } ], - "print_env_at_the_end": { + "print_env_at_the_end_disabled": { "CM_MLPERF_INFERENCE_CONF_PATH": "Path to the MLPerf inference benchmark configuration file", "CM_MLPERF_INFERENCE_SOURCE": "Path to MLPerf inference benchmark sources" }, diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index 7916a1bde..c42db0263 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -82,7 +82,7 @@ def postprocess(i): if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] - return {'return':0} + 
return {'return':0, 'version': env['CM_VERSION']} def get_valid_models(mlperf_version, mlperf_path): diff --git a/script/get-mlperf-inference-sut-configs/_cm.json b/script/get-mlperf-inference-sut-configs/_cm.json index 7ad8376f3..012aca664 100644 --- a/script/get-mlperf-inference-sut-configs/_cm.json +++ b/script/get-mlperf-inference-sut-configs/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-configs", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": true, + "cache": false, "category": "MLPerf benchmark support", "new_env_keys": [ "CM_HW_*", @@ -28,23 +28,16 @@ "configs", "sut-configs" ], - "uid": "c2fbf72009e2445b", - "variations": { - "octoml": { + "deps": [ + { + "tags": "get,cache,dir,_name.mlperf-inference-sut-configs", "env": { - "CM_SUT_USE_EXTERNAL_CONFIG_REPO": "yes", - "CM_GIT_CHECKOUT_FOLDER": "configs", - "CM_GIT_URL": "https://github.com/arjunsuresh/mlperf-inference-configs" + "CM_CACHE_DIR_ENV_NAME": "CM_SUT_CONFIGS_PATH" }, - "prehook_deps": [ - { - "force_env_keys": [ - "CM_GIT_URL", - "CM_GIT_CHECKOUT_*" - ], - "tags": "get,git,repo,_repo.mlperf_inference_configs_octoml" - } - ] + "extra_cache_tags": "mlperf,inference,sut,configs" } + ], + "uid": "c2fbf72009e2445b", + "variations": { } } diff --git a/script/get-mlperf-inference-sut-configs/customize.py b/script/get-mlperf-inference-sut-configs/customize.py index f074ed30b..8bca2a401 100644 --- a/script/get-mlperf-inference-sut-configs/customize.py +++ b/script/get-mlperf-inference-sut-configs/customize.py @@ -36,7 +36,7 @@ def postprocess(i): if env.get('CM_SUT_NAME', '') == '': env['CM_SUT_NAME'] = env['CM_HW_NAME'] + "-" + implementation_string + "-" + device + "-" + backend + "-" + backend_version + "-" + run_config_string - if env.get('CM_SUT_CONFIGS_PATH',''): + if env.get('CM_SUT_CONFIGS_PATH','') != '': path = env['CM_SUT_CONFIGS_PATH'] elif env.get('CM_SUT_USE_EXTERNAL_CONFIG_REPO', '') == "yes": path = env.get('CM_GIT_CHECKOUT_PATH') diff --git a/script/get-mlperf-inference-sut-description/_cm.json b/script/get-mlperf-inference-sut-description/_cm.json index 4c3f998e5..a160722c2 100644 --- a/script/get-mlperf-inference-sut-description/_cm.json +++ b/script/get-mlperf-inference-sut-description/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-description", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": true, + "cache": false, "category": "MLPerf benchmark support", "deps": [ { @@ -46,6 +46,13 @@ }, { "tags": "get,generic-python-lib,_package.dmiparser" + }, + { + "tags": "get,cache,dir,_name.mlperf-inference-sut-descriptions", + "extra_cache_tags": "mlperf,inference,sut,descriptions", + "env": { + "CM_CACHE_DIR_ENV_NAME": "CM_MLPERF_INFERENCE_SUT_DESC_PATH" + } } ], "default_env": { diff --git a/script/get-mlperf-inference-sut-description/customize.py b/script/get-mlperf-inference-sut-description/customize.py index cd0c2f754..71636941f 100644 --- a/script/get-mlperf-inference-sut-description/customize.py +++ b/script/get-mlperf-inference-sut-description/customize.py @@ -33,7 +33,9 @@ def preprocess(i): sut = hw_name + sut_suffix script_path = i['run_script_input']['path'] - sut_path = os.path.join(os.getcwd(), "suts", sut + ".json") + sut_desc_path=env['CM_MLPERF_INFERENCE_SUT_DESC_PATH'] + + sut_path = os.path.join(sut_desc_path, "suts", sut + ".json") if os.path.exists(sut_path) and env.get('CM_SUT_DESC_CACHE', '') == "yes": print(f"Reusing SUT description file {sut}") state['CM_SUT_META'] = json.load(open(sut_path)) diff --git 
a/script/get-mlperf-power-dev/customize.py b/script/get-mlperf-power-dev/customize.py index 2af085d74..50afb3ba4 100644 --- a/script/get-mlperf-power-dev/customize.py +++ b/script/get-mlperf-power-dev/customize.py @@ -18,4 +18,4 @@ def postprocess(i): if env.get('CM_GIT_REPO_CURRENT_HASH', '') != '': env['CM_VERSION'] += "-git-"+env['CM_GIT_REPO_CURRENT_HASH'] - return {'return':0} + return {'return':0, 'version': env['CM_VERSION']} diff --git a/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh new file mode 100644 index 000000000..058cd76ee --- /dev/null +++ b/script/get-preprocessed-dataset-criteo/preprocess_multihot.sh @@ -0,0 +1,9 @@ +#!/bin/bash +cd ${CM_MLPERF_TRAINING_SOURCE}/recommendation_v2_torchrec_dlrm/ +${CM_PYTHON_BIN_WITH_PATH} materialize_synthetic_multihot_dataset.py \ + --in_memory_binary_criteo_path $PREPROCESSED_CRITEO_1TB_CLICK_LOGS_DATASET_PATH \ + --output_path $MATERIALIZED_DATASET_PATH \ + --num_embeddings_per_feature 40000000,39060,17295,7424,20265,3,7122,1543,63,40000000,3067956,405282,10,2209,11938,155,4,976,14,40000000,40000000,40000000,590152,12973,108,36 \ + --multi_hot_sizes 3,2,1,2,6,1,1,1,1,7,3,8,1,6,9,5,1,1,1,12,100,27,10,3,1,1 \ + --multi_hot_distribution_type uniform +test $? -eq 0 || exit $? diff --git a/script/install-ipex-from-src/_cm.json b/script/install-ipex-from-src/_cm.json index f9774e143..f7bb3d675 100644 --- a/script/install-ipex-from-src/_cm.json +++ b/script/install-ipex-from-src/_cm.json @@ -34,6 +34,9 @@ }, "tags": "get,pytorch,from.src" }, + { + "tags": "get,generic,conda-package,_package.ninja" + }, { "env": { "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_IPEX_SRC_REPO_PATH" @@ -123,6 +126,20 @@ ], "tags": "get,generic,conda-package,_package.setuptools,_source.conda-forge" }, + { + "names": [ + "conda-package", + "typing-extensions" + ], + "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge" + }, + { + "names": [ + "conda-package", + "sympy" + ], + "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge" + }, { "tags": "install,llvm,src,_for-intel-mlperf-inference-v3.1-gptj" } diff --git a/script/install-llvm-src/_cm.json b/script/install-llvm-src/_cm.json index 3ae795695..42e7e20df 100644 --- a/script/install-llvm-src/_cm.json +++ b/script/install-llvm-src/_cm.json @@ -281,6 +281,20 @@ "+ CXXFLAGS": [ "-Wno-nonnull", "-Wno-maybe-uninitialized", "-Wno-uninitialized", "-Wno-free-nonheap-object" ] } }, + { + "names": [ + "conda-package", + "typing-extensions" + ], + "tags": "get,generic,conda-package,_package.typing-extensions,_source.conda-forge" + }, + { + "names": [ + "conda-package", + "sympy" + ], + "tags": "get,generic,conda-package,_package.sympy,_source.conda-forge" + }, { "tags": "get,generic-python-lib,_custom-python,_package.setuptools", "env": { diff --git a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh index df23aa3e3..30b612b2b 100644 --- a/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh +++ b/script/install-llvm-src/install-llvm-16-intel-mlperf-inference.sh @@ -1,11 +1,15 @@ #!/bin/bash export PATH=${CM_CONDA_BIN_PATH}:${PATH} -export ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") +#export LD_LIBRARY_PATH=${CM_CONDA_LIB_PATH}:${LD_LIBRARY_PATH} +ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") +test $? -eq 0 || exit $? 
+export ABI=$ABI mkdir -p llvm-project && cd llvm-project wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/cmake-16.0.6.src.tar.xz wget -nc https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.6/llvm-16.0.6.src.tar.xz tar -xf cmake-16.0.6.src.tar.xz +test $? -eq 0 || exit $? mv cmake-16.0.6.src cmake tar -xf llvm-16.0.6.src.tar.xz mv llvm-16.0.6.src llvm @@ -13,9 +17,15 @@ rm -rf build mkdir -p build cd build export DEB_BUILD_MAINT_OPTIONS=hardening=-format +export CC=${CM_C_COMPILER_WITH_PATH} +export CXX=${CM_CXX_COMPILER_WITH_PATH} cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=${ABI}" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_BUILD_LLVM_DYLIB=ON -DLLVM_INCLUDE_BENCHMARKS=OFF ../llvm/ +test $? -eq 0 || exit $? cmake --build . -j $(nproc) +test $? -eq 0 || exit $? export LLVM_ROOT=$CONDA_PREFIX cmake -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_SHARED_LINKER_FLAGS="-L$CONDA_PREFIX -Wl,-rpath,$CONDA_PREFIX" -P cmake_install.cmake -ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +test $? -eq 0 || exit $? +ln -sf ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +test $? -eq 0 || exit $? diff --git a/script/install-pytorch-from-src/_cm.json b/script/install-pytorch-from-src/_cm.json index 057dbe681..2bdd46e68 100644 --- a/script/install-pytorch-from-src/_cm.json +++ b/script/install-pytorch-from-src/_cm.json @@ -220,6 +220,23 @@ } ] }, + "for-nvidia-mlperf-inference-v4.0": { + "base": [ + "sha.32f93b1", + "cuda" + ], + "deps": [ + { + "tags": "get,cmake", + "version_min": "3.25.0" + } + ], + "ad": { + "pytorch-src-repo": { + "tags": "_no-recurse-submodules,_full-history" + } + } + }, "for-nvidia-mlperf-inference-v3.1": { "base": [ "sha.b5021ba9", diff --git a/script/install-torchvision-from-src/_cm.json b/script/install-torchvision-from-src/_cm.json index a474bb484..34965bc60 100644 --- a/script/install-torchvision-from-src/_cm.json +++ b/script/install-torchvision-from-src/_cm.json @@ -94,6 +94,9 @@ "TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1" } }, + "for-nvidia-mlperf-inference-v4.0": { + "alias": "for-nvidia-mlperf-inference-v3.1" + }, "for-nvidia-mlperf-inference-v3.1": { "base": [ "sha.657027f3", @@ -102,7 +105,7 @@ "deps": [ { "tags": "install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1" - } + } ], "env": { } diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index f7b13c16d..895227b44 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -25,7 +25,7 @@ def preprocess(i): results_dir_split = results_dir.split(xsep) dataset = env['CM_DATASET'] - regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', False) + regenerate_accuracy_file = env.get('CM_MLPERF_REGENERATE_ACCURACY_FILE', env.get('CM_RERUN', False)) for result_dir in results_dir_split: @@ -105,9 +105,7 @@ def preprocess(i): else: return {'return': 1, 'error': 'Unsupported dataset'} - outfile = os.path.join(result_dir, "accuracy.txt") - if not os.path.exists(outfile) or (os.stat(outfile).st_size == 0) or env.get("CM_REGENERATE_MEASURE_FILES", False): - run_cmds.append(CMD) + run_cmds.append(CMD) if os_info['platform'] == 'windows': diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 7e19560e2..be61dae5c 100644 --- 
a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -37,6 +37,7 @@ default_env: input_mapping: backend: CM_MLPERF_BACKEND + beam_size: GPTJ_BEAM_SIZE category: CM_MLPERF_SUBMISSION_SYSTEM_TYPE clean: CM_MLPERF_CLEAN_ALL compliance: CM_MLPERF_LOADGEN_COMPLIANCE @@ -179,7 +180,6 @@ variations: CM_MLPERF_DASHBOARD: 'on' find-performance: - default: true env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' CM_MLPERF_LOADGEN_ALL_MODES: 'no' @@ -254,6 +254,12 @@ variations: CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.0_default group: benchmark-version + r4.1: + env: + CM_MLPERF_INFERENCE_VERSION: '4.1' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + group: benchmark-version + short: add_deps_recursive: submission-checker: @@ -264,6 +270,7 @@ variations: group: submission-generation-style performance-and-accuracy: + default: true base: - all-modes default_variations: diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 1e7b13761..861207c29 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -14,6 +14,7 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + inp = i['input'] state = i['state'] script_path = i['run_script_input']['path'] @@ -176,6 +177,9 @@ def preprocess(i): del(env['OUTPUT_BASE_DIR']) state = {} docker_extra_input = {} + + del(env['CM_HW_NAME']) + for k in inp: if k.startswith("docker_"): docker_extra_input[k] = inp[k] @@ -215,6 +219,9 @@ def preprocess(i): r = cm.access(ii) if r['return'] > 0: return r + if action == "docker": + return {'return': 0} # We run commands interactively inside the docker container + if state.get('docker', {}): del(state['docker']) @@ -237,8 +244,7 @@ def preprocess(i): if state.get("cm-mlperf-inference-results"): #print(state["cm-mlperf-inference-results"]) for sut in state["cm-mlperf-inference-results"]:#only one sut will be there - # Grigori: that may not work properly since customize may have another Python than MLPerf - # (for example, if we use virtual env) + # Better to do this in a stand alone CM script with proper deps but currently we manage this by modifying the sys path of the python executing CM import mlperf_utils print(sut)
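The `enable_or_skip_script` change in `automation/script/module.py` near the top of this patch is easier to reason about in isolation. The sketch below is not the module code itself; it is a simplified, standalone paraphrase of the AND-style matching that the patched function appears to implement for conditional dependencies (for example `enable_if_env`), assuming `meta` maps environment variable names to lists of expected values:

```python
def condition_matches(meta, env):
    """Simplified sketch of the AND matching used for conditional deps.

    Returns True only if every key in `meta` is satisfied by `env`.
    """
    false_values = {"no", "off", "false", "0"}

    for key, expected in meta.items():
        expected = {str(v).lower() for v in expected}

        if key in env:
            value = str(env[key]).lower()
            # Per the patch comment: any set value other than a false-like
            # value is taken as "set" and therefore satisfies the condition.
            if value not in false_values:
                continue
            # A false-like value satisfies a condition that expects false.
            if expected & false_values or value in expected:
                continue
        else:
            # New in this patch: a key missing from env satisfies a
            # condition whose expected value is false-like.
            if expected & false_values:
                continue

        return False

    return True


# Illustrative checks (variable names for illustration only):
assert condition_matches({"CM_RUN_STATE_DOCKER": ["yes"]}, {"CM_RUN_STATE_DOCKER": "yes"})
assert not condition_matches({"CM_RUN_STATE_DOCKER": ["yes"]}, {})
assert condition_matches({"CM_USE_GPU": ["no"]}, {})  # missing key matches a false expectation
```

The third check is the behavioral change introduced by the new `else` branch: a condition expecting a false-like value is now considered satisfied when the key is absent from the environment, instead of failing the whole dependency check.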