From 95646b2232676f30b81869f2502a102a15b584b7 Mon Sep 17 00:00:00 2001 From: Lin Guo Date: Sun, 3 Nov 2024 00:14:56 -0700 Subject: [PATCH] Add c4a to the nosmt list This fixes the issue where Slurm wrongly considers c4a to have SMT enabled (threads_per_core=2). In truth, SMT is not applicable to Arm machines. Tested with the change: ```sh # As indicated from lscpu, c4a always has 1 thread per core linsword_google_com@c4atest-c4ahighmemnode-0:~$ lscpu | grep "per " Thread(s) per core: 1 Core(s) per socket: 72 # Verify the number looks correct with the change linsword_google_com@c4atest-login-001:~$ scontrol show nodes | grep 72 | head -n2 NodeName=c4atest-c4ahighmemnode-0 Arch=aarch64 CoresPerSocket=72 CPUAlloc=0 CPUEfctv=72 CPUTot=72 CPULoad=0.06 ``` --- .../modules/slurm_files/scripts/util.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py index 68716e51bf..d046c27357 100755 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/modules/slurm_files/scripts/util.py @@ -454,7 +454,7 @@ def _list_config_blobs() -> Tuple[Any, str]: if res["core"] is None: raise DeffetiveStoredConfigError("config.yaml not found in bucket") return res, hash.hexdigest() - + def _fetch_config(old_hash: Optional[str]) -> Optional[Tuple[NSDict, str]]: """Fetch config from bucket, returns None if no changes are detected.""" @@ -1156,7 +1156,12 @@ def machine_type_sockets(template) -> int: def isSmt(template) -> bool: # https://cloud.google.com/compute/docs/cpu-platforms - noSmtFamily = ("t2a", "t2d", "h3",) + noSmtFamily = ( + "t2a", + "t2d", + "h3", + "c4a", + ) if machine_type_family(template.machineType) in noSmtFamily: return False if 
template.machine_info.guestCpus == 1: