Skip to content

Commit

Permalink
Merge pull request #187 from hmeiland/feature-archdetect
Browse files Browse the repository at this point in the history
add support in init script for using 'archdetect' alternative to archspec (only if $EESSI_USE_ARCHDETECT is set to '1')
  • Loading branch information
boegel authored Nov 8, 2022
2 parents f4f2fb4 + 2d77d74 commit 92233a9
Show file tree
Hide file tree
Showing 22 changed files with 363 additions and 3 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/tests_archdetect.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
# Runs init/eessi_archdetect.sh against recorded cpuinfo samples and compares the
# detected software path with the expected value stored next to each sample.
name: Tests for eessi_archdetect.sh
on: [push, pull_request]
jobs:
build:
runs-on: ubuntu-20.04
strategy:
matrix:
# Each entry is a path below tests/archdetect/ without file extension:
# <entry>.cpuinfo is the input sample, <entry>.output holds the expected result.
proc_cpuinfo:
- x86_64/intel/haswell/archspec-linux-E5-2680-v3
- x86_64/intel/skylake_avx512/archspec-linux-6132
- x86_64/amd/zen2/Azure-CentOS7-7V12
- x86_64/amd/zen3/Azure-CentOS7-7V73X
- ppc64le/power9le/unknown-power9le
- aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra
- aarch64/arm/neoverse-n1/AWS-awslinux-graviton2
- aarch64/arm/neoverse-v1/AWS-awslinux-graviton3
# fail-fast disabled so that one failing sample does not cancel the other matrix jobs
fail-fast: false
steps:
- uses: actions/checkout@v2

# The first path component of the matrix entry (e.g. x86_64) is exported as
# EESSI_MACHINE_TYPE, and the sample file is exported as EESSI_PROC_CPUINFO,
# so the script is fed the sample's machine type and cpuinfo instead of the runner's own.
- name: test eessi_archdetect.sh
run: |
export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}}
export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*}
export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2
exit 1
fi
6 changes: 6 additions & 0 deletions init/arch_specs/eessi_arch_arm.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ARM CPU architecture specifications
# Software path in EESSI | Vendor ID | List of defining CPU features
"aarch64/arm/neoverse-n1" "ARM" "asimd" # Ampere Altra
"aarch64/arm/neoverse-n1" "" "asimd" # AWS Graviton2
"aarch64/arm/neoverse-v1" "ARM" "asimd svei8mm"
"aarch64/arm/neoverse-v1" "" "asimd svei8mm" # AWS Graviton3
3 changes: 3 additions & 0 deletions init/arch_specs/eessi_arch_ppc.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# POWER CPU architecture specifications
# Software path in EESSI | Vendor ID | List of defining CPU features
"ppc64le/power9le" "" "POWER9" # IBM Power9
6 changes: 6 additions & 0 deletions init/arch_specs/eessi_arch_x86.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# x86_64 CPU architecture specifications
# Software path in EESSI | Vendor ID | List of defining CPU features
"x86_64/intel/haswell" "GenuineIntel" "avx2 fma" # Intel Haswell, Broadwell
"x86_64/intel/skylake_avx512" "GenuineIntel" "avx2 fma avx512f avx512bw avx512cd avx512dq avx512vl" # Intel Skylake, Cascade Lake
"x86_64/amd/zen2" "AuthenticAMD" "avx2 fma" # AMD Rome
"x86_64/amd/zen3" "AuthenticAMD" "avx2 fma vaes" # AMD Milan, Milan-X
143 changes: 143 additions & 0 deletions init/eessi_archdetect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env bash
VERSION="1.0.0"

# Logging
LOG_LEVEL="INFO"

timestamp () {
  # Print the current local date and time as "YYYY-MM-DD HH:MM:SS" on stdout.
  # The printf builtin formats the time itself; the argument -1 stands for "now".
  printf '%(%Y-%m-%d %H:%M:%S)T\n' -1
}

log () {
  # Simple logger function: writes "<timestamp> [<LEVEL>] <message>" to standard error
  # 1: message level, one of DEBUG, INFO, WARN or ERROR (default: INFO)
  # 2: message body (default: 'null')
  # Messages below the global $LOG_LEVEL are dropped (INFO is assumed when
  # $LOG_LEVEL is unset or not a known level).
  # An ERROR message, or an unknown message level, terminates the shell with exit code 1.
  # Returns 0 for every other message, whether it was printed or filtered out.
  local -A levels=([DEBUG]=0 [INFO]=1 [WARN]=2 [ERROR]=3)
  local msg_type="${1:-INFO}"
  local msg_body="${2:-'null'}"

  # an unknown level is itself reported as an ERROR, which exits
  [ -n "${levels[$msg_type]}" ] || log "ERROR" "Unknown log level $msg_type"

  # resolve the active threshold without tripping on an empty or bogus $LOG_LEVEL
  local wanted_level="${LOG_LEVEL:-INFO}"
  local threshold="${levels[$wanted_level]:-${levels[INFO]}}"

  # ignore messages below log level
  [ "${levels[$msg_type]}" -lt "$threshold" ] && return 0

  # print log message to standard error
  echo "$(timestamp) [$msg_type] $msg_body" >&2

  # exit after any error message
  if [ "$msg_type" == "ERROR" ]; then
    exit 1
  fi
  # explicit success: the status of the ERROR test above must not leak to the caller
  return 0
}

# Supported CPU specifications
update_arch_specs(){
  # Add contents of given spec file into an array
  # 1: name of array with CPU arch specs (passed by name and extended in place)
  # 2: spec file with the additional specs
  # Every remaining line of the spec file (after removing comments and blank lines)
  # is appended as one string of the form "(<line>)".
  # Exits with code 1 when an argument is missing or the spec file does not exist.

  [ -z "$1" ] && echo "[ERROR] update_arch_specs: missing array in argument list" >&2 && exit 1
  local -n arch_specs=$1

  [ ! -f "$2" ] && echo "[ERROR] update_arch_specs: spec file not found: $2" >&2 && exit 1
  local spec_file="$2"
  local spec_line
  # -r keeps backslashes literal; the default IFS trims surrounding whitespace;
  # the || clause keeps a final line that has no trailing newline
  while read -r spec_line || [ -n "$spec_line" ]; do
    # format spec line as an array and append it to array with all CPU arch specs
    arch_specs+=("(${spec_line})")
  # remove comments ('#' at line start or after any whitespace) and blank lines;
  # POSIX classes are used because '\s' is not an escape inside a bracket expression
  done < <(sed -E 's/(^|[[:space:]])#.*$//;/^[[:space:]]*$/d' "$spec_file")
}

# CPU specification of host system
get_cpuinfo(){
  # Return the value from cpuinfo for the matching key
  # 1: string with key pattern (regular expression, anchored at the start of the line)
  # Reads the file named by $EESSI_PROC_CPUINFO, falling back to /proc/cpuinfo.
  # Prints the value of the last matching line on stdout, or nothing when no line matches.

  [ -z "$1" ] && log "ERROR" "get_cpuinfo: missing key pattern in argument list"
  local cpuinfo_pattern="^${1}\s*:\s*"
  # kept in a quoted variable so that a path containing whitespace stays one argument
  local cpuinfo_file="${EESSI_PROC_CPUINFO:-/proc/cpuinfo}"

  # case insensitive match of key pattern and delete key pattern from result
  grep -i -- "$cpuinfo_pattern" "$cpuinfo_file" | tail -n 1 | sed "s/$cpuinfo_pattern//i"
}

check_allinfirst(){
  # Return true if all given arguments after the first are found in the first one
  # 1: reference string of space separated values
  # 2,3..: each additional argument is a single value to be found in the reference string
  # Returns 0 when every value occurs as a whole word in the reference string
  # (also when no values are given), 1 as soon as one value is missing.

  [ -z "$1" ] && log "ERROR" "check_allinfirst: missing argument with reference string"
  # declared local so the caller's variables of the same name are left untouched
  local reference="$1"
  local candidate
  shift

  for candidate in "$@"; do
    # padding both sides with spaces makes the glob match whole words only
    [[ " $reference " == *" $candidate "* ]] || return 1
  done
  return 0
}

cpupath(){
  # Identify the best matching CPU architecture from a list of supported specifications for the host CPU
  # Return the path to the installation files in EESSI of the best matching architecture
  # Reads $EESSI_MACHINE_TYPE as an optional override of `uname -m`.
  # Prints the matching software path on stdout, or "generic" when no specification matches.
  local cpu_arch_spec=()
  local arch spec_file base_dir cpu_vendor cpu_flags
  local -a arch_spec=() required_flags=()

  # Identify the host CPU architecture
  local machine_type=${EESSI_MACHINE_TYPE:-$(uname -m)}
  log "DEBUG" "cpupath: Host CPU architecture identified as '$machine_type'"

  # Populate list of supported specs for this architecture
  case "$machine_type" in
    "x86_64") spec_file="eessi_arch_x86.spec";;
    "aarch64") spec_file="eessi_arch_arm.spec";;
    "ppc64le") spec_file="eessi_arch_ppc.spec";;
    *) log "ERROR" "cpupath: Unsupported CPU architecture $machine_type";;
  esac
  # spec files are located in a subfolder with this script
  # (every expansion is quoted so an installation path with whitespace is not split)
  base_dir=$(dirname "$(realpath "$0")")
  update_arch_specs cpu_arch_spec "$base_dir/arch_specs/${spec_file}"

  # Identify the host CPU vendor
  local cpu_vendor_tag="vendor[ _]id"
  cpu_vendor=$(get_cpuinfo "$cpu_vendor_tag")
  log "DEBUG" "cpupath: CPU vendor of host system: '$cpu_vendor'"

  # Identify the host CPU flags or features
  # cpuinfo systems print different line identifiers, eg features, instead of flags
  local cpu_flag_tag='flags'
  if [ "${machine_type}" == "aarch64" ] && [ -z "${cpu_vendor}" ]; then
    # aarch64 input without a vendor line is read through its 'Features' line
    cpu_flag_tag='features'
  fi
  if [ "${machine_type}" == "ppc64le" ]; then
    # on ppc64le the 'cpu' line is used in place of a flag list
    cpu_flag_tag='cpu'
  fi

  cpu_flags=$(get_cpuinfo "$cpu_flag_tag")
  log "DEBUG" "cpupath: CPU flags of host system: '$cpu_flags'"

  # Default to generic CPU
  local best_arch_match="generic"

  # Iterate over the supported CPU specifications to find the best match for host CPU
  # Order of the specifications matters, the last one to match will be selected
  for arch in "${cpu_arch_spec[@]}"; do
    # each entry holds the text of a bash array literal: (path vendor "flag list")
    # NOTE: eval executes that text, so spec files must come from a trusted location
    eval "arch_spec=$arch"
    [ "${cpu_vendor}" == "${arch_spec[1]}" ] || continue
    # split the flag list into words without exposing it to pathname expansion
    read -ra required_flags <<< "${arch_spec[2]}"
    # each flag in this CPU specification must be found in the list of flags of the host
    if check_allinfirst "$cpu_flags" "${required_flags[@]}"; then
      best_arch_match=${arch_spec[0]}
      log "DEBUG" "cpupath: host CPU best match updated to $best_arch_match"
    fi
  done

  log "INFO" "cpupath: best match for host CPU: $best_arch_match"
  echo "$best_arch_match"
}

# Parse command line arguments
#   -h  print usage and exit
#   -d  enable DEBUG log messages
#   -v  print the script version and exit
#   <action>  the only action handled below is "cpupath"
USAGE="Usage: eessi_archdetect.sh [-h][-d][-v] <action>"

while getopts 'hdv' OPTION; do
  case "$OPTION" in
    h) echo "$USAGE"; exit 0;;
    d) LOG_LEVEL="DEBUG";;
    v) echo "eessi_archdetect.sh v$VERSION"; exit 0;;
    # usage for an invalid option goes to stderr: callers capture stdout as the result
    ?) echo "$USAGE" >&2; exit 1;;
  esac
done
shift "$((OPTIND - 1))"

ARGUMENT=${1:-none}

case "$ARGUMENT" in
  "cpupath") cpupath; exit;;
  # keep stdout clean on failure; log "ERROR" terminates the script with exit code 1
  *) echo "$USAGE" >&2; log "ERROR" "Missing <action> argument";;
esac
13 changes: 10 additions & 3 deletions init/eessi_environment_variables
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,16 @@ if [ -d $EESSI_PREFIX ]; then
if [ -d $EESSI_EPREFIX ]; then

# determine subdirectory in software layer
# note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined!
export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3
export EESSI_SOFTWARE_SUBDIR=$($EESSI_EPREFIX_PYTHON ${EESSI_INIT_DIR_PATH}/eessi_software_subdir_for_host.py $EESSI_PREFIX)
if [ "$EESSI_USE_ARCHDETECT" == "1" ]; then
# if archdetect is enabled, use internal code
export EESSI_SOFTWARE_SUBDIR=$(${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh cpupath)
echo "archdetect says ${EESSI_SOFTWARE_SUBDIR}" >> $output
else
# note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined!
export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3
export EESSI_SOFTWARE_SUBDIR=$($EESSI_EPREFIX_PYTHON ${EESSI_INIT_DIR_PATH}/eessi_software_subdir_for_host.py $EESSI_PREFIX)
echo "archspec says ${EESSI_SOFTWARE_SUBDIR}" >> $output
fi
if [ ! -z $EESSI_SOFTWARE_SUBDIR ]; then

echo "Using ${EESSI_SOFTWARE_SUBDIR} as software subdirectory." >> $output
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
processor : 0
BogoMIPS : 243.75
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp ssbs
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x3
CPU part : 0xd0c
CPU revision : 1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
aarch64/arm/neoverse-n1
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Architecture: aarch64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 64
On-line CPU(s) list: 0-63
Thread(s) per core: 1
Core(s) per socket: 64
Socket(s): 1
NUMA node(s): 1
Vendor ID: ARM
Model: 1
Model name: Neoverse-N1
Stepping: r3p1
BogoMIPS: 50.00
L1d cache: 4 MiB
L1i cache: 4 MiB
L2 cache: 64 MiB
L3 cache: 32 MiB
NUMA node0 CPU(s): 0-63
Vulnerability Itlb multihit: Not affected
Vulnerability L1tf: Not affected
Vulnerability Mds: Not affected
Vulnerability Meltdown: Mitigation; PTI
Vulnerability Mmio stale data: Not affected
Vulnerability Spec store bypass: Not affected
Vulnerability Spectre v1: Mitigation; __user pointer sanitization
Vulnerability Spectre v2: Mitigation; CSV2, BHB
Vulnerability Srbds: Not affected
Vulnerability Tsx async abort: Not affected
Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
aarch64/arm/neoverse-n1
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
processor : 0
BogoMIPS : 2100.00
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm bf16 dgh rng
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x1
CPU part : 0xd40
CPU revision : 1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
aarch64/arm/neoverse-v1
4 changes: 4 additions & 0 deletions tests/archdetect/ppc64le/power9le/unknown-power9le.cpuinfo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
processor : 0
cpu : POWER9 (architected), altivec supported
clock : 2200.000000MHz
revision : 2.2 (pvr 004e 1202)
1 change: 1 addition & 0 deletions tests/archdetect/ppc64le/power9le/unknown-power9le.output
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ppc64le/power9le
27 changes: 27 additions & 0 deletions tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.cpuinfo
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 120
On-line CPU(s) list: 0-119
Thread(s) per core: 1
Core(s) per socket: 60
Socket(s): 2
NUMA node(s): 4
Vendor ID: AuthenticAMD
CPU family: 23
Model: 49
Model name: AMD EPYC 7V12 64-Core Processor
Stepping: 0
CPU MHz: 2445.424
BogoMIPS: 4890.84
Hypervisor vendor: Microsoft
Virtualization type: full
L1d cache: 32K
L1i cache: 32K
L2 cache: 512K
L3 cache: 16384K
NUMA node0 CPU(s): 0-29
NUMA node1 CPU(s): 30-59
NUMA node2 CPU(s): 60-89
NUMA node3 CPU(s): 90-119
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm art rep_good nopl extd_apicid aperfmperf eagerfpu pni pclmulqdq ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext retpoline_amd ssbd vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat umip
1 change: 1 addition & 0 deletions tests/archdetect/x86_64/amd/zen2/Azure-CentOS7-7V12.output
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen2
27 changes: 27 additions & 0 deletions tests/archdetect/x86_64/amd/zen3/Azure-CentOS7-7V73X.cpuinfo
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 120
On-line CPU(s) list: 0-119
Thread(s) per core: 1
Core(s) per socket: 60
Socket(s): 2
NUMA node(s): 4
Vendor ID: AuthenticAMD
CPU family: 25
Model: 1
Model name: AMD EPYC 7V73X 64-Core Processor
Stepping: 2
CPU MHz: 1846.550
BogoMIPS: 3693.10
Hypervisor vendor: Microsoft
Virtualization type: full
L1d cache: 32K
L1i cache: 32K
L2 cache: 512K
L3 cache: 98304K
NUMA node0 CPU(s): 0-29
NUMA node1 CPU(s): 30-59
NUMA node2 CPU(s): 60-89
NUMA node3 CPU(s): 90-119
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm art rep_good nopl extd_apicid aperfmperf eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext perfctr_core invpcid_single retpoline_amd vmmcall fsgsbase bmi1 avx2 smep bmi2 invpcid rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 clzero xsaveerptr arat umip vaes vpclmulqdq
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen3
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 63
model name : Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
stepping : 2
microcode : 0x3c
cpu MHz : 1757.910
cache size : 30720 KB
physical id : 0
siblings : 12
core id : 0
cpu cores : 12
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 15
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm epb invpcid_single tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm xsaveopt cqm_llc cqm_occup_llc ibpb ibrs stibp dtherm arat pln pts spec_ctrl intel_stibp
bogomips : 4987.97
clflush size : 64
cache_alignment : 64
address sizes : 46 bits physical, 48 bits virtual
power management:
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/intel/haswell
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 85
model name : Intel(R) Xeon(R) Gold 6132 CPU @ 2.60GHz
stepping : 4
microcode : 0x200004d
cpu MHz : 2600.000
cache size : 19712 KB
physical id : 0
siblings : 14
core id : 0
cpu cores : 14
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 22
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb cat_l3 cdp_l3 invpcid_single intel_pt tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm mpx rdt_a avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local ibpb ibrs stibp dtherm ida arat pln pts spec_ctrl intel_stibp ssbd
bogomips : 5200.00
clflush size : 64
cache_alignment : 64
address sizes : 46 bits physical, 48 bits virtual
power management:
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/intel/skylake_avx512

0 comments on commit 92233a9

Please sign in to comment.