Skip to content

Commit

Permalink
Work update 20250117
Browse files Browse the repository at this point in the history
  • Loading branch information
BrianCurtis-NOAA committed Jan 17, 2025
1 parent c7412fa commit d54f8fe
Show file tree
Hide file tree
Showing 7 changed files with 1,112 additions and 0 deletions.
110 changes: 110 additions & 0 deletions tests/newtests/detect_machine.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash

# The authoritative copy of this script lives in the ufs-weather-model at:
# https://github.com/ufs-community/ufs-weather-model/blob/develop/tests/detect_machine.sh
# If any local modifications are made or new platform support added,
# please consider opening an issue and a PR to the ufs-weather-model
# so that this copy remains in sync with its authoritative source
#
# Thank you for your contribution

# If the MACHINE_ID variable is set, skip this script.
# (A top-level `return` only works when this file is sourced, not executed.)
[[ -n ${MACHINE_ID:-} ]] && return

# First detect w/ hostname.
# Each arm is a glob matched against the output of `hostname -f`; the first
# matching arm sets MACHINE_ID and anything unrecognised becomes UNKNOWN.
case "$(hostname -f)" in

  adecflow0[12].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
  alogin0[123].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn
  clogin0[1-9].cactus.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### cactus01-9
  clogin10.cactus.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### cactus10
  dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9
  dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10

  gaea5[1-8]) MACHINE_ID=gaeac5 ;; ### gaea51-58
  gaea5[1-8].ncrc.gov) MACHINE_ID=gaeac5 ;; ### gaea51-58
  gaea6[1-8]) MACHINE_ID=gaeac6 ;; ### gaea61-68
  gaea6[1-8].ncrc.gov) MACHINE_ID=gaeac6 ;; ### gaea61-68

  hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09
  hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12
  hecflow01) MACHINE_ID=hera ;; ### heraecflow01

  s4-submit.ssec.wisc.edu) MACHINE_ID=s4 ;; ### s4

  fe[1-8]) MACHINE_ID=jet ;; ### jet01-8
  tfe[12]) MACHINE_ID=jet ;; ### tjet1-2

  # Matched case-insensitively, exactly like the Hercules arm below, so that
  # both the mixed-case and the all-lowercase spelling of the FQDN are accepted.
  [Oo]rion-login-[1-4].[Hh][Pp][Cc].[Mm]s[Ss]tate.[Ee]du) MACHINE_ID=orion ;; ### orion1-4

  [Hh]ercules-login-[1-4].[Hh][Pp][Cc].[Mm]s[Ss]tate.[Ee]du) MACHINE_ID=hercules ;; ### hercules1-4

  derecho[1-8].hsn.de.hpc.ucar.edu) MACHINE_ID=derecho ;; ### derecho1-8

  login[1-4].stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede1-4

  login[1-4].frontera.tacc.utexas.edu) MACHINE_ID=frontera ;; ### frontera1-4
  c*.frontera.tacc.utexas.edu) MACHINE_ID=frontera ;; ### frontera compute

  login0[1-2].expanse.sdsc.edu) MACHINE_ID=expanse ;; ### expanse1-2

  discover3[1-5].prv.cube) MACHINE_ID=discover ;; ### discover31-35
  *) MACHINE_ID=UNKNOWN ;; # Unknown platform
esac

# Hostname gave no answer: accept one of the known cloud providers named in
# PW_CSP, otherwise record that PW_CSP is not a recognised provider.
if [[ ${MACHINE_ID} == "UNKNOWN" ]]; then
  if [[ ${PW_CSP:-} == "aws" || ${PW_CSP:-} == "google" || ${PW_CSP:-} == "azure" ]]; then
    MACHINE_ID=noaacloud
  else
    PW_CSP="UNKNOWN"
  fi
fi

# An explicit, non-empty MACHINE always wins over anything auto-detected
MACHINE_ID="${MACHINE:-${MACHINE_ID}}"

# Stop here once MACHINE_ID is no longer UNKNOWN; nothing more to detect
[[ "${MACHINE_ID}" != "UNKNOWN" ]] && return

# Hostnames may not match on compute nodes, so fall back to probing for
# directories that identify each platform. The order of the checks matters:
# the first directory test that succeeds decides MACHINE_ID.
if [[ -d /lfs/h3 ]]; then
  MACHINE_ID=wcoss2 # NOAA Cactus or Dogwood
elif [[ -d /lfs/h1 ]]; then
  # Only reached when /lfs/h3 is absent: NOAA TDS Acorn
  MACHINE_ID=acorn
elif [[ -d /mnt/lfs1 ]]; then
  MACHINE_ID=jet # NOAA Jet
elif [[ -d /scratch1 ]]; then
  MACHINE_ID=hera # NOAA Hera
elif [[ -d /work ]]; then
  # MSU Orion and Hercules both have /work; tell them apart by whether the
  # source of the /home mount mentions "hercules".
  mount=$(findmnt -n -o SOURCE /home)
  case ${mount} in
    *hercules*) MACHINE_ID=hercules ;;
    *) MACHINE_ID=orion ;;
  esac
elif [[ -d /ncrc && -d /gpfs/f5 ]]; then
  MACHINE_ID=gaeac5 # GAEA C5
elif [[ -d /ncrc && -d /gpfs/f6 ]]; then
  MACHINE_ID=gaeac6 # GAEA C6
elif [[ -d /data/prod ]]; then
  MACHINE_ID=s4 # SSEC's S4
else
  # Nothing matched: MACHINE_ID is left as it was and the caller is warned
  echo "WARNING: UNKNOWN PLATFORM" >&2
fi
35 changes: 35 additions & 0 deletions tests/newtests/error_handling.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#! /bin/bash

#######################################
# Print a message to stderr and terminate the shell with status 1.
# Arguments: $@ - message words, joined with single spaces
# Outputs:   the message, followed by a newline, on stderr
#######################################
die() {
  # printf instead of echo: a message that starts with -n or -e must be
  # printed verbatim rather than being taken as an option to echo.
  local IFS=' '
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# ERR-trap entry point: report where and how a command failed, then hand
# over to rt_trap.
# Arguments: $1 - exit status of the failed command
#            $2 - line number at which it failed
# Outputs:   two status lines on stdout
#######################################
handle_error() {
  printf '%s\n' "rt.sh: Getting error information..."
  local status=$1
  local line_no=$2
  printf 'Exited at line %s having code %s\n' "${line_no}" "${status}"
  rt_trap
}

#######################################
# Abnormal-exit path: stop any running workflow manager, then clean up.
# Globals:   ROCOTO, ECFLOW (read; each treated as false when unset)
# Outputs:   one status line on stdout
#######################################
rt_trap() {
  printf '%s\n' "rt.sh: Exited abnormally, killing workflow and cleaning up"
  # Ignore SIGINT from here on so teardown is not interrupted
  trap '' INT
  if [[ ${ROCOTO:-false} == true ]]; then
    rocoto_kill
  fi
  if [[ ${ECFLOW:-false} == true ]]; then
    ecflow_kill
  fi
  cleanup
}

#######################################
# Final teardown: remove the lock directory if this shell owns it, stop
# ecFlow when it is in use, clear the EXIT trap and exit.
# Globals:   LOCKDIR (read; may be unset), ECFLOW (read; false when unset)
# Outputs:   two status lines on stdout
#######################################
cleanup() {
  echo "rt.sh: Cleaning up..."

  # The second field of ${LOCKDIR}/PID is compared with this shell's PID.
  # LOCKDIR may be unset or the file may be missing when cleanup runs, so
  # both cases are tolerated instead of aborting the handler (set -u) or
  # printing a redirection error.
  local pid_file="${LOCKDIR:-}/PID"
  local lock_owner=""
  if [[ -n ${LOCKDIR:-} && -r ${pid_file} ]]; then
    lock_owner=$(awk '{print $2}' < "${pid_file}")
  fi
  if [[ ${lock_owner} == "$$" ]]; then
    # :? makes rm refuse to run on an empty path
    rm -rf -- "${LOCKDIR:?}"
  fi

  if [[ ${ECFLOW:-false} == true ]]; then
    ecflow_stop
  fi

  # Reset the EXIT trap so the exit below does not re-enter this function
  trap - EXIT
  echo "rt.sh: Exiting."
  exit
}

# Install the handlers that route each way of leaving the script through the
# functions above. INT, QUIT and TERM each print which signal ended the run
# and then call rt_trap; the braces group both commands into one trap action.
trap '{ echo "rt.sh interrupted"; rt_trap ; }' INT
trap '{ echo "rt.sh quit"; rt_trap ; }' QUIT
trap '{ echo "rt.sh terminated"; rt_trap ; }' TERM
# ERR: pass the failing command's status and the current line number to
# handle_error. The single quotes defer expansion of $? and $LINENO until the
# trap actually fires.
trap '{ handle_error $? $LINENO ; }' ERR
# EXIT: announce completion and call cleanup.
trap '{ echo "rt.sh finished"; cleanup ; }' EXIT
95 changes: 95 additions & 0 deletions tests/newtests/manage_ecflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#! /bin/bash

# error_handling.sh supplies die(), which the unsupported/unknown arms call.
source error_handling.sh

# Load the ecFlow module (plus whatever compiler/Python stack it needs) for
# the platform named by MACHINE_ID. Several platforms additionally export
# ECF_HOST (output of `hostname`) and ECF_PORT (numeric user id + 1500).
case ${MACHINE_ID} in
  wcoss2 | acorn)
    module load ecflow/5.6.0.13
    ;;

  gaeac5)
    # UFSWM drives ecFlow from Python, so load a Python stack first
    module load PrgEnv-intel/8.5.0
    module load intel-classic/2023.2.0
    module load python/3.9.12

    # ecFlow is not a default module here; it comes from the EPIC stack
    module use /ncrc/proj/epic/spack-stack/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  gaeac6)
    # UFSWM drives ecFlow from Python, so load a Python stack first
    module use /ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core
    module load stack-intel/2023.2.0
    module load python/3.10.13

    # ecFlow is not a default module here; it comes from the EPIC stack
    module use /ncrc/proj/epic/spack-stack/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  hera)
    module load ecflow/5.11.4
    ;;

  orion)
    module use /work/noaa/epic/role-epic/spack-stack/orion/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  hercules)
    module use /work/noaa/epic/role-epic/spack-stack/hercules/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  jet)
    # UFSWM drives ecFlow from Python, so load a Python stack first
    module use /contrib/spack-stack/spack-stack-1.6.0/envs/unified-env-rocky8/install/modulefiles/Core
    module load stack-intel/2021.5.0
    module load stack-python/3.10.13

    module load ecflow/5.11.4
    ;;

  s4)
    # NOTE(review): ecflow is loaded twice with different versions (5.6.0
    # here, 5.8.4 after `module use`); kept as-is — confirm this is intended.
    module load ecflow/5.6.0
    module load miniconda/3.8-s4
    module use /data/prod/jedi/spack-stack/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  derecho)
    # ecFlow needs Python
    module unload ncarcompilers
    module use /glade/work/epicufsrt/contrib/spack-stack/derecho/spack-stack-1.5.1/envs/unified-env/install/modulefiles/Core
    module load stack-intel/2021.10.0
    module load stack-python/3.10.8

    # ecFlow is not in the default system stack; use the EPIC build
    module use /glade/work/epicufsrt/contrib/spack-stack/derecho/modulefiles
    module load ecflow/5.8.4
    ECF_HOST=$(hostname)
    ECF_PORT=$(( $(id -u) + 1500 ))
    export ECF_HOST ECF_PORT
    ;;

  stampede | expanse | noaacloud)
    die "Machine does not support ECFlow."
    ;;

  *)
    die "Unknown machine ID, please edit detect_machine.sh file"
    ;;
esac
59 changes: 59 additions & 0 deletions tests/newtests/manage_rocoto.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#! /bin/bash

# error_handling.sh supplies die(), which the unsupported/unknown arms call.
source error_handling.sh

# Make Rocoto available for the platform named by MACHINE_ID and record, in
# ROCOTO_SCHEDULER, the scheduler name chosen for that platform.
case ${MACHINE_ID} in
  wcoss2 | acorn)
    # No module is loaded for these platforms; only the scheduler is set
    ROCOTO_SCHEDULER=pbs
    ;;

  gaeac5)
    module use /ncrc/proj/epic/rocoto/modulefiles
    module load rocoto
    ROCOTO_SCHEDULER=slurm
    ;;

  gaeac6)
    module use /ncrc/proj/epic/c6/modulefiles
    module load rocoto/1.3.7
    ROCOTO_SCHEDULER=slurm
    ;;

  hera | jet)
    module load rocoto
    ROCOTO_SCHEDULER=slurm
    ;;

  orion)
    module load contrib ruby/3.2.3 rocoto/1.3.7
    ROCOTO_SCHEDULER=slurm
    ;;

  hercules)
    module load contrib rocoto
    ROCOTO_SCHEDULER=slurm
    ;;

  s4)
    module load rocoto/1.3.2
    ROCOTO_SCHEDULER=slurm
    ;;

  derecho)
    module use /glade/work/epicufsrt/contrib/derecho/rocoto/modulefiles
    module load rocoto
    ROCOTO_SCHEDULER=pbspro
    ;;

  stampede | expanse)
    die "Machine does not support Rocoto."
    ;;

  noaacloud)
    # Put the EPIC tools directory at the front of PATH before loading
    export PATH="/contrib/EPIC/bin:${PATH}"
    module use /apps/modules/modulefiles
    module load rocoto/1.3.7
    ROCOTO_SCHEDULER=slurm
    ;;

  *)
    die "Unknown machine ID, please edit detect_machine.sh file"
    ;;
esac
Loading

0 comments on commit d54f8fe

Please sign in to comment.