Skip to content

Commit

Permalink
Add --cpus-per-task flag to APRUN commands
Browse files Browse the repository at this point in the history
- Update to SLURM on Orion now requires jobs to explicitly
state thread needs in APRUN/srun command at runtime, instead
of exporting to environment and getting it from there.
- Add "--cpus-per-task" flag with thread # to APRUN commands.

Refs NOAA-EMC#2044
  • Loading branch information
KateFriedman-NOAA committed Nov 14, 2023
1 parent 0a0d698 commit 4b492f4
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions env/ORION.env
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ elif [[ "${step}" = "atmanlrun" ]]; then

export NTHREADS_ATMANL=${nth_atmanlrun:-${nth_max}}
[[ ${NTHREADS_ATMANL} -gt ${nth_max} ]] && export NTHREADS_ATMANL=${nth_max}
export APRUN_ATMANL="${launcher} -n ${npe_atmanlrun}"
export APRUN_ATMANL="${launcher} -n ${npe_atmanlrun} --cpus-per-task=${NTHREADS_ATMANL}"

elif [[ "${step}" = "atmensanlrun" ]]; then

nth_max=$((npe_node_max / npe_node_atmensanlrun))

export NTHREADS_ATMENSANL=${nth_atmensanlrun:-${nth_max}}
[[ ${NTHREADS_ATMENSANL} -gt ${nth_max} ]] && export NTHREADS_ATMENSANL=${nth_max}
export APRUN_ATMENSANL="${launcher} -n ${npe_atmensanlrun}"
export APRUN_ATMENSANL="${launcher} -n ${npe_atmensanlrun} --cpus-per-task=${NTHREADS_ATMENSANL}"

elif [[ "${step}" = "aeroanlrun" ]]; then

Expand All @@ -77,15 +77,15 @@ elif [[ "${step}" = "aeroanlrun" ]]; then

export NTHREADS_AEROANL=${nth_aeroanlrun:-${nth_max}}
[[ ${NTHREADS_AEROANL} -gt ${nth_max} ]] && export NTHREADS_AEROANL=${nth_max}
export APRUN_AEROANL="${launcher} -n ${npe_aeroanlrun}"
export APRUN_AEROANL="${launcher} -n ${npe_aeroanlrun} --cpus-per-task=${NTHREADS_AEROANL}"

elif [[ "${step}" = "landanl" ]]; then

nth_max=$((npe_node_max / npe_node_landanl))

export NTHREADS_LANDANL=${nth_landanl:-${nth_max}}
[[ ${NTHREADS_LANDANL} -gt ${nth_max} ]] && export NTHREADS_LANDANL=${nth_max}
export APRUN_LANDANL="${launcher} -n ${npe_landanl}"
export APRUN_LANDANL="${launcher} -n ${npe_landanl} --cpus-per-task=${NTHREADS_LANDANL}"

export APRUN_APPLY_INCR="${launcher} -n 6"

Expand All @@ -97,7 +97,7 @@ elif [[ "${step}" = "ocnanalbmat" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalbmat:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalbmat}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalbmat} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "ocnanalrun" ]]; then

Expand All @@ -107,7 +107,7 @@ elif [[ "${step}" = "ocnanalrun" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalrun:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalrun}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalrun} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "ocnanalchkpt" ]]; then

Expand All @@ -117,7 +117,7 @@ elif [[ "${step}" = "ocnanalchkpt" ]]; then

export NTHREADS_OCNANAL=${nth_ocnanalchkpt:-${nth_max}}
[[ ${NTHREADS_OCNANAL} -gt ${nth_max} ]] && export NTHREADS_OCNANAL=${nth_max}
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalchkpt}"
export APRUN_OCNANAL="${launcher} -n ${npe_ocnanalchkpt} --cpus-per-task=${NTHREADS_OCNANAL}"

elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then

Expand Down Expand Up @@ -153,7 +153,7 @@ elif [[ "${step}" = "sfcanl" ]]; then
export NTHREADS_CYCLE=${nth_sfcanl:-14}
[[ ${NTHREADS_CYCLE} -gt ${npe_node_max} ]] && export NTHREADS_CYCLE=${npe_node_max}
npe_sfcanl=${ntiles:-6}
export APRUN_CYCLE="${launcher} -n ${npe_sfcanl}"
export APRUN_CYCLE="${launcher} -n ${npe_sfcanl} --cpus-per-task=${NTHREADS_CYCLE}"

elif [[ "${step}" = "eobs" ]]; then

Expand All @@ -168,7 +168,7 @@ elif [[ "${step}" = "eobs" ]]; then

export NTHREADS_GSI=${nth_eobs:-${nth_max}}
[[ ${NTHREADS_GSI} -gt ${nth_max} ]] && export NTHREADS_GSI=${nth_max}
export APRUN_GSI="${launcher} -n ${npe_gsi:-${npe_eobs}}"
export APRUN_GSI="${launcher} -n ${npe_gsi:-${npe_eobs}} --cpus-per-task=${NTHREADS_GSI}"

elif [[ "${step}" = "eupd" ]]; then

Expand All @@ -180,7 +180,7 @@ elif [[ "${step}" = "eupd" ]]; then

export NTHREADS_ENKF=${nth_eupd:-${nth_max}}
[[ ${NTHREADS_ENKF} -gt ${nth_max} ]] && export NTHREADS_ENKF=${nth_max}
export APRUN_ENKF="${launcher} -n ${npe_enkf:-${npe_eupd}}"
export APRUN_ENKF="${launcher} -n ${npe_enkf:-${npe_eupd}} --cpus-per-task=${NTHREADS_ENKF}"

elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

Expand All @@ -204,56 +204,56 @@ elif [[ "${step}" = "upp" ]]; then

export NTHREADS_UPP=${nth_upp:-1}
[[ ${NTHREADS_UPP} -gt ${nth_max} ]] && export NTHREADS_UPP=${nth_max}
export APRUN_UPP="${launcher} -n ${npe_upp}"
export APRUN_UPP="${launcher} -n ${npe_upp} --cpus-per-task=${NTHREADS_UPP}"

elif [[ "${step}" = "post" ]]; then

nth_max=$((npe_node_max / npe_node_post))

export NTHREADS_NP=${nth_np:-1}
[[ ${NTHREADS_NP} -gt ${nth_max} ]] && export NTHREADS_NP=${nth_max}
export APRUN_NP="${launcher} -n ${npe_post}"
export APRUN_NP="${launcher} -n ${npe_post} --cpus-per-task=${NTHREADS_NP}"

export USE_CFP="YES" # Use MPMD for downstream product generation on Orion
export NTHREADS_DWN=${nth_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${nth_max} ]] && export NTHREADS_DWN=${nth_max}
export APRUN_DWN="${launcher} -n ${npe_dwn}"
export APRUN_DWN="${launcher} -n ${npe_dwn} --cpus-per-task=${NTHREADS_DWN}"

elif [[ "${step}" = "ecen" ]]; then

nth_max=$((npe_node_max / npe_node_ecen))

export NTHREADS_ECEN=${nth_ecen:-${nth_max}}
[[ ${NTHREADS_ECEN} -gt ${nth_max} ]] && export NTHREADS_ECEN=${nth_max}
export APRUN_ECEN="${launcher} -n ${npe_ecen}"
export APRUN_ECEN="${launcher} -n ${npe_ecen} --cpus-per-task=${NTHREADS_ECEN}"

export NTHREADS_CHGRES=${nth_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${npe_node_max} ]] && export NTHREADS_CHGRES=${npe_node_max}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${nth_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${nth_max} ]] && export NTHREADS_CALCINC=${nth_max}
export APRUN_CALCINC="${launcher} -n ${npe_ecen}"
export APRUN_CALCINC="${launcher} -n ${npe_ecen} --cpus-per-task=${NTHREADS_CALCINC}"

elif [[ "${step}" = "esfc" ]]; then

nth_max=$((npe_node_max / npe_node_esfc))

export NTHREADS_ESFC=${nth_esfc:-${nth_max}}
[[ ${NTHREADS_ESFC} -gt ${nth_max} ]] && export NTHREADS_ESFC=${nth_max}
export APRUN_ESFC="${launcher} -n ${npe_esfc}"
export APRUN_ESFC="${launcher} -n ${npe_esfc} --cpus-per-task=${NTHREADS_ESFC}"

export NTHREADS_CYCLE=${nth_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${npe_node_max} ]] && export NTHREADS_CYCLE=${npe_node_max}
export APRUN_CYCLE="${launcher} -n ${npe_esfc}"
export APRUN_CYCLE="${launcher} -n ${npe_esfc} --cpus-per-task=${NTHREADS_CYCLE}"

elif [[ "${step}" = "epos" ]]; then

nth_max=$((npe_node_max / npe_node_epos))

export NTHREADS_EPOS=${nth_epos:-${nth_max}}
[[ ${NTHREADS_EPOS} -gt ${nth_max} ]] && export NTHREADS_EPOS=${nth_max}
export APRUN_EPOS="${launcher} -n ${npe_epos}"
export APRUN_EPOS="${launcher} -n ${npe_epos} --cpus-per-task=${NTHREADS_EPOS}"

elif [[ "${step}" = "postsnd" ]]; then

Expand All @@ -263,7 +263,7 @@ elif [[ "${step}" = "postsnd" ]]; then

export NTHREADS_POSTSND=${nth_postsnd:-1}
[[ ${NTHREADS_POSTSND} -gt ${nth_max} ]] && export NTHREADS_POSTSND=${nth_max}
export APRUN_POSTSND="${launcher} -n ${npe_postsnd}"
export APRUN_POSTSND="${launcher} -n ${npe_postsnd} --cpus-per-task=${NTHREADS_POSTSND}"

export NTHREADS_POSTSNDCFP=${nth_postsndcfp:-1}
[[ ${NTHREADS_POSTSNDCFP} -gt ${nth_max} ]] && export NTHREADS_POSTSNDCFP=${nth_max}
Expand Down Expand Up @@ -298,6 +298,6 @@ elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${nth_fit2obs:-1}
[[ ${NTHREADS_FIT2OBS} -gt ${nth_max} ]] && export NTHREADS_FIT2OBS=${nth_max}
export MPIRUN="${launcher} -n ${npe_fit2obs}"
export MPIRUN="${launcher} -n ${npe_fit2obs} --cpus-per-task=${NTHREADS_FIT2OBS}"

fi

0 comments on commit 4b492f4

Please sign in to comment.