Skip to content

Commit

Permalink
Bring in less hard coded values for ecflow_run
Browse files (browse the repository at this point in the history)
  • Loading branch information
BrianCurtis-NOAA committed Mar 22, 2024
1 parent 978a86b commit 689a697
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 51 deletions.
62 changes: 31 additions & 31 deletions tests/rt.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -522,8 +522,8 @@ cleanup() {
trap '{ echo "rt.sh interrupted"; rt_trap ; }' INT
trap '{ echo "rt.sh quit"; rt_trap ; }' QUIT
trap '{ echo "rt.sh terminated"; rt_trap ; }' TERM
trap '{ echo "rt.sh error on line $LINENO"; cleanup ; }' ERR
trap '{ echo "rt.sh finished"; cleanup ; }' EXIT
trap '{ echo "rt.sh error on line $LINENO"; rt_trap ; }' ERR
trap '{ echo "rt.sh finished"; rt_trap ; }' EXIT

# PATHRT - Path to regression tests directory
PATHRT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd -P )"
Expand All @@ -544,7 +544,7 @@ else
exit 1
fi

source detect_machine.sh # Note: this does not set ACCNR. The "if" block below does.
source detect_machine.sh
source rt_utils.sh
# shellcheck disable=SC1091
source module-setup.sh
Expand Down Expand Up @@ -656,36 +656,12 @@ fi
# Display the machine and account using the format detect_machine.sh used:
echo "Machine: ${MACHINE_ID} | Account: ${ACCNR}"

if [[ ${MACHINE_ID} = wcoss2 ]]; then
if [[ ${MACHINE_ID} = wcoss2 || ${MACHINE_ID} = acorn ]]; then

module load ecflow/5.6.0.13
module load intel/19.1.3.304
module load python/3.8.6

# ECFLOW_START="${ECF_ROOT}/scripts/server_check.sh"
# export ECF_OUTPUTDIR="${PATHRT}/ecf_outputdir"
# export ECF_COMDIR="${PATHRT}/ecf_comdir"
# rm -rf "${ECF_OUTPUTDIR}" "${ECF_COMDIR}"
# mkdir -p "${ECF_OUTPUTDIR}"
# mkdir -p "${ECF_COMDIR}"
export colonifnco=":output" # hack

DISKNM=/lfs/h2/emc/nems/noscrub/emc.nems/RT
QUEUE=dev
COMPILE_QUEUE=dev
ROCOTO_SCHEDULER=pbs
PARTITION=
STMP="/lfs/h2/emc/ptmp"
PTMP="/lfs/h2/emc/ptmp"
SCHEDULER="pbs"

elif [[ ${MACHINE_ID} = acorn ]]; then

module load ecflow/5.6.0.13
module load intel/19.1.3.304
module load python/3.8.6
INPUTDATA_ROOT=${INPUTDATA_ROOT:-${DISKNM}/NEMSfv3gfs/input-data-20221101}
ECF_ROOT=${ECF_ROOT:-}
ECFLOW_START="${ECF_ROOT}/scripts/server_check.sh"
export ECF_OUTPUTDIR="${PATHRT}/ecf_outputdir"
export ECF_COMDIR="${PATHRT}/ecf_comdir"
Expand All @@ -694,7 +670,7 @@ elif [[ ${MACHINE_ID} = acorn ]]; then
mkdir -p "${ECF_COMDIR}"
export colonifnco=":output" # hack

DISKNM=/lfs/h1/emc/nems/noscrub/emc.nems/RT
DISKNM=/lfs/h2/emc/nems/noscrub/emc.nems/RT
QUEUE=dev
COMPILE_QUEUE=dev
ROCOTO_SCHEDULER=pbs
Expand All @@ -703,6 +679,30 @@ elif [[ ${MACHINE_ID} = acorn ]]; then
PTMP="/lfs/h2/emc/ptmp"
SCHEDULER="pbs"

# elif [[ ${MACHINE_ID} = acorn ]]; then

# module load ecflow/5.6.0.13
# module load intel/19.1.3.304
# module load python/3.8.6
# INPUTDATA_ROOT=${INPUTDATA_ROOT:-${DISKNM}/NEMSfv3gfs/input-data-20221101}
# ECF_ROOT=${ECF_ROOT:-}
# ECFLOW_START="${ECF_ROOT}/scripts/server_check.sh"
# export ECF_OUTPUTDIR="${PATHRT}/ecf_outputdir"
# export ECF_COMDIR="${PATHRT}/ecf_comdir"
# rm -rf "${ECF_OUTPUTDIR}" "${ECF_COMDIR}"
# mkdir -p "${ECF_OUTPUTDIR}"
# mkdir -p "${ECF_COMDIR}"
# export colonifnco=":output" # hack

# DISKNM=/lfs/h2/emc/nems/noscrub/emc.nems/RT
# QUEUE=dev
# COMPILE_QUEUE=dev
# ROCOTO_SCHEDULER=pbs
# PARTITION=
# STMP="/lfs/h2/emc/ptmp"
# PTMP="/lfs/h2/emc/ptmp"
# SCHEDULER="pbs"

elif [[ ${MACHINE_ID} = gaea ]]; then

module use /ncrc/proj/epic/rocoto/modulefiles
Expand Down Expand Up @@ -919,7 +919,7 @@ else
fi

# Does this machine support Rocoto?
if [[ -n ${ROCOTO} ]]; then
if [[ ${ROCOTO} == true ]]; then
if [[ ${MACHINE_ID} != wcoss2 && ${MACHINE_ID} != acorn && ${MACHINE_ID} != expanse && ${MACHINE_ID} != stampede ]]; then
ROCOTORUN="$(command -v rocotorun)"
export ROCOTORUN
Expand All @@ -933,7 +933,7 @@ if [[ -n ${ROCOTO} ]]; then
fi

# Does this machine support ecflow?
if [[ -n ${ECFLOW} ]]; then
if [[ ${ECFLOW} == true ]]; then
if [[ ${MACHINE_ID} == wcoss2 && ${MACHINE_ID} == acorn ]]; then
ECFLOW_START="$(command -v server_check.sh)"

Expand Down
58 changes: 38 additions & 20 deletions tests/rt_utils.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -763,39 +763,57 @@ ecflow_run() {

ECF_HOST="${ECF_HOST:-${HOSTNAME}}"

set +e
#set +e
# Make sure ECF_HOST and ECF_PORT are set/ready on systems that have an
# explicit ecflow node
if [[ ${MACHINE_ID} == wcoss2 || ${MACHINE_ID} == acorn ]]; then
readarray -t ECFHOSTLIST < "${ECF_HOSTFILE}"
for ECF_HOST in "${ECFHOSTLIST[@]}"
do
if ssh -t -t "${ECF_HOST}"; then
export ECF_HOST
break
else
ECF_HOST=''
fi
done
elif [[ ${MACHINE_ID} == hera || ${MACHINE_ID} == jet ]]; then
module load ecflow
fi
if [[ -z ${ECF_HOST} || -z ${ECF_PORT} ]]; then
echo "ERROR: ECF_HOST or ECF_PORT are not set, and rt.sh cannot continue with ECFLOW"
exit 1
else
echo "ECF_HOST: ${ECF_HOST}, ECF_PORT: ${ECF_PORT}"
fi

# Start the ecflow_server
ecflow_client --ping --host="${ECF_HOST}" --port="${ECF_PORT}"
not_running=$?
if [[ ${not_running} -eq 1 ]]; then
echo "ecflow_server is NOT running on ${ECF_HOST}:${ECF_PORT}"
if [[ ${MACHINE_ID} == wcoss2 || ${MACHINE_ID} == acorn ]]; then
if [[ "${HOST::1}" == "a" ]]; then
export ECF_HOST=aecflow01
elif [[ "${HOST::1}" == "c" ]]; then
export ECF_HOST=cdecflow01
elif [[ "${HOST::1}" == "d" ]]; then
export ECF_HOST=ddecflow01
fi
#shellcheck disable=SC2029
ssh "${ECF_HOST}" "bash -l -c \"module load ecflow && ecflow_start.sh -p ${ECF_PORT}\""
ssh "${ECF_HOST}" "bash -l -c \"module load ecflow && ${ECFLOW_START} -p ${ECF_PORT}\""
elif [[ ${MACHINE_ID} == hera || ${MACHINE_ID} == jet ]]; then
module load ecflow
echo "On ${MACHINE_ID}, start ecFlow server on dedicated node ${ECF_HOST}"
#shellcheck disable=SC2029
ssh "${ECF_HOST}" "bash -l -c \"module load ecflow && ${ECFLOW_START} -d ${RUNDIR_ROOT}/ecflow_server\""
else
${ECFLOW_START} -p "${ECF_PORT}" -d "${RUNDIR_ROOT}/ecflow_server"
fi

# Try pinging ecflow server now, and erroring out if not there.
ecflow_client --ping --host="${ECF_HOST}" --port="${ECF_PORT}"
not_running=$?
if [[ ${not_running} -eq 1 ]]; then
echo "ERROR: Failure to start ecflow, exiting..."
exit 1
fi
else
echo "ecflow_server is already running on ${ECF_HOST}:${ECF_PORT}"
fi
set -e

#set -e
ECFLOW_RUNNING=true

export ECF_PORT
export ECF_HOST

ecflow_client --load="${ECFLOW_RUN}/${ECFLOW_SUITE}.def"
ecflow_client --begin="${ECFLOW_SUITE}"
ecflow_client --restart
Expand All @@ -819,16 +837,16 @@ ecflow_run() {

# ecflow_kill: tear down the active ecflow suite.
# Returns immediately unless ECFLOW_RUNNING is "true" (an unset value is
# treated as "false"). Otherwise it issues ecflow_client --suspend and --kill
# against /${ECFLOW_SUITE}, sleeps 20 seconds, and then asks ecflow_client to
# force-delete the suite.
# NOTE(review): this text appears to come from a rendered diff whose +/-
# markers were stripped, so a superseded line and its replacement sit next to
# each other (the "set +e" pair and the "--delete" pair) - confirm against the
# real tests/rt_utils.sh which form of each pair is current.
ecflow_kill() {
# Guard: nothing to do when ecflow was never marked as running.
[[ ${ECFLOW_RUNNING:-false} == true ]] || return
# Turn off errexit for the remaining commands in this shell.
set +e
#set +e
ecflow_client --suspend "/${ECFLOW_SUITE}"
ecflow_client --kill "/${ECFLOW_SUITE}"
# Pause between the kill request and the delete request.
sleep 20
# NOTE(review): on the next line there is no space between yes and the quoted
# suite path, so the shell passes them as the single word yes/<suite>; the
# line after it passes "yes" and the suite path as two separate arguments.
ecflow_client --delete=force yes"/${ECFLOW_SUITE}"
ecflow_client --delete=force yes "/${ECFLOW_SUITE}"
}

ecflow_stop() {
[[ ${ECFLOW_RUNNING:-false} == true ]] || return
set +e
#set +e
SUITES=$( ecflow_client --get )
SUITES=$( grep "^suite" <<< "${SUITES}" )
#SUITES=$( ecflow_client --get | grep "^suite" )
Expand Down

0 comments on commit 689a697

Please sign in to comment.