Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' 2024 May 2 version in…
Browse files Browse the repository at this point in the history
…to catchem_Apr
  • Loading branch information
zhanglikate committed May 3, 2024
2 parents 3747d68 + a005244 commit 7fd6d18
Show file tree
Hide file tree
Showing 336 changed files with 4,062 additions and 18,193 deletions.
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,23 +158,26 @@ sorc/ocnicepost.fd
# jobs symlinks
# scripts symlinks
scripts/exemcsfc_global_sfc_prep.sh
scripts/exgdas_global_marine_analysis_ecen.py
scripts/exglobal_prep_ocean_obs.py
# ush symlinks
ush/chgres_cube.sh
ush/emcsfc_ice_blend.sh
ush/emcsfc_snow.sh
ush/exglobal_prep_ocean_obs.py
ush/fix_precip.sh
ush/fv3gfs_driver_grid.sh
ush/fv3gfs_filter_topo.sh
ush/fv3gfs_make_grid.sh
ush/fv3gfs_make_orog.sh
ush/gen_bufr2ioda_json.py
ush/gen_bufr2ioda_yaml.py
ush/global_chgres.sh
ush/global_chgres_driver.sh
ush/global_cycle.sh
ush/global_cycle_driver.sh
ush/jediinc2fv3.py
ush/ufsda
ush/finddate.sh
ush/soca
ush/make_NTC_file.pl
ush/make_ntc_bull.pl
ush/make_tif.sh
Expand Down
162 changes: 87 additions & 75 deletions ci/Jenkinsfile

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions ci/cases/hires/C1152_S2SW.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
experiment:
system: gfs
mode: forecast-only

arguments:
pslot: {{ 'pslot' | getenv }}
app: S2SW
resdetatmos: 1152
resdetocean: 0.25
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
idate: 2019120300
edate: 2019120300
yaml: {{ HOMEgfs }}/ci/cases/yamls/gfs_defaults_ci.yaml
14 changes: 14 additions & 0 deletions ci/cases/hires/C768_S2SW.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
experiment:
system: gfs
mode: forecast-only

arguments:
pslot: {{ 'pslot' | getenv }}
app: S2SW
resdetatmos: 768
resdetocean: 0.25
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
idate: 2019120300
edate: 2019120300
yaml: {{ HOMEgfs }}/ci/cases/yamls/gfs_defaults_ci.yaml
2 changes: 2 additions & 0 deletions ci/cases/pr/C48_S2SWA_gefs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ arguments:
edate: 2021032312
yaml: {{ HOMEgfs }}/ci/cases/yamls/gefs_ci_defaults.yaml

skip_ci_on_hosts:
- wcoss2
2 changes: 1 addition & 1 deletion ci/cases/pr/C48mx500_3DVarAOWCDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ arguments:
yaml: {{ HOMEgfs }}/ci/cases/yamls/soca_gfs_defaults_ci.yaml

skip_ci_on_hosts:
- wcoss2
- orion
- hera
- hercules
2 changes: 2 additions & 0 deletions ci/cases/pr/C96C48_ufs_hybatmDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ skip_ci_on_hosts:
- hera
- orion
- hercules
- wcoss2

1 change: 1 addition & 0 deletions ci/cases/pr/C96_atmaerosnowDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ arguments:
skip_ci_on_hosts:
- orion
- hercules
- wcoss2
4 changes: 2 additions & 2 deletions ci/cases/yamls/build.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
builds:
- gefs: './build_all.sh -k'
- gfs: './build_all.sh -kwgu'
- gefs: './build_all.sh -kw'
- gfs: './build_all.sh -kgu'
8 changes: 8 additions & 0 deletions ci/platforms/config.wcoss2
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/bash

export GFS_CI_ROOT=/lfs/h2/emc/global/noscrub/globalworkflow.ci/GFS_CI_ROOT
export ICSDIR_ROOT=/lfs/h2/emc/global/noscrub/emc.global/data/ICSDIR
export STMP="/lfs/h2/emc/stmp/${USER}"
export SLURM_ACCOUNT=GFS-DEV
export max_concurrent_cases=5
export max_concurrent_pr=4
78 changes: 48 additions & 30 deletions ci/scripts/check_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@ scriptname=$(basename "${BASH_SOURCE[0]}")
echo "Begin ${scriptname} at $(date -u)" || true
export PS4='+ $(basename ${BASH_SOURCE})[${LINENO}]'

GH=${HOME}/bin/gh
REPO_URL="https://github.com/NOAA-EMC/global-workflow.git"
REPO_URL=${REPO_URL:-"git@github.com:NOAA-EMC/global-workflow.git"}

#########################################################################
# Set up runtime environment variables for accounts on supported machines
#########################################################################

source "${HOMEgfs}/ush/detect_machine.sh"
case ${MACHINE_ID} in
hera | orion | hercules)
hera | orion | hercules | wcoss2)
echo "Running Automated Testing on ${MACHINE_ID}"
source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
;;
Expand All @@ -38,7 +37,18 @@ source "${HOMEgfs}/ci/scripts/utils/ci_utils.sh"
module use "${HOMEgfs}/modulefiles"
module load "module_gwsetup.${MACHINE_ID}"
module list
# Load machine-specific modules for CI (currently only loaded on wcoss2)
if [[ "${MACHINE_ID}" == "wcoss2" ]]; then
module load "module_gwci.${MACHINE_ID}"
fi
set -x
if ! command -v gh > /dev/null; then
GH="${HOME}/bin/gh"
else
GH=$(command -v gh)
fi
export GH

rocotostat=$(command -v rocotostat)
if [[ -z ${rocotostat+x} ]]; then
echo "rocotostat not found on system"
Expand All @@ -58,7 +68,7 @@ pr_list_dbfile="${GFS_CI_ROOT}/open_pr_list.db"

pr_list=""
if [[ -f "${pr_list_dbfile}" ]]; then
pr_list=$("${HOMEgfs}/ci/scripts/pr_list_database.py" --dbfile "${pr_list_dbfile}" --display | grep -v Failed | grep Running | awk '{print $1}') || true
pr_list=$("${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --dbfile "${pr_list_dbfile}" --list Open Running) || true
fi
if [[ -z "${pr_list+x}" ]]; then
echo "no PRs open and ready to run cases on .. exiting"
Expand Down Expand Up @@ -90,7 +100,7 @@ for pr in ${pr_list}; do
sed -i "1 i\`\`\`" "${output_ci}"
sed -i "1 i\All CI Test Cases Passed on ${MACHINE_ID^}:" "${output_ci}"
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}"
"${HOMEgfs}/ci/scripts/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}"
"${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}"
# Check whether this PR was opened by the weekly tests and, if so, close it if it passed on all platforms
weekly_labels=$(${GH} pr view "${pr}" --repo "${REPO_URL}" --json headRefName,labels,author --jq 'select(.author.login | contains("emcbot")) | select(.headRefName | contains("weekly_ci")) | .labels[].name ') || true
if [[ -n "${weekly_labels}" ]]; then
Expand Down Expand Up @@ -123,31 +133,40 @@ for pr in ${pr_list}; do
if [[ ! -f "${db}" ]]; then
continue
fi
rocoto_stat_output=$("${rocotostat}" -w "${xml}" -d "${db}" -s | grep -v CYCLE) || true
num_cycles=$(echo "${rocoto_stat_output}" | wc -l) || true
num_done=$(echo "${rocoto_stat_output}" | grep -c Done) || true
# num_succeeded=$("${rocotostat}" -w "${xml}" -d "${db}" -a | grep -c SUCCEEDED) || true
echo "${pslot} Total Cycles: ${num_cycles} number done: ${num_done}" || true
num_failed=$("${rocotostat}" -w "${xml}" -d "${db}" -a | grep -c -E 'FAIL|DEAD') || true
if [[ ${num_failed} -ne 0 ]]; then
"${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed"
error_logs=$("${rocotostat}" -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs "${rocotocheck}" -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true
{
echo "Experiment ${pslot} *** FAILED *** on ${MACHINE_ID^}"
echo "Experiment ${pslot} with ${num_failed} tasks failed at $(date +'%D %r')" || true
echo "Error logs:"
echo "${error_logs}"
} >> "${output_ci}"
sed -i "1 i\`\`\`" "${output_ci}"
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci}"
"${HOMEgfs}/ci/scripts/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}"
for kill_cases in "${pr_dir}/RUNTESTS/"*; do
pslot=$(basename "${kill_cases}")
cancel_slurm_jobs "${pslot}"
done
break

set +e
rocoto_state="$("${HOMEgfs}/ci/scripts/utils/rocotostat.py" -w "${xml}" -d "${db}")"
rocoto_error=$?
rm -f "${output_ci_single}"
if [[ "${rocoto_error}" -ne 0 ]]; then
"${GH}" pr edit --repo "${REPO_URL}" "${pr}" --remove-label "CI-${MACHINE_ID^}-Running" --add-label "CI-${MACHINE_ID^}-Failed"
if [[ "${rocoto_state}" == "STALLED" ]]; then
# shellcheck disable=SC2312
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body "Experiment ${pslot} **${rocoto_state}** on ${MACHINE_ID^} at $(date +'%D %r')"
"${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}"
cancel_all_batch_jobs "${pr_dir}/RUNTESTS"
exit "${rocoto_error}"
fi
error_logs=$("${rocotostat}" -d "${db}" -w "${xml}" | grep -E 'FAIL|DEAD' | awk '{print "-c", $1, "-t", $2}' | xargs "${rocotocheck}" -d "${db}" -w "${xml}" | grep join | awk '{print $2}') || true
# shellcheck disable=SC2086
${HOMEgfs}/ci/scripts/utils/publish_logs.py --file ${error_logs} --repo "PR_${pr}" > /dev/null
# shellcheck disable=SC2086
gist_url="$("${HOMEgfs}/ci/scripts/utils/publish_logs.py" --file ${error_logs} --gist "PR_${pr}")"
{
echo "Experiment ${pslot} **${rocoto_state}** on ${MACHINE_ID^} at $(date +'%D %r')" || true
echo ""
echo "Error logs:"
echo "\`\`\`"
echo "${error_logs}"
echo "\`\`\`"
echo "Follow link here to view the contents of the above file(s): [(link)](${gist_url})"
} >> "${output_ci_single}"
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}"
"${HOMEgfs}/ci/scripts/utils/pr_list_database.py" --remove_pr "${pr}" --dbfile "${pr_list_dbfile}"
cancel_all_batch_jobs "${pr_dir}/RUNTESTS"
exit "${rocoto_error}"
fi
if [[ "${num_done}" -eq "${num_cycles}" ]]; then
if [[ "${rocoto_state}" == "DONE" ]]; then
# Remove experiment cases that completed successfully
rm -Rf "${pslot_dir}"
rm -Rf "${pr_dir}/RUNTESTS/COMROOT/${pslot}"
Expand All @@ -157,7 +176,6 @@ for pr in ${pr_list}; do
echo "Experiment ${pslot} **SUCCESS** on ${MACHINE_ID^} at ${DATE}" >> "${output_ci_single}"
echo "Experiment ${pslot} *** SUCCESS *** at ${DATE}" >> "${output_ci}"
"${GH}" pr comment "${pr}" --repo "${REPO_URL}" --body-file "${output_ci_single}"

fi
done
done
3 changes: 2 additions & 1 deletion ci/scripts/clone-build_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ set +e
source "${HOMEgfs}/ush/module-setup.sh"
export BUILD_JOBS=8
rm -rf log.build
./build_all.sh -guw >> log.build 2>&1
./build_all.sh -gk >> log.build 2>&1
build_status=$?

DATE=$(date +'%D %r')
Expand All @@ -83,6 +83,7 @@ if [[ ${build_status} != 0 ]]; then
echo "Build: *** FAILED ***"
echo "Build: Failed at ${DATE}"
cat "${PWD}/log.build"
cat "${PWD}/logs/error.logs"
} >> "${outfile}"
exit "${build_status}"
else
Expand Down
Loading

0 comments on commit 7fd6d18

Please sign in to comment.