Skip to content

Commit

Permalink
Merge branch 'ufs-community:develop' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
gspetro-NOAA authored May 22, 2024
2 parents de99fa7 + 28cbbc8 commit 85cbead
Show file tree
Hide file tree
Showing 346 changed files with 10,679 additions and 14,635 deletions.
96 changes: 74 additions & 22 deletions .cicd/Jenkinsfile

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions .cicd/scripts/disk_usage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env bash

# Output a CSV report of disk usage on subdirs of some path
# Usage:
# [JOB_NAME=<ci_job>] [BUILD_NUMBER=<n>] [SRW_COMPILER=<intel>] [SRW_PLATFORM=<machine>] disk_usage path depth size outfile.csv
#
# args:
# directory=$1
# depth=$2
# size=$3
# outfile=$4

# Fall back to local defaults for values the Jenkins pipeline normally exports.
[[ -n "${WORKSPACE}" ]] || WORKSPACE=$(pwd)
[[ -n "${SRW_PLATFORM}" ]] || SRW_PLATFORM=$(hostname -s 2>/dev/null) || SRW_PLATFORM=$(hostname 2>/dev/null)
[[ -n "${SRW_COMPILER}" ]] || SRW_COMPILER=compiler

script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"

# Get repository root from the Jenkins WORKSPACE when the per-platform
# directory exists there; otherwise, set it relative to the script directory.
# The test must be -d: a -n test on "${WORKSPACE}/${SRW_PLATFORM}" is always
# true (the string always contains "/"), which made the fallback unreachable.
declare workspace
if [[ -d "${WORKSPACE}/${SRW_PLATFORM}" ]]; then
  workspace="${WORKSPACE}/${SRW_PLATFORM}"
else
  workspace="$(cd -- "${script_dir}/../.." && pwd)"
fi

echo "STAGE_NAME=${STAGE_NAME}" # from pipeline
# Report file: an explicit 4th argument wins, otherwise the name is derived
# from the workspace path, the compiler and the pipeline stage name.
outfile="${4:-${workspace}-${SRW_COMPILER}-disk-usage${STAGE_NAME}.csv}"

#######################################
# Print a CSV disk-usage report for a directory and its subdirectories.
# Globals:
#   JOB_NAME, BUILD_NUMBER, SRW_PLATFORM, SRW_COMPILER (read; the first two
#   and SRW_COMPILER fall back to "ci", "0" and "compiler")
# Arguments:
#   $1 - directory to report on (default: current directory)
#   $2 - depth passed to `du -d` (default: 1)
#   $3 - size unit letter passed to du as "-<unit>" (default: k)
# Outputs:
#   A title line, a CSV header, one CSV row per path reported by du (sorted
#   numerically from the inode column onwards) and a trailing blank line.
#######################################
function disk_usage() {
  local directory=${1:-${PWD}}
  local depth=${2:-1}
  local size=${3:-k}
  local inode filename
  echo "Disk usage: ${JOB_NAME:-ci}/${SRW_PLATFORM}/$(basename -- "${directory}")"
  (
    cd -- "${directory}" || exit 1
    # NOTE(review): the last-but-one column is filled from `du --time`, which
    # GNU du documents as modification time, not access time -- confirm header.
    echo "Platform,Build,Owner,Group,Inodes,${size}bytes,Access Time,Filename"
    # du separates the inode count from the path with a TAB; splitting on TAB
    # only (and reading raw with -r) keeps paths containing spaces, backslashes
    # or glob characters intact.
    du -Px -d "${depth}" --inodes --exclude='./workspace' \
      | while IFS=$'\t' read -r inode filename; do
        echo "${SRW_PLATFORM}-${SRW_COMPILER:-compiler},${JOB_NAME:-ci}/${BUILD_NUMBER:-0},$(stat -c '%U,%G' -- "${filename}"),${inode:-0},$(du -Px -s "-${size}" --time -- "${filename}")" | tr '\t' ','
      done \
      | sort -t, -k5 -n #-r
  )
  echo ""
}

# Print the report and save a copy to the CSV file. The arguments are quoted
# so that an empty or space-containing argument keeps its position instead of
# shifting the remaining ones (the function applies its own defaults).
disk_usage "$1" "$2" "$3" | tee "${outfile}"
2 changes: 1 addition & 1 deletion .cicd/scripts/sbatch_srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#SBATCH --account=${SRW_PROJECT}
#SBATCH --qos=batch
#SBATCH --nodes=1
#SBATCH --tasks-per-node=24
#SBATCH --tasks-per-node=12
#SBATCH --cpus-per-task=1
#SBATCH -t 00:30:00
#SBATCH -o log_wrap.%j.log
Expand Down
3 changes: 2 additions & 1 deletion .cicd/scripts/srw_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ fi
# Build and install
cd ${workspace}/tests
set +e
./build.sh ${platform} ${SRW_COMPILER}
/usr/bin/time -p -f '{\n "cpu": "%P"\n, "memMax": "%M"\n, "mem": {"text": "%X", "data": "%D", "swaps": "%W", "context": "%c", "waits": "%w"}\n, "pagefaults": {"major": "%F", "minor": "%R"}\n, "filesystem": {"inputs": "%I", "outputs": "%O"}\n, "time": {"real": "%e", "user": "%U", "sys": "%S"}\n}' -o ${WORKSPACE}/${SRW_PLATFORM}-${SRW_COMPILER}-time-srw_build.json \
./build.sh ${platform} ${SRW_COMPILER}
build_exit=$?
set -e
cd -
Expand Down
13 changes: 5 additions & 8 deletions .cicd/scripts/srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ fi
# Test directories
we2e_experiment_base_dir="${workspace}/expt_dirs"
we2e_test_dir="${workspace}/tests/WE2E"
nco_dir="${workspace}/nco_dirs"

pwd

Expand Down Expand Up @@ -78,19 +77,17 @@ sed "s|^task_get_extrn_lbcs:|task_get_extrn_lbcs:\n EXTRN_MDL_SOURCE_BASEDIR_LB
# Use staged data for HPSS supported machines
sed 's|^platform:|platform:\n EXTRN_MDL_DATA_STORES: disk|g' -i ush/config.yaml

# Set OMP_NUM_THREADS_RUN_FCST to 1 in config.yaml
sed 's|^task_run_fcst:|task_run_fcst:\n OMP_NUM_THREADS_RUN_FCST: 1|1' -i ush/config.yaml

# Activate the workflow environment ...
source etc/lmod-setup.sh ${platform,,}
module use modulefiles
module load build_${platform,,}_${SRW_COMPILER}
module load wflow_${platform,,}

[[ ${FORGIVE_CONDA} == true ]] && set +e +u # Some platforms have incomplete python3 or conda support, but wouldn't necessarily block workflow tests
# Gaea-C5 special case missing jinja2
if [ "${platform}" == "gaea-c5" ]; then
conda activate workflow_tools
else
conda activate srw_app
fi
conda activate srw_app
set -e -u

# Adjust for strict limitation of stack size
Expand All @@ -111,7 +108,7 @@ cp ${workspace}/ush/wrappers/*.sh .
export JOBSdir=${workspace}/jobs
export USHdir=${workspace}/ush
export OMP_NUM_THREADS=1
export nprocs=24
export nprocs=12

[[ -n ${TASKS} ]] || TASKS=(
run_make_grid
Expand Down
38 changes: 38 additions & 0 deletions .cicd/scripts/srw_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
#
# A unified init script for the SRW application. This script is expected to
# fetch initial source for the SRW application for all supported platforms.
#
set -e -u -x

script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"

# Get repository root from the Jenkins WORKSPACE variable if it is set and the
# per-platform directory exists there; otherwise, set it relative to the
# script directory.
# The previous -n test on "${WORKSPACE}/${SRW_PLATFORM}" was always true (the
# string always contains "/") and, under `set -u`, aborted on an unset
# WORKSPACE instead of taking the fallback.
# Note: later steps of this script still reference ${WORKSPACE} directly.
declare workspace
if [[ -n "${WORKSPACE:-}" && -d "${WORKSPACE}/${SRW_PLATFORM}" ]]; then
  workspace="${WORKSPACE}/${SRW_PLATFORM}"
else
  workspace="$(cd -- "${script_dir}/../.." && pwd)"
fi

# Normalize Parallel Works cluster platform value: any name starting with
# azclusternoaa, gclusternoaa or pclusternoaa becomes 'noaacloud'; every other
# value is passed through unchanged.
declare platform
case "${SRW_PLATFORM}" in
  azclusternoaa* | gclusternoaa* | pclusternoaa*)
    platform='noaacloud'
    ;;
  *)
    platform="${SRW_PLATFORM}"
    ;;
esac

# Check out the externals from the repository root, recording resource usage
# of the checkout as JSON via /usr/bin/time.
cd "${workspace}"
# errexit is disabled so the checkout status can be captured and the
# environment dump below still runs when the checkout fails.
set +e
/usr/bin/time -p -f '{\n "cpu": "%P"\n, "memMax": "%M"\n, "mem": {"text": "%X", "data": "%D", "swaps": "%W", "context": "%c", "waits": "%w"}\n, "pagefaults": {"major": "%F", "minor": "%R"}\n, "filesystem": {"inputs": "%I", "outputs": "%O"}\n, "time": {"real": "%e", "user": "%U", "sys": "%S"}\n}' -o "${WORKSPACE}/${SRW_PLATFORM}-${SRW_COMPILER}-time-srw_init.json" \
  ./manage_externals/checkout_externals
init_exit=$?
# STAGE_NAME is informational only: default it so that `set -u` cannot abort
# the script here and mask the checkout status.
echo "STAGE_NAME=${STAGE_NAME:-}"
# Save a sorted snapshot of the environment next to the timing report.
env | grep = | sort > "${WORKSPACE}/${SRW_PLATFORM}-${SRW_COMPILER}-env.txt"
set -e
cd -

# Propagate the status of checkout_externals to the caller.
exit "${init_exit}"
146 changes: 146 additions & 0 deletions .cicd/scripts/srw_metric.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env bash
#
# This script provides an example of performing an Indy-Severe-Weather test run and comparing the results to a reference
# using the skill score index calculated by the MET Stat-Analysis tool.
#
# Required (these options are set in the Jenkins env):
# WORKSPACE=</full/path/to/ufs-srweather-app>
# SRW_PLATFORM=<supported_platform_host>
# SRW_COMPILER=<intel|gnu>
# SRW_PROJECT=<platform_account>
#
# Optional:
#[[ -n ${SRW_PROJECT} ]] || SRW_PROJECT="no_account"
# FORGIVE_CONDA defaults to true when unset or empty (checked before `set -u`).
if [[ -z ${FORGIVE_CONDA} ]]; then
  FORGIVE_CONDA=true
fi
set -e -u -x

# Stage switches; each one is enabled by the matching command-line word.
BUILD_OPT=false
RUN_WE2E_OPT=false
RUN_STAT_ANLY_OPT=false

# At most three stage names may be given.
if (( $# > 3 )); then
  echo "Too many arguments, expecting three or less"
  exit 1
fi

# No arguments at all means "run every stage".
if (( $# == 0 )); then
  BUILD_OPT=true
  RUN_WE2E_OPT=true
  RUN_STAT_ANLY_OPT=true
fi

# Otherwise enable exactly the stages that were named; reject anything else.
for opt in "$@"; do
  case $opt in
    build)
      BUILD_OPT=true
      ;;
    run_we2e)
      RUN_WE2E_OPT=true
      ;;
    run_stat_anly)
      RUN_STAT_ANLY_OPT=true
      ;;
    *)
      echo "Not valid option. Exiting!"
      exit 1
      ;;
  esac
done

script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)"

# Repository root: the per-platform directory under the Jenkins WORKSPACE when
# it exists, otherwise two levels above the directory holding this script.
declare workspace
workspace="${WORKSPACE}/${SRW_PLATFORM}"
if [[ ! -d "${workspace}" ]]; then
  workspace="$(cd -- "${script_dir}/../.." && pwd)"
fi

# Normalize Parallel Works cluster platform value: names starting with
# azclusternoaa, gclusternoaa or pclusternoaa all become 'noaacloud'.
declare platform
platform="${SRW_PLATFORM}"
case "${SRW_PLATFORM}" in
  azclusternoaa* | gclusternoaa* | pclusternoaa*)
    platform='noaacloud'
    ;;
esac

# Test directories
# Both directories keep a value already present in the environment and
# otherwise take the default given after ':='.
we2e_experiment_base_dir="${we2e_experiment_base_dir:=${workspace}/../expt_dirs/metric_test}"
we2e_test_dir="${we2e_test_dir:=${workspace}/tests/WE2E}"
# Name of the single WE2E test this script runs and scores.
we2e_test_name="grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0"

# Print the current working directory.
pwd

# Setup the build environment
# ${platform,,} is the lower-cased platform name; it selects the lmod setup
# argument and the build_<platform>_<compiler> module.
declare srw_compiler
srw_compiler=${SRW_COMPILER}
source ${workspace}/etc/lmod-setup.sh ${platform,,}
module use ${workspace}/modulefiles
module load build_${platform,,}_${srw_compiler}

# Build srw
# Only when the build stage was requested; the script returns to the
# repository root afterwards either way.
if [[ ${BUILD_OPT} == true ]]; then
cd ${workspace}/tests
./build.sh ${platform,,} ${srw_compiler}
fi
cd ${workspace}

# Activate workflow environment
module load wflow_${platform,,}

# When FORGIVE_CONDA is true, errexit and nounset are switched off for the
# `conda activate` call only; both are switched back on right after it.
[[ ${FORGIVE_CONDA} == true ]] && set +e +u # Some platforms have incomplete python3 or conda support, but would not necessarily block workflow tests
conda activate srw_app
set -e -u

# Run test
declare srw_project
srw_project=${SRW_PROJECT}
if [[ ${RUN_WE2E_OPT} == true ]]; then
  # Start from a clean experiment directory. The path is quoted and guarded
  # with :? so an empty value can never turn into `rm -rf` on nothing or "/".
  if [[ -d "${we2e_experiment_base_dir}" ]]; then
    rm -rf -- "${we2e_experiment_base_dir:?}"
  fi
  cd "${workspace}/tests/WE2E"
  # NOTE(review): --exec_subdir is hard-coded to install_intel although
  # ${srw_compiler} may be another compiler -- confirm this is intended.
  ./run_WE2E_tests.py -t "${we2e_test_name}" -m "${platform,,}" -a "${srw_project}" --expt_basedir "metric_test" --exec_subdir=install_intel/exec -q
fi
# Return to the repository root whether or not the test stage ran.
cd "${workspace}"

#######################################
# Numerically test whether a value is strictly below a threshold.
# Arguments:
#   $1 - value to test (decimal number, optional sign/fraction/exponent)
#   $2 - threshold (decimal number)
# Returns:
#   0 if $1 < $2, 1 if $1 >= $2, 2 if $1 is not a plain decimal number
#   (for example empty or "NA").
#######################################
ss_index_below() {
  local -r value=${1-} threshold=${2-}
  local -r number_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  [[ ${value} =~ ${number_re} ]] || return 2
  # awk does the floating-point comparison; "+ 0" forces numeric context.
  awk -v v="${value}" -v t="${threshold}" 'BEGIN { exit !((v + 0) < (t + 0)) }'
}

# Run skill-score check
if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
  # stat_analysis output file for this platform/compiler combination.
  score_file="${platform,,}-${srw_compiler}-skill-score.txt"

  # Clear out data
  rm -rf -- "${workspace:?}/Indy-Severe-Weather/"
  # Check if metprd data exists locally otherwise get it from S3
  TEST_EXTRN_MDL_SOURCE_BASEDIR=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR "${workspace}/ush/machine/${SRW_PLATFORM}.yaml" | awk '{print $NF}')
  # The staged point_stat directory sits next to TEST_EXTRN_MDL_SOURCE_BASEDIR;
  # it is left empty when the machine file does not define that key.
  staged_point_stat=""
  if [[ -n "${TEST_EXTRN_MDL_SOURCE_BASEDIR}" ]]; then
    staged_point_stat="$(dirname -- "${TEST_EXTRN_MDL_SOURCE_BASEDIR}")/metprd/point_stat"
  fi
  if [[ -n "${staged_point_stat}" && -d "${staged_point_stat}" ]]; then
    mkdir -p Indy-Severe-Weather/metprd/point_stat
    cp -rp "${staged_point_stat}" Indy-Severe-Weather/metprd
  elif [[ -f Indy-Severe-Weather.tgz ]]; then
    tar xvfz Indy-Severe-Weather.tgz
  else
    wget https://noaa-ufs-srw-pds.s3.amazonaws.com/sample_cases/release-public-v2.1.0/Indy-Severe-Weather.tgz
    tar xvfz Indy-Severe-Weather.tgz
  fi
  # Remove a score file left over from a previous run.
  if [[ -f "${score_file}" ]]; then
    rm -- "${score_file}"
  fi
  # Skill score index is computed over several terms that are defined in parm/metplus/STATAnalysisConfig_skill_score.
  # It is computed by aggregating the output from earlier runs of the Point-Stat and/or Grid-Stat tools over one or more cases.
  # In this example, skill score index is a weighted average of 4 skill scores of RMSE statistics for wind speed, dew point temperature,
  # temperature, and pressure at lowest level in the atmosphere over 6 hour lead time.
  cp "${we2e_experiment_base_dir}/${we2e_test_name}"/2019061500/metprd/PointStat/*.stat "${workspace}/Indy-Severe-Weather/metprd/point_stat/"
  # Remove conda for Orion due to conda env conflicts
  if [[ ${platform} =~ "orion" ]]; then
    sed -i 's|load("conda")|--load("conda")|g' "${workspace}/modulefiles/tasks/${platform,,}/run_vx.local.lua"
  fi
  # Load met and metplus
  module use "modulefiles/tasks/${platform,,}"
  module load run_vx.local
  # Reset Orion run_vx.local file
  if [[ ${platform} =~ "orion" ]]; then
    sed -i 's|--load("conda")|load("conda")|g' "${workspace}/modulefiles/tasks/${platform,,}/run_vx.local.lua"
  fi
  # Run stat_analysis
  stat_analysis -config parm/metplus/STATAnalysisConfig_skill_score -lookin "${workspace}/Indy-Severe-Weather/metprd/point_stat" -v 2 -out "${score_file}"

  # check skill-score.txt
  cat "${score_file}"

  # get skill-score (SS_INDEX) and check if it is significantly smaller than 1.0
  # A value greater than 1.0 indicates that the forecast model outperforms the reference,
  # while a value less than 1.0 indicates that the reference outperforms the forecast.
  # SS_INDEX is the last field of the second-to-last line of the score file.
  SS_INDEX=$(tail -2 "${score_file}" | head -1 | awk '{print $NF}')
  echo "Skill Score: ${SS_INDEX}"
  # Compare numerically: the former [[ ... < "0.700" ]] was a lexicographic
  # string comparison, so values such as "NA" or "1e-3" slipped through.
  ss_status=0
  ss_index_below "${SS_INDEX}" 0.700 || ss_status=$?
  case ${ss_status} in
    0)
      echo "Your Skill Score is way smaller than 1.00, better check before merging"
      exit 1
      ;;
    1)
      echo "Congrats! You pass check!"
      ;;
    *)
      # A missing or non-numeric score cannot be verified, so it fails.
      echo "Skill Score '${SS_INDEX}' is not a number, cannot verify it"
      exit 1
      ;;
  esac
fi
91 changes: 0 additions & 91 deletions .cicd/scripts/srw_metric_example.sh

This file was deleted.

Loading

0 comments on commit 85cbead

Please sign in to comment.