From d1401ec181793559530a9168dceb436c3ad75422 Mon Sep 17 00:00:00 2001 From: "Chan-Hoo.Jeon-NOAA" <60152248+chan-hoo@users.noreply.github.com> Date: Thu, 4 Apr 2024 08:57:31 -0400 Subject: [PATCH] [develop] Fix failure on warm start option of SRW-AQM (#1065) * Fix failure on the warm start option of SRW-AQM. * Change the sample configuration file config.aqm.yaml for running a warm start. * Change cpreq to cp because cpreq does not work correctly on machines other than WCOSS2. * Add missing exclusion to .gitignore. --- .gitignore | 1 + parm/wflow/coldstart.yaml | 30 +++---- scripts/exregional_make_ics.sh | 8 +- scripts/exregional_make_lbcs.sh | 2 +- scripts/exsrw_aqm_ics.sh | 78 ++++++++++++------- scripts/exsrw_aqm_lbcs.sh | 8 +- scripts/exsrw_fire_emission.sh | 14 ++-- scripts/exsrw_nexus_emission.sh | 6 +- scripts/exsrw_nexus_post_split.sh | 8 +- ...fig.aqm.community.yaml => config.aqm.yaml} | 10 ++- 10 files changed, 91 insertions(+), 74 deletions(-) rename ush/{config.aqm.community.yaml => config.aqm.yaml} (84%) diff --git a/.gitignore b/.gitignore index 2b362272f6..ed78ca4182 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ lib/ parm/aqm_utils_parm/ parm/nexus_config/ parm/ufs_utils_parm/ +parm/upp_parm/ share/ sorc/*/ tests/WE2E/WE2E_tests_*.yaml diff --git a/parm/wflow/coldstart.yaml b/parm/wflow/coldstart.yaml index 002d7f7b96..ceefe865e6 100644 --- a/parm/wflow/coldstart.yaml +++ b/parm/wflow/coldstart.yaml @@ -186,29 +186,21 @@ metatask_run_ensemble: attrs: task: point_source or_aqm_ics: - and_no_aqm_ics: - not: - taskvalid: - attrs: - task: aqm_ics_ext - not: - taskvalid: - attrs: - task: aqm_ics - and_aqm_atstart: - taskvalid: - attrs: - task: aqm_ics_ext - taskdep: - attrs: - task: aqm_ics_ext - and_aqm_cycled: + not: taskvalid: attrs: task: aqm_ics - taskdep: + taskdep: + attrs: + task: aqm_ics + or_aqm_ics_ext: + not: + taskvalid: attrs: - task: aqm_ics + task: aqm_ics_ext + taskdep: + attrs: + task: aqm_ics_ext or_aqm_lbcs: not: taskvalid: diff --git 
a/scripts/exregional_make_ics.sh b/scripts/exregional_make_ics.sh index 0fd6b0884d..84d73696eb 100755 --- a/scripts/exregional_make_ics.sh +++ b/scripts/exregional_make_ics.sh @@ -650,10 +650,10 @@ if [ "${CPL_AQM}" = "TRUE" ]; then else data_trans_path="${DATA_SHARE}" fi - cpreq -p out.atm.tile${TILE_RGNL}.nc "${data_trans_path}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc" - cpreq -p out.sfc.tile${TILE_RGNL}.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc" - cpreq -p gfs_ctrl.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.gfs_ctrl.nc" - cpreq -p gfs.bndy.nc "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc" + cp -p out.atm.tile${TILE_RGNL}.nc "${data_trans_path}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc" + cp -p out.sfc.tile${TILE_RGNL}.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc" + cp -p gfs_ctrl.nc "${COMOUT}/${NET}.${cycle}${dot_ensmem}.gfs_ctrl.nc" + cp -p gfs.bndy.nc "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc" else mv_vrfy out.atm.tile${TILE_RGNL}.nc ${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.gfs_data.tile${TILE_RGNL}.halo${NH0}.nc mv_vrfy out.sfc.tile${TILE_RGNL}.nc ${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc diff --git a/scripts/exregional_make_lbcs.sh b/scripts/exregional_make_lbcs.sh index 3a7f586051..ca3f6401cb 100755 --- a/scripts/exregional_make_lbcs.sh +++ b/scripts/exregional_make_lbcs.sh @@ -560,7 +560,7 @@ located in the following directory: fcst_hhh=$(( ${lbc_spec_fhrs} - ${EXTRN_MDL_LBCS_OFFSET_HRS} )) fcst_hhh_FV3LAM=$( printf "%03d" "$fcst_hhh" ) if [ "${CPL_AQM}" = "TRUE" ]; then - cpreq -p gfs.bndy.nc ${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc + cp -p gfs.bndy.nc ${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc else mv_vrfy gfs.bndy.nc 
${INPUT_DATA}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f${fcst_hhh_FV3LAM}.nc fi diff --git a/scripts/exsrw_aqm_ics.sh b/scripts/exsrw_aqm_ics.sh index 9104374705..efd833b092 100755 --- a/scripts/exsrw_aqm_ics.sh +++ b/scripts/exsrw_aqm_ics.sh @@ -55,31 +55,54 @@ tial or boundary condition files for the FV3 will be generated. # rst_dir="${PREV_CYCLE_DIR}/RESTART" rst_file="fv_tracer.res.tile1.nc" -fv_tracer_file="${rst_dir}/${PDY}.${cyc}0000.${rst_file}" -print_info_msg "Looking for tracer restart file: \"${fv_tracer_file}\"" -if [ ! -r ${fv_tracer_file} ]; then - if [ -r ${rst_dir}/coupler.res ]; then - rst_info=( $( tail -n 1 ${rst_dir}/coupler.res ) ) - # Remove leading zeros from ${rst_info[1]} - month="${rst_info[1]#"${rst_info[1]%%[!0]*}"}" - # Remove leading zeros from ${rst_info[2]} - day="${rst_info[2]#"${rst_info[2]%%[!0]*}"}" - # Format the date without leading zeros - rst_date=$(printf "%04d%02d%02d%02d" ${rst_info[0]} $((10#$month)) $((10#$day)) ${rst_info[3]}) - print_info_msg " - Tracer file not found. Checking available restart date: - requested date: \"${PDY}${cyc}\" - available date: \"${rst_date}\"" - if [ "${rst_date}" = "${PDY}${cyc}" ] ; then - fv_tracer_file="${rst_dir}/${rst_file}" - if [ -r ${fv_tracer_file} ]; then - print_info_msg "Tracer file found: \"${fv_tracer_file}\"" - else - message_txt="FATAL ERROR No suitable tracer restart file ${rst_dir}/${rst_file} found." 
- err_exit "${message_txt}" - print_err_msg_exit "${message_txt}" - fi +rst_file_with_date="${PDY}.${cyc}0000.${rst_file}" +if [ -e "${rst_dir}/${rst_file_with_date}" ]; then + fv_tracer_file="${rst_dir}/${rst_file_with_date}" +elif [ -e "${rst_dir}/${rst_file}" ]; then + fv_tracer_file="${rst_dir}/${rst_file}" +else + message_txt="Tracer restart file: \"${rst_dir}/${rst_file_with_date}\" or \"${rst_dir}/${rst_file}\" is NOT found" + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" +fi +print_info_msg "Tracer restart file: \"${fv_tracer_file}\"" + +cplr_file="coupler.res" +cplr_file_with_date="${PDY}.${cyc}0000.${cplr_file}" +if [ -e "${rst_dir}/${cplr_file_with_date}" ]; then + coupler_file="${rst_dir}/${cplr_file_with_date}" +elif [ -e "${rst_dir}/${cplr_file}" ]; then + coupler_file="${rst_dir}/${cplr_file}" +else + message_txt="Coupler file: \"${rst_dir}/${cplr_file_with_date}\" or \"${rst_dir}/${cplr_file}\" is NOT found" + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" +fi +print_info_msg "Coupler file: \"${coupler_file}\"" + +if [ -r ${coupler_file} ]; then + rst_info=( $( tail -n 1 ${coupler_file} ) ) + # Remove leading zeros from ${rst_info[1]} + month="${rst_info[1]#"${rst_info[1]%%[!0]*}"}" + # Remove leading zeros from ${rst_info[2]} + day="${rst_info[2]#"${rst_info[2]%%[!0]*}"}" + # Format the date without leading zeros + rst_date=$(printf "%04d%02d%02d%02d" ${rst_info[0]} $((10#$month)) $((10#$day)) ${rst_info[3]}) + if [ "${rst_date}" = "${PDY}${cyc}" ]; then + if [ -r ${fv_tracer_file} ]; then + print_info_msg "Tracer restart file is for ${PDY}${cyc}" + else + message_txt="Tracer restart file \"${fv_tracer_file}\" is NOT readable." + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" fi + else + message_txt="Tracer restart file is NOT for ${PDY}${cyc}. 
+Checking available restart date: + requested date: \"${PDY}${cyc}\" + available date: \"${rst_date}\"" + err_exit "${message_txt}" + print_err_msg_exit "${message_txt}" fi fi # @@ -103,7 +126,7 @@ print_info_msg " tracer file: \"${fv_tracer_file}\" FV3 IC file: \"${gfs_ic_fp}\"" -cpreq ${gfs_ic_fp} ${wrk_ic_fp} +cp -p ${gfs_ic_fp} ${wrk_ic_fp} ${USHsrw}/aqm_utils_python/add_aqm_ics.py --fv_tracer_file "${fv_tracer_file}" --wrk_ic_file "${wrk_ic_fp}" export err=$? if [ $err -ne 0 ]; then @@ -122,10 +145,7 @@ fi mv tmp1.nc ${gfs_ic_fn} -cpreq -p ${gfs_ic_fn} ${COMOUT} -cpreq -p "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.sfc_data.tile${TILE_RGNL}.halo${NH0}.nc" ${COMOUT} -cpreq -p "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_ctrl.nc" ${COMOUT} -cpreq -p "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile${TILE_RGNL}.f000.nc" ${COMOUT} +cp -p ${gfs_ic_fn} ${COMOUT} unset fv_tracer_file unset wrk_ic_file diff --git a/scripts/exsrw_aqm_lbcs.sh b/scripts/exsrw_aqm_lbcs.sh index f6d932962e..93dc119ec2 100755 --- a/scripts/exsrw_aqm_lbcs.sh +++ b/scripts/exsrw_aqm_lbcs.sh @@ -97,7 +97,7 @@ aqm_lbcs_fn_prefix="${NET}.${cycle}${dot_ensmem}.gfs_bndy.tile7.f" for hr in 0 ${LBC_SPEC_FCST_HRS[@]}; do fhr=$( printf "%03d" "${hr}" ) aqm_lbcs_fn="${aqm_lbcs_fn_prefix}${fhr}.nc" - cpreq "${DATA_SHARE}/${aqm_lbcs_fn}" ${DATA} + cp -p "${DATA_SHARE}/${aqm_lbcs_fn}" ${DATA} done if [ "${DO_AQM_CHEM_LBCS}" = "TRUE" ]; then @@ -106,7 +106,7 @@ if [ "${DO_AQM_CHEM_LBCS}" = "TRUE" ]; then chem_lbcs_fp="${FIXaqm}/chemlbc/${chem_lbcs_fn}" if [ -f ${chem_lbcs_fp} ]; then #Copy the boundary condition file to the current location - cpreq ${chem_lbcs_fp} . + cp -p ${chem_lbcs_fp} . 
else message_txt="The chemical LBC files do not exist: CHEM_BOUNDARY_CONDITION_FILE = \"${chem_lbcs_fp}\"" @@ -125,7 +125,7 @@ if [ "${DO_AQM_CHEM_LBCS}" = "TRUE" ]; then err_exit "${message_txt}" print_err_msg_exit "${message_txt}" fi - cpreq ${aqm_lbcs_fn} "${aqm_lbcs_fn}_chemlbc" + cp -p ${aqm_lbcs_fn} "${aqm_lbcs_fn}_chemlbc" fi done @@ -229,7 +229,7 @@ fi for hr in 0 ${LBC_SPEC_FCST_HRS[@]}; do fhr=$( printf "%03d" "${hr}" ) aqm_lbcs_fn="${aqm_lbcs_fn_prefix}${fhr}.nc" - cpreq -p "${DATA}/${aqm_lbcs_fn}" ${COMOUT} + cp -p "${DATA}/${aqm_lbcs_fn}" ${COMOUT} done # print_info_msg " diff --git a/scripts/exsrw_fire_emission.sh b/scripts/exsrw_fire_emission.sh index 68178016e7..cb44c99d8d 100755 --- a/scripts/exsrw_fire_emission.sh +++ b/scripts/exsrw_fire_emission.sh @@ -70,7 +70,7 @@ aqm_fire_file_fn="${AQM_FIRE_FILE_PREFIX}_${YYYYMMDD}_t${HH}z${AQM_FIRE_FILE_SUF # Check if the fire file exists in the designated directory if [ -e "${COMINfire}/${aqm_fire_file_fn}" ]; then - cpreq "${COMINfire}/${aqm_fire_file_fn}" ${COMOUT} + cp -p "${COMINfire}/${aqm_fire_file_fn}" ${COMOUT} else # Copy raw data for ihr in {0..23}; do @@ -83,16 +83,16 @@ else yyyymmdd_dn_md1="${missing_download_time:0:8}" FILE_13km_md1="RAVE-HrlyEmiss-13km_v*_blend_s${missing_download_time}00000_e${missing_download_time}59590_c*.nc" if [ -s `ls ${COMINfire}/${yyyymmdd_dn}/rave/${FILE_13km}` ] && [ $(stat -c %s `ls ${COMINfire}/${yyyymmdd_dn}/rave/${FILE_13km}`) -gt 4000000 ]; then - cpreq -p ${COMINfire}/${yyyymmdd_dn}/rave/${FILE_13km} ${FILE_curr} + cp -p ${COMINfire}/${yyyymmdd_dn}/rave/${FILE_13km} ${FILE_curr} elif [ -s `ls ${COMINfire}/${yyyymmdd_dn_md1}/rave/${FILE_13km_md1}` ] && [ $(stat -c %s `ls ${COMINfire}/${yyyymmdd_dn_md1}/rave/${FILE_13km_md1}`) -gt 4000000 ]; then echo "WARNING: ${FILE_13km} does not exist or broken. Replacing with the file of previous date ..." 
- cpreq -p ${COMINfire}/${yyyymmdd_dn_md1}/rave/${FILE_13km_md1} ${FILE_curr} + cp -p ${COMINfire}/${yyyymmdd_dn_md1}/rave/${FILE_13km_md1} ${FILE_curr} else message_txt="WARNING Fire Emission RAW data does not exist or broken: FILE_13km_md1 = \"${FILE_13km_md1}\" DCOMINfire = \"${DCOMINfire}\"" - cpreq -p ${FIXaqm}/fire/Hourly_Emissions_13km_dummy.nc ${FILE_curr} + cp -p ${FIXaqm}/fire/Hourly_Emissions_13km_dummy.nc ${FILE_curr} print_info_msg "WARNING: ${message_txt}. Replacing with the dummy file :: AQM RUN SOFT FAILED." fi done @@ -134,8 +134,8 @@ else print_err_msg_exit "${message_txt}" fi - cpreq Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_1.nc - cpreq Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_2.nc + cp -p Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_1.nc + cp -p Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_2.nc ncrcat -O -D 2 Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_1.nc Hourly_Emissions_regrid_NA_13km_${YYYYMMDD}_t${HH}z_h24_2.nc ${aqm_fire_file_fn} export err=$? 
@@ -155,7 +155,7 @@ else mv temp6.nc ${aqm_fire_file_fn} # Copy the final fire emission file to data share directory - cpreq "${DATA}/${aqm_fire_file_fn}" ${COMOUT} + cp -p "${DATA}/${aqm_fire_file_fn}" ${COMOUT} fi # #----------------------------------------------------------------------- diff --git a/scripts/exsrw_nexus_emission.sh b/scripts/exsrw_nexus_emission.sh index 7edd18ce42..a5769a6483 100755 --- a/scripts/exsrw_nexus_emission.sh +++ b/scripts/exsrw_nexus_emission.sh @@ -103,12 +103,12 @@ fi # #----------------------------------------------------------------------- # -cpreq ${FIXaqm}/nexus/${NEXUS_GRID_FN} ${DATA}/grid_spec.nc +cp -p ${FIXaqm}/nexus/${NEXUS_GRID_FN} ${DATA}/grid_spec.nc if [ "${USE_GFS_SFC}" = "TRUE" ]; then - cpreq ${PARMsrw}/nexus_config/cmaq_gfs_megan/*.rc ${DATA} + cp -p ${PARMsrw}/nexus_config/cmaq_gfs_megan/*.rc ${DATA} else - cpreq ${PARMsrw}/nexus_config/cmaq/*.rc ${DATA} + cp -p ${PARMsrw}/nexus_config/cmaq/*.rc ${DATA} fi # #----------------------------------------------------------------------- diff --git a/scripts/exsrw_nexus_post_split.sh b/scripts/exsrw_nexus_post_split.sh index 3b83dee523..517893b5e5 100755 --- a/scripts/exsrw_nexus_post_split.sh +++ b/scripts/exsrw_nexus_post_split.sh @@ -74,12 +74,12 @@ end_date=`$NDATE +${FCST_LEN_HRS} ${YYYYMMDD}${HH}` # #----------------------------------------------------------------------- # -cpreq ${PARMsrw}/nexus_config/cmaq/HEMCO_sa_Time.rc ${DATA}/HEMCO_sa_Time.rc -cpreq ${FIXaqm}/nexus/${NEXUS_GRID_FN} ${DATA}/grid_spec.nc +cp -p ${PARMsrw}/nexus_config/cmaq/HEMCO_sa_Time.rc ${DATA}/HEMCO_sa_Time.rc +cp -p ${FIXaqm}/nexus/${NEXUS_GRID_FN} ${DATA}/grid_spec.nc if [ "${NUM_SPLIT_NEXUS}" = "01" ]; then nspt="00" - cpreq ${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt_split.${nspt}.nc ${DATA}/NEXUS_Expt_combined.nc + cp -p ${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt_split.${nspt}.nc ${DATA}/NEXUS_Expt_combined.nc else 
${USHsrw}/nexus_utils/python/concatenate_nexus_post_split.py "${DATA_SHARE}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt_split.*.nc" "${DATA}/NEXUS_Expt_combined.nc" export err=$? @@ -110,7 +110,7 @@ fi # #----------------------------------------------------------------------- # -mv ${DATA}/NEXUS_Expt.nc ${COMOUT}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt.nc +cp -p ${DATA}/NEXUS_Expt.nc ${COMOUT}/${NET}.${cycle}${dot_ensmem}.NEXUS_Expt.nc # # Print message indicating successful completion of script. # diff --git a/ush/config.aqm.community.yaml b/ush/config.aqm.yaml similarity index 84% rename from ush/config.aqm.community.yaml rename to ush/config.aqm.yaml index 2f32d0eac5..2718eafbbf 100644 --- a/ush/config.aqm.community.yaml +++ b/ush/config.aqm.yaml @@ -1,5 +1,5 @@ metadata: - description: config for Online-CMAQ, AQM_NA_13km, community mode + description: config for Online-CMAQ, AQM_NA_13km, warm-start user: RUN_ENVIR: community MACHINE: hera @@ -7,7 +7,7 @@ user: workflow: USE_CRON_TO_RELAUNCH: true CRON_RELAUNCH_INTVL_MNTS: 3 - EXPT_SUBDIR: aqm_community_aqmna13 + EXPT_SUBDIR: aqm_AQMNA13km_warmstart PREDEF_GRID_NAME: AQM_NA_13km CCPP_PHYS_SUITE: FV3_GFS_v16 DATE_FIRST_CYCL: '2023111000' @@ -21,12 +21,16 @@ workflow: DIAG_TABLE_TMPL_FN: diag_table_aqm.FV3_GFS_v16 FIELD_TABLE_TMPL_FN: field_table_aqm.FV3_GFS_v16 DO_REAL_TIME: false + COLDSTART: false # set to true for cold start + WARMSTART_CYCLE_DIR: '/scratch2/NAGAPE/epic/SRW-AQM_DATA/aqm_data/restart/2023111000' nco: + envir_default: test_aqm_warmstart NET_default: aqm + RUN_default: aqm rocoto: tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/aqm_prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml"]|include }}' - task_aqm_ics_ext: +# task_aqm_ics_ext: # uncomment this in case of COLDSTART: true metatask_run_ensemble: task_run_fcst_mem#mem#: walltime: 01:00:00