Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Abort model cleanly in ifsinterface and standalone on errors #661

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
262 changes: 205 additions & 57 deletions src/gen_model_setup.F90
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,47 @@ subroutine setup_model(partit)
implicit none
type(t_partit), intent(inout), target :: partit
character(len=MAX_PATH) :: nmlfile
integer fileunit
integer :: fileunit, istat

namelist /clockinit/ timenew, daynew, yearnew

nmlfile ='namelist.config' ! name of general configuration namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=modelname)
read (fileunit, NML=timestep)
read (fileunit, NML=clockinit)
read (fileunit, NML=paths)
read (fileunit, NML=restart_log)
read (fileunit, NML=ale_def)
read (fileunit, NML=geometry)
read (fileunit, NML=calendar)
read (fileunit, NML=run_config)
read (fileunit,NML=icebergs)
open (newunit=fileunit, file=nmlfile, status='OLD', iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=modelname, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'modelname', nmlfile, partit)

read (fileunit, NML=timestep, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'timestep', nmlfile, partit)

read (fileunit, NML=clockinit, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'clockinit', nmlfile, partit)

read (fileunit, NML=paths, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paths', nmlfile, partit)

read (fileunit, NML=restart_log, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'restart_log', nmlfile, partit)

read (fileunit, NML=ale_def, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'ale_def', nmlfile, partit)

read (fileunit, NML=geometry, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'geometry', nmlfile, partit)

read (fileunit, NML=calendar, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'calendar', nmlfile, partit)

read (fileunit, NML=run_config, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'run_config', nmlfile, partit)

read (fileunit, NML=icebergs, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'icebergs', nmlfile, partit)

!!$ read (fileunit, NML=machine)
close (fileunit)
Expand All @@ -56,22 +81,48 @@ subroutine setup_model(partit)
! =================================

nmlfile ='namelist.oce' ! name of ocean namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=oce_dyn)
open (newunit=fileunit, file=nmlfile, status='OLD', iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=oce_dyn, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'oce_dyn', nmlfile, partit)
close (fileunit)

nmlfile ='namelist.tra' ! name of ocean namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=tracer_phys)
open (newunit=fileunit, file=nmlfile, status='OLD', iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=tracer_phys, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'tracer_phys', nmlfile, partit)
close (fileunit)

nmlfile ='namelist.forcing' ! name of forcing namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=forcing_exchange_coeff)
read (fileunit, NML=forcing_bulk)
read (fileunit, NML=land_ice)
read (fileunit, NML=age_tracer) !---age-code
close (fileunit)
open (newunit=fileunit, file=nmlfile, status='OLD', iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=forcing_exchange_coeff, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'forcing_exchange_coeff', nmlfile, partit)

read (fileunit, NML=forcing_bulk, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'forcing_bulk', nmlfile, partit)

read (fileunit, NML=land_ice, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'land_ice', nmlfile, partit)

read (fileunit, NML=age_tracer, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'age_tracer', nmlfile, partit)

! if(use_ice) then
! nmlfile ='namelist.ice' ! name of ice namelist file
Expand All @@ -82,45 +133,122 @@ subroutine setup_model(partit)
! endif

nmlfile ='namelist.io' ! name of forcing namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=diag_list)
open (newunit=fileunit, file=nmlfile, status='OLD', iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=diag_list, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'diag_list', nmlfile, partit)
close (fileunit)

#if defined (__recom)
nmlfile ='namelist.recom' ! name of recom namelist file
open (newunit=fileunit, file=nmlfile)
read (fileunit, NML=pavariables)
read (fileunit, NML=pasinking)
read (fileunit, NML=painitialization_N)
read (fileunit, NML=paArrhenius)
read (fileunit, NML=palimiter_function)
read (fileunit, NML=palight_calculations)
read (fileunit, NML=paphotosynthesis)
read (fileunit, NML=paassimilation)
read (fileunit, NML=pairon_chem)
read (fileunit, NML=pazooplankton)
read (fileunit, NML=pasecondzooplankton)
read (fileunit, NML=pathirdzooplankton)
read (fileunit, NML=pagrazingdetritus)
read (fileunit, NML=paaggregation)
read (fileunit, NML=padin_rho_N)
read (fileunit, NML=padic_rho_C1)
read (fileunit, NML=paphytoplankton_N)
read (fileunit, NML=paphytoplankton_C)
read (fileunit, NML=paphytoplankton_ChlA)
read (fileunit, NML=padetritus_N)
read (fileunit, NML=padetritus_C)
read (fileunit, NML=paheterotrophs)
read (fileunit, NML=paseczooloss)
read (fileunit, NML=pathirdzooloss)
read (fileunit, NML=paco2lim)
read (fileunit, NML=pairon)
read (fileunit, NML=pacalc)
read (fileunit, NML=pabenthos_decay_rate)
read (fileunit, NML=paco2_flux_param)
read (fileunit, NML=paalkalinity_restoring)
read (fileunit, NML=paballasting)
read (fileunit, NML=paciso)
open (newunit=fileunit, file=nmlfile, iostat=istat)
if (istat /= 0) then
if(partit%mype==0) then
write(*,*) 'ERROR: Could not open namelist file ', trim(nmlfile)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif
read (fileunit, NML=pavariables, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pavariables', nmlfile, partit)

read (fileunit, NML=pasinking, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pasinking', nmlfile, partit)

read (fileunit, NML=painitialization_N, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'painitialization_N', nmlfile, partit)

read (fileunit, NML=paArrhenius, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paArrhenius', nmlfile, partit)

read (fileunit, NML=palimiter_function, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'palimiter_function', nmlfile, partit)

read (fileunit, NML=palight_calculations, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'palight_calculations', nmlfile, partit)

read (fileunit, NML=paphotosynthesis, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paphotosynthesis', nmlfile, partit)

read (fileunit, NML=paassimilation, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paassimilation', nmlfile, partit)

read (fileunit, NML=pairon_chem, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pairon_chem', nmlfile, partit)

read (fileunit, NML=pazooplankton, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pazooplankton', nmlfile, partit)

read (fileunit, NML=pasecondzooplankton, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pasecondzooplankton', nmlfile, partit)

read (fileunit, NML=pathirdzooplankton, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pathirdzooplankton', nmlfile, partit)

read (fileunit, NML=pagrazingdetritus, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pagrazingdetritus', nmlfile, partit)

read (fileunit, NML=paaggregation, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paaggregation', nmlfile, partit)

read (fileunit, NML=padin_rho_N, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'padin_rho_N', nmlfile, partit)

read (fileunit, NML=padic_rho_C1, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'padic_rho_C1', nmlfile, partit)

read (fileunit, NML=paphytoplankton_N, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paphytoplankton_N', nmlfile, partit)

read (fileunit, NML=paphytoplankton_C, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paphytoplankton_C', nmlfile, partit)

read (fileunit, NML=paphytoplankton_ChlA, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paphytoplankton_ChlA', nmlfile, partit)

read (fileunit, NML=padetritus_N, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'padetritus_N', nmlfile, partit)

read (fileunit, NML=padetritus_C, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'padetritus_C', nmlfile, partit)

read (fileunit, NML=paheterotrophs, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paheterotrophs', nmlfile, partit)

read (fileunit, NML=paseczooloss, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paseczooloss', nmlfile, partit)

read (fileunit, NML=pathirdzooloss, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pathirdzooloss', nmlfile, partit)

read (fileunit, NML=paco2lim, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paco2lim', nmlfile, partit)

read (fileunit, NML=pairon, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pairon', nmlfile, partit)

read (fileunit, NML=pacalc, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pacalc', nmlfile, partit)

read (fileunit, NML=pabenthos_decay_rate, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'pabenthos_decay_rate', nmlfile, partit)

read (fileunit, NML=paco2_flux_param, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paco2_flux_param', nmlfile, partit)

read (fileunit, NML=paalkalinity_restoring, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paalkalinity_restoring', nmlfile, partit)

read (fileunit, NML=paballasting, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paballasting', nmlfile, partit)

read (fileunit, NML=paciso, iostat=istat)
if (istat /= 0) call check_namelist_read(fileunit, 'paciso', nmlfile, partit)

close (fileunit)
#endif

Expand Down Expand Up @@ -227,3 +355,23 @@ end subroutine get_run_steps


! ==============================================================

subroutine check_namelist_read(fileunit, nml_name, nmlfile, partit)
use MOD_PARTIT
use MOD_PARSUP
use, intrinsic :: iso_fortran_env, only: error_unit
implicit none
integer, intent(in) :: fileunit
character(len=*), intent(in) :: nml_name
character(len=*), intent(in) :: nmlfile
type(t_partit), intent(in) :: partit
character(len=256) :: line

backspace(fileunit)
read(fileunit, fmt='(A)') line
if(partit%mype==0) then
write(error_unit,'(A)') 'ERROR: Could not read namelist '//trim(nml_name)//' from '//trim(nmlfile)
write(error_unit,'(A)') 'Invalid line in namelist: '//trim(line)
endif
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
end subroutine
20 changes: 12 additions & 8 deletions src/gen_modules_partitioning.F90
Original file line number Diff line number Diff line change
Expand Up @@ -107,25 +107,29 @@ subroutine par_ex(COMM, mype, abort) ! finalizes MPI
integer, optional, intent(in) :: abort
integer :: error

! For standalone runs we directly call the MPI_barrier and MPI_finalize

! For standalone runs we
! when there is error par_ex should be called with abort argument to abort abruptly,
! in all other cases model will be finalized here, call the MPI_barrier and MPI_finalize
!---------------------------------------------------------------
!TODO: logic is convoluted here, not defined oasis and model needs to abort doesn't happen using par_ex
#ifndef __oasis
if (present(abort)) then
if (mype==0) write(*,*) 'Run finished unexpectedly!'
call MPI_ABORT(MPI_COMM_WORLD, 1, error)
call MPI_ABORT(COMM, 1, error)
else
! TODO: this is where fesom standalone, ifsinterface etc get to
!1. there no abort actually even when model calls abort, and barrier may hang
!2. when using fesom as lib using finalize is bad here as there may
! TODO: this is where fesom standalone, and ifsinterface get to in case of normal exit.
!1. barrier may hang so have to be careful
!2. when using fesom as interface using finalize is bad here as there may
! be other MPI tasks running in calling library like IFS, better
! better practice in that case would be to free the communicator.
!3. or change all the cases in fesom that call par_ex to properly use abort in all cases other then normal closure.
call MPI_Barrier(COMM, error)
call MPI_Finalize(error)
endif

#else ! standalone
! TODO logic below is also convoluted really not really for standalone
#else !
! TODO logic below is convoluted, COMM that is passed should be used for MPI_ABORT
! changes are easy but need to be tested with coupled configurations
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that mean changes are were easy to make or would be easy?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It handles core parts and there are other parts like ice modules, blowup that may need more inspection on aborts, clean kind to stop normally vs really abort kind. this can be easy to do once we go through those parts, but i would leave it here for now, going into all those parts to me is another issue. @JanStreffing thanks for confirming communicator in oifs-fesom case, then nothing about that configuration to worry about with these changes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i want to do one small test with OpenIFS-FESOM2 before I merge.

! From here on the two coupled options
!-------------------------------------
#if defined (__oifs)
Expand Down
20 changes: 14 additions & 6 deletions src/io_meandata.F90
Original file line number Diff line number Diff line change
Expand Up @@ -178,18 +178,25 @@ subroutine ini_mean_io(ice, dynamics, tracers, partit, mesh)
! OPEN and read namelist for I/O
open( unit=nm_io_unit, file='namelist.io', form='formatted', access='sequential', status='old', iostat=iost )
if (iost == 0) then
if (mype==0) WRITE(*,*) ' file : ', 'namelist.io',' open ok'
else
if (mype==0) WRITE(*,*) 'ERROR: --> bad opening file : ', 'namelist.io',' ; iostat=',iost
call par_ex(partit%MPI_COMM_FESOM, partit%mype)
stop
if (mype==0) WRITE(*,*) ' file : ', 'namelist.io',' open ok'
else
if (mype==0) WRITE(*,*) 'ERROR: --> file not found : ', 'namelist.io',' ; iostat=',iost
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
stop
endif

READ(nm_io_unit, nml=nml_general, iostat=iost )
if (iost/=0) then
if (mype==0) WRITE(*,*) 'ERROR: in reading nml_general block in namelist.io, invalid formatting.'
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
endif

allocate(io_list(io_listsize))
READ(nm_io_unit, nml=nml_list, iostat=iost )
close(nm_io_unit )

!___________________________________________________________________________
! TODO: unknown variable found then write clearly in log, saves lot of frustration.
do i=1, io_listsize
if (trim(io_list(i)%id)=='unknown ') then
if (mype==0) write(*,*) 'io_listsize will be changed from ', io_listsize, ' to ', i-1, '!'
Expand Down Expand Up @@ -2236,7 +2243,7 @@ subroutine def_stream_after_dimension_specific(entry, name, description, units,
allocate(data_strategy_nf_float_type :: entry%data_strategy)
else
if (partit%mype==0) write(*,*) 'not supported output accuracy:',accuracy,'for',trim(name)
call par_ex(partit%MPI_COMM_FESOM, partit%mype)
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
stop
endif ! accuracy

Expand Down Expand Up @@ -2270,6 +2277,7 @@ subroutine def_stream_after_dimension_specific(entry, name, description, units,
! write(*,*) 'total elem=', mesh%elem2D, entry_index
else
if(partit%mype == 0) print *,"can not determine if ",trim(name)," is node or elem based"
call par_ex(partit%MPI_COMM_FESOM, partit%mype, 1)
stop
end if

Expand Down
Loading
Loading