From 6b918b17c7d4e39b027ec481b57352954e877b68 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 21 Oct 2020 14:26:19 -0400 Subject: [PATCH 1/4] ice_domain: revert changes to 'max_blocks' computation from fcbea1de (#377) In fcbea1d (Added DMI/nuopc driver and fixed ANGLET (#377), 2019-11-22), the computation in 'ice_domain::init_domain_blocks' that estimates 'max_blocks' if the value '-1' is given in the namelist was changed without explanations. The old computation was computing the number of blocks in the X and Y directions, taking any necessary padding into account (by substracting 1 to n[xy]_global, using integer division and adding 1), multiplying them to compute the total number of blocks, and integer-dividing by `nprocs` to estimate the number of blocks per processor. The new computation does a similar computation, but it's unclear what it is computing exactly. Since it uses floating point division and only casts the result to an integer at the end of the computation, it systematically computes a `max_blocks` value that is smaller than the old computation. This leads to a `max_blocks` value that is systematically too small for the cartesian decomposition when `block_size_x(y)` does not divide `nx(y)_global` evenly. Go back to the old computation. Also, adjust the documentation to make it clearer that it's possible that the `max_blocks` value computed by the model might not be appropriate. --- cicecore/cicedynB/infrastructure/ice_domain.F90 | 6 ++---- doc/source/user_guide/ug_implementation.rst | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cicecore/cicedynB/infrastructure/ice_domain.F90 b/cicecore/cicedynB/infrastructure/ice_domain.F90 index f34d1967e..f6142cbd8 100644 --- a/cicecore/cicedynB/infrastructure/ice_domain.F90 +++ b/cicecore/cicedynB/infrastructure/ice_domain.F90 @@ -192,10 +192,8 @@ subroutine init_domain_blocks call broadcast_scalar(add_mpi_barriers, master_task) if (my_task == master_task) then if (max_blocks < 1) then - max_blocks=int( & - ( (dble(nx_global-1)/dble(block_size_x + 1)) * & - (dble(ny_global-1)/dble(block_size_y + 1)) ) & - / dble(nprocs)) + max_blocks=( ((nx_global-1)/block_size_x + 1) * & + ((ny_global-1)/block_size_y + 1) ) / nprocs max_blocks=max(1,max_blocks) write(nu_diag,'(/,a52,i6,/)') & '(ice_domain): max_block < 1: max_block estimated to ',max_blocks diff --git a/doc/source/user_guide/ug_implementation.rst b/doc/source/user_guide/ug_implementation.rst index 88afdcf52..26194b422 100644 --- a/doc/source/user_guide/ug_implementation.rst +++ b/doc/source/user_guide/ug_implementation.rst @@ -163,7 +163,7 @@ information to the log file, and if the block size or max blocks is inconsistent with the task and thread size, the model will abort. The code will also print a warning if the maximum number of blocks is too large. Although this is not fatal, it does use extra memory. If ``max_blocks`` is -set to -1, the code will compute a ``max_blocks`` on the fly. +set to -1, the code will compute a tentative ``max_blocks`` on the fly. A loop at the end of routine *create\_blocks* in module **ice\_blocks.F90** will print the locations for all of the blocks on From 87a44acffb72d1976c9cd06ec033ab129f91b96e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 21 Oct 2020 16:35:54 -0400 Subject: [PATCH 2/4] ice_distribution: check 'max_blocks' is enough for all distributions The subroutines 'create_distrb_cart', 'create_distrb_rake' and 'create_distrb_spacecurve', in contrast to the other distribution-creating subroutines in module ice_distribution, do not check if the index they are about to access in the `blockIndex` array of the distribution they are creating is smaller then `max_blocks`. This results in an out-of-bound access when `max_blocks` is too small, and potential segementation faults. Add checks for these three distributions. Additionnally, for the cartesian distribution, compute the required minimum value for `max_blocks`, which can be done easily in this case, abort early, and inform the user of the required minimum value. --- cicecore/shared/ice_distribution.F90 | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/cicecore/shared/ice_distribution.F90 b/cicecore/shared/ice_distribution.F90 index 8c5808820..e95d8d6bd 100644 --- a/cicecore/shared/ice_distribution.F90 +++ b/cicecore/shared/ice_distribution.F90 @@ -575,7 +575,11 @@ function create_distrb_cart(nprocs, workPerBlock) result(newDistrb) nprocsX, &! num of procs in x for global domain nprocsY, &! num of procs in y for global domain numBlocksXPerProc, &! num of blocks per processor in x - numBlocksYPerProc ! num of blocks per processor in y + numBlocksYPerProc, &! num of blocks per processor in y + numBlocksPerProc ! required number of blocks per processor + + character(len=char_len) :: & + numBlocksPerProc_str ! required number of blocks per processor (as string) character(len=*),parameter :: subname='(create_distrb_cart)' @@ -628,6 +632,14 @@ function create_distrb_cart(nprocs, workPerBlock) result(newDistrb) numBlocksXPerProc = (nblocks_x-1)/nprocsX + 1 numBlocksYPerProc = (nblocks_y-1)/nprocsY + 1 + ! Check if max_blocks is too small + numBlocksPerProc = numBlocksXPerProc * numBlocksYPerProc + if (numBlocksPerProc > max_blocks) then + write(numBlocksPerProc_str, '(i2)') numBlocksPerProc + call abort_ice(subname//'ERROR: max_blocks too small (need at least '//trim(numBlocksPerProc_str)//')') + return + endif + do j=1,nprocsY do i=1,nprocsX processor = (j-1)*nprocsX + i ! number the processors @@ -996,6 +1008,10 @@ function create_distrb_rake(nprocs, workPerBlock) result(newDistrb) if (pid > 0) then procTmp(pid) = procTmp(pid) + 1 + if (procTmp(pid) > max_blocks) then + call abort_ice(subname//'ERROR: max_blocks too small') + return + endif newDistrb%blockLocalID (n) = procTmp(pid) newDistrb%blockIndex(pid,procTmp(pid)) = n else @@ -2304,6 +2320,10 @@ function create_distrb_spacecurve(nprocs,work_per_block) if(pid>0) then proc_tmp(pid) = proc_tmp(pid) + 1 + if (proc_tmp(pid) > max_blocks) then + call abort_ice(subname//'ERROR: max_blocks too small') + return + endif dist%blockLocalID(n) = proc_tmp(pid) dist%blockIndex(pid,proc_tmp(pid)) = n else From efa7e28e0bc58f19f0631fbbf4d1f41bec2407fd Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 23 Oct 2020 14:09:33 -0400 Subject: [PATCH 3/4] cicecore: add 'debug_blocks' namelist variable to debug block decomposition As mentioned in the documentation, subroutines 'ice_blocks::create_blocks' and 'ice_distribution::create_local_block_ids' can print block information to standard out if the local variable `dbug` is modified to ".true.". For convenience, replace these local variables with a namelist variable, 'debug_blocks', and add a 'cice.setup' option to activate this new variable in the namelist. Adjust the documentation accordingly. --- cicecore/cicedynB/infrastructure/ice_blocks.F90 | 9 ++++----- cicecore/cicedynB/infrastructure/ice_domain.F90 | 8 ++++++-- cicecore/shared/ice_distribution.F90 | 8 ++------ configuration/scripts/ice_in | 1 + configuration/scripts/options/set_nml.debugblocks | 1 + doc/source/user_guide/ug_case_settings.rst | 1 + doc/source/user_guide/ug_implementation.rst | 12 +++++++----- 7 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 configuration/scripts/options/set_nml.debugblocks diff --git a/cicecore/cicedynB/infrastructure/ice_blocks.F90 b/cicecore/cicedynB/infrastructure/ice_blocks.F90 index d6a0b2bcc..2768a40c3 100644 --- a/cicecore/cicedynB/infrastructure/ice_blocks.F90 +++ b/cicecore/cicedynB/infrastructure/ice_blocks.F90 @@ -83,6 +83,9 @@ module ice_blocks nblocks_x ,&! tot num blocks in i direction nblocks_y ! tot num blocks in j direction + logical (kind=log_kind), public :: & + debug_blocks ! print verbose block information + !----------------------------------------------------------------------- ! ! module private data @@ -133,8 +136,6 @@ subroutine create_blocks(nx_global, ny_global, ew_boundary_type, & iblock, jblock ,&! block loop indices is, ie, js, je ! temp start, end indices - logical (log_kind) :: dbug - character(len=*), parameter :: subname = '(create_blocks)' !---------------------------------------------------------------------- @@ -311,9 +312,7 @@ subroutine create_blocks(nx_global, ny_global, ew_boundary_type, & end do end do -! dbug = .true. - dbug = .false. - if (dbug) then + if (debug_blocks) then if (my_task == master_task) then write(nu_diag,*) 'block i,j locations' do n = 1, nblocks_tot diff --git a/cicecore/cicedynB/infrastructure/ice_domain.F90 b/cicecore/cicedynB/infrastructure/ice_domain.F90 index f6142cbd8..3f8e34588 100644 --- a/cicecore/cicedynB/infrastructure/ice_domain.F90 +++ b/cicecore/cicedynB/infrastructure/ice_domain.F90 @@ -21,7 +21,7 @@ module ice_domain add_mpi_barriers use ice_broadcast, only: broadcast_scalar, broadcast_array use ice_blocks, only: block, get_block, create_blocks, nghost, & - nblocks_x, nblocks_y, nblocks_tot, nx_block, ny_block + nblocks_x, nblocks_y, nblocks_tot, nx_block, ny_block, debug_blocks use ice_distribution, only: distrb use ice_boundary, only: ice_halo use ice_exit, only: abort_ice @@ -134,7 +134,8 @@ subroutine init_domain_blocks maskhalo_dyn, & maskhalo_remap, & maskhalo_bound, & - add_mpi_barriers + add_mpi_barriers, & + debug_blocks !---------------------------------------------------------------------- ! @@ -153,6 +154,7 @@ subroutine init_domain_blocks maskhalo_remap = .false. ! if true, use masked halos for transport maskhalo_bound = .false. ! if true, use masked halos for bound_state add_mpi_barriers = .false. ! if true, throttle communication + debug_blocks = .false. ! if true, print verbose block information max_blocks = -1 ! max number of blocks per processor block_size_x = -1 ! size of block in first horiz dimension block_size_y = -1 ! size of block in second horiz dimension @@ -190,6 +192,7 @@ subroutine init_domain_blocks call broadcast_scalar(maskhalo_remap, master_task) call broadcast_scalar(maskhalo_bound, master_task) call broadcast_scalar(add_mpi_barriers, master_task) + call broadcast_scalar(debug_blocks, master_task) if (my_task == master_task) then if (max_blocks < 1) then max_blocks=( ((nx_global-1)/block_size_x + 1) * & @@ -266,6 +269,7 @@ subroutine init_domain_blocks write(nu_diag,'(a,l6)') ' maskhalo_remap = ', maskhalo_remap write(nu_diag,'(a,l6)') ' maskhalo_bound = ', maskhalo_bound write(nu_diag,'(a,l6)') ' add_mpi_barriers = ', add_mpi_barriers + write(nu_diag,'(a,l6)') ' debug_blocks = ', debug_blocks write(nu_diag,'(a,2i6)') ' block_size_x,_y = ', block_size_x, block_size_y write(nu_diag,'(a,i6)') ' max_blocks = ', max_blocks write(nu_diag,'(a,i6,/)')' Number of ghost cells = ', nghost diff --git a/cicecore/shared/ice_distribution.F90 b/cicecore/shared/ice_distribution.F90 index e95d8d6bd..6128f28d8 100644 --- a/cicecore/shared/ice_distribution.F90 +++ b/cicecore/shared/ice_distribution.F90 @@ -12,7 +12,7 @@ module ice_distribution use ice_kinds_mod use ice_domain_size, only: max_blocks use ice_communicate, only: my_task, master_task, create_communicator - use ice_blocks, only: nblocks_x, nblocks_y, nblocks_tot + use ice_blocks, only: nblocks_x, nblocks_y, nblocks_tot, debug_blocks use ice_exit, only: abort_ice use ice_fileunits, only: nu_diag @@ -154,8 +154,6 @@ subroutine create_local_block_ids(block_ids, distribution) integer (int_kind) :: & n, bcount ! dummy counters - logical (log_kind) :: dbug - character(len=*),parameter :: subname='(create_local_block_ids)' !----------------------------------------------------------------------- @@ -178,14 +176,12 @@ subroutine create_local_block_ids(block_ids, distribution) ! !----------------------------------------------------------------------- -! dbug = .true. - dbug = .false. if (bcount > 0) then do n=1,size(distribution%blockLocation) if (distribution%blockLocation(n) == my_task+1) then block_ids(distribution%blockLocalID(n)) = n - if (dbug) then + if (debug_blocks) then write(nu_diag,*) subname,'block id, proc, local_block: ', & block_ids(distribution%blockLocalID(n)), & distribution%blockLocation(n), & diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 2c9cf948b..4b23fee9b 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -243,6 +243,7 @@ maskhalo_remap = .false. maskhalo_bound = .false. add_mpi_barriers = .false. + debug_blocks = .false. / &zbgc_nml diff --git a/configuration/scripts/options/set_nml.debugblocks b/configuration/scripts/options/set_nml.debugblocks new file mode 100644 index 000000000..299dfff66 --- /dev/null +++ b/configuration/scripts/options/set_nml.debugblocks @@ -0,0 +1 @@ +debug_blocks = .true. diff --git a/doc/source/user_guide/ug_case_settings.rst b/doc/source/user_guide/ug_case_settings.rst index d23224ee8..d66925bde 100644 --- a/doc/source/user_guide/ug_case_settings.rst +++ b/doc/source/user_guide/ug_case_settings.rst @@ -258,6 +258,7 @@ domain_nml "``add_mpi_barriers``", "logical", "throttle communication", "``.false.``" "``block_size_x``", "integer", "block size in x direction", "-1" "``block_size_y``", "integer", "block size in y direction", "-1" + "``debug_blocks``", "logical", "add additional print statements to debug the block decomposition", "``.false.``" "``distribution_type``", "``cartesian``", "2D cartesian block distribution method", "``cartesian``" "", "``rake``", "redistribute blocks among neighbors", "" "", "``roundrobin``", "1 block per proc until blocks are used", "" diff --git a/doc/source/user_guide/ug_implementation.rst b/doc/source/user_guide/ug_implementation.rst index 26194b422..7ec6fb4ab 100644 --- a/doc/source/user_guide/ug_implementation.rst +++ b/doc/source/user_guide/ug_implementation.rst @@ -167,14 +167,16 @@ set to -1, the code will compute a tentative ``max_blocks`` on the fly. A loop at the end of routine *create\_blocks* in module **ice\_blocks.F90** will print the locations for all of the blocks on -the global grid if dbug is set to be true. Likewise, a similar loop at +the global grid if the namelist variable ``debug_blocks`` is set to be true. Likewise, a similar loop at the end of routine *create\_local\_block\_ids* in module **ice\_distribution.F90** will print the processor and local block number for each block. With this information, the grid decomposition -into processors and blocks can be ascertained. The dbug flag must be -manually set in the code in each case (independently of the dbug flag in -**ice\_in**), as there may be hundreds or thousands of blocks to print -and this information should be needed only rarely. This information is +into processors and blocks can be ascertained. This ``debug_blocks`` variable +is independent of the ``dbug`` variable in +**ice\_in**, as there may be hundreds or thousands of blocks to print +and this information should be needed only rarely. ``debug_blocks`` +can be set to true using the +``debugblocks`` option with **cice.setup**. This information is much easier to look at using a debugger such as Totalview. There is also an output field that can be activated in `icefields\_nml`, ``f_blkmask``, that prints out the variable ``blkmask`` to the history file and From 0ef319c368b659b5fd987d2eac558a04002051ea Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 30 Mar 2021 14:26:01 -0400 Subject: [PATCH 4/4] ice_domain: improve 'max_blocks' computation The value of 'max_blocks' as currently computed can still be too small if the number of procs does not divide the number of blocks evenly. Use the same 'minus one, integer divide, plus one' trick as is done for the number of blocks in the X and Y directions in order to always compute a 'max_blocks' value that is large enough. This estimates the same value for 'max_blocks' as the 'cice_decomp.csh' script computes: @ bx = $nxglob / ${blckx} if ($bx * ${blckx} != $nxglob) @ bx = $bx + 1 @ by = $nyglob / ${blcky} if ($by * ${blcky} != $nyglob) @ by = $by + 1 @ m = ($bx * $by) / ${task} if ($m * ${task} != $bx * $by) @ m = $m + 1 set mxblcks = $m --- cicecore/cicedynB/infrastructure/ice_domain.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cicecore/cicedynB/infrastructure/ice_domain.F90 b/cicecore/cicedynB/infrastructure/ice_domain.F90 index 3f8e34588..f2153db5e 100644 --- a/cicecore/cicedynB/infrastructure/ice_domain.F90 +++ b/cicecore/cicedynB/infrastructure/ice_domain.F90 @@ -196,7 +196,7 @@ subroutine init_domain_blocks if (my_task == master_task) then if (max_blocks < 1) then max_blocks=( ((nx_global-1)/block_size_x + 1) * & - ((ny_global-1)/block_size_y + 1) ) / nprocs + ((ny_global-1)/block_size_y + 1) - 1) / nprocs + 1 max_blocks=max(1,max_blocks) write(nu_diag,'(/,a52,i6,/)') & '(ice_domain): max_block < 1: max_block estimated to ',max_blocks