Skip to content

Commit

Permalink
ice_dyn_vp: use global_{norm,dot_product} for bit-for-bit output repr…
Browse files Browse the repository at this point in the history
…oducibility

Make the results of the VP solver reproducible if desired by refactoring
the code to use the subroutines 'global_norm' and 'global_dot_product'
added in the previous commit.

The same pattern appears in the FGMRES solver (subroutine 'fgmres'), the
preconditioner 'pgmres' which uses the same algorithm, and the
Classical and Modified Gram-Schmidt algorithms in 'orthogonalize'.

These modifications do not change the number of global sums in the
fgmres, pgmres and the MGS algorithm. For the CGS algorithm, there is
(in theory) a slight performance impact as 'global_dot_product' is
called inside the loop, whereas previously we called
'global_allreduce_sum' after the loop to compute all 'initer' sums at
the same time.

To keep that optimization, we would have to implement a new interface
'global_allreduce_sum' which would take an array of shape
(nx_block,ny_block,max_blocks,k) and sum over their first three
dimensions before performing the global reduction over the k dimension.

We choose to not go that route for now mostly because anyway the CGS
algorithm is (by default) only used for the PGMRES preconditioner, and
so the cost should be relatively low as 'initer' corresponds to
'dim_pgmres' in the namelist, which should be kept low for efficiency
(default 5).

These changes lead to bit-for-bit reproducibility (the decomp_suite
passes) when using 'precond=ident' and 'precond=diag' along with
'bfbflag=reprosum'. 'precond=pgmres' is still not bit-for-bit because
some halo updates are skipped for efficiency. This will be addressed in
a following commit.

[1] CICE-Consortium#491 (comment)
  • Loading branch information
phil-blain committed Oct 17, 2022
1 parent 58bd7c4 commit 02eeb42
Showing 1 changed file with 39 additions and 101 deletions.
140 changes: 39 additions & 101 deletions cicecore/cicedynB/dynamics/ice_dyn_vp.F90
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ module ice_dyn_vp
seabed_stress, Ktens, stack_fields, unstack_fields
use ice_fileunits, only: nu_diag
use ice_flux, only: fmU
use ice_global_reductions, only: global_sum, global_allreduce_sum
use ice_global_reductions, only: global_sum
use ice_grid, only: dxT, dyT, dxhy, dyhx, cxp, cyp, cxm, cym, uarear
use ice_exit, only: abort_ice
use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted
Expand Down Expand Up @@ -2925,18 +2925,10 @@ subroutine fgmres (zetax2 , etax2 , &
arnoldi_basis_y = c0

! solution is zero if RHS is zero
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellu (iblk), &
indxui (:,iblk), indxuj(:,iblk), &
bx(:,:,iblk) , &
by(:,:,iblk) , &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_rhs = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_rhs = global_norm(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
bx , by )
if (norm_rhs == c0) then
solx = bx
soly = by
Expand Down Expand Up @@ -2974,18 +2966,11 @@ subroutine fgmres (zetax2 , etax2 , &
! Start outer (restarts) loop
do
! Compute norm of initial residual
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellU (iblk), &
indxUi (:,iblk), indxUj(:,iblk), &
arnoldi_basis_x(:,:,iblk, 1) , &
arnoldi_basis_y(:,:,iblk, 1) , &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_residual = global_norm(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, 1), &
arnoldi_basis_y(:,:,:, 1))

if (my_task == master_task .and. monitor_fgmres) then
write(nu_diag, '(a,i4,a,d26.16)') "monitor_fgmres: iter_fgmres= ", nbiter, &
Expand Down Expand Up @@ -3079,17 +3064,11 @@ subroutine fgmres (zetax2 , etax2 , &
hessenberg)

! Compute norm of new Arnoldi vector and update Hessenberg matrix
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellU (iblk), &
indxUi (:,iblk), indxUj(:, iblk) , &
arnoldi_basis_x(:,:,iblk, nextit), &
arnoldi_basis_y(:,:,iblk, nextit), &
norm_squared(iblk))
enddo
!$OMP END PARALLEL DO
hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info))
hessenberg(nextit,initer) = global_norm(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

! Watch out for happy breakdown
if (.not. almost_zero( hessenberg(nextit,initer) ) ) then
Expand Down Expand Up @@ -3367,18 +3346,11 @@ subroutine pgmres (zetax2 , etax2 , &
! Start outer (restarts) loop
do
! Compute norm of initial residual
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellU (iblk), &
indxUi (:,iblk), indxUj(:, iblk), &
arnoldi_basis_x(:,:,iblk, 1), &
arnoldi_basis_y(:,:,iblk, 1), &
norm_squared(iblk))

enddo
!$OMP END PARALLEL DO
norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))
norm_residual = global_norm(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, 1), &
arnoldi_basis_y(:,:,:, 1))

if (my_task == master_task .and. monitor_pgmres) then
write(nu_diag, '(a,i4,a,d26.16)') "monitor_pgmres: iter_pgmres= ", nbiter, &
Expand Down Expand Up @@ -3461,17 +3433,11 @@ subroutine pgmres (zetax2 , etax2 , &
hessenberg)

! Compute norm of new Arnoldi vector and update Hessenberg matrix
!$OMP PARALLEL DO PRIVATE(iblk)
do iblk = 1, nblocks
call calc_L2norm_squared(nx_block , ny_block , &
icellU (iblk), &
indxUi (:,iblk), indxUj(:, iblk) , &
arnoldi_basis_x(:,:,iblk, nextit), &
arnoldi_basis_y(:,:,iblk, nextit), &
norm_squared(iblk))
enddo
!$OMP END PARALLEL DO
hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info))
hessenberg(nextit,initer) = global_norm(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

! Watch out for happy breakdown
if (.not. almost_zero( hessenberg(nextit,initer) ) ) then
Expand Down Expand Up @@ -3750,39 +3716,20 @@ subroutine orthogonalize(ortho_type , initer , &
ij , & ! compressed index
i, j ! grid indices

real (kind=dbl_kind), dimension (max_blocks) :: &
local_dot ! local array value to accumulate dot product of grid function over blocks

real (kind=dbl_kind), dimension(maxinner) :: &
dotprod_local ! local array to accumulate several dot product computations

character(len=*), parameter :: subname = '(orthogonalize)'

if (trim(ortho_type) == 'cgs') then ! Classical Gram-Schmidt
! Classical Gram-Schmidt orthogonalisation process
! First loop of Gram-Schmidt (compute coefficients)
dotprod_local = c0
do it = 1, initer
local_dot = c0

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
do ij = 1, icellU(iblk)
i = indxUi(ij, iblk)
j = indxUj(ij, iblk)

local_dot(iblk) = local_dot(iblk) + &
(arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
(arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
enddo ! ij
enddo
!$OMP END PARALLEL DO

dotprod_local(it) = sum(local_dot)
hessenberg(it, initer) = global_dot_product(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, it) , &
arnoldi_basis_y(:,:,:, it) , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))
end do

hessenberg(1:initer, initer) = global_allreduce_sum(dotprod_local(1:initer), distrb_info)

! Second loop of Gram-Schmidt (orthonormalize)
do it = 1, initer
!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
Expand All @@ -3802,22 +3749,13 @@ subroutine orthogonalize(ortho_type , initer , &
elseif (trim(ortho_type) == 'mgs') then ! Modified Gram-Schmidt
! Modified Gram-Schmidt orthogonalisation process
do it = 1, initer
local_dot = c0

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
do ij = 1, icellU(iblk)
i = indxUi(ij, iblk)
j = indxUj(ij, iblk)

local_dot(iblk) = local_dot(iblk) + &
(arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
(arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
enddo ! ij
enddo
!$OMP END PARALLEL DO

hessenberg(it,initer) = global_sum(sum(local_dot), distrb_info)
hessenberg(it, initer) = global_dot_product(nx_block, ny_block, &
icellU , &
indxUi , indxUj , &
arnoldi_basis_x(:,:,:, it) , &
arnoldi_basis_y(:,:,:, it) , &
arnoldi_basis_x(:,:,:, nextit), &
arnoldi_basis_y(:,:,:, nextit))

!$OMP PARALLEL DO PRIVATE(iblk,ij,i,j)
do iblk = 1, nblocks
Expand Down

0 comments on commit 02eeb42

Please sign in to comment.