From 5b0418a9f6d181d668ddebdc2c540566529e4125 Mon Sep 17 00:00:00 2001
From: Tony Craig
Date: Wed, 5 Apr 2023 13:29:21 -0700
Subject: [PATCH] Update global reduction implementation to improve
 performance, fix VP bug (#824)

* Update global reduction implementation to improve performance, fix VP bug

This was mainly done for situations like VP that need a fast global sum.
The VP global sum is still slightly faster than the one computed in the
infrastructure, so that implementation is kept. Fixed a bug in the
workspace_y calculation in VP. Also found that the haloupdate call in the
precondition step generally improves VP performance, so the option to skip
that haloupdate was removed.

Separately, fixed a bug in the tripoleT global sum implementation, added a
tripoleT global sum unit test, and resynced ice_exit.F90, ice_reprosum.F90,
and ice_global_reductions.F90 between the serial and mpi versions.

- Refactor global sums to improve performance, move if checks outside do loops
- Fix bug in tripoleT global sums, tripole seam masking
- Update VP solver, use local global sum more often
- Update VP solver, fix bug in workspace_y calculation
- Update VP solver, always call haloupdate during precondition
- Refactor ice_exit.F90 and sync serial and mpi versions
- Sync ice_reprosum.F90 between serial and mpi versions
- Update sumchk unit test to handle grids better
- Add tripoleT sumchk test

* Update VP global sum to exclude local implementation with tripole grids
---
 cicecore/cicedyn/analysis/ice_diagnostics.F90 |   6 +-
 cicecore/cicedyn/dynamics/ice_dyn_vp.F90      |  41 ++--
 .../infrastructure/comm/mpi/ice_exit.F90      |  60 ++---
 .../comm/mpi/ice_global_reductions.F90        | 232 ++++++++++++------
 .../infrastructure/comm/mpi/ice_reprosum.F90  |  34 +--
 .../infrastructure/comm/serial/ice_exit.F90   |  69 ++++--
 .../comm/serial/ice_global_reductions.F90     | 232 ++++++++++++------
 cicecore/drivers/unittest/sumchk/sumchk.F90   |  59 +++--
 configuration/scripts/tests/unittest_suite.ts |   1 +
 9 files changed, 466 insertions(+), 268 deletions(-)

diff --git a/cicecore/cicedyn/analysis/ice_diagnostics.F90 b/cicecore/cicedyn/analysis/ice_diagnostics.F90
index 53631b2d4..b14dff4e3 100644
--- a/cicecore/cicedyn/analysis/ice_diagnostics.F90
+++ b/cicecore/cicedyn/analysis/ice_diagnostics.F90
@@ -261,10 +261,8 @@ subroutine runtime_diags (dt)
       !$OMP END PARALLEL DO
       extentn = c0
       extents = c0
-      extentn = global_sum(work1, distrb_info, field_loc_center, &
-                           tarean)
-      extents = global_sum(work1, distrb_info, field_loc_center, &
-                           tareas)
+      extentn = global_sum(work1, distrb_info, field_loc_center, tarean)
+      extents = global_sum(work1, distrb_info, field_loc_center, tareas)
       extentn = extentn * m2_to_km2
       extents = extents * m2_to_km2
 
diff --git a/cicecore/cicedyn/dynamics/ice_dyn_vp.F90 b/cicecore/cicedyn/dynamics/ice_dyn_vp.F90
index 3915004b4..32971c5b6 100644
--- a/cicecore/cicedyn/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedyn/dynamics/ice_dyn_vp.F90
@@ -2502,7 +2502,7 @@ function global_dot_product (nx_block , ny_block , &
                               vector2_x , vector2_y) &
              result(dot_product)
 
-      use ice_domain, only: distrb_info
+      use ice_domain, only: distrb_info, ns_boundary_type
      use ice_domain_size, only: max_blocks
      use ice_fileunits, only: bfbflag
 
@@ -2552,8 +2552,14 @@ function global_dot_product (nx_block , ny_block , &
       enddo
       !$OMP END PARALLEL DO
 
-      ! Use local summation result unless bfbflag is active
-      if (bfbflag == 'off') then
+      ! Use the faster local summation for several bfbflag settings.
+      ! The local implementation sums within each block, then over the local
+      ! blocks, and calls global_sum on a scalar. For bfbflag 'off', 'lsum8', and
+      ! 'lsum4' it should be just as accurate as the general array global_sum,
+      ! without the extra copies and overhead. The array global_sum is still used
+      ! for stricter bfbflag settings and for tripole grids (special seam masking).
+      if (ns_boundary_type /= 'tripole' .and. ns_boundary_type /= 'tripoleT' .and. &
+          (bfbflag == 'off' .or. bfbflag == 'lsum8' .or. bfbflag == 'lsum4')) then
          dot_product = global_sum(sum(dot), distrb_info)
       else
          dot_product = global_sum(prod, distrb_info, field_loc_NEcorner)
@@ -3120,7 +3126,7 @@ subroutine fgmres (zetax2 , etax2 , &
                j = indxUj(ij, iblk)
 
                workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
-               workspace_y(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+               workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
             enddo ! ij
          enddo
          !$OMP END PARALLEL DO
@@ -3151,7 +3157,6 @@ subroutine pgmres (zetax2 , etax2 , &
 
       use ice_boundary, only: ice_HaloUpdate
       use ice_domain, only: maskhalo_dyn, halo_info
-      use ice_fileunits, only: bfbflag
       use ice_timers, only: ice_timer_start, ice_timer_stop, timer_bound
 
       real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
@@ -3343,21 +3348,17 @@ subroutine pgmres (zetax2 , etax2 , &
                            workspace_x , workspace_y)
 
          ! Update workspace with boundary values
-         ! NOTE: skipped for efficiency since this is just a preconditioner
-         ! unless bfbflag is active
-         if (bfbflag /= 'off') then
-            call stack_fields(workspace_x, workspace_y, fld2)
-            call ice_timer_start(timer_bound)
-            if (maskhalo_dyn) then
-               call ice_HaloUpdate (fld2, halo_info_mask, &
-                                    field_loc_NEcorner, field_type_vector)
-            else
-               call ice_HaloUpdate (fld2, halo_info, &
-                                    field_loc_NEcorner, field_type_vector)
-            endif
-            call ice_timer_stop(timer_bound)
-            call unstack_fields(fld2, workspace_x, workspace_y)
+         call stack_fields(workspace_x, workspace_y, fld2)
+         call ice_timer_start(timer_bound)
+         if (maskhalo_dyn) then
+            call ice_HaloUpdate (fld2, halo_info_mask, &
+                                 field_loc_NEcorner, field_type_vector)
+         else
+            call ice_HaloUpdate (fld2, halo_info, &
+                                 field_loc_NEcorner, field_type_vector)
          endif
+         call ice_timer_stop(timer_bound)
+         call unstack_fields(fld2, workspace_x, workspace_y)
 
       !$OMP PARALLEL DO PRIVATE(iblk)
       do iblk = 1, nblocks
@@ -3528,7 +3529,7 @@ subroutine pgmres (zetax2 , etax2 , &
                j = indxUj(ij, iblk)
 
                workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
-               workspace_y(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+               workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
             enddo ! ij
          enddo
          !$OMP END PARALLEL DO
diff --git a/cicecore/cicedyn/infrastructure/comm/mpi/ice_exit.F90 b/cicecore/cicedyn/infrastructure/comm/mpi/ice_exit.F90
index eafb3228f..5351a5336 100644
--- a/cicecore/cicedyn/infrastructure/comm/mpi/ice_exit.F90
+++ b/cicecore/cicedyn/infrastructure/comm/mpi/ice_exit.F90
@@ -1,3 +1,4 @@
+
 !=======================================================================
 !
 ! Exit the model.
@@ -8,7 +9,15 @@ module ice_exit use ice_kinds_mod + use ice_fileunits, only: nu_diag, ice_stderr, flush_fileunit use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted +#if (defined CESMCOUPLED) + use shr_sys_mod +#else +#ifndef SERIAL_REMOVE_MPI + use mpi ! MPI Fortran module +#endif +#endif implicit none public @@ -23,14 +32,6 @@ subroutine abort_ice(error_message, file, line, doabort) ! This routine aborts the ice model and prints an error message. -#if (defined CESMCOUPLED) - use ice_fileunits, only: nu_diag, flush_fileunit - use shr_sys_mod -#else - use ice_fileunits, only: nu_diag, ice_stderr, flush_fileunit - use mpi ! MPI Fortran module -#endif - character (len=*), intent(in),optional :: error_message ! error message character (len=*), intent(in),optional :: file ! file integer (kind=int_kind), intent(in), optional :: line ! line number @@ -38,11 +39,10 @@ subroutine abort_ice(error_message, file, line, doabort) ! local variables -#ifndef CESMCOUPLED integer (int_kind) :: & ierr, & ! MPI error flag + outunit, & ! output unit error_code ! return code -#endif logical (log_kind) :: ldoabort ! local doabort flag character(len=*), parameter :: subname='(abort_ice)' @@ -50,30 +50,31 @@ subroutine abort_ice(error_message, file, line, doabort) if (present(doabort)) ldoabort = doabort #if (defined CESMCOUPLED) - call flush_fileunit(nu_diag) - call icepack_warnings_flush(nu_diag) - write(nu_diag,*) ' ' - write(nu_diag,*) subname, 'ABORTED: ' - if (present(file)) write (nu_diag,*) subname,' called from ',trim(file) - if (present(line)) write (nu_diag,*) subname,' line number ',line - if (present(error_message)) write (nu_diag,*) subname,' error = ',trim(error_message) - call flush_fileunit(nu_diag) - if (ldoabort) call shr_sys_abort(subname//trim(error_message)) + outunit = nu_diag #else + outunit = ice_stderr +#endif + call flush_fileunit(nu_diag) call icepack_warnings_flush(nu_diag) - write(ice_stderr,*) ' ' - write(ice_stderr,*) subname, 'ABORTED: ' - if (present(file)) write (ice_stderr,*) subname,' called from ',trim(file) - if (present(line)) write (ice_stderr,*) subname,' line number ',line - if (present(error_message)) write (ice_stderr,*) subname,' error = ',trim(error_message) - call flush_fileunit(ice_stderr) - error_code = 128 + write(outunit,*) ' ' + write(outunit,*) subname, 'ABORTED: ' + if (present(file)) write (outunit,*) subname,' called from ',trim(file) + if (present(line)) write (outunit,*) subname,' line number ',line + if (present(error_message)) write (outunit,*) subname,' error = ',trim(error_message) + call flush_fileunit(outunit) + if (ldoabort) then +#if (defined CESMCOUPLED) + call shr_sys_abort(subname//trim(error_message)) +#else +#ifndef SERIAL_REMOVE_MPI + error_code = 128 call MPI_ABORT(MPI_COMM_WORLD, error_code, ierr) +#endif stop - endif #endif + endif end subroutine abort_ice @@ -81,12 +82,15 @@ end subroutine abort_ice subroutine end_run -! Ends run by calling MPI_FINALIZE. +! Ends run by calling MPI_FINALIZE +! Does nothing in serial runs integer (int_kind) :: ierr ! 
MPI error flag character(len=*), parameter :: subname = '(end_run)' +#ifndef SERIAL_REMOVE_MPI call MPI_FINALIZE(ierr) +#endif end subroutine end_run diff --git a/cicecore/cicedyn/infrastructure/comm/mpi/ice_global_reductions.F90 b/cicecore/cicedyn/infrastructure/comm/mpi/ice_global_reductions.F90 index 4b94389f7..91daf53a8 100644 --- a/cicecore/cicedyn/infrastructure/comm/mpi/ice_global_reductions.F90 +++ b/cicecore/cicedyn/infrastructure/comm/mpi/ice_global_reductions.F90 @@ -181,7 +181,7 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -189,25 +189,45 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = array(i,j,iblock) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = array(i,j,iblock) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array(i,j,iblock) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -317,7 +337,7 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -325,25 +345,45 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. 
this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = real(array(i,j,iblock),dbl_kind) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = real(array(i,j,iblock),dbl_kind) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array(i,j,iblock),dbl_kind) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -445,7 +485,7 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -456,7 +496,7 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & do j=jb,je do i=ib,ie ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + if (maxiglob >= 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then ! blockSum = blockSum + 0_int_kind else if (present(mMask)) then @@ -798,7 +838,7 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -806,25 +846,45 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! 
set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -936,7 +996,7 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -944,25 +1004,45 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -1066,7 +1146,7 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -1077,7 +1157,7 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & do j=jb,je do i=ib,ie ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + if (maxiglob >= 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then ! blockSum = blockSum + 0_int_kind else if (present(mMask)) then diff --git a/cicecore/cicedyn/infrastructure/comm/mpi/ice_reprosum.F90 b/cicecore/cicedyn/infrastructure/comm/mpi/ice_reprosum.F90 index 8c6f90363..7c6c0eb77 100644 --- a/cicecore/cicedyn/infrastructure/comm/mpi/ice_reprosum.F90 +++ b/cicecore/cicedyn/infrastructure/comm/mpi/ice_reprosum.F90 @@ -87,7 +87,7 @@ MODULE ice_reprosum !----------------------------------------------------------------------- logical :: repro_sum_use_ddpdd = .false. -! logical :: detailed_timing = .false. + logical :: detailed_timing = .false. 
character(len=char_len_long) :: tmpstr CONTAINS @@ -100,10 +100,10 @@ MODULE ice_reprosum !----------------------------------------------------------------------- subroutine ice_reprosum_setopts(repro_sum_use_ddpdd_in, & - repro_sum_rel_diff_max_in, & - repro_sum_recompute_in, & - repro_sum_master, & - repro_sum_logunit ) + repro_sum_rel_diff_max_in, & + repro_sum_recompute_in, & + repro_sum_master, & + repro_sum_logunit ) !------------------------------Arguments-------------------------------- logical, intent(in), optional :: repro_sum_use_ddpdd_in @@ -260,12 +260,12 @@ end subroutine ice_reprosum_setopts !---------------------------------------------------------------------- subroutine ice_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & - nflds, ddpdd_sum, & - arr_gbl_max, arr_gbl_max_out, & - arr_max_levels, arr_max_levels_out, & - gbl_max_nsummands, gbl_max_nsummands_out,& - gbl_count, repro_sum_validate, & - repro_sum_stats, rel_diff, commid ) + nflds, ddpdd_sum, & + arr_gbl_max, arr_gbl_max_out, & + arr_max_levels, arr_max_levels_out, & + gbl_max_nsummands, gbl_max_nsummands_out,& + gbl_count, repro_sum_validate, & + repro_sum_stats, rel_diff, commid ) !---------------------------------------------------------------------- ! Arguments @@ -434,7 +434,7 @@ subroutine ice_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & ! if (detailed_timing) call xicex_timer_start('ice_reprosum_ddpdd') call ice_reprosum_ddpdd(arr, arr_gsum, nsummands, dsummands, & - nflds, mpi_comm) + nflds, mpi_comm) repro_sum_fast = 1 ! if (detailed_timing) call xicex_timer_stop('ice_reprosum_ddpdd') @@ -774,9 +774,9 @@ end subroutine ice_reprosum_calc !---------------------------------------------------------------------- subroutine ice_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & - arr_max_shift, arr_gmax_exp, max_levels, & - max_level, validate, recompute, & - omp_nthreads, mpi_comm ) + arr_max_shift, arr_gmax_exp, max_levels, & + max_level, validate, recompute, & + omp_nthreads, mpi_comm ) !---------------------------------------------------------------------- @@ -1224,7 +1224,7 @@ end subroutine ice_reprosum_int !---------------------------------------------------------------------- logical function ice_reprosum_tolExceeded (name, nflds, master, & - logunit, rel_diff ) + logunit, rel_diff ) !---------------------------------------------------------------------- ! Arguments @@ -1310,7 +1310,7 @@ end function ice_reprosum_tolExceeded !---------------------------------------------------------------------- subroutine ice_reprosum_ddpdd (arr, arr_gsum, nsummands, dsummands, & - nflds, mpi_comm ) + nflds, mpi_comm ) !---------------------------------------------------------------------- ! Arguments diff --git a/cicecore/cicedyn/infrastructure/comm/serial/ice_exit.F90 b/cicecore/cicedyn/infrastructure/comm/serial/ice_exit.F90 index 2daadc0e6..39f2b6702 100644 --- a/cicecore/cicedyn/infrastructure/comm/serial/ice_exit.F90 +++ b/cicecore/cicedyn/infrastructure/comm/serial/ice_exit.F90 @@ -1,7 +1,9 @@ + +#define SERIAL_REMOVE_MPI + !======================================================================= ! ! Exit the model. -! ! authors William H. Lipscomb (LANL) ! Elizabeth C. Hunke (LANL) ! 
2006 ECH: separated serial and mpi functionality @@ -9,10 +11,14 @@ module ice_exit use ice_kinds_mod - use ice_fileunits, only: nu_diag, flush_fileunit + use ice_fileunits, only: nu_diag, ice_stderr, flush_fileunit use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted -#ifdef CESMCOUPLED +#if (defined CESMCOUPLED) use shr_sys_mod +#else +#ifndef SERIAL_REMOVE_MPI + use mpi ! MPI Fortran module +#endif #endif implicit none @@ -24,7 +30,7 @@ module ice_exit !======================================================================= - subroutine abort_ice(error_message,file,line,doabort) + subroutine abort_ice(error_message, file, line, doabort) ! This routine aborts the ice model and prints an error message. @@ -33,30 +39,44 @@ subroutine abort_ice(error_message,file,line,doabort) integer (kind=int_kind), intent(in), optional :: line ! line number logical (kind=log_kind), intent(in), optional :: doabort ! abort flag - logical (kind=log_kind) :: ldoabort ! local doabort + ! local variables + + integer (int_kind) :: & + ierr, & ! MPI error flag + outunit, & ! output unit + error_code ! return code + logical (log_kind) :: ldoabort ! local doabort flag character(len=*), parameter :: subname='(abort_ice)' ldoabort = .true. if (present(doabort)) ldoabort = doabort -#ifdef CESMCOUPLED - call icepack_warnings_flush(nu_diag) - write(nu_diag,*) ' ' - write(nu_diag,*) subname, 'ABORTED: ' - if (present(file)) write (nu_diag,*) subname,' called from ',trim(file) - if (present(line)) write (nu_diag,*) subname,' line number ',line - if (present(error_message)) write (nu_diag,*) subname,' error = ',trim(error_message) - if (ldoabort) call shr_sys_abort(subname//trim(error_message)) +#if (defined CESMCOUPLED) + outunit = nu_diag #else - call icepack_warnings_flush(nu_diag) - write(nu_diag,*) ' ' - write(nu_diag,*) subname, 'ABORTED: ' - if (present(file)) write (nu_diag,*) subname,' called from ',trim(file) - if (present(line)) write (nu_diag,*) subname,' line number ',line - if (present(error_message)) write (nu_diag,*) subname,' error = ',trim(error_message) + outunit = ice_stderr +#endif + call flush_fileunit(nu_diag) - if (ldoabort) stop + call icepack_warnings_flush(nu_diag) + write(outunit,*) ' ' + write(outunit,*) subname, 'ABORTED: ' + if (present(file)) write (outunit,*) subname,' called from ',trim(file) + if (present(line)) write (outunit,*) subname,' line number ',line + if (present(error_message)) write (outunit,*) subname,' error = ',trim(error_message) + call flush_fileunit(outunit) + + if (ldoabort) then +#if (defined CESMCOUPLED) + call shr_sys_abort(subname//trim(error_message)) +#else +#ifndef SERIAL_REMOVE_MPI + error_code = 128 + call MPI_ABORT(MPI_COMM_WORLD, error_code, ierr) #endif + stop +#endif + endif end subroutine abort_ice @@ -64,10 +84,15 @@ end subroutine abort_ice subroutine end_run +! Ends run by calling MPI_FINALIZE +! Does nothing in serial runs + + integer (int_kind) :: ierr ! MPI error flag character(len=*), parameter :: subname = '(end_run)' -! Ends parallel run by calling MPI_FINALIZE. -! Does nothing in serial runs. 
+#ifndef SERIAL_REMOVE_MPI + call MPI_FINALIZE(ierr) +#endif end subroutine end_run diff --git a/cicecore/cicedyn/infrastructure/comm/serial/ice_global_reductions.F90 b/cicecore/cicedyn/infrastructure/comm/serial/ice_global_reductions.F90 index 5fcd45876..ed36cc6c0 100644 --- a/cicecore/cicedyn/infrastructure/comm/serial/ice_global_reductions.F90 +++ b/cicecore/cicedyn/infrastructure/comm/serial/ice_global_reductions.F90 @@ -182,7 +182,7 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -190,25 +190,45 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = array(i,j,iblock) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = array(i,j,iblock) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array(i,j,iblock) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -318,7 +338,7 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -326,25 +346,45 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = real(array(i,j,iblock),dbl_kind) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = real(array(i,j,iblock),dbl_kind) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = real(array(i,j,iblock),dbl_kind) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! 
eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -446,7 +486,7 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -457,7 +497,7 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & do j=jb,je do i=ib,ie ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + if (maxiglob >= 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then ! blockSum = blockSum + 0_int_kind else if (present(mMask)) then @@ -799,7 +839,7 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -807,25 +847,45 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then - work(n,1) = 0._dbl_kind - else - if (present(mMask)) then - work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) - else if (present(lMask)) then - if (lMask(i,j,iblock)) then - work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) - endif - else + if (present(mMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) + end do + end do + elseif (present(lMask)) then + do j=jb,je + do i=ib,ie + n = n + 1 + if (lMask(i,j,iblock)) then work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) endif - endif - end do - end do + end do + end do + else + do j=jb,je + do i=ib,ie + n = n + 1 + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) + enddo + enddo + endif + + if (maxiglob >= 0) then + ! eliminate redundant points at je + ! set n to (ib,je) index + n = (iblock-1)*nx_block*ny_block + n = n + (je-1-jb+1)*(ie-ib+1) + j=je + do i=ib,ie + n = n + 1 + if (this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + endif + end do + endif + end do call compute_sums_dbl(work,sums,communicator,numProcs) @@ -937,7 +997,7 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then - maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold + maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold else maxiglob = -1 ! nothing to do for T-row on u-fold endif @@ -945,25 +1005,45 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & n = (iblock-1)*nx_block*ny_block - do j=jb,je - do i=ib,ie - n = n + 1 - ! eliminate redundant points - if (maxiglob > 0 .and. j == je .and. 
this_block%i_glob(i) > maxiglob) then
-            work(n,1) = 0._dbl_kind
-         else
-            if (present(mMask)) then
-               work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind)
-            else if (present(lMask)) then
-               if (lMask(i,j,iblock)) then
-                  work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind)
-               endif
-            else
+      if (present(mMask)) then
+         do j=jb,je
+         do i=ib,ie
+            n = n + 1
+            work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind)
+         end do
+         end do
+      elseif (present(lMask)) then
+         do j=jb,je
+         do i=ib,ie
+            n = n + 1
+            if (lMask(i,j,iblock)) then
                work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind)
             endif
-         endif
-      end do
-      end do
+         end do
+         end do
+      else
+         do j=jb,je
+         do i=ib,ie
+            n = n + 1
+            work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind)
+         enddo
+         enddo
+      endif
+
+      if (maxiglob >= 0) then
+         ! eliminate redundant points at je
+         ! set n to (ib,je) index
+         n = (iblock-1)*nx_block*ny_block
+         n = n + (je-1-jb+1)*(ie-ib+1)
+         j=je
+         do i=ib,ie
+            n = n + 1
+            if (this_block%i_glob(i) > maxiglob) then
+               work(n,1) = 0._dbl_kind
+            endif
+         end do
+      endif
+
    end do
 
    call compute_sums_dbl(work,sums,communicator,numProcs)
 
@@ -1067,7 +1147,7 @@ function global_sum_prod_int (array1, array2, dist, field_loc, &
       if (Nrow .and. this_block%tripoleTFlag) then
          maxiglob = 0 ! entire u-row on T-fold grid
       elseif (Nrow .or. this_block%tripoleTFlag) then
-         maxiglob = nx_global/2 ! half T-row on T-fold and u-row on u-fold
+         maxiglob = nx_global/2 ! half T-row on T-fold or half u-row on u-fold
       else
          maxiglob = -1 ! nothing to do for T-row on u-fold
       endif
@@ -1078,7 +1158,7 @@ function global_sum_prod_int (array1, array2, dist, field_loc, &
       do j=jb,je
       do i=ib,ie
          ! eliminate redundant points
-         if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then
+         if (maxiglob >= 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then
             ! blockSum = blockSum + 0_int_kind
          else
             if (present(mMask)) then
diff --git a/cicecore/drivers/unittest/sumchk/sumchk.F90 b/cicecore/drivers/unittest/sumchk/sumchk.F90
index d9ea72d8c..1a2745aea 100644
--- a/cicecore/drivers/unittest/sumchk/sumchk.F90
+++ b/cicecore/drivers/unittest/sumchk/sumchk.F90
@@ -16,10 +16,10 @@ program sumchk
   use ice_communicate, only: my_task, master_task, get_num_procs
   use ice_domain_size, only: nx_global, ny_global
   use ice_domain_size, only: block_size_x, block_size_y, max_blocks
-  use ice_domain, only: distrb_info
+  use ice_domain, only: distrb_info, ns_boundary_type
   use ice_blocks, only: block, get_block, nx_block, ny_block, nblocks_tot
   use ice_distribution, only: ice_distributionGetBlockID, ice_distributionGet
-  use ice_constants, only: field_loc_center, field_loc_Nface
+  use ice_constants, only: field_loc_center, field_loc_Nface, field_loc_Eface, field_loc_NEcorner
   use ice_fileunits, only: bfbflag
   use ice_global_reductions
   use ice_exit, only: abort_ice, end_run
@@ -113,6 +113,13 @@ program sumchk
      write(6,*) ' block_size_y = ',block_size_y
      write(6,*) ' nblocks_tot  = ',nblocks_tot
      write(6,*) ' '
+     write(6,*) ' Values are generally O(1.), lscale is the relative size of'
+     write(6,*) '   values set in the array to test precision. A pair of equal'
+     write(6,*) '   and opposite values of O(lscale) are placed in the array.'
+     write(6,*) '   "easy" sets the lscaled values at the start of the array so'
+     write(6,*) '   they are added to the sum first. Otherwise, the lscaled values'
+     write(6,*) '   are set near the end of the array to create precision'
+     write(6,*) '   challenges in the global sums.'
      endif
 
 ! ---------------------------
 
@@ -165,7 +172,7 @@ program sumchk
      reldigchk(4,4) = 0.
      reldigchk(5,4) = 15.
      if (nx_global == 360 .and. ny_global == 240) then
-        reldigchk(1:3,1) = 13.
+        reldigchk(1:3,1) = 12.5
         reldigchk(5,4) = 14.
      endif
 #else
@@ -181,7 +188,7 @@ program sumchk
      reldigchk(4,4) = 0.
      reldigchk(5,4) = 15.
      if (nx_global == 360 .and. ny_global == 240) then
-        reldigchk(1:2,1) = 13.
+        reldigchk(1:2,1) = 12.5
         reldigchk(5,4) = 14.
      endif
 #endif
@@ -212,20 +219,22 @@ program sumchk
 ! set corval to something a little interesting (not 1.0 for instance which gives atypical results)
       corval = 4.0_dbl_kind/3.0_dbl_kind
       iocval = 8
-      ! tuned for gx3 and tx1 only
-      if ((nx_global == 100 .and. ny_global == 116) .or. &
-          (nx_global == 360 .and. ny_global == 240)) then
-         if (field_loc(m) == field_loc_Nface .and. nx_global == 360 .and. ny_global == 240) then
-            ! tx1 tripole face, need to adjust local value to remove half of row at ny_global
-            ! or modify corval to account for different sum
-            locval = corval / real((nblocks_tot*(block_size_x*block_size_y-2)-nx_global/2),dbl_kind)
-            corvali = (nblocks_tot*(block_size_x*block_size_y-2)-nx_global/2)*iocval
-         else
-            locval = corval / real(nblocks_tot*(block_size_x*block_size_y-2),dbl_kind)
-            corvali = nblocks_tot*(block_size_x*block_size_y-2)*iocval
-         endif
+      if ((ns_boundary_type == 'tripoleT' .and. field_loc(m) == field_loc_Nface   ) .or. &
+          (ns_boundary_type == 'tripoleT' .and. field_loc(m) == field_loc_NEcorner)) then
+         ! remove full row at ny_global
+         locval = corval / real((nblocks_tot*(block_size_x*block_size_y-2)-nx_global),dbl_kind)
+         corvali = (nblocks_tot*(block_size_x*block_size_y-2)-nx_global)*iocval
+      elseif ((ns_boundary_type == 'tripoleT' .and. field_loc(m) == field_loc_center  ) .or. &
+              (ns_boundary_type == 'tripoleT' .and. field_loc(m) == field_loc_Eface   ) .or. &
+              (ns_boundary_type == 'tripole'  .and. field_loc(m) == field_loc_NEcorner) .or. &
+              (ns_boundary_type == 'tripole'  .and. field_loc(m) == field_loc_Nface   )) then
+         ! remove half of row at ny_global
+         locval = corval / real((nblocks_tot*(block_size_x*block_size_y-2)-nx_global/2),dbl_kind)
+         corvali = (nblocks_tot*(block_size_x*block_size_y-2)-nx_global/2)*iocval
       else
-         call abort_ice(subname//' ERROR not set for this grid ')
+         ! all gridcells
+         locval = corval / real(nblocks_tot*(block_size_x*block_size_y-2),dbl_kind)
+         corvali = nblocks_tot*(block_size_x*block_size_y-2)*iocval
      endif
 
      do l = 1, nscale
@@ -253,18 +262,18 @@ program sumchk
          jb = this_block%jlo
          je = this_block%jhi
 
-         lmask(ie,je-1,iblock) = .false.
-         lmask(ie,je-2,iblock) = .false.
-         arrayA(ie,je-1,iblock) = locval * lscale(l)
+         lmask(ie,je-1,iblock)  = .false.
+         lmask(ie,je-2,iblock)  = .false.
+ arrayA(ie,je-1,iblock) = locval * lscale(l) arrayA(ie,je-2,iblock) = -arrayA(ie,je-1,iblock) - arrayB(ie,je-1,iblock) = locval * lscale(l) + arrayB(ie,je-1,iblock) = locval * lscale(l) arrayB(ie,je-2,iblock) = arrayB(ie,je-1,iblock) arrayC(ib,jb,iblock) = locval * lscale(l) arrayC(ib+1,jb,iblock) = -arrayC(ib,jb,iblock) - arrayiA(:,:,iblock) = iocval - arrayiB(:,:,iblock) = iocval - arrayiA(ie,je-1,iblock) = 13 * iocval - arrayiA(ie,je-2,iblock) = -arrayiA(ie,je-1,iblock) + arrayiA(:,:,iblock) = iocval + arrayiB(:,:,iblock) = iocval + arrayiA(ie,je-1,iblock)= 13 * iocval + arrayiA(ie,je-2,iblock)= -arrayiA(ie,je-1,iblock) enddo do k = 1,ntests1 diff --git a/configuration/scripts/tests/unittest_suite.ts b/configuration/scripts/tests/unittest_suite.ts index 7486e87aa..e64bea2f7 100644 --- a/configuration/scripts/tests/unittest_suite.ts +++ b/configuration/scripts/tests/unittest_suite.ts @@ -5,6 +5,7 @@ unittest gx3 1x1 calchk,short unittest gx3 4x1x25x29x4 sumchk unittest gx3 1x1x25x29x16 sumchk unittest tx1 8x1 sumchk +unittest tx1 8x1 sumchk,tripolet unittest gx3 4x1 bcstchk unittest gx3 1x1 bcstchk unittest gx3 8x2 gridavgchk,dwblockall
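
Illustrative note (appended; not part of the upstream patch). The
global_dot_product change above takes the fast path by summing within each
block, summing the per-block partials locally, and reducing a single scalar.
A minimal standalone sketch of that pattern, assuming a working MPI
environment; names like prod, blksum, and lsum are illustrative, not the
CICE variables:

   program local_sum_sketch
      use mpi
      implicit none
      integer, parameter :: dbl = selected_real_kind(13)   ! 8-byte real
      integer, parameter :: nx = 4, ny = 4, nblocks = 2
      real (kind=dbl) :: prod(nx,ny,nblocks)   ! per-gridpoint products
      real (kind=dbl) :: blksum(nblocks)       ! per-block partial sums
      real (kind=dbl) :: lsum, gsum
      integer :: iblk, ierr

      call MPI_Init(ierr)
      prod = 1.0_dbl                  ! stand-in for the vector1*vector2 terms

      do iblk = 1, nblocks            ! sum within each block first
         blksum(iblk) = sum(prod(:,:,iblk))
      enddo
      lsum = sum(blksum)              ! then over the local blocks

      ! one scalar reduction instead of reducing the full array
      call MPI_Allreduce(lsum, gsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM, &
                         MPI_COMM_WORLD, ierr)
      print *, 'global dot product =', gsum

      call MPI_Finalize(ierr)
   end program local_sum_sketch

For stricter bfbflag settings or tripole grids, the patch still routes
through the array global_sum, which applies the seam masking shown in
ice_global_reductions.F90.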
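Illustrative note (appended; not part of the upstream patch). The tripoleT
bug fixed above is an off-by-one in the seam masking test: maxiglob = 0
means the entire je row is redundant (u-row on a T-fold grid), nx_global/2
means half of it, and -1 means none. The old test "maxiglob > 0" skipped
the masking entirely when maxiglob == 0, double counting a full row. A
self-contained sketch of the corrected logic, with the loop index i
standing in for this_block%i_glob(i):

   program tripole_mask_sketch
      implicit none
      integer, parameter :: dbl = selected_real_kind(13)
      integer, parameter :: nx_global = 8
      real (kind=dbl) :: row(nx_global)
      integer :: i, maxiglob

      row = 1.0_dbl
      maxiglob = 0      ! tripoleT u-row: every point duplicates the fold

      ! with "maxiglob > 0" this block was skipped for maxiglob == 0;
      ! ">= 0" zeroes the whole redundant row as intended
      if (maxiglob >= 0) then
         do i = 1, nx_global
            if (i > maxiglob) row(i) = 0._dbl
         enddo
      endif

      print *, 'masked row sum =', sum(row)   ! 0.0, as required
   end program tripole_mask_sketch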
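Illustrative note (appended; not part of the upstream patch). The new
sumchk help text describes how the test stresses precision: a pair of
equal-and-opposite values of size lscale either leads the array ("easy")
or sits near its end. A small sketch with illustrative magnitudes showing
why the ordering matters to a naive left-to-right sum:

   program sum_order_sketch
      implicit none
      integer, parameter :: dbl = selected_real_kind(13)
      integer, parameter :: n = 1000
      real (kind=dbl), parameter :: big = 1.0e17_dbl
      real (kind=dbl) :: a(n), b(n)

      a = 1.0_dbl
      b = 1.0_dbl
      a(1)   =  big     ! "easy": the pair cancels exactly at the start
      a(2)   = -big
      b(n-1) =  big     ! "hard": the pair lands after the running sum
      b(n)   = -big

      print *, 'exact         ', real(n-2, dbl)   ! 998.0
      print *, 'easy ordering ', naive_sum(a)     ! 998.0
      print *, 'hard ordering ', naive_sum(b)     ! 992.0 in IEEE double

   contains

      function naive_sum(x) result(s)
         real (kind=dbl), intent(in) :: x(:)
         real (kind=dbl) :: s
         integer :: k
         s = 0.0_dbl
         do k = 1, size(x)
            s = s + x(k)   ! fixed left-to-right order, no compensation
         enddo
      end function naive_sum

   end program sum_order_sketch

Rounding losses of this kind are what the reldigchk digit thresholds in
sumchk are tuned against; the relaxation to 12.5 digits above loosens the
check for the tx1 grid in the same spirit.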