From ccf9ba51369803c74d0b4070cbbcbe1f72eb9161 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 29 May 2018 18:00:55 +0000 Subject: [PATCH 001/196] initial coding of ice_dyn_vp.F90 --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 1242 +++++++++++++++++++++ 1 file changed, 1242 insertions(+) create mode 100644 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 new file mode 100644 index 000000000..ffc577ee7 --- /dev/null +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -0,0 +1,1242 @@ +! SVN:$Id: ice_dyn_evp.F90 1228 2017-05-23 21:33:34Z tcraig $ +!======================================================================= +! +! Elastic-viscous-plastic sea ice dynamics model +! Computes ice velocity and deformation +! +! See: +! +! Hunke, E. C., and J. K. Dukowicz (1997). An elastic-viscous-plastic model +! for sea ice dynamics. {\em J. Phys. Oceanogr.}, {\bf 27}, 1849--1867. +! +! Hunke, E. C. (2001). Viscous-Plastic Sea Ice Dynamics with the EVP Model: +! Linearization Issues. {\em Journal of Computational Physics}, {\bf 170}, +! 18--38. +! +! Hunke, E. C., and J. K. Dukowicz (2002). The Elastic-Viscous-Plastic +! Sea Ice Dynamics Model in General Orthogonal Curvilinear Coordinates +! on a Sphere---Incorporation of Metric Terms. {\em Monthly Weather Review}, +! {\bf 130}, 1848--1865. +! +! Hunke, E. C., and J. K. Dukowicz (2003). The sea ice momentum +! equation in the free drift regime. Los Alamos Tech. Rep. LA-UR-03-2219. +! +! Bouillon, S., T. Fichefet, V. Legat and G. Madec (submitted 2013). The +! revised elastic-viscous-plastic method. Ocean Modelling. +! +! author: Elizabeth C. Hunke, LANL +! +! 2003: Vectorized by Clifford Chen (Fujitsu) and William Lipscomb (LANL) +! 2004: Block structure added by William Lipscomb +! 2005: Removed boundary calls for stress arrays (WHL) +! 2006: Streamlined for efficiency by Elizabeth Hunke +! Converted to free source form (F90) + + module ice_dyn_vp + + use ice_kinds_mod + use ice_constants, only: field_loc_center, field_loc_NEcorner, & + field_type_scalar, field_type_vector + use ice_constants, only: c0, c4, p027, p055, p111, p166, & + p2, p222, p25, p333, p5, c1 + use ice_dyn_shared, only: stepu, evp_prep1, evp_prep2, evp_finish, & + yield_curve, ecci, fcor_blk, uvel_init, & + vvel_init, basal_stress_coeff, basalstress, Ktens + use ice_fileunits, only: nu_diag + use ice_exit, only: abort_ice + use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted + use icepack_intfc, only: icepack_ice_strength, icepack_query_parameters +#ifdef CICE_IN_NEMO + use icepack_intfc, only: calc_strair +#endif + + implicit none + private + public :: evp + +!======================================================================= + + contains + +!======================================================================= + +! Viscous-plastic dynamics driver +! +#ifdef CICE_IN_NEMO +! Wind stress is set during this routine from the values supplied +! via NEMO (unless calc_strair is true). These values are supplied +! rotated on u grid and multiplied by aice. strairxT = 0 in this +! case so operations in evp_prep1 are pointless but carried out to +! minimise code changes. +#endif +! +! author: JF Lemieux, F. Dupont and A. 
Qaddouri, ECCC + + subroutine imp_solver (dt) + + use ice_arrays_column, only: Cdn_ocn + use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, & + ice_HaloDestroy, ice_HaloUpdate_stress + use ice_blocks, only: block, get_block, nx_block, ny_block + use ice_domain, only: nblocks, blocks_ice, halo_info, maskhalo_dyn + use ice_domain_size, only: max_blocks, ncat + use ice_flux, only: rdg_conv, rdg_shear, strairxT, strairyT, & + strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, & + strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, & + strocnxT, strocnyT, strax, stray, & + Tbu, hwater, & + stressp_1, stressp_2, stressp_3, stressp_4, & + stressm_1, stressm_2, stressm_3, stressm_4, & + stress12_1, stress12_2, stress12_3, stress12_4 + use ice_grid, only: tmask, umask, dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & + tarear, uarear, tinyarea, to_ugrid, t2ugrid_vector, u2tgrid_vector, & + grid_type + use ice_state, only: aice, vice, vsno, uvel, vvel, divu, shear, & + aice_init, aice0, aicen, vicen, strength + use ice_timers, only: timer_dynamics, timer_bound, & + ice_timer_start, ice_timer_stop + + real (kind=dbl_kind), intent(in) :: & + dt ! time step + + ! local variables + + integer (kind=int_kind) :: & + kOL , & ! outer loop iteration + kmax , & ! jfl put in namelist + iblk , & ! block index + ilo,ihi,jlo,jhi, & ! beginning and end of physical domain + i, j, ij + + integer (kind=int_kind), dimension(max_blocks) :: & + icellt , & ! no. of cells where icetmask = 1 + icellu ! no. of cells where iceumask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks) :: & + indxti , & ! compressed index in i-direction + indxtj , & ! compressed index in j-direction + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & + tmass , & ! total mass of ice and snow (kg/m^2) + waterx , & ! for ocean stress calculation, x (m/s) + watery , & ! for ocean stress calculation, y (m/s) + forcex , & ! work array: combined atm stress and ocn tilt, x + forcey , & ! work array: combined atm stress and ocn tilt, y + bxfix , & ! bx = taux + bxfix !jfl + byfix , & ! by = tauy + byfix !jfl + bx , & ! b vector, bx = taux + bxfix !jfl + by , & ! b vector, by = tauy + byfix !jfl + Au , & ! matvec, Fx = Au - bx ! jfl + Av , & ! matvec, Fy = Av - by ! jfl + Fx , & ! x residual vector, Fx = Au - bx ! jfl + Fy , & ! y residual vector, Fy = Av - by ! jfl + aiu , & ! ice fraction on u-grid + umass , & ! total mass of ice and snow (u grid) + umassdti ! mass of U-cell/dte (kg/m^2 s) + + real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) + + real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & + strtmp ! stress combinations for momentum equation + + integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & + icetmask, & ! ice extent mask (T-cell) + halomask ! generic halo mask + + type (ice_halo) :: & + halo_info_mask ! ghost cell update info for masked halo + + type (block) :: & + this_block ! block information for current block + + call ice_timer_start(timer_dynamics) ! dynamics + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + allocate(fld2(nx_block,ny_block,2,max_blocks)) + + ! This call is needed only if dt changes during runtime. +! call set_evp_parameters (dt) + + !----------------------------------------------------------------- + ! 
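      ! Sketch of the system the arrays above assemble (a restatement
      ! of the declarations, with u standing for the velocity iterate):
      ! an implicit discretization of the momentum equation yields the
      ! nonlinear system
      !
      !    A(u) u = b(u)
      !
      ! where A carries the inertia term umassdti = m/dt, the
      ! linearized water drag and basal stress, the Coriolis coupling,
      ! and the divergence of the internal stress, while
      !
      !    b = bfix + tau_w(u),  bfix = (m/dt) u_init + (forcex, forcey)
      !
      ! as computed by calc_bfix. The outer loop freezes the
      ! u-dependent pieces at the previous iterate and monitors the
      ! residual F(u) = A(u) u - b(u), i.e. Fx = Au - bx, Fy = Av - by.
      !-----------------------------------------------------------------
      !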
boundary updates + ! commented out because the ghost cells are freshly + ! updated after cleanup_itd + !----------------------------------------------------------------- + +! call ice_timer_start(timer_bound) +! call ice_HaloUpdate (aice, halo_info, & +! field_loc_center, field_type_scalar) +! call ice_HaloUpdate (vice, halo_info, & +! field_loc_center, field_type_scalar) +! call ice_HaloUpdate (vsno, halo_info, & +! field_loc_center, field_type_scalar) +! call ice_timer_stop(timer_bound) + + !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) + do iblk = 1, nblocks + + do j = 1, ny_block + do i = 1, nx_block + rdg_conv (i,j,iblk) = c0 + rdg_shear(i,j,iblk) = c0 + divu (i,j,iblk) = c0 + shear(i,j,iblk) = c0 + enddo + enddo + + !----------------------------------------------------------------- + ! preparation for dynamics JFL change names of evp_prep1 and 2 + !----------------------------------------------------------------- + + this_block = get_block(blocks_ice(iblk),iblk) + ilo = this_block%ilo + ihi = this_block%ihi + jlo = this_block%jlo + jhi = this_block%jhi + + call evp_prep1 (nx_block, ny_block, & + ilo, ihi, jlo, jhi, & + aice (:,:,iblk), vice (:,:,iblk), & + vsno (:,:,iblk), tmask (:,:,iblk), & + strairxT(:,:,iblk), strairyT(:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & + tmass (:,:,iblk), icetmask(:,:,iblk)) + + enddo ! iblk + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + call ice_HaloUpdate (icetmask, halo_info, & + field_loc_center, field_type_scalar) + call ice_timer_stop(timer_bound) + + !----------------------------------------------------------------- + ! convert fields from T to U grid + !----------------------------------------------------------------- + + call to_ugrid(tmass,umass) + call to_ugrid(aice_init, aiu) + +#ifdef CICE_IN_NEMO + !---------------------------------------------------------------- + ! Set wind stress to values supplied via NEMO + ! This wind stress is rotated on u grid and multiplied by aice + !---------------------------------------------------------------- + if (.not. calc_strair) then + strairx(:,:,:) = strax(:,:,:) + strairy(:,:,:) = stray(:,:,:) + else +#endif + call t2ugrid_vector(strairx) + call t2ugrid_vector(strairy) +#ifdef CICE_IN_NEMO + endif +#endif + +! tcraig, tcx, threading here leads to some non-reproducbile results and failures in icepack_ice_strength +! need to do more debugging + !$TCXOMP PARALLEL DO PRIVATE(iblk,ilo,ihi,jlo,jhi,this_block) + do iblk = 1, nblocks + + !----------------------------------------------------------------- + ! 
more preparation for dynamics + !----------------------------------------------------------------- + + this_block = get_block(blocks_ice(iblk),iblk) + ilo = this_block%ilo + ihi = this_block%ihi + jlo = this_block%jlo + jhi = this_block%jhi + + call evp_prep2 (nx_block, ny_block, & + ilo, ihi, jlo, jhi, & + icellt(iblk), icellu(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + aiu (:,:,iblk), umass (:,:,iblk), & + umassdti (:,:,iblk), fcor_blk (:,:,iblk), & + umask (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & + ss_tltx (:,:,iblk), ss_tlty (:,:,iblk), & + icetmask (:,:,iblk), iceumask (:,:,iblk), & + fm (:,:,iblk), dt, & + strtltx (:,:,iblk), strtlty (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + taubx (:,:,iblk), tauby (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + forcex (:,:,iblk), forcey (:,:,iblk), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk), & + uvel_init (:,:,iblk), vvel_init (:,:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + Tbu (:,:,iblk)) + + call calc_bfix (nx_block , ny_block, & + icellu(iblk) , & + indxui (:,iblk), indxuj (:,iblk), & + umassdti (:,:,iblk), & + forcex (:,:,iblk), forcey (:,:,iblk), & + uvel_init (:,:,iblk), vvel_init (:,:,iblk), & + bxfix (:,:,iblk), byfix (:,:,iblk)) + + !----------------------------------------------------------------- + ! ice strength + !----------------------------------------------------------------- + + strength(:,:,iblk) = c0 ! initialize + do ij = 1, icellt(iblk) + i = indxti(ij, iblk) + j = indxtj(ij, iblk) + call icepack_ice_strength (ncat, & + aice (i,j, iblk), & + vice (i,j, iblk), & + aice0 (i,j, iblk), & + aicen (i,j,:,iblk), & + vicen (i,j,:,iblk), & + strength(i,j, iblk) ) + enddo ! ij + + ! load velocity into array for boundary updates + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + + enddo ! iblk + !$TCXOMP END PARALLEL DO + + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + call ice_timer_start(timer_bound) + call ice_HaloUpdate (strength, halo_info, & + field_loc_center, field_type_scalar) + ! velocities may have changed in evp_prep2 + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + call ice_timer_stop(timer_bound) + + ! unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + if (maskhalo_dyn) then + call ice_timer_start(timer_bound) + halomask = 0 + where (iceumask) halomask = 1 + call ice_HaloUpdate (halomask, halo_info, & + field_loc_center, field_type_scalar) + call ice_timer_stop(timer_bound) + call ice_HaloMask(halo_info_mask, halo_info, halomask) + endif + + !----------------------------------------------------------------- + ! 
basal stress coefficients (landfast ice) + !----------------------------------------------------------------- + + if (basalstress) then + do iblk = 1, nblocks + call basal_stress_coeff (nx_block, ny_block, & + icellu (iblk), & + indxui(:,iblk), indxuj(:,iblk), & + vice(:,:,iblk), aice(:,:,iblk), & + hwater(:,:,iblk), Tbu(:,:,iblk)) + enddo + endif + + kmax=1 + do kOL = 1,kmax ! outer loop + + !----------------------------------------------------------------- + ! stress tensor equation, total surface stress + !----------------------------------------------------------------- + + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + do iblk = 1, nblocks + +! CALC Au and Av (MATVEC) + call stress_vp (nx_block, ny_block, & + kOL, icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk), & + strength (:,:,iblk), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk), & + shear (:,:,iblk), divu (:,:,iblk), & + rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & + strtmp (:,:,:) ) + + !----------------------------------------------------------------- + ! momentum equation + !----------------------------------------------------------------- + + call matvec (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , & + aiu (:,:,iblk), strtmp (:,:,:), & + uocn (:,:,iblk), vocn (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + umassdti (:,:,iblk), fm (:,:,iblk), & + uarear (:,:,iblk), Tbu (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + taubx (:,:,iblk), tauby (:,:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk)) + +! end of Au and Av calc +! CALC b_u and b_v (bvec) + + + call bvec (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , & + aiu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + forcex (:,:,iblk), forcey (:,:,iblk), & + umassdti (:,:,iblk), & + uvel_init(:,:,iblk), vvel_init(:,:,iblk),& + uvel (:,:,iblk), vvel (:,:,iblk), & + bxfix (:,:,iblk), byfix (:,:,iblk), & + bx (:,:,iblk), by (:,:,iblk)) + + ! load velocity into array for boundary updates + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + + call residual_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk), & + Fx (:,:,iblk), Fy (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + + ! unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + enddo ! outer loop + + deallocate(fld2) + if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) + + ! 
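      ! The outer loop just closed has, per iteration, this structure
      ! (a sketch of the calls above, not a prescription):
      !
      !    do kOL = 1, kmax
      !       stress_vp    : stresses at the current iterate
      !       matvec       : Au, Av = action of A(u) on (uvel, vvel)
      !       bvec         : bx, by = b(u)
      !       residual_vec : Fx, Fy = Au - bx, Av - by
      !       halo update of (uvel, vvel)
      !    enddo
      !
      ! As committed, kmax = 1 (flagged "jfl put in namelist") and no
      ! linear solve updates (uvel, vvel) yet, so the loop only
      ! evaluates the operator and the residual; an exit test on a
      ! norm of (Fx, Fy) would be the natural convergence criterion.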
Force symmetry across the tripole seam + if (trim(grid_type) == 'tripole') then + if (maskhalo_dyn) then + !------------------------------------------------------- + ! set halomask to zero because ice_HaloMask always keeps + ! local copies AND tripole zipper communication + !------------------------------------------------------- + halomask = 0 + call ice_HaloMask(halo_info_mask, halo_info, halomask) + + call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloDestroy(halo_info_mask) + else + call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info, & + field_loc_center, field_type_scalar) + endif ! maskhalo + endif ! tripole + + !----------------------------------------------------------------- + ! 
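      ! A note on the paired calls above (the pairing is read off the
      ! argument lists): across the tripole seam the grid folds back on
      ! itself with a 180-degree rotation, so a T-cell corner maps onto
      ! the opposite corner of the cell facing it across the seam. The
      ! northeast/southwest stresses (e.g. stressp_1 and stressp_3) and
      ! the northwest/southeast stresses (stressp_2 and stressp_4)
      ! therefore trade places in the halo, which is why each
      ! ice_HaloUpdate_stress call carries both members of a pair.
      !-----------------------------------------------------------------
      !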
ice-ocean stress + !----------------------------------------------------------------- + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + call evp_finish & + (nx_block, ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + aiu (:,:,iblk), fm (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & + strocnxT(:,:,iblk), strocnyT(:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + call u2tgrid_vector(strocnxT) ! shift + call u2tgrid_vector(strocnyT) + + call ice_timer_stop(timer_dynamics) ! dynamics + + end subroutine imp_solver + +!======================================================================= + +! Computes the rates of strain and internal stress components for +! each of the four corners on each T-grid cell. +! Computes stress terms for the momentum equation +! +! author: Elizabeth C. Hunke, LANL, JF Lemieux, ECCC + + subroutine stress_vp (nx_block, ny_block, & + kOL, icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + strength, & + stressp_1, stressp_2, & + stressp_3, stressp_4, & + stressm_1, stressm_2, & + stressm_3, stressm_4, & + stress12_1, stress12_2, & + stress12_3, stress12_4, & + shear, divu, & + rdg_conv, rdg_shear, & + str ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + kOL , & ! subcycling step + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + strength , & ! ice strength (N/m) + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 + stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 + stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + shear , & ! strain rate II component (1/s) + divu , & ! strain rate I component, velocity divergence (1/s) + rdg_conv , & ! convergence term for ridging (1/s) + rdg_shear ! shear term for ridging (1/s) + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + str ! stress combinations + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + puny , & ! puny + c0ne, c0nw, c0se, c0sw , & ! 
useful combinations + c1ne, c1nw, c1se, c1sw , & + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + str(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + + !----------------------------------------------------------------- + ! on last subcycle, save quantities for mechanical redistribution + !----------------------------------------------------------------- + if (kOL == kmax) then ! jfl MODIF + divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) + tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) + rdg_conv(i,j) = -min(divu(i,j),c0) + rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) + + ! diagnostic only + ! shear = sqrt(tension**2 + shearing**2) + shear(i,j) = p25*tarear(i,j)*sqrt( & + (tensionne + tensionnw + tensionse + tensionsw)**2 & + + (shearne + shearnw + shearse + shearsw)**2) + + endif + + !----------------------------------------------------------------- + ! replacement pressure/Delta ! kg/s + ! 
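         ! In symbols (a restatement of the code, with ecci = 1/e^2 the
         ! inverse squared axis ratio of the elliptical yield curve):
         !
         !    Delta = sqrt( div^2 + (tension^2 + shear^2)/e^2 )
         !
         ! and the coefficient computed just below is
         !
         !    c0 = P / max(Delta, tinyarea)
         !
         ! per corner, i.e. twice the bulk viscosity zeta = P/(2 Delta),
         ! bounded so the viscosities stay finite as Delta -> 0
         ! (tinyarea = puny*tarea keeps the denominator positive). The
         ! product c0*Delta is the replacement pressure: equal to P
         ! where Delta exceeds the bound, decaying to zero below it.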
save replacement pressure for principal stress calculation + !----------------------------------------------------------------- + c0ne = strength(i,j)/max(Deltane,tinyarea(i,j)) + c0nw = strength(i,j)/max(Deltanw,tinyarea(i,j)) + c0sw = strength(i,j)/max(Deltasw,tinyarea(i,j)) + c0se = strength(i,j)/max(Deltase,tinyarea(i,j)) + +! c1ne = c0ne*arlx1i +! c1nw = c0nw*arlx1i +! c1sw = c0sw*arlx1i +! c1se = c0se*arlx1i + +! c0ne = c1ne*ecci +! c0nw = c1nw*ecci +! c0sw = c1sw*ecci +! c0se = c1se*ecci + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1(i,j) = c0ne*(divune*(c1+Ktens) - Deltane*(c1-Ktens)) + stressp_2(i,j) = c0nw*(divunw*(c1+Ktens) - Deltanw*(c1-Ktens)) + stressp_3(i,j) = c0sw*(divusw*(c1+Ktens) - Deltasw*(c1-Ktens)) + stressp_4(i,j) = c0se*(divuse*(c1+Ktens) - Deltase*(c1-Ktens)) + + stressm_1(i,j) = c0ne*tensionne*(c1+Ktens)*ecci + stressm_2(i,j) = c0nw*tensionnw*(c1+Ktens)*ecci + stressm_3(i,j) = c0sw*tensionsw*(c1+Ktens)*ecci + stressm_4(i,j) = c0se*tensionse*(c1+Ktens)*ecci + + stress12_1(i,j) = c0ne*shearne*p5*(c1+Ktens)*ecci + stress12_2(i,j) = c0nw*shearnw*p5*(c1+Ktens)*ecci + stress12_3(i,j) = c0sw*shearsw*p5*(c1+Ktens)*ecci + stress12_4(i,j) = c0se*shearse*p5*(c1+Ktens)*ecci + + !----------------------------------------------------------------- + ! Eliminate underflows. + ! The following code is commented out because it is relatively + ! expensive and most compilers include a flag that accomplishes + ! the same thing more efficiently. This code is cheaper than + ! handling underflows if the compiler lacks a flag; uncomment + ! it in that case. The compiler flag is often described with the + ! phrase "flush to zero". + !----------------------------------------------------------------- + +! call icepack_query_parameters(puny_out=puny) +! call icepack_warnings_flush(nu_diag) +! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & +! file=__FILE__, line=__LINE__) + +! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) +! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) +! stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) +! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) + +! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) +! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) +! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) +! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) + +! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) +! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) +! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) +! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! 
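         ! In symbols, with kt = Ktens the tensile strength factor
         ! (a restatement of the expressions above, not new physics):
         !
         !    stressp  = c0*( div*(1+kt) - Delta*(1-kt) )  (sigma11+sigma22)
         !    stressm  = c0*  tension*(1+kt)/e^2           (sigma11-sigma22)
         !    stress12 = c0*  shear*(1+kt)/(2 e^2)
         !
         ! where c0 denotes the corner coefficients c0ne..c0se above.
         ! With kt = 0 these are the standard viscous-plastic stresses
         ! with replacement pressure; kt > 0 adds isotropic tensile
         ! strength proportional to P.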
kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1(i,j) + stressp_2(i,j) + ssigps = stressp_3(i,j) + stressp_4(i,j) + ssigpe = stressp_1(i,j) + stressp_4(i,j) + ssigpw = stressp_2(i,j) + stressp_3(i,j) + ssigp1 =(stressp_1(i,j) + stressp_3(i,j))*p055 + ssigp2 =(stressp_2(i,j) + stressp_4(i,j))*p055 + + ssigmn = stressm_1(i,j) + stressm_2(i,j) + ssigms = stressm_3(i,j) + stressm_4(i,j) + ssigme = stressm_1(i,j) + stressm_4(i,j) + ssigmw = stressm_2(i,j) + stressm_3(i,j) + ssigm1 =(stressm_1(i,j) + stressm_3(i,j))*p055 + ssigm2 =(stressm_2(i,j) + stressm_4(i,j))*p055 + + ssig12n = stress12_1(i,j) + stress12_2(i,j) + ssig12s = stress12_3(i,j) + stress12_4(i,j) + ssig12e = stress12_1(i,j) + stress12_4(i,j) + ssig12w = stress12_2(i,j) + stress12_3(i,j) + ssig121 =(stress12_1(i,j) + stress12_3(i,j))*p111 + ssig122 =(stress12_2(i,j) + stress12_4(i,j))*p111 + + csigpne = p111*stressp_1(i,j) + ssigp2 + p027*stressp_3(i,j) + csigpnw = p111*stressp_2(i,j) + ssigp1 + p027*stressp_4(i,j) + csigpsw = p111*stressp_3(i,j) + ssigp2 + p027*stressp_1(i,j) + csigpse = p111*stressp_4(i,j) + ssigp1 + p027*stressp_2(i,j) + + csigmne = p111*stressm_1(i,j) + ssigm2 + p027*stressm_3(i,j) + csigmnw = p111*stressm_2(i,j) + ssigm1 + p027*stressm_4(i,j) + csigmsw = p111*stressm_3(i,j) + ssigm2 + p027*stressm_1(i,j) + csigmse = p111*stressm_4(i,j) + ssigm1 + p027*stressm_2(i,j) + + csig12ne = p222*stress12_1(i,j) + ssig122 & + + p055*stress12_3(i,j) + csig12nw = p222*stress12_2(i,j) + ssig121 & + + p055*stress12_4(i,j) + csig12sw = p222*stress12_3(i,j) + ssig122 & + + p055*stress12_1(i,j) + csig12se = p222*stress12_4(i,j) + ssig121 & + + p055*stress12_2(i,j) + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northeast (i,j) + str(i,j,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + ! northwest (i+1,j) + str(i,j,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southeast (i,j+1) + str(i,j,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + + ! southwest (i+1,j+1) + str(i,j,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! northeast (i,j) + str(i,j,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + + ! southeast (i,j+1) + str(i,j,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! 
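         ! (Slots 1-4 of str hold this T-cell's contribution to dF/dx
         ! at its NE, NW, SE and SW U-points, and slots 5-8 the dF/dy
         ! contributions at NE, SE, NW and SW; matvec later gathers the
         ! four neighbouring contributions per U-point into strintx and
         ! strinty with the 1/uarea factor.)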
northwest (i+1,j) + str(i,j,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + + ! southwest (i+1,j+1) + str(i,j,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + enddo ! ij + + end subroutine stress_vp + + +!======================================================================= + + subroutine matvec (nx_block, ny_block, & + icellu, Cw, & + indxui, indxuj, & + ksub, & + aiu, str, & + uocn, vocn, & + waterx, watery, & + umassdti, fm, & + uarear, Tbu, & + strocnx, strocny, & + strintx, strinty, & + taubx, tauby, & + uvel, vvel, & + Au, Av) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + kOL ! outer loop iteration + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + Tbu, & ! coefficient for basal stress (N/m^2) + aiu , & ! ice fraction on u-grid + waterx , & ! for ocean stress calculation, x (m/s) + watery , & ! for ocean stress calculation, y (m/s) + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + uocn , & ! ocean current, x-direction (m/s) + vocn , & ! ocean current, y-direction (m/s) + fm , & ! Coriolis param. * mass in U-cell (kg/s) + uarear ! 1/uarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(in) :: & + str + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel ! y-component of velocity (m/s) + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + strocnx , & ! ice-ocean stress, x-direction + strocny , & ! ice-ocean stress, y-direction + strintx , & ! divergence of internal ice stress, x (N/m^2) + strinty , & ! divergence of internal ice stress, y (N/m^2) + taubx , & ! basal stress, x-direction (N/m^2) + tauby , & ! basal stress, y-direction (N/m^2) + Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl + Av ! matvec, Fy = Av - by (N/m^2)! jfl + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + Cw ! ocean-ice neutral drag coefficient + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? + vrel , & ! relative ice-ocean velocity + ccaimp,ccb , & ! intermediate variables + taux, tauy , & ! part of ocean stress term + Cb , & ! complete basal stress coeff + rhow ! + + real (kind=dbl_kind) :: & + u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) + + !----------------------------------------------------------------- + ! integrate the momentum equation + !----------------------------------------------------------------- + + call icepack_query_parameters(rhow_out=rhow) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + utp = uvel(i,j) + vtp = vvel(i,j) + + ! (magnitude of relative ocean current)*rhow*drag*aice + vrel = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & + (vocn(i,j) - vtp)**2) ! m/s + ! ice/ocean stress + taux = vrel*waterx(i,j) ! NOTE this is not the entire + tauy = vrel*watery(i,j) ! ocn stress term + + Cb = Tbu(i,j) / (sqrt(utp**2 + vtp**2) + u0) ! for basal stress + ! 
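         ! Cb linearizes the basal (landfast) stress term: the
         ! nonlinear drag Tbu * u / (|u| + u0) is applied as Cb * u
         ! with Cb frozen at the current speed, u0 being a small
         ! residual speed that keeps Cb bounded as |u| -> 0.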
revp = 0 for classic evp, 1 for revised evp + ccaimp = umassdti(i,j) + vrel * cosw + Cb ! kg/m^2 s + + ccb = fm(i,j) + sign(c1,fm(i,j)) * vrel * sinw ! kg/m^2 s + + ! divergence of the internal stress tensor + strintx(i,j) = uarear(i,j)* & + (str(i,j,1) + str(i+1,j,2) + str(i,j+1,3) + str(i+1,j+1,4)) + strinty(i,j) = uarear(i,j)* & + (str(i,j,5) + str(i,j+1,6) + str(i+1,j,7) + str(i+1,j+1,8)) + + Au(i,j) = ccaimp*utp - ccb*vtp - strintx(i,j) + Av(i,j) = ccaimp*vtp + ccb*utp - strinty(i,j) + + !----------------------------------------------------------------- + ! ocean-ice stress for coupling + ! here, strocn includes the factor of aice + !----------------------------------------------------------------- + strocnx(i,j) = taux ! jfl could be moved + strocny(i,j) = tauy + + ! calculate basal stress component for outputs ! jfl move this +! if (ksub == ndte) then ! on last subcycling iteration +! if ( basalstress ) then +! taubx(i,j) = -uvel(i,j)*Tbu(i,j) / (sqrt(uold**2 + vold**2) + u0) +! tauby(i,j) = -vvel(i,j)*Tbu(i,j) / (sqrt(uold**2 + vold**2) + u0) +! endif +! endif + + enddo ! ij + + end subroutine matvec + +!======================================================================= + + subroutine calc_bfix (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + umassdti, & + forcex, forcey, & + uvel_init, vvel_init, & + bxfix, byfix ) + + use ice_constants, only: c0, c1 + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block ! block dimensions + + integer (kind=int_kind), intent(out) :: & + icellu ! no. of cells where iceumask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(out) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(in) :: & + uvel_init,& ! x-component of velocity (m/s), beginning of time step + vvel_init,& ! y-component of velocity (m/s), beginning of time step + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + forcex , & ! work array: combined atm stress and ocn tilt, x + forcey ! work array: combined atm stress and ocn tilt, y + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(out) :: & + bxfix , & ! bx = taux + bxfix !jfl + byfix ! by = tauy + byfix !jfl + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + +! jfl move in ice_dyn_vp + + !----------------------------------------------------------------- + ! Define variables for momentum equation + !----------------------------------------------------------------- + + do ij = 1, icellu + i = indxui(ij) + j = indxuj(ij) + + bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + + enddo + + end subroutine calc_bfix + +!======================================================================= + + subroutine bvec (nx_block, ny_block, & + icellu, Cw, & + indxui, indxuj, & + kOL, & + aiu, & + uocn, vocn, & + waterx, watery, & + forcex, forcey, & + umassdti, & + uvel_init, vvel_init,& + uvel, vvel, & + bxfix, byfix, & + bx, by) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + kOL ! outer loop iteration + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel_init,& ! 
x-component of velocity (m/s), beginning of timestep + vvel_init,& ! y-component of velocity (m/s), beginning of timestep + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + aiu , & ! ice fraction on u-grid + waterx , & ! for ocean stress calculation, x (m/s) + watery , & ! for ocean stress calculation, y (m/s) + forcex , & ! work array: combined atm stress and ocn tilt, x + forcey , & ! work array: combined atm stress and ocn tilt, y + bxfix , & ! bx = taux + bxfix !jfl + byfix , & ! by = tauy + byfix !jfl + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + uocn , & ! ocean current, x-direction (m/s) + vocn ! ocean current, y-direction (m/s) + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl + by ! b vector, by = tauy + byfix (N/m^2) !jfl + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + Cw ! ocean-ice neutral drag coefficient + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + vrel , & ! relative ice-ocean velocity + utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? + taux, tauy ! part of ocean stress term + rhow ! + + !----------------------------------------------------------------- + ! calc b vector + !----------------------------------------------------------------- + + call icepack_query_parameters(rhow_out=rhow) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + utp = uvel(i,j) + vtp = vvel(i,j) + + ! (magnitude of relative ocean current)*rhow*drag*aice + vrel = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & + (vocn(i,j) - vtp)**2) ! m/s + ! ice/ocean stress + taux = vrel*waterx(i,j) ! NOTE this is not the entire + tauy = vrel*watery(i,j) ! ocn stress term + + bx(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + taux + by(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + tauy + + enddo ! ij + + end subroutine bvec + + !======================================================================= + + subroutine residual_vec (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + bx, by, & + Au, Ay, & + Fx, Fy) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl + by , & ! b vector, by = tauy + byfix (N/m^2) !jfl + Au , & ! matvec, Fx = Au - bx (N/m^2) ! jfl + Av ! matvec, Fy = Av - by (N/m^2) ! jfl + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + Fx , & ! x residual vector, Fx = Au - bx (N/m^2) + Fy ! y residual vector, Fy = Av - by (N/m^2) + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + !----------------------------------------------------------------- + ! 
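      ! (Despite the title below, inherited from bvec, this loop
      ! computes the nonlinear residual Fx = Au - bx, Fy = Av - by
      ! at the current iterate; a convergence test for the outer
      ! loop would monitor a norm of this quantity.)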
calc b vector + !----------------------------------------------------------------- + + call icepack_query_parameters(rhow_out=rhow) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + Fx(i,j) = Au(i,j) - bx(i,j) + Fy(i,j) = Av(i,j) - by(i,j) + + enddo ! ij + + end subroutine residual_vec + +!======================================================================= + + end module ice_dyn_vp + +!======================================================================= From 460d360ef8a4cebfe0fa0ba6f1d60c0bfae7bbbe Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 29 May 2018 19:26:34 +0000 Subject: [PATCH 002/196] code (matvec, bvec and residual) now compiles after debugging --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 27 +++++++++-------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ffc577ee7..529dff401 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -39,8 +39,8 @@ module ice_dyn_vp field_type_scalar, field_type_vector use ice_constants, only: c0, c4, p027, p055, p111, p166, & p2, p222, p25, p333, p5, c1 - use ice_dyn_shared, only: stepu, evp_prep1, evp_prep2, evp_finish, & - yield_curve, ecci, fcor_blk, uvel_init, & + use ice_dyn_shared, only: evp_prep1, evp_prep2, evp_finish, & + yield_curve, ecci, cosw, sinw, fcor_blk, uvel_init, & vvel_init, basal_stress_coeff, basalstress, Ktens use ice_fileunits, only: nu_diag use ice_exit, only: abort_ice @@ -52,7 +52,7 @@ module ice_dyn_vp implicit none private - public :: evp + public :: imp_solver !======================================================================= @@ -709,7 +709,7 @@ subroutine stress_vp (nx_block, ny_block, & !----------------------------------------------------------------- ! on last subcycle, save quantities for mechanical redistribution !----------------------------------------------------------------- - if (kOL == kmax) then ! jfl MODIF + if (kOL == 10) then ! jfl MODIF divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) rdg_conv(i,j) = -min(divu(i,j),c0) @@ -901,7 +901,7 @@ end subroutine stress_vp subroutine matvec (nx_block, ny_block, & icellu, Cw, & indxui, indxuj, & - ksub, & + kOL, & aiu, str, & uocn, vocn, & waterx, watery, & @@ -1041,16 +1041,12 @@ subroutine calc_bfix (nx_block, ny_block, & uvel_init, vvel_init, & bxfix, byfix ) - use ice_constants, only: c0, c1 - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block ! block dimensions + nx_block, ny_block, & ! block dimensions + icellu ! no. of cells where iceumask = 1 - integer (kind=int_kind), intent(out) :: & - icellu ! no. of cells where iceumask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(out) :: & + intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -1148,7 +1144,7 @@ subroutine bvec (nx_block, ny_block, & real (kind=dbl_kind) :: & vrel , & ! relative ice-ocean velocity utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? - taux, tauy ! part of ocean stress term + taux, tauy , & ! part of ocean stress term rhow ! 
!----------------------------------------------------------------- @@ -1187,12 +1183,12 @@ subroutine residual_vec (nx_block, ny_block, & icellu, & indxui, indxuj, & bx, by, & - Au, Ay, & + Au, Av, & Fx, Fy) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1219,7 +1215,6 @@ subroutine residual_vec (nx_block, ny_block, & ! calc b vector !----------------------------------------------------------------- - call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) From 322b9c46050214cc3b2742b3b90d808ddd8c580c Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 30 May 2018 19:04:34 +0000 Subject: [PATCH 003/196] introduced calc_vrel_Cb and calc_visc_coeff subroutines and precond.F90 file --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 344 ++++++++++++++++------ 1 file changed, 257 insertions(+), 87 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 529dff401..0ec004acf 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -132,6 +132,8 @@ subroutine imp_solver (dt) Av , & ! matvec, Fy = Av - by ! jfl Fx , & ! x residual vector, Fx = Au - bx ! jfl Fy , & ! y residual vector, Fy = Av - by ! jfl + vrel , & ! coeff for tauw ! jfl + Cb , & ! seabed stress coeff ! jfl aiu , & ! ice fraction on u-grid umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) @@ -139,7 +141,10 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - strtmp ! stress combinations for momentum equation + strtmp ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK + ! doit etre (nx_block,ny_block,max_blocks,8)???? + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & + zetaD ! zetaD = 2zeta (viscous coeff) integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & icetmask, & ! ice extent mask (T-cell) @@ -369,6 +374,18 @@ subroutine imp_solver (dt) do iblk = 1, nblocks ! CALC Au and Av (MATVEC) + + call viscous_coeff (nx_block , ny_block, & + kOL , icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk),& + strength (:,:,iblk), zetaD (:,:,iblk,:)) + call stress_vp (nx_block, ny_block, & kOL, icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & @@ -378,7 +395,7 @@ subroutine imp_solver (dt) cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), tinyarea (:,:,iblk), & - strength (:,:,iblk), & + zetaD (:,:,iblk,:),strength (:,:,iblk), & & stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & @@ -387,24 +404,29 @@ subroutine imp_solver (dt) stress12_3(:,:,iblk), stress12_4(:,:,iblk), & shear (:,:,iblk), divu (:,:,iblk), & rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & - strtmp (:,:,:) ) + strtmp (:,:,:)) !----------------------------------------------------------------- ! 
momentum equation !----------------------------------------------------------------- + call calc_vrel_Cb (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , & + aiu (:,:,iblk), Tbu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + vrel (:,:,iblk), Cb (:,:,iblk)) + call matvec (nx_block , ny_block, & icellu (iblk), Cdn_ocn (:,:,iblk), & indxui (:,iblk), indxuj (:,iblk), & kOL , & aiu (:,:,iblk), strtmp (:,:,:), & - uocn (:,:,iblk), vocn (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & umassdti (:,:,iblk), fm (:,:,iblk), & - uarear (:,:,iblk), Tbu (:,:,iblk), & - strocnx (:,:,iblk), strocny (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & strintx (:,:,iblk), strinty (:,:,iblk), & - taubx (:,:,iblk), tauby (:,:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk)) @@ -425,10 +447,6 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk)) - - ! load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) call residual_vec (nx_block , ny_block, & icellu (iblk), & @@ -437,6 +455,10 @@ subroutine imp_solver (dt) Au (:,:,iblk), Av (:,:,iblk), & Fx (:,:,iblk), Fy (:,:,iblk)) + ! load velocity into array for boundary updates + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo !$OMP END PARALLEL DO @@ -562,6 +584,117 @@ end subroutine imp_solver !======================================================================= +! Computes the viscous coefficients. In fact zetaD=2*zeta + + subroutine viscous_coeff (nx_block, ny_block, & + kOL, icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + strength, zetaD) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + kOL , & ! subcycling step + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + strength , & ! ice strength (N/m) + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(out) :: & + zetaD ! 2*zeta + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! 
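         ! (The corner values below follow from a bilinear velocity
         ! variation over each T-cell, which is one reading of the
         ! metric coefficients cyp, cxm, etc.; because those
         ! coefficients carry edge lengths, each quantity is a strain
         ! rate times the cell area, hence the m^2/s noted above.)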
divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + + zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) + zetaD(i,j,2) = strength(i,j)/max(Deltanw,tinyarea(i,j)) + zetaD(i,j,3) = strength(i,j)/max(Deltasw,tinyarea(i,j)) + zetaD(i,j,4) = strength(i,j)/max(Deltase,tinyarea(i,j)) + + enddo ! ij + + end subroutine viscous_coeff + +!======================================================================= + ! Computes the rates of strain and internal stress components for ! each of the four corners on each T-grid cell. ! Computes stress terms for the momentum equation @@ -577,7 +710,7 @@ subroutine stress_vp (nx_block, ny_block, & cxp, cyp, & cxm, cym, & tarear, tinyarea, & - strength, & + zetaD, strength, & stressp_1, stressp_2, & stressp_3, stressp_4, & stressm_1, stressm_2, & @@ -612,6 +745,10 @@ subroutine stress_vp (nx_block, ny_block, & cxm , & ! 0.5*HTN - 1.5*HTN tarear , & ! 1/tarea tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -641,8 +778,6 @@ subroutine stress_vp (nx_block, ny_block, & shearne, shearnw, shearse, shearsw , & ! shearing Deltane, Deltanw, Deltase, Deltasw , & ! Delt puny , & ! puny - c0ne, c0nw, c0se, c0sw , & ! useful combinations - c1ne, c1nw, c1se, c1sw , & ssigpn, ssigps, ssigpe, ssigpw , & ssigmn, ssigms, ssigme, ssigmw , & ssig12n, ssig12s, ssig12e, ssig12w , & @@ -703,8 +838,8 @@ subroutine stress_vp (nx_block, ny_block, & ! 
Delta (in the denominator of zeta, eta) Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) !----------------------------------------------------------------- ! on last subcycle, save quantities for mechanical redistribution @@ -723,44 +858,25 @@ subroutine stress_vp (nx_block, ny_block, & endif - !----------------------------------------------------------------- - ! replacement pressure/Delta ! kg/s - ! save replacement pressure for principal stress calculation - !----------------------------------------------------------------- - c0ne = strength(i,j)/max(Deltane,tinyarea(i,j)) - c0nw = strength(i,j)/max(Deltanw,tinyarea(i,j)) - c0sw = strength(i,j)/max(Deltasw,tinyarea(i,j)) - c0se = strength(i,j)/max(Deltase,tinyarea(i,j)) - -! c1ne = c0ne*arlx1i -! c1nw = c0nw*arlx1i -! c1sw = c0sw*arlx1i -! c1se = c0se*arlx1i - -! c0ne = c1ne*ecci -! c0nw = c1nw*ecci -! c0sw = c1sw*ecci -! c0se = c1se*ecci - !----------------------------------------------------------------- ! the stresses ! kg/s^2 ! (1) northeast, (2) northwest, (3) southwest, (4) southeast !----------------------------------------------------------------- - stressp_1(i,j) = c0ne*(divune*(c1+Ktens) - Deltane*(c1-Ktens)) - stressp_2(i,j) = c0nw*(divunw*(c1+Ktens) - Deltanw*(c1-Ktens)) - stressp_3(i,j) = c0sw*(divusw*(c1+Ktens) - Deltasw*(c1-Ktens)) - stressp_4(i,j) = c0se*(divuse*(c1+Ktens) - Deltase*(c1-Ktens)) + stressp_1(i,j) = zetaD(i,j,1)*(divune*(c1+Ktens) - Deltane*(c1-Ktens)) + stressp_2(i,j) = zetaD(i,j,2)*(divunw*(c1+Ktens) - Deltanw*(c1-Ktens)) + stressp_3(i,j) = zetaD(i,j,3)*(divusw*(c1+Ktens) - Deltasw*(c1-Ktens)) + stressp_4(i,j) = zetaD(i,j,4)*(divuse*(c1+Ktens) - Deltase*(c1-Ktens)) - stressm_1(i,j) = c0ne*tensionne*(c1+Ktens)*ecci - stressm_2(i,j) = c0nw*tensionnw*(c1+Ktens)*ecci - stressm_3(i,j) = c0sw*tensionsw*(c1+Ktens)*ecci - stressm_4(i,j) = c0se*tensionse*(c1+Ktens)*ecci + stressm_1(i,j) = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2(i,j) = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3(i,j) = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4(i,j) = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci - stress12_1(i,j) = c0ne*shearne*p5*(c1+Ktens)*ecci - stress12_2(i,j) = c0nw*shearnw*p5*(c1+Ktens)*ecci - stress12_3(i,j) = c0sw*shearsw*p5*(c1+Ktens)*ecci - stress12_4(i,j) = c0se*shearse*p5*(c1+Ktens)*ecci + stress12_1(i,j) = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2(i,j) = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3(i,j) = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4(i,j) = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci !----------------------------------------------------------------- ! Eliminate underflows. @@ -895,6 +1011,84 @@ subroutine stress_vp (nx_block, ny_block, & end subroutine stress_vp +!======================================================================= + + subroutine calc_vrel_Cb (nx_block, ny_block, & + icellu, Cw, & + indxui, indxuj, & + kOL, & + aiu, Tbu, & + uocn, vocn, & + uvel, vvel, & + vrel, Cb) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + kOL ! outer loop iteration + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! 
compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + Tbu, & ! coefficient for basal stress (N/m^2) + aiu , & ! ice fraction on u-grid + uocn , & ! ocean current, x-direction (m/s) + vocn ! ocean current, y-direction (m/s) + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel ! y-component of velocity (m/s) + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + vrel , & ! coeff for tauw ! jfl + Cb ! seabed stress coeff ! jfl + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + Cw ! ocean-ice neutral drag coefficient + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? + rhow ! + + real (kind=dbl_kind) :: & + u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) + + !----------------------------------------------------------------- + ! integrate the momentum equation + !----------------------------------------------------------------- + + call icepack_query_parameters(rhow_out=rhow) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + utp = uvel(i,j) + vtp = vvel(i,j) + + ! (magnitude of relative ocean current)*rhow*drag*aice + vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & + (vocn(i,j) - vtp)**2) ! m/s + + Cb(i,j) = Tbu(i,j) / (sqrt(utp**2 + vtp**2) + u0) ! for basal stress + + enddo ! ij + + end subroutine calc_vrel_Cb !======================================================================= @@ -903,13 +1097,10 @@ subroutine matvec (nx_block, ny_block, & indxui, indxuj, & kOL, & aiu, str, & - uocn, vocn, & - waterx, watery, & + vrel, & umassdti, fm, & - uarear, Tbu, & - strocnx, strocny, & + uarear, Cb, & strintx, strinty, & - taubx, tauby, & uvel, vvel, & Au, Av) @@ -924,13 +1115,10 @@ subroutine matvec (nx_block, ny_block, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - Tbu, & ! coefficient for basal stress (N/m^2) + vrel, & ! coefficient for tauw + Cb, & ! coefficient for basal stress aiu , & ! ice fraction on u-grid - waterx , & ! for ocean stress calculation, x (m/s) - watery , & ! for ocean stress calculation, y (m/s) umassdti, & ! mass of U-cell/dt (kg/m^2 s) - uocn , & ! ocean current, x-direction (m/s) - vocn , & ! ocean current, y-direction (m/s) fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea @@ -945,19 +1133,15 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - strocnx , & ! ice-ocean stress, x-direction - strocny , & ! ice-ocean stress, y-direction +! strocnx , & ! ice-ocean stress, x-direction +! strocny , & ! ice-ocean stress, y-direction strintx , & ! divergence of internal ice stress, x (N/m^2) strinty , & ! divergence of internal ice stress, y (N/m^2) - taubx , & ! basal stress, x-direction (N/m^2) - tauby , & ! basal stress, y-direction (N/m^2) +! taubx , & ! basal stress, x-direction (N/m^2) +! tauby , & ! basal stress, y-direction (N/m^2) Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - Cw ! ocean-ice neutral drag coefficient - ! 
local variables
 
       integer (kind=int_kind) :: &
         i, j, ij
 
       real (kind=dbl_kind) :: &
         utp, vtp          , & ! utp = uvel, vtp = vvel !jfl needed?
-        vrel              , & ! relative ice-ocean velocity
         ccaimp,ccb        , & ! intermediate variables
-        taux, tauy        , & ! part of ocean stress term
-        Cb                , & ! complete basal stress coeff
         rhow              !
 
-      real (kind=dbl_kind) :: &
-        u0 = 5e-5_dbl_kind    ! residual velocity for basal stress (m/s)
-
       !-----------------------------------------------------------------
       ! integrate the momentum equation
       !-----------------------------------------------------------------
-
       call icepack_query_parameters(rhow_out=rhow)
       call icepack_warnings_flush(nu_diag)
       if (icepack_warnings_aborted()) call abort_ice(error_message="subname", &
          file=__FILE__, line=__LINE__)
-
+NOTE: vrel and the viscous coeff must be at the previous k iteration...
       do ij =1, icellu
          i = indxui(ij)
          j = indxuj(ij)
@@ -990,18 +1167,9 @@ subroutine matvec (nx_block, ny_block, &
          utp = uvel(i,j)
          vtp = vvel(i,j)
 
-        ! (magnitude of relative ocean current)*rhow*drag*aice
-        vrel = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + &
-                                          (vocn(i,j) - vtp)**2)  ! m/s
-
         ! ice/ocean stress
-        taux = vrel*waterx(i,j) ! NOTE this is not the entire
-        tauy = vrel*watery(i,j) ! ocn stress term
-
-        Cb  = Tbu(i,j) / (sqrt(utp**2 + vtp**2) + u0) ! for basal stress
-
         ! revp = 0 for classic evp, 1 for revised evp
-        ccaimp = umassdti(i,j) + vrel * cosw + Cb ! kg/m^2 s
+        ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s
 
-        ccb  = fm(i,j) + sign(c1,fm(i,j)) * vrel * sinw ! kg/m^2 s
+        ccb  = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s
 
         ! divergence of the internal stress tensor
         strintx(i,j) = uarear(i,j)* &
@@ -1016,8 +1184,8 @@ subroutine matvec (nx_block, ny_block, &
          ! ocean-ice stress for coupling
          ! here, strocn includes the factor of aice
          !-----------------------------------------------------------------
-        strocnx(i,j) = taux ! jfl could be moved
-        strocny(i,j) = tauy
+!        strocnx(i,j) = taux ! jfl could be moved
+!        strocny(i,j) = tauy
 
        ! calculate basal stress component for outputs ! jfl move this
 !       if (ksub == ndte) then ! on last subcycling iteration
@@ -1077,7 +1245,7 @@ subroutine calc_bfix (nx_block, ny_block, &
       do ij = 1, icellu
          i = indxui(ij)
          j = indxuj(ij)
-
+         
         bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j)
         byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j)
 
@@ -1230,6 +1398,8 @@ subroutine residual_vec (nx_block, ny_block, &
 
       end subroutine residual_vec
 
+! 
JFL ROUTINE TO CALC OCN STRESS FOR COUPLING
+
 !=======================================================================
 
       end module ice_dyn_vp

From cbd7b8db14cfbb67a61344833080ba9b49ba5a26 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 30 May 2018 19:23:07 +0000
Subject: [PATCH 004/196] after debugging...now compiles

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 0ec004acf..ed9459716 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -420,10 +420,11 @@ subroutine imp_solver (dt)
                            vrel    (:,:,iblk), Cb        (:,:,iblk))
 
          call matvec (nx_block           , ny_block, &
-                      icellu       (iblk), Cdn_ocn  (:,:,iblk), &
+                      icellu       (iblk), &
                       indxui     (:,iblk), indxuj   (:,iblk), &
                       kOL                , &
-                      aiu      (:,:,iblk), strtmp   (:,:,:), &
+                      aiu      (:,:,iblk), strtmp   (:,:,:), &
+                      vrel     (:,:,iblk), &
                       umassdti (:,:,iblk), fm       (:,:,iblk), &
                       uarear   (:,:,iblk), Cb       (:,:,iblk), &
                       strintx  (:,:,iblk), strinty  (:,:,iblk), &
@@ -635,7 +636,7 @@ subroutine viscous_coeff (nx_block, ny_block, &
        divune, divunw, divuse, divusw            , & ! divergence
        tensionne, tensionnw, tensionse, tensionsw, & ! tension
        shearne, shearnw, shearse, shearsw        , & ! shearing
-       Deltane, Deltanw, Deltase, Deltasw        , & ! Delt
+       Deltane, Deltanw, Deltase, Deltasw          ! Delt
 
 !DIR$ CONCURRENT !Cray
 !cdir nodep      !NEC
@@ -1093,7 +1094,7 @@ subroutine matvec (nx_block, ny_block, &
-                         icellu, Cw, &
+                         icellu, &
                          indxui, indxuj, &
                          kOL, &
                          aiu, str, &
@@ -1159,7 +1160,7 @@ subroutine matvec (nx_block, ny_block, &
       call icepack_warnings_flush(nu_diag)
       if (icepack_warnings_aborted()) call abort_ice(error_message="subname", &
          file=__FILE__, line=__LINE__)
-NOTE: vrel and the viscous coeff must be at the previous k iteration... 
+ do ij =1, icellu i = indxui(ij) j = indxuj(ij) From 1195bc0e0187b3a293f30448af913fea8b6b8ab5 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 30 May 2018 20:30:23 +0000 Subject: [PATCH 005/196] in process of adding precondD --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 349 +++++++++++++++++++++- 1 file changed, 348 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ed9459716..14aa9e684 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -454,7 +454,29 @@ subroutine imp_solver (dt) indxui (:,iblk), indxuj (:,iblk), & bx (:,:,iblk), by (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk), & - Fx (:,:,iblk), Fy (:,:,iblk)) + Fx (:,:,iblk), Fy (:,:,iblk)) + + call precondD (nx_block, ny_block, & + kOL, icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + umassdti (:,:,iblk), & + zetaD (:,:,iblk,:),strength (:,:,iblk), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk), & + shear (:,:,iblk), divu (:,:,iblk), & + rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & + strtmp (:,:,:)) ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) @@ -1399,6 +1421,331 @@ subroutine residual_vec (nx_block, ny_block, & end subroutine residual_vec +!======================================================================= + + subroutine precondD (nx_block, ny_block, & + kOL, icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + vrel, Cb, & + umassdti, & + zetaD, strength, & + stressp_1, stressp_2, & + stressp_3, stressp_4, & + stressm_1, stressm_2, & + stressm_3, stressm_4, & + stress12_1, stress12_2, & + stress12_3, stress12_4, & + shear, divu, & + rdg_conv, rdg_shear, & + str ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + kOL , & ! subcycling step + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + strength , & ! ice strength (N/m) + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + vrel, & ! coefficient for tauw + Cb, & ! coefficient for basal stress + umassdti ! mass of U-cell/dt (kg/m^2 s) + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 
2*zeta + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 + stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 + stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + shear , & ! strain rate II component (1/s) + divu , & ! strain rate I component, velocity divergence (1/s) + rdg_conv , & ! convergence term for ridging (1/s) + rdg_shear ! shear term for ridging (1/s) + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + str ! stress combinations + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + puny , & ! puny + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + str(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! JFL watchout currently on LHS + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divuneDu = cyp(i,j) ! D=diagonal + divuneDv = cxp(i,j) + divunwDu = dyt(i,j) + divuseDv = dxt(i,j) + + ! tension strain rate = e_11 - e_22 + tensionneDu = -cym(i,j) + tensionneDv = cxm(i,j) + tensionnwDu = dyt(i,j) + tensionseDv = -dxt(i,j) + + ! shearing strain rate = e_12 + shearneDu = -cxm(i,j) + shearneDv = -cym(i,j) + shearnwDv = dyt(i,j) + shearseDu = dxt(i,j) + + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + +! IMProve: delete stress coeff not needed instead of setting them to 0. +! no need for divuneDu...just plug them directly in eqs below. + + stressp_1u = zetaD(i,j,1)*divuneDu*(c1+Ktens) + stressp_1v = zetaD(i,j,1)*divuneDv*(c1+Ktens) + stressp_2u = zetaD(i,j,2)*divunwDu*(c1+Ktens) + stressp_2v = c0 + stressp_3u = c0 + stressp_3v = c0 + stressp_4u = c0 + stressp_4v = zetaD(i,j,4)*divuseDv*(c1+Ktens) + + stressm_1u = zetaD(i,j,1)*tensionneDu*(c1+Ktens)*ecci + stressm_1v = zetaD(i,j,1)*tensionneDv*(c1+Ktens)*ecci + stressm_2u = zetaD(i,j,2)*tensionnwDu*(c1+Ktens)*ecci + stressm_2v = c0 + stressm_3u = c0 + stressm_3v = c0 + stressm_4u = c0 + stressm_4v = zetaD(i,j,4)*tensionseDv*(c1+Ktens)*ecci + + stress12_1u = zetaD(i,j,1)*shearneDu*p5*(c1+Ktens)*ecci + stress12_1v = zetaD(i,j,1)*shearneDv*p5*(c1+Ktens)*ecci + stress12_2u = c0 + stress12_2v = zetaD(i,j,2)*shearnwDv*p5*(c1+Ktens)*ecci + stress12_3u = c0 + stress12_3v = c0 + stress12_4u = zetaD(i,j,4)*shearseDu*p5*(c1+Ktens)*ecci + stress12_4v = c0 + + !----------------------------------------------------------------- + ! 
Eliminate underflows. + ! The following code is commented out because it is relatively + ! expensive and most compilers include a flag that accomplishes + ! the same thing more efficiently. This code is cheaper than + ! handling underflows if the compiler lacks a flag; uncomment + ! it in that case. The compiler flag is often described with the + ! phrase "flush to zero". + !----------------------------------------------------------------- + +! call icepack_query_parameters(puny_out=puny) +! call icepack_warnings_flush(nu_diag) +! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & +! file=__FILE__, line=__LINE__) + +! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) +! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) +! stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) +! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) + +! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) +! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) +! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) +! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) + +! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) +! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) +! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) +! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! kg/s^2 + !----------------------------------------------------------------- + + ssigpnu = stressp_1u + stressp_2u + ssigpnv = stressp_1v + stressp_2v + ssigpsu = stressp_3u + stressp_4u + ssigpsv = stressp_3v + stressp_4v + ssigpeu = stressp_1u + stressp_4v + ssigpev = stressp_1v + stressp_4v + ssigpwu = stressp_2u + stressp_3u + ssigpwv = stressp_2v + stressp_3v + ssigp1u =(stressp_1u + stressp_3u)*p055 + ssigp1v =(stressp_1v + stressp_3v)*p055 + ssigp2u =(stressp_2u + stressp_4u)*p055 + ssigp2v =(stressp_2v + stressp_4v)*p055 + + ssigmnu = stressm_1u + stressm_2u + ssigmnv = stressm_1v + stressm_2v + ssigmsu = stressm_3u + stressm_4u + ssigmsv = stressm_3v + stressm_4v + ssigmeu = stressm_1u + stressm_4u + ssigmev = stressm_1v + stressm_4v + ssigmwu = stressm_2u + stressm_3u + ssigmwv = stressm_2v + stressm_3v + ssigm1u =(stressm_1u + stressm_3u)*p055 + ssigm1v =(stressm_1v + stressm_3v)*p055 + ssigm2u =(stressm_2u + stressm_4u)*p055 + ssigm2v =(stressm_2v + stressm_4v)*p055 + + ssig12nu = stress12_1u + stress12_2u + ssig12nv = stress12_1v + stress12_2v + ssig12su = stress12_3u + stress12_4u + ssig12sv = stress12_3v + stress12_4v + ssig12eu = stress12_1u + stress12_4u + ssig12ev = stress12_1v + stress12_4v + ssig12wu = stress12_2u + stress12_3u + ssig12wv = stress12_2v + stress12_3v + ssig121u =(stress12_1u + stress12_3u)*p111 + ssig121v =(stress12_1v + stress12_3v)*p111 + ssig122u =(stress12_2u + stress12_4u)*p111 + ssig122v =(stress12_2v + stress12_4v)*p111 + + csigpneu = p111*stressp_1u + ssigp2u + p027*stressp_3u + csigpnev = p111*stressp_1v + ssigp2v + p027*stressp_3v + csigpnwu = p111*stressp_2u + ssigp1u + p027*stressp_4u + csigpnwv = p111*stressp_2v + ssigp1v + p027*stressp_4v + csigpswu = p111*stressp_3u + ssigp2u + p027*stressp_1u + csigpswv = p111*stressp_3v + ssigp2v + p027*stressp_1v + csigpseu = p111*stressp_4u + ssigp1u + p027*stressp_2u + csigpsev = 
p111*stressp_4v + ssigp1v + p027*stressp_2v
+
+        csigmneu  = p111*stressm_1u + ssigm2u + p027*stressm_3u
+        csigmnev  = p111*stressm_1v + ssigm2v + p027*stressm_3v
+        csigmnwu  = p111*stressm_2u + ssigm1u + p027*stressm_4u
+        csigmnwv  = p111*stressm_2v + ssigm1v + p027*stressm_4v
+        csigmswu  = p111*stressm_3u + ssigm2u + p027*stressm_1u
+        csigmswv  = p111*stressm_3v + ssigm2v + p027*stressm_1v
+        csigmseu  = p111*stressm_4u + ssigm1u + p027*stressm_2u
+        csigmsev  = p111*stressm_4v + ssigm1v + p027*stressm_2v
+
+        csig12ne = p222*stress12_1(i,j) + ssig122 &
+                 + p055*stress12_3(i,j)
+        csig12nw = p222*stress12_2(i,j) + ssig121 &
+                 + p055*stress12_4(i,j)
+        csig12sw = p222*stress12_3(i,j) + ssig122 &
+                 + p055*stress12_1(i,j)
+        csig12se = p222*stress12_4(i,j) + ssig121 &
+                 + p055*stress12_2(i,j)
+
+        str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w)
+        str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e)
+        str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s)
+        str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n)
+
+        !-----------------------------------------------------------------
+        ! for dF/dx (u momentum)
+        !-----------------------------------------------------------------
+        strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps)
+        strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms)
+
+        ! northeast (i,j)
+        str(i,j,1) = -strp_tmp - strm_tmp - str12ew &
+             + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne
+
+        ! northwest (i+1,j)
+        str(i,j,2) = strp_tmp + strm_tmp - str12we &
+             + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw
+
+        strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn)
+        strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn)
+
+        ! southeast (i,j+1)
+        str(i,j,3) = -strp_tmp - strm_tmp + str12ew &
+             + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se
+
+        ! southwest (i+1,j+1)
+        str(i,j,4) = strp_tmp + strm_tmp + str12we &
+             + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw
+
+        !-----------------------------------------------------------------
+        ! for dF/dy (v momentum)
+        !-----------------------------------------------------------------
+        strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw)
+        strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw)
+
+        ! northeast (i,j)
+        str(i,j,5) = -strp_tmp + strm_tmp - str12ns &
+             - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne
+
+        ! southeast (i,j+1)
+        str(i,j,6) = strp_tmp - strm_tmp - str12sn &
+             - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se
+
+        strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe)
+        strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme)
+
+        ! northwest (i+1,j)
+        str(i,j,7) = -strp_tmp + strm_tmp + str12ns &
+             - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw
+
+        ! southwest (i+1,j+1)
+        str(i,j,8) = strp_tmp - strm_tmp + str12sn &
+             - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw
+
+      enddo                     ! ij
+
+      end subroutine precondD
+
! JFL ROUTINE TO CALC OCN STRESS FOR COUPLING
 
 !=======================================================================
 
       end module ice_dyn_vp
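
precondD, introduced above, is building the diagonal of the matvec operator: in the stress part only the coefficients that multiply uvel(i,j) and vvel(i,j) of the cell itself are kept (the other corner contributions are zeroed), and over the next two commits the stencil sums are collapsed into the scalars str1...str8 and combined, together with the umassdti, vrel and Cb terms, into the Diagu/Diagv fields. The intended use is diagonal (Jacobi) preconditioning of the linear solve. A minimal sketch of how such a diagonal is typically applied, in the style of the routines above — the name precond_diag and the outputs zx, zy are illustrative only, not part of this series:

      subroutine precond_diag (nx_block, ny_block, &
                               icellu, &
                               indxui, indxuj, &
                               Diagu, Diagv, &
                               Fx, Fy, &
                               zx, zy)

      ! apply M^-1 with M = diag(A): divide each residual component
      ! by its own diagonal entry; assumes Diagu and Diagv are
      ! nonzero wherever iceumask is true

      integer (kind=int_kind), intent(in) :: &
         nx_block, ny_block, & ! block dimensions
         icellu                ! total count when iceumask is true

      integer (kind=int_kind), dimension (nx_block*ny_block), &
         intent(in) :: &
         indxui  , & ! compressed index in i-direction
         indxuj      ! compressed index in j-direction

      real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: &
         Diagu , & ! diagonal matrix coefficients for u component
         Diagv , & ! diagonal matrix coefficients for v component
         Fx    , & ! x residual vector
         Fy        ! y residual vector

      real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: &
         zx    , & ! preconditioned residual, x-direction
         zy        ! preconditioned residual, y-direction

      integer (kind=int_kind) :: i, j, ij

      do ij = 1, icellu
         i = indxui(ij)
         j = indxuj(ij)
         zx(i,j) = Fx(i,j) / Diagu(i,j)
         zy(i,j) = Fy(i,j) / Diagv(i,j)
      enddo

      end subroutine precond_diag

Dividing by the diagonal needs no extra halo communication and parallelizes trivially over blocks; the price is a weaker reduction in iteration count than an incomplete-factorization preconditioner would give.
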
From a7074df16acf0884a320e73b4c890194447c7ec2 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Mon, 4 Jun 2018 15:23:58 +0000
Subject: [PATCH 006/196] precondD is in progress

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 92 ++++++++++++++---------
 1 file changed, 55 insertions(+), 37 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 14aa9e684..636b453fe 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -1678,70 +1678,88 @@ subroutine precondD (nx_block, ny_block, &
         csigmseu  = p111*stressm_4u + ssigm1u + p027*stressm_2u
         csigmsev  = p111*stressm_4v + ssigm1v + p027*stressm_2v
 
-        csig12ne = p222*stress12_1(i,j) + ssig122 &
-                 + p055*stress12_3(i,j)
-        csig12nw = p222*stress12_2(i,j) + ssig121 &
-                 + p055*stress12_4(i,j)
-        csig12sw = p222*stress12_3(i,j) + ssig122 &
-                 + p055*stress12_1(i,j)
-        csig12se = p222*stress12_4(i,j) + ssig121 &
-                 + p055*stress12_2(i,j)
-
-        str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w)
-        str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e)
-        str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s)
-        str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n)
+        csig12neu = p222*stress12_1u + ssig122u &
+                  + p055*stress12_3u
+        csig12nev = p222*stress12_1v + ssig122v &
+                  + p055*stress12_3v
+        csig12nwu = p222*stress12_2u + ssig121u &
+                  + p055*stress12_4u
+        csig12nwv = p222*stress12_2v + ssig121v &
+                  + p055*stress12_4v
+        csig12swu = p222*stress12_3u + ssig122u &
+                  + p055*stress12_1u
+        csig12swv = p222*stress12_3v + ssig122v &
+                  + p055*stress12_1v
+        csig12seu = p222*stress12_4u + ssig121u &
+                  + p055*stress12_2u
+        csig12sev = p222*stress12_4v + ssig121v &
+                  + p055*stress12_2v
+
+        str12ewu = p5*dxt(i,j)*(p333*ssig12eu + p166*ssig12wu)
+        str12ewv = p5*dxt(i,j)*(p333*ssig12ev + p166*ssig12wv)
+        str12weu = p5*dxt(i,j)*(p333*ssig12wu + p166*ssig12eu)
+        str12wev = p5*dxt(i,j)*(p333*ssig12wv + p166*ssig12ev)
+        str12nsu = p5*dyt(i,j)*(p333*ssig12nu + p166*ssig12su)
+        str12nsv = p5*dyt(i,j)*(p333*ssig12nv + p166*ssig12sv)
+        str12snu = p5*dyt(i,j)*(p333*ssig12su + p166*ssig12nu)
+        str12snv = p5*dyt(i,j)*(p333*ssig12sv + p166*ssig12nv)
 
         !-----------------------------------------------------------------
         ! for dF/dx (u momentum)
         !-----------------------------------------------------------------
-        strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps)
-        strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms)
+        strp_tmpu = p25*dyt(i,j)*(p333*ssigpnu + p166*ssigpsu)
+        strm_tmpu = p25*dyt(i,j)*(p333*ssigmnu + p166*ssigmsu)
 
         ! northeast (i,j)
-        str(i,j,1) = -strp_tmp - strm_tmp - str12ew &
-             + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne
+        str1 = -strp_tmpu - strm_tmpu - str12ewu &
+             + dxhy(i,j)*(-csigpneu + csigmneu) + dyhx(i,j)*csig12neu
 
         ! northwest (i+1,j)
-        str(i,j,2) = strp_tmp + strm_tmp - str12we &
-             + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw
+        str2 = strp_tmpu + strm_tmpu - str12weu &
+             + dxhy(i,j)*(-csigpnwu + csigmnwu) + dyhx(i,j)*csig12nwu
 
-        strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn)
-        strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn)
+        strp_tmpu = p25*dyt(i,j)*(p333*ssigpsu + p166*ssigpnu)
+        strm_tmpu = p25*dyt(i,j)*(p333*ssigmsu + p166*ssigmnu)
 
        ! southeast (i,j+1)
-        str(i,j,3) = -strp_tmp - strm_tmp + str12ew &
-             + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se
+        str3 = -strp_tmpu - strm_tmpu + str12ewu &
+             + dxhy(i,j)*(-csigpseu + csigmseu) + dyhx(i,j)*csig12seu
 
         ! southwest (i+1,j+1)
-        str(i,j,4) = strp_tmp + strm_tmp + str12we &
-             + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw
+        str4 = strp_tmpu + strm_tmpu + str12weu &
+             + dxhy(i,j)*(-csigpswu + csigmswu) + dyhx(i,j)*csig12swu
 
+        Du(i,j) = -uarear(i,j)*(str1 + str2 + str3 + str4) ! -sign to bring it on LHS
+
+        ! THE CCAimp TERM IS MISSING....
+
         !-----------------------------------------------------------------
         ! for dF/dy (v momentum)
         !-----------------------------------------------------------------
-        strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw)
-        strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw)
+        strp_tmpv = p25*dxt(i,j)*(p333*ssigpev + p166*ssigpwv)
+        strm_tmpv = p25*dxt(i,j)*(p333*ssigmev + p166*ssigmwv)
 
         ! northeast (i,j)
-        str(i,j,5) = -strp_tmp + strm_tmp - str12ns &
-             - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne
+        str5 = -strp_tmpv + strm_tmpv - str12nsv &
+             - dyhx(i,j)*(csigpnev + csigmnev) + dxhy(i,j)*csig12nev
 
         ! southeast (i,j+1)
-        str(i,j,6) = strp_tmp - strm_tmp - str12sn &
-             - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se
+        str6 = strp_tmpv - strm_tmpv - str12snv &
+             - dyhx(i,j)*(csigpsev + csigmsev) + dxhy(i,j)*csig12sev
 
-        strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe)
-        strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme)
+        strp_tmpv = p25*dxt(i,j)*(p333*ssigpwv + p166*ssigpev)
+        strm_tmpv = p25*dxt(i,j)*(p333*ssigmwv + p166*ssigmev)
 
         ! northwest (i+1,j)
-        str(i,j,7) = -strp_tmp + strm_tmp + str12ns &
-             - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw
+        str7 = -strp_tmpv + strm_tmpv + str12nsv &
+             - dyhx(i,j)*(csigpnwv + csigmnwv) + dxhy(i,j)*csig12nwv
 
         ! southwest (i+1,j+1)
-        str(i,j,8) = strp_tmp - strm_tmp + str12sn &
-             - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw
+        str8 = strp_tmpv - strm_tmpv + str12snv &
+             - dyhx(i,j)*(csigpswv + csigmswv) + dxhy(i,j)*csig12swv
 
+        Dv(i,j) = -uarear(i,j)*(str5 + str6 + str7 + str8) ! -sign to bring it on LHS
+
       enddo                     ! ij
 
       end subroutine precondD

From 93639139b93473118de0decffd12e4a4229d351a Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Mon, 4 Jun 2018 16:18:28 +0000
Subject: [PATCH 007/196] I finished coding precondD...it compiles

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 118 +++++++++-------------
 1 file changed, 50 insertions(+), 68 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 636b453fe..2c114f582 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -132,6 +132,8 @@ subroutine imp_solver (dt)
         Av      , & ! matvec, Fy = Av - by ! jfl
         Fx      , & ! x residual vector, Fx = Au - bx ! jfl
         Fy      , & ! y residual vector, Fy = Av - by ! jfl
+        Diagu   , & ! diagonal matrix coeff for u component
+        Diagv   , & ! diagonal matrix coeff for v component
         vrel    , & ! coeff for tauw ! jfl
         Cb      , & ! seabed stress coeff ! jfl
         aiu     , & ! 
ice fraction on u-grid @@ -459,25 +461,15 @@ subroutine imp_solver (dt) call precondD (nx_block, ny_block, & kOL, icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & dxhy (:,:,iblk), dyhx (:,:,iblk), & cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk), & + uarear (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - umassdti (:,:,iblk), & - zetaD (:,:,iblk,:),strength (:,:,iblk), & - stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & - stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & - stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & - stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & - stress12_1(:,:,iblk), stress12_2(:,:,iblk), & - stress12_3(:,:,iblk), stress12_4(:,:,iblk), & - shear (:,:,iblk), divu (:,:,iblk), & - rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & - strtmp (:,:,:)) - + umassdti (:,:,iblk) , zetaD (:,:,iblk,:), & + Diagu (:,:,iblk), diagv (:,:,iblk)) + ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) fld2(:,:,2,iblk) = vvel(:,:,iblk) @@ -1426,24 +1418,14 @@ end subroutine residual_vec subroutine precondD (nx_block, ny_block, & kOL, icellt, & indxti, indxtj, & - uvel, vvel, & dxt, dyt, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & + uarear, & vrel, Cb, & - umassdti, & - zetaD, strength, & - stressp_1, stressp_2, & - stressp_3, stressp_4, & - stressm_1, stressm_2, & - stressm_3, stressm_4, & - stress12_1, stress12_2, & - stress12_3, stress12_4, & - shear, divu, & - rdg_conv, rdg_shear, & - str ) + umassdti, zetaD, & + Diagu, Diagv) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1456,9 +1438,6 @@ subroutine precondD (nx_block, ny_block, & indxtj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - strength , & ! ice strength (N/m) - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) dxt , & ! width of T-cell through the middle (m) dyt , & ! height of T-cell through the middle (m) dxhy , & ! 0.5*(HTE - HTE) @@ -1467,8 +1446,7 @@ subroutine precondD (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea + uarear ! 1/uarea real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & vrel, & ! coefficient for tauw @@ -1478,23 +1456,10 @@ subroutine precondD (nx_block, ny_block, & real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & zetaD ! 2*zeta - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 - stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 - stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - shear , & ! strain rate II component (1/s) - divu , & ! strain rate I component, velocity divergence (1/s) - rdg_conv , & ! convergence term for ridging (1/s) - rdg_shear ! shear term for ridging (1/s) - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - str ! stress combinations + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & + Diagu, & ! diagonal matrix coefficients for u component + Diagv ! diagonal matrix coefficients for v component ! 
local variables @@ -1502,26 +1467,43 @@ subroutine precondD (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - puny , & ! puny - ssigpn, ssigps, ssigpe, ssigpw , & - ssigmn, ssigms, ssigme, ssigmw , & - ssig12n, ssig12s, ssig12e, ssig12w , & - ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & - csigpne, csigpnw, csigpse, csigpsw , & - csigmne, csigmnw, csigmse, csigmsw , & - csig12ne, csig12nw, csig12se, csig12sw , & - str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp + divuneDu, divunwDu, divuseDu, divuswDu , & ! divergence + divuneDv, divunwDv, divuseDv, divuswDv , & ! divergence + tensionneDu, tensionnwDu, tensionseDu, tensionswDu, & ! tension + tensionneDv, tensionnwDv, tensionseDv, tensionswDv, & ! tension + shearneDu, shearnwDu, shearseDu, shearswDu , & ! shearing + shearneDv, shearnwDv, shearseDv, shearswDv , & ! shearing + stressp_1u, stressp_2u, stressp_3u, stressp_4u , & + stressp_1v, stressp_2v, stressp_3v, stressp_4v , & + stressm_1u, stressm_2u, stressm_3u, stressm_4u , & + stressm_1v, stressm_2v, stressm_3v, stressm_4v , & + stress12_1u, stress12_2u, stress12_3u, stress12_4u, & + stress12_1v, stress12_2v, stress12_3v, stress12_4v, & + ssigpnu, ssigpsu, ssigpeu, ssigpwu , & + ssigpnv, ssigpsv, ssigpev, ssigpwv , & + ssigmnu, ssigmsu, ssigmeu, ssigmwu , & + ssigmnv, ssigmsv, ssigmev, ssigmwv , & + ssig12nu, ssig12su, ssig12eu, ssig12wu , & + ssig12nv, ssig12sv, ssig12ev, ssig12wv , & + ssigp1u, ssigp2u, ssigm1u, ssigm2u, ssig121u, ssig122u, & + ssigp1v, ssigp2v, ssigm1v, ssigm2v, ssig121v, ssig122v, & + csigpneu, csigpnwu, csigpseu, csigpswu , & + csigpnev, csigpnwv, csigpsev, csigpswv , & + csigmneu, csigmnwu, csigmseu, csigmswu , & + csigmnev, csigmnwv, csigmsev, csigmswv , & + csig12neu, csig12nwu, csig12seu, csig12swu , & + csig12nev, csig12nwv, csig12sev, csig12swv , & + str12ewu, str12weu, str12nsu, str12snu , & + str12ewv, str12wev, str12nsv, str12snv , & + strp_tmpu, strm_tmpu, strp_tmpv, strm_tmpv , & + str1, str2, str3, str4, str5, str6, str7, str8 !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- - str(:,:,:) = c0 + Diagu(:,:) = c0 + Diagv(:,:) = c0 !DIR$ CONCURRENT !Cray !cdir nodep !NEC @@ -1729,10 +1711,9 @@ subroutine precondD (nx_block, ny_block, & str4 = strp_tmpu + strm_tmpu + str12weu & + dxhy(i,j)*(-csigpswu + csigmswu) + dyhx(i,j)*csig12swu - Du(i,j) = -uarear(i,j)*(str1 + str2 + str3 + str4) ! -sign to bring it on LHS + Diagu(i,j) = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) & + -uarear(i,j)*(str1 + str2 + str3 + str4) ! -sign to bring it on LHS - ! MANQUE LE TERME CCAimp.... - !----------------------------------------------------------------- ! for dF/dy (v momentum) !----------------------------------------------------------------- @@ -1758,7 +1739,8 @@ subroutine precondD (nx_block, ny_block, & str8 = strp_tmpv - strm_tmpv + str12snv & - dyhx(i,j)*(csigpswv + csigmswv) + dxhy(i,j)*csig12swv - Dv(i,j) = -uarear(i,j)*(str5 + str6 + str7 + str8) ! -sign to bring it on LHS + Diagv(i,j) = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) & + -uarear(i,j)*(str5 + str6 + str7 + str8) ! -sign to bring it on LHS enddo ! 
ij
 
From ae15ace5f0f193fab79a29bd834f912d820e627a Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Mon, 4 Jun 2018 19:51:09 +0000
Subject: [PATCH 008/196] added calculation for L2norm

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 2c114f582..ca8aaeb66 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -134,6 +134,8 @@ subroutine imp_solver (dt)
         Fy      , & ! y residual vector, Fy = Av - by ! jfl
         Diagu   , & ! diagonal matrix coeff for u component
         Diagv   , & ! diagonal matrix coeff for v component
+        uprev_k , & ! uvel at previous Picard iteration
+        vprev_k , & ! vvel at previous Picard iteration
         vrel    , & ! coeff for tauw ! jfl
         Cb      , & ! seabed stress coeff ! jfl
         aiu     , & ! ice fraction on u-grid
@@ -141,6 +143,8 @@ subroutine imp_solver (dt)
         umassdti    ! mass of U-cell/dte (kg/m^2 s)
 
       real (kind=dbl_kind), allocatable :: fld2(:,:,:,:)
+
+      real (kind=dbl_kind), dimension (max_blocks) :: L2norm
 
       real (kind=dbl_kind), dimension(nx_block,ny_block,8):: &
        strtmp   ! stress combinations for momentum equation   !JFL CHECK NOT SURE THIS IS OK
@@ -375,6 +379,9 @@ subroutine imp_solver (dt)
 !$OMP PARALLEL DO PRIVATE(iblk,strtmp)
       do iblk = 1, nblocks
 
+         uprev_k(:,:,iblk) = uvel(:,:,iblk)
+         vprev_k(:,:,iblk) = vvel(:,:,iblk)
+
          ! CALC Au and Av (MATVEC)
 
          call viscous_coeff (nx_block           , ny_block, &
@@ -408,10 +415,6 @@ subroutine imp_solver (dt)
                              rdg_conv  (:,:,iblk), rdg_shear (:,:,iblk), &
                              strtmp    (:,:,:))
 
-         !-----------------------------------------------------------------
-         ! momentum equation
-         !-----------------------------------------------------------------
-
          call calc_vrel_Cb (nx_block           , ny_block, &
                             icellu       (iblk), Cdn_ocn  (:,:,iblk), &
                             indxui     (:,iblk), indxuj   (:,iblk), &
@@ -456,7 +459,8 @@ subroutine imp_solver (dt)
                             indxui   (:,iblk), indxuj   (:,iblk), &
                             bx       (:,:,iblk), by       (:,:,iblk), &
                             Au       (:,:,iblk), Av       (:,:,iblk), &
-                            Fx       (:,:,iblk), Fy       (:,:,iblk))
+                            Fx       (:,:,iblk), Fy       (:,:,iblk), &
+                            L2norm(iblk))
 
 subroutine residual_vec (nx_block, ny_block, &
                          indxui, indxuj, &
                          bx, by, &
                          Au, Av, &
-                         Fx, Fy)
+                         Fx, Fy, &
+                         L2normtp)
 
       integer (kind=int_kind), intent(in) :: &
         nx_block, ny_block, & ! block dimensions
 
       real (kind=dbl_kind), dimension (nx_block,ny_block), &
          intent(inout) :: &
          Fx      , & ! x residual vector, Fx = Au - bx (N/m^2)
          Fy          ! y residual vector, Fy = Av - by (N/m^2)
+
+      real (kind=dbl_kind), intent(inout) :: &
+         L2normtp    ! (L2norm)^2
 
       ! local variables
 
          Fx(i,j) = Au(i,j) - bx(i,j)
          Fy(i,j) = Av(i,j) - by(i,j)
 
       enddo                     ! ij
+
+      L2normtp = DOT_PRODUCT(Fx,Fx)+DOT_PRODUCT(Fy,Fy)
 
       end subroutine residual_vec
 
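
Two details of the convergence measure introduced above are worth flagging. First, L2normtp accumulates the squared norm for one block, so a global stopping test still has to sum the per-block values and take a single square root. Second, standard Fortran restricts the DOT_PRODUCT intrinsic to rank-1 arguments, while Fx and Fy are rank-2 block arrays; the commits that follow work around this by packing the residual into a rank-1 vector (Fres) before reducing it. For reference, an equivalent rank-2 reduction can be written with SUM, which accepts arrays of any rank — assuming the residual arrays were zero-initialized so that points outside the ice mask contribute nothing:

      ! squared residual norm over one block; SUM, unlike
      ! DOT_PRODUCT, is defined for arrays of any rank
      L2normtp = sum(Fx*Fx) + sum(Fy*Fy)

The masked-loop form adopted a few commits later (accumulating Fx(i,j)**2 + Fy(i,j)**2 inside the ij loop) avoids the zero-initialization requirement altogether.
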
From 35babddb5c9d420eecb8d0cedfc66cadd6504560 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Mon, 4 Jun 2018 20:15:00 +0000
Subject: [PATCH 009/196] modif to ice_step_mod.F90 for call to imp_solver

---
 cicecore/cicedynB/general/ice_step_mod.F90 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/general/ice_step_mod.F90 b/cicecore/cicedynB/general/ice_step_mod.F90
index 7a2493d58..c1c85fdae 100644
--- a/cicecore/cicedynB/general/ice_step_mod.F90
+++ b/cicecore/cicedynB/general/ice_step_mod.F90
@@ -862,11 +862,12 @@ subroutine step_dyn_horiz (dt)
 
       call init_history_dyn     ! initialize dynamic history variables
 
       !-----------------------------------------------------------------
-      ! Elastic-viscous-plastic ice dynamics
+      ! Ice dynamics (momentum equation)
       !-----------------------------------------------------------------
 
       if (kdyn == 1) call evp (dt)
       if (kdyn == 2) call eap (dt)
+      if (kdyn == 3) call imp_solver (dt)
 
       !-----------------------------------------------------------------
       ! Horizontal ice transport

From b793f7f8e63aebc202a695571456d1e1d8316b44 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Tue, 5 Jun 2018 20:28:21 +0000
Subject: [PATCH 010/196] now form complete vectors by alternating u(i,j) and
 v(i,j) and so on

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90  | 95 ++++++++++++++------
 cicecore/cicedynB/general/ice_step_mod.F90 |  1 +
 2 files changed, 70 insertions(+), 26 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index ca8aaeb66..66c1790f4 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -141,6 +141,11 @@ subroutine imp_solver (dt)
         aiu     , & ! ice fraction on u-grid
         umass   , & ! total mass of ice and snow (u grid)
         umassdti    ! mass of U-cell/dte (kg/m^2 s)
+
+      real (kind=dbl_kind), dimension (2*nx_block*ny_block, max_blocks) :: &
+        bvec    , & ! b vector (...bu(i,j), bv(i,j),....)
+        Aw      , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...)
+        DiagA       ! diagonal of matrix A
 
       real (kind=dbl_kind), allocatable :: fld2(:,:,:,:)
 
         call matvec (nx_block           , ny_block, &
                      icellu       (iblk), &
                      indxui     (:,iblk), indxuj   (:,iblk), &
                      kOL                , &
                      aiu      (:,:,iblk), strtmp   (:,:,:), &
                      vrel     (:,:,iblk), &
                      umassdti (:,:,iblk), fm       (:,:,iblk), &
                      uarear   (:,:,iblk), Cb       (:,:,iblk), &
                      strintx  (:,:,iblk), strinty  (:,:,iblk), &
                      uvel     (:,:,iblk), vvel     (:,:,iblk), &
-                     Au       (:,:,iblk), Av       (:,:,iblk))
+                     Au       (:,:,iblk), Av       (:,:,iblk), &
+                     Aw       (:,iblk))
 
         ! end of Au and Av calc
 
         ! CALC b_u and b_v (bvec)
 
-        call bvec (nx_block           , ny_block, &
+        call calc_bvec (nx_block           , ny_block, &
                         icellu       (iblk), Cdn_ocn  (:,:,iblk), &
                         indxui     (:,iblk), indxuj   (:,iblk), &
                         kOL                , &
                         aiu      (:,:,iblk), &
                         uocn     (:,:,iblk), vocn     (:,:,iblk), &
                         waterx   (:,:,iblk), watery   (:,:,iblk), &
                         forcex   (:,:,iblk), forcey   (:,:,iblk), &
                         umassdti (:,:,iblk), &
                         uvel_init(:,:,iblk), vvel_init(:,:,iblk),&
                         uvel     (:,:,iblk), vvel     (:,:,iblk), &
                         bxfix    (:,:,iblk), byfix    (:,:,iblk), &
-                        bx       (:,:,iblk), by       (:,:,iblk))
+                        bx       (:,:,iblk), by       (:,:,iblk), &
+                        bvec     (:,iblk))
 
         call residual_vec (nx_block           , ny_block, &
                            icellu       (iblk), &
                            indxui   (:,iblk), indxuj   (:,iblk), &
                            bx       (:,:,iblk), by       (:,:,iblk), &
                            Au       (:,:,iblk), Av       (:,:,iblk), &
+                           Aw       (:,iblk)  , bvec     (:,iblk)  , &
                            Fx       (:,:,iblk), Fy       (:,:,iblk), &
                            L2norm(iblk))
 
         call precondD (nx_block,  ny_block, &
                        kOL, icellt(iblk), &
                        indxti   (:,iblk),   indxtj   (:,iblk), &
                        dxt      (:,:,iblk), dyt      (:,:,iblk), &
                        dxhy     (:,:,iblk), dyhx     (:,:,iblk), &
                        cxp      (:,:,iblk), cyp      (:,:,iblk), &
                        cxm      (:,:,iblk), cym      (:,:,iblk), &
                        uarear   (:,:,iblk), &
                        vrel     (:,:,iblk), Cb       (:,:,iblk), &
                        umassdti (:,:,iblk), zetaD    (:,:,iblk,:), &
                        Diagu    (:,:,iblk), Diagv    (:,:,iblk) , &
+                       DiagA    (:,iblk))
 
         ! 
load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) @@ -1121,7 +1130,8 @@ subroutine matvec (nx_block, ny_block, & uarear, Cb, & strintx, strinty, & uvel, vvel, & - Au, Av) + Au, Av, & + Aw) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1161,6 +1171,10 @@ subroutine matvec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl + real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + intent(inout) :: & + Aw ! A matrix times w, w=u,v (...u(i,j), v(i,j),...) + ! local variables integer (kind=int_kind) :: & @@ -1198,7 +1212,9 @@ subroutine matvec (nx_block, ny_block, & Au(i,j) = ccaimp*utp - ccb*vtp - strintx(i,j) Av(i,j) = ccaimp*vtp + ccb*utp - strinty(i,j) - + + Aw(2*ij-1)= ccaimp*utp - ccb*vtp - strintx(i,j) + Aw(2*ij) = ccaimp*vtp + ccb*utp - strinty(i,j) !----------------------------------------------------------------- ! ocean-ice stress for coupling ! here, strocn includes the factor of aice @@ -1274,7 +1290,7 @@ end subroutine calc_bfix !======================================================================= - subroutine bvec (nx_block, ny_block, & + subroutine calc_bvec (nx_block, ny_block, & icellu, Cw, & indxui, indxuj, & kOL, & @@ -1286,7 +1302,8 @@ subroutine bvec (nx_block, ny_block, & uvel_init, vvel_init,& uvel, vvel, & bxfix, byfix, & - bx, by) + bx, by, & + bvec) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1318,6 +1335,10 @@ subroutine bvec (nx_block, ny_block, & intent(inout) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl by ! b vector, by = tauy + byfix (N/m^2) !jfl + + real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + intent(inout) :: & + bvec ! b vector, bijx, bijy real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -1360,9 +1381,12 @@ subroutine bvec (nx_block, ny_block, & bx(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + taux by(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + tauy + bvec(2*ij-1)= umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + taux + bvec(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + tauy + enddo ! ij - end subroutine bvec + end subroutine calc_bvec !======================================================================= @@ -1371,6 +1395,7 @@ subroutine residual_vec (nx_block, ny_block, & indxui, indxuj, & bx, by, & Au, Av, & + Aw, bvec, & Fx, Fy, & L2normtp) @@ -1389,6 +1414,11 @@ subroutine residual_vec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2) ! jfl Av ! matvec, Fy = Av - by (N/m^2) ! jfl + real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + intent(in) :: & + bvec , & ! b vector, bijx, bijy + Aw + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & Fx , & ! x residual vector, Fx = Au - bx (N/m^2) @@ -1399,6 +1429,11 @@ subroutine residual_vec (nx_block, ny_block, & ! local variables + real (kind=dbl_kind), dimension (2*nx_block*ny_block) :: & + Fres + + real (kind=dbl_kind) :: L2norm + integer (kind=int_kind) :: & i, j, ij @@ -1410,16 +1445,22 @@ subroutine residual_vec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) + L2normtp=0d0 + do ij =1, icellu i = indxui(ij) j = indxuj(ij) Fx(i,j) = Au(i,j) - bx(i,j) Fy(i,j) = Av(i,j) - by(i,j) + L2normtp = L2normtp + Fx(i,j)**2 + Fy(i,j)**2 + Fres(2*ij-1) = Au(i,j) - bx(i,j) + Fres(2*ij) = Av(i,j) - by(i,j) enddo ! 
ij - L2normtp = DOT_PRODUCT(Fx,Fx)+DOT_PRODUCT(Fy,Fy) + L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) + print *, 'L2norm', L2norm, sqrt(L2normtp) end subroutine residual_vec @@ -1435,7 +1476,7 @@ subroutine precondD (nx_block, ny_block, & uarear, & vrel, Cb, & umassdti, zetaD, & - Diagu, Diagv) + Diagu, Diagv, DiagA) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1470,6 +1511,9 @@ subroutine precondD (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & Diagu, & ! diagonal matrix coefficients for u component Diagv ! diagonal matrix coefficients for v component + + real (kind=dbl_kind), dimension (2*nx_block*ny_block), intent(inout) :: & + DiagA ! local variables @@ -1543,7 +1587,6 @@ subroutine precondD (nx_block, ny_block, & shearnwDv = dyt(i,j) shearseDu = dxt(i,j) - !----------------------------------------------------------------- ! the stresses ! kg/s^2 ! (1) northeast, (2) northwest, (3) southwest, (4) southeast diff --git a/cicecore/cicedynB/general/ice_step_mod.F90 b/cicecore/cicedynB/general/ice_step_mod.F90 index c1c85fdae..cf2a6b17f 100644 --- a/cicecore/cicedynB/general/ice_step_mod.F90 +++ b/cicecore/cicedynB/general/ice_step_mod.F90 @@ -850,6 +850,7 @@ subroutine step_dyn_horiz (dt) use ice_dyn_evp, only: evp use ice_dyn_eap, only: eap + use ice_dyn_vp, only: imp_solver use ice_dyn_shared, only: kdyn, ktransport use ice_flux, only: init_history_dyn use ice_transport_driver, only: advection, transport_upwind, transport_remap From 4a8a3f86f239afc8d7407997921fa67f92bd195b Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 6 Jun 2018 13:40:10 +0000 Subject: [PATCH 011/196] added simple routine to form a vector --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 62 ++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 66c1790f4..7a5e99395 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -145,7 +145,8 @@ subroutine imp_solver (dt) real (kind=dbl_kind), dimension (2*nx_block*ny_block, max_blocks) :: & bvec , & ! b vector (...bu(i,j), bv(i,j),....) Aw , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...) - DiagA ! diagonal of matrix A + DiagA , & ! diagonal of matrix A + tpvec ! for debugging real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) @@ -459,7 +460,13 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & - bvec (:,iblk)) + bvec (:,iblk)) + + call form_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + tpvec (:,iblk)) call residual_vec (nx_block , ny_block, & icellu (iblk), & @@ -1799,6 +1806,57 @@ subroutine precondD (nx_block, ny_block, & end subroutine precondD + !======================================================================= + + subroutine form_vec (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + tpu, tpv , & + tpvec) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu ! total count when iceumask is true + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + tpu , & ! x-component of vector + tpv ! 
y-component of vector
 
      real (kind=dbl_kind), dimension (2*nx_block*ny_block), &
         intent(inout) :: &
         tpvec    ! vector
 
      ! local variables
 
      integer (kind=int_kind) :: &
         i, j, ij
 
      real (kind=dbl_kind) :: L2norm
 
      !-----------------------------------------------------------------
      ! form vector
      !-----------------------------------------------------------------
 
      do ij =1, icellu
         i = indxui(ij)
         j = indxuj(ij)
 
         tpvec(2*ij-1)= tpu(i,j)
         tpvec(2*ij)  = tpv(i,j)
 
      enddo                     ! ij
 
      L2norm = sqrt(DOT_PRODUCT(tpvec,tpvec))
 
      print *, 'ici uvel', icellu, L2norm
 
      end subroutine form_vec
 
! JFL ROUTINE TO CALC OCN STRESS FOR COUPLING

!=======================================================================

      end module ice_dyn_vp

From a8eecfe6fc0692da23ffd50e6756857cf672f790 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 6 Jun 2018 15:31:33 +0000
Subject: [PATCH 012/196] corrected bug for vectors of size ntot

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 85 +++++++++++++++--------
 1 file changed, 56 insertions(+), 29 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 66c1790f4..502ead123 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -105,12 +105,14 @@ subroutine imp_solver (dt)
         kOL             , & ! outer loop iteration
         kmax            , & ! jfl put in namelist
         iblk            , & ! block index
+        ntottp          , & ! ntottp = 2*icellu
         ilo,ihi,jlo,jhi, &  ! beginning and end of physical domain
         i, j, ij
 
       integer (kind=int_kind), dimension(max_blocks) :: &
         icellt   , & ! no. of cells where icetmask = 1
-        icellu       ! no. of cells where iceumask = 1
+        icellu   , & ! no. of cells where iceumask = 1
+        ntot         ! ntot = 2*icellu
 
       integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks) :: &
         indxti   , & ! compressed index in i-direction
 
-      real (kind=dbl_kind), dimension (2*nx_block*ny_block, max_blocks) :: &
-        bvec    , & ! b vector (...bu(i,j), bv(i,j),....)
-        Aw      , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...)
-        DiagA       ! diagonal of matrix A
+      real (kind=dbl_kind), allocatable :: &
+        bvec(:,:)  , & ! b vector (...bu(i,j), bv(i,j),....)
+        Aw(:,:)    , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...)
+        DiagA(:,:) , & ! diagonal of matrix A
+        tpvec(:,:)     ! 
for debugging real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) @@ -299,6 +301,10 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & Tbu (:,:,iblk)) + ntot(iblk) = 2*icellu(iblk) + allocate(bvec(ntot(iblk),max_blocks), Aw(ntot(iblk),max_blocks), & + DiagA(ntot(iblk),max_blocks), tpvec(ntot(iblk),max_blocks)) + call calc_bfix (nx_block , ny_block, & icellu(iblk) , & indxui (:,iblk), indxuj (:,iblk), & @@ -431,7 +437,7 @@ subroutine imp_solver (dt) vrel (:,:,iblk), Cb (:,:,iblk)) call matvec (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), ntot (iblk), & indxui (:,iblk), indxuj (:,iblk), & kOL , & aiu (:,:,iblk), strtmp (:,:,:), & @@ -448,9 +454,9 @@ subroutine imp_solver (dt) call calc_bvec (nx_block , ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & + icellu (iblk), ntot (iblk), & indxui (:,iblk), indxuj (:,iblk), & - kOL , & + kOL , Cdn_ocn (:,:,iblk), & aiu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & @@ -463,22 +469,25 @@ subroutine imp_solver (dt) bvec (:,iblk)) call form_vec (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), ntot (iblk), & indxui (:,iblk), indxuj (:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & tpvec (:,iblk)) call residual_vec (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), ntot (iblk), & indxui (:,iblk), indxuj (:,iblk), & bx (:,:,iblk), by (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk), & Aw (:,iblk) , bvec (:,iblk) , & Fx (:,:,iblk), Fy (:,:,iblk), & L2norm(iblk)) + stop + + call precondD (nx_block, ny_block, & - kOL, icellt(iblk), & + kOL, icellt(iblk), ntot(iblk), & indxti (:,iblk), indxtj (:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & dxhy (:,:,iblk), dyhx (:,:,iblk), & @@ -518,6 +527,7 @@ subroutine imp_solver (dt) enddo ! outer loop deallocate(fld2) + deallocate(bvec, Aw, DiagA, tpvec) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! Force symmetry across the tripole seam @@ -832,6 +842,7 @@ subroutine stress_vp (nx_block, ny_block, & !DIR$ CONCURRENT !Cray !cdir nodep !NEC !ocl novrec !Fujitsu + do ij = 1, icellt i = indxti(ij) j = indxtj(ij) @@ -1128,7 +1139,7 @@ end subroutine calc_vrel_Cb !======================================================================= subroutine matvec (nx_block, ny_block, & - icellu, & + icellu, ntot, & indxui, indxuj, & kOL, & aiu, str, & @@ -1143,6 +1154,7 @@ subroutine matvec (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! total count when iceumask is true + ntot, & ! size of problem ntot=2*icellu kOL ! outer loop iteration integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1178,7 +1190,7 @@ subroutine matvec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl - real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + real (kind=dbl_kind), dimension (ntot), & intent(inout) :: & Aw ! A matrix times w, w=u,v (...u(i,j), v(i,j),...) 
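
A note on the storage convention shared by bvec, Aw, DiagA and tpvec: the two velocity components are interleaved, with the u and v values of the ij-th masked point stored at entries 2*ij-1 and 2*ij, so that ntot = 2*icellu. form_vec implements the gather into that layout; a Krylov solver working on these vectors also needs the inverse scatter back onto the grid, sketched here (wvec is a placeholder name, not a variable in this file):

      ! scatter an interleaved solution vector back to the grid;
      ! wvec(2*ij-1) holds u and wvec(2*ij) holds v of point ij
      do ij = 1, icellu
         i = indxui(ij)
         j = indxuj(ij)
         uvel(i,j) = wvec(2*ij-1)
         vvel(i,j) = wvec(2*ij)
      enddo

Keeping u and v adjacent keeps the local 2x2 coupling between the components (the ccaimp and ccb terms) in contiguous pairs of entries of the assembled operator.
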
@@ -1200,6 +1212,8 @@ subroutine matvec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) + Aw(:)=c0 + do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -1298,9 +1312,9 @@ end subroutine calc_bfix !======================================================================= subroutine calc_bvec (nx_block, ny_block, & - icellu, Cw, & + icellu, ntot, & indxui, indxuj, & - kOL, & + kOL, Cw, & aiu, & uocn, vocn, & waterx, watery, & @@ -1315,6 +1329,7 @@ subroutine calc_bvec (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! total count when iceumask is true + ntot, & ! size of problem ntot=2*icellu kOL ! outer loop iteration integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1343,7 +1358,7 @@ subroutine calc_bvec (nx_block, ny_block, & bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl by ! b vector, by = tauy + byfix (N/m^2) !jfl - real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + real (kind=dbl_kind), dimension (ntot), & intent(inout) :: & bvec ! b vector, bijx, bijy @@ -1371,6 +1386,8 @@ subroutine calc_bvec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) + bvec(:)=c0 + do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -1398,7 +1415,7 @@ end subroutine calc_bvec !======================================================================= subroutine residual_vec (nx_block, ny_block, & - icellu, & + icellu, ntot, & indxui, indxuj, & bx, by, & Au, Av, & @@ -1408,7 +1425,8 @@ subroutine residual_vec (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu ! total count when iceumask is true + icellu, & ! total count when iceumask is true + ntot ! size of problem ntot=2*icellu integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1421,7 +1439,7 @@ subroutine residual_vec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2) ! jfl Av ! matvec, Fy = Av - by (N/m^2) ! jfl - real (kind=dbl_kind), dimension (2*nx_block*ny_block), & + real (kind=dbl_kind), dimension (ntot), & intent(in) :: & bvec , & ! b vector, bijx, bijy Aw @@ -1436,7 +1454,7 @@ subroutine residual_vec (nx_block, ny_block, & ! local variables - real (kind=dbl_kind), dimension (2*nx_block*ny_block) :: & + real (kind=dbl_kind), dimension (ntot) :: & Fres real (kind=dbl_kind) :: L2norm @@ -1452,7 +1470,8 @@ subroutine residual_vec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) - L2normtp=0d0 + L2normtp=c0 + Fres(:)=c0 do ij =1, icellu i = indxui(ij) @@ -1467,14 +1486,14 @@ subroutine residual_vec (nx_block, ny_block, & enddo ! ij L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) - print *, 'L2norm', L2norm, sqrt(L2normtp) + print *, 'ici L2norm', L2norm, sqrt(L2normtp), icellu, 2*nx_block*ny_block end subroutine residual_vec !======================================================================= subroutine precondD (nx_block, ny_block, & - kOL, icellt, & + kOL, icellt, ntot, & indxti, indxtj, & dxt, dyt, & dxhy, dyhx, & @@ -1488,7 +1507,8 @@ subroutine precondD (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions kOL , & ! subcycling step - icellt ! no. of cells where icetmask = 1 + icellt , & ! no. of cells where icetmask = 1 + ntot ! 
size of problem ntot=2*icellu

 integer (kind=int_kind), dimension (nx_block*ny_block), &
 intent(in) :: &
@@ -1519,7 +1539,7 @@ subroutine precondD (nx_block, ny_block, &
 Diagu, & ! diagonal matrix coefficients for u component
 Diagv ! diagonal matrix coefficients for v component

- real (kind=dbl_kind), dimension (2*nx_block*ny_block), intent(inout) :: &
+ real (kind=dbl_kind), dimension (ntot), intent(inout) :: &
 DiagA

 ! local variables
@@ -1569,6 +1589,10 @@ subroutine precondD (nx_block, ny_block, &
 !DIR$ CONCURRENT !Cray
 !cdir nodep !NEC
 !ocl novrec !Fujitsu
+
+
+! WARNING: here it is icellt and not icellu....FIX TO BE DONE!!!
+
 do ij = 1, icellt
 i = indxti(ij)
 j = indxtj(ij)
@@ -1809,14 +1833,15 @@ end subroutine precondD

 !=======================================================================

 subroutine form_vec (nx_block, ny_block, &
- icellu, &
+ icellu, ntot, &
 indxui, indxuj, &
 tpu, tpv , &
 tpvec)

 integer (kind=int_kind), intent(in) :: &
 nx_block, ny_block, & ! block dimensions
- icellu ! total count when iceumask is true
+ icellu, & ! total count when iceumask is true
+ ntot ! size of the problem ntot=2*icellu

 integer (kind=int_kind), dimension (nx_block*ny_block), &
 intent(in) :: &
@@ -1827,7 +1852,7 @@ subroutine form_vec (nx_block, ny_block, &
 tpu , & ! x-component of vector
 tpv ! y-component of vector

- real (kind=dbl_kind), dimension (2*nx_block*ny_block), &
+ real (kind=dbl_kind), dimension (ntot), &
 intent(inout) :: &
 tpvec ! vector

@@ -1842,6 +1867,8 @@ subroutine form_vec (nx_block, ny_block, &
 ! form vector
 !-----------------------------------------------------------------

+ tpvec(:)=c0
+
 do ij =1, icellu
 i = indxui(ij)
 j = indxuj(ij)

From 8245a45a4a15991d4e0b4fc8832c48150bc118e1 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 6 Jun 2018 17:01:01 +0000
Subject: [PATCH 013/196] added vectors of size ntot to bvec and matvec

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 59 ++++++++++++-----------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 502ead123..8433953c4 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -145,9 +145,10 @@ subroutine imp_solver (dt)
 umassdti ! mass of U-cell/dte (kg/m^2 s)

 real (kind=dbl_kind), allocatable :: &
- bvec(:,:) , & ! b vector (...bu(i,j), bv(i,j),....)
- Aw(:,:) , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...)
- DiagA(:,:) , & ! diagonal of matrix A
+ bvecfix(:,:), & ! part of b vector that does not change during the OL
+ bvec(:,:) , & ! b vector (...bu(i,j), bv(i,j),....)
+ Aw(:,:) , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...)
+ DiagA(:,:) , & ! diagonal of matrix A
 tpvec(:,:) !
for debugging real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) @@ -302,16 +303,18 @@ subroutine imp_solver (dt) Tbu (:,:,iblk)) ntot(iblk) = 2*icellu(iblk) - allocate(bvec(ntot(iblk),max_blocks), Aw(ntot(iblk),max_blocks), & - DiagA(ntot(iblk),max_blocks), tpvec(ntot(iblk),max_blocks)) + allocate(bvec(ntot(iblk),max_blocks), bvecfix(ntot(iblk),max_blocks), & + Aw(ntot(iblk),max_blocks), DiagA(ntot(iblk),max_blocks), & + tpvec(ntot(iblk),max_blocks)) call calc_bfix (nx_block , ny_block, & - icellu(iblk) , & + icellu(iblk) , ntot (iblk), & indxui (:,iblk), indxuj (:,iblk), & umassdti (:,:,iblk), & forcex (:,:,iblk), forcey (:,:,iblk), & uvel_init (:,:,iblk), vvel_init (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk)) + bxfix (:,:,iblk), byfix (:,:,iblk), & + bvecfix (:,iblk)) !----------------------------------------------------------------- ! ice strength @@ -460,13 +463,10 @@ subroutine imp_solver (dt) aiu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - forcex (:,:,iblk), forcey (:,:,iblk), & - umassdti (:,:,iblk), & - uvel_init(:,:,iblk), vvel_init(:,:,iblk),& uvel (:,:,iblk), vvel (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & - bvec (:,iblk)) + bvecfix (:,iblk) , bvec (:,iblk)) call form_vec (nx_block , ny_block, & icellu (iblk), ntot (iblk), & @@ -527,7 +527,7 @@ subroutine imp_solver (dt) enddo ! outer loop deallocate(fld2) - deallocate(bvec, Aw, DiagA, tpvec) + deallocate(bvecfix, bvec, Aw, DiagA, tpvec) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! Force symmetry across the tripole seam @@ -1258,16 +1258,18 @@ end subroutine matvec !======================================================================= subroutine calc_bfix (nx_block, ny_block, & - icellu, & + icellu, ntot, & indxui, indxuj, & umassdti, & forcex, forcey, & uvel_init, vvel_init, & - bxfix, byfix ) + bxfix, byfix, & + bvecfix) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu ! no. of cells where iceumask = 1 + icellu, & ! no. of cells where iceumask = 1 + ntot ! size of problem ntot=2*icellu integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1287,6 +1289,10 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix , & ! bx = taux + bxfix !jfl byfix ! by = tauy + byfix !jfl + real (kind=dbl_kind), dimension (ntot), & + intent(out) :: & + bvecfix ! fixed part of b vector + ! local variables integer (kind=int_kind) :: & @@ -1304,6 +1310,8 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + bvecfix(2*ij-1) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + bvecfix(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) enddo @@ -1318,13 +1326,10 @@ subroutine calc_bvec (nx_block, ny_block, & aiu, & uocn, vocn, & waterx, watery, & - forcex, forcey, & - umassdti, & - uvel_init, vvel_init,& uvel, vvel, & bxfix, byfix, & bx, by, & - bvec) + bvecfix, bvec) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1338,20 +1343,18 @@ subroutine calc_bvec (nx_block, ny_block, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - uvel_init,& ! x-component of velocity (m/s), beginning of timestep - vvel_init,& ! y-component of velocity (m/s), beginning of timestep uvel , & ! x-component of velocity (m/s) vvel , & ! y-component of velocity (m/s) aiu , & ! ice fraction on u-grid waterx , & ! 
for ocean stress calculation, x (m/s) watery , & ! for ocean stress calculation, y (m/s) - forcex , & ! work array: combined atm stress and ocn tilt, x - forcey , & ! work array: combined atm stress and ocn tilt, y bxfix , & ! bx = taux + bxfix !jfl byfix , & ! by = tauy + byfix !jfl - umassdti, & ! mass of U-cell/dt (kg/m^2 s) uocn , & ! ocean current, x-direction (m/s) vocn ! ocean current, y-direction (m/s) + + real (kind=dbl_kind), dimension (ntot), intent(in) :: & + bvecfix ! fixed part of the b vector real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -1402,11 +1405,11 @@ subroutine calc_bvec (nx_block, ny_block, & taux = vrel*waterx(i,j) ! NOTE this is not the entire tauy = vrel*watery(i,j) ! ocn stress term - bx(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + taux - by(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + tauy + bx(i,j) = bxfix(i,j) + taux + by(i,j) = byfix(i,j) + tauy - bvec(2*ij-1)= umassdti(i,j)*uvel_init(i,j) + forcex(i,j) + taux - bvec(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) + tauy + bvec(2*ij-1)= bvecfix(2*ij-1) + taux + bvec(2*ij) = bvecfix(2*ij) + tauy enddo ! ij From 18fc8960732efbeb19dc5418e1b3041c1effae07 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 6 Jun 2018 18:01:45 +0000 Subject: [PATCH 014/196] step back...not so easy to form all the vectors for the blocks... --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 158 +++++++--------------- 1 file changed, 47 insertions(+), 111 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 8433953c4..523ae6f40 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -105,7 +105,6 @@ subroutine imp_solver (dt) kOL , & ! outer loop iteration kmax , & ! jfl put in namelist iblk , & ! block index - ntottp , & ! ntottp = 2*icellu ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -144,13 +143,6 @@ subroutine imp_solver (dt) umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) - real (kind=dbl_kind), allocatable :: & - bvecfix(:,:), & ! part of b vector that does not change durign the OL - bvec(:,:) , & ! b vector (...bu(i,j), bv(i,j),....) - Aw(:,:) , & ! A matrix times w, w=u,v (...u(i,j), v(i,j),...) - DiagA(:,:) , & ! diagonal of matrix A - tpvec(:,:) ! for debugging - real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm @@ -302,19 +294,13 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & Tbu (:,:,iblk)) - ntot(iblk) = 2*icellu(iblk) - allocate(bvec(ntot(iblk),max_blocks), bvecfix(ntot(iblk),max_blocks), & - Aw(ntot(iblk),max_blocks), DiagA(ntot(iblk),max_blocks), & - tpvec(ntot(iblk),max_blocks)) - call calc_bfix (nx_block , ny_block, & - icellu(iblk) , ntot (iblk), & + icellu(iblk) , & indxui (:,iblk), indxuj (:,iblk), & umassdti (:,:,iblk), & forcex (:,:,iblk), forcey (:,:,iblk), & uvel_init (:,:,iblk), vvel_init (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk), & - bvecfix (:,iblk)) + bxfix (:,:,iblk), byfix (:,:,iblk)) !----------------------------------------------------------------- ! 
ice strength @@ -440,7 +426,7 @@ subroutine imp_solver (dt) vrel (:,:,iblk), Cb (:,:,iblk)) call matvec (nx_block , ny_block, & - icellu (iblk), ntot (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & kOL , & aiu (:,:,iblk), strtmp (:,:,:), & @@ -449,15 +435,14 @@ subroutine imp_solver (dt) uarear (:,:,iblk), Cb (:,:,iblk), & strintx (:,:,iblk), strinty (:,:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - Aw (:,iblk)) + Au (:,:,iblk), Av (:,:,iblk)) ! end of Au and Av calc ! CALC b_u and b_v (bvec) call calc_bvec (nx_block , ny_block, & - icellu (iblk), ntot (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & kOL , Cdn_ocn (:,:,iblk), & aiu (:,:,iblk), & @@ -465,21 +450,18 @@ subroutine imp_solver (dt) waterx (:,:,iblk), watery (:,:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - bvecfix (:,iblk) , bvec (:,iblk)) + bx (:,:,iblk), by (:,:,iblk)) call form_vec (nx_block , ny_block, & - icellu (iblk), ntot (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - tpvec (:,iblk)) + uvel (:,:,iblk), vvel (:,:,iblk)) call residual_vec (nx_block , ny_block, & - icellu (iblk), ntot (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & bx (:,:,iblk), by (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk), & - Aw (:,iblk) , bvec (:,iblk) , & Fx (:,:,iblk), Fy (:,:,iblk), & L2norm(iblk)) @@ -487,7 +469,7 @@ subroutine imp_solver (dt) call precondD (nx_block, ny_block, & - kOL, icellt(iblk), ntot(iblk), & + kOL , icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & dxhy (:,:,iblk), dyhx (:,:,iblk), & @@ -496,8 +478,7 @@ subroutine imp_solver (dt) uarear (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk), & umassdti (:,:,iblk), zetaD (:,:,iblk,:), & - Diagu (:,:,iblk), Diagv (:,:,iblk) , & - DiagA (:,iblk)) + Diagu (:,:,iblk), Diagv (:,:,iblk)) ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) @@ -527,7 +508,6 @@ subroutine imp_solver (dt) enddo ! outer loop deallocate(fld2) - deallocate(bvecfix, bvec, Aw, DiagA, tpvec) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! Force symmetry across the tripole seam @@ -1139,7 +1119,7 @@ end subroutine calc_vrel_Cb !======================================================================= subroutine matvec (nx_block, ny_block, & - icellu, ntot, & + icellu, & indxui, indxuj, & kOL, & aiu, str, & @@ -1148,13 +1128,11 @@ subroutine matvec (nx_block, ny_block, & uarear, Cb, & strintx, strinty, & uvel, vvel, & - Au, Av, & - Aw) + Au, Av) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! total count when iceumask is true - ntot, & ! size of problem ntot=2*icellu kOL ! outer loop iteration integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1190,10 +1168,6 @@ subroutine matvec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl - real (kind=dbl_kind), dimension (ntot), & - intent(inout) :: & - Aw ! A matrix times w, w=u,v (...u(i,j), v(i,j),...) - ! 
local variables integer (kind=int_kind) :: & @@ -1212,8 +1186,6 @@ subroutine matvec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) - Aw(:)=c0 - do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -1234,8 +1206,8 @@ subroutine matvec (nx_block, ny_block, & Au(i,j) = ccaimp*utp - ccb*vtp - strintx(i,j) Av(i,j) = ccaimp*vtp + ccb*utp - strinty(i,j) - Aw(2*ij-1)= ccaimp*utp - ccb*vtp - strintx(i,j) - Aw(2*ij) = ccaimp*vtp + ccb*utp - strinty(i,j) +! Aw(2*ij-1)= ccaimp*utp - ccb*vtp - strintx(i,j) +! Aw(2*ij) = ccaimp*vtp + ccb*utp - strinty(i,j) !----------------------------------------------------------------- ! ocean-ice stress for coupling ! here, strocn includes the factor of aice @@ -1258,18 +1230,16 @@ end subroutine matvec !======================================================================= subroutine calc_bfix (nx_block, ny_block, & - icellu, ntot, & + icellu, & indxui, indxuj, & umassdti, & forcex, forcey, & uvel_init, vvel_init, & - bxfix, byfix, & - bvecfix) + bxfix, byfix) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! no. of cells where iceumask = 1 - ntot ! size of problem ntot=2*icellu + icellu ! no. of cells where iceumask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1289,10 +1259,6 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix , & ! bx = taux + bxfix !jfl byfix ! by = tauy + byfix !jfl - real (kind=dbl_kind), dimension (ntot), & - intent(out) :: & - bvecfix ! fixed part of b vector - ! local variables integer (kind=int_kind) :: & @@ -1310,8 +1276,8 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) - bvecfix(2*ij-1) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) - bvecfix(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) +! bvecfix(2*ij-1) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) +! bvecfix(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) enddo @@ -1320,7 +1286,7 @@ end subroutine calc_bfix !======================================================================= subroutine calc_bvec (nx_block, ny_block, & - icellu, ntot, & + icellu, & indxui, indxuj, & kOL, Cw, & aiu, & @@ -1328,13 +1294,11 @@ subroutine calc_bvec (nx_block, ny_block, & waterx, watery, & uvel, vvel, & bxfix, byfix, & - bx, by, & - bvecfix, bvec) + bx, by) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! total count when iceumask is true - ntot, & ! size of problem ntot=2*icellu kOL ! outer loop iteration integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1353,18 +1317,11 @@ subroutine calc_bvec (nx_block, ny_block, & uocn , & ! ocean current, x-direction (m/s) vocn ! ocean current, y-direction (m/s) - real (kind=dbl_kind), dimension (ntot), intent(in) :: & - bvecfix ! fixed part of the b vector - real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl by ! b vector, by = tauy + byfix (N/m^2) !jfl - real (kind=dbl_kind), dimension (ntot), & - intent(inout) :: & - bvec ! b vector, bijx, bijy - real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & Cw ! 
ocean-ice neutral drag coefficient @@ -1389,8 +1346,6 @@ subroutine calc_bvec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) - bvec(:)=c0 - do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -1408,8 +1363,8 @@ subroutine calc_bvec (nx_block, ny_block, & bx(i,j) = bxfix(i,j) + taux by(i,j) = byfix(i,j) + tauy - bvec(2*ij-1)= bvecfix(2*ij-1) + taux - bvec(2*ij) = bvecfix(2*ij) + tauy +! bvec(2*ij-1)= bvecfix(2*ij-1) + taux +! bvec(2*ij) = bvecfix(2*ij) + tauy enddo ! ij @@ -1418,18 +1373,16 @@ end subroutine calc_bvec !======================================================================= subroutine residual_vec (nx_block, ny_block, & - icellu, ntot, & + icellu, & indxui, indxuj, & bx, by, & Au, Av, & - Aw, bvec, & Fx, Fy, & L2normtp) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - ntot ! size of problem ntot=2*icellu + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1442,11 +1395,6 @@ subroutine residual_vec (nx_block, ny_block, & Au , & ! matvec, Fx = Au - bx (N/m^2) ! jfl Av ! matvec, Fy = Av - by (N/m^2) ! jfl - real (kind=dbl_kind), dimension (ntot), & - intent(in) :: & - bvec , & ! b vector, bijx, bijy - Aw - real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & Fx , & ! x residual vector, Fx = Au - bx (N/m^2) @@ -1454,13 +1402,6 @@ subroutine residual_vec (nx_block, ny_block, & real (kind=dbl_kind), intent(inout) :: & L2normtp ! (L2norm)^2 - - ! local variables - - real (kind=dbl_kind), dimension (ntot) :: & - Fres - - real (kind=dbl_kind) :: L2norm integer (kind=int_kind) :: & i, j, ij @@ -1474,7 +1415,6 @@ subroutine residual_vec (nx_block, ny_block, & file=__FILE__, line=__LINE__) L2normtp=c0 - Fres(:)=c0 do ij =1, icellu i = indxui(ij) @@ -1483,20 +1423,26 @@ subroutine residual_vec (nx_block, ny_block, & Fx(i,j) = Au(i,j) - bx(i,j) Fy(i,j) = Av(i,j) - by(i,j) L2normtp = L2normtp + Fx(i,j)**2 + Fy(i,j)**2 - Fres(2*ij-1) = Au(i,j) - bx(i,j) - Fres(2*ij) = Av(i,j) - by(i,j) +! Fres(2*ij-1) = Au(i,j) - bx(i,j) +! Fres(2*ij) = Av(i,j) - by(i,j) enddo ! ij - L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) - print *, 'ici L2norm', L2norm, sqrt(L2normtp), icellu, 2*nx_block*ny_block +! do ij = 1, ntot + +! Ftp(ij) = Aw(ij) - bvec(ij) + +! enddo + +! L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) + print *, 'ici L2norm', sqrt(L2normtp) end subroutine residual_vec !======================================================================= subroutine precondD (nx_block, ny_block, & - kOL, icellt, ntot, & + kOL, icellt, & indxti, indxtj, & dxt, dyt, & dxhy, dyhx, & @@ -1505,13 +1451,12 @@ subroutine precondD (nx_block, ny_block, & uarear, & vrel, Cb, & umassdti, zetaD, & - Diagu, Diagv, DiagA) + Diagu, Diagv) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions kOL , & ! subcycling step - icellt , & ! no. of cells where icetmask = 1 - ntot ! size of problem ntot=2*icellu + icellt ! no. of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1542,9 +1487,6 @@ subroutine precondD (nx_block, ny_block, & Diagu, & ! diagonal matrix coefficients for u component Diagv ! diagonal matrix coefficients for v component - real (kind=dbl_kind), dimension (ntot), intent(inout) :: & - DiagA - ! 
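precondD assembles the diagonal of the system matrix (Diagu for the u equations, Diagv for the v equations), which is what a Jacobi preconditioning step in the solver loop would consume in place of the identity. A minimal sketch of such a step, using the same interleaved ordering as the other ntot-long vectors (wk1/wk2 are assumed names for the solver work vectors; assumes nonzero diagonal entries):

      do ij = 1, icellu
         i = indxui(ij)
         j = indxuj(ij)
         wk2(2*ij-1) = wk1(2*ij-1)/Diagu(i,j)   ! u component
         wk2(2*ij)   = wk1(2*ij)  /Diagv(i,j)   ! v component
      enddo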
local variables integer (kind=int_kind) :: & @@ -1836,15 +1778,13 @@ end subroutine precondD !======================================================================= subroutine form_vec (nx_block, ny_block, & - icellu, ntot, & + icellu, & indxui, indxuj, & - tpu, tpv , & - tpvec) + tpu, tpv ) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - ntot ! size of the problem ntot=2*icellu + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1855,10 +1795,6 @@ subroutine form_vec (nx_block, ny_block, & tpu , & ! x-component of vector tpv ! y-component of vector - real (kind=dbl_kind), dimension (ntot), & - intent(inout) :: & - tpvec ! vector - ! local variables integer (kind=int_kind) :: & @@ -1870,20 +1806,20 @@ subroutine form_vec (nx_block, ny_block, & ! fomr vector !----------------------------------------------------------------- - tpvec(:)=c0 +! tpvec(:)=c0 do ij =1, icellu i = indxui(ij) j = indxuj(ij) - tpvec(2*ij-1)= tpu(i,j) - tpvec(2*ij) = tpv(i,j) +! tpvec(2*ij-1)= tpu(i,j) +! tpvec(2*ij) = tpv(i,j) enddo ! ij - L2norm = sqrt(DOT_PRODUCT(tpvec,tpvec)) +! L2norm = sqrt(DOT_PRODUCT(tpvec,tpvec)) - print *, 'ici uvel', icellu, L2norm +! print *, 'ici uvel', icellu, L2norm end subroutine form_vec From 4a3e1f275acd5bccba0578d8aa482a77f85564fc Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 6 Jun 2018 19:01:42 +0000 Subject: [PATCH 015/196] new subroutine for assembling ntot vectors --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 115 ++++++++++++++++++---- 1 file changed, 94 insertions(+), 21 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 523ae6f40..29f00d839 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -104,14 +104,14 @@ subroutine imp_solver (dt) integer (kind=int_kind) :: & kOL , & ! outer loop iteration kmax , & ! jfl put in namelist + ntot , & ! size of problem for fgmres (for given cpu) iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij integer (kind=int_kind), dimension(max_blocks) :: & icellt , & ! no. of cells where icetmask = 1 - icellu , & ! no. of cells where iceumask = 1 - ntot ! ntot = 2*icellu + icellu ! no. of cells where iceumask = 1 integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks) :: & indxti , & ! compressed index in i-direction @@ -145,6 +145,8 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) + real (kind=dbl_kind), allocatable :: bvec(:), sol(:) + real (kind=dbl_kind), dimension (max_blocks) :: L2norm real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & @@ -319,13 +321,23 @@ subroutine imp_solver (dt) strength(i,j, iblk) ) enddo ! ij - ! load velocity into array for boundary updates + ! load velocity into array for boundary updates JFL move? fld2(:,:,1,iblk) = uvel(:,:,iblk) fld2(:,:,2,iblk) = vvel(:,:,iblk) enddo ! iblk !$TCXOMP END PARALLEL DO + !----------------------------------------------------------------- + ! value of ntot + !----------------------------------------------------------------- + + ntot=0 + do iblk = 1, nblocks + ntot = ntot + icellu(iblk) + enddo + ntot = 2*ntot ! 
times 2 because of u and v + call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) @@ -452,10 +464,10 @@ subroutine imp_solver (dt) bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk)) - call form_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk)) + call calc_L2norm (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk)) call residual_vec (nx_block , ny_block, & icellu (iblk), & @@ -465,9 +477,6 @@ subroutine imp_solver (dt) Fx (:,:,iblk), Fy (:,:,iblk), & L2norm(iblk)) - stop - - call precondD (nx_block, ny_block, & kOL , icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & @@ -478,7 +487,7 @@ subroutine imp_solver (dt) uarear (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk), & umassdti (:,:,iblk), zetaD (:,:,iblk,:), & - Diagu (:,:,iblk), Diagv (:,:,iblk)) + Diagu (:,:,iblk), Diagv (:,:,iblk)) ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) @@ -486,6 +495,14 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO + + allocate(bvec(ntot)) + ! form b vector for fgmres + call form_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + bx (:,:,:), by (:,:,:), & + bvec(:)) call ice_timer_start(timer_bound) if (maskhalo_dyn) then @@ -1777,10 +1794,10 @@ end subroutine precondD !======================================================================= - subroutine form_vec (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - tpu, tpv ) + subroutine calc_L2norm (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + tpu, tpv ) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1803,23 +1820,79 @@ subroutine form_vec (nx_block, ny_block, & real (kind=dbl_kind) :: L2norm !----------------------------------------------------------------- - ! fomr vector + ! form vector !----------------------------------------------------------------- -! tpvec(:)=c0 + L2norm = c0 do ij =1, icellu i = indxui(ij) j = indxuj(ij) -! tpvec(2*ij-1)= tpu(i,j) -! tpvec(2*ij) = tpv(i,j) + L2norm = L2norm + tpu(i,j)**2 + L2norm = L2norm + tpv(i,j)**2 enddo ! ij -! L2norm = sqrt(DOT_PRODUCT(tpvec,tpvec)) + L2norm = sqrt(L2norm) + + print *, 'ici uvel', nx_block, ny_block, icellu, L2norm + + end subroutine calc_L2norm + + !======================================================================= + + subroutine form_vec (nx_block, ny_block, max_blocks, & + icellu, ntot, & + indxui, indxuj, & + tpu, tpv, & + outvec) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + max_blocks, & ! nb of blocks + ntot ! size of problem for fgmres + + integer (kind=int_kind), dimension (max_blocks), intent(in) :: & + icellu + + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(in) :: & + tpu , & ! x-component of vector + tpv ! y-component of vector + + real (kind=dbl_kind), dimension (ntot), intent(inout) :: & + outvec + + ! local variables + + integer (kind=int_kind) :: & + i, j, iblk, tot, ij + + + !----------------------------------------------------------------- + ! 
fomr vector + !----------------------------------------------------------------- + + outvec(:)=c0 + tot=0 -! print *, 'ici uvel', icellu, L2norm + do iblk=1, max_blocks + do ij =1, icellu(iblk) + i = indxui(ij,iblk) + j = indxuj(ij,iblk) + tot=tot+1 + outvec(tot)=tpu(i,j,iblk) + tot=tot+1 + outvec(tot)=tpv(i,j,iblk) + enddo + enddo! ij + + print *, 'NTOT', tot, ntot end subroutine form_vec From 0690b1ea9f5f2154ee4d4517dd7db1eeac8a47f4 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 6 Jun 2018 19:58:15 +0000 Subject: [PATCH 016/196] In the process of adding the call to fgmres --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 95 +++++++++++++---------- 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 29f00d839..208c38d33 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -386,7 +386,7 @@ subroutine imp_solver (dt) do kOL = 1,kmax ! outer loop !----------------------------------------------------------------- - ! stress tensor equation, total surface stress + ! Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k) !----------------------------------------------------------------- !$OMP PARALLEL DO PRIVATE(iblk,strtmp) @@ -394,19 +394,44 @@ subroutine imp_solver (dt) uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) + + call viscous_coeff (nx_block , ny_block, & + kOL , icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk),& + strength (:,:,iblk), zetaD (:,:,iblk,:)) + + call calc_vrel_Cb (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , & + aiu (:,:,iblk), Tbu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + vrel (:,:,iblk), Cb (:,:,iblk)) + + call calc_bvec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , Cdn_ocn (:,:,iblk), & + aiu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + bxfix (:,:,iblk), byfix (:,:,iblk), & + bx (:,:,iblk), by (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO -! 
CALC Au and Av (MATVEC) - - call viscous_coeff (nx_block , ny_block, & - kOL , icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk),& - strength (:,:,iblk), zetaD (:,:,iblk,:)) + + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + do iblk = 1, nblocks call stress_vp (nx_block, ny_block, & kOL, icellt(iblk), & @@ -426,16 +451,7 @@ subroutine imp_solver (dt) stress12_3(:,:,iblk), stress12_4(:,:,iblk), & shear (:,:,iblk), divu (:,:,iblk), & rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & - strtmp (:,:,:)) - - call calc_vrel_Cb (nx_block , ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - kOL , & - aiu (:,:,iblk), Tbu (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - vrel (:,:,iblk), Cb (:,:,iblk)) + strtmp (:,:,:)) call matvec (nx_block , ny_block, & icellu (iblk), & @@ -452,19 +468,8 @@ subroutine imp_solver (dt) ! end of Au and Av calc ! CALC b_u and b_v (bvec) - - call calc_bvec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - kOL , Cdn_ocn (:,:,iblk), & - aiu (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk)) - call calc_L2norm (nx_block , ny_block, & + call calc_L2norm (nx_block , ny_block, & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk)) @@ -492,17 +497,23 @@ subroutine imp_solver (dt) ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) fld2(:,:,2,iblk) = vvel(:,:,iblk) - + enddo !$OMP END PARALLEL DO - allocate(bvec(ntot)) - ! form b vector for fgmres + allocate(bvec(ntot), sol(ntot)) + ! form b vector for fgmres call form_vec (nx_block, ny_block, max_blocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & bx (:,:,:), by (:,:,:), & - bvec(:)) + bvec(:)) + ! form sol vector for fgmres (sol is iniguess at the beginning) + call form_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + uprev_k (:,:,:), vprev_k (:,:,:), & + sol(:)) call ice_timer_start(timer_bound) if (maskhalo_dyn) then @@ -1452,7 +1463,7 @@ subroutine residual_vec (nx_block, ny_block, & ! enddo ! L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) - print *, 'ici L2norm', sqrt(L2normtp) +! print *, 'ici L2norm', sqrt(L2normtp) end subroutine residual_vec @@ -1836,7 +1847,7 @@ subroutine calc_L2norm (nx_block, ny_block, & L2norm = sqrt(L2norm) - print *, 'ici uvel', nx_block, ny_block, icellu, L2norm +! print *, 'ici uvel', nx_block, ny_block, icellu, L2norm end subroutine calc_L2norm @@ -1892,7 +1903,7 @@ subroutine form_vec (nx_block, ny_block, max_blocks, & enddo enddo! ij - print *, 'NTOT', tot, ntot +! 
print *, 'NTOT', max_blocks, tot, ntot end subroutine form_vec From 4dc1a092ae9199d8b718747ae348c79f5e554752 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 8 Jun 2018 18:10:54 +0000 Subject: [PATCH 017/196] new subroutine to convert vector to the max_blocks arrays --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 196 +++++++++++++++++++--- 1 file changed, 176 insertions(+), 20 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 208c38d33..e800827ea 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -105,6 +105,10 @@ subroutine imp_solver (dt) kOL , & ! outer loop iteration kmax , & ! jfl put in namelist ntot , & ! size of problem for fgmres (for given cpu) + icode , & ! for fgmres + its , & ! iteration nb for fgmres + maxits , & ! max nb of iteration for fgmres + im_fgmres , & ! for size of Krylov subspace iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -145,7 +149,8 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) - real (kind=dbl_kind), allocatable :: bvec(:), sol(:) + real (kind=dbl_kind), allocatable :: bvec(:), sol(:), wk11(:), wk22(:) + real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm @@ -429,10 +434,64 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO + allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot)) + ! form b vector from matrices (max_blocks matrices) + call arrays_to_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + bx (:,:,:), by (:,:,:), & + bvec(:)) + ! form sol vector for fgmres (sol is iniguess at the beginning) + call arrays_to_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + uprev_k (:,:,:), vprev_k (:,:,:), & + sol(:)) + +!----------------------------------------------------------------------- +! prep F G M R E S +!----------------------------------------------------------------------- + + icode = 0 + conv = 1.d0 + its = 0 + maxits = 50 + +!----------------------------------------------------------------------- +! F G M R E S L O O P +!----------------------------------------------------------------------- + 1 continue +!----------------------------------------------------------------------- + + call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & + sol_fgm_eps, maxits,its,conv,icode ) + + if (icode == 1) then + +! if (sol2D_precond_S == 'JACOBI') then +! call pre_jacobi2D ( wk22,wk11,Prec_xevec_8,niloc,njloc,& +! F_nk,Prec_ai_8,Prec_bi_8,Prec_ci_8 ) +! else +! call dcopy (nloc, wk11, 1, wk22, 1) ! precond=identity +! endif + + wk11(:)=wk22(:) ! precond=identity + + goto 1 + + else + + if (icode >= 2) then + +! if (Lun_debug_L.and.print_conv_L) write(lun_out, 199) conv,its +! call sol_matvec ( wk22, wk11, Minx, Maxx, Miny, Maxy, & +! 
nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks +JFL need to convert wk11 to uvel, vvel + call stress_vp (nx_block, ny_block, & kOL, icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & @@ -442,7 +501,62 @@ subroutine imp_solver (dt) cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), tinyarea (:,:,iblk), & - zetaD (:,:,iblk,:),strength (:,:,iblk), & & + zetaD (:,:,iblk,:),strength (:,:,iblk), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk), & + shear (:,:,iblk), divu (:,:,iblk), & + rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & + strtmp (:,:,:)) + + call matvec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + kOL , & + aiu (:,:,iblk), strtmp (:,:,:), & + vrel (:,:,iblk), & + umassdti (:,:,iblk), fm (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + ! form sol vector for fgmres (sol is iniguess at the beginning) + call form_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Au (:,:,:), Av (:,:,:), & + wk22(:)) + + goto 1 + + endif + + endif + +! 199 format (3x,'Iterative FGMRES solver convergence criteria: ',1pe14.7,' at iteration', i3) + +! deallocate (wk11,wk22,rhs1,sol1,vv_8,ww_8) + + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + do iblk = 1, nblocks + + call stress_vp (nx_block, ny_block, & + kOL, icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk), & + zetaD (:,:,iblk,:),strength (:,:,iblk), & stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & @@ -499,21 +613,7 @@ subroutine imp_solver (dt) fld2(:,:,2,iblk) = vvel(:,:,iblk) enddo - !$OMP END PARALLEL DO - - allocate(bvec(ntot), sol(ntot)) - ! form b vector for fgmres - call form_vec (nx_block, ny_block, max_blocks, & - icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - bx (:,:,:), by (:,:,:), & - bvec(:)) - ! form sol vector for fgmres (sol is iniguess at the beginning) - call form_vec (nx_block, ny_block, max_blocks, & - icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) + !$OMP END PARALLEL DO call ice_timer_start(timer_bound) if (maskhalo_dyn) then @@ -1853,7 +1953,7 @@ end subroutine calc_L2norm !======================================================================= - subroutine form_vec (nx_block, ny_block, max_blocks, & + subroutine arrays_to_vec (nx_block, ny_block, max_blocks, & icellu, ntot, & indxui, indxuj, & tpu, tpv, & @@ -1886,7 +1986,7 @@ subroutine form_vec (nx_block, ny_block, max_blocks, & !----------------------------------------------------------------- - ! fomr vector + ! form vector (converts from max_blocks arrays to single vector !----------------------------------------------------------------- outvec(:)=c0 @@ -1905,7 +2005,63 @@ subroutine form_vec (nx_block, ny_block, max_blocks, & ! 
print *, 'NTOT', max_blocks, tot, ntot - end subroutine form_vec + end subroutine arrays_to_vec + + !======================================================================= + + subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & + icellu, ntot, & + indxui, indxuj, & + outvec, & + tpu, tpv) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + max_blocks, & ! nb of blocks + ntot ! size of problem for fgmres + + integer (kind=int_kind), dimension (max_blocks), intent(in) :: & + icellu + + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (ntot), intent(in) :: & + invec + + real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(inout) :: & + tpu , & ! x-component of vector + tpv ! y-component of vector + + ! local variables + + integer (kind=int_kind) :: & + i, j, iblk, tot, ij + + + !----------------------------------------------------------------- + ! form arrays (converts from vector to the max_blocks arrays + !----------------------------------------------------------------- + + outvec(:)=c0 + tot=0 + + do iblk=1, max_blocks + do ij =1, icellu(iblk) + i = indxui(ij,iblk) + j = indxuj(ij,iblk) + tot=tot+1 + tpu(i,j,iblk)=invec(tot) + tot=tot+1 + tpv(i,j,iblk)=invec(tot) + enddo + enddo! ij + + print *, 'NTOT', max_blocks, tot, ntot + + end subroutine vec_to_arrays ! JFL ROUTINE POUR CALC STRESS OCN POUR COUPLAGE From a5112d83e659ef007ef8e4097a3cb13777c4a12e Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 8 Jun 2018 18:54:52 +0000 Subject: [PATCH 018/196] ice_dyn_vp compiles...I now need to make fgmres.F90 compile... --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 145 +++++++++------------- 1 file changed, 60 insertions(+), 85 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index e800827ea..ce7dff5de 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -107,6 +107,7 @@ subroutine imp_solver (dt) ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres its , & ! iteration nb for fgmres + ischmi , & ! Quesse ca!?!?! jfl maxits , & ! max nb of iteration for fgmres im_fgmres , & ! for size of Krylov subspace iblk , & ! block index @@ -153,6 +154,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm + real (kind=dbl_kind) :: conv, sol_eps real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK @@ -434,7 +436,20 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO +!----------------------------------------------------------------------- +! prep F G M R E S +!----------------------------------------------------------------------- + + icode = 0 + conv = 1.d0 + its = 0 + ischmi = 0 + im_fgmres = 50 + maxits = 50 + sol_eps = 1d-02 + allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot)) + allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) ! form b vector from matrices (max_blocks matrices) call arrays_to_vec (nx_block, ny_block, max_blocks, & icellu (:), ntot, & @@ -447,15 +462,6 @@ subroutine imp_solver (dt) indxui (:,:), indxuj(:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & sol(:)) - -!----------------------------------------------------------------------- -! 
prep F G M R E S -!----------------------------------------------------------------------- - - icode = 0 - conv = 1.d0 - its = 0 - maxits = 50 !----------------------------------------------------------------------- ! F G M R E S L O O P @@ -463,8 +469,8 @@ subroutine imp_solver (dt) 1 continue !----------------------------------------------------------------------- - call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & - sol_fgm_eps, maxits,its,conv,icode ) +! call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & +! sol_eps, maxits,its,conv,icode ) if (icode == 1) then @@ -475,7 +481,7 @@ subroutine imp_solver (dt) ! call dcopy (nloc, wk11, 1, wk22, 1) ! precond=identity ! endif - wk11(:)=wk22(:) ! precond=identity + wk22(:)=wk11(:) ! precond=identity goto 1 @@ -487,10 +493,14 @@ subroutine imp_solver (dt) ! call sol_matvec ( wk22, wk11, Minx, Maxx, Miny, Maxy, & ! nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) + call arrays_to_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + wk11 (:), & + uvel (:,:,:), vvel (:,:,:)) + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks - -JFL need to convert wk11 to uvel, vvel call stress_vp (nx_block, ny_block, & kOL, icellt(iblk), & @@ -524,15 +534,15 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk)) - enddo - !$OMP END PARALLEL DO + enddo + !$OMP END PARALLEL DO - ! form sol vector for fgmres (sol is iniguess at the beginning) - call form_vec (nx_block, ny_block, max_blocks, & - icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Au (:,:,:), Av (:,:,:), & - wk22(:)) + ! form wk2 from Au and Av arrays + call arrays_to_vec (nx_block, ny_block, max_blocks, & + icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Au (:,:,:), Av (:,:,:), & + wk22(:)) goto 1 @@ -544,69 +554,33 @@ subroutine imp_solver (dt) ! deallocate (wk11,wk22,rhs1,sol1,vv_8,ww_8) - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) - do iblk = 1, nblocks - - call stress_vp (nx_block, ny_block, & - kOL, icellt(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk), & - zetaD (:,:,iblk,:),strength (:,:,iblk), & - stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & - stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & - stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & - stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & - stress12_1(:,:,iblk), stress12_2(:,:,iblk), & - stress12_3(:,:,iblk), stress12_4(:,:,iblk), & - shear (:,:,iblk), divu (:,:,iblk), & - rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & - strtmp (:,:,:)) - - call matvec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - kOL , & - aiu (:,:,iblk), strtmp (:,:,:), & - vrel (:,:,iblk), & - umassdti (:,:,iblk), fm (:,:,iblk), & - uarear (:,:,iblk), Cb (:,:,iblk), & - strintx (:,:,iblk), strinty (:,:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk)) - -! end of Au and Av calc -! CALC b_u and b_v (bvec) - - - call calc_L2norm (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk)) +! call calc_L2norm (nx_block , ny_block, & +! icellu (iblk), & +! indxui (:,iblk), indxuj (:,iblk), & +! 
uvel (:,:,iblk), vvel (:,:,iblk)) - call residual_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - Fx (:,:,iblk), Fy (:,:,iblk), & - L2norm(iblk)) +! call residual_vec (nx_block , ny_block, & +! icellu (iblk), & +! indxui (:,iblk), indxuj (:,iblk), & +! bx (:,:,iblk), by (:,:,iblk), & +! Au (:,:,iblk), Av (:,:,iblk), & +! Fx (:,:,iblk), Fy (:,:,iblk), & +! L2norm(iblk)) - call precondD (nx_block, ny_block, & - kOL , icellt(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - uarear (:,:,iblk), & - vrel (:,:,iblk), Cb (:,:,iblk), & - umassdti (:,:,iblk), zetaD (:,:,iblk,:), & - Diagu (:,:,iblk), Diagv (:,:,iblk)) +! call precondD (nx_block, ny_block, & +! kOL , icellt(iblk), & +! indxti (:,iblk), indxtj (:,iblk), & +! dxt (:,:,iblk), dyt (:,:,iblk), & +! dxhy (:,:,iblk), dyhx (:,:,iblk), & +! cxp (:,:,iblk), cyp (:,:,iblk), & +! cxm (:,:,iblk), cym (:,:,iblk), & +! uarear (:,:,iblk), & +! vrel (:,:,iblk), Cb (:,:,iblk), & +! umassdti (:,:,iblk), zetaD (:,:,iblk,:), & +! Diagu (:,:,iblk), Diagv (:,:,iblk)) + + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + do iblk = 1, nblocks ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) @@ -2012,7 +1986,7 @@ end subroutine arrays_to_vec subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & icellu, ntot, & indxui, indxuj, & - outvec, & + invec, & tpu, tpv) integer (kind=int_kind), intent(in) :: & @@ -2045,7 +2019,8 @@ subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & ! form arrays (converts from vector to the max_blocks arrays !----------------------------------------------------------------- - outvec(:)=c0 + tpu(:,:,:)=c0 + tpv(:,:,:)=c0 tot=0 do iblk=1, max_blocks From 5cd50a8ef936f17b6b5e8d60da5803a064ed8b82 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 8 Jun 2018 19:13:14 +0000 Subject: [PATCH 019/196] added BLAS routines needed by fgmres --- cicecore/cicedynB/dynamics/BLAS_routines.F90 | 195 +++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 cicecore/cicedynB/dynamics/BLAS_routines.F90 diff --git a/cicecore/cicedynB/dynamics/BLAS_routines.F90 b/cicecore/cicedynB/dynamics/BLAS_routines.F90 new file mode 100644 index 000000000..5d0cbfe81 --- /dev/null +++ b/cicecore/cicedynB/dynamics/BLAS_routines.F90 @@ -0,0 +1,195 @@ + subroutine dcopy(n,dx,incx,dy,incy) +! +! copies a vector, x, to a vector, y. +! uses unrolled loops for increments equal to one. +! jack dongarra, linpack, 3/11/78. +! + double precision dx(1),dy(1) + integer i,incx,incy,ix,iy,m,mp1,n +! + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +! +! code for unequal increments or equal increments +! not equal to 1 +! + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +! +! code for both increments equal to 1 +! +! +! clean-up loop +! + 20 m = mod(n,7) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dx(i) + 30 continue + if( n .lt. 7 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,7 + dy(i) = dx(i) + dy(i + 1) = dx(i + 1) + dy(i + 2) = dx(i + 2) + dy(i + 3) = dx(i + 3) + dy(i + 4) = dx(i + 4) + dy(i + 5) = dx(i + 5) + dy(i + 6) = dx(i + 6) + 50 continue + return + end + + subroutine daxpy(n,da,dx,incx,dy,incy) +! +! 
constant times a vector plus a vector. +! uses unrolled loops for increments equal to one. +! jack dongarra, linpack, 3/11/78. +! + double precision dx(1),dy(1),da + integer i,incx,incy,ix,iy,m,mp1,n +! + if(n.le.0)return + if (da .eq. 0.0d0) return + if(incx.eq.1.and.incy.eq.1)go to 20 +! +! code for unequal increments or equal increments +! not equal to 1 +! + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dy(iy) = dy(iy) + da*dx(ix) + ix = ix + incx + iy = iy + incy + 10 continue + return +! +! code for both increments equal to 1 +! +! +! clean-up loop +! + 20 m = mod(n,4) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dy(i) = dy(i) + da*dx(i) + 30 continue + if( n .lt. 4 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,4 + dy(i) = dy(i) + da*dx(i) + dy(i + 1) = dy(i + 1) + da*dx(i + 1) + dy(i + 2) = dy(i + 2) + da*dx(i + 2) + dy(i + 3) = dy(i + 3) + da*dx(i + 3) + 50 continue + return + end + + subroutine dscal(n,da,dx,incx) + +! scales a vector by a constant. +! uses unrolled loops for increment equal to one. +! jack dongarra, linpack, 3/11/78. +! modified 3/93 to return if incx .le. 0. +! modified 12/3/93, array(1) declarations changed to array(*) +! + double precision da,dx(*) + integer i,incx,m,mp1,n,nincx +! + if( n .le. 0 .or. incx .le. 0 )return + if(incx==1)go to 20 +! +! code for increment not equal to 1 +! + nincx = n*incx + do 10 i = 1,nincx,incx + dx(i) = da*dx(i) + 10 continue + return +! +! code for increment equal to 1 +! +! +! clean-up loop +! + 20 m = mod(n,5) + if( m == 0 ) go to 40 + do 30 i = 1,m + dx(i) = da*dx(i) + 30 continue + if( n < 5 ) return + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dx(i) = da*dx(i) + dx(i + 1) = da*dx(i + 1) + dx(i + 2) = da*dx(i + 2) + dx(i + 3) = da*dx(i + 3) + dx(i + 4) = da*dx(i + 4) + 50 continue + return + end + + double precision function ddot(n,dx,incx,dy,incy) +! +! forms the dot product of two vectors. +! uses unrolled loops for increments equal to one. +! jack dongarra, linpack, 3/11/78. +! + double precision dx(1),dy(1),dtemp + integer i,incx,incy,ix,iy,m,mp1,n +! + ddot = 0.0d0 + dtemp = 0.0d0 + if(n.le.0)return + if(incx.eq.1.and.incy.eq.1)go to 20 +! +! code for unequal increments or equal increments +! not equal to 1 +! + ix = 1 + iy = 1 + if(incx.lt.0)ix = (-n+1)*incx + 1 + if(incy.lt.0)iy = (-n+1)*incy + 1 + do 10 i = 1,n + dtemp = dtemp + dx(ix)*dy(iy) + ix = ix + incx + iy = iy + incy + 10 continue + ddot = dtemp + return +! +! code for both increments equal to 1 +! +! +! clean-up loop +! + 20 m = mod(n,5) + if( m .eq. 0 ) go to 40 + do 30 i = 1,m + dtemp = dtemp + dx(i)*dy(i) + 30 continue + if( n .lt. 5 ) go to 60 + 40 mp1 = m + 1 + do 50 i = mp1,n,5 + dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + & + dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) + 50 continue + 60 ddot = dtemp + + return + end + + + + \ No newline at end of file From 1d1eeb0bbd1a2c9127f413d2af9e366ddd75ade1 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 8 Jun 2018 19:16:59 +0000 Subject: [PATCH 020/196] added the fgmres.F90 routine...does not compile yet... 
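fgmres2 is driven through a reverse-communication loop and relies on the
level-1 BLAS kernels added in the previous commit (dcopy, daxpy, dscal,
ddot). Normalizing a Krylov basis vector, for instance, combines ddot and
dscal; a minimal sketch of the pattern (illustrative only, v being any
nonzero length-n work vector):

      t = ddot(n, v, 1, v, 1)   ! squared 2-norm of the local part of v
      t = 1.0d0/sqrt(t)
      call dscal(n, t, v, 1)    ! v <- v/||v||

In the MPI code the squared norm is first summed across ranks with
MPI_allreduce before taking the square root.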
--- cicecore/cicedynB/dynamics/fgmres.F90 | 306 ++++++++++++++++++++++ cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 +- 2 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 cicecore/cicedynB/dynamics/fgmres.F90 diff --git a/cicecore/cicedynB/dynamics/fgmres.F90 b/cicecore/cicedynB/dynamics/fgmres.F90 new file mode 100644 index 000000000..d236e6290 --- /dev/null +++ b/cicecore/cicedynB/dynamics/fgmres.F90 @@ -0,0 +1,306 @@ +!---------------------------------- LICENCE BEGIN ------------------------------- +! GEM - Library of kernel routines for the GEM numerical atmospheric model +! Copyright (C) 1990-2010 - Division de Recherche en Prevision Numerique +! Environnement Canada +! This library is free software; you can redistribute it and/or modify it +! under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation, version 2.1 of the License. This library is +! distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +! without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +! You should have received a copy of the GNU Lesser General Public License +! along with this library; if not, write to the Free Software Foundation, Inc., +! 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +!---------------------------------- LICENCE END --------------------------------- + +!** fgmres - flexible GMRES routine to allow a variable preconditioner +! + subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & + its,conv,icode) + implicit none +!#include + + integer n, im, i, maxits, its, icode + real*8 rhs(*), sol(*), vv(n,im+1),w(n,im), wk1(n), wk2(n), eps,conv +! +!author Y. Saad (modified by A. Malevsky Feb1, 1995) +! +!revision +! v3_30 - Abdessamad Qaddouri ; add RPN_comm calls +!----------------------------------------------------------------------- +! flexible GMRES routine. This is a version of GMRES which allows a +! a variable preconditioner. Implemented with a reverse communication +! protocole for flexibility - +! DISTRIBUTED VERSION (USES DISTDOT FOR DDOT) +! explicit (exact) residual norms for restarts +! written by Y. Saad, modified by A. Malevsky, version February 1, 1995 +! revision : +! Abdessamad Qaddouri ; add RPN_comm calls +!----------------------------------------------------------------------- +! This Is A Reverse Communication Implementation. +!------------------------------------------------- +! USAGE: (see also comments for icode below). CGMRES +! should be put in a loop and the loop should be active for as +! long as icode is not equal to 0. On return fgmres will +! 1) either be requesting the new preconditioned vector applied +! to wk1 in case icode == 1 (result should be put in wk2) +! 2) or be requesting the product of A applied to the vector wk1 +! in case icode == 2 (result should be put in wk2) +! 3) or be terminated in case icode == 0. +! on entry always set icode = 0. So icode should be set back to zero +! upon convergence. +!----------------------------------------------------------------------- +! Here is a typical way of running fgmres: +! +! icode = 0 +! 1 continue +! call fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,its,conv,icode) +! +! if (icode == 1) then +! call precon(n, wk1, wk2) <--- user's variable preconditioning +! goto 1 +! else if (icode >= 2) then +! call matvec (n,wk1, wk2) <--- user's matrix vector product. +! goto 1 +! else +! ----- done ---- +! ......... 
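!
! note that fgmres also returns icode == 0 when maxits is reached, so
! at the '----- done ----' point the caller can distinguish the two
! outcomes from the returned conv and its, e.g. (a sketch):
!
! if (conv > eps .and. its >= maxits) then
! ... no convergence: conv = ||res||/||res0||, its = iterations used
! endif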
+!-----------------------------------------------------------------------
! list of parameters
!-------------------
!
! n == integer. the dimension of the problem
! im == size of Krylov subspace: should not exceed 100 in this
! version (can be reset in code. looking at comment below)
! rhs == vector of length n containing the right hand side
! sol == initial guess on input, approximate solution on output
! vv == work space of size n x (im+1)
! w == work space of length n x im
! wk1,
! wk2, == two work vectors of length n each used for the reverse
! communication protocol. When fgmres returns with icode == 1
! the user should call it again with wk2 = precon * wk1 and
! icode untouched. When it returns with icode == 0 it means
! that convergence has taken place.
!
! eps == tolerance for stopping criterion. process is stopped
! as soon as ( ||.|| is the euclidean norm):
! || current residual||/||initial residual|| <= eps
!
! maxits== maximum number of iterations allowed
!
!
! icode = integer. indicator for the reverse communication protocol.
! ON ENTRY : icode should be set to icode = 0.
! ON RETURN:
! * icode == 1 value means that fgmres has not finished
! and that it is requesting a preconditioned vector before
! continuing. The user must compute M**(-1) wk1, where M is
! the preconditioning matrix (may vary at each call) and wk1 is
! the vector as provided by fgmres upon return, and put the
! result in wk2. Then fgmres must be called again without
! changing any other argument.
! * icode == 2 value means that fgmres has not finished
! and that it is requesting a matrix vector product before
! continuing. The user must compute A * wk1, where A is the
! coefficient matrix and wk1 is the vector provided by fgmres
! upon return. The result of the operation is to be put in
! the vector wk2. Then fgmres must be called again without
! changing any other argument.
! * icode == 0 means that fgmres has finished and sol contains
! the approximate solution.
! comment: typically fgmres must be implemented in a loop
! with fgmres being called as long as icode is returned with
! a value /= 0.
!-----------------------------------------------------------------------
! local variables --
 real*8 hh(101,100),hhloc(101,100),c(100),s(100), &
 rs(101),t,tloc,ro,dsqrt,epsmac, ddot, eps1, gam ,r0
 integer n1, j, i1, k, k1, ii, jj, ierr
! integer mproc, myproc ! use when printing out ..
!-------------------------------------------------------------
! arnoldi size should not exceed 100 in this version..
! to reset modify sizes of hh, c, s, rs
!-------------------------------------------------------------
!
 save
! ##
! used for printing out only -- ignore
! call MPI_Comm_rank(MPI_COMM_WORLD,mproc,ierr)
! myproc = mproc+1
!
 data epsmac/1.d-16/
!
! computed goto
!
 goto (100,200,300,11) icode+1
 100 continue
 n1 = n + 1
 its = 0
!-------------------------------------------------------------
! ** outer loop starts here..
!--------------compute initial residual vector --------------
 10 continue
 call dcopy (n, sol, 1, wk1, 1)
 icode = 3
 return
 11 continue
 do 21 j=1,n
 vv(j,1) = rhs(j) - wk2(j)
 21 continue
 20 continue
 tloc=ddot(n, vv, 1, vv,1)
 call MPI_allreduce(tloc,ro,1,MPI_double_precision,
 * MPI_sum,MPI_COMM_WORLD,ierr)
! call RPN_COMM_allreduce(tloc,ro,1,"MPI_double_precision", &
! "MPI_sum","grid",ierr)
 ro = dsqrt(ro)
!##
! 
if (mproc == 0) write (19,*) ro + if (ro == 0.0d0) goto 999 + t = 1.0d0/ ro + call dscal(n,t,vv(1,1),1) + if (its == 0) eps1=eps*ro + if (its == 0) r0 = ro + + conv = ro/r0 +! +! initialize 1-st term of rhs of hessenberg system.. +! + rs(1) = ro + i = 0 + 4 i=i+1 + its = its + 1 + i1 = i + 1 + call dcopy(n,vv(1,i),1,wk1,1) +! +! return +! + icode = 1 + return + 200 continue + call dcopy(n, wk2, 1, w(1,i), 1) +! +! call matvec operation +! + icode = 2 + call dcopy(n, wk2, 1, wk1, 1) +! +! return +! + return + 300 continue +! +! first call to ope corresponds to intialization goto back to 11. +! +! if (icode == 3) goto 11 + call dcopy (n, wk2, 1, vv(1,i1), 1) +! +! classical gram - schmidt... +! + do 55 j=1, i + hhloc(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1) + 55 continue + call MPI_allreduce(hhloc(1,i),hh(1,i),i,MPI_double_precision, + * MPI_sum,MPI_COMM_WORLD,ierr) +! call RPN_COMM_allreduce(hhloc(1,i),hh(1,i),i,"MPI_double_precision", & +! "MPI_sum","grid",ierr) + + do 56 j=1, i + call daxpy(n, -hh(j,i), vv(1,j), 1, vv(1,i1), 1) + 56 continue + tloc = ddot(n, vv(1,i1), 1, vv(1,i1), 1) +! + call MPI_allreduce(tloc,t,1,MPI_double_precision, + * MPI_sum,MPI_COMM_WORLD,ierr) +! call RPN_COMM_allreduce(tloc,t,1,"MPI_double_precision", & +! "MPI_sum","grid",ierr) + + t = sqrt(t) + hh(i1,i) = t + if (t == 0.0d0) goto 58 + t = 1.0d0 / t + call dscal(n,t,vv(1,i1),1) +! +! done with classical gram schimd and arnoldi step. +! now update factorization of hh +! + 58 if (i == 1) goto 121 +! +! perfrom previous transformations on i-th column of h +! + do 66 k=2,i + k1 = k-1 + t = hh(k1,i) + hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) + hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) + 66 continue + 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) + if (gam == 0.0d0) gam = epsmac +!-----------#determinenextplane rotation #------------------- + c(i) = hh(i,i)/gam + s(i) = hh(i1,i)/gam + rs(i1) = -s(i)*rs(i) + rs(i) = c(i)*rs(i) +! +! determine res. norm. and test for convergence- +! + hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) + ro = abs(rs(i1)) +!## +! if (mproc == 0) write (19,*) ro +! + conv = ro/r0 + + if ((i < im) .and. (ro > eps1)) then + + goto 4 + endif +! +! now compute solution. first solve upper triangular system. +! + rs(i) = rs(i)/hh(i,i) + do 30 ii=2,i + k=i-ii+1 + k1 = k+1 + t=rs(k) + do 40 j=k1,i + t = t-hh(k,j)*rs(j) + 40 continue + rs(k) = t/hh(k,k) + 30 continue +! +! done with back substitution.. +! now form linear combination to get solution +! + do 16 j=1, i + t = rs(j) + call daxpy(n, t, w(1,j), 1, sol,1) + 16 continue +! +! test for return + if (ro <= eps1 .or. its >= maxits) goto 999 +! +! else compute residual vector and continue.. +! +! goto 10 + do 24 j=1,i + jj = i1-j+1 + rs(jj-1) = -s(jj-1)*rs(jj) + rs(jj) = c(jj-1)*rs(jj) + 24 continue + do 25 j=1,i1 + t = rs(j) + if (j == 1) t = t-1.0d0 + call daxpy (n, t, vv(1,j), 1, vv, 1) + 25 continue +! +! restart outer loop. +! + goto 20 + 999 icode = 0 +! + return +!-----end-of-fgmres----------------------------------------------------- +!----------------------------------------------------------------------- + end diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ce7dff5de..7ebaf7c49 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -469,8 +469,8 @@ subroutine imp_solver (dt) 1 continue !----------------------------------------------------------------------- -! call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & -! 
sol_eps, maxits,its,conv,icode ) + call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & + sol_eps, maxits,its,conv,icode ) if (icode == 1) then From bada6d78e23a0034d6997f71443836f96a031ce9 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 11 Jun 2018 18:29:53 +0000 Subject: [PATCH 021/196] minor modif to fgmres.F90 --- cicecore/cicedynB/dynamics/fgmres.F90 | 14 ++++++++------ cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmres.F90 b/cicecore/cicedynB/dynamics/fgmres.F90 index d236e6290..9685e80af 100644 --- a/cicecore/cicedynB/dynamics/fgmres.F90 +++ b/cicecore/cicedynB/dynamics/fgmres.F90 @@ -20,6 +20,8 @@ subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & implicit none !#include + include 'mpif.h' ! MPI Fortran include file + integer n, im, i, maxits, its, icode real*8 rhs(*), sol(*), vv(n,im+1),w(n,im), wk1(n), wk2(n), eps,conv ! @@ -150,8 +152,8 @@ subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & 21 continue 20 continue tloc=ddot(n, vv, 1, vv,1) - call MPI_allreduce(tloc,ro,1,MPI_double_precision, - * MPI_sum,MPI_COMM_WORLD,ierr) + call MPI_allreduce(tloc,ro,1,MPI_double_precision, & + MPI_sum,MPI_COMM_WORLD,ierr) ! call RPN_COMM_allreduce(tloc,ro,1,"MPI_double_precision", & ! "MPI_sum","grid",ierr) ro = dsqrt(ro) @@ -201,8 +203,8 @@ subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & do 55 j=1, i hhloc(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1) 55 continue - call MPI_allreduce(hhloc(1,i),hh(1,i),i,MPI_double_precision, - * MPI_sum,MPI_COMM_WORLD,ierr) + call MPI_allreduce(hhloc(1,i),hh(1,i),i,MPI_double_precision, & + MPI_sum,MPI_COMM_WORLD,ierr) ! call RPN_COMM_allreduce(hhloc(1,i),hh(1,i),i,"MPI_double_precision", & ! "MPI_sum","grid",ierr) @@ -211,8 +213,8 @@ subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & 56 continue tloc = ddot(n, vv(1,i1), 1, vv(1,i1), 1) ! - call MPI_allreduce(tloc,t,1,MPI_double_precision, - * MPI_sum,MPI_COMM_WORLD,ierr) + call MPI_allreduce(tloc,t,1,MPI_double_precision, & + MPI_sum,MPI_COMM_WORLD,ierr) ! call RPN_COMM_allreduce(tloc,t,1,"MPI_double_precision", & ! "MPI_sum","grid",ierr) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 7ebaf7c49..8bee3e0c9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -396,7 +396,7 @@ subroutine imp_solver (dt) ! Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k) !----------------------------------------------------------------- - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks uprev_k(:,:,iblk) = uvel(:,:,iblk) @@ -435,7 +435,7 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO - + !----------------------------------------------------------------------- ! prep F G M R E S !----------------------------------------------------------------------- @@ -462,7 +462,7 @@ subroutine imp_solver (dt) indxui (:,:), indxuj(:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & sol(:)) - + !----------------------------------------------------------------------- ! 
F G M R E S L O O P !----------------------------------------------------------------------- From 8901ccf8a5b86ab29bc8a36af7dd6207e12b5ffa Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 11 Jun 2018 18:35:18 +0000 Subject: [PATCH 022/196] bug in call to arrays_to_vec instead of vec_to_arrays --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 8bee3e0c9..5e7db6b11 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -493,7 +493,7 @@ subroutine imp_solver (dt) ! call sol_matvec ( wk22, wk11, Minx, Maxx, Miny, Maxy, & ! nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) - call arrays_to_vec (nx_block, ny_block, max_blocks, & + call vec_to_arrays (nx_block, ny_block, max_blocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & wk11 (:), & From 89bf00a029d6a26fe069369ccfecd953b0ddf3d7 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 6 Jun 2019 15:56:28 -0400 Subject: [PATCH 023/196] Remove fgmres.F90 (MPI version, will re-add GEM version later) This file 'include's mpif.h and so breaks serial compilation. I will copy the FGMRES solver from GEM later on and modify it to call the CICE communication routines. --- cicecore/cicedynB/dynamics/fgmres.F90 | 308 -------------------------- 1 file changed, 308 deletions(-) delete mode 100644 cicecore/cicedynB/dynamics/fgmres.F90 diff --git a/cicecore/cicedynB/dynamics/fgmres.F90 b/cicecore/cicedynB/dynamics/fgmres.F90 deleted file mode 100644 index 9685e80af..000000000 --- a/cicecore/cicedynB/dynamics/fgmres.F90 +++ /dev/null @@ -1,308 +0,0 @@ -!---------------------------------- LICENCE BEGIN ------------------------------- -! GEM - Library of kernel routines for the GEM numerical atmospheric model -! Copyright (C) 1990-2010 - Division de Recherche en Prevision Numerique -! Environnement Canada -! This library is free software; you can redistribute it and/or modify it -! under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation, version 2.1 of the License. This library is -! distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; -! without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -! PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -! You should have received a copy of the GNU Lesser General Public License -! along with this library; if not, write to the Free Software Foundation, Inc., -! 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -!---------------------------------- LICENCE END --------------------------------- - -!** fgmres - flexible GMRES routine to allow a variable preconditioner -! - subroutine fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits, & - its,conv,icode) - implicit none -!#include - - include 'mpif.h' ! MPI Fortran include file - - integer n, im, i, maxits, its, icode - real*8 rhs(*), sol(*), vv(n,im+1),w(n,im), wk1(n), wk2(n), eps,conv -! -!author Y. Saad (modified by A. Malevsky Feb1, 1995) -! -!revision -! v3_30 - Abdessamad Qaddouri ; add RPN_comm calls -!----------------------------------------------------------------------- -! flexible GMRES routine. This is a version of GMRES which allows a -! a variable preconditioner. Implemented with a reverse communication -! protocole for flexibility - -! DISTRIBUTED VERSION (USES DISTDOT FOR DDOT) -! explicit (exact) residual norms for restarts -! 
written by Y. Saad, modified by A. Malevsky, version February 1, 1995 -! revision : -! Abdessamad Qaddouri ; add RPN_comm calls -!----------------------------------------------------------------------- -! This Is A Reverse Communication Implementation. -!------------------------------------------------- -! USAGE: (see also comments for icode below). CGMRES -! should be put in a loop and the loop should be active for as -! long as icode is not equal to 0. On return fgmres will -! 1) either be requesting the new preconditioned vector applied -! to wk1 in case icode == 1 (result should be put in wk2) -! 2) or be requesting the product of A applied to the vector wk1 -! in case icode == 2 (result should be put in wk2) -! 3) or be terminated in case icode == 0. -! on entry always set icode = 0. So icode should be set back to zero -! upon convergence. -!----------------------------------------------------------------------- -! Here is a typical way of running fgmres: -! -! icode = 0 -! 1 continue -! call fgmres2(n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,its,conv,icode) -! -! if (icode == 1) then -! call precon(n, wk1, wk2) <--- user's variable preconditioning -! goto 1 -! else if (icode >= 2) then -! call matvec (n,wk1, wk2) <--- user's matrix vector product. -! goto 1 -! else -! ----- done ---- -! ......... -!----------------------------------------------------------------------- -! list of parameters -!------------------- -! -! n == integer. the dimension of the problem -! im == size of Krylov subspace: should not exceed 100 in this -! version (can be reset in code. looking at comment below) -! rhs == vector of length n containing the right hand side -! sol == initial guess on input, approximate solution on output -! vv == work space of size n x (im+1) -! w == work space of length n x im -! wk1, -! wk2, == two work vectors of length n each used for the reverse -! communication protocole. When on return (icode /= 1) -! the user should call fgmres again with wk2 = precon * wk1 -! and icode untouched. When icode == 1 then it means that -! convergence has taken place. -! -! eps == tolerance for stopping criterion. process is stopped -! as soon as ( ||.|| is the euclidean norm): -! || current residual||/||initial residual|| <= eps -! -! maxits== maximum number of iterations allowed -! -! -! icode = integer. indicator for the reverse communication protocole. -! ON ENTRY : icode should be set to icode = 0. -! ON RETURN: -! * icode == 1 value means that fgmres has not finished -! and that it is requesting a preconditioned vector before -! continuing. The user must compute M**(-1) wk1, where M is -! the preconditioing matrix (may vary at each call) and wk1 is -! the vector as provided by fgmres upun return, and put the -! result in wk2. Then fgmres must be called again without -! changing any other argument. -! * icode == 2 value means that fgmres has not finished -! and that it is requesting a matrix vector product before -! continuing. The user must compute A * wk1, where A is the -! coefficient matrix and wk1 is the vector provided by -! upon return. The result of the operation is to be put in -! the vector wk2. Then fgmres must be called again without -! changing any other argument. -! * icode == 0 means that fgmres has finished and sol contains -! the approximate solution. -! comment: typically fgmres must be implemented in a loop -! with fgmres being called as long icode is returned with -! a value /= 0. -!----------------------------------------------------------------------- -! 
local variables -- - real*8 hh(101,100),hhloc(101,100),c(100),s(100), & - rs(101),t,tloc,ro,dsqrt,epsmac, ddot, eps1, gam ,r0 - integer n1, j, i1, k, k1, ii, jj, ierr -! integer mproc, myproc ! use when printing out .. -!------------------------------------------------------------- -! arnoldi size should not exceed 100 in this version.. -! to reset modify sizes of hh, c, s, rs -!------------------------------------------------------------- -! - save -! ## -! used for printing out only -- ignore -! call MPI_Comm_rank(MPI_COMM_WORLD,mproc,ierr) -! myproc = mproc+1 -! - data epsmac/1.d-16/ -! -! computed goto -! - goto (100,200,300,11) icode +1 - 100 continue - n1 = n + 1 - its = 0 -!------------------------------------------------------------- -! ** outer loop starts here.. -!--------------compute initial residual vector -------------- - 10 continue - call dcopy (n, sol, 1, wk1, 1) - icode = 3 - return - 11 continue - do 21 j=1,n - vv(j,1) = rhs(j) - wk2(j) - 21 continue - 20 continue - tloc=ddot(n, vv, 1, vv,1) - call MPI_allreduce(tloc,ro,1,MPI_double_precision, & - MPI_sum,MPI_COMM_WORLD,ierr) -! call RPN_COMM_allreduce(tloc,ro,1,"MPI_double_precision", & -! "MPI_sum","grid",ierr) - ro = dsqrt(ro) -!## -! if (mproc == 0) write (19,*) ro - if (ro == 0.0d0) goto 999 - t = 1.0d0/ ro - call dscal(n,t,vv(1,1),1) - if (its == 0) eps1=eps*ro - if (its == 0) r0 = ro - - conv = ro/r0 -! -! initialize 1-st term of rhs of hessenberg system.. -! - rs(1) = ro - i = 0 - 4 i=i+1 - its = its + 1 - i1 = i + 1 - call dcopy(n,vv(1,i),1,wk1,1) -! -! return -! - icode = 1 - return - 200 continue - call dcopy(n, wk2, 1, w(1,i), 1) -! -! call matvec operation -! - icode = 2 - call dcopy(n, wk2, 1, wk1, 1) -! -! return -! - return - 300 continue -! -! first call to ope corresponds to intialization goto back to 11. -! -! if (icode == 3) goto 11 - call dcopy (n, wk2, 1, vv(1,i1), 1) -! -! classical gram - schmidt... -! - do 55 j=1, i - hhloc(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1) - 55 continue - call MPI_allreduce(hhloc(1,i),hh(1,i),i,MPI_double_precision, & - MPI_sum,MPI_COMM_WORLD,ierr) -! call RPN_COMM_allreduce(hhloc(1,i),hh(1,i),i,"MPI_double_precision", & -! "MPI_sum","grid",ierr) - - do 56 j=1, i - call daxpy(n, -hh(j,i), vv(1,j), 1, vv(1,i1), 1) - 56 continue - tloc = ddot(n, vv(1,i1), 1, vv(1,i1), 1) -! - call MPI_allreduce(tloc,t,1,MPI_double_precision, & - MPI_sum,MPI_COMM_WORLD,ierr) -! call RPN_COMM_allreduce(tloc,t,1,"MPI_double_precision", & -! "MPI_sum","grid",ierr) - - t = sqrt(t) - hh(i1,i) = t - if (t == 0.0d0) goto 58 - t = 1.0d0 / t - call dscal(n,t,vv(1,i1),1) -! -! done with classical gram schimd and arnoldi step. -! now update factorization of hh -! - 58 if (i == 1) goto 121 -! -! perfrom previous transformations on i-th column of h -! - do 66 k=2,i - k1 = k-1 - t = hh(k1,i) - hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) - hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) - 66 continue - 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) - if (gam == 0.0d0) gam = epsmac -!-----------#determinenextplane rotation #------------------- - c(i) = hh(i,i)/gam - s(i) = hh(i1,i)/gam - rs(i1) = -s(i)*rs(i) - rs(i) = c(i)*rs(i) -! -! determine res. norm. and test for convergence- -! - hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) - ro = abs(rs(i1)) -!## -! if (mproc == 0) write (19,*) ro -! - conv = ro/r0 - - if ((i < im) .and. (ro > eps1)) then - - goto 4 - endif -! -! now compute solution. first solve upper triangular system. -! 
- rs(i) = rs(i)/hh(i,i) - do 30 ii=2,i - k=i-ii+1 - k1 = k+1 - t=rs(k) - do 40 j=k1,i - t = t-hh(k,j)*rs(j) - 40 continue - rs(k) = t/hh(k,k) - 30 continue -! -! done with back substitution.. -! now form linear combination to get solution -! - do 16 j=1, i - t = rs(j) - call daxpy(n, t, w(1,j), 1, sol,1) - 16 continue -! -! test for return - if (ro <= eps1 .or. its >= maxits) goto 999 -! -! else compute residual vector and continue.. -! -! goto 10 - do 24 j=1,i - jj = i1-j+1 - rs(jj-1) = -s(jj-1)*rs(jj) - rs(jj) = c(jj-1)*rs(jj) - 24 continue - do 25 j=1,i1 - t = rs(j) - if (j == 1) t = t-1.0d0 - call daxpy (n, t, vv(1,j), 1, vv, 1) - 25 continue -! -! restart outer loop. -! - goto 20 - 999 icode = 0 -! - return -!-----end-of-fgmres----------------------------------------------------- -!----------------------------------------------------------------------- - end From bc803ea78b9568cadab6714c4e3222fd63fd64b8 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 12 Jun 2018 14:40:21 +0000 Subject: [PATCH 024/196] added serial fgmres for testing --- cicecore/cicedynB/dynamics/fgmresD.f90 | 283 +++++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 cicecore/cicedynB/dynamics/fgmresD.f90 diff --git a/cicecore/cicedynB/dynamics/fgmresD.f90 b/cicecore/cicedynB/dynamics/fgmresD.f90 new file mode 100644 index 000000000..81931bf7a --- /dev/null +++ b/cicecore/cicedynB/dynamics/fgmresD.f90 @@ -0,0 +1,283 @@ + subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & + eps,maxits,iout,icode,its) + +!----------------------------------------------------------------------- +! jfl Dec 1st 2006. We modified the routine so that it is double precison. +! Here are the modifications: +! 1) implicit real (a-h,o-z) becomes implicit real*8 (a-h,o-z) +! 2) real bocomes real*8 +! 3) subroutine scopy.f has been changed for dcopy.f +! 4) subroutine saxpy.f has been changed for daxpy.f +! 5) function sdot.f has been changed for ddot.f +! 6) 1e-08 becomes 1d-08 +! +! Be careful with the dcopy, daxpy and ddot code...there is a slight +! difference with the single precision versions (scopy, saxpy and sdot). +! In the single precision versions, the array are declared sightly differently. +! It is written for single precision: +! +! modified 12/3/93, array(1) declarations changed to array(*) +!----------------------------------------------------------------------- + + implicit double precision (a-h,o-z) !jfl modification + integer n, im, maxits, iout, icode + double precision rhs(*), sol(*), vv(n,im+1),w(n,im) + double precision wk1(n), wk2(n), eps +!----------------------------------------------------------------------- +! flexible GMRES routine. This is a version of GMRES which allows a +! a variable preconditioner. Implemented with a reverse communication +! protocole for flexibility - +! DISTRIBUTED VERSION (USES DISTDOT FOR DDOT) +! explicit (exact) residual norms for restarts +! written by Y. Saad, modified by A. Malevsky, version February 1, 1995 +!----------------------------------------------------------------------- +! This Is A Reverse Communication Implementation. +!------------------------------------------------- +! USAGE: (see also comments for icode below). FGMRES +! should be put in a loop and the loop should be active for as +! long as icode is not equal to 0. On return fgmres will +! 1) either be requesting the new preconditioned vector applied +! to wk1 in case icode.eq.1 (result should be put in wk2) +! 2) or be requesting the product of A applied to the vector wk1 +! 
in case icode.eq.2 (result should be put in wk2)
+! 3) or be terminated in case icode .eq. 0.
+! on entry always set icode = 0. So icode should be set back to zero
+! upon convergence.
+!-----------------------------------------------------------------------
+! Here is a typical way of running fgmres:
+!
+! icode = 0
+! 1 continue
+! call fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,iout,icode,its)
+!
+! if (icode .eq. 1) then
+! call precon(n, wk1, wk2) <--- user's variable preconditioning
+! goto 1
+! else if (icode .ge. 2) then
+! call matvec (n,wk1, wk2) <--- user's matrix vector product.
+! goto 1
+! else
+! ----- done ----
+! .........
+!-----------------------------------------------------------------------
+! list of parameters
+!-------------------
+!
+! n == integer. the dimension of the problem
+! im == size of Krylov subspace: should not exceed 50 in this
+! version (can be reset in the code; see the comment below)
+! rhs == vector of length n containing the right hand side
+! sol == initial guess on input, approximate solution on output
+! vv == work space of size n x (im+1)
+! w == work space of length n x im
+! wk1,
+! wk2, == two work vectors of length n each used for the reverse
+! communication protocol. When on return (icode .ne. 0)
+! the user should call fgmres again with wk2 set as described
+! below and icode untouched. When icode .eq. 0 then it means
+! that convergence has taken place.
+!
+! eps == tolerance for stopping criterion. process is stopped
+! as soon as ( ||.|| is the euclidean norm):
+! || current residual||/||initial residual|| <= eps
+!
+! maxits== maximum number of iterations allowed
+!
+! iout == output unit number for printing intermediate results
+! if (iout .le. 0) no statistics are printed.
+!
+! icode = integer. indicator for the reverse communication protocol.
+! ON ENTRY : icode should be set to icode = 0.
+! ON RETURN:
+! * icode .eq. 1 value means that fgmres has not finished
+! and that it is requesting a preconditioned vector before
+! continuing. The user must compute M**(-1) wk1, where M is
+! the preconditioning matrix (may vary at each call) and wk1 is
+! the vector as provided by fgmres upon return, and put the
+! result in wk2. Then fgmres must be called again without
+! changing any other argument.
+! * icode .eq. 2 value means that fgmres has not finished
+! and that it is requesting a matrix vector product before
+! continuing. The user must compute A * wk1, where A is the
+! coefficient matrix and wk1 is the vector provided by fgmres
+! upon return. The result of the operation is to be put in
+! the vector wk2. Then fgmres must be called again without
+! changing any other argument.
+! * icode .eq. 0 means that fgmres has finished and sol contains
+! the approximate solution.
+! comment: typically fgmres must be implemented in a loop
+! with fgmres being called as long as icode is returned with
+! a value .ne. 0.
+!-----------------------------------------------------------------------
+! local variables -- !jfl modif
+ double precision hh(201,200),c(200),s(200),rs(201),t,ro,ddot,sqrt
+!
+!-------------------------------------------------------------
+! arnoldi size should not exceed 50 in this version..
+! to reset modify sizes of hh, c, s, rs
+!-------------------------------------------------------------
+
+ save
+ data epsmac/1.d-16/
+!
+! computed goto
+!
+ goto (100,200,300,11) icode +1
+ 100 continue
+ n1 = n + 1
+ its = 0
+!-------------------------------------------------------------
+! ** outer loop starts here..
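+!
+! The computed goto above threads the reverse communication through
+! this routine's statement labels as follows:
+! entry, icode = 0 -> label 100: initialize, then request the
+! product A*sol (icode = 3) to form the initial residual
+! re-entry, icode = 3 -> label 11: vv(:,1) = rhs - A*sol
+! re-entry, icode = 1 -> label 200: store the preconditioned vector
+! in w(:,i), then request A*wk1 (icode = 2)
+! re-entry, icode = 2 -> label 300: orthogonalize the new Krylov
+! vector and update the Hessenberg factorization
+! return, icode = 0 (label 999): sol holds the approximate solution
+! (converged or maxits reached)
+!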
+!--------------compute initial residual vector -------------- +! 10 continue + call dcopy (n, sol, 1, wk1, 1) !jfl modification + icode = 3 + return + 11 continue + do j=1,n + vv(j,1) = rhs(j) - wk2(j) + enddo + 20 ro = ddot(n, vv, 1, vv,1) !jfl modification + ro = sqrt(ro) + if (ro .eq. 0.0d0) goto 999 + t = 1.0d0/ ro + do j=1, n + vv(j,1) = vv(j,1)*t + enddo + if (its .eq. 0) eps1=eps + if (its .eq. 0) r0 = ro + if (iout .gt. 0) write(*, 199) its, ro!& +! print *,'chau',its, ro !write(iout, 199) its, ro +! +! initialize 1-st term of rhs of hessenberg system.. +! + rs(1) = ro + i = 0 + 4 i=i+1 + its = its + 1 + i1 = i + 1 + do k=1, n + wk1(k) = vv(k,i) + enddo +! +! return +! + icode = 1 + + return + 200 continue + do k=1, n + w(k,i) = wk2(k) + enddo +! +! call matvec operation +! + icode = 2 + call dcopy(n, wk2, 1, wk1, 1) !jfl modification +! +! return +! + return + 300 continue +! +! first call to ope corresponds to intialization goto back to 11. +! +! if (icode .eq. 3) goto 11 + call dcopy (n, wk2, 1, vv(1,i1), 1) !jfl modification +! +! modified gram - schmidt... +! + do j=1, i + t = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification + hh(j,i) = t + call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) !jfl modification + enddo + t = sqrt(ddot(n, vv(1,i1), 1, vv(1,i1), 1)) !jfl modification + hh(i1,i) = t + if (t .eq. 0.0d0) goto 58 + t = 1.0d0 / t + do k=1,n + vv(k,i1) = vv(k,i1)*t + enddo +! +! done with modified gram schimd and arnoldi step. +! now update factorization of hh +! + 58 if (i .eq. 1) goto 121 +! +! perfrom previous transformations on i-th column of h +! + do k=2,i + k1 = k-1 + t = hh(k1,i) + hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) + hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) + enddo + 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) + if (gam .eq. 0.0d0) gam = epsmac +!-----------#determine next plane rotation #------------------- + c(i) = hh(i,i)/gam + s(i) = hh(i1,i)/gam + rs(i1) = -s(i)*rs(i) + rs(i) = c(i)*rs(i) +! +! determine res. norm. and test for convergence- +! + hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) + ro = abs(rs(i1)) + if (iout .gt. 0) & + write(*, 199) its, ro + if (i .lt. im .and. (ro .gt. eps1)) goto 4 +! +! now compute solution. first solve upper triangular system. +! + rs(i) = rs(i)/hh(i,i) + do ii=2,i + k=i-ii+1 + k1 = k+1 + t=rs(k) + do j=k1,i + t = t-hh(k,j)*rs(j) + enddo + rs(k) = t/hh(k,k) + enddo +! +! done with back substitution.. +! now form linear combination to get solution +! + do j=1, i + t = rs(j) + call daxpy(n, t, w(1,j), 1, sol,1) !jfl modification + enddo +! +! test for return +! + if (ro .le. eps1 .or. its .ge. maxits) goto 999 +! +! else compute residual vector and continue.. +! +! goto 10 + + do j=1,i + jj = i1-j+1 + rs(jj-1) = -s(jj-1)*rs(jj) + rs(jj) = c(jj-1)*rs(jj) + enddo + do j=1,i1 + t = rs(j) + if (j .eq. 1) t = t-1.0d0 + call daxpy (n, t, vv(1,j), 1, vv, 1) + enddo +! +! restart outer loop. +! + goto 20 + 999 icode = 0 + + 199 format(' -- fmgres its =', i4, ' res. norm =', d26.16) +! 
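+! A minimal driver for testing this serial routine. This is only a
+! sketch: the program below is illustrative rather than part of CICE,
+! it assumes the BLAS routines (dcopy, ddot, daxpy) used above are
+! linked, and it takes the identity as the "variable" preconditioner.
+! It solves (2*I) x = b, so on convergence sol should equal 0.5*b:
+!
+! program test_fgmres
+! implicit none
+! integer, parameter :: n = 4, im = 4
+! double precision :: rhs(n), sol(n), vv(n,im+1), w(n,im)
+! double precision :: wk1(n), wk2(n)
+! integer :: i, icode, its
+! rhs = 1.0d0 ! right-hand side b
+! sol = 0.0d0 ! initial guess
+! icode = 0
+! 1 call fgmres (n, im, rhs, sol, i, vv, w, wk1, wk2, &
+! 1.0d-10, 50, 1, icode, its)
+! if (icode == 1) then
+! wk2 = wk1 ! precon: wk2 = M**(-1) wk1 with M = I
+! goto 1
+! else if (icode >= 2) then
+! wk2 = 2.0d0*wk1 ! matvec: wk2 = A*wk1 with A = 2*I
+! goto 1
+! endif
+! print *, 'solution = ', sol ! expect 0.5 everywhere
+! end program test_fgmres
+!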
+ return +!-----end-of-fgmres----------------------------------------------------- +!----------------------------------------------------------------------- + end + From c426123df809613acc817ea6847990cecebb01f0 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 12 Jun 2018 14:50:14 +0000 Subject: [PATCH 025/196] modified code for serial tests --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 54 +++++++++++++---------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 5e7db6b11..022792e45 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -106,9 +106,11 @@ subroutine imp_solver (dt) kmax , & ! jfl put in namelist ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres + iout , & ! for printing fgmres info its , & ! iteration nb for fgmres ischmi , & ! Quesse ca!?!?! jfl maxits , & ! max nb of iteration for fgmres + fgmres_its , & ! final nb of fgmres_its im_fgmres , & ! for size of Krylov subspace iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain @@ -442,22 +444,23 @@ subroutine imp_solver (dt) icode = 0 conv = 1.d0 - its = 0 + iout = 1 +! its = 0 ischmi = 0 im_fgmres = 50 maxits = 50 - sol_eps = 1d-02 + sol_eps = 1d-01 allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot)) allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - ! form b vector from matrices (max_blocks matrices) - call arrays_to_vec (nx_block, ny_block, max_blocks, & + ! form b vector from matrices (nblocks matrices) + call arrays_to_vec (nx_block, ny_block, nblocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & bx (:,:,:), by (:,:,:), & bvec(:)) ! form sol vector for fgmres (sol is iniguess at the beginning) - call arrays_to_vec (nx_block, ny_block, max_blocks, & + call arrays_to_vec (nx_block, ny_block, nblocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & @@ -469,8 +472,11 @@ subroutine imp_solver (dt) 1 continue !----------------------------------------------------------------------- - call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & - sol_eps, maxits,its,conv,icode ) + !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & + ! sol_eps, maxits,its,conv,icode ) + + call fgmres (ntot,im_fgmres,bvec,sol,its,vv,wk,wk11,wk22, & + sol_eps, maxits,iout,icode,fgmres_its) if (icode == 1) then @@ -493,7 +499,7 @@ subroutine imp_solver (dt) ! call sol_matvec ( wk22, wk11, Minx, Maxx, Miny, Maxy, & ! nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) - call vec_to_arrays (nx_block, ny_block, max_blocks, & + call vec_to_arrays (nx_block, ny_block, nblocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & wk11 (:), & @@ -538,7 +544,7 @@ subroutine imp_solver (dt) !$OMP END PARALLEL DO ! form wk2 from Au and Av arrays - call arrays_to_vec (nx_block, ny_block, max_blocks, & + call arrays_to_vec (nx_block, ny_block, nblocks, & icellu (:), ntot, & indxui (:,:), indxuj(:,:), & Au (:,:,:), Av (:,:,:), & @@ -1927,15 +1933,16 @@ end subroutine calc_L2norm !======================================================================= - subroutine arrays_to_vec (nx_block, ny_block, max_blocks, & - icellu, ntot, & - indxui, indxuj, & - tpu, tpv, & - outvec) + subroutine arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu, ntot, & + indxui, indxuj, & + tpu, tpv, & + outvec) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! 
block dimensions - max_blocks, & ! nb of blocks + nblocks, & ! nb of blocks + max_blocks, & ! max nb of blocks ntot ! size of problem for fgmres integer (kind=int_kind), dimension (max_blocks), intent(in) :: & @@ -1966,7 +1973,7 @@ subroutine arrays_to_vec (nx_block, ny_block, max_blocks, & outvec(:)=c0 tot=0 - do iblk=1, max_blocks + do iblk=1, nblocks do ij =1, icellu(iblk) i = indxui(ij,iblk) j = indxuj(ij,iblk) @@ -1983,15 +1990,16 @@ end subroutine arrays_to_vec !======================================================================= - subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & - icellu, ntot, & - indxui, indxuj, & - invec, & + subroutine vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu, ntot, & + indxui, indxuj, & + invec, & tpu, tpv) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - max_blocks, & ! nb of blocks + nblocks, & ! nb of blocks + max_blocks, & ! max nb of blocks ntot ! size of problem for fgmres integer (kind=int_kind), dimension (max_blocks), intent(in) :: & @@ -2023,7 +2031,7 @@ subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & tpv(:,:,:)=c0 tot=0 - do iblk=1, max_blocks + do iblk=1, nblocks do ij =1, icellu(iblk) i = indxui(ij,iblk) j = indxuj(ij,iblk) @@ -2034,8 +2042,6 @@ subroutine vec_to_arrays (nx_block, ny_block, max_blocks, & enddo enddo! ij - print *, 'NTOT', max_blocks, tot, ntot - end subroutine vec_to_arrays ! JFL ROUTINE POUR CALC STRESS OCN POUR COUPLAGE From 4e36b413aa7fe9cb63a624d8b8a0c77a64375f71 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 12 Jun 2018 15:01:18 +0000 Subject: [PATCH 026/196] after debugging... --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 022792e45..bf554ccd2 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -455,13 +455,13 @@ subroutine imp_solver (dt) allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & - icellu (:), ntot, & + max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & bx (:,:,:), by (:,:,:), & bvec(:)) ! form sol vector for fgmres (sol is iniguess at the beginning) call arrays_to_vec (nx_block, ny_block, nblocks, & - icellu (:), ntot, & + max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & sol(:)) @@ -475,7 +475,7 @@ subroutine imp_solver (dt) !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & ! sol_eps, maxits,its,conv,icode ) - call fgmres (ntot,im_fgmres,bvec,sol,its,vv,wk,wk11,wk22, & + call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & sol_eps, maxits,iout,icode,fgmres_its) if (icode == 1) then @@ -500,7 +500,7 @@ subroutine imp_solver (dt) ! nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) call vec_to_arrays (nx_block, ny_block, nblocks, & - icellu (:), ntot, & + max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & wk11 (:), & uvel (:,:,:), vvel (:,:,:)) @@ -545,7 +545,7 @@ subroutine imp_solver (dt) ! 
form wk2 from Au and Av arrays call arrays_to_vec (nx_block, ny_block, nblocks, & - icellu (:), ntot, & + max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & Au (:,:,:), Av (:,:,:), & wk22(:)) From 2139aef3d40f37ecf84b741b0e6cc390636b348b Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Tue, 12 Jun 2018 15:19:31 +0000 Subject: [PATCH 027/196] added code to put sol vector in uvel and vvel arrays at the end of fgmres --- .../cicedynB/dynamics/{fgmresD.f90 => fgmresD.F90} | 0 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 ++++++++++ 2 files changed, 10 insertions(+) rename cicecore/cicedynB/dynamics/{fgmresD.f90 => fgmresD.F90} (100%) diff --git a/cicecore/cicedynB/dynamics/fgmresD.f90 b/cicecore/cicedynB/dynamics/fgmresD.F90 similarity index 100% rename from cicecore/cicedynB/dynamics/fgmresD.f90 rename to cicecore/cicedynB/dynamics/fgmresD.F90 diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index bf554ccd2..66eaac449 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -585,6 +585,16 @@ subroutine imp_solver (dt) ! umassdti (:,:,iblk), zetaD (:,:,iblk,:), & ! Diagu (:,:,iblk), Diagv (:,:,iblk)) +!----------------------------------------------------------------------- +! Put vector sol in uvel and vvel arrays +!----------------------------------------------------------------------- + + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + sol (:), & + uvel (:,:,:), vvel (:,:,:)) + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks From 502bab079387272db77e1dfe3b45763c753bf1bc Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 13 Jun 2018 15:07:06 +0000 Subject: [PATCH 028/196] modified tinyarea calc for VP (punyVP=2e-9) --- cicecore/cicedynB/infrastructure/ice_grid.F90 | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/infrastructure/ice_grid.F90 b/cicecore/cicedynB/infrastructure/ice_grid.F90 index 34b37cf29..8832407fd 100644 --- a/cicecore/cicedynB/infrastructure/ice_grid.F90 +++ b/cicecore/cicedynB/infrastructure/ice_grid.F90 @@ -339,7 +339,8 @@ subroutine init_grid2 real (kind=dbl_kind) :: & angle_0, angle_w, angle_s, angle_sw, & - pi, pi2, puny + pi, pi2, puny, punyVP + logical (kind=log_kind), dimension(nx_block,ny_block,max_blocks):: & out_of_range @@ -383,6 +384,9 @@ subroutine init_grid2 ! T-grid cell and U-grid cell quantities !----------------------------------------------------------------- + print *, 'in init_grid2 to set tinyarea with punyVP value' + punyVP = 2d-09 + ! tarea(:,:,:) = c0 !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) @@ -407,7 +411,8 @@ subroutine init_grid2 else uarear(i,j,iblk) = c0 ! possible on boundaries endif - tinyarea(i,j,iblk) = puny*tarea(i,j,iblk) +! tinyarea(i,j,iblk) = puny*tarea(i,j,iblk) + tinyarea(i,j,iblk) = punyVP*tarea(i,j,iblk) dxhy(i,j,iblk) = p5*(HTE(i,j,iblk) - HTE(i-1,j,iblk)) dyhx(i,j,iblk) = p5*(HTN(i,j,iblk) - HTN(i,j-1,iblk)) From f6be97eda374894eaf1f02f2fca1fe5b51e814ae Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 13 Jun 2018 16:45:49 +0000 Subject: [PATCH 029/196] modified location of allocation for some variables... 
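
The FGMRES work arrays are sized with ntot, which is only known once
the compressed u/v indices have been counted, but they do not change
between Picard iterations, so they are now allocated once before the
outer loop (and freed after it) instead of inside it. A sketch of the
intended ordering, using the names from the diff below:

    ntot = 0
    do iblk = 1, nblocks
       ntot = ntot + icellu(iblk)
    enddo
    ntot = 2*ntot    ! times 2 because of u and v
    allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot))
    allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres))
    ! ... Picard outer loop / FGMRES iterations ...
    deallocate(bvec, sol, wk11, wk22, vv, ww)

Note that im_fgmres must be defined before vv and ww are allocated.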
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 66eaac449..cdd38134d 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -338,7 +338,7 @@ subroutine imp_solver (dt) !$TCXOMP END PARALLEL DO !----------------------------------------------------------------- - ! value of ntot + ! calc size of problem (ntot) and allocate arrays and vectors !----------------------------------------------------------------- ntot=0 @@ -347,6 +347,11 @@ subroutine imp_solver (dt) enddo ntot = 2*ntot ! times 2 because of u and v + allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot)) + allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + + !----------------------------------------------------------------- + call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) @@ -391,9 +396,9 @@ subroutine imp_solver (dt) enddo endif - kmax=1 + kmax=2 do kOL = 1,kmax ! outer loop - + print *, 'Picard iteration', kOL !----------------------------------------------------------------- ! Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k) !----------------------------------------------------------------- @@ -449,10 +454,8 @@ subroutine imp_solver (dt) ischmi = 0 im_fgmres = 50 maxits = 50 - sol_eps = 1d-01 + sol_eps = 5d-01 - allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot)) - allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & @@ -625,6 +628,7 @@ subroutine imp_solver (dt) enddo ! outer loop + deallocate(bvec, sol, wk11, wk22, vv, ww) deallocate(fld2) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) From d7316c4574697c62ebc59bf967bd78bbdd162957 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 13 Jun 2018 16:51:43 +0000 Subject: [PATCH 030/196] small bug corrected in allocating variables --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index cdd38134d..6e92a723d 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -181,6 +181,14 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- allocate(fld2(nx_block,ny_block,2,max_blocks)) + + !----------------------------------------------------------------- + ! Define a few things for FGMRES and Picard solver + !----------------------------------------------------------------- + + im_fgmres = 50 + maxits = 50 + kmax=2 ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -396,7 +404,6 @@ subroutine imp_solver (dt) enddo endif - kmax=2 do kOL = 1,kmax ! outer loop print *, 'Picard iteration', kOL !----------------------------------------------------------------- @@ -451,9 +458,7 @@ subroutine imp_solver (dt) conv = 1.d0 iout = 1 ! its = 0 - ischmi = 0 - im_fgmres = 50 - maxits = 50 + ischmi = 0 sol_eps = 5d-01 ! 
form b vector from matrices (nblocks matrices) From def1babb8e909d988297f7b1f5e7e7416a442e28 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 13 Jun 2018 18:14:34 +0000 Subject: [PATCH 031/196] introduced capping of the viscous coeff following Kreysher --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 42 ++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 6e92a723d..293afcc09 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -144,6 +144,8 @@ subroutine imp_solver (dt) Diagv , & ! diagonal matrix coeff for v component uprev_k , & ! uvel at previous Picard iteration vprev_k , & ! vvel at previous Picard iteration + ulin , & ! uvel to linearize vrel + vlin , & ! vvel to linearize vrel vrel , & ! coeff for tauw ! jfl Cb , & ! seabed stress coeff ! jfl aiu , & ! ice fraction on u-grid @@ -188,7 +190,7 @@ subroutine imp_solver (dt) im_fgmres = 50 maxits = 50 - kmax=2 + kmax=50 ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -413,6 +415,14 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks + if (kOL .eq. 1) then + ulin(:,:,iblk) = uvel(:,:,iblk) + vlin(:,:,iblk) = vvel(:,:,iblk) + else + ulin(:,:,iblk) = p5*uprev_k(:,:,iblk) + p5*uvel(:,:,iblk) + vlin(:,:,iblk) = p5*vprev_k(:,:,iblk) + p5*vvel(:,:,iblk) + endif + uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) @@ -433,7 +443,7 @@ subroutine imp_solver (dt) kOL , & aiu (:,:,iblk), Tbu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ulin (:,:,iblk), vlin (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) call calc_bvec (nx_block , ny_block, & @@ -443,7 +453,7 @@ subroutine imp_solver (dt) aiu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk)) @@ -459,7 +469,7 @@ subroutine imp_solver (dt) iout = 1 ! its = 0 ischmi = 0 - sol_eps = 5d-01 + sol_eps = 1d-05 ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & @@ -788,7 +798,11 @@ subroutine viscous_coeff (nx_block, ny_block, & tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing Deltane, Deltanw, Deltase, Deltasw ! Delt + + logical :: capping ! of the viscous coeff + capping = .false. 
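+! Note on the two branches below: with capping, the viscous
+! coefficient is bounded as zetaD = P/max(Delta, Delta_min), while
+! without capping the denominator is smoothed, zetaD = P/(Delta +
+! Delta_min), where P is the ice strength and Delta_min is tinyarea
+! (Delta and tinyarea both carry the cell-area factor here).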
+ !DIR$ CONCURRENT !Cray !cdir nodep !NEC !ocl novrec !Fujitsu @@ -836,10 +850,22 @@ subroutine viscous_coeff (nx_block, ny_block, & Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) - zetaD(i,j,2) = strength(i,j)/max(Deltanw,tinyarea(i,j)) - zetaD(i,j,3) = strength(i,j)/max(Deltasw,tinyarea(i,j)) - zetaD(i,j,4) = strength(i,j)/max(Deltase,tinyarea(i,j)) + if (capping) then + + zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) + zetaD(i,j,2) = strength(i,j)/max(Deltanw,tinyarea(i,j)) + zetaD(i,j,3) = strength(i,j)/max(Deltasw,tinyarea(i,j)) + zetaD(i,j,4) = strength(i,j)/max(Deltase,tinyarea(i,j)) + + else + + zetaD(i,j,1) = strength(i,j)/(Deltane + tinyarea(i,j)) + zetaD(i,j,2) = strength(i,j)/(Deltanw + tinyarea(i,j)) + zetaD(i,j,3) = strength(i,j)/(Deltasw + tinyarea(i,j)) + zetaD(i,j,4) = strength(i,j)/(Deltase + tinyarea(i,j)) + + + endif enddo ! ij From 933fa43a053548bc9f5aca2bbc7e4a577e992b48 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 13 Jun 2018 19:05:21 +0000 Subject: [PATCH 032/196] added a relaxation to uvel,vvel to try to get nonlinear conv --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 293afcc09..936e519e4 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -158,7 +158,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, sol_eps + real (kind=dbl_kind) :: conv, sol_eps, krelax real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK @@ -191,6 +191,7 @@ subroutine imp_solver (dt) im_fgmres = 50 maxits = 50 kmax=50 + krelax=0.5d0 ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -613,6 +614,13 @@ subroutine imp_solver (dt) sol (:), & uvel (:,:,:), vvel (:,:,:)) + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + do iblk = 1, nblocks + uvel(:,:,iblk) = (1d0-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) + vvel(:,:,iblk) = (1d0-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks From 9779b0c6ef1c9b579fb4d2254289b8ed2916852a Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 5 Jul 2018 19:14:13 +0000 Subject: [PATCH 033/196] modif to relaxation calc...minor --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 936e519e4..d4e38e1d4 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -191,7 +191,7 @@ subroutine imp_solver (dt) im_fgmres = 50 maxits = 50 kmax=50 - krelax=0.5d0 + krelax=c1 ! This call is needed only if dt changes during runtime. ! 
call set_evp_parameters (dt) @@ -616,8 +616,8 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk,strtmp) do iblk = 1, nblocks - uvel(:,:,iblk) = (1d0-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) - vvel(:,:,iblk) = (1d0-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) + uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) + vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) enddo !$OMP END PARALLEL DO From f7c276a4532ae1323abfe896a498b975249e6a04 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 5 Jul 2018 19:47:48 +0000 Subject: [PATCH 034/196] I found a bug in my 1st implementation. The dsig/dx term related to the rep pressurer should be on the RHS...in the process of correcting this --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 206 +++++++++++++++++++++- 1 file changed, 203 insertions(+), 3 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d4e38e1d4..ac4d77904 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -536,7 +536,7 @@ subroutine imp_solver (dt) cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), tinyarea (:,:,iblk), & - zetaD (:,:,iblk,:),strength (:,:,iblk), & + zetaD (:,:,iblk,:), & stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & @@ -896,7 +896,7 @@ subroutine stress_vp (nx_block, ny_block, & cxp, cyp, & cxm, cym, & tarear, tinyarea, & - zetaD, strength, & + zetaD, & stressp_1, stressp_2, & stressp_3, stressp_4, & stressm_1, stressm_2, & @@ -918,7 +918,6 @@ subroutine stress_vp (nx_block, ny_block, & indxtj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - strength , & ! ice strength (N/m) uvel , & ! x-component of velocity (m/s) vvel , & ! y-component of velocity (m/s) dxt , & ! width of T-cell through the middle (m) @@ -1198,6 +1197,207 @@ subroutine stress_vp (nx_block, ny_block, & end subroutine stress_vp +!======================================================================= +! Computes part of rheology term associated with the replacement pressure +! +! author: JF Lemieux, ECCC + + subroutine stress_vp_Pr (nx_block, ny_block, & + kOL, icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + zetaD, & + stressp_1, stressp_2, & + stressp_3, stressp_4, & + stPr ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + kOL , & ! subcycling step + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 
2*zeta + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + stressp_1, stressp_2, stressp_3, stressp_4 ! Prsigma11+Prsigma22 + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + stPr ! stress combinations for replacement pressure term + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + puny , & ! puny + ssigpn, ssigps, ssigpe, ssigpw , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + strp_tmp, tmp + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + stPr(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1(i,j) = -zetaD(i,j,1)*(Deltane*(c1-Ktens)) + stressp_2(i,j) = -zetaD(i,j,2)*(Deltanw*(c1-Ktens)) + stressp_3(i,j) = -zetaD(i,j,3)*(Deltasw*(c1-Ktens)) + stressp_4(i,j) = -zetaD(i,j,4)*(Deltase*(c1-Ktens)) + + !----------------------------------------------------------------- + ! 
combinations of the Pr related stresses for the momentum equation ! kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1(i,j) + stressp_2(i,j) + ssigps = stressp_3(i,j) + stressp_4(i,j) + ssigpe = stressp_1(i,j) + stressp_4(i,j) + ssigpw = stressp_2(i,j) + stressp_3(i,j) + ssigp1 =(stressp_1(i,j) + stressp_3(i,j))*p055 + ssigp2 =(stressp_2(i,j) + stressp_4(i,j))*p055 + + csigpne = p111*stressp_1(i,j) + ssigp2 + p027*stressp_3(i,j) + csigpnw = p111*stressp_2(i,j) + ssigp1 + p027*stressp_4(i,j) + csigpsw = p111*stressp_3(i,j) + ssigp2 + p027*stressp_1(i,j) + csigpse = p111*stressp_4(i,j) + ssigp1 + p027*stressp_2(i,j) + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + + ! northeast (i,j) + stPr(i,j,1) = -strp_tmp & + + dxhy(i,j)*(-csigpne) + + ! northwest (i+1,j) + stPr(i,j,2) = strp_tmp & + + dxhy(i,j)*(-csigpnw) + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + + ! southeast (i,j+1) + stPr(i,j,3) = -strp_tmp & + + dxhy(i,j)*(-csigpse) + + ! southwest (i+1,j+1) + stPr(i,j,4) = strp_tmp & + + dxhy(i,j)*(-csigpsw) + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + + ! northeast (i,j) + stPr(i,j,5) = -strp_tmp & + - dyhx(i,j)*(csigpne) + + ! southeast (i,j+1) + stPr(i,j,6) = strp_tmp & + - dyhx(i,j)*(csigpse) + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + + ! northwest (i+1,j) + stPr(i,j,7) = -strp_tmp & + - dyhx(i,j)*(csigpnw) + + ! southwest (i+1,j+1) + stPr(i,j,8) = strp_tmp & + - dyhx(i,j)*(csigpsw) + + enddo ! ij + + end subroutine stress_vp_Pr + !======================================================================= subroutine calc_vrel_Cb (nx_block, ny_block, & From aad62bf041c4de9b46c5dc926e7c0167ac16297b Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 9 Jul 2018 19:45:20 +0000 Subject: [PATCH 035/196] Pr is now calc with zeta --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 310 +++++++--------------- 1 file changed, 96 insertions(+), 214 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ac4d77904..8cdbd0ad0 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -161,8 +161,9 @@ subroutine imp_solver (dt) real (kind=dbl_kind) :: conv, sol_eps, krelax real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - strtmp ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK - ! doit etre (nx_block,ny_block,max_blocks,8)???? + strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK + stPrtmp ! doit etre (nx_block,ny_block,max_blocks,8)???? + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & zetaD ! 
zetaD = 2zeta (viscous coeff) @@ -427,7 +428,7 @@ subroutine imp_solver (dt) uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) - call viscous_coeff (nx_block , ny_block, & + call calc_zeta_Pr (nx_block , ny_block, & kOL , icellt(iblk), & indxti (:,iblk) , indxtj(:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & @@ -436,7 +437,8 @@ subroutine imp_solver (dt) cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), tinyarea (:,:,iblk),& - strength (:,:,iblk), zetaD (:,:,iblk,:)) + strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + stPrtmp (:,:,:)) call calc_vrel_Cb (nx_block , ny_block, & icellu (iblk), Cdn_ocn (:,:,iblk), & @@ -754,9 +756,9 @@ end subroutine imp_solver !======================================================================= -! Computes the viscous coefficients. In fact zetaD=2*zeta +! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. - subroutine viscous_coeff (nx_block, ny_block, & + subroutine calc_zeta_Pr (nx_block, ny_block, & kOL, icellt, & indxti, indxtj, & uvel, vvel, & @@ -765,7 +767,8 @@ subroutine viscous_coeff (nx_block, ny_block, & cxp, cyp, & cxm, cym, & tarear, tinyarea, & - strength, zetaD) + strength, zetaD, & + stPr) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -795,6 +798,10 @@ subroutine viscous_coeff (nx_block, ny_block, & real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(out) :: & zetaD ! 2*zeta + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + stPr ! stress Pr combinations ! local variables @@ -802,10 +809,14 @@ subroutine viscous_coeff (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw ! Delt + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw , & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + ssigpn, ssigps, ssigpe, ssigpw, ssigp1, ssigp2, & + csigpne, csigpnw, csigpsw, csigpse , & + stressp_1, stressp_2, stressp_3, stressp_4 , & + strp_tmp logical :: capping ! of the viscous coeff @@ -874,10 +885,82 @@ subroutine viscous_coeff (nx_block, ny_block, & endif + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1 = -zetaD(i,j,1)*(Deltane*(c1-Ktens)) + stressp_2 = -zetaD(i,j,2)*(Deltanw*(c1-Ktens)) + stressp_3 = -zetaD(i,j,3)*(Deltasw*(c1-Ktens)) + stressp_4 = -zetaD(i,j,4)*(Deltase*(c1-Ktens)) + + !----------------------------------------------------------------- + ! combinations of the Pr related stresses for the momentum equation ! kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + !----------------------------------------------------------------- + ! 
for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + + ! northeast (i,j) + stPr(i,j,1) = -strp_tmp & + + dxhy(i,j)*(-csigpne) + + ! northwest (i+1,j) + stPr(i,j,2) = strp_tmp & + + dxhy(i,j)*(-csigpnw) + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + + ! southeast (i,j+1) + stPr(i,j,3) = -strp_tmp & + + dxhy(i,j)*(-csigpse) + + ! southwest (i+1,j+1) + stPr(i,j,4) = strp_tmp & + + dxhy(i,j)*(-csigpsw) + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + + ! northeast (i,j) + stPr(i,j,5) = -strp_tmp & + - dyhx(i,j)*(csigpne) + + ! southeast (i,j+1) + stPr(i,j,6) = strp_tmp & + - dyhx(i,j)*(csigpse) + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + + ! northwest (i+1,j) + stPr(i,j,7) = -strp_tmp & + - dyhx(i,j)*(csigpnw) + + ! southwest (i+1,j+1) + stPr(i,j,8) = strp_tmp & + - dyhx(i,j)*(csigpsw) enddo ! ij - end subroutine viscous_coeff + end subroutine calc_zeta_Pr !======================================================================= @@ -1030,7 +1113,7 @@ subroutine stress_vp (nx_block, ny_block, & !----------------------------------------------------------------- ! on last subcycle, save quantities for mechanical redistribution !----------------------------------------------------------------- - if (kOL == 10) then ! jfl MODIF + if (kOL == 100) then ! jfl MODIF divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) rdg_conv(i,j) = -min(divu(i,j),c0) @@ -1197,207 +1280,6 @@ subroutine stress_vp (nx_block, ny_block, & end subroutine stress_vp -!======================================================================= -! Computes part of rheology term associated with the replacement pressure -! -! author: JF Lemieux, ECCC - - subroutine stress_vp_Pr (nx_block, ny_block, & - kOL, icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - tarear, tinyarea, & - zetaD, & - stressp_1, stressp_2, & - stressp_3, stressp_4, & - stPr ) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - kOL , & ! subcycling step - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & - zetaD ! 2*zeta - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - stressp_1, stressp_2, stressp_3, stressp_4 ! Prsigma11+Prsigma22 - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - stPr ! stress combinations for replacement pressure term - - ! 
local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - puny , & ! puny - ssigpn, ssigps, ssigpe, ssigpw , & - ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & - csigpne, csigpnw, csigpse, csigpsw , & - strp_tmp, tmp - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - - stPr(:,:,:) = c0 - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! strain rates - ! NOTE these are actually strain rates * area (m^2/s) - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - !----------------------------------------------------------------- - - stressp_1(i,j) = -zetaD(i,j,1)*(Deltane*(c1-Ktens)) - stressp_2(i,j) = -zetaD(i,j,2)*(Deltanw*(c1-Ktens)) - stressp_3(i,j) = -zetaD(i,j,3)*(Deltasw*(c1-Ktens)) - stressp_4(i,j) = -zetaD(i,j,4)*(Deltase*(c1-Ktens)) - - !----------------------------------------------------------------- - ! combinations of the Pr related stresses for the momentum equation ! 
kg/s^2 - !----------------------------------------------------------------- - - ssigpn = stressp_1(i,j) + stressp_2(i,j) - ssigps = stressp_3(i,j) + stressp_4(i,j) - ssigpe = stressp_1(i,j) + stressp_4(i,j) - ssigpw = stressp_2(i,j) + stressp_3(i,j) - ssigp1 =(stressp_1(i,j) + stressp_3(i,j))*p055 - ssigp2 =(stressp_2(i,j) + stressp_4(i,j))*p055 - - csigpne = p111*stressp_1(i,j) + ssigp2 + p027*stressp_3(i,j) - csigpnw = p111*stressp_2(i,j) + ssigp1 + p027*stressp_4(i,j) - csigpsw = p111*stressp_3(i,j) + ssigp2 + p027*stressp_1(i,j) - csigpse = p111*stressp_4(i,j) + ssigp1 + p027*stressp_2(i,j) - - !----------------------------------------------------------------- - ! for dF/dx (u momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - - ! northeast (i,j) - stPr(i,j,1) = -strp_tmp & - + dxhy(i,j)*(-csigpne) - - ! northwest (i+1,j) - stPr(i,j,2) = strp_tmp & - + dxhy(i,j)*(-csigpnw) - - strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - - ! southeast (i,j+1) - stPr(i,j,3) = -strp_tmp & - + dxhy(i,j)*(-csigpse) - - ! southwest (i+1,j+1) - stPr(i,j,4) = strp_tmp & - + dxhy(i,j)*(-csigpsw) - - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - - ! northeast (i,j) - stPr(i,j,5) = -strp_tmp & - - dyhx(i,j)*(csigpne) - - ! southeast (i,j+1) - stPr(i,j,6) = strp_tmp & - - dyhx(i,j)*(csigpse) - - strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - - ! northwest (i+1,j) - stPr(i,j,7) = -strp_tmp & - - dyhx(i,j)*(csigpnw) - - ! southwest (i+1,j+1) - stPr(i,j,8) = strp_tmp & - - dyhx(i,j)*(csigpsw) - - enddo ! ij - - end subroutine stress_vp_Pr - !======================================================================= subroutine calc_vrel_Cb (nx_block, ny_block, & From 1eb637321d20bf64047fdc1f89a6f1b021ae7801 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 9 Jul 2018 20:10:30 +0000 Subject: [PATCH 036/196] dPrdx is now in calc_bvec --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 60 +++++++++++++---------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 8cdbd0ad0..4909aa10e 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -162,7 +162,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK - stPrtmp ! doit etre (nx_block,ny_block,max_blocks,8)???? + stPrtmp ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & zetaD ! 
zetaD = 2zeta (viscous coeff) @@ -428,17 +428,17 @@ subroutine imp_solver (dt) uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) - call calc_zeta_Pr (nx_block , ny_block, & - kOL , icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk),& - strength (:,:,iblk), zetaD (:,:,iblk,:) ,& - stPrtmp (:,:,:)) + call calc_zeta_Pr (nx_block , ny_block, & + kOL , icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk),& + strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + stPrtmp (:,:,:) ) call calc_vrel_Cb (nx_block , ny_block, & icellu (iblk), Cdn_ocn (:,:,iblk), & @@ -453,12 +453,13 @@ subroutine imp_solver (dt) icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & kOL , Cdn_ocn (:,:,iblk), & - aiu (:,:,iblk), & + aiu (:,:,iblk), uarear (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk)) + bx (:,:,iblk), by (:,:,iblk), & + stPrtmp (:,:,:)) enddo !$OMP END PARALLEL DO @@ -1532,12 +1533,13 @@ subroutine calc_bvec (nx_block, ny_block, & icellu, & indxui, indxuj, & kOL, Cw, & - aiu, & + aiu, uarear, & uocn, vocn, & waterx, watery, & uvel, vvel, & bxfix, byfix, & - bx, by) + bx, by, & + stPr) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1551,8 +1553,10 @@ subroutine calc_bvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + Cw , & ! ocean-ice neutral drag coefficient aiu , & ! ice fraction on u-grid + uarear , & ! 1/uarea waterx , & ! for ocean stress calculation, x (m/s) watery , & ! for ocean stress calculation, y (m/s) bxfix , & ! bx = taux + bxfix !jfl @@ -1560,15 +1564,15 @@ subroutine calc_bvec (nx_block, ny_block, & uocn , & ! ocean current, x-direction (m/s) vocn ! ocean current, y-direction (m/s) + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(in) :: & + stPr + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl by ! b vector, by = tauy + byfix (N/m^2) !jfl - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - Cw ! ocean-ice neutral drag coefficient - ! local variables integer (kind=int_kind) :: & @@ -1578,6 +1582,7 @@ subroutine calc_bvec (nx_block, ny_block, & vrel , & ! relative ice-ocean velocity utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? taux, tauy , & ! part of ocean stress term + strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) rhow ! !----------------------------------------------------------------- @@ -1602,12 +1607,15 @@ subroutine calc_bvec (nx_block, ny_block, & ! ice/ocean stress taux = vrel*waterx(i,j) ! NOTE this is not the entire tauy = vrel*watery(i,j) ! ocn stress term - - bx(i,j) = bxfix(i,j) + taux - by(i,j) = byfix(i,j) + tauy -! bvec(2*ij-1)= bvecfix(2*ij-1) + taux -! bvec(2*ij) = bvecfix(2*ij) + tauy + ! 
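+         ! Note: stPr (from calc_zeta_Pr) carries the eight corner
+         ! contributions of the replacement-pressure stress evaluated at
+         ! the previous Picard iterate; the two sums below collapse the
+         ! four corners surrounding each u point, and the 1/uarea scaling
+         ! turns them into the dPr/dx and dPr/dy terms, which is what
+         ! allows Pr to be moved to the right-hand side b.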
divergence of the internal stress tensor (only Pr part, i.e. dPr/dx) + strintx = uarear(i,j)* & + (stPr(i,j,1) + stPr(i+1,j,2) + stPr(i,j+1,3) + stPr(i+1,j+1,4)) + strinty = uarear(i,j)* & + (stPr(i,j,5) + stPr(i,j+1,6) + stPr(i+1,j,7) + stPr(i+1,j+1,8)) + + bx(i,j) = bxfix(i,j) + taux + strintx + by(i,j) = byfix(i,j) + tauy + strinty enddo ! ij From fa0604b09c3c9846bbaa3067cf3c862421cf5e70 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 9 Jul 2018 20:21:12 +0000 Subject: [PATCH 037/196] removed Pr part from stress_vp calc --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 323 +++++++++++++++++++++- 1 file changed, 311 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 4909aa10e..31f8e5a38 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -540,12 +540,6 @@ subroutine imp_solver (dt) cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), tinyarea (:,:,iblk), & zetaD (:,:,iblk,:), & - stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & - stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & - stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & - stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & - stress12_1(:,:,iblk), stress12_2(:,:,iblk), & - stress12_3(:,:,iblk), stress12_4(:,:,iblk), & shear (:,:,iblk), divu (:,:,iblk), & rdg_conv (:,:,iblk), rdg_shear (:,:,iblk), & strtmp (:,:,:)) @@ -965,13 +959,318 @@ end subroutine calc_zeta_Pr !======================================================================= -! Computes the rates of strain and internal stress components for -! each of the four corners on each T-grid cell. -! Computes stress terms for the momentum equation -! -! author: Elizabeth C. Hunke, LANL, JF Lemieux, ECCC +! Computes VP stress without the rep. pressure Pr (included in b vector) subroutine stress_vp (nx_block, ny_block, & + kOL, icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + zetaD, & + shear, divu, & + rdg_conv, rdg_shear, & + str ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + kOL , & ! subcycling step + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + shear , & ! strain rate II component (1/s) + divu , & ! strain rate I component, velocity divergence (1/s) + rdg_conv , & ! convergence term for ridging (1/s) + rdg_shear ! shear term for ridging (1/s) + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + str ! stress combinations + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! 
divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + puny , & ! puny + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + real (kind=dbl_kind) :: & + stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) + stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 + stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + str(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + + !----------------------------------------------------------------- + ! on last subcycle, save quantities for mechanical redistribution + !----------------------------------------------------------------- + if (kOL == 100) then ! jfl MODIF + divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) + tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) + rdg_conv(i,j) = -min(divu(i,j),c0) + rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) + + ! diagnostic only + ! 
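+            ! Note on the kOL == 100 gate above: it is a hard-coded
+            ! stand-in for "last outer iteration" (hence the jfl MODIF
+            ! tag); it fires at exactly the 100th iteration rather than
+            ! the final one once kmax changes (kmax becomes 200 and then
+            ! 1000 later in this series), and may never fire once the
+            ! nonlinear-convergence exit is added.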
shear = sqrt(tension**2 + shearing**2) + shear(i,j) = p25*tarear(i,j)*sqrt( & + (tensionne + tensionnw + tensionse + tensionsw)**2 & + + (shearne + shearnw + shearse + shearsw)**2) + + endif + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + ! JFL commented part of stressp is for the rep pressure Pr + !----------------------------------------------------------------- + + stressp_1 = zetaD(i,j,1)*(divune*(c1+Ktens))! - Deltane*(c1-Ktens)) + stressp_2 = zetaD(i,j,2)*(divunw*(c1+Ktens))! - Deltanw*(c1-Ktens)) + stressp_3 = zetaD(i,j,3)*(divusw*(c1+Ktens))! - Deltasw*(c1-Ktens)) + stressp_4 = zetaD(i,j,4)*(divuse*(c1+Ktens))! - Deltase*(c1-Ktens)) + + stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci + + stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci + + !----------------------------------------------------------------- + ! Eliminate underflows. + ! The following code is commented out because it is relatively + ! expensive and most compilers include a flag that accomplishes + ! the same thing more efficiently. This code is cheaper than + ! handling underflows if the compiler lacks a flag; uncomment + ! it in that case. The compiler flag is often described with the + ! phrase "flush to zero". + !----------------------------------------------------------------- + +! call icepack_query_parameters(puny_out=puny) +! call icepack_warnings_flush(nu_diag) +! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & +! file=__FILE__, line=__LINE__) + +! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) +! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) +! stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) +! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) + +! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) +! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) +! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) +! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) + +! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) +! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) +! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) +! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! 
kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + ssigmn = stressm_1 + stressm_2 + ssigms = stressm_3 + stressm_4 + ssigme = stressm_1 + stressm_4 + ssigmw = stressm_2 + stressm_3 + ssigm1 =(stressm_1 + stressm_3)*p055 + ssigm2 =(stressm_2 + stressm_4)*p055 + + ssig12n = stress12_1 + stress12_2 + ssig12s = stress12_3 + stress12_4 + ssig12e = stress12_1 + stress12_4 + ssig12w = stress12_2 + stress12_3 + ssig121 =(stress12_1 + stress12_3)*p111 + ssig122 =(stress12_2 + stress12_4)*p111 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 + csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 + csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 + csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + + csig12ne = p222*stress12_1 + ssig122 & + + p055*stress12_3 + csig12nw = p222*stress12_2 + ssig121 & + + p055*stress12_4 + csig12sw = p222*stress12_3 + ssig122 & + + p055*stress12_1 + csig12se = p222*stress12_4 + ssig121 & + + p055*stress12_2 + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northeast (i,j) + str(i,j,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + ! northwest (i+1,j) + str(i,j,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southeast (i,j+1) + str(i,j,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + + ! southwest (i+1,j+1) + str(i,j,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! northeast (i,j) + str(i,j,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + + ! southeast (i,j+1) + str(i,j,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! northwest (i+1,j) + str(i,j,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + + ! southwest (i+1,j+1) + str(i,j,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + enddo ! ij + + end subroutine stress_vp + + +!======================================================================= + +! 
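+! Note on the splitting introduced here: the full VP corner stress used
+! previously was
+!
+!    stressp_k = zetaD_k * ( divu_k*(1+Ktens) - Delta_k*(1-Ktens) )
+!
+! stress_vp above keeps only the first, velocity-dependent term, so the
+! linear operator seen by the solver contains no replacement pressure;
+! the second term, -zetaD_k*Delta_k*(1-Ktens), is evaluated once per
+! outer iteration in calc_zeta_Pr and reaches the right-hand side b
+! through calc_bvec.
+!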
Computes the VP stress (as diagnostic) + + subroutine Diagstress_vp (nx_block, ny_block, & kOL, icellt, & indxti, indxtj, & uvel, vvel, & @@ -1279,7 +1578,7 @@ subroutine stress_vp (nx_block, ny_block, & enddo ! ij - end subroutine stress_vp + end subroutine Diagstress_vp !======================================================================= From 74c2b82b3af1738d85a56666e16871adfb9c6281 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 11 Jul 2018 15:08:18 +0000 Subject: [PATCH 038/196] minor modifs to improve print of res norm in fgmres --- cicecore/cicedynB/dynamics/fgmresD.F90 | 15 ++++++++------- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 81931bf7a..cb86d7e24 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,5 +1,5 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & - eps,maxits,iout,icode,its) + eps,maxits,iout,icode,its,kOL) !----------------------------------------------------------------------- ! jfl Dec 1st 2006. We modified the routine so that it is double precison. @@ -20,7 +20,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & !----------------------------------------------------------------------- implicit double precision (a-h,o-z) !jfl modification - integer n, im, maxits, iout, icode + integer n, im, maxits, iout, icode, kOL double precision rhs(*), sol(*), vv(n,im+1),w(n,im) double precision wk1(n), wk2(n), eps !----------------------------------------------------------------------- @@ -85,6 +85,8 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & ! ! iout == output unit number number for printing intermediate results ! if (iout .le. 0) no statistics are printed. +! if (iout .eq. 1) L2norm of 1st ite is printed. +! if (iout .gt. 1) L2norm of all ite are printed. ! ! icode = integer. indicator for the reverse communication protocole. ! ON ENTRY : icode should be set to icode = 0. @@ -146,8 +148,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & enddo if (its .eq. 0) eps1=eps if (its .eq. 0) r0 = ro - if (iout .gt. 0) write(*, 199) its, ro!& -! print *,'chau',its, ro !write(iout, 199) its, ro + if (iout .gt. 0) write(*, 199) kOL, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. ! @@ -225,8 +226,8 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & ! hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) ro = abs(rs(i1)) - if (iout .gt. 0) & - write(*, 199) its, ro + if (iout .gt. 1) & + write(*, 199) kOL, its, ro if (i .lt. im .and. (ro .gt. eps1)) goto 4 ! ! now compute solution. first solve upper triangular system. @@ -274,7 +275,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & goto 20 999 icode = 0 - 199 format(' -- fmgres its =', i4, ' res. norm =', d26.16) + 199 format('Picard its=', i4, ' fmgres its =', i4, ' res. norm =', d26.16) ! return !-----end-of-fgmres----------------------------------------------------- diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 31f8e5a38..0aee8c573 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -409,7 +409,7 @@ subroutine imp_solver (dt) endif do kOL = 1,kmax ! outer loop - print *, 'Picard iteration', kOL + !----------------------------------------------------------------- ! 
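          ! Note, the outer (Picard) loop in brief: vrel, Cb and zetaD
          ! are frozen at (uprev_k, vprev_k), the resulting linear system
          ! is solved by fgmres below, and the loop repeats from the new
          ! iterate.  krelax (set to c1) appears reserved for a damped
          ! update u_k <- krelax*u_k + (1-krelax)*u_{k-1}; that reading
          ! is an assumption, as krelax is not yet used in these patches.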
Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k) !----------------------------------------------------------------- @@ -470,7 +470,7 @@ subroutine imp_solver (dt) icode = 0 conv = 1.d0 - iout = 1 + iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations ! its = 0 ischmi = 0 sol_eps = 1d-05 @@ -498,7 +498,7 @@ subroutine imp_solver (dt) ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - sol_eps, maxits,iout,icode,fgmres_its) + sol_eps, maxits,iout,icode,fgmres_its,kOL) if (icode == 1) then From ea595d8e83533ea78042e25b4f70e93a7ca3df59 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 11 Jul 2018 15:22:54 +0000 Subject: [PATCH 039/196] added linear conv criterion based on a gamma --- cicecore/cicedynB/dynamics/fgmresD.F90 | 7 ++++--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index cb86d7e24..27b66fbad 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,5 +1,5 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & - eps,maxits,iout,icode,its,kOL) + gamma,maxits,iout,icode,its,kOL) !----------------------------------------------------------------------- ! jfl Dec 1st 2006. We modified the routine so that it is double precison. @@ -22,7 +22,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & implicit double precision (a-h,o-z) !jfl modification integer n, im, maxits, iout, icode, kOL double precision rhs(*), sol(*), vv(n,im+1),w(n,im) - double precision wk1(n), wk2(n), eps + double precision wk1(n), wk2(n), gamma !----------------------------------------------------------------------- ! flexible GMRES routine. This is a version of GMRES which allows a ! a variable preconditioner. Implemented with a reverse communication @@ -146,8 +146,9 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & do j=1, n vv(j,1) = vv(j,1)*t enddo - if (its .eq. 0) eps1=eps +! if (its .eq. 0) eps1=eps if (its .eq. 0) r0 = ro + if (its .eq. 0) eps1=gamma*ro if (iout .gt. 0) write(*, 199) kOL, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 0aee8c573..a10c2f102 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -158,7 +158,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, sol_eps, krelax + real (kind=dbl_kind) :: conv, gamma, krelax real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK @@ -191,7 +191,7 @@ subroutine imp_solver (dt) im_fgmres = 50 maxits = 50 - kmax=50 + kmax=200 krelax=c1 ! This call is needed only if dt changes during runtime. @@ -473,7 +473,7 @@ subroutine imp_solver (dt) iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations ! its = 0 ischmi = 0 - sol_eps = 1d-05 + gamma = 0.1d0 ! linear stopping criterion: gamma*(res_ini) ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & @@ -498,7 +498,7 @@ subroutine imp_solver (dt) ! 
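! Note: the inner (fgmres) solve now stops once the linear residual
! drops below gamma times the initial residual of the current outer
! iteration; inside fgmresD.F90 this is simply
!
!    if (its .eq. 0) r0 = ro
!    if (its .eq. 0) eps1 = gamma*ro
!
! so gamma = 0.1 asks for one order of magnitude of linear convergence
! per Picard iteration.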
sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - sol_eps, maxits,iout,icode,fgmres_its,kOL) + gamma, maxits,iout,icode,fgmres_its,kOL) if (icode == 1) then From 4f33964be144aa6651731999a4fe4c0577e229e1 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 11 Jul 2018 17:20:59 +0000 Subject: [PATCH 040/196] added NL conv criterion --- cicecore/cicedynB/dynamics/fgmresD.F90 | 19 ++++++++++++++----- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 14 +++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 27b66fbad..6dcf7d809 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,5 +1,5 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & - gamma,maxits,iout,icode,its,kOL) + gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL) !----------------------------------------------------------------------- ! jfl Dec 1st 2006. We modified the routine so that it is double precison. @@ -20,9 +20,9 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & !----------------------------------------------------------------------- implicit double precision (a-h,o-z) !jfl modification - integer n, im, maxits, iout, icode, kOL + integer n, im, maxits, iout, icode, iconv, kOL double precision rhs(*), sol(*), vv(n,im+1),w(n,im) - double precision wk1(n), wk2(n), gamma + double precision wk1(n), wk2(n), gamma, gammaNL !----------------------------------------------------------------------- ! flexible GMRES routine. This is a version of GMRES which allows a ! a variable preconditioner. Implemented with a reverse communication @@ -147,8 +147,17 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & vv(j,1) = vv(j,1)*t enddo ! if (its .eq. 0) eps1=eps - if (its .eq. 0) r0 = ro - if (its .eq. 0) eps1=gamma*ro + if (its .eq. 0) then + r0 = ro + eps1=gamma*ro + if (kOL .eq. 1) tolNL=gammaNL*ro + endif + + if (ro .lt. tolNL) then + iconv = 1 + goto 999 + endif + if (iout .gt. 0) write(*, 199) kOL, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a10c2f102..5a44a05a7 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -106,6 +106,7 @@ subroutine imp_solver (dt) kmax , & ! jfl put in namelist ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres + iconvNL , & ! code for NL convergence criterion iout , & ! for printing fgmres info its , & ! iteration nb for fgmres ischmi , & ! Quesse ca!?!?! jfl @@ -158,7 +159,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, gamma, krelax + real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, krelax real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK @@ -191,7 +192,9 @@ subroutine imp_solver (dt) im_fgmres = 50 maxits = 50 - kmax=200 + kmax=1000 + gammaNL=1e-2_dbl_kind + iconv=0 ! equals 1 when NL convergence is reached krelax=c1 ! This call is needed only if dt changes during runtime. @@ -469,11 +472,10 @@ subroutine imp_solver (dt) !----------------------------------------------------------------------- icode = 0 - conv = 1.d0 iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations ! 
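! Note: the nonlinear test added here lives inside fgmres itself: at
! kOL = 1 the tolerance tolNL = gammaNL*ro is recorded from the very
! first residual, and a call that starts below tolNL returns at once
! with iconv = 1 so the outer loop can exit.  A minimal sketch of the
! reverse-communication driver for the call below (apply_precond and
! apply_matvec are placeholder names; treating every icode other than
! 0 and 1 as a matvec request is an assumption):
!
!    icode = 0
!    do
!       call fgmres (ntot, im_fgmres, bvec, sol, its, vv, ww, wk11, wk22, &
!                    gamma, gammaNL, tolNL, maxits, iout, icode, iconvNL, &
!                    fgmres_its, kOL)
!       if (iconvNL == 1) exit      ! nonlinear convergence reached
!       if (icode == 1) then        ! wk22 <- M^{-1} wk11
!          call apply_precond (wk11, wk22)
!       elseif (icode /= 0) then    ! wk22 <- A wk11
!          call apply_matvec (wk11, wk22)
!       else
!          exit                     ! icode == 0: converged or maxits reached
!       endif
!    enddo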
its = 0 ischmi = 0 - gamma = 0.1d0 ! linear stopping criterion: gamma*(res_ini) + gamma = 0.25_dbl_kind ! linear stopping criterion: gamma*(res_ini) ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & @@ -498,8 +500,10 @@ subroutine imp_solver (dt) ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, maxits,iout,icode,fgmres_its,kOL) + gamma, gammaNL, tolNL, maxits,iout,icode,iconvNL,fgmres_its,kOL) + if (iconv .eq. 1) exit + if (icode == 1) then ! if (sol2D_precond_S == 'JACOBI') then From 4ad3875d7e96cbed08fb8e70b5c11f347ed1b3f9 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 12 Jul 2018 13:10:37 +0000 Subject: [PATCH 041/196] in the process of adding precond diag --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 295 +++++++++++++++++++++- 1 file changed, 293 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 5a44a05a7..fda4688ba 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1995,7 +1995,7 @@ end subroutine residual_vec !======================================================================= - subroutine precondD (nx_block, ny_block, & + subroutine OLDprecondD (nx_block, ny_block, & kOL, icellt, & indxti, indxtj, & dxt, dyt, & @@ -2327,7 +2327,298 @@ subroutine precondD (nx_block, ny_block, & enddo ! ij - end subroutine precondD + end subroutine OLDprecondD + +!======================================================================= + +! Calc diagonal term related to rheology for precond + + subroutine precondD_stress (nx_block, ny_block, & + icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + zetaD, Dstr ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + strength , & ! ice strength (N/m) + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm ! 0.5*HTN - 1.5*HTN + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + Dstr ! stress combinations + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divuneDu, divunwDu, divuseDu, divuswDu , & ! divergence + divuneDv, divunwDv, divuseDv, divuswDv , & ! divergence + tensionneDu, tensionnwDu, tensionseDu, tensionswDu, & ! tension + tensionneDv, tensionnwDv, tensionseDv, tensionswDv, & ! tension + shearneDu, shearnwDu, shearseDu, shearswDu , & ! shearing + shearneDv, shearnwDv, shearseDv, shearswDv , & ! 
shearing + stressp_1u, stressp_2u, stressp_3u, stressp_4u , & + stressp_1v, stressp_2v, stressp_3v, stressp_4v , & + stressm_1u, stressm_2u, stressm_3u, stressm_4u , & + stressm_1v, stressm_2v, stressm_3v, stressm_4v , & + stress12_1u, stress12_2u, stress12_3u, stress12_4u, & + stress12_1v, stress12_2v, stress12_3v, stress12_4v, & + ssigpnu, ssigpsu, ssigpeu, ssigpwu , & + ssigpnv, ssigpsv, ssigpev, ssigpwv , & + ssigmnu, ssigmsu, ssigmeu, ssigmwu , & + ssigmnv, ssigmsv, ssigmev, ssigmwv , & + ssig12nu, ssig12su, ssig12eu, ssig12wu , & + ssig12nv, ssig12sv, ssig12ev, ssig12wv , & + ssigp1u, ssigp2u, ssigm1u, ssigm2u, ssig121u, ssig122u, & + ssigp1v, ssigp2v, ssigm1v, ssigm2v, ssig121v, ssig122v, & + csigpneu, csigpnwu, csigpseu, csigpswu , & + csigpnev, csigpnwv, csigpsev, csigpswv , & + csigmneu, csigmnwu, csigmseu, csigmswu , & + csigmnev, csigmnwv, csigmsev, csigmswv , & + csig12neu, csig12nwu, csig12seu, csig12swu , & + csig12nev, csig12nwv, csig12sev, csig12swv , & + str12ewu, str12weu, str12nsu, str12snu , & + str12ewv, str12wev, str12nsv, str12snv , & + strp_tmpu, strm_tmpu, strp_tmpv, strm_tmpv , & + str1, str2, str3, str4, str5, str6, str7, str8 + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + Dstr(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! JFL watchout if currently on LHS or RHS + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divuneDu = cyp(i,j) + divuneDv = cxp(i,j) + divunwDu = dyt(i,j) + divuseDv = dxt(i,j) + + ! tension strain rate = e_11 - e_22 + tensionneDu = -cym(i,j) + tensionneDv = cxm(i,j) + tensionnwDu = dyt(i,j) + tensionseDv = - dxt(i,j) + + ! shearing strain rate = e_12 + shearneDu = -cxm(i,j) + shearneDv = -cym(i,j) + shearnwDv = dyt(i,j) + shearseDu = dxt(i,j) + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + +! IMProve: delete stress coeff not needed instead of setting them to 0. +! no need for divuneDu...just plug them directly in eqs below. + + stressp_1u = zetaD(i,j,1)*divuneDu*(c1+Ktens) + stressp_1v = zetaD(i,j,1)*divuneDv*(c1+Ktens) + stressp_2u = zetaD(i,j,2)*divunwDu*(c1+Ktens) + stressp_2v = c0 + stressp_3u = c0 + stressp_3v = c0 + stressp_4u = c0 + stressp_4v = zetaD(i,j,4)*divuseDv*(c1+Ktens) + + stressm_1u = zetaD(i,j,1)*tensionneDu*(c1+Ktens)*ecci + stressm_1v = zetaD(i,j,1)*tensionneDv*(c1+Ktens)*ecci + stressm_2u = zetaD(i,j,2)*tensionnwDu*(c1+Ktens)*ecci + stressm_2v = c0 + stressm_3u = c0 + stressm_3v = c0 + stressm_4u = c0 + stressm_4v = zetaD(i,j,4)*tensionseDv*(c1+Ktens)*ecci + + stress12_1u = zetaD(i,j,1)*shearneDu*p5*(c1+Ktens)*ecci + stress12_1v = zetaD(i,j,1)*shearneDv*p5*(c1+Ktens)*ecci + stress12_2u = c0 + stress12_2v = zetaD(i,j,2)*shearnwDv*p5*(c1+Ktens)*ecci + stress12_3u = c0 + stress12_3v = c0 + stress12_4u = zetaD(i,j,4)*shearseDu*p5*(c1+Ktens)*ecci + stress12_4v = c0 + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! 
kg/s^2 + !----------------------------------------------------------------- + + ssigpnu = stressp_1u + stressp_2u + ssigpnv = stressp_1v + stressp_2v + ssigpsu = stressp_3u + stressp_4u + ssigpsv = stressp_3v + stressp_4v + ssigpeu = stressp_1u + stressp_4v + ssigpev = stressp_1v + stressp_4v + ssigpwu = stressp_2u + stressp_3u + ssigpwv = stressp_2v + stressp_3v + ssigp1u =(stressp_1u + stressp_3u)*p055 + ssigp1v =(stressp_1v + stressp_3v)*p055 + ssigp2u =(stressp_2u + stressp_4u)*p055 + ssigp2v =(stressp_2v + stressp_4v)*p055 + + ssigmnu = stressm_1u + stressm_2u + ssigmnv = stressm_1v + stressm_2v + ssigmsu = stressm_3u + stressm_4u + ssigmsv = stressm_3v + stressm_4v + ssigmeu = stressm_1u + stressm_4u + ssigmev = stressm_1v + stressm_4v + ssigmwu = stressm_2u + stressm_3u + ssigmwv = stressm_2v + stressm_3v + ssigm1u =(stressm_1u + stressm_3u)*p055 + ssigm1v =(stressm_1v + stressm_3v)*p055 + ssigm2u =(stressm_2u + stressm_4u)*p055 + ssigm2v =(stressm_2v + stressm_4v)*p055 + + ssig12nu = stress12_1u + stress12_2u + ssig12nv = stress12_1v + stress12_2v + ssig12su = stress12_3u + stress12_4u + ssig12sv = stress12_3v + stress12_4v + ssig12eu = stress12_1u + stress12_4u + ssig12ev = stress12_1v + stress12_4v + ssig12wu = stress12_2u + stress12_3u + ssig12wv = stress12_2v + stress12_3v + ssig121u =(stress12_1u + stress12_3u)*p111 + ssig121v =(stress12_1v + stress12_3v)*p111 + ssig122u =(stress12_2u + stress12_4u)*p111 + ssig122v =(stress12_2v + stress12_4v)*p111 + + csigpneu = p111*stressp_1u + ssigp2u + p027*stressp_3u + csigpnev = p111*stressp_1v + ssigp2v + p027*stressp_3v + csigpnwu = p111*stressp_2u + ssigp1u + p027*stressp_4u + csigpnwv = p111*stressp_2v + ssigp1v + p027*stressp_4v + csigpswu = p111*stressp_3u + ssigp2u + p027*stressp_1u + csigpswv = p111*stressp_3v + ssigp2v + p027*stressp_1v + csigpseu = p111*stressp_4u + ssigp1u + p027*stressp_2u + csigpsev = p111*stressp_4v + ssigp1v + p027*stressp_2v + + csigmneu = p111*stressm_1u + ssigm2u + p027*stressm_3u + csigmnev = p111*stressm_1v + ssigm2v + p027*stressm_3v + csigmnwu = p111*stressm_2u + ssigm1u + p027*stressm_4u + csigmnwv = p111*stressm_2v + ssigm1v + p027*stressm_4v + csigmswu = p111*stressm_3u + ssigm2u + p027*stressm_1u + csigmswv = p111*stressm_3v + ssigm2v + p027*stressm_1v + csigmseu = p111*stressm_4u + ssigm1u + p027*stressm_2u + csigmsev = p111*stressm_4v + ssigm1v + p027*stressm_2v + + csig12neu = p222*stress12_1u + ssig122u & + + p055*stress12_3u + csig12nev = p222*stress12_1v + ssig122v & + + p055*stress12_3v + csig12nwu = p222*stress12_2u + ssig121u & + + p055*stress12_4u + csig12nwv = p222*stress12_2v + ssig121v & + + p055*stress12_4v + csig12swu = p222*stress12_3u + ssig122u & + + p055*stress12_1u + csig12swv = p222*stress12_3v + ssig122v & + + p055*stress12_1v + csig12seu = p222*stress12_4u + ssig121u & + + p055*stress12_2u + csig12sev = p222*stress12_4v + ssig121v & + + p055*stress12_2v + + str12ewu = p5*dxt(i,j)*(p333*ssig12eu + p166*ssig12wu) + str12ewv = p5*dxt(i,j)*(p333*ssig12ev + p166*ssig12wv) + str12weu = p5*dxt(i,j)*(p333*ssig12wu + p166*ssig12eu) + str12wev = p5*dxt(i,j)*(p333*ssig12wv + p166*ssig12ev) + str12nsu = p5*dyt(i,j)*(p333*ssig12nu + p166*ssig12su) + str12nsv = p5*dyt(i,j)*(p333*ssig12nv + p166*ssig12sv) + str12snu = p5*dyt(i,j)*(p333*ssig12su + p166*ssig12nu) + str12snv = p5*dyt(i,j)*(p333*ssig12sv + p166*ssig12nv) + + !----------------------------------------------------------------- + ! 
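+         ! Note: only the coefficients multiplying uvel(i,j) and
+         ! vvel(i,j) themselves are retained above (the other corner
+         ! terms are set to c0), so the Dstr combinations formed below
+         ! are exactly the rheology part of the matrix diagonal; the
+         ! next patch merges them with vrel, Cb and umassdti into
+         ! Diagu/Diagv.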
for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmpu = p25*dyt(i,j)*(p333*ssigpnu + p166*ssigpsu) + strm_tmpu = p25*dyt(i,j)*(p333*ssigmnu + p166*ssigmsu) + + ! northeast (i,j) + Dstr(i,j,1) = -strp_tmpu - strm_tmpu - str12ewu & + + dxhy(i,j)*(-csigpneu + csigmneu) + dyhx(i,j)*csig12neu + + ! northwest (i+1,j) + Dstr(i,j,2) = strp_tmpu + strm_tmpu - str12weu & + + dxhy(i,j)*(-csigpnwu + csigmnwu) + dyhx(i,j)*csig12nwu + + strp_tmpu = p25*dyt(i,j)*(p333*ssigpsu + p166*ssigpnu) + strm_tmpu = p25*dyt(i,j)*(p333*ssigmsu + p166*ssigmnu) + + ! southeast (i,j+1) + Dstr(i,j,3) = -strp_tmpu - strm_tmpu + str12ewu & + + dxhy(i,j)*(-csigpseu + csigmseu) + dyhx(i,j)*csig12seu + + ! southwest (i+1,j+1) + Dstr(i,j,4) = strp_tmpu + strm_tmpu + str12weu & + + dxhy(i,j)*(-csigpswu + csigmswu) + dyhx(i,j)*csig12swu + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmpv = p25*dxt(i,j)*(p333*ssigpev + p166*ssigpwv) + strm_tmpv = p25*dxt(i,j)*(p333*ssigmev + p166*ssigmwv) + + ! northeast (i,j) + Dstr(i,j,5) = -strp_tmpv + strm_tmpv - str12nsv & + - dyhx(i,j)*(csigpnev + csigmnev) + dxhy(i,j)*csig12nev + + ! southeast (i,j+1) + Dstr(i,j,6) = strp_tmpv - strm_tmpv - str12snv & + - dyhx(i,j)*(csigpsev + csigmsev) + dxhy(i,j)*csig12sev + + strp_tmpv = p25*dxt(i,j)*(p333*ssigpwv + p166*ssigpev) + strm_tmpv = p25*dxt(i,j)*(p333*ssigmwv + p166*ssigmev) + + ! northwest (i+1,j) + Dstr(i,j,7) = -strp_tmpv + strm_tmpv + str12nsv & + - dyhx(i,j)*(csigpnwv + csigmnwv) + dxhy(i,j)*csig12nwv + + ! southwest (i+1,j+1) + Dstr(i,j,8) = strp_tmpv - strm_tmpv + str12snv & + - dyhx(i,j)*(csigpswv + csigmswv) + dxhy(i,j)*csig12swv + + enddo ! ij + + end subroutine precondD_stress !======================================================================= From 422c7243caa45a1c48d72f8872efb7eeb6926219 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 12 Jul 2018 13:46:29 +0000 Subject: [PATCH 042/196] now forms the vector diagvec --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 467 ++++++---------------- 1 file changed, 116 insertions(+), 351 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index fda4688ba..03839551a 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -113,6 +113,7 @@ subroutine imp_solver (dt) maxits , & ! max nb of iteration for fgmres fgmres_its , & ! final nb of fgmres_its im_fgmres , & ! for size of Krylov subspace + precond , & ! 1: identity, 2: diagonal iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -139,10 +140,10 @@ subroutine imp_solver (dt) by , & ! b vector, by = tauy + byfix !jfl Au , & ! matvec, Fx = Au - bx ! jfl Av , & ! matvec, Fy = Av - by ! jfl + Diagu , & ! Diagonal (u component) of the matrix A + Diagv , & ! Diagonal (v component) of the matrix A Fx , & ! x residual vector, Fx = Au - bx ! jfl Fy , & ! y residual vector, Fy = Av - by ! jfl - Diagu , & ! diagonal matrix coeff for u component - Diagv , & ! diagonal matrix coeff for v component uprev_k , & ! uvel at previous Picard iteration vprev_k , & ! vvel at previous Picard iteration ulin , & ! 
uvel to linearize vrel @@ -155,7 +156,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) - real (kind=dbl_kind), allocatable :: bvec(:), sol(:), wk11(:), wk22(:) + real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:), wk11(:), wk22(:) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm @@ -163,7 +164,8 @@ subroutine imp_solver (dt) real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK - stPrtmp ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? + stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 3? reuse? + Dstrtmp real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & zetaD ! zetaD = 2zeta (viscous coeff) @@ -194,8 +196,9 @@ subroutine imp_solver (dt) maxits = 50 kmax=1000 gammaNL=1e-2_dbl_kind - iconv=0 ! equals 1 when NL convergence is reached + iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 + precond=2 ! 1: identity, 2: diagonal ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -451,7 +454,8 @@ subroutine imp_solver (dt) uocn (:,:,iblk), vocn (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) - + +! prepare b vector (RHS) call calc_bvec (nx_block , ny_block, & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & @@ -463,7 +467,25 @@ subroutine imp_solver (dt) bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & stPrtmp (:,:,:)) - + +! prepare precond matrix + call formDiag_step1 (nx_block , ny_block, & + icellt (iblk), & + indxti (:,iblk), indxtj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + + call formDiag_step2 (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + Dstrtmp (:,:,:) , vrel (:,:,iblk), & + umassdti (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & + Diagu (:,:,iblk), Diagv (:,:,iblk)) + enddo !$OMP END PARALLEL DO @@ -488,7 +510,14 @@ subroutine imp_solver (dt) max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) + sol(:)) + + ! form matrix diagonal as a vector from Diagu and Diagv arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Diagu (:,:,:), Diagv(:,:,:),& + diagvec(:)) !----------------------------------------------------------------------- ! F G M R E S L O O P @@ -502,7 +531,7 @@ subroutine imp_solver (dt) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & gamma, gammaNL, tolNL, maxits,iout,icode,iconvNL,fgmres_its,kOL) - if (iconv .eq. 1) exit + if (iconvNL .eq. 1) exit if (icode == 1) then @@ -1714,6 +1743,8 @@ subroutine matvec (nx_block, ny_block, & ! tauby , & ! basal stress, y-direction (N/m^2) Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl + +! JFL strintx and y do not need to be inout... ! 
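+! Note: with precond = 2 the diagvec assembled in imp_solver above is
+! meant for a Jacobi (diagonal) preconditioning step in the
+! reverse-communication loop; a sketch (not code from this series) of
+! the icode == 1 branch:
+!
+!    if (icode == 1) then
+!       do ij = 1, ntot
+!          wk22(ij) = wk11(ij) / diagvec(ij)
+!       enddo
+!    endif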
local variables @@ -1993,350 +2024,13 @@ subroutine residual_vec (nx_block, ny_block, & end subroutine residual_vec -!======================================================================= - - subroutine OLDprecondD (nx_block, ny_block, & - kOL, icellt, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - uarear, & - vrel, Cb, & - umassdti, zetaD, & - Diagu, Diagv) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - kOL , & ! subcycling step - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - uarear ! 1/uarea - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - vrel, & ! coefficient for tauw - Cb, & ! coefficient for basal stress - umassdti ! mass of U-cell/dt (kg/m^2 s) - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & - zetaD ! 2*zeta - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & - Diagu, & ! diagonal matrix coefficients for u component - Diagv ! diagonal matrix coefficients for v component - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divuneDu, divunwDu, divuseDu, divuswDu , & ! divergence - divuneDv, divunwDv, divuseDv, divuswDv , & ! divergence - tensionneDu, tensionnwDu, tensionseDu, tensionswDu, & ! tension - tensionneDv, tensionnwDv, tensionseDv, tensionswDv, & ! tension - shearneDu, shearnwDu, shearseDu, shearswDu , & ! shearing - shearneDv, shearnwDv, shearseDv, shearswDv , & ! shearing - stressp_1u, stressp_2u, stressp_3u, stressp_4u , & - stressp_1v, stressp_2v, stressp_3v, stressp_4v , & - stressm_1u, stressm_2u, stressm_3u, stressm_4u , & - stressm_1v, stressm_2v, stressm_3v, stressm_4v , & - stress12_1u, stress12_2u, stress12_3u, stress12_4u, & - stress12_1v, stress12_2v, stress12_3v, stress12_4v, & - ssigpnu, ssigpsu, ssigpeu, ssigpwu , & - ssigpnv, ssigpsv, ssigpev, ssigpwv , & - ssigmnu, ssigmsu, ssigmeu, ssigmwu , & - ssigmnv, ssigmsv, ssigmev, ssigmwv , & - ssig12nu, ssig12su, ssig12eu, ssig12wu , & - ssig12nv, ssig12sv, ssig12ev, ssig12wv , & - ssigp1u, ssigp2u, ssigm1u, ssigm2u, ssig121u, ssig122u, & - ssigp1v, ssigp2v, ssigm1v, ssigm2v, ssig121v, ssig122v, & - csigpneu, csigpnwu, csigpseu, csigpswu , & - csigpnev, csigpnwv, csigpsev, csigpswv , & - csigmneu, csigmnwu, csigmseu, csigmswu , & - csigmnev, csigmnwv, csigmsev, csigmswv , & - csig12neu, csig12nwu, csig12seu, csig12swu , & - csig12nev, csig12nwv, csig12sev, csig12swv , & - str12ewu, str12weu, str12nsu, str12snu , & - str12ewv, str12wev, str12nsv, str12snv , & - strp_tmpu, strm_tmpu, strp_tmpv, strm_tmpv , & - str1, str2, str3, str4, str5, str6, str7, str8 - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - - Diagu(:,:) = c0 - Diagv(:,:) = c0 - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - - -! ATTENTION ICI CEST icellt et non pas icellu....MODIF A FAIRE!!! 
- - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! JFL watchout currently on LHS - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divuneDu = cyp(i,j) ! D=diagonal - divuneDv = cxp(i,j) - divunwDu = dyt(i,j) - divuseDv = dxt(i,j) - - ! tension strain rate = e_11 - e_22 - tensionneDu = -cym(i,j) - tensionneDv = cxm(i,j) - tensionnwDu = dyt(i,j) - tensionseDv = -dxt(i,j) - - ! shearing strain rate = e_12 - shearneDu = -cxm(i,j) - shearneDv = -cym(i,j) - shearnwDv = dyt(i,j) - shearseDu = dxt(i,j) - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - !----------------------------------------------------------------- - -! IMProve: delete stress coeff not needed instead of setting them to 0. -! no need for divuneDu...just plug them directly in eqs below. - - stressp_1u = zetaD(i,j,1)*divuneDu*(c1+Ktens) - stressp_1v = zetaD(i,j,1)*divuneDv*(c1+Ktens) - stressp_2u = zetaD(i,j,2)*divunwDu*(c1+Ktens) - stressp_2v = c0 - stressp_3u = c0 - stressp_3v = c0 - stressp_4u = c0 - stressp_4v = zetaD(i,j,4)*divuseDv*(c1+Ktens) - - stressm_1u = zetaD(i,j,1)*tensionneDu*(c1+Ktens)*ecci - stressm_1v = zetaD(i,j,1)*tensionneDv*(c1+Ktens)*ecci - stressm_2u = zetaD(i,j,2)*tensionnwDu*(c1+Ktens)*ecci - stressm_2v = c0 - stressm_3u = c0 - stressm_3v = c0 - stressm_4u = c0 - stressm_4v = zetaD(i,j,4)*tensionseDv*(c1+Ktens)*ecci - - stress12_1u = zetaD(i,j,1)*shearneDu*p5*(c1+Ktens)*ecci - stress12_1v = zetaD(i,j,1)*shearneDv*p5*(c1+Ktens)*ecci - stress12_2u = c0 - stress12_2v = zetaD(i,j,2)*shearnwDv*p5*(c1+Ktens)*ecci - stress12_3u = c0 - stress12_3v = c0 - stress12_4u = zetaD(i,j,4)*shearseDu*p5*(c1+Ktens)*ecci - stress12_4v = c0 - - !----------------------------------------------------------------- - ! Eliminate underflows. - ! The following code is commented out because it is relatively - ! expensive and most compilers include a flag that accomplishes - ! the same thing more efficiently. This code is cheaper than - ! handling underflows if the compiler lacks a flag; uncomment - ! it in that case. The compiler flag is often described with the - ! phrase "flush to zero". - !----------------------------------------------------------------- - -! call icepack_query_parameters(puny_out=puny) -! call icepack_warnings_flush(nu_diag) -! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & -! file=__FILE__, line=__LINE__) - -! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) -! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) -! stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) -! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) - -! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) -! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) -! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) -! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) - -! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) -! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) -! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) -! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) - - !----------------------------------------------------------------- - ! 
combinations of the stresses for the momentum equation ! kg/s^2 - !----------------------------------------------------------------- - - ssigpnu = stressp_1u + stressp_2u - ssigpnv = stressp_1v + stressp_2v - ssigpsu = stressp_3u + stressp_4u - ssigpsv = stressp_3v + stressp_4v - ssigpeu = stressp_1u + stressp_4v - ssigpev = stressp_1v + stressp_4v - ssigpwu = stressp_2u + stressp_3u - ssigpwv = stressp_2v + stressp_3v - ssigp1u =(stressp_1u + stressp_3u)*p055 - ssigp1v =(stressp_1v + stressp_3v)*p055 - ssigp2u =(stressp_2u + stressp_4u)*p055 - ssigp2v =(stressp_2v + stressp_4v)*p055 - - ssigmnu = stressm_1u + stressm_2u - ssigmnv = stressm_1v + stressm_2v - ssigmsu = stressm_3u + stressm_4u - ssigmsv = stressm_3v + stressm_4v - ssigmeu = stressm_1u + stressm_4u - ssigmev = stressm_1v + stressm_4v - ssigmwu = stressm_2u + stressm_3u - ssigmwv = stressm_2v + stressm_3v - ssigm1u =(stressm_1u + stressm_3u)*p055 - ssigm1v =(stressm_1v + stressm_3v)*p055 - ssigm2u =(stressm_2u + stressm_4u)*p055 - ssigm2v =(stressm_2v + stressm_4v)*p055 - - ssig12nu = stress12_1u + stress12_2u - ssig12nv = stress12_1v + stress12_2v - ssig12su = stress12_3u + stress12_4u - ssig12sv = stress12_3v + stress12_4v - ssig12eu = stress12_1u + stress12_4u - ssig12ev = stress12_1v + stress12_4v - ssig12wu = stress12_2u + stress12_3u - ssig12wv = stress12_2v + stress12_3v - ssig121u =(stress12_1u + stress12_3u)*p111 - ssig121v =(stress12_1v + stress12_3v)*p111 - ssig122u =(stress12_2u + stress12_4u)*p111 - ssig122v =(stress12_2v + stress12_4v)*p111 - - csigpneu = p111*stressp_1u + ssigp2u + p027*stressp_3u - csigpnev = p111*stressp_1v + ssigp2v + p027*stressp_3v - csigpnwu = p111*stressp_2u + ssigp1u + p027*stressp_4u - csigpnwv = p111*stressp_2v + ssigp1v + p027*stressp_4v - csigpswu = p111*stressp_3u + ssigp2u + p027*stressp_1u - csigpswv = p111*stressp_3v + ssigp2v + p027*stressp_1v - csigpseu = p111*stressp_4u + ssigp1u + p027*stressp_2u - csigpsev = p111*stressp_4v + ssigp1v + p027*stressp_2v - - csigmneu = p111*stressm_1u + ssigm2u + p027*stressm_3u - csigmnev = p111*stressm_1v + ssigm2v + p027*stressm_3v - csigmnwu = p111*stressm_2u + ssigm1u + p027*stressm_4u - csigmnwv = p111*stressm_2v + ssigm1v + p027*stressm_4v - csigmswu = p111*stressm_3u + ssigm2u + p027*stressm_1u - csigmswv = p111*stressm_3v + ssigm2v + p027*stressm_1v - csigmseu = p111*stressm_4u + ssigm1u + p027*stressm_2u - csigmsev = p111*stressm_4v + ssigm1v + p027*stressm_2v - - csig12neu = p222*stress12_1u + ssig122u & - + p055*stress12_3u - csig12nev = p222*stress12_1v + ssig122v & - + p055*stress12_3v - csig12nwu = p222*stress12_2u + ssig121u & - + p055*stress12_4u - csig12nwv = p222*stress12_2v + ssig121v & - + p055*stress12_4v - csig12swu = p222*stress12_3u + ssig122u & - + p055*stress12_1u - csig12swv = p222*stress12_3v + ssig122v & - + p055*stress12_1v - csig12seu = p222*stress12_4u + ssig121u & - + p055*stress12_2u - csig12sev = p222*stress12_4v + ssig121v & - + p055*stress12_2v - - str12ewu = p5*dxt(i,j)*(p333*ssig12eu + p166*ssig12wu) - str12ewv = p5*dxt(i,j)*(p333*ssig12ev + p166*ssig12wv) - str12weu = p5*dxt(i,j)*(p333*ssig12wu + p166*ssig12eu) - str12wev = p5*dxt(i,j)*(p333*ssig12wv + p166*ssig12ev) - str12nsu = p5*dyt(i,j)*(p333*ssig12nu + p166*ssig12su) - str12nsv = p5*dyt(i,j)*(p333*ssig12nv + p166*ssig12sv) - str12snu = p5*dyt(i,j)*(p333*ssig12su + p166*ssig12nu) - str12snv = p5*dyt(i,j)*(p333*ssig12sv + p166*ssig12nv) - - !----------------------------------------------------------------- - ! 
for dF/dx (u momentum) - !----------------------------------------------------------------- - strp_tmpu = p25*dyt(i,j)*(p333*ssigpnu + p166*ssigpsu) - strm_tmpu = p25*dyt(i,j)*(p333*ssigmnu + p166*ssigmsu) - - ! northeast (i,j) - str1 = -strp_tmpu - strm_tmpu - str12ewu & - + dxhy(i,j)*(-csigpneu + csigmneu) + dyhx(i,j)*csig12neu - - ! northwest (i+1,j) - str2 = strp_tmpu + strm_tmpu - str12weu & - + dxhy(i,j)*(-csigpnwu + csigmnwu) + dyhx(i,j)*csig12nwu - - strp_tmpu = p25*dyt(i,j)*(p333*ssigpsu + p166*ssigpnu) - strm_tmpu = p25*dyt(i,j)*(p333*ssigmsu + p166*ssigmnu) - - ! southeast (i,j+1) - str3 = -strp_tmpu - strm_tmpu + str12ewu & - + dxhy(i,j)*(-csigpseu + csigmseu) + dyhx(i,j)*csig12seu - - ! southwest (i+1,j+1) - str4 = strp_tmpu + strm_tmpu + str12weu & - + dxhy(i,j)*(-csigpswu + csigmswu) + dyhx(i,j)*csig12swu - - Diagu(i,j) = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) & - -uarear(i,j)*(str1 + str2 + str3 + str4) ! -sign to bring it on LHS - - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - strp_tmpv = p25*dxt(i,j)*(p333*ssigpev + p166*ssigpwv) - strm_tmpv = p25*dxt(i,j)*(p333*ssigmev + p166*ssigmwv) - - ! northeast (i,j) - str5 = -strp_tmpv + strm_tmpv - str12nsv & - - dyhx(i,j)*(csigpnev + csigmnev) + dxhy(i,j)*csig12nev - - ! southeast (i,j+1) - str6 = strp_tmpv - strm_tmpv - str12snv & - - dyhx(i,j)*(csigpsev + csigmsev) + dxhy(i,j)*csig12sev - - strp_tmpv = p25*dxt(i,j)*(p333*ssigpwv + p166*ssigpev) - strm_tmpv = p25*dxt(i,j)*(p333*ssigmwv + p166*ssigmev) - - ! northwest (i+1,j) - str7 = -strp_tmpv + strm_tmpv + str12nsv & - - dyhx(i,j)*(csigpnwv + csigmnwv) + dxhy(i,j)*csig12nwv - - ! southwest (i+1,j+1) - str8 = strp_tmpv - strm_tmpv + str12snv & - - dyhx(i,j)*(csigpswv + csigmswv) + dxhy(i,j)*csig12swv - - Diagv(i,j) = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) & - -uarear(i,j)*(str5 + str6 + str7 + str8) ! -sign to bring it on LHS - - enddo ! ij - - end subroutine OLDprecondD - !======================================================================= ! Calc diagonal term related to rheology for precond - subroutine precondD_stress (nx_block, ny_block, & + subroutine formDiag_step1 (nx_block, ny_block, & icellt, & indxti, indxtj, & - uvel, vvel, & dxt, dyt, & dxhy, dyhx, & cxp, cyp, & @@ -2353,9 +2047,6 @@ subroutine precondD_stress (nx_block, ny_block, & indxtj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - strength , & ! ice strength (N/m) - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) dxt , & ! width of T-cell through the middle (m) dyt , & ! height of T-cell through the middle (m) dxhy , & ! 0.5*(HTE - HTE) @@ -2618,7 +2309,81 @@ subroutine precondD_stress (nx_block, ny_block, & enddo ! ij - end subroutine precondD_stress + end subroutine formDiag_step1 + +!======================================================================= + + subroutine formDiag_step2 (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + vrel, Dstr, & + umassdti, & + uarear, Cb, & + Diagu, Diagv ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu ! total count when iceumask is true + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + vrel, & ! 
coefficient for tauw + Cb, & ! coefficient for basal stress + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + uarear ! 1/uarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(in) :: & + Dstr + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + Diagu , & ! matvec, Fx = Au - bx (N/m^2)! jfl + Diagv ! matvec, Fy = Av - by (N/m^2)! jfl + +! JFL strintx and y do not need to be inout... + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? + ccaimp,ccb , & ! intermediate variables + rhow , & ! + strintx, strinty + + !----------------------------------------------------------------- + ! integrate the momentum equation + !----------------------------------------------------------------- + + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s + + ! divergence of the internal stress tensor + strintx = uarear(i,j)* & + (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4)) + strinty = uarear(i,j)* & + (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8)) + + Diagu(i,j) = ccaimp - strintx + Diagv(i,j) = ccaimp - strinty + + enddo ! ij + + end subroutine formDiag_step2 !======================================================================= From 373d81a482adb4ec16a0247eec360a9f15debdcb Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 12 Jul 2018 14:02:59 +0000 Subject: [PATCH 043/196] precond diag is completed and compiles --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 47 ++++++++++++++++++++++- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 03839551a..b3cab17c4 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -542,8 +542,17 @@ subroutine imp_solver (dt) ! call dcopy (nloc, wk11, 1, wk22, 1) ! precond=identity ! endif - wk22(:)=wk11(:) ! precond=identity + if (precond .eq. 1) then + wk22(:)=wk11(:) ! precond=identity + + elseif (precond .eq. 2) then ! use diagonal of A for precond step + + call precond_diag (ntot, & + diagvec (:), & + wk11 (:), wk22 (:) ) + endif + goto 1 else @@ -2385,7 +2394,41 @@ subroutine formDiag_step2 (nx_block, ny_block, & end subroutine formDiag_step2 - !======================================================================= +!======================================================================= + + subroutine precond_diag (ntot, & + diagvec, & + wk1, wk2) + + integer (kind=int_kind), intent(in) :: & + ntot ! size of problem for fgmres + + real (kind=dbl_kind), dimension (ntot), intent(in) :: & + diagvec, wk1 + + real (kind=dbl_kind), dimension (ntot), intent(out) :: & + wk2 + + ! local variables + + integer (kind=int_kind) :: & + i + + !----------------------------------------------------------------- + ! form vector (converts from max_blocks arrays to single vector + !----------------------------------------------------------------- + + wk2(:)=c0 + + do i=1, ntot + + wk2(i) = wk1(i)/diagvec(i) + + enddo! 
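i

A note on the division wk2(i) = wk1(i)/diagvec(i) above: it assumes a nonzero diagonal. Over the active u-cells this should hold, since the free-drift part of the diagonal, umassdti + vrel*cosw + Cb, is strictly positive (the mass term is). A guarded variant, as an illustrative sketch only (dmin is an invented floor; the patch itself divides directly):

      subroutine precond_diag_guarded (ntot, diagvec, wk1, wk2)
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer, intent(in) :: ntot
         real (kind=dp), dimension (ntot), intent(in)  :: diagvec, wk1
         real (kind=dp), dimension (ntot), intent(out) :: wk2
         real (kind=dp), parameter :: dmin = 1.0e-12_dp ! assumed floor
         integer :: i

         do i = 1, ntot
            ! identical to wk2(i) = wk1(i)/diagvec(i) when |diagvec(i)| >= dmin
            wk2(i) = wk1(i) / sign(max(abs(diagvec(i)), dmin), diagvec(i))
         enddo
      end subroutine precond_diag_guarded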
+
+      end subroutine precond_diag
+
+!=======================================================================
 
       subroutine calc_L2norm (nx_block, ny_block, &
                               icellu, &

From 7ea59d5d0860960e09c9459af63e81d23f4f2eba Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Thu, 12 Jul 2018 14:23:43 +0000
Subject: [PATCH 044/196] diagvec was not allocated...now ok

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index b3cab17c4..912d14571 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -365,7 +365,7 @@ subroutine imp_solver (dt)
       enddo
       ntot = 2*ntot ! times 2 because of u and v
 
-      allocate(bvec(ntot), sol(ntot), wk11(ntot), wk22(ntot))
+      allocate(bvec(ntot), sol(ntot), diagvec(ntot), wk11(ntot), wk22(ntot))
       allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres))
 
 !-----------------------------------------------------------------

From 12af0f2d0d12bac66b423d79d1e579f07c271a6d Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Thu, 12 Jul 2018 14:53:57 +0000
Subject: [PATCH 045/196] diagvec was not deallocated...now ok

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 912d14571..2cc0e8a22 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -690,7 +690,7 @@ subroutine imp_solver (dt)
 
       enddo ! outer loop
 
-      deallocate(bvec, sol, wk11, wk22, vv, ww)
+      deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww)
       deallocate(fld2)
 
       if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask)

From 4771f9cca8e23c3d2429cdfffb0b44b787670d16 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Thu, 12 Jul 2018 16:09:26 +0000
Subject: [PATCH 046/196] fixed bug in formDiag_step2

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 2cc0e8a22..32d8bf1c5 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -195,7 +195,8 @@ subroutine imp_solver (dt)
 
       im_fgmres = 50
       maxits = 50
       kmax=1000
-      gammaNL=1e-2_dbl_kind
+      gammaNL=1e-2_dbl_kind
+      gamma=1e-1_dbl_kind
       iconvNL=0 ! equals 1 when NL convergence is reached
       krelax=c1
@@ -2326,7 +2326,7 @@
       subroutine formDiag_step2 (nx_block, ny_block, &
                                  icellu, &
                                  indxui, indxuj, &
-                                 vrel, Dstr, &
+                                 Dstr, vrel, &
                                  umassdti, &
                                  uarear, Cb, &
                                  Diagu, Diagv )

From 0e1be0ba2622d9ac209c03df30a4b9de305c9fd9 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Thu, 12 Jul 2018 16:12:04 +0000
Subject: [PATCH 047/196] small modif

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 32d8bf1c5..4b85cc3f3 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -194,9 +194,9 @@ subroutine imp_solver (dt)
 
       im_fgmres = 50
       maxits = 50
-      kmax=1000
-      gammaNL=1e-2_dbl_kind
-      gamma=1e-1_dbl_kind
+      kmax=2
+      gammaNL=1e-2_dbl_kind ! nonlinear stopping criterion: gammaNL*(NL res_ini)
+      gamma=1e-6_dbl_kind   ! linear stopping criterion: gamma*(res_ini)
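The two tolerances set here play different roles: gammaNL controls the outer Picard (nonlinear) loop, which stops once the nonlinear residual falls below gammaNL times its initial value, while gamma is handed to fgmres as the stopping criterion of each inner linear solve (gamma times the initial linear residual). A self-contained toy of the outer test, where the scalar problem a(u)*u = b stands in for the momentum equation; all values are invented:

      program picard_toy
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real (kind=dp), parameter :: gammaNL = 1e-2_dp ! nonlinear tolerance
         real (kind=dp), parameter :: b = 2.0_dp
         real (kind=dp) :: u, res, res_ini
         integer :: kOL

         u = 0.0_dp                          ! initial guess
         res_ini = abs(a_of(u)*u - b)
         do kOL = 1, 1000                    ! outer loop, cf. kmax
            u   = b / a_of(u)                ! linearize at u_k, solve exactly
            res = abs(a_of(u)*u - b)
            if (res < gammaNL*res_ini) exit  ! same test that sets iconvNL = 1
         enddo
         print *, 'Picard iterations:', kOL, ' u =', u

      contains

         real (kind=dp) function a_of (u)    ! invented nonlinear coefficient
            real (kind=dp), intent(in) :: u
            a_of = 1.0_dp + 0.1_dp*u*u
         end function a_of

      end program picard_toy

      iconvNL=0 ! 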
equals 1 when NL convergence is reached krelax=c1 precond=2 ! 1: identity, 2: diagonal @@ -495,10 +495,9 @@ subroutine imp_solver (dt) !----------------------------------------------------------------------- icode = 0 - iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations + iout = 2 !0: nothing printed, 1: 1st ite only, 2: all iterations ! its = 0 ischmi = 0 - gamma = 0.25_dbl_kind ! linear stopping criterion: gamma*(res_ini) ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & From 2dfa64866db519a724f310a1ea6b7cae72de8065 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 12 Jul 2018 17:12:41 +0000 Subject: [PATCH 048/196] small modif...again --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 95 +++++------------------ 1 file changed, 20 insertions(+), 75 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 4b85cc3f3..3f125e87e 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -134,22 +134,22 @@ subroutine imp_solver (dt) watery , & ! for ocean stress calculation, y (m/s) forcex , & ! work array: combined atm stress and ocn tilt, x forcey , & ! work array: combined atm stress and ocn tilt, y - bxfix , & ! bx = taux + bxfix !jfl - byfix , & ! by = tauy + byfix !jfl - bx , & ! b vector, bx = taux + bxfix !jfl - by , & ! b vector, by = tauy + byfix !jfl - Au , & ! matvec, Fx = Au - bx ! jfl - Av , & ! matvec, Fy = Av - by ! jfl + bxfix , & ! part of bx that is constant during Picard + byfix , & ! part of by that is constant during Picard + bx , & ! b vector + by , & ! b vector + Au , & ! matvec, Fx = Au - bx + Av , & ! matvec, Fy = Av - by Diagu , & ! Diagonal (u component) of the matrix A Diagv , & ! Diagonal (v component) of the matrix A - Fx , & ! x residual vector, Fx = Au - bx ! jfl - Fy , & ! y residual vector, Fy = Av - by ! jfl + Fx , & ! x residual vector, Fx = Au - bx + Fy , & ! y residual vector, Fy = Av - by uprev_k , & ! uvel at previous Picard iteration vprev_k , & ! vvel at previous Picard iteration ulin , & ! uvel to linearize vrel vlin , & ! vvel to linearize vrel - vrel , & ! coeff for tauw ! jfl - Cb , & ! seabed stress coeff ! jfl + vrel , & ! coeff for tauw + Cb , & ! seabed stress coeff aiu , & ! ice fraction on u-grid umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) @@ -1489,36 +1489,6 @@ subroutine Diagstress_vp (nx_block, ny_block, & stress12_3(i,j) = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci stress12_4(i,j) = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - !----------------------------------------------------------------- - ! Eliminate underflows. - ! The following code is commented out because it is relatively - ! expensive and most compilers include a flag that accomplishes - ! the same thing more efficiently. This code is cheaper than - ! handling underflows if the compiler lacks a flag; uncomment - ! it in that case. The compiler flag is often described with the - ! phrase "flush to zero". - !----------------------------------------------------------------- - -! call icepack_query_parameters(puny_out=puny) -! call icepack_warnings_flush(nu_diag) -! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & -! file=__FILE__, line=__LINE__) - -! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) -! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) -! 
stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) -! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) - -! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) -! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) -! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) -! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) - -! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) -! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) -! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) -! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) - !----------------------------------------------------------------- ! combinations of the stresses for the momentum equation ! kg/s^2 !----------------------------------------------------------------- @@ -1744,26 +1714,18 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & -! strocnx , & ! ice-ocean stress, x-direction -! strocny , & ! ice-ocean stress, y-direction - strintx , & ! divergence of internal ice stress, x (N/m^2) - strinty , & ! divergence of internal ice stress, y (N/m^2) -! taubx , & ! basal stress, x-direction (N/m^2) -! tauby , & ! basal stress, y-direction (N/m^2) Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl Av ! matvec, Fy = Av - by (N/m^2)! jfl -! JFL strintx and y do not need to be inout... - ! local variables integer (kind=int_kind) :: & i, j, ij real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? + utp, vtp , & ! utp = uvel, vtp = vvel ccaimp,ccb , & ! intermediate variables - rhow ! + strintx, strinty !----------------------------------------------------------------- ! integrate the momentum equation @@ -1785,22 +1747,13 @@ subroutine matvec (nx_block, ny_block, & ccb = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s ! divergence of the internal stress tensor - strintx(i,j) = uarear(i,j)* & + strintx = uarear(i,j)* & (str(i,j,1) + str(i+1,j,2) + str(i,j+1,3) + str(i+1,j+1,4)) - strinty(i,j) = uarear(i,j)* & + strinty = uarear(i,j)* & (str(i,j,5) + str(i,j+1,6) + str(i+1,j,7) + str(i+1,j+1,8)) - Au(i,j) = ccaimp*utp - ccb*vtp - strintx(i,j) - Av(i,j) = ccaimp*vtp + ccb*utp - strinty(i,j) - -! Aw(2*ij-1)= ccaimp*utp - ccb*vtp - strintx(i,j) -! Aw(2*ij) = ccaimp*vtp + ccb*utp - strinty(i,j) - !----------------------------------------------------------------- - ! ocean-ice stress for coupling - ! here, strocn includes the factor of aice - !----------------------------------------------------------------- -! strocnx(i,j) = taux ! jfl could be moved -! strocny(i,j) = tauy + Au(i,j) = ccaimp*utp - ccb*vtp - strintx + Av(i,j) = ccaimp*vtp + ccb*utp - strinty ! calculate basal stress component for outputs ! jfl move this ! if (ksub == ndte) then ! on last subcycling iteration @@ -1851,8 +1804,6 @@ subroutine calc_bfix (nx_block, ny_block, & integer (kind=int_kind) :: & i, j, ij -! jfl move in ice_dyn_vp - !----------------------------------------------------------------- ! Define variables for momentum equation !----------------------------------------------------------------- @@ -1863,8 +1814,6 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) -! bvecfix(2*ij-1) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) -! 
bvecfix(2*ij) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) enddo @@ -1932,6 +1881,8 @@ subroutine calc_bvec (nx_block, ny_block, & ! calc b vector !----------------------------------------------------------------- + !JFL vrel could be sent here (already calc before... + call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & @@ -2351,11 +2302,9 @@ subroutine formDiag_step2 (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Diagu , & ! matvec, Fx = Au - bx (N/m^2)! jfl - Diagv ! matvec, Fy = Av - by (N/m^2)! jfl + Diagu , & ! matvec, Fx = Au - bx (N/m^2) + Diagv ! matvec, Fy = Av - by (N/m^2) -! JFL strintx and y do not need to be inout... - ! local variables integer (kind=int_kind) :: & @@ -2472,8 +2421,6 @@ subroutine calc_L2norm (nx_block, ny_block, & L2norm = sqrt(L2norm) -! print *, 'ici uvel', nx_block, ny_block, icellu, L2norm - end subroutine calc_L2norm !======================================================================= @@ -2529,8 +2476,6 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & enddo enddo! ij -! print *, 'NTOT', max_blocks, tot, ntot - end subroutine arrays_to_vec !======================================================================= From 65eb20966c8012090318411085b461cde28aa2d7 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 14:51:25 +0000 Subject: [PATCH 049/196] modified formation of diagonal for precond --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 288 +++++++++++++++++++++- 1 file changed, 280 insertions(+), 8 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 3f125e87e..ecce61e71 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -471,7 +471,7 @@ subroutine imp_solver (dt) ! prepare precond matrix call formDiag_step1 (nx_block , ny_block, & - icellt (iblk), & + icellt (iblk), 1 , & ! for u comp indxti (:,iblk), indxtj(:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & dxhy (:,:,iblk), dyhx(:,:,iblk), & @@ -479,6 +479,15 @@ subroutine imp_solver (dt) cxm (:,:,iblk), cym (:,:,iblk), & zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + call formDiag_step1 (nx_block , ny_block, & + icellt (iblk), 2 , & ! for v comp + indxti (:,iblk), indxtj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + call formDiag_step2 (nx_block , ny_block, & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & @@ -594,7 +603,6 @@ subroutine imp_solver (dt) vrel (:,:,iblk), & umassdti (:,:,iblk), fm (:,:,iblk), & uarear (:,:,iblk), Cb (:,:,iblk), & - strintx (:,:,iblk), strinty (:,:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk)) @@ -1681,7 +1689,6 @@ subroutine matvec (nx_block, ny_block, & vrel, & umassdti, fm, & uarear, Cb, & - strintx, strinty, & uvel, vvel, & Au, Av) @@ -1984,11 +1991,278 @@ subroutine residual_vec (nx_block, ny_block, & end subroutine residual_vec +!======================================================================= + + subroutine formDiag_step1 (nx_block, ny_block, & + icellt, velcode, & + indxti, indxtj, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + zetaD, Dstr ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellt, & ! no. 
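of cells where icetmask = 1

This version of formDiag_step1 extracts the matrix diagonal by probing: velcode selects the u- or v-row, and the strain rates are evaluated with a single velocity component set to c1 and all of its neighbours set to c0, so what comes out of the stress stencil is exactly the diagonal contribution of the rheology operator. The same idea for a generic matrix-free operator, as a self-contained toy (1-D Laplacian; not CICE code):

      program probe_diag
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer, parameter :: n = 3
         real (kind=dp) :: e(n), Ae(n), diag(n)
         integer :: i

         do i = 1, n
            e = 0.0_dp
            e(i) = 1.0_dp        ! unit "velocity" at node i (cf. uij = c1)
            call apply_A (e, Ae) ! matrix-free operator, like matvec
            diag(i) = Ae(i)      ! keep only the local response
         enddo
         print *, diag           ! 2  2  2 for this operator

      contains

         subroutine apply_A (x, y) ! toy operator: 1-D Laplacian stencil
            real (kind=dp), intent(in)  :: x(n)
            real (kind=dp), intent(out) :: y(n)
            integer :: j
            do j = 1, n
               y(j) = 2.0_dp*x(j)
               if (j > 1) y(j) = y(j) - x(j-1)
               if (j < n) y(j) = y(j) - x(j+1)
            enddo
         end subroutine apply_A

      end program probe_diag

! 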
of cells where icetmask = 1 + velcode ! 1: u comp, 2: v comp + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm ! 0.5*HTN - 1.5*HTN + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(out) :: & + Dstr ! stress combinations + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! c0 or c1 + stressp_1, stressp_2, stressp_3, stressp_4, & + stressm_1, stressm_2, stressm_3, stressm_4, & + stress12_1,stress12_2,stress12_3,stress12_4,& + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + if (velcode .eq. 1) then + + uij = c1 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + + Dstr(:,:,:) = c0 + + elseif (velcode .eq. 2) then + + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + + vij = c1 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + + endif + + + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uij - dyt(i,j)*ui1j & + + cxp(i,j)*vij - dxt(i,j)*vij1 + divunw = cym(i,j)*ui1j + dyt(i,j)*uij & + + cxp(i,j)*vi1j - dxt(i,j)*vi1j1 + divusw = cym(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxm(i,j)*vi1j1 + dxt(i,j)*vi1j + divuse = cyp(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxm(i,j)*vij1 + dxt(i,j)*vij + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uij - dyt(i,j)*ui1j & + + cxm(i,j)*vij + dxt(i,j)*vij1 + tensionnw = -cyp(i,j)*ui1j + dyt(i,j)*uij & + + cxm(i,j)*vi1j + dxt(i,j)*vi1j1 + tensionsw = -cyp(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxp(i,j)*vi1j1 - dxt(i,j)*vi1j + tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxp(i,j)*vij1 - dxt(i,j)*vij + + ! 
shearing strain rate = e_12 + shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & + - cxm(i,j)*uij - dxt(i,j)*uij1 + shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & + - cxm(i,j)*ui1j - dxt(i,j)*ui1j1 + shearsw = -cyp(i,j)*vi1j1 + dyt(i,j)*vij1 & + - cxp(i,j)*ui1j1 + dxt(i,j)*ui1j + shearse = -cym(i,j)*vij1 - dyt(i,j)*vi1j1 & + - cxp(i,j)*uij1 + dxt(i,j)*uij + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1 = zetaD(i,j,1)*divune*(c1+Ktens) + stressp_2 = zetaD(i,j,2)*divunw*(c1+Ktens) + stressp_3 = zetaD(i,j,3)*divusw*(c1+Ktens) + stressp_4 = zetaD(i,j,4)*divuse*(c1+Ktens) + + stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci + + stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + ssigmn = stressm_1 + stressm_2 + ssigms = stressm_3 + stressm_4 + ssigme = stressm_1 + stressm_4 + ssigmw = stressm_2 + stressm_3 + ssigm1 =(stressm_1 + stressm_3)*p055 + ssigm2 =(stressm_2 + stressm_4)*p055 + + ssig12n = stress12_1 + stress12_2 + ssig12s = stress12_3 + stress12_4 + ssig12e = stress12_1 + stress12_4 + ssig12w = stress12_2 + stress12_3 + ssig121 =(stress12_1 + stress12_3)*p111 + ssig122 =(stress12_2 + stress12_4)*p111 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 + csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 + csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 + csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + + csig12ne = p222*stress12_1 + ssig122 & + + p055*stress12_3 + csig12nw = p222*stress12_2 + ssig121 & + + p055*stress12_4 + csig12sw = p222*stress12_3 + ssig122 & + + p055*stress12_1 + csig12se = p222*stress12_4 + ssig121 & + + p055*stress12_2 + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + + if (velcode .eq. 1) then + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northeast (i,j) + Dstr(i,j,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + ! 
northwest (i+1,j) + Dstr(i,j,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southeast (i,j+1) + Dstr(i,j,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + + ! southwest (i+1,j+1) + Dstr(i,j,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + + elseif (velcode .eq. 2) then + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! northeast (i,j) + Dstr(i,j,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + + ! southeast (i,j+1) + Dstr(i,j,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! northwest (i+1,j) + Dstr(i,j,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + + ! southwest (i+1,j+1) + Dstr(i,j,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + endif + + enddo ! ij + + end subroutine formDiag_step1 + !======================================================================= ! Calc diagonal term related to rheology for precond - subroutine formDiag_step1 (nx_block, ny_block, & + subroutine OLDformDiag_step1 (nx_block, ny_block, & icellt, & indxti, indxtj, & dxt, dyt, & @@ -2269,7 +2543,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & enddo ! ij - end subroutine formDiag_step1 + end subroutine OLDformDiag_step1 !======================================================================= @@ -2311,9 +2585,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? - ccaimp,ccb , & ! intermediate variables - rhow , & ! + ccaimp , & ! intermediate variables strintx, strinty !----------------------------------------------------------------- From c0c94e4685682aef818d190038e5966f9f8297eb Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 15:03:36 +0000 Subject: [PATCH 050/196] just removed a comment --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 30 ----------------------- 1 file changed, 30 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ecce61e71..78af304c9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1182,36 +1182,6 @@ subroutine stress_vp (nx_block, ny_block, & stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - !----------------------------------------------------------------- - ! Eliminate underflows. - ! The following code is commented out because it is relatively - ! expensive and most compilers include a flag that accomplishes - ! the same thing more efficiently. This code is cheaper than - ! handling underflows if the compiler lacks a flag; uncomment - ! it in that case. The compiler flag is often described with the - ! phrase "flush to zero". - !----------------------------------------------------------------- - -! 
call icepack_query_parameters(puny_out=puny) -! call icepack_warnings_flush(nu_diag) -! if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & -! file=__FILE__, line=__LINE__) - -! stressp_1(i,j) = sign(max(abs(stressp_1(i,j)),puny),stressp_1(i,j)) -! stressp_2(i,j) = sign(max(abs(stressp_2(i,j)),puny),stressp_2(i,j)) -! stressp_3(i,j) = sign(max(abs(stressp_3(i,j)),puny),stressp_3(i,j)) -! stressp_4(i,j) = sign(max(abs(stressp_4(i,j)),puny),stressp_4(i,j)) - -! stressm_1(i,j) = sign(max(abs(stressm_1(i,j)),puny),stressm_1(i,j)) -! stressm_2(i,j) = sign(max(abs(stressm_2(i,j)),puny),stressm_2(i,j)) -! stressm_3(i,j) = sign(max(abs(stressm_3(i,j)),puny),stressm_3(i,j)) -! stressm_4(i,j) = sign(max(abs(stressm_4(i,j)),puny),stressm_4(i,j)) - -! stress12_1(i,j) = sign(max(abs(stress12_1(i,j)),puny),stress12_1(i,j)) -! stress12_2(i,j) = sign(max(abs(stress12_2(i,j)),puny),stress12_2(i,j)) -! stress12_3(i,j) = sign(max(abs(stress12_3(i,j)),puny),stress12_3(i,j)) -! stress12_4(i,j) = sign(max(abs(stress12_4(i,j)),puny),stress12_4(i,j)) - !----------------------------------------------------------------- ! combinations of the stresses for the momentum equation ! kg/s^2 !----------------------------------------------------------------- From e5ddf7c210810eed03d17ec9672701451e4c6765 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 16:49:25 +0000 Subject: [PATCH 051/196] minor modif to formDiag_step1 --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 78af304c9..b67744c9c 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1991,14 +1991,14 @@ subroutine formDiag_step1 (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - Dstr ! stress combinations real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & zetaD ! 2*zeta + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(inout) :: & + Dstr ! stress combinations ! local variables From c89ce4534730492598aee9202b6993d60ef144df Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 18:04:29 +0000 Subject: [PATCH 052/196] added an option for precond (free drift diag or complete diag --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 25 ++++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index b67744c9c..eae6253ac 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -113,7 +113,7 @@ subroutine imp_solver (dt) maxits , & ! max nb of iteration for fgmres fgmres_its , & ! final nb of fgmres_its im_fgmres , & ! for size of Krylov subspace - precond , & ! 1: identity, 2: diagonal + precond , & ! 1: identity, 2: diagonal (free drift), 3: complete diagonal iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -199,7 +199,7 @@ subroutine imp_solver (dt) gamma=1e-6_dbl_kind !nonlinear stopping criterion: iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 - precond=2 ! 1: identity, 2: diagonal + precond=2 ! 1: identity, 2: diagonal (fd), 3: complete diagonal ! This call is needed only if dt changes during runtime. ! 
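
With this option the preconditioner diagonal can be built two ways: precond=2 keeps only the free-drift terms (umassdti + vrel*cosw + Cb), while precond=3 also includes the rheology contribution assembled in formDiag_step1 (the -uarear*sum(Dstr) part of formDiag_step2). A scalar mock-up of what each option puts on the diagonal of one u-cell; every number below is invented:

      program precond_options
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real (kind=dp), parameter :: cosw = 1.0_dp ! turning angle ignored here
         real (kind=dp) :: umassdti, vrel, Cb, uarear, dstr_sum
         real (kind=dp) :: diag_fd, diag_full

         umassdti = 8.0e2_dp   ! kg/m^2 s (invented)
         vrel     = 5.0e-2_dp  ! water drag coefficient (invented)
         Cb       = 1.0e-3_dp  ! basal stress coefficient (invented)
         uarear   = 1.0e-8_dp  ! 1/uarea (invented)
         dstr_sum = -4.0e10_dp ! sum of the four Dstr terms (invented)

         diag_fd   = umassdti + vrel*cosw + Cb   ! precond = 2 (free drift)
         diag_full = diag_fd - uarear*dstr_sum   ! precond = 3 (complete)

         print *, 'free-drift diagonal:', diag_fd
         print *, 'complete   diagonal:', diag_full
      end program precond_options

!      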
call set_evp_parameters (dt) @@ -489,7 +489,7 @@ subroutine imp_solver (dt) zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) call formDiag_step2 (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), precond, & indxui (:,iblk), indxuj (:,iblk), & Dstrtmp (:,:,:) , vrel (:,:,iblk), & umassdti (:,:,iblk), & @@ -555,7 +555,7 @@ subroutine imp_solver (dt) wk22(:)=wk11(:) ! precond=identity - elseif (precond .eq. 2) then ! use diagonal of A for precond step + elseif (precond .gt. 1) then ! use diagonal of A for precond step call precond_diag (ntot, & diagvec (:), & @@ -2518,7 +2518,7 @@ end subroutine OLDformDiag_step1 !======================================================================= subroutine formDiag_step2 (nx_block, ny_block, & - icellu, & + icellu, precond, & indxui, indxuj, & Dstr, vrel, & umassdti, & @@ -2527,7 +2527,8 @@ subroutine formDiag_step2 (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu ! total count when iceumask is true + icellu, & ! total count when iceumask is true + precond ! precond type integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -2566,17 +2567,21 @@ subroutine formDiag_step2 (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) + strintx=c0 + strinty=c0 + do ij =1, icellu i = indxui(ij) j = indxuj(ij) ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s - - ! divergence of the internal stress tensor - strintx = uarear(i,j)* & + + if (precond .eq. 3) then + strintx = uarear(i,j)* & (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4)) - strinty = uarear(i,j)* & + strinty = uarear(i,j)* & (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8)) + endif Diagu(i,j) = ccaimp - strintx Diagv(i,j) = ccaimp - strinty From 9f75ad1a3e4fad6f57050deebabd3a263270934b Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 18:09:04 +0000 Subject: [PATCH 053/196] minor modif --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 36 ++++++++++++----------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index eae6253ac..8d240ebf1 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -469,24 +469,26 @@ subroutine imp_solver (dt) bx (:,:,iblk), by (:,:,iblk), & stPrtmp (:,:,:)) -! prepare precond matrix - call formDiag_step1 (nx_block , ny_block, & - icellt (iblk), 1 , & ! for u comp - indxti (:,iblk), indxtj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) +! prepare precond matrix + if (precond .eq. 3) then + call formDiag_step1 (nx_block , ny_block, & + icellt (iblk), 1 , & ! for u comp + indxti (:,iblk), indxtj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) - call formDiag_step1 (nx_block , ny_block, & - icellt (iblk), 2 , & ! for v comp - indxti (:,iblk), indxtj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + call formDiag_step1 (nx_block , ny_block, & + icellt (iblk), 2 , & ! 
for v comp + indxti (:,iblk), indxtj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + endif call formDiag_step2 (nx_block , ny_block, & icellu (iblk), precond, & From 3b01585fd85e917df77200b0e81e14e4181b9bb7 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 18:55:15 +0000 Subject: [PATCH 054/196] minor modif --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 8d240ebf1..d58ffac14 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -471,6 +471,7 @@ subroutine imp_solver (dt) ! prepare precond matrix if (precond .eq. 3) then + print *, "BUG in formDiag_step1 due to confusion between icellt and icellu" call formDiag_step1 (nx_block , ny_block, & icellt (iblk), 1 , & ! for u comp indxti (:,iblk), indxtj(:,iblk), & @@ -570,10 +571,6 @@ subroutine imp_solver (dt) if (icode >= 2) then -! if (Lun_debug_L.and.print_conv_L) write(lun_out, 199) conv,its -! call sol_matvec ( wk22, wk11, Minx, Maxx, Miny, Maxy, & -! nil,njl, F_nk, minx1,maxx1,minx2,maxx2 ) - call vec_to_arrays (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & From 4f601bcc2e0c6c2d301e4b0ca5498cf5999fe2e7 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 16 Jul 2018 19:25:08 +0000 Subject: [PATCH 055/196] removed OLDformDiag_step1 --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 287 ---------------------- 1 file changed, 287 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d58ffac14..4cbce931d 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -2227,293 +2227,6 @@ subroutine formDiag_step1 (nx_block, ny_block, & end subroutine formDiag_step1 -!======================================================================= - -! Calc diagonal term related to rheology for precond - - subroutine OLDformDiag_step1 (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - zetaD, Dstr ) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - Dstr ! stress combinations - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & - zetaD ! 2*zeta - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divuneDu, divunwDu, divuseDu, divuswDu , & ! divergence - divuneDv, divunwDv, divuseDv, divuswDv , & ! divergence - tensionneDu, tensionnwDu, tensionseDu, tensionswDu, & ! tension - tensionneDv, tensionnwDv, tensionseDv, tensionswDv, & ! 
tension - shearneDu, shearnwDu, shearseDu, shearswDu , & ! shearing - shearneDv, shearnwDv, shearseDv, shearswDv , & ! shearing - stressp_1u, stressp_2u, stressp_3u, stressp_4u , & - stressp_1v, stressp_2v, stressp_3v, stressp_4v , & - stressm_1u, stressm_2u, stressm_3u, stressm_4u , & - stressm_1v, stressm_2v, stressm_3v, stressm_4v , & - stress12_1u, stress12_2u, stress12_3u, stress12_4u, & - stress12_1v, stress12_2v, stress12_3v, stress12_4v, & - ssigpnu, ssigpsu, ssigpeu, ssigpwu , & - ssigpnv, ssigpsv, ssigpev, ssigpwv , & - ssigmnu, ssigmsu, ssigmeu, ssigmwu , & - ssigmnv, ssigmsv, ssigmev, ssigmwv , & - ssig12nu, ssig12su, ssig12eu, ssig12wu , & - ssig12nv, ssig12sv, ssig12ev, ssig12wv , & - ssigp1u, ssigp2u, ssigm1u, ssigm2u, ssig121u, ssig122u, & - ssigp1v, ssigp2v, ssigm1v, ssigm2v, ssig121v, ssig122v, & - csigpneu, csigpnwu, csigpseu, csigpswu , & - csigpnev, csigpnwv, csigpsev, csigpswv , & - csigmneu, csigmnwu, csigmseu, csigmswu , & - csigmnev, csigmnwv, csigmsev, csigmswv , & - csig12neu, csig12nwu, csig12seu, csig12swu , & - csig12nev, csig12nwv, csig12sev, csig12swv , & - str12ewu, str12weu, str12nsu, str12snu , & - str12ewv, str12wev, str12nsv, str12snv , & - strp_tmpu, strm_tmpu, strp_tmpv, strm_tmpv , & - str1, str2, str3, str4, str5, str6, str7, str8 - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - - Dstr(:,:,:) = c0 - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! JFL watchout if currently on LHS or RHS - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divuneDu = cyp(i,j) - divuneDv = cxp(i,j) - divunwDu = dyt(i,j) - divuseDv = dxt(i,j) - - ! tension strain rate = e_11 - e_22 - tensionneDu = -cym(i,j) - tensionneDv = cxm(i,j) - tensionnwDu = dyt(i,j) - tensionseDv = - dxt(i,j) - - ! shearing strain rate = e_12 - shearneDu = -cxm(i,j) - shearneDv = -cym(i,j) - shearnwDv = dyt(i,j) - shearseDu = dxt(i,j) - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - !----------------------------------------------------------------- - -! IMProve: delete stress coeff not needed instead of setting them to 0. -! no need for divuneDu...just plug them directly in eqs below. - - stressp_1u = zetaD(i,j,1)*divuneDu*(c1+Ktens) - stressp_1v = zetaD(i,j,1)*divuneDv*(c1+Ktens) - stressp_2u = zetaD(i,j,2)*divunwDu*(c1+Ktens) - stressp_2v = c0 - stressp_3u = c0 - stressp_3v = c0 - stressp_4u = c0 - stressp_4v = zetaD(i,j,4)*divuseDv*(c1+Ktens) - - stressm_1u = zetaD(i,j,1)*tensionneDu*(c1+Ktens)*ecci - stressm_1v = zetaD(i,j,1)*tensionneDv*(c1+Ktens)*ecci - stressm_2u = zetaD(i,j,2)*tensionnwDu*(c1+Ktens)*ecci - stressm_2v = c0 - stressm_3u = c0 - stressm_3v = c0 - stressm_4u = c0 - stressm_4v = zetaD(i,j,4)*tensionseDv*(c1+Ktens)*ecci - - stress12_1u = zetaD(i,j,1)*shearneDu*p5*(c1+Ktens)*ecci - stress12_1v = zetaD(i,j,1)*shearneDv*p5*(c1+Ktens)*ecci - stress12_2u = c0 - stress12_2v = zetaD(i,j,2)*shearnwDv*p5*(c1+Ktens)*ecci - stress12_3u = c0 - stress12_3v = c0 - stress12_4u = zetaD(i,j,4)*shearseDu*p5*(c1+Ktens)*ecci - stress12_4v = c0 - - !----------------------------------------------------------------- - ! combinations of the stresses for the momentum equation ! 
kg/s^2 - !----------------------------------------------------------------- - - ssigpnu = stressp_1u + stressp_2u - ssigpnv = stressp_1v + stressp_2v - ssigpsu = stressp_3u + stressp_4u - ssigpsv = stressp_3v + stressp_4v - ssigpeu = stressp_1u + stressp_4v - ssigpev = stressp_1v + stressp_4v - ssigpwu = stressp_2u + stressp_3u - ssigpwv = stressp_2v + stressp_3v - ssigp1u =(stressp_1u + stressp_3u)*p055 - ssigp1v =(stressp_1v + stressp_3v)*p055 - ssigp2u =(stressp_2u + stressp_4u)*p055 - ssigp2v =(stressp_2v + stressp_4v)*p055 - - ssigmnu = stressm_1u + stressm_2u - ssigmnv = stressm_1v + stressm_2v - ssigmsu = stressm_3u + stressm_4u - ssigmsv = stressm_3v + stressm_4v - ssigmeu = stressm_1u + stressm_4u - ssigmev = stressm_1v + stressm_4v - ssigmwu = stressm_2u + stressm_3u - ssigmwv = stressm_2v + stressm_3v - ssigm1u =(stressm_1u + stressm_3u)*p055 - ssigm1v =(stressm_1v + stressm_3v)*p055 - ssigm2u =(stressm_2u + stressm_4u)*p055 - ssigm2v =(stressm_2v + stressm_4v)*p055 - - ssig12nu = stress12_1u + stress12_2u - ssig12nv = stress12_1v + stress12_2v - ssig12su = stress12_3u + stress12_4u - ssig12sv = stress12_3v + stress12_4v - ssig12eu = stress12_1u + stress12_4u - ssig12ev = stress12_1v + stress12_4v - ssig12wu = stress12_2u + stress12_3u - ssig12wv = stress12_2v + stress12_3v - ssig121u =(stress12_1u + stress12_3u)*p111 - ssig121v =(stress12_1v + stress12_3v)*p111 - ssig122u =(stress12_2u + stress12_4u)*p111 - ssig122v =(stress12_2v + stress12_4v)*p111 - - csigpneu = p111*stressp_1u + ssigp2u + p027*stressp_3u - csigpnev = p111*stressp_1v + ssigp2v + p027*stressp_3v - csigpnwu = p111*stressp_2u + ssigp1u + p027*stressp_4u - csigpnwv = p111*stressp_2v + ssigp1v + p027*stressp_4v - csigpswu = p111*stressp_3u + ssigp2u + p027*stressp_1u - csigpswv = p111*stressp_3v + ssigp2v + p027*stressp_1v - csigpseu = p111*stressp_4u + ssigp1u + p027*stressp_2u - csigpsev = p111*stressp_4v + ssigp1v + p027*stressp_2v - - csigmneu = p111*stressm_1u + ssigm2u + p027*stressm_3u - csigmnev = p111*stressm_1v + ssigm2v + p027*stressm_3v - csigmnwu = p111*stressm_2u + ssigm1u + p027*stressm_4u - csigmnwv = p111*stressm_2v + ssigm1v + p027*stressm_4v - csigmswu = p111*stressm_3u + ssigm2u + p027*stressm_1u - csigmswv = p111*stressm_3v + ssigm2v + p027*stressm_1v - csigmseu = p111*stressm_4u + ssigm1u + p027*stressm_2u - csigmsev = p111*stressm_4v + ssigm1v + p027*stressm_2v - - csig12neu = p222*stress12_1u + ssig122u & - + p055*stress12_3u - csig12nev = p222*stress12_1v + ssig122v & - + p055*stress12_3v - csig12nwu = p222*stress12_2u + ssig121u & - + p055*stress12_4u - csig12nwv = p222*stress12_2v + ssig121v & - + p055*stress12_4v - csig12swu = p222*stress12_3u + ssig122u & - + p055*stress12_1u - csig12swv = p222*stress12_3v + ssig122v & - + p055*stress12_1v - csig12seu = p222*stress12_4u + ssig121u & - + p055*stress12_2u - csig12sev = p222*stress12_4v + ssig121v & - + p055*stress12_2v - - str12ewu = p5*dxt(i,j)*(p333*ssig12eu + p166*ssig12wu) - str12ewv = p5*dxt(i,j)*(p333*ssig12ev + p166*ssig12wv) - str12weu = p5*dxt(i,j)*(p333*ssig12wu + p166*ssig12eu) - str12wev = p5*dxt(i,j)*(p333*ssig12wv + p166*ssig12ev) - str12nsu = p5*dyt(i,j)*(p333*ssig12nu + p166*ssig12su) - str12nsv = p5*dyt(i,j)*(p333*ssig12nv + p166*ssig12sv) - str12snu = p5*dyt(i,j)*(p333*ssig12su + p166*ssig12nu) - str12snv = p5*dyt(i,j)*(p333*ssig12sv + p166*ssig12nv) - - !----------------------------------------------------------------- - ! 
for dF/dx (u momentum) - !----------------------------------------------------------------- - strp_tmpu = p25*dyt(i,j)*(p333*ssigpnu + p166*ssigpsu) - strm_tmpu = p25*dyt(i,j)*(p333*ssigmnu + p166*ssigmsu) - - ! northeast (i,j) - Dstr(i,j,1) = -strp_tmpu - strm_tmpu - str12ewu & - + dxhy(i,j)*(-csigpneu + csigmneu) + dyhx(i,j)*csig12neu - - ! northwest (i+1,j) - Dstr(i,j,2) = strp_tmpu + strm_tmpu - str12weu & - + dxhy(i,j)*(-csigpnwu + csigmnwu) + dyhx(i,j)*csig12nwu - - strp_tmpu = p25*dyt(i,j)*(p333*ssigpsu + p166*ssigpnu) - strm_tmpu = p25*dyt(i,j)*(p333*ssigmsu + p166*ssigmnu) - - ! southeast (i,j+1) - Dstr(i,j,3) = -strp_tmpu - strm_tmpu + str12ewu & - + dxhy(i,j)*(-csigpseu + csigmseu) + dyhx(i,j)*csig12seu - - ! southwest (i+1,j+1) - Dstr(i,j,4) = strp_tmpu + strm_tmpu + str12weu & - + dxhy(i,j)*(-csigpswu + csigmswu) + dyhx(i,j)*csig12swu - - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - strp_tmpv = p25*dxt(i,j)*(p333*ssigpev + p166*ssigpwv) - strm_tmpv = p25*dxt(i,j)*(p333*ssigmev + p166*ssigmwv) - - ! northeast (i,j) - Dstr(i,j,5) = -strp_tmpv + strm_tmpv - str12nsv & - - dyhx(i,j)*(csigpnev + csigmnev) + dxhy(i,j)*csig12nev - - ! southeast (i,j+1) - Dstr(i,j,6) = strp_tmpv - strm_tmpv - str12snv & - - dyhx(i,j)*(csigpsev + csigmsev) + dxhy(i,j)*csig12sev - - strp_tmpv = p25*dxt(i,j)*(p333*ssigpwv + p166*ssigpev) - strm_tmpv = p25*dxt(i,j)*(p333*ssigmwv + p166*ssigmev) - - ! northwest (i+1,j) - Dstr(i,j,7) = -strp_tmpv + strm_tmpv + str12nsv & - - dyhx(i,j)*(csigpnwv + csigmnwv) + dxhy(i,j)*csig12nwv - - ! southwest (i+1,j+1) - Dstr(i,j,8) = strp_tmpv - strm_tmpv + str12snv & - - dyhx(i,j)*(csigpswv + csigmswv) + dxhy(i,j)*csig12swv - - enddo ! ij - - end subroutine OLDformDiag_step1 - !======================================================================= subroutine formDiag_step2 (nx_block, ny_block, & From b1c4a8ba93029148b33cdeaaf5fddcdafb58d4c1 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 18 Jul 2018 13:17:06 +0000 Subject: [PATCH 056/196] Ok I think I know what was wrong with the precond diag...I have to use icellu and not icellt --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 456 ++++++++++++++++++++-- 1 file changed, 413 insertions(+), 43 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 4cbce931d..d1c232b4b 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -199,7 +199,7 @@ subroutine imp_solver (dt) gamma=1e-6_dbl_kind !nonlinear stopping criterion: iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 - precond=2 ! 1: identity, 2: diagonal (fd), 3: complete diagonal + precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -471,24 +471,16 @@ subroutine imp_solver (dt) ! prepare precond matrix if (precond .eq. 3) then - print *, "BUG in formDiag_step1 due to confusion between icellt and icellu" - call formDiag_step1 (nx_block , ny_block, & - icellt (iblk), 1 , & ! for u comp - indxti (:,iblk), indxtj(:,iblk), & + + call formDiag_step1 (nx_block , ny_block, & ! 
D term due to rheology
+                            icellu   (iblk),                           &
+                            indxui   (:,iblk), indxuj(:,iblk),         &
                             dxt      (:,:,iblk), dyt (:,:,iblk),       &
                             dxhy     (:,:,iblk), dyhx(:,:,iblk),       &
                             cxp      (:,:,iblk), cyp (:,:,iblk),       &
                             cxm      (:,:,iblk), cym (:,:,iblk),       &
                             zetaD    (:,:,iblk,:) , Dstrtmp (:,:,:) )
 
-       call formDiag_step1 (nx_block    , ny_block,         &
-                            icellt (iblk), 2 ,              & ! for v comp
-                            indxti   (:,iblk), indxtj(:,iblk),         &
-                            dxt      (:,:,iblk), dyt (:,:,iblk),       &
-                            dxhy     (:,:,iblk), dyhx(:,:,iblk),       &
-                            cxp      (:,:,iblk), cyp (:,:,iblk),       &
-                            cxm      (:,:,iblk), cym (:,:,iblk),       &
-                            zetaD    (:,:,iblk,:) , Dstrtmp (:,:,:) )
       endif
 
        call formDiag_step2 (nx_block   , ny_block,     &
@@ -580,19 +572,19 @@ subroutine imp_solver (dt)
 
     !$OMP PARALLEL DO PRIVATE(iblk,strtmp)
     do iblk = 1, nblocks
 
-      call stress_vp (nx_block,             ny_block,             &
-                      kOL,                  icellt(iblk),         &
-                      indxti    (:,iblk),   indxtj    (:,iblk),   &
-                      uvel      (:,:,iblk), vvel      (:,:,iblk), &
-                      dxt       (:,:,iblk), dyt       (:,:,iblk), &
-                      dxhy      (:,:,iblk), dyhx      (:,:,iblk), &
-                      cxp       (:,:,iblk), cyp       (:,:,iblk), &
-                      cxm       (:,:,iblk), cym       (:,:,iblk), &
-                      tarear    (:,:,iblk), tinyarea  (:,:,iblk), &
-                      zetaD     (:,:,iblk,:),                     &
-                      shear     (:,:,iblk), divu      (:,:,iblk), &
-                      rdg_conv  (:,:,iblk), rdg_shear (:,:,iblk), &
-                      strtmp    (:,:,:))
+      call stress_prime_vp (nx_block,             ny_block,             &
+                            kOL,                  icellt(iblk),         &
+                            indxti    (:,iblk),   indxtj    (:,iblk),   &
+                            uvel      (:,:,iblk), vvel      (:,:,iblk), &
+                            dxt       (:,:,iblk), dyt       (:,:,iblk), &
+                            dxhy      (:,:,iblk), dyhx      (:,:,iblk), &
+                            cxp       (:,:,iblk), cyp       (:,:,iblk), &
+                            cxm       (:,:,iblk), cym       (:,:,iblk), &
+                            tarear    (:,:,iblk), tinyarea  (:,:,iblk), &
+                            zetaD     (:,:,iblk,:),                     &
+                            shear     (:,:,iblk), divu      (:,:,iblk), &
+                            rdg_conv  (:,:,iblk), rdg_shear (:,:,iblk), &
+                            strtmp    (:,:,:))
 
       call matvec (nx_block           , ny_block, &
@@ -1010,19 +1002,19 @@ end subroutine calc_zeta_Pr
 
 ! Computes VP stress without the rep. pressure Pr (included in b vector)
 
-      subroutine stress_vp (nx_block, ny_block, &
-                            kOL, icellt, &
-                            indxti, indxtj, &
-                            uvel, vvel, &
-                            dxt, dyt, &
-                            dxhy, dyhx, &
-                            cxp, cyp, &
-                            cxm, cym, &
-                            tarear, tinyarea, &
-                            zetaD, &
-                            shear, divu, &
-                            rdg_conv, rdg_shear, &
-                            str )
+      subroutine stress_prime_vp (nx_block, ny_block, &
+                                  kOL, icellt, &
+                                  indxti, indxtj, &
+                                  uvel, vvel, &
+                                  dxt, dyt, &
+                                  dxhy, dyhx, &
+                                  cxp, cyp, &
+                                  cxm, cym, &
+                                  tarear, tinyarea, &
+                                  zetaD, &
+                                  shear, divu, &
+                                  rdg_conv, rdg_shear, &
+                                  str )
 
       integer (kind=int_kind), intent(in) :: &
          nx_block, ny_block, & ! block dimensions
@@ -1282,14 +1274,14 @@ subroutine stress_vp (nx_block, ny_block, &
 
       enddo                     ! ij
 
-      end subroutine stress_vp
+      end subroutine stress_prime_vp
 
 !=======================================================================
 
 ! Computes the VP stress (as diagnostic)
 
-      subroutine Diagstress_vp (nx_block, ny_block, &
+      subroutine stress_vp (nx_block, ny_block, &
                                 kOL, icellt, &
                                 indxti, indxtj, &
                                 uvel, vvel, &
@@ -1567,7 +1559,7 @@ subroutine Diagstress_vp (nx_block, ny_block, &
 
       enddo                     ! ij
 
-      end subroutine Diagstress_vp
+      end subroutine stress_vp
 
 !=======================================================================
 
@@ -1963,6 +1955,373 @@ end subroutine residual_vec
 !=======================================================================
 
 subroutine formDiag_step1 (nx_block, ny_block, &
+                           icellu, &
+                           indxui, indxuj, &
+                           dxt, dyt, &
+                           dxhy, dyhx, &
+                           cxp, cyp, &
+                           cxm, cym, &
+                           zetaD, Dstr )
+
+      integer (kind=int_kind), intent(in) :: &
+         nx_block, ny_block, & ! block dimensions
+         icellu                ! no. of cells where iceumask = 1 JFL
+
+      integer (kind=int_kind), dimension (nx_block*ny_block), &
+         intent(in) :: &
+         indxui , & ! compressed index in i-direction JFL
+         indxuj     ! 
compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm ! 0.5*HTN - 1.5*HTN + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + intent(inout) :: & + Dstr ! stress combinations + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij, iu, ju, di, dj, cc + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! c0 or c1 + stressp_1, stressp_2, stressp_3, stressp_4, & + stressm_1, stressm_2, stressm_3, stressm_4, & + stress12_1,stress12_2,stress12_3,stress12_4,& + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + Dstr(:,:,:) = c0 + +! strintx = uarear(i,j)* & + ! (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4)) + ! strinty = uarear(i,j)* & + ! (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8)) + + do cc=1, 8 ! 4 for u and 4 for v + + if (cc .eq. 1) then ! u comp, T cell i,j + uij = c1 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 0 + elseif (cc .eq. 2) then ! u comp, T cell i+1,j + uij = c0 + ui1j = c1 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 0 + elseif (cc .eq. 3) then ! u comp, T cell i,j+1 + uij = c0 + ui1j = c0 + uij1 = c1 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 1 + elseif (cc .eq. 4) then ! u comp, T cell i+1,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c1 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 1 + elseif (cc .eq. 5) then ! v comp, T cell i,j + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c1 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 0 + elseif (cc .eq. 6) then ! v comp, T cell i,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c1 + vi1j1 = c0 + di = 0 + dj = 1 + elseif (cc .eq. 7) then ! v comp, T cell i+1,j + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c1 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 0 + elseif (cc .eq. 8) then ! v comp, T cell i+1,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c1 + di = 1 + dj = 1 + endif + + do ij = 1, icellu + + iu = indxui(ij) + ju = indxuj(ij) + i=iu+di + j=ju+dj + + !----------------------------------------------------------------- + ! strain rates + ! 
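[editor's note] The eight-way branch above probes the rheology operator with unit velocity inputs: on each pass of the cc loop exactly one of uij, ui1j, uij1, ui1j1, vij, vi1j, vij1, vi1j1 is c1 and the rest are c0, so the strain rates evaluated below collapse to the contribution of a single neighbouring velocity component, that is, one entry of the corresponding matrix column. A minimal sketch of the underlying idea, with illustrative names (A_op, e_k, Ae, diagA are not part of the patch):

      ! recover the diagonal of a linear operator A by probing it
      ! with unit vectors: diagA(k) = (A*e_k)_k
      do k = 1, n
         e_k    = c0
         e_k(k) = c1
         call A_op (e_k, Ae)   ! hypothetical application of the operator
         diagA(k) = Ae(k)
      enddo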
NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uij - dyt(i,j)*ui1j & + + cxp(i,j)*vij - dxt(i,j)*vij1 + divunw = cym(i,j)*ui1j + dyt(i,j)*uij & + + cxp(i,j)*vi1j - dxt(i,j)*vi1j1 + divusw = cym(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxm(i,j)*vi1j1 + dxt(i,j)*vi1j + divuse = cyp(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxm(i,j)*vij1 + dxt(i,j)*vij + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uij - dyt(i,j)*ui1j & + + cxm(i,j)*vij + dxt(i,j)*vij1 + tensionnw = -cyp(i,j)*ui1j + dyt(i,j)*uij & + + cxm(i,j)*vi1j + dxt(i,j)*vi1j1 + tensionsw = -cyp(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxp(i,j)*vi1j1 - dxt(i,j)*vi1j + tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxp(i,j)*vij1 - dxt(i,j)*vij + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & + - cxm(i,j)*uij - dxt(i,j)*uij1 + shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & + - cxm(i,j)*ui1j - dxt(i,j)*ui1j1 + shearsw = -cyp(i,j)*vi1j1 + dyt(i,j)*vij1 & + - cxp(i,j)*ui1j1 + dxt(i,j)*ui1j + shearse = -cym(i,j)*vij1 - dyt(i,j)*vi1j1 & + - cxp(i,j)*uij1 + dxt(i,j)*uij + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1 = zetaD(i,j,1)*divune*(c1+Ktens) + stressp_2 = zetaD(i,j,2)*divunw*(c1+Ktens) + stressp_3 = zetaD(i,j,3)*divusw*(c1+Ktens) + stressp_4 = zetaD(i,j,4)*divuse*(c1+Ktens) + + stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci + + stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! 
kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + ssigmn = stressm_1 + stressm_2 + ssigms = stressm_3 + stressm_4 + ssigme = stressm_1 + stressm_4 + ssigmw = stressm_2 + stressm_3 + ssigm1 =(stressm_1 + stressm_3)*p055 + ssigm2 =(stressm_2 + stressm_4)*p055 + + ssig12n = stress12_1 + stress12_2 + ssig12s = stress12_3 + stress12_4 + ssig12e = stress12_1 + stress12_4 + ssig12w = stress12_2 + stress12_3 + ssig121 =(stress12_1 + stress12_3)*p111 + ssig122 =(stress12_2 + stress12_4)*p111 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 + csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 + csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 + csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + + csig12ne = p222*stress12_1 + ssig122 & + + p055*stress12_3 + csig12nw = p222*stress12_2 + ssig121 & + + p055*stress12_4 + csig12sw = p222*stress12_3 + ssig122 & + + p055*stress12_1 + csig12se = p222*stress12_4 + ssig121 & + + p055*stress12_2 + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + + if (cc .eq. 1) then ! T cell i,j + + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northeast (i,j) + Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + elseif (cc .eq. 2) then ! T cell i+1,j + + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northwest (i+1,j) + Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + + elseif (cc .eq. 3) then ! T cell i,j+1 + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southeast (i,j+1) + Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + + elseif (cc .eq. 4) then ! T cell i+1,j+1 + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southwest (i+1,j+1) + Dstr(iu,ju,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + + elseif (cc .eq. 5) then ! T cell i,j + + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! northeast (i,j) + Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + + elseif (cc .eq. 6) then ! T cell i,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! 
southeast (i,j+1) + Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + + elseif (cc .eq. 7) then ! T cell i,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! northwest (i+1,j) + Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + + elseif (cc .eq. 8) then ! T cell i+1,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! southwest (i+1,j+1) + Dstr(iu,ju,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + endif + + enddo ! ij + + enddo ! cc + + end subroutine formDiag_step1 + + !======================================================================= + + subroutine OLDformDiag_step1 (nx_block, ny_block, & icellt, velcode, & indxti, indxtj, & dxt, dyt, & @@ -2060,6 +2419,13 @@ subroutine formDiag_step1 (nx_block, ny_block, & endif +! strintx = uarear(i,j)* & + ! (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4)) + ! strinty = uarear(i,j)* & + ! (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8)) + + !attention!!!!!!!!!!!!!!!! + do ij = 1, icellt i = indxti(ij) j = indxtj(ij) @@ -2172,6 +2538,10 @@ subroutine formDiag_step1 (nx_block, ny_block, & !----------------------------------------------------------------- ! for dF/dx (u momentum) !----------------------------------------------------------------- + + !JFL contribution des cell (i,j), (i+1,j), (i,j+1) et (i+1,j+1) autour du point u(i,j) + ! pour chaque cell (T) il y a les 4 corners qui sont utilises et tout est combine ici... + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) @@ -2225,7 +2595,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & enddo ! ij - end subroutine formDiag_step1 + end subroutine OLDformDiag_step1 !======================================================================= From adb70e7793c2511fa27e9303856f82af5abb9865 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 18 Jul 2018 13:31:17 +0000 Subject: [PATCH 057/196] precond 3 works but not as good as precond 2 (free drift)...removed OLD code --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 279 +--------------------- 1 file changed, 1 insertion(+), 278 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d1c232b4b..d217a8d44 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -199,7 +199,7 @@ subroutine imp_solver (dt) gamma=1e-6_dbl_kind !nonlinear stopping criterion: iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 - precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal + precond=2 ! 1: identity, 2: diagonal (fd), 3: complete diagonal ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -2319,283 +2319,6 @@ subroutine formDiag_step1 (nx_block, ny_block, & end subroutine formDiag_step1 - !======================================================================= - - subroutine OLDformDiag_step1 (nx_block, ny_block, & - icellt, velcode, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - zetaD, Dstr ) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - icellt, & ! no. of cells where icetmask = 1 - velcode ! 
1: u comp, 2: v comp - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & - zetaD ! 2*zeta - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(inout) :: & - Dstr ! stress combinations - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! c0 or c1 - stressp_1, stressp_2, stressp_3, stressp_4, & - stressm_1, stressm_2, stressm_3, stressm_4, & - stress12_1,stress12_2,stress12_3,stress12_4,& - ssigpn, ssigps, ssigpe, ssigpw , & - ssigmn, ssigms, ssigme, ssigmw , & - ssig12n, ssig12s, ssig12e, ssig12w , & - ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & - csigpne, csigpnw, csigpse, csigpsw , & - csigmne, csigmnw, csigmse, csigmsw , & - csig12ne, csig12nw, csig12se, csig12sw , & - str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - - if (velcode .eq. 1) then - - uij = c1 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - - Dstr(:,:,:) = c0 - - elseif (velcode .eq. 2) then - - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - - vij = c1 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - - endif - - -! strintx = uarear(i,j)* & - ! (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4)) - ! strinty = uarear(i,j)* & - ! (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8)) - - !attention!!!!!!!!!!!!!!!! - - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! strain rates - ! NOTE these are actually strain rates * area (m^2/s) - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uij - dyt(i,j)*ui1j & - + cxp(i,j)*vij - dxt(i,j)*vij1 - divunw = cym(i,j)*ui1j + dyt(i,j)*uij & - + cxp(i,j)*vi1j - dxt(i,j)*vi1j1 - divusw = cym(i,j)*ui1j1 + dyt(i,j)*uij1 & - + cxm(i,j)*vi1j1 + dxt(i,j)*vi1j - divuse = cyp(i,j)*uij1 - dyt(i,j)*ui1j1 & - + cxm(i,j)*vij1 + dxt(i,j)*vij - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uij - dyt(i,j)*ui1j & - + cxm(i,j)*vij + dxt(i,j)*vij1 - tensionnw = -cyp(i,j)*ui1j + dyt(i,j)*uij & - + cxm(i,j)*vi1j + dxt(i,j)*vi1j1 - tensionsw = -cyp(i,j)*ui1j1 + dyt(i,j)*uij1 & - + cxp(i,j)*vi1j1 - dxt(i,j)*vi1j - tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & - + cxp(i,j)*vij1 - dxt(i,j)*vij - - ! 
shearing strain rate = e_12 - shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & - - cxm(i,j)*uij - dxt(i,j)*uij1 - shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & - - cxm(i,j)*ui1j - dxt(i,j)*ui1j1 - shearsw = -cyp(i,j)*vi1j1 + dyt(i,j)*vij1 & - - cxp(i,j)*ui1j1 + dxt(i,j)*ui1j - shearse = -cym(i,j)*vij1 - dyt(i,j)*vi1j1 & - - cxp(i,j)*uij1 + dxt(i,j)*uij - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - !----------------------------------------------------------------- - - stressp_1 = zetaD(i,j,1)*divune*(c1+Ktens) - stressp_2 = zetaD(i,j,2)*divunw*(c1+Ktens) - stressp_3 = zetaD(i,j,3)*divusw*(c1+Ktens) - stressp_4 = zetaD(i,j,4)*divuse*(c1+Ktens) - - stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci - stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci - stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci - stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci - - stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci - stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci - stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci - stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - - !----------------------------------------------------------------- - ! combinations of the stresses for the momentum equation ! kg/s^2 - !----------------------------------------------------------------- - - ssigpn = stressp_1 + stressp_2 - ssigps = stressp_3 + stressp_4 - ssigpe = stressp_1 + stressp_4 - ssigpw = stressp_2 + stressp_3 - ssigp1 =(stressp_1 + stressp_3)*p055 - ssigp2 =(stressp_2 + stressp_4)*p055 - - ssigmn = stressm_1 + stressm_2 - ssigms = stressm_3 + stressm_4 - ssigme = stressm_1 + stressm_4 - ssigmw = stressm_2 + stressm_3 - ssigm1 =(stressm_1 + stressm_3)*p055 - ssigm2 =(stressm_2 + stressm_4)*p055 - - ssig12n = stress12_1 + stress12_2 - ssig12s = stress12_3 + stress12_4 - ssig12e = stress12_1 + stress12_4 - ssig12w = stress12_2 + stress12_3 - ssig121 =(stress12_1 + stress12_3)*p111 - ssig122 =(stress12_2 + stress12_4)*p111 - - csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 - csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 - csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 - csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 - - csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 - csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 - csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 - csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 - - csig12ne = p222*stress12_1 + ssig122 & - + p055*stress12_3 - csig12nw = p222*stress12_2 + ssig121 & - + p055*stress12_4 - csig12sw = p222*stress12_3 + ssig122 & - + p055*stress12_1 - csig12se = p222*stress12_4 + ssig121 & - + p055*stress12_2 - - str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) - str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) - str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) - str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) - - if (velcode .eq. 1) then - - !----------------------------------------------------------------- - ! for dF/dx (u momentum) - !----------------------------------------------------------------- - - !JFL contribution des cell (i,j), (i+1,j), (i,j+1) et (i+1,j+1) autour du point u(i,j) - ! pour chaque cell (T) il y a les 4 corners qui sont utilises et tout est combine ici... - - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) - - ! 
northeast (i,j)
-          Dstr(i,j,1) = -strp_tmp - strm_tmp - str12ew &
-               + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne
-
-          ! northwest (i+1,j)
-          Dstr(i,j,2) = strp_tmp + strm_tmp - str12we &
-               + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw
-
-          strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn)
-          strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn)
-
-          ! southeast (i,j+1)
-          Dstr(i,j,3) = -strp_tmp - strm_tmp + str12ew &
-               + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se
-
-          ! southwest (i+1,j+1)
-          Dstr(i,j,4) = strp_tmp + strm_tmp + str12we &
-               + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw
-
-       elseif (velcode .eq. 2) then
-
-          !-----------------------------------------------------------------
-          ! for dF/dy (v momentum)
-          !-----------------------------------------------------------------
-          strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw)
-          strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw)
-
-          ! northeast (i,j)
-          Dstr(i,j,5) = -strp_tmp + strm_tmp - str12ns &
-               - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne
-
-          ! southeast (i,j+1)
-          Dstr(i,j,6) = strp_tmp - strm_tmp - str12sn &
-               - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se
-
-          strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe)
-          strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme)
-
-          ! northwest (i+1,j)
-          Dstr(i,j,7) = -strp_tmp + strm_tmp + str12ns &
-               - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw
-
-          ! southwest (i+1,j+1)
-          Dstr(i,j,8) = strp_tmp - strm_tmp + str12sn &
-               - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw
-
-       endif
-
-      enddo                     ! ij
-
-      end subroutine OLDformDiag_step1

From b16cd8aa380fc37a1570e222586c49ca0917d962 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 18 Jul 2018 14:17:31 +0000
Subject: [PATCH 058/196] ok found the problem in step2...now fixed

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 36 +++++++++++++++++------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index d217a8d44..17e117301 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -199,7 +199,7 @@ subroutine imp_solver (dt)
       gamma=1e-6_dbl_kind !nonlinear stopping criterion:
       iconvNL=0 ! equals 1 when NL convergence is reached
       krelax=c1
-      precond=2 ! 1: identity, 2: diagonal (fd), 3: complete diagonal
+      precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal
 
       ! This call is needed only if dt changes during runtime.
!      call set_evp_parameters (dt)
@@ -1988,7 +1988,8 @@ subroutine formDiag_step1 (nx_block, ny_block, &
 
       real (kind=dbl_kind), dimension(nx_block,ny_block,8), &
          intent(inout) :: &
-         Dstr          ! stress combinations
+         Dstr          ! intermediate calc for diagonal components of matrix A associated
+                       ! with rheology term
 
       ! local variables
 
@@ -2022,12 +2023,17 @@ subroutine formDiag_step1 (nx_block, ny_block, &
 !cdir nodep      !NEC
 !ocl novrec      !Fujitsu
 
-      Dstr(:,:,:) = c0
+      Dstr(:,:,:) = c0 ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8
+                       ! come from the surrounding T cells but are all referenced to the i,j (u point)
 
-! strintx = uarear(i,j)* &
-         ! (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4))
-         ! strinty = uarear(i,j)* &
-         ! (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8))
+      ! Dstr(i,j,1) corresponds to str(i,j,1)
+      ! Dstr(i,j,2) corresponds to str(i+1,j,2)
+      ! Dstr(i,j,3) corresponds to str(i,j+1,3)
+      ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)
+      ! Dstr(i,j,5) corresponds to str(i,j,5)
+      ! Dstr(i,j,6) corresponds to str(i,j+1,6)
+      ! Dstr(i,j,7) corresponds to str(i+1,j,7)
+      ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)
 
       do cc=1, 8 ! 4 for u and 4 for v
 
@@ -2375,6 +2381,18 @@ subroutine formDiag_step2 (nx_block, ny_block, &
       strintx=c0
       strinty=c0
 
+! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8
+! come from the surrounding T cells but are all referenced to the i,j (u point)
+
+      ! Dstr(i,j,1) corresponds to str(i,j,1)
+      ! Dstr(i,j,2) corresponds to str(i+1,j,2)
+      ! Dstr(i,j,3) corresponds to str(i,j+1,3)
+      ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)
+      ! Dstr(i,j,5) corresponds to str(i,j,5)
+      ! Dstr(i,j,6) corresponds to str(i,j+1,6)
+      ! Dstr(i,j,7) corresponds to str(i+1,j,7)
+      ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)
+
       do ij =1, icellu
          i = indxui(ij)
         j = indxuj(ij)
@@ -2383,9 +2401,9 @@ subroutine formDiag_step2 (nx_block, ny_block, &
 
         if (precond .eq. 3) then
 
           strintx = uarear(i,j)* &
-             (Dstr(i,j,1) + Dstr(i+1,j,2) + Dstr(i,j+1,3) + Dstr(i+1,j+1,4))
+             (Dstr(i,j,1) + Dstr(i,j,2) + Dstr(i,j,3) + Dstr(i,j,4))
           strinty = uarear(i,j)* &
-             (Dstr(i,j,5) + Dstr(i,j+1,6) + Dstr(i+1,j,7) + Dstr(i+1,j+1,8))
+             (Dstr(i,j,5) + Dstr(i,j,6) + Dstr(i,j,7) + Dstr(i,j,8))
         endif
 
         Diagu(i,j) = ccaimp - strintx

From 9a54b28b0a4832e8dd47d506708637d5e2e8f3a2 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 18 Jul 2018 14:54:12 +0000
Subject: [PATCH 059/196] added routine to calc deformations for mech
 redistribution...for the evp this is done for k=ndte in stress...here it is
 done once we have NL convergence

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 196 +++++++++++++++-------
 1 file changed, 140 insertions(+), 56 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 17e117301..c6893be55 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -582,8 +582,6 @@ subroutine imp_solver (dt)
                             cxm       (:,:,iblk), cym       (:,:,iblk), &
                             tarear    (:,:,iblk), tinyarea  (:,:,iblk), &
                             zetaD     (:,:,iblk,:),                     &
-                            shear     (:,:,iblk), divu      (:,:,iblk), &
-                            rdg_conv  (:,:,iblk), rdg_shear (:,:,iblk), &
                             strtmp    (:,:,:))
 
       call matvec (nx_block           , ny_block, &
@@ -656,7 +656,7 @@ subroutine imp_solver (dt)
          vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk)
       enddo
       !$OMP END PARALLEL DO
-      
+
       !$OMP PARALLEL DO PRIVATE(iblk,strtmp)
       do iblk = 1, nblocks
 
@@ -688,9 +686,23 @@ subroutine imp_solver (dt)
      !$OMP END PARALLEL DO
 
      enddo ! outer loop
-      
+
      deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww)
      deallocate(fld2)
+
+      call deformations (nx_block, ny_block, &
+                         icellt(iblk), &
+                         indxti (:,iblk), indxtj (:,iblk), &
+                         uvel (:,:,iblk), vvel (:,:,iblk), &
+                         dxt (:,:,iblk), dyt (:,:,iblk), &
+                         dxhy (:,:,iblk), dyhx (:,:,iblk), &
+                         cxp (:,:,iblk), cyp (:,:,iblk), &
+                         cxm (:,:,iblk), cym (:,:,iblk), &
+                         tarear (:,:,iblk), &
+                         shear (:,:,iblk), divu (:,:,iblk), &
+                         rdg_conv (:,:,iblk), rdg_shear (:,:,iblk))
+
+
      if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask)
 
      ! Force symmetry across the tripole seam
@@ -1012,8 +1024,6 @@ subroutine stress_prime_vp (nx_block, ny_block, &
                                   cxm, cym, &
                                   tarear, tinyarea, &
                                   zetaD, &
-                                  shear, divu, &
-                                  rdg_conv, rdg_shear, &
                                   str )
 
       integer (kind=int_kind), intent(in) :: &
@@ -1044,13 +1054,6 @@ subroutine stress_prime_vp (nx_block, ny_block, &
          intent(in) :: &
          zetaD ! 2*zeta
 
-      real (kind=dbl_kind), dimension (nx_block,ny_block), &
-         intent(inout) :: &
-         shear    , & ! 
strain rate II component (1/s) - divu , & ! strain rate I component, velocity divergence (1/s) - rdg_conv , & ! convergence term for ridging (1/s) - rdg_shear ! shear term for ridging (1/s) - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(out) :: & str ! stress combinations @@ -1135,23 +1138,6 @@ subroutine stress_prime_vp (nx_block, ny_block, & Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - !----------------------------------------------------------------- - ! on last subcycle, save quantities for mechanical redistribution - !----------------------------------------------------------------- - if (kOL == 100) then ! jfl MODIF - divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) - tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) - rdg_conv(i,j) = -min(divu(i,j),c0) - rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) - - ! diagnostic only - ! shear = sqrt(tension**2 + shearing**2) - shear(i,j) = p25*tarear(i,j)*sqrt( & - (tensionne + tensionnw + tensionse + tensionsw)**2 & - + (shearne + shearnw + shearse + shearsw)**2) - - endif - !----------------------------------------------------------------- ! the stresses ! kg/s^2 ! (1) northeast, (2) northwest, (3) southwest, (4) southeast @@ -1297,8 +1283,6 @@ subroutine stress_vp (nx_block, ny_block, & stressm_3, stressm_4, & stress12_1, stress12_2, & stress12_3, stress12_4, & - shear, divu, & - rdg_conv, rdg_shear, & str ) integer (kind=int_kind), intent(in) :: & @@ -1335,13 +1319,6 @@ subroutine stress_vp (nx_block, ny_block, & stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - shear , & ! strain rate II component (1/s) - divu , & ! strain rate I component, velocity divergence (1/s) - rdg_conv , & ! convergence term for ridging (1/s) - rdg_shear ! shear term for ridging (1/s) - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(out) :: & str ! stress combinations @@ -1421,23 +1398,6 @@ subroutine stress_vp (nx_block, ny_block, & Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - !----------------------------------------------------------------- - ! on last subcycle, save quantities for mechanical redistribution - !----------------------------------------------------------------- - if (kOL == 100) then ! jfl MODIF - divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) - tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) - rdg_conv(i,j) = -min(divu(i,j),c0) - rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) - - ! diagnostic only - ! shear = sqrt(tension**2 + shearing**2) - shear(i,j) = p25*tarear(i,j)*sqrt( & - (tensionne + tensionnw + tensionse + tensionsw)**2 & - + (shearne + shearnw + shearse + shearsw)**2) - - endif - !----------------------------------------------------------------- ! the stresses ! kg/s^2 ! (1) northeast, (2) northwest, (3) southwest, (4) southeast @@ -1561,6 +1521,130 @@ subroutine stress_vp (nx_block, ny_block, & end subroutine stress_vp +!======================================================================= + +! 
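[editor's note] The intent(inout) deformation arguments removed above (shear, divu, rdg_conv, rdg_shear) move into the dedicated routine that follows. As the commit message explains, EVP saves these quantities on the last subcycle (k = ndte) inside its stress routine; the VP solver instead computes them once, from the converged velocities. In outline, the driver-side placement added by this patch is:

      do kOL = 1, kmax           ! nonlinear (outer) loop
         ...                     ! FGMRES solve, relaxation, halo updates
      enddo                      ! outer loop
      call deformations (...)    ! once, with the converged uvel/vvel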
calc deformations for mechanical redistribution + + subroutine deformations (nx_block, ny_block, & + icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, & + shear, divu, & + rdg_conv, rdg_shear ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear ! 1/tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + shear , & ! strain rate II component (1/s) + divu , & ! strain rate I component, velocity divergence (1/s) + rdg_conv , & ! convergence term for ridging (1/s) + rdg_shear ! shear term for ridging (1/s) + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + tmp + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! 
Delta (in the denominator of zeta, eta)
+         Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2))
+         Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2))
+         Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2))
+         Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2))
+
+         !-----------------------------------------------------------------
+         ! deformations for mechanical redistribution
+         !-----------------------------------------------------------------
+
+         divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j)
+         tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j)
+         rdg_conv(i,j)  = -min(divu(i,j),c0)
+         rdg_shear(i,j) = p5*(tmp-abs(divu(i,j)))
+
+         ! diagnostic only
+         ! shear = sqrt(tension**2 + shearing**2)
+         shear(i,j) = p25*tarear(i,j)*sqrt( &
+              (tensionne + tensionnw + tensionse + tensionsw)**2 &
+            + (shearne + shearnw + shearse + shearsw)**2)
+
+      enddo                     ! ij
+
+      end subroutine deformations
+
 !=======================================================================
 
       subroutine calc_vrel_Cb (nx_block, ny_block, &

From 14d46119509eeaac9f8d1ab6b200235778d3055b Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Wed, 18 Jul 2018 15:34:54 +0000
Subject: [PATCH 060/196] changed names of evp_prep1 to dyn_prep1...

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index c6893be55..5ae43f30d 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -39,7 +39,7 @@ module ice_dyn_vp
        field_type_scalar, field_type_vector
    use ice_constants, only: c0, c4, p027, p055, p111, p166, &
        p2, p222, p25, p333, p5, c1
-   use ice_dyn_shared, only: evp_prep1, evp_prep2, evp_finish, &
+   use ice_dyn_shared, only: dyn_prep1, dyn_prep2, dyn_finish, &
        yield_curve, ecci, cosw, sinw, fcor_blk, uvel_init, &
        vvel_init, basal_stress_coeff, basalstress, Ktens
    use ice_fileunits, only: nu_diag
@@ -66,7 +66,7 @@ module ice_dyn_vp
 ! Wind stress is set during this routine from the values supplied
 ! via NEMO (unless calc_strair is true). These values are supplied
 ! rotated on u grid and multiplied by aice. strairxT = 0 in this
-! case so operations in evp_prep1 are pointless but carried out to
+! case so operations in dyn_prep1 are pointless but carried out to
 ! minimise code changes.
 #endif
 !
@@ -194,9 +194,9 @@ subroutine imp_solver (dt)
 
       im_fgmres = 50
       maxits = 50
-      kmax=2
-      gammaNL=1e-2_dbl_kind !linear stopping criterion: gamma*(res_ini)
-      gamma=1e-6_dbl_kind !nonlinear stopping criterion:
+      kmax=1000
+      gammaNL=1e-6_dbl_kind !nonlinear stopping criterion: gammaNL*(res_ini)
+      gamma=2e-1_dbl_kind !linear stopping criterion:
       iconvNL=0 ! equals 1 when NL convergence is reached
       krelax=c1
       precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal
@@ -232,7 +232,7 @@ subroutine imp_solver (dt)
       enddo
 
       !-----------------------------------------------------------------
-      ! 
preparation for dynamics
       !-----------------------------------------------------------------
 
       this_block = get_block(blocks_ice(iblk),iblk)
       ilo = this_block%ilo
       ihi = this_block%ihi
       jlo = this_block%jlo
       jhi = this_block%jhi
 
-      call evp_prep1 (nx_block, ny_block, &
+      call dyn_prep1 (nx_block, ny_block, &
                       ilo, ihi, jlo, jhi, &
                       aice    (:,:,iblk), vice    (:,:,iblk), &
                       vsno    (:,:,iblk), tmask   (:,:,iblk), &
@@ -295,7 +295,7 @@ subroutine imp_solver (dt)
          jlo = this_block%jlo
          jhi = this_block%jhi
 
-         call evp_prep2 (nx_block, ny_block, &
+         call dyn_prep2 (nx_block, ny_block, &
                          ilo, ihi, jlo, jhi, &
                          icellt(iblk), icellu(iblk), &
                          indxti (:,iblk), indxtj (:,iblk), &
@@ -378,7 +378,7 @@ subroutine imp_solver (dt)
       call ice_timer_start(timer_bound)
       call ice_HaloUpdate (strength, halo_info, &
                            field_loc_center, field_type_scalar)
-      ! velocities may have changed in evp_prep2
+      ! velocities may have changed in dyn_prep2
       call ice_HaloUpdate (fld2, halo_info, &
                            field_loc_NEcorner, field_type_vector)
       call ice_timer_stop(timer_bound)
@@ -499,7 +499,7 @@ subroutine imp_solver (dt)
 !-----------------------------------------------------------------------
 
      icode = 0
-     iout  = 2 !0: nothing printed, 1: 1st ite only, 2: all iterations
+     iout  = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations
!     its = 0
     ischmi = 0
@@ -780,7 +780,7 @@ subroutine imp_solver (dt)
 
      !$OMP PARALLEL DO PRIVATE(iblk)
      do iblk = 1, nblocks
 
-        call evp_finish &
+        call dyn_finish &
             (nx_block, ny_block, &
             icellu (iblk), Cdn_ocn (:,:,iblk), &
             indxui (:,iblk), indxuj (:,iblk), &

From 5076124187a01ec73665af248d08ee2cd0a35dd8 Mon Sep 17 00:00:00 2001
From: JFLemieux73
Date: Thu, 19 Jul 2018 19:39:37 +0000
Subject: [PATCH 061/196] in process of adding gmres as precond to fgmres...

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 |  10 +-
 cicecore/cicedynB/dynamics/pgmres.F90     | 219 ++++++++++++++++++++++
 2 files changed, 226 insertions(+), 3 deletions(-)
 create mode 100644 cicecore/cicedynB/dynamics/pgmres.F90

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 5ae43f30d..573b927a1 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -199,7 +199,7 @@ subroutine imp_solver (dt)
       gamma=2e-1_dbl_kind !linear stopping criterion:
       iconvNL=0 ! equals 1 when NL convergence is reached
       krelax=c1
-      precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal
+      precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal, 4: gmres+complete diag
 
       ! This call is needed only if dt changes during runtime.
!      call set_evp_parameters (dt)
@@ -470,7 +470,7 @@ subroutine imp_solver (dt)
                            stPrtmp  (:,:,:))
 
       ! prepare precond matrix
-      if (precond .eq. 3) then
+      if (precond .ge. 3) then
 
         call formDiag_step1 (nx_block    , ny_block,      & ! D term due to rheology
                              icellu (iblk),       &
@@ -550,11 +550,15 @@ subroutine imp_solver (dt)
 
            wk22(:)=wk11(:) ! precond=identity
 
-        elseif (precond .gt. 1) then ! use diagonal of A for precond step
+        elseif (precond .eq. 2 .or. precond .eq. 3) then ! use diagonal of A for precond step
 
           call precond_diag (ntot,           &
                              diagvec (:),    &
                              wk11 (:), wk22 (:) )
+
+        elseif (precond .eq. 4) then
+
+
        endif
 
        goto 1
diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90
new file mode 100644
index 000000000..a54c54d1e
--- /dev/null
+++ b/cicecore/cicedynB/dynamics/pgmres.F90
@@ -0,0 +1,219 @@
+
+!**s/r pgmres - preconditioner for GEM_H : PGmres
+! 
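[editor's note] pgmres.F90 below is brought over from GEM essentially verbatim and is not yet wired to CICE: it still uses the GEM modules grid_options and prec, keeps the Grd_yinyang_L / communicate_S logic, and its inner preconditioning and matrix-vector calls are placeholders with no argument lists (call pre_jacobi, call sol_matvec_H). Once adapted, each Arnoldi step would presumably apply the CICE diagonal preconditioner and operator to the work vector, along these lines (a sketch; the pack/unpack helpers are hypothetical, while precond_diag and matvec are the routines of ice_dyn_vp):

      ! preconditioning: rhs = wk / diag(A), using the assembled diagonal
      call precond_diag (n, diagvec, wk, rhs)
      ! matrix-vector product: vv(:,i1) = A*rhs via the CICE operator
      call vec_to_uv (rhs, uvel, vvel)      ! hypothetical unpack helper
      call matvec (...)                     ! operator from ice_dyn_vp
      call uv_to_vec (Au, Av, vv(1,i1))     ! hypothetical repack helper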
+
+      subroutine pgmres(sol,rhs,n,im, eps, maxits, iout,ierr)
+!-----------------------------------------------------------------------
+
+      use grid_options
+      use prec
+      implicit none
+
+#include
+
+      integer n, im, maxits, iout, ierr
+      real*8 rhs(n), sol(n) ,eps
+! Abdessamad Qaddouri - 2018
+!
+!revision
+! v5.0 - Qaddouri A. - initial version
+
+      real*8 vv(n,im+1), gam,eps1
+      real*8 wk(n),r0
+!----------------------------------------------------------------------*
+      integer kmax,ii,i,j,n1,its,k1,i1,jj,k
+      parameter (kmax=50)
+
+      real*8 hh(kmax+1,kmax), c(kmax), s(kmax), rs(kmax+1),t
+      real*8 hhloc(kmax+1,kmax)
+!-------------------------------------------------------------
+! arnoldi size should not exceed kmax=50 in this version..
+! to reset modify parameter kmax accordingly.
+!-------------------------------------------------------------
+      real*8 epsmac ,ro,ddot,dnrm2
+      parameter (epsmac=1.d-16)
+      integer l
+      character(len= 9) communicate_S
+      communicate_S = "GRID"
+      if (Grd_yinyang_L) communicate_S = "MULTIGRID"
+
+
+
+      n1 = n + 1
+      its = 0
+      sol=0.0
+!-------------------------------------------------------------
+! outer loop starts here..
+!-------------- compute initial residual vector --------------
+      do 21 j=1,n
+         vv(j,1) = rhs(j)
+ 21   continue
+
+!-------------------------------------------------------------
+ 20   continue
+      ro = ddot(n, vv,1,vv,1)
+      ro = dsqrt(ro)
+
+      if (iout .gt. 0 .and. its .eq. 0)&
+       write(iout, 199) its, ro ,eps1
+!      write(6,199) its, ro
+      r0=ro
+
+      if (ro .eq. 0.0d0) goto 999
+      t = 1.0d0/ ro
+      do 210 j=1, n
+         vv(j,1) = vv(j,1)*t
+ 210  continue
+      if (its .eq. 0) eps1=eps*ro
+! ** initialize 1-st term of rhs of hessenberg system..
+      rs(1) = ro
+      i = 0
+ 4    i=i+1
+      its = its + 1
+      i1 = i + 1
+
+      do l=1,n
+         rhs(l)= 0.0
+         wk(l)= vv(l,i)
+      enddo
+! precond
+      call pre_jacobi
+
+! matrix-vector
+      call sol_matvec_H
+
+
+! classical gram - schmidt...
+!
+      do 55 j=1, i
+         hhloc(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1)
+         hh(j,i) = hhloc(j,i)
+ 55   continue
+
+      do 56 j=1, i
+         call daxpy(n, -hh(j,i), vv(1,j), 1, vv(1,i1), 1)
+ 56   continue
+      t = ddot(n, vv(1,i1), 1, vv(1,i1), 1)
+!
+      t=dsqrt(t)
+!
+
+      hh(i1,i) = t
+      if ( t .eq. 0.0d0) goto 58
+      t = 1.0d0/t
+      do 57 k=1,n
+         vv(k,i1) = vv(k,i1)*t
+ 57   continue
+!
+! done with modified gram schmidt and arnoldi step..
+! now update factorization of hh
+!
+ 58   if (i == 1) goto 121
+!
+! perform previous transformations on i-th column of h
+!
+      do 66 k=2,i
+         k1 = k-1
+         t = hh(k1,i)
+         hh(k1,i) = c(k1)*t + s(k1)*hh(k,i)
+         hh(k,i) = -s(k1)*t + c(k1)*hh(k,i)
+ 66   continue
+ 121  gam = sqrt(hh(i,i)**2 + hh(i1,i)**2)
+
+! if gamma is zero then any small value will do...
+! will affect only residual estimate
+!
+      if (gam == 0.0d0) gam = epsmac
+!----------- determine next plane rotation -------------------
+      c(i) = hh(i,i)/gam
+      s(i) = hh(i1,i)/gam
+      rs(i1) = -s(i)*rs(i)
+      rs(i) = c(i)*rs(i)
+
+!
+! determine residual norm and test for convergence-
+!
+      hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i)
+      ro = abs(rs(i1))
+      if (iout .gt. 0) &
+       write(iout, 199) its, ro , eps1
+      if (i .lt. im .and. (ro .gt. eps1)) goto 4
+!
+! now compute solution. first solve upper triangular system.
+!
+      rs(i) = rs(i)/hh(i,i)
+      do 30 ii=2,i
+         k=i-ii+1
+         k1 = k+1
+         t=rs(k)
+         do 40 j=k1,i
+            t = t-hh(k,j)*rs(j)
+ 40      continue
+         rs(k) = t/hh(k,k)
+ 30   continue
+!
+! form linear combination of vv(:,i)'s to get solution
+! 
+ t = rs(1) + do 15 k=1, n + rhs(k) = vv(k,1)*t + 15 continue + do 16 j=2, i + t = rs(j) + do 161 k=1, n + rhs(k) = rhs(k)+t*vv(k,j) + 161 continue + 16 continue +! +! call preconditioner. +! + + do l=1,n + wk(l)= rhs(l) + rhs(l)=0.0 + enddo +! precond + call pre_jacobi + + + do 17 k=1, n + sol(k) = sol(k) + rhs(k) + 17 continue +! +! restart outer loop when necessary +! + if (ro .le. eps1) goto 990 + if (its .ge. maxits) goto 991 +! +! else compute residual vector and continue.. +! + do 24 j=1,i + jj = i1-j+1 + rs(jj-1) = -s(jj-1)*rs(jj) + rs(jj) = c(jj-1)*rs(jj) + 24 continue + do 25 j=1,i1 + t = rs(j) + if (j .eq. 1) t = t-1.0d0 + call daxpy (n, t, vv(1,j), 1, vv, 1) + 25 continue + 199 format(' its =', i4, ' res. norm =', d20.6, ' eps1 =', d20.6) +! restart outer loop. + goto 20 + 990 ierr = 0 +! write(iout, 198) its, ro/r0 + 198 format(' its =', i4, ' conv =', d20.6) + return + 991 ierr = 1 +! write(iout, 198) its, ro/r0 + + return + 999 continue + ierr = -1 +! write(iout, 198) its, ro/r0 + + return +!-----------------end of pgmres --------------------------------------- +!----------------------------------------------------------------------- + end From 71214b999dd1a3deddb42d54d18d967cc7dff1fa Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 20 Jul 2018 19:47:07 +0000 Subject: [PATCH 062/196] matvec is now done in one routine instead of in two steps before --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 392 ++++++++++++++++++---- 1 file changed, 331 insertions(+), 61 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 573b927a1..89a769d63 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -70,7 +70,7 @@ module ice_dyn_vp ! minimise code changes. #endif ! -! author: JF Lemieux, F. Dupont and A. Qaddouri, ECCC +! author: JF Lemieux, A. Qaddouri and F. Dupont ECCC subroutine imp_solver (dt) @@ -163,8 +163,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, krelax real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - strtmp, & ! stress combinations for momentum equation !JFL CHECK PAS SUR QUE OK - stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 3? reuse? + stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? Dstrtmp real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & @@ -421,7 +420,7 @@ subroutine imp_solver (dt) ! Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k) !----------------------------------------------------------------- - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks if (kOL .eq. 1) then @@ -539,13 +538,6 @@ subroutine imp_solver (dt) if (icode == 1) then -! if (sol2D_precond_S == 'JACOBI') then -! call pre_jacobi2D ( wk22,wk11,Prec_xevec_8,niloc,njloc,& -! F_nk,Prec_ai_8,Prec_bi_8,Prec_ci_8 ) -! else -! call dcopy (nloc, wk11, 1, wk22, 1) ! precond=identity -! endif - if (precond .eq. 1) then wk22(:)=wk11(:) ! 
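[editor's note] For precond = 2 or 3 the call just below applies the diagonal (Jacobi) step wk22 = wk11 / diag(A). A minimal sketch of what precond_diag is understood to do, assuming diagvec holds the assembled diagonal (illustrative loop only; the actual routine is defined elsewhere in this module):

      do i = 1, ntot
         wk22(i) = wk11(i) / diagvec(i)
      enddo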
precond=identity @@ -573,31 +565,25 @@ subroutine imp_solver (dt) wk11 (:), & uvel (:,:,:), vvel (:,:,:)) - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call stress_prime_vp (nx_block, ny_block, & - kOL, icellt(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk), & - zetaD (:,:,iblk,:), & - strtmp (:,:,:)) - - call matvec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - kOL , & - aiu (:,:,iblk), strtmp (:,:,:), & - vrel (:,:,iblk), & - umassdti (:,:,iblk), fm (:,:,iblk), & - uarear (:,:,iblk), Cb (:,:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk)) + call matvec (nx_block , ny_block, & + icellu (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + kOL , icellt (iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + uvel (:,:,iblk) , vvel (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) enddo !$OMP END PARALLEL DO @@ -615,8 +601,6 @@ subroutine imp_solver (dt) endif -! 199 format (3x,'Iterative FGMRES solver convergence criteria: ',1pe14.7,' at iteration', i3) - ! deallocate (wk11,wk22,rhs1,sol1,vv_8,ww_8) ! call calc_L2norm (nx_block , ny_block, & @@ -632,18 +616,6 @@ subroutine imp_solver (dt) ! Fx (:,:,iblk), Fy (:,:,iblk), & ! L2norm(iblk)) -! call precondD (nx_block, ny_block, & -! kOL , icellt(iblk), & -! indxti (:,iblk), indxtj (:,iblk), & -! dxt (:,:,iblk), dyt (:,:,iblk), & -! dxhy (:,:,iblk), dyhx (:,:,iblk), & -! cxp (:,:,iblk), cyp (:,:,iblk), & -! cxm (:,:,iblk), cym (:,:,iblk), & -! uarear (:,:,iblk), & -! vrel (:,:,iblk), Cb (:,:,iblk), & -! umassdti (:,:,iblk), zetaD (:,:,iblk,:), & -! Diagu (:,:,iblk), Diagv (:,:,iblk)) - !----------------------------------------------------------------------- ! Put vector sol in uvel and vvel arrays !----------------------------------------------------------------------- @@ -654,14 +626,14 @@ subroutine imp_solver (dt) sol (:), & uvel (:,:,:), vvel (:,:,:)) - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) enddo !$OMP END PARALLEL DO - !$OMP PARALLEL DO PRIVATE(iblk,strtmp) + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks ! load velocity into array for boundary updates @@ -1018,7 +990,7 @@ end subroutine calc_zeta_Pr ! Computes VP stress without the rep. pressure Pr (included in b vector) - subroutine stress_prime_vp (nx_block, ny_block, & + subroutine stress_prime_vpOLD (nx_block, ny_block, & kOL, icellt, & indxti, indxtj, & uvel, vvel, & @@ -1264,7 +1236,7 @@ subroutine stress_prime_vp (nx_block, ny_block, & enddo ! 
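[editor's note] The routine ending here, renamed stress_prime_vpOLD by this commit, is retained for reference only, as is matvecOLD below: the live path is the single matvec call made from imp_solver. Acting on a trial velocity (utp, vtp), with vrel, Cb and zetaD frozen at the previous outer iterate, the merged operator reduces to

      ! Au = ccaimp*utp - ccb*vtp - strintx
      ! Av = ccaimp*vtp + ccb*utp - strinty

with ccaimp = umassdti + vrel*cosw + Cb and ccb = fm + sign(c1,fm)*vrel*sinw, exactly as assembled at the end of the new matvec.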
ij - end subroutine stress_prime_vp + end subroutine stress_prime_vpOLD !======================================================================= @@ -1730,7 +1702,7 @@ end subroutine calc_vrel_Cb !======================================================================= - subroutine matvec (nx_block, ny_block, & + subroutine matvecOLD (nx_block, ny_block, & icellu, & indxui, indxuj, & kOL, & @@ -1811,16 +1783,314 @@ subroutine matvec (nx_block, ny_block, & Au(i,j) = ccaimp*utp - ccb*vtp - strintx Av(i,j) = ccaimp*vtp + ccb*utp - strinty - ! calculate basal stress component for outputs ! jfl move this -! if (ksub == ndte) then ! on last subcycling iteration -! if ( basalstress ) then -! taubx(i,j) = -uvel(i,j)*Tbu(i,j) / (sqrt(uold**2 + vold**2) + u0) -! tauby(i,j) = -vvel(i,j)*Tbu(i,j) / (sqrt(uold**2 + vold**2) + u0) -! endif -! endif - enddo ! ij + end subroutine matvecOLD + +!======================================================================= + + subroutine matvec (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + kOL, icellt, & + indxti, indxtj, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + uvel, vvel, & + vrel, Cb, & + zetaD, aiu, & + umassdti, fm, & + uarear, & + Au, Av) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + kOL, & ! outer loop iteration + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj , & ! compressed index in j-direction + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + vrel , & ! coefficient for tauw + Cb , & ! coefficient for basal stress + aiu , & ! ice fraction on u-grid + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + fm , & ! Coriolis param. * mass in U-cell (kg/s) + uarear ! 1/uarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(out) :: & + Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl + Av ! matvec, Fy = Av - by (N/m^2)! jfl + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & + str + + real (kind=dbl_kind) :: & + utp, vtp , & ! utp = uvel, vtp = vvel + ccaimp,ccb , & ! intermediate variables + strintx, strinty + + real (kind=dbl_kind) :: & + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! Delt + puny , & ! 
puny + ssigpn, ssigps, ssigpe, ssigpw , & + ssigmn, ssigms, ssigme, ssigmw , & + ssig12n, ssig12s, ssig12e, ssig12w , & + ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & + csigpne, csigpnw, csigpse, csigpsw , & + csigmne, csigmnw, csigmse, csigmsw , & + csig12ne, csig12nw, csig12se, csig12sw , & + str12ew, str12we, str12ns, str12sn , & + strp_tmp, strm_tmp, tmp + + real (kind=dbl_kind) :: & + stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) + stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 + stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + + !----------------------------------------------------------------- + ! Initialize + !----------------------------------------------------------------- + + str(:,:,:) = c0 + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + ! JFL commented part of stressp is for the rep pressure Pr + !----------------------------------------------------------------- + + stressp_1 = zetaD(i,j,1)*(divune*(c1+Ktens))! - Deltane*(c1-Ktens)) + stressp_2 = zetaD(i,j,2)*(divunw*(c1+Ktens))! - Deltanw*(c1-Ktens)) + stressp_3 = zetaD(i,j,3)*(divusw*(c1+Ktens))! - Deltasw*(c1-Ktens)) + stressp_4 = zetaD(i,j,4)*(divuse*(c1+Ktens))! 
- Deltase*(c1-Ktens)) + + stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci + + stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci + + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! kg/s^2 + !----------------------------------------------------------------- + + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + ssigmn = stressm_1 + stressm_2 + ssigms = stressm_3 + stressm_4 + ssigme = stressm_1 + stressm_4 + ssigmw = stressm_2 + stressm_3 + ssigm1 =(stressm_1 + stressm_3)*p055 + ssigm2 =(stressm_2 + stressm_4)*p055 + + ssig12n = stress12_1 + stress12_2 + ssig12s = stress12_3 + stress12_4 + ssig12e = stress12_1 + stress12_4 + ssig12w = stress12_2 + stress12_3 + ssig121 =(stress12_1 + stress12_3)*p111 + ssig122 =(stress12_2 + stress12_4)*p111 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 + csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 + csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 + csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + + csig12ne = p222*stress12_1 + ssig122 & + + p055*stress12_3 + csig12nw = p222*stress12_2 + ssig121 & + + p055*stress12_4 + csig12sw = p222*stress12_3 + ssig122 & + + p055*stress12_1 + csig12se = p222*stress12_4 + ssig121 & + + p055*stress12_2 + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northeast (i,j) + str(i,j,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + ! northwest (i+1,j) + str(i,j,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southeast (i,j+1) + str(i,j,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + + ! southwest (i+1,j+1) + str(i,j,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! northeast (i,j) + str(i,j,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + + ! 
southeast (i,j+1) + str(i,j,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! northwest (i+1,j) + str(i,j,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + + ! southwest (i+1,j+1) + str(i,j,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + enddo ! ij - icellt + + !----------------------------------------------------------------- + ! Form Au and Av + !----------------------------------------------------------------- + + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + utp = uvel(i,j) + vtp = vvel(i,j) + + ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s + + ccb = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s + + ! divergence of the internal stress tensor + strintx = uarear(i,j)* & + (str(i,j,1) + str(i+1,j,2) + str(i,j+1,3) + str(i+1,j+1,4)) + strinty = uarear(i,j)* & + (str(i,j,5) + str(i,j+1,6) + str(i+1,j,7) + str(i+1,j+1,8)) + + Au(i,j) = ccaimp*utp - ccb*vtp - strintx + Av(i,j) = ccaimp*vtp + ccb*utp - strinty + + enddo ! ij - icellu + end subroutine matvec !======================================================================= From da9a955c7474cb301944981af13f0fd616b6e3ad Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 23 Jul 2018 15:08:04 +0000 Subject: [PATCH 063/196] in the process of adding gmres precond --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 29 +++++++- cicecore/cicedynB/dynamics/pgmres.F90 | 80 +++++++++++++++++++---- 2 files changed, 95 insertions(+), 14 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 89a769d63..701fc092f 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -108,12 +108,15 @@ subroutine imp_solver (dt) icode , & ! for fgmres iconvNL , & ! code for NL convergence criterion iout , & ! for printing fgmres info + ioutpgmres , & ! for printing pgmres info its , & ! iteration nb for fgmres ischmi , & ! Quesse ca!?!?! jfl maxits , & ! max nb of iteration for fgmres fgmres_its , & ! final nb of fgmres_its - im_fgmres , & ! for size of Krylov subspace + im_fgmres , & ! for size of fgmres Krylov subspace + im_pgmres , & ! for size of pgmres Krylov subspace precond , & ! 1: identity, 2: diagonal (free drift), 3: complete diagonal + ierr , & ! for pgmres precond iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -160,7 +163,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, krelax + real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, krelax, epsprecond real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? @@ -192,10 +195,12 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- im_fgmres = 50 + im_pgmres = 50 maxits = 50 kmax=1000 gammaNL=1e-6_dbl_kind !linear stopping criterion: gamma*(res_ini) gamma=2e-1_dbl_kind !nonlinear stopping criterion: + epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! 
equals 1 when NL convergence is reached krelax=c1 precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal, 4: gmres+complete diag @@ -499,6 +504,7 @@ subroutine imp_solver (dt) icode = 0 iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations + ioutpgmres = 1 ! its = 0 ischmi = 0 @@ -550,7 +556,24 @@ subroutine imp_solver (dt) elseif (precond .eq. 4) then - + call pgmres (nx_block , ny_block, & + icellu (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + kOL , icellt (iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + wk22 (:) , wk11(:) , & + ntot , im_pgmres , & + epsprecond , maxits , & + ioutpgmres , ierr ) endif goto 1 diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index a54c54d1e..7a1bde5b0 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -2,17 +2,75 @@ !**s/r pgmres - preconditionner for GEM_H : PGmres ! - subroutine pgmres(sol,rhs,n,im, eps, maxits, iout,ierr) + subroutine pgmres(nx_block, ny_block, & + icellu, & + indxui, indxuj, & + kOL, icellt, & + indxti, indxtj, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + vrel, Cb, & + zetaD, aiu, & + umassdti, fm, & + uarear, & + sol, rhs, & + n, im, & + eps, maxits, & + iout, ierr) + !----------------------------------------------------------------------- - use grid_options - use prec +! use grid_options +! use prec + use ice_kinds_mod + implicit none -#include +!#include + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu, & ! total count when iceumask is true + kOL, & ! outer loop iteration + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj , & ! compressed index in j-direction + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + dxhy , & ! 0.5*(HTE - HTE) + dyhx , & ! 0.5*(HTN - HTN) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear , & ! 1/tarea + tinyarea ! puny*tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(in) :: & + vrel , & ! coefficient for tauw + Cb , & ! coefficient for basal stress + aiu , & ! ice fraction on u-grid + umassdti, & ! mass of U-cell/dt (kg/m^2 s) + fm , & ! Coriolis param. * mass in U-cell (kg/s) + uarear ! 1/uarea + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + intent(in) :: & + zetaD ! 2*zeta integer n, im, maxits, iout, ierr - real*8 rhs(n), sol(n) ,eps + real*8 rhs(n), sol(n) ,eps ! wk11, wk22, eps ! Abdessamad Qaddouri - 2018 ! !revision @@ -33,9 +91,9 @@ subroutine pgmres(sol,rhs,n,im, eps, maxits, iout,ierr) real*8 epsmac ,ro,ddot,dnrm2 parameter (epsmac=1.d-16) integer l - character(len= 9) communicate_S - communicate_S = "GRID" - if (Grd_yinyang_L) communicate_S = "MULTIGRID" +! character(len= 9) communicate_S +! 
communicate_S = "GRID" +! if (Grd_yinyang_L) communicate_S = "MULTIGRID" @@ -77,10 +135,10 @@ subroutine pgmres(sol,rhs,n,im, eps, maxits, iout,ierr) wk(l)= vv(l,i) enddo ! precond - call pre_jacobi +! call pre_jacobi JFL ! matrix-vector - call sol_matvec_H +! call sol_matvec_H JFL ! classical gram - schmidt... @@ -174,7 +232,7 @@ subroutine pgmres(sol,rhs,n,im, eps, maxits, iout,ierr) rhs(l)=0.0 enddo ! precond - call pre_jacobi +! call pre_jacobi JFL do 17 k=1, n From 4b8a7c7913f6f8a88b4a67b1f2124593a3ed285a Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 23 Jul 2018 17:59:15 +0000 Subject: [PATCH 064/196] finalizing implementation of gmres for precond --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 38 ++++---- cicecore/cicedynB/dynamics/pgmres.F90 | 111 ++++++++++++++++------ 2 files changed, 103 insertions(+), 46 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 701fc092f..a60b4c56c 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -52,7 +52,7 @@ module ice_dyn_vp implicit none private - public :: imp_solver + public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays !======================================================================= @@ -556,24 +556,24 @@ subroutine imp_solver (dt) elseif (precond .eq. 4) then - call pgmres (nx_block , ny_block, & - icellu (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - kOL , icellt (iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - wk22 (:) , wk11(:) , & - ntot , im_pgmres , & - epsprecond , maxits , & - ioutpgmres , ierr ) + call pgmres (nx_block, ny_block, nblocks , & + max_blocks , icellu (:) , & + indxui (:,:) , indxuj (:,:) , & + kOL , icellt (:) , & + indxti (:,:) , indxtj (:,:) , & + dxt (:,:,:) , dyt (:,:,:) , & + dxhy (:,:,:) , dyhx (:,:,:) , & + cxp (:,:,:) , cyp (:,:,:) , & + cxm (:,:,:) , cym (:,:,:) , & + tarear (:,:,:) , tinyarea (:,:,:) , & + vrel (:,:,:) , Cb (:,:,:) , & + zetaD (:,:,:,:) , aiu (:,:,:) , & + umassdti (:,:,:) , fm (:,:,:) , & + uarear (:,:,:) , diagvec(:) , & + wk22 (:) , wk11(:) , & + ntot , im_pgmres , & + epsprecond , maxits , & + ioutpgmres , ierr ) endif goto 1 diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 7a1bde5b0..3b084e40d 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -2,23 +2,23 @@ !**s/r pgmres - preconditionner for GEM_H : PGmres ! 
- subroutine pgmres(nx_block, ny_block, & - icellu, & - indxui, indxuj, & - kOL, icellt, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - tarear, tinyarea, & - vrel, Cb, & - zetaD, aiu, & - umassdti, fm, & - uarear, & - sol, rhs, & - n, im, & - eps, maxits, & + subroutine pgmres(nx_block, ny_block, nblocks, & + max_blocks, icellu, & + indxui, indxuj, & + kOL, icellt, & + indxti, indxtj, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tarear, tinyarea, & + vrel, Cb, & + zetaD, aiu, & + umassdti, fm, & + uarear, diagvec, & + sol, rhs, & + n, im, & + eps, maxits, & iout, ierr) !----------------------------------------------------------------------- @@ -26,6 +26,7 @@ subroutine pgmres(nx_block, ny_block, & ! use grid_options ! use prec use ice_kinds_mod + use ice_dyn_vp, only: matvec, arrays_to_vec, vec_to_arrays implicit none @@ -33,18 +34,23 @@ subroutine pgmres(nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - kOL, & ! outer loop iteration + nblocks, & ! nb of blocks + max_blocks, & ! max nb of blocks + kOL ! outer loop iteration + + + integer (kind=int_kind), dimension (max_blocks), intent(in) :: & + icellu , & icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & intent(in) :: & indxui , & ! compressed index in i-direction indxuj , & ! compressed index in j-direction indxti , & ! compressed index in i-direction indxtj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & dxt , & ! width of T-cell through the middle (m) dyt , & ! height of T-cell through the middle (m) dxhy , & ! 0.5*(HTE - HTE) @@ -56,7 +62,7 @@ subroutine pgmres(nx_block, ny_block, & tarear , & ! 1/tarea tinyarea ! puny*tarea - real (kind=dbl_kind), dimension (nx_block,ny_block), & + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), & intent(in) :: & vrel , & ! coefficient for tauw Cb , & ! coefficient for basal stress @@ -65,11 +71,20 @@ subroutine pgmres(nx_block, ny_block, & fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), & intent(in) :: & zetaD ! 2*zeta + + real (kind=dbl_kind), dimension (n), intent(in) :: & + diagvec + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & + utp , & ! x-component of velocity (m/s) + vtp , & ! y-component of velocity (m/s) + Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl + Av ! matvec, Fy = Av - by (N/m^2)! jfl - integer n, im, maxits, iout, ierr + integer n, im, maxits, iout, ierr, iblk real*8 rhs(n), sol(n) ,eps ! wk11, wk22, eps ! Abdessamad Qaddouri - 2018 ! @@ -96,7 +111,6 @@ subroutine pgmres(nx_block, ny_block, & ! if (Grd_yinyang_L) communicate_S = "MULTIGRID" - n1 = n + 1 its = 0 sol=0.0 @@ -135,12 +149,51 @@ subroutine pgmres(nx_block, ny_block, & wk(l)= vv(l,i) enddo ! precond -! call pre_jacobi JFL +! call precond_diag (ntot, & +! diagvec (:), & +! rhs (:), wk22 (:) ) + + rhs = wk !!! JFL ! matrix-vector ! 
call sol_matvec_H JFL + + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), n, & + indxui (:,:), indxuj(:,:), & + rhs (:), & + utp (:,:,:), vtp (:,:,:)) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + kOL , icellt (iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + utp (:,:,iblk) , vtp (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + ! form wk2 from Au and Av arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), n, & + indxui (:,:), indxuj(:,:), & + Au (:,:,:), Av (:,:,:), & + vv(1,i1)) + ! classical gram - schmidt... ! do 55 j=1, i @@ -232,8 +285,12 @@ subroutine pgmres(nx_block, ny_block, & rhs(l)=0.0 enddo ! precond -! call pre_jacobi JFL +! precond +! call precond_diag (ntot, & +! diagvec (:), & +! rhs (:), wk22 (:) ) + rhs = wk !!! JFL do 17 k=1, n sol(k) = sol(k) + rhs(k) From 76d30b7345601b7e2de95981c2655643a3476ecf Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 23 Jul 2018 18:29:07 +0000 Subject: [PATCH 065/196] pgmres seems to work...I used it as a solver and it works fine --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +- cicecore/cicedynB/dynamics/pgmres.F90 | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a60b4c56c..fdac43851 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -52,7 +52,7 @@ module ice_dyn_vp implicit none private - public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays + public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag !======================================================================= diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 3b084e40d..7ee6bc3ee 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -26,7 +26,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & ! use grid_options ! use prec use ice_kinds_mod - use ice_dyn_vp, only: matvec, arrays_to_vec, vec_to_arrays + use ice_dyn_vp, only: matvec, arrays_to_vec, vec_to_arrays, precond_diag implicit none @@ -149,11 +149,11 @@ subroutine pgmres(nx_block, ny_block, nblocks, & wk(l)= vv(l,i) enddo ! precond -! call precond_diag (ntot, & -! diagvec (:), & -! rhs (:), wk22 (:) ) + call precond_diag (n, & + diagvec (:), & + wk (:), rhs (:) ) - rhs = wk !!! JFL +! rhs = wk !!! JFL ! matrix-vector ! call sol_matvec_H JFL @@ -285,12 +285,10 @@ subroutine pgmres(nx_block, ny_block, nblocks, & rhs(l)=0.0 enddo ! precond -! precond -! call precond_diag (ntot, & -! diagvec (:), & -! rhs (:), wk22 (:) ) - - rhs = wk !!! JFL + call precond_diag (n, & + diagvec (:), & + wk (:), rhs (:) ) +! rhs = wk !!! 
JFL do 17 k=1, n sol(k) = sol(k) + rhs(k) From f536a9b4ebff7e0d13dffe3743526df67a1570c1 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Mon, 23 Jul 2018 18:52:03 +0000 Subject: [PATCH 066/196] minor modif: introduction of maxits_pgmres --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index fdac43851..51bb05838 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -111,7 +111,8 @@ subroutine imp_solver (dt) ioutpgmres , & ! for printing pgmres info its , & ! iteration nb for fgmres ischmi , & ! Quesse ca!?!?! jfl - maxits , & ! max nb of iteration for fgmres + maxits_fgmres , & ! max nb of iteration for fgmres + maxits_pgmres , & ! max nb of iteration for fgmres fgmres_its , & ! final nb of fgmres_its im_fgmres , & ! for size of fgmres Krylov subspace im_pgmres , & ! for size of pgmres Krylov subspace @@ -196,7 +197,8 @@ subroutine imp_solver (dt) im_fgmres = 50 im_pgmres = 50 - maxits = 50 + maxits_fgmres = 50 + maxits_pgmres = 4 kmax=1000 gammaNL=1e-6_dbl_kind !linear stopping criterion: gamma*(res_ini) gamma=2e-1_dbl_kind !nonlinear stopping criterion: @@ -538,7 +540,8 @@ subroutine imp_solver (dt) ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, gammaNL, tolNL, maxits,iout,icode,iconvNL,fgmres_its,kOL) + gamma, gammaNL, tolNL, maxits_fgmres,iout, & + icode,iconvNL,fgmres_its,kOL) if (iconvNL .eq. 1) exit @@ -572,7 +575,7 @@ subroutine imp_solver (dt) uarear (:,:,:) , diagvec(:) , & wk22 (:) , wk11(:) , & ntot , im_pgmres , & - epsprecond , maxits , & + epsprecond , maxits_pgmres , & ioutpgmres , ierr ) endif From 441f8498f9f86c735acfd43bff7c6569b0de6a79 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 25 Jul 2018 14:23:10 +0000 Subject: [PATCH 067/196] modif to comments...very minor --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 +++++----- cicecore/cicedynB/dynamics/pgmres.F90 | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 51bb05838..ddd426e30 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -196,12 +196,12 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- im_fgmres = 50 - im_pgmres = 50 + im_pgmres = 5 maxits_fgmres = 50 - maxits_pgmres = 4 - kmax=1000 - gammaNL=1e-6_dbl_kind !linear stopping criterion: gamma*(res_ini) - gamma=2e-1_dbl_kind !nonlinear stopping criterion: + maxits_pgmres = 5 + kmax=1 + gamma=2e-1_dbl_kind ! linear stopping criterion: gamma(res(k) + gammaNL=1e-6_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 7ee6bc3ee..40d6acad9 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -113,7 +113,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & n1 = n + 1 its = 0 - sol=0.0 + sol=0.0 !JFL ...veut-on vraiment mettre sol = 0 ici?????? !------------------------------------------------------------- ! outer loop starts here.. 
!-------------- compute initial residual vector -------------- From 21364ec2862c176004857ed5c3d93ef4e5eef6e1 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Thu, 26 Jul 2018 14:50:30 +0000 Subject: [PATCH 068/196] something was inconsistent for precond choice...now fixed and removed one precond option (free drift) --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 29 ++++++++++------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ddd426e30..262be856e 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -116,7 +116,7 @@ subroutine imp_solver (dt) fgmres_its , & ! final nb of fgmres_its im_fgmres , & ! for size of fgmres Krylov subspace im_pgmres , & ! for size of pgmres Krylov subspace - precond , & ! 1: identity, 2: diagonal (free drift), 3: complete diagonal + precond , & ! 1: identity, 2: diagonal 3: pgmres ierr , & ! for pgmres precond iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain @@ -205,7 +205,7 @@ subroutine imp_solver (dt) epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! equals 1 when NL convergence is reached krelax=c1 - precond=3 ! 1: identity, 2: diagonal (fd), 3: complete diagonal, 4: gmres+complete diag + precond=2 ! 1: identity, 2: diagonal 3: gmres + diag ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -476,7 +476,7 @@ subroutine imp_solver (dt) stPrtmp (:,:,:)) ! prepare precond matrix - if (precond .ge. 3) then + if (precond .gt. 1) then call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology icellu (iblk), & @@ -487,16 +487,16 @@ subroutine imp_solver (dt) cxm (:,:,iblk), cym (:,:,iblk), & zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) - endif - call formDiag_step2 (nx_block , ny_block, & - icellu (iblk), precond, & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & Dstrtmp (:,:,:) , vrel (:,:,iblk), & umassdti (:,:,iblk), & uarear (:,:,iblk), Cb (:,:,iblk), & Diagu (:,:,iblk), Diagv (:,:,iblk)) - + + endif + enddo !$OMP END PARALLEL DO @@ -551,13 +551,13 @@ subroutine imp_solver (dt) wk22(:)=wk11(:) ! precond=identity - elseif (precond .eq. 2 .or. precond .eq. 3) then ! use diagonal of A for precond step + elseif (precond .eq. 2) then ! use diagonal of A for precond step call precond_diag (ntot, & diagvec (:), & wk11 (:), wk22 (:) ) - elseif (precond .eq. 4) then + elseif (precond .eq. 3) then call pgmres (nx_block, ny_block, nblocks , & max_blocks , icellu (:) , & @@ -2713,7 +2713,7 @@ end subroutine formDiag_step1 !======================================================================= subroutine formDiag_step2 (nx_block, ny_block, & - icellu, precond, & + icellu, & indxui, indxuj, & Dstr, vrel, & umassdti, & @@ -2722,8 +2722,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - precond ! precond type + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -2783,12 +2782,10 @@ subroutine formDiag_step2 (nx_block, ny_block, & ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s - if (precond .eq. 
3) then - strintx = uarear(i,j)* & + strintx = uarear(i,j)* & (Dstr(i,j,1) + Dstr(i,j,2) + Dstr(i,j,3) + Dstr(i,j,4)) - strinty = uarear(i,j)* & + strinty = uarear(i,j)* & (Dstr(i,j,5) + Dstr(i,j,6) + Dstr(i,j,7) + Dstr(i,j,8)) - endif Diagu(i,j) = ccaimp - strintx Diagv(i,j) = ccaimp - strinty From f57e346fd7a6c8a051eef7c2f1f4d8eaa5114a24 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 31 Aug 2018 13:31:19 +0000 Subject: [PATCH 069/196] removed krelax...not needed as a more complex method will be added --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 262be856e..aad360f1f 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -164,7 +164,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, krelax, epsprecond + real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, epsprecond real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? @@ -199,13 +199,12 @@ subroutine imp_solver (dt) im_pgmres = 5 maxits_fgmres = 50 maxits_pgmres = 5 - kmax=1 - gamma=2e-1_dbl_kind ! linear stopping criterion: gamma(res(k) + kmax=1000 + gamma=1e-2_dbl_kind ! linear stopping criterion: gamma(res(k) gammaNL=1e-6_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! equals 1 when NL convergence is reached - krelax=c1 - precond=2 ! 1: identity, 2: diagonal 3: gmres + diag + precond=3 ! 1: identity, 2: diagonal 3: gmres + diag ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -653,10 +652,10 @@ subroutine imp_solver (dt) uvel (:,:,:), vvel (:,:,:)) !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) - vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) - enddo +! do iblk = 1, nblocks +! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) +! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) +! enddo !$OMP END PARALLEL DO !$OMP PARALLEL DO PRIVATE(iblk) From c547d30bf9d25236ff69732ba3c22c62900a8820 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Fri, 31 Aug 2018 14:37:44 +0000 Subject: [PATCH 070/196] in the process of adding the RRE1 acceleration method --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 34 +++++++++++++++++------ 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index aad360f1f..1e136ac1f 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -104,6 +104,8 @@ subroutine imp_solver (dt) integer (kind=int_kind) :: & kOL , & ! outer loop iteration kmax , & ! jfl put in namelist + krre , & ! RRE1 cycling iteration + kmaxrre , & ! nb of RRE1 iterations (hard coded 3) ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres iconvNL , & ! 
code for NL convergence criterion @@ -161,7 +163,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:), wk11(:), wk22(:) - real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) + real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:), uRRE(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, epsprecond @@ -182,6 +184,8 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block + + logical :: RRE1 ! acceleration method for Picard (see C. Roland PhD thesis) call ice_timer_start(timer_dynamics) ! dynamics @@ -201,10 +205,11 @@ subroutine imp_solver (dt) maxits_pgmres = 5 kmax=1000 gamma=1e-2_dbl_kind ! linear stopping criterion: gamma(res(k) - gammaNL=1e-6_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) + gammaNL=1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! equals 1 when NL convergence is reached precond=3 ! 1: identity, 2: diagonal 3: gmres + diag + RRE1=.false. ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -374,6 +379,13 @@ subroutine imp_solver (dt) allocate(bvec(ntot), sol(ntot), diagvec(ntot), wk11(ntot), wk22(ntot)) allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + if (RRE1) then + kmaxrre=3 + allocate(uRRE(ntot,kmaxrre)) + else + kmaxrre=1 + endif + !----------------------------------------------------------------- call icepack_warnings_flush(nu_diag) @@ -429,13 +441,13 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - if (kOL .eq. 1) then +! if (kOL .eq. 1) then ulin(:,:,iblk) = uvel(:,:,iblk) vlin(:,:,iblk) = vvel(:,:,iblk) - else - ulin(:,:,iblk) = p5*uprev_k(:,:,iblk) + p5*uvel(:,:,iblk) - vlin(:,:,iblk) = p5*vprev_k(:,:,iblk) + p5*vvel(:,:,iblk) - endif +! else +! ulin(:,:,iblk) = p5*uprev_k(:,:,iblk) + p5*uvel(:,:,iblk) +! vlin(:,:,iblk) = p5*vprev_k(:,:,iblk) + p5*vvel(:,:,iblk) +! endif uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) @@ -458,7 +470,7 @@ subroutine imp_solver (dt) kOL , & aiu (:,:,iblk), Tbu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & - ulin (:,:,iblk), vlin (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) ! 
prepare b vector (RHS) @@ -469,7 +481,7 @@ subroutine imp_solver (dt) aiu (:,:,iblk), uarear (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - ulin (:,:,iblk), vlin (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & stPrtmp (:,:,:)) @@ -691,6 +703,10 @@ subroutine imp_solver (dt) deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) deallocate(fld2) + if (RRE1) then + deallocate(uRRE) + endif + call deformations (nx_block, ny_block, & icellt(iblk), & indxti (:,iblk), indxtj (:,iblk), & From 67ae918ecae801061f3ecda62bc259303301e2c8 Mon Sep 17 00:00:00 2001 From: JFLemieux73 Date: Wed, 5 Sep 2018 15:39:59 +0000 Subject: [PATCH 071/196] fixed problem in halo update (spherical grid...wraps on itself) before matvec --- cicecore/cicedynB/dynamics/fgmresD.F90 | 14 ++++++------- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 25 ++++++++++++++++++++--- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 6dcf7d809..41e314731 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,5 +1,5 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & - gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL) + gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL, krre) !----------------------------------------------------------------------- ! jfl Dec 1st 2006. We modified the routine so that it is double precison. @@ -20,7 +20,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & !----------------------------------------------------------------------- implicit double precision (a-h,o-z) !jfl modification - integer n, im, maxits, iout, icode, iconv, kOL + integer n, im, maxits, iout, icode, iconv, kOL, krre double precision rhs(*), sol(*), vv(n,im+1),w(n,im) double precision wk1(n), wk2(n), gamma, gammaNL !----------------------------------------------------------------------- @@ -150,15 +150,15 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & if (its .eq. 0) then r0 = ro eps1=gamma*ro - if (kOL .eq. 1) tolNL=gammaNL*ro + if (kOL .eq. 1 .and. krre .eq. 1) tolNL=gammaNL*ro endif - if (ro .lt. tolNL) then + if (ro .lt. tolNL .and. krre .eq. 1) then iconv = 1 goto 999 endif - if (iout .gt. 0) write(*, 199) kOL, its, ro!& + if (iout .gt. 0) write(*, 199) kOL, krre, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. ! @@ -237,7 +237,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) ro = abs(rs(i1)) if (iout .gt. 1) & - write(*, 199) kOL, its, ro + write(*, 199) kOL, krre, its, ro if (i .lt. im .and. (ro .gt. eps1)) goto 4 ! ! now compute solution. first solve upper triangular system. @@ -285,7 +285,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & goto 20 999 icode = 0 - 199 format('Picard its=', i4, ' fmgres its =', i4, ' res. norm =', d26.16) + 199 format('Picard i=', i4, 'cycling i=', i4, ' fmgres i =', i4, ' L2norm =', d26.16) ! return !-----end-of-fgmres----------------------------------------------------- diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 1e136ac1f..2a7037ded 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -384,6 +384,7 @@ subroutine imp_solver (dt) allocate(uRRE(ntot,kmaxrre)) else kmaxrre=1 + krre=1 ! 
JFL TEMP endif !----------------------------------------------------------------- @@ -395,8 +396,8 @@ subroutine imp_solver (dt) call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) - ! velocities may have changed in dyn_prep2 - call ice_HaloUpdate (fld2, halo_info, & + ! velocities may have changed in dyn_prep2 ! JFL prends en compte la grille spherique qui se referme sur elle meme... + call ice_HaloUpdate (fld2, halo_info, & field_loc_NEcorner, field_type_vector) call ice_timer_stop(timer_bound) @@ -552,7 +553,7 @@ subroutine imp_solver (dt) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & gamma, gammaNL, tolNL, maxits_fgmres,iout, & - icode,iconvNL,fgmres_its,kOL) + icode,iconvNL,fgmres_its,kOL, krre) if (iconvNL .eq. 1) exit @@ -601,6 +602,24 @@ subroutine imp_solver (dt) indxui (:,:), indxuj(:,:), & wk11 (:), & uvel (:,:,:), vvel (:,:,:)) + + ! JFL halo update could be in subroutine... + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks From b3bc476edec78f68a2fd0327cb078b274daf9062 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 22 Jan 2019 14:56:43 -0500 Subject: [PATCH 072/196] cicecore: add puny_dyn variable to initialize tinyarea, remove temporary punyVP variable --- cicecore/cicedynB/infrastructure/ice_grid.F90 | 13 +++++-------- cicecore/drivers/standalone/cice/CICE_InitMod.F90 | 14 ++++++++++++++ cicecore/shared/ice_constants.F90 | 15 +++++++++++---- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/infrastructure/ice_grid.F90 b/cicecore/cicedynB/infrastructure/ice_grid.F90 index 8832407fd..bc06f6108 100644 --- a/cicecore/cicedynB/infrastructure/ice_grid.F90 +++ b/cicecore/cicedynB/infrastructure/ice_grid.F90 @@ -330,7 +330,8 @@ subroutine init_grid2 use ice_blocks, only: get_block, block, nx_block, ny_block use ice_constants, only: c0, c1, c2, p5, p25, c1p5, & field_loc_center, field_loc_NEcorner, & - field_type_scalar, field_type_vector, field_type_angle + field_type_scalar, field_type_vector, field_type_angle, & + puny_dyn use ice_domain_size, only: max_blocks integer (kind=int_kind) :: & @@ -339,7 +340,7 @@ subroutine init_grid2 real (kind=dbl_kind) :: & angle_0, angle_w, angle_s, angle_sw, & - pi, pi2, puny, punyVP + pi, pi2 logical (kind=log_kind), dimension(nx_block,ny_block,max_blocks):: & out_of_range @@ -356,7 +357,7 @@ subroutine init_grid2 ! lat, lon, cell widths, angle, land mask !----------------------------------------------------------------- - call icepack_query_parameters(pi_out=pi, pi2_out=pi2, puny_out=puny) + call icepack_query_parameters(pi_out=pi, pi2_out=pi2) call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) @@ -384,9 +385,6 @@ subroutine init_grid2 ! T-grid cell and U-grid cell quantities !----------------------------------------------------------------- - print *, 'in init_grid2 to set tinyarea with punyVP value' - punyVP = 2d-09 - ! 
tarea(:,:,:) = c0 !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) @@ -411,8 +409,7 @@ subroutine init_grid2 else uarear(i,j,iblk) = c0 ! possible on boundaries endif -! tinyarea(i,j,iblk) = puny*tarea(i,j,iblk) - tinyarea(i,j,iblk) = punyVP*tarea(i,j,iblk) + tinyarea(i,j,iblk) = puny_dyn*tarea(i,j,iblk) dxhy(i,j,iblk) = p5*(HTE(i,j,iblk) - HTE(i-1,j,iblk)) dyhx(i,j,iblk) = p5*(HTN(i,j,iblk) - HTN(i,j-1,iblk)) diff --git a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 b/cicecore/drivers/standalone/cice/CICE_InitMod.F90 index 0a8614eb2..6b2cf8916 100644 --- a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 +++ b/cicecore/drivers/standalone/cice/CICE_InitMod.F90 @@ -67,6 +67,7 @@ subroutine cice_init use ice_calendar, only: dt, dt_dyn, time, istep, istep1, write_ic, & init_calendar, calendar use ice_communicate, only: init_communicate, my_task, master_task + use ice_constants, only: ice_init_constants use ice_diagnostics, only: init_diags use ice_domain, only: init_domain_blocks use ice_domain_size, only: ncat, nfsd @@ -90,6 +91,9 @@ subroutine cice_init logical(kind=log_kind) :: tr_aero, tr_zaero, skl_bgc, z_tracers, & tr_iso, tr_fsd, wave_spec + + real (kind=dbl_kind) :: puny + character(len=*), parameter :: subname = '(cice_init)' call init_communicate ! initial setup for message passing @@ -117,6 +121,16 @@ subroutine cice_init call alloc_flux ! allocate flux arrays call init_ice_timers ! initialize all timers call ice_timer_start(timer_total) ! start timing entire run + ! By default, the puny value used for computing tinyarea in init_grid2 (puny_dyn) + ! is set to a special value for use with the implicit solver (kdyn = 3). + ! Thus we reset it back to puny if kdyn .ne. 3 + if (kdyn /= 3) then + call icepack_query_parameters(puny_out=puny) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & + file=__FILE__, line=__LINE__) + call ice_init_constants(puny_dyn_in=puny) + endif call init_grid2 ! grid variables call init_zbgc ! vertical biogeochemistry initialization call init_calendar ! initialize some calendar stuff diff --git a/cicecore/shared/ice_constants.F90 b/cicecore/shared/ice_constants.F90 index c49732e35..b0370b0b1 100644 --- a/cicecore/shared/ice_constants.F90 +++ b/cicecore/shared/ice_constants.F90 @@ -33,6 +33,9 @@ module ice_constants real (kind=dbl_kind), public :: & shlat = 30.0_dbl_kind ,&! artificial masking edge (deg) nhlat = -30.0_dbl_kind ! artificial masking edge (deg) + + real (kind=dbl_kind), public :: & + puny_dyn = 2e-09_dbl_kind ! special puny value for computing tinyarea for implicit solver !----------------------------------------------------------------- ! numbers used outside the column package @@ -132,7 +135,7 @@ module ice_constants ! subroutine to set the cice constants subroutine ice_init_constants( & - omega_in, radius_in, spval_dbl_in, spval_in, shlat_in, nhlat_in) + omega_in, radius_in, spval_dbl_in, spval_in, shlat_in, nhlat_in, puny_dyn_in) real (kind=dbl_kind), intent(in), optional :: & omega_in , & ! angular velocity of earth (rad/sec) @@ -140,7 +143,8 @@ subroutine ice_init_constants( & spval_dbl_in , & ! special value (double precision) spval_in , & ! special value for netCDF output shlat_in , & ! artificial masking edge (deg) - nhlat_in ! artificial masking edge (deg) + nhlat_in , & ! artificial masking edge (deg) + puny_dyn_in ! 
special puny value for computing tinyarea character(len=*),parameter :: subname='(ice_init_constants)' @@ -150,6 +154,7 @@ subroutine ice_init_constants( & if (present(spval_in)) spval = spval_in if (present(shlat_in)) shlat = shlat_in if (present(nhlat_in)) nhlat = nhlat_in + if (present(puny_dyn_in)) puny_dyn = puny_dyn_in end subroutine ice_init_constants @@ -158,7 +163,7 @@ end subroutine ice_init_constants ! subroutine to set the cice constants subroutine ice_query_constants( & - omega_out, radius_out, spval_dbl_out, spval_out, shlat_out, nhlat_out) + omega_out, radius_out, spval_dbl_out, spval_out, shlat_out, nhlat_out, puny_dyn_out) real (kind=dbl_kind), intent(out), optional :: & omega_out , & ! angular velocity of earth (rad/sec) @@ -166,7 +171,8 @@ subroutine ice_query_constants( & spval_dbl_out , & ! special value (double precision) spval_out , & ! special value for netCDF output shlat_out , & ! artificial masking edge (deg) - nhlat_out ! artificial masking edge (deg) + nhlat_out , & ! artificial masking edge (deg) + puny_dyn_out ! special puny value for computing tinyarea character(len=*),parameter :: subname='(ice_query_constants)' @@ -176,6 +182,7 @@ subroutine ice_query_constants( & if (present(spval_out)) spval_out = spval if (present(shlat_out)) shlat_out = shlat if (present(nhlat_out)) nhlat_out = nhlat + if (present(puny_dyn_out)) puny_dyn_out = puny_dyn end subroutine ice_query_constants From 3ddca0ee5669e23f7c74cc2fbccee30c04dc2acb Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 23 Jan 2019 12:02:26 -0500 Subject: [PATCH 073/196] ice_dyn_vp: move solver algorithm and output parameters to namelist Also, since 'yield_curve' and 'e_ratio' are used for both VP and EVP dynamics, print their values for both solvers. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 38 +++++++-------- cicecore/cicedynB/general/ice_init.F90 | 59 ++++++++++++++++------- configuration/scripts/ice_in | 12 +++++ 3 files changed, 71 insertions(+), 38 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 2a7037ded..52b30ea96 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -54,6 +54,23 @@ module ice_dyn_vp private public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag + ! namelist parameters + + integer (kind=int_kind), public :: & + kmax , & ! max nb of iteration for nonlinear solver + precond , & ! preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag + im_fgmres , & ! size of fgmres Krylov subspace + im_pgmres , & ! size of pgmres Krylov subspace + maxits_fgmres , & ! max nb of iteration for fgmres + maxits_pgmres , & ! max nb of iteration for pgmres + iout , & ! for printing fgmres info + ioutpgmres ! for printing pgmres info + + real (kind=dbl_kind), public :: & + gammaNL , & ! nonlinear stopping criterion: gammaNL*res(k=0) + gamma , & ! fgmres stopping criterion: gamma*res(k) + epsprecond ! pgmres stopping criterion: epsprecond*res(k) + !======================================================================= contains @@ -103,22 +120,14 @@ subroutine imp_solver (dt) integer (kind=int_kind) :: & kOL , & ! outer loop iteration - kmax , & ! jfl put in namelist krre , & ! RRE1 cycling iteration kmaxrre , & ! nb of RRE1 iterations (hard coded 3) ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres iconvNL , & ! code for NL convergence criterion - iout , & ! for printing fgmres info - ioutpgmres , & ! 
for printing pgmres info its , & ! iteration nb for fgmres ischmi , & ! Quesse ca!?!?! jfl - maxits_fgmres , & ! max nb of iteration for fgmres - maxits_pgmres , & ! max nb of iteration for fgmres fgmres_its , & ! final nb of fgmres_its - im_fgmres , & ! for size of fgmres Krylov subspace - im_pgmres , & ! for size of pgmres Krylov subspace - precond , & ! 1: identity, 2: diagonal 3: pgmres ierr , & ! for pgmres precond iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain @@ -166,7 +175,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:), uRRE(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, gamma, gammaNL, tolNL, epsprecond + real (kind=dbl_kind) :: conv, tolNL real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? @@ -199,16 +208,7 @@ subroutine imp_solver (dt) ! Define a few things for FGMRES and Picard solver !----------------------------------------------------------------- - im_fgmres = 50 - im_pgmres = 5 - maxits_fgmres = 50 - maxits_pgmres = 5 - kmax=1000 - gamma=1e-2_dbl_kind ! linear stopping criterion: gamma(res(k) - gammaNL=1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) - epsprecond=1e-6_dbl_kind ! for pgmres iconvNL=0 ! equals 1 when NL convergence is reached - precond=3 ! 1: identity, 2: diagonal 3: gmres + diag RRE1=.false. ! This call is needed only if dt changes during runtime. @@ -517,8 +517,6 @@ subroutine imp_solver (dt) !----------------------------------------------------------------------- icode = 0 - iout = 1 !0: nothing printed, 1: 1st ite only, 2: all iterations - ioutpgmres = 1 ! its = 0 ischmi = 0 diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index d3b096eb3..422475859 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -100,6 +100,8 @@ subroutine input_data basalstress, k1, k2, alphab, threshold_hw, & Ktens, e_ratio, coriolis, ssh_stress, & kridge, ktransport, brlx, arlx + use ice_dyn_vp, only: kmax, precond, im_fgmres, im_pgmres, maxits_fgmres, & + maxits_pgmres, iout, ioutpgmres, gammaNL, gamma, epsprecond use ice_transport_driver, only: advection, conserv_check use ice_restoring, only: restore_ice #ifdef CESMCOUPLED @@ -194,7 +196,10 @@ subroutine input_data advection, coriolis, kridge, ktransport, & kstrength, krdg_partic, krdg_redist, mu_rdg, & e_ratio, Ktens, Cf, basalstress, & - k1, k2, alphab, threshold_hw, & + k1, kmax, precond, im_fgmres, & + im_pgmres, maxits_fgmres, maxits_pgmres, iout, & + ioutpgmres, gammaNL, gamma, epsprecond, & + k2, alphab, threshold_hw, & Pstar, Cstar namelist /shortwave_nml/ & @@ -322,7 +327,18 @@ subroutine input_data alphab = 20.0_dbl_kind ! alphab=Cb factor in Lemieux et al 2015 threshold_hw = 30.0_dbl_kind ! max water depth for grounding Ktens = 0.0_dbl_kind ! T=Ktens*P (tensile strength: see Konig and Holland, 2010) - e_ratio = 2.0_dbl_kind ! EVP ellipse aspect ratio + e_ratio = 2.0_dbl_kind ! VP ellipse aspect ratio + kmax = 1000 ! max nb of iteration for nonlinear solver + precond = 3 ! preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag + im_fgmres = 50 ! size of fgmres Krylov subspace + im_pgmres = 5 ! size of pgmres Krylov subspace + maxits_fgmres = 50 ! max nb of iteration for fgmres + maxits_pgmres = 5 ! max nb of iteration for pgmres + iout = 1 ! 
print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations) + ioutpgmres = 1 ! print pgmres info + gammaNL = 1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) + gamma = 1e-2_dbl_kind ! fgmres stopping criterion: gamma*res(k) + epsprecond = 1e-6_dbl_kind ! pgmres stopping criterion: epsprecond*res(k) advection = 'remap' ! incremental remapping transport scheme conserv_check = .false.! tracer conservation check shortwave = 'ccsm3' ! 'ccsm3' or 'dEdd' (delta-Eddington) @@ -827,7 +843,7 @@ subroutine input_data revised_evp = .false. endif - if (kdyn > 2) then + if (kdyn > 3) then if (my_task == master_task) then write(nu_diag,*) subname//' WARNING: kdyn out of range' endif @@ -1135,28 +1151,35 @@ subroutine input_data write(nu_diag,*) '--------------------------------' if (kdyn == 1) then tmpstr2 = ' elastic-viscous-plastic dynamics' - write(nu_diag,*) 'yield_curve = ', trim(yield_curve) - if (trim(yield_curve) == 'ellipse') & - write(nu_diag,1007) ' e_ratio = ', e_ratio, ' aspect ratio of ellipse' elseif (kdyn == 2) then tmpstr2 = ' elastic-anisotropic-plastic dynamics' + elseif (kdyn == 3) then + tmpstr2 = ' viscous-plastic dynamics' elseif (kdyn < 1) then tmpstr2 = ' dynamics disabled' endif write(nu_diag,1022) ' kdyn = ', kdyn,trim(tmpstr2) if (kdyn >= 1) then - if (revised_evp) then - tmpstr2 = ' revised EVP formulation used' - else - tmpstr2 = ' revised EVP formulation not used' - endif - write(nu_diag,1012) ' revised_evp = ', revised_evp,trim(tmpstr2) - write(nu_diag,1022) ' kevp_kernel = ', kevp_kernel,' EVP solver' - - write(nu_diag,1022) ' ndtd = ', ndtd, ' number of dynamics/advection/ridging/steps per thermo timestep' - write(nu_diag,1022) ' ndte = ', ndte, ' number of EVP or EAP subcycles' - write(nu_diag,1007) ' arlx = ', arlx, ' stress equation factor alpha' - write(nu_diag,1007) ' brlx = ', brlx, ' stress equation factor beta' + if (kdyn == 1 .or. kdyn == 2) then + if (revised_evp) then + tmpstr2 = ' revised EVP formulation used' + write(nu_diag,1007) ' arlx = ', arlx, ' stress equation factor alpha' + write(nu_diag,1007) ' brlx = ', brlx, ' stress equation factor beta' + else + tmpstr2 = ' revised EVP formulation not used' + endif + write(nu_diag,1012) ' revised_evp = ', revised_evp,trim(tmpstr2) + write(nu_diag,1022) ' kevp_kernel = ', kevp_kernel,' EVP solver' + + write(nu_diag,1022) ' ndtd = ', ndtd, ' number of dynamics/advection/ridging/steps per thermo timestep' + write(nu_diag,1022) ' ndte = ', ndte, ' number of EVP or EAP subcycles' + endif + + if (kdyn == 1 .or. 
kdyn == 3) then + write(nu_diag,*) 'yield_curve = ', trim(yield_curve) + if (trim(yield_curve) == 'ellipse') & + write(nu_diag,1007) ' e_ratio = ', e_ratio, ' aspect ratio of ellipse' + endif if (trim(coriolis) == 'latitude') then tmpstr2 = ': latitude-dependent Coriolis parameter' diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index a26579df1..719fe5171 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -139,6 +139,18 @@ kridge = 1 ktransport = 1 ssh_stress = 'geostrophic' + kmax = 1000 + precond = 3 + im_fgmres = 50 + im_pgmres = 5 + maxits_fgmres = 50 + maxits_pgmres = 5 + iout = 1 + ioutpgmres = 1 + gammaNL = 1e-8 + gamma = 1e-2 + epsprecond = 1e-6 + / &shortwave_nml From 596ae1e8424fcecab661e3c6ae3e92c88fb39300 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 23 Jan 2019 14:20:54 -0500 Subject: [PATCH 074/196] {f,}gmres: use nu_diag for output --- cicecore/cicedynB/dynamics/fgmresD.F90 | 6 ++++-- cicecore/cicedynB/dynamics/pgmres.F90 | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 41e314731..e3b8ad695 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,6 +1,8 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL, krre) + use ice_fileunits, only: nu_diag + !----------------------------------------------------------------------- ! jfl Dec 1st 2006. We modified the routine so that it is double precison. ! Here are the modifications: @@ -158,7 +160,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & goto 999 endif - if (iout .gt. 0) write(*, 199) kOL, krre, its, ro!& + if (iout .gt. 0) write(nu_diag, 199) kOL, krre, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. ! @@ -237,7 +239,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) ro = abs(rs(i1)) if (iout .gt. 1) & - write(*, 199) kOL, krre, its, ro + write(nu_diag, 199) kOL, krre, its, ro if (i .lt. im .and. (ro .gt. eps1)) goto 4 ! ! now compute solution. first solve upper triangular system. diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 40d6acad9..1f51b2fd1 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -27,6 +27,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & ! use prec use ice_kinds_mod use ice_dyn_vp, only: matvec, arrays_to_vec, vec_to_arrays, precond_diag + use ice_fileunits, only: nu_diag implicit none @@ -127,7 +128,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & ro = dsqrt(ro) if (iout .gt. 0 .and. its .eq. 0)& - write(iout, 199) its, ro ,eps1 + write(nu_diag, 199) its, ro ,eps1 ! write(6,199) its, ro r0=ro @@ -247,7 +248,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) ro = abs(rs(i1)) if (iout .gt. 0) & - write(iout, 199) its, ro , eps1 + write(nu_diag, 199) its, ro , eps1 if (i .lt. im .and. (ro .gt. eps1)) goto 4 ! ! now compute solution. first solve upper triangular system. 
From a6ae6e5781fbef5fb09d2da08fcd741da71358a6 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 25 Jan 2019 09:05:45 -0500 Subject: [PATCH 075/196] dynamics: correct 2 bugs found using debugging flags - eps1 was printed before being initialized in pgmres - a loop on iblk was missing around the call to deformations in ice_dyn_vp --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 27 +++++++++++++---------- cicecore/cicedynB/dynamics/pgmres.F90 | 5 ++--- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 52b30ea96..8b7ac0951 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -724,18 +724,21 @@ subroutine imp_solver (dt) deallocate(uRRE) endif - call deformations (nx_block, ny_block, & - icellt(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), & - shear (:,:,iblk), divu (:,:,iblk), & - rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) - + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call deformations (nx_block, ny_block, & + icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), & + shear (:,:,iblk), divu (:,:,iblk), & + rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) + enddo + !$OMP END PARALLEL DO if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 1f51b2fd1..54ac2b92e 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -127,9 +127,6 @@ subroutine pgmres(nx_block, ny_block, nblocks, & ro = ddot(n, vv,1,vv,1) ro = dsqrt(ro) - if (iout .gt. 0 .and. its .eq. 0)& - write(nu_diag, 199) its, ro ,eps1 -! write(6,199) its, ro r0=ro if (ro .eq. 0.0d0) goto 999 @@ -138,6 +135,8 @@ subroutine pgmres(nx_block, ny_block, nblocks, & vv(j,1) = vv(j,1)*t 210 continue if (its .eq. 0) eps1=eps*ro + if (iout .gt. 0 .and. its .eq. 0)& + write(nu_diag, 199) its, ro ,eps1 ! ** initialize 1-st term of rhs of hessenberg system.. rs(1) = ro i = 0 From 5a777759e9792738554088eb9273c92dc2efd851 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 28 Jan 2019 09:46:51 -0500 Subject: [PATCH 076/196] ice_dyn_vp: rename VP namelist variables to be more descriptive Also, add namelist flag to monitor nonlinear convergence and document VP namelist variables in runtime log. --- cicecore/cicedynB/dynamics/fgmresD.F90 | 7 ++-- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 13 ++++--- cicecore/cicedynB/general/ice_init.F90 | 41 ++++++++++++++++++++--- configuration/scripts/ice_in | 5 +-- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index e3b8ad695..14ce066d5 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -160,7 +160,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & goto 999 endif - if (iout .gt. 0) write(nu_diag, 199) kOL, krre, its, ro!& + if (iout .gt. 0) write(nu_diag, 199) kOL, its, ro!& ! ! initialize 1-st term of rhs of hessenberg system.. ! 
@@ -239,7 +239,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i)
 ro = abs(rs(i1))
 if (iout .gt. 1) &
- write(nu_diag, 199) kOL, krre, its, ro
+ write(nu_diag, 199) kOL, its, ro
 if (i .lt. im .and. (ro .gt. eps1)) goto 4
 !
 ! now compute solution. first solve upper triangular system.
@@ -287,10 +287,9 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 goto 20
 999 icode = 0
- 199 format('Picard i=', i4, 'cycling i=', i4, ' fmgres i =', i4, ' L2norm =', d26.16)
+ 199 format('monitor_fgmres: iter_nonlin=', i4, ' iter_fgmres=', i4, ' L2norm=', d26.16)
 !
 return
!-----end-of-fgmres-----------------------------------------------------
!-----------------------------------------------------------------------
 end
-

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 8b7ac0951..ec2a4a9b9 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -63,8 +63,11 @@ module ice_dyn_vp
 im_pgmres , & ! size of pgmres Krylov subspace
 maxits_fgmres , & ! max nb of iteration for fgmres
 maxits_pgmres , & ! max nb of iteration for pgmres
- iout , & ! for printing fgmres info
- ioutpgmres ! for printing pgmres info
+ monitor_fgmres , & ! print fgmres residual norm
+ monitor_pgmres ! print pgmres residual norm
+
+ logical (kind=log_kind), public :: &
+ monitor_nonlin ! print nonlinear residual norm

 real (kind=dbl_kind), public :: &
 gammaNL , & ! nonlinear stopping criterion: gammaNL*res(k=0)
@@ -550,7 +553,7 @@ subroutine imp_solver (dt)
 ! sol_eps, maxits,its,conv,icode )

 call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, &
- gamma, gammaNL, tolNL, maxits_fgmres,iout, &
+ gamma, gammaNL, tolNL, maxits_fgmres,monitor_fgmres, &
 icode,iconvNL,fgmres_its,kOL, krre)

 if (iconvNL .eq. 1) exit
@@ -586,8 +589,8 @@ subroutine imp_solver (dt)
 wk22 (:) , wk11(:) , &
 ntot , im_pgmres , &
 epsprecond , maxits_pgmres , &
- ioutpgmres , ierr )
- endif
+ monitor_pgmres , ierr )
+ endif ! precond

 goto 1

diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90
index 422475859..bfffcc084 100644
--- a/cicecore/cicedynB/general/ice_init.F90
+++ b/cicecore/cicedynB/general/ice_init.F90
@@ -101,7 +101,8 @@ subroutine input_data
 Ktens, e_ratio, coriolis, ssh_stress, &
 kridge, ktransport, brlx, arlx
 use ice_dyn_vp, only: kmax, precond, im_fgmres, im_pgmres, maxits_fgmres, &
- maxits_pgmres, iout, ioutpgmres, gammaNL, gamma, epsprecond
+ maxits_pgmres, monitor_nonlin, monitor_fgmres, &
+ monitor_pgmres, gammaNL, gamma, epsprecond
 use ice_transport_driver, only: advection, conserv_check
 use ice_restoring, only: restore_ice
 #ifdef CESMCOUPLED
@@ -197,8 +198,9 @@ subroutine input_data
 kstrength, krdg_partic, krdg_redist, mu_rdg, &
 e_ratio, Ktens, Cf, basalstress, &
 k1, kmax, precond, im_fgmres, &
- im_pgmres, maxits_fgmres, maxits_pgmres, iout, &
- ioutpgmres, gammaNL, gamma, epsprecond, &
+ im_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, &
+ monitor_fgmres, monitor_pgmres, gammaNL, gamma, &
+ epsprecond, &
 k2, alphab, threshold_hw, &
 Pstar, Cstar
@@ -334,8 +336,9 @@ subroutine input_data
 im_pgmres = 5 ! size of pgmres Krylov subspace
 maxits_fgmres = 50 ! max nb of iteration for fgmres
 maxits_pgmres = 5 ! max nb of iteration for pgmres
- iout = 1 ! print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations)
- ioutpgmres = 1 ! print pgmres info
+ monitor_nonlin = .false. ! print nonlinear solver info
+ monitor_fgmres = 1 ! 
print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations) + monitor_pgmres = 1 ! print pgmres info (0: nothing printed, 1: all iterations) gammaNL = 1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) gamma = 1e-2_dbl_kind ! fgmres stopping criterion: gamma*res(k) epsprecond = 1e-6_dbl_kind ! pgmres stopping criterion: epsprecond*res(k) @@ -644,6 +647,18 @@ subroutine input_data call broadcast_scalar(ssh_stress, master_task) call broadcast_scalar(kridge, master_task) call broadcast_scalar(ktransport, master_task) + call broadcast_scalar(kmax, master_task) + call broadcast_scalar(precond, master_task) + call broadcast_scalar(im_fgmres, master_task) + call broadcast_scalar(im_pgmres, master_task) + call broadcast_scalar(maxits_fgmres, master_task) + call broadcast_scalar(maxits_pgmres, master_task) + call broadcast_scalar(monitor_nonlin, master_task) + call broadcast_scalar(monitor_fgmres, master_task) + call broadcast_scalar(monitor_pgmres, master_task) + call broadcast_scalar(gammaNL, master_task) + call broadcast_scalar(gamma, master_task) + call broadcast_scalar(epsprecond, master_task) call broadcast_scalar(conduct, master_task) call broadcast_scalar(R_ice, master_task) call broadcast_scalar(R_pnd, master_task) @@ -1541,6 +1556,21 @@ subroutine input_data write(nu_diag,1010) ' orca_halogrid = ', & orca_halogrid + if (kdyn == 3) then + write(nu_diag,1020) ' kmax = ', kmax + write(nu_diag,1020) ' precond = ', precond + write(nu_diag,1020) ' im_fgmres = ', im_fgmres + write(nu_diag,1020) ' im_pgmres = ', im_pgmres + write(nu_diag,1020) ' maxits_fgmres = ', maxits_fgmres + write(nu_diag,1020) ' maxits_pgmres = ', maxits_pgmres + write(nu_diag,1010) ' monitor_nonlin = ', monitor_nonlin + write(nu_diag,1020) ' monitor_fgmres = ', monitor_fgmres + write(nu_diag,1020) ' monitor_pgmres = ', monitor_pgmres + write(nu_diag,1008) ' gammaNL = ', gammaNL + write(nu_diag,1008) ' gamma = ', gamma + write(nu_diag,1008) ' epsprecond = ', epsprecond + endif + write(nu_diag,1010) ' conserv_check = ', conserv_check write(nu_diag,1020) ' fyear_init = ', & @@ -1692,6 +1722,7 @@ subroutine input_data 1005 format (a30,2x,f12.6) ! float 1006 format (a20,2x,f10.6,a) 1007 format (a20,2x,f6.2,a) + 1008 format (a30,2x,d12.6) ! float, exponential notation 1009 format (a20,2x,d13.6,a) ! float, exponential notation 1010 format (a30,2x,l6) ! logical 1012 format (a20,2x,l3,1x,a) ! logical diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 719fe5171..f8d116c1c 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -145,8 +145,9 @@ im_pgmres = 5 maxits_fgmres = 50 maxits_pgmres = 5 - iout = 1 - ioutpgmres = 1 + monitor_nonlin = .false. + monitor_fgmres = 1 + monitor_pgmres = 1 gammaNL = 1e-8 gamma = 1e-2 epsprecond = 1e-6 From 84f2973cc3f4b2ace5ec9a7b5c153ebb81152c15 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 29 Jan 2019 09:39:47 -0500 Subject: [PATCH 077/196] pgmres: streamlined monitoring output Bring the PGMRES output more in line with the FGMRES and nonlinear solvers. --- cicecore/cicedynB/dynamics/pgmres.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 54ac2b92e..2eaa6d01a 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -311,7 +311,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & if (j .eq. 
1) t = t-1.0d0
 call daxpy (n, t, vv(1,j), 1, vv, 1)
 25 continue
- 199 format(' its =', i4, ' res. norm =', d20.6, ' eps1 =', d20.6)
+ 199 format('monitor_pgmres: iter_pgmres=', i4, ' L2norm=', d26.16, ' epsprecond*initial_L2norm=', d26.6)
 ! restart outer loop.
 goto 20
 990 ierr = 0

From da380754bfa6171011dc1c14a2781f1199ccf751 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 29 Jan 2019 15:32:53 -0500
Subject: [PATCH 078/196] ice_dyn_vp: add computation of nonlinear and fixed
 point residuals

---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 109 +++++++++++++---------
 1 file changed, 67 insertions(+), 42 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index ec2a4a9b9..277b1388b 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -162,6 +162,8 @@ subroutine imp_solver (dt)
 Diagv , & ! Diagonal (v component) of the matrix A
 Fx , & ! x residual vector, Fx = Au - bx
 Fy , & ! y residual vector, Fy = Av - by
+ fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k
+ fpresy , & ! y fixed point residual vector, fy = vvel - vprev_k
 uprev_k , & ! uvel at previous Picard iteration
 vprev_k , & ! vvel at previous Picard iteration
 ulin , & ! uvel to linearize vrel
@@ -515,6 +517,40 @@ subroutine imp_solver (dt)
 enddo
 !$OMP END PARALLEL DO

+ ! Compute nonlinear residual norm
+ if (monitor_nonlin) then
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ call matvec (nx_block , ny_block, &
+ icellu (iblk) , &
+ indxui (:,iblk) , indxuj (:,iblk) , &
+ kOL , icellt (iblk) , &
+ indxti (:,iblk) , indxtj (:,iblk) , &
+ dxt (:,:,iblk) , dyt (:,:,iblk), &
+ dxhy (:,:,iblk) , dyhx (:,:,iblk), &
+ cxp (:,:,iblk) , cyp (:,:,iblk), &
+ cxm (:,:,iblk) , cym (:,:,iblk), &
+ tarear (:,:,iblk) , tinyarea (:,:,iblk), &
+ uvel (:,:,iblk) , vvel (:,:,iblk), &
+ vrel (:,:,iblk) , Cb (:,:,iblk), &
+ zetaD (:,:,iblk,:), aiu (:,:,iblk), &
+ umassdti (:,:,iblk) , fm (:,:,iblk), &
+ uarear (:,:,iblk) , &
+ Au (:,:,iblk) , Av (:,:,iblk))
+ call residual_vec (nx_block , ny_block, &
+ icellu (iblk), &
+ indxui (:,iblk), indxuj (:,iblk), &
+ bx (:,:,iblk), by (:,:,iblk), &
+ Au (:,:,iblk), Av (:,:,iblk), &
+ Fx (:,:,iblk), Fy (:,:,iblk), &
+ L2norm(iblk))
+ enddo
+ !$OMP END PARALLEL DO
+ write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, &
+ " nonlin_res_L2norm= ", L2norm
+ endif
+
+
 !-----------------------------------------------------------------------
 ! prep F G M R E S
 !-----------------------------------------------------------------------
@@ -594,9 +630,7 @@ subroutine imp_solver (dt)

 goto 1

- else
-
- if (icode >= 2) then
+ elseif (icode >= 2) then

 call vec_to_arrays (nx_block, ny_block, nblocks, &
 max_blocks, icellu (:), ntot, &
@@ -654,25 +688,8 @@ subroutine imp_solver (dt)

 goto 1

- endif
+ endif ! icode

- endif

-! deallocate (wk11,wk22,rhs1,sol1,vv_8,ww_8)

-! call calc_L2norm (nx_block , ny_block, &
-! icellu (iblk), &
-! indxui (:,iblk), indxuj (:,iblk), &
-! uvel (:,:,iblk), vvel (:,:,iblk))

-! call residual_vec (nx_block , ny_block, &
-! icellu (iblk), &
-! indxui (:,iblk), indxuj (:,iblk), &
-! bx (:,:,iblk), by (:,:,iblk), &
-! Au (:,:,iblk), Av (:,:,iblk), &
-! Fx (:,:,iblk), Fy (:,:,iblk), &
-! L2norm(iblk))
-
 !-----------------------------------------------------------------------
 ! 
Put vector sol in uvel and vvel arrays !----------------------------------------------------------------------- @@ -718,6 +735,23 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO + ! Compute fixed point residual norm + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) + fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) + call calc_L2norm (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + " fixed_point_res_L2norm= ", L2norm + endif + enddo + !$OMP END PARALLEL DO + enddo ! outer loop deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) @@ -2313,7 +2347,7 @@ subroutine residual_vec (nx_block, ny_block, & bx, by, & Au, Av, & Fx, Fy, & - L2normtp) + L2norm) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2336,20 +2370,20 @@ subroutine residual_vec (nx_block, ny_block, & Fy ! y residual vector, Fy = Av - by (N/m^2) real (kind=dbl_kind), intent(inout) :: & - L2normtp ! (L2norm)^2 + L2norm ! L2norm of residual vector integer (kind=int_kind) :: & i, j, ij !----------------------------------------------------------------- - ! calc b vector + ! calc residual and its L2 norm !----------------------------------------------------------------- call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & file=__FILE__, line=__LINE__) - L2normtp=c0 + L2norm=c0 do ij =1, icellu i = indxui(ij) @@ -2357,20 +2391,9 @@ subroutine residual_vec (nx_block, ny_block, & Fx(i,j) = Au(i,j) - bx(i,j) Fy(i,j) = Av(i,j) - by(i,j) - L2normtp = L2normtp + Fx(i,j)**2 + Fy(i,j)**2 -! Fres(2*ij-1) = Au(i,j) - bx(i,j) -! Fres(2*ij) = Av(i,j) - by(i,j) - + L2norm = L2norm + Fx(i,j)**2 + Fy(i,j)**2 enddo ! ij - -! do ij = 1, ntot - -! Ftp(ij) = Aw(ij) - bvec(ij) - -! enddo - -! L2norm = sqrt(DOT_PRODUCT(Fres,Fres)) -! print *, 'ici L2norm', sqrt(L2normtp) + L2norm = sqrt(L2norm) end subroutine residual_vec @@ -2871,7 +2894,8 @@ end subroutine precond_diag subroutine calc_L2norm (nx_block, ny_block, & icellu, & indxui, indxuj, & - tpu, tpv ) + tpu, tpv, & + L2norm) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2886,15 +2910,16 @@ subroutine calc_L2norm (nx_block, ny_block, & tpu , & ! x-component of vector tpv ! y-component of vector + real (kind=dbl_kind), intent(out) :: & + L2norm ! l^2 norm of vector grid function (tpu,tpv) + ! local variables integer (kind=int_kind) :: & i, j, ij - - real (kind=dbl_kind) :: L2norm !----------------------------------------------------------------- - ! form vector + ! compute l^2 norm of vector grid function (tpu,tpv) !----------------------------------------------------------------- L2norm = c0 From df8ff72c211d35cada306becb54789f9156b5904 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 29 Jan 2019 16:04:45 -0500 Subject: [PATCH 079/196] ice_dyn_vp: remove references to RRE acceleration method We will not use this acceleration method, so remove remaining references to it. 
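For context: RRE (presumably reduced rank extrapolation, the usual meaning of the acronym; the removed code pointed to C. Roland's PhD thesis) and the Anderson acceleration added later in this series both speed up a fixed point iteration u_{k+1} = g(u_k) by combining a window of previous iterates instead of taking the bare Picard update. The standalone sketch below contrasts the two ideas on a scalar toy problem (x = cos x); it is not CICE code, and all names are invented for the example.

    program fixed_point_demo
       implicit none
       integer :: k
       double precision :: x0, x1, x2, f0, f1, theta

       ! plain Picard (fixed point) iteration on x = cos(x)
       x1 = 1.0d0
       do k = 1, 100
          x2 = cos(x1)
          if (abs(x2 - x1) < 1.0d-12) exit
          x1 = x2
       enddo
       write(*,'(a,i4,a,f17.14)') 'Picard      : iterations =', k, '   x =', x2

       ! Anderson acceleration of depth one (equivalent to the secant
       ! method on f(x) = cos(x) - x): choose theta so that the linearized
       ! residual theta*f1 + (1-theta)*f0 vanishes, then combine the two
       ! most recent fixed point updates
       x0 = 1.0d0
       x1 = cos(x0)
       do k = 1, 100
          f0 = cos(x0) - x0
          f1 = cos(x1) - x1
          if (abs(f1) < 1.0d-12) exit
          theta = f0/(f0 - f1)
          x2 = theta*cos(x1) + (1.0d0 - theta)*cos(x0)
          x0 = x1
          x1 = x2
       enddo
       write(*,'(a,i4,a,f17.14)') 'Anderson(1) : iterations =', k, '   x =', x1

    end program fixed_point_demo

On this toy problem the depth-one accelerated iteration typically converges in under ten iterations, versus roughly seventy for plain Picard, which is the kind of payoff that motivates replacing RRE with Anderson acceleration later in the series.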
--- cicecore/cicedynB/dynamics/fgmresD.F90 | 2 +- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 21 ++------------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 14ce066d5..ffe11f7ee 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,5 +1,5 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & - gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL, krre) + gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL) use ice_fileunits, only: nu_diag diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 277b1388b..5f965bfa9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -123,8 +123,6 @@ subroutine imp_solver (dt) integer (kind=int_kind) :: & kOL , & ! outer loop iteration - krre , & ! RRE1 cycling iteration - kmaxrre , & ! nb of RRE1 iterations (hard coded 3) ntot , & ! size of problem for fgmres (for given cpu) icode , & ! for fgmres iconvNL , & ! code for NL convergence criterion @@ -177,7 +175,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:), wk11(:), wk22(:) - real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:), uRRE(:,:) + real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) real (kind=dbl_kind), dimension (max_blocks) :: L2norm real (kind=dbl_kind) :: conv, tolNL @@ -198,8 +196,6 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block - - logical :: RRE1 ! acceleration method for Picard (see C. Roland PhD thesis) call ice_timer_start(timer_dynamics) ! dynamics @@ -214,7 +210,6 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- iconvNL=0 ! equals 1 when NL convergence is reached - RRE1=.false. ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -384,14 +379,6 @@ subroutine imp_solver (dt) allocate(bvec(ntot), sol(ntot), diagvec(ntot), wk11(ntot), wk22(ntot)) allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - if (RRE1) then - kmaxrre=3 - allocate(uRRE(ntot,kmaxrre)) - else - kmaxrre=1 - krre=1 ! JFL TEMP - endif - !----------------------------------------------------------------- call icepack_warnings_flush(nu_diag) @@ -590,7 +577,7 @@ subroutine imp_solver (dt) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & gamma, gammaNL, tolNL, maxits_fgmres,monitor_fgmres, & - icode,iconvNL,fgmres_its,kOL, krre) + icode,iconvNL,fgmres_its,kOL) if (iconvNL .eq. 1) exit @@ -757,10 +744,6 @@ subroutine imp_solver (dt) deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) deallocate(fld2) - if (RRE1) then - deallocate(uRRE) - endif - !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call deformations (nx_block, ny_block, & From aca44ed37157ff0424b32c13c25fec8994c5880c Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 29 Jan 2019 16:31:11 -0500 Subject: [PATCH 080/196] ice_dyn_vp: remove unused 'ulin', 'vlin' variables --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 5f965bfa9..9c780ff41 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -164,8 +164,6 @@ subroutine imp_solver (dt) fpresy , & ! 
y fixed point residual vector, fy = vvel - vprev_k uprev_k , & ! uvel at previous Picard iteration vprev_k , & ! vvel at previous Picard iteration - ulin , & ! uvel to linearize vrel - vlin , & ! vvel to linearize vrel vrel , & ! coeff for tauw Cb , & ! seabed stress coeff aiu , & ! ice fraction on u-grid @@ -433,14 +431,6 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - -! if (kOL .eq. 1) then - ulin(:,:,iblk) = uvel(:,:,iblk) - vlin(:,:,iblk) = vvel(:,:,iblk) -! else -! ulin(:,:,iblk) = p5*uprev_k(:,:,iblk) + p5*uvel(:,:,iblk) -! vlin(:,:,iblk) = p5*vprev_k(:,:,iblk) + p5*vvel(:,:,iblk) -! endif uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) From 945dada2a9b72c6478a37b744c06b566bbf0275b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 7 Feb 2019 17:16:22 -0500 Subject: [PATCH 081/196] ice_dyn_vp: move Picard iteration to separate subroutine Make the code more modular by moving the Picard solver to its own subroutine. Also, rename 'kmax' to 'maxits_nonlin' to bring it in line with 'maxits_{f,p}gmres'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 472 ++++++++++++++-------- cicecore/cicedynB/general/ice_init.F90 | 10 +- configuration/scripts/ice_in | 2 +- 3 files changed, 306 insertions(+), 178 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 9c780ff41..7d26c33ff 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -57,7 +57,7 @@ module ice_dyn_vp ! namelist parameters integer (kind=int_kind), public :: & - kmax , & ! max nb of iteration for nonlinear solver + maxits_nonlin , & ! max nb of iteration for nonlinear solver precond , & ! preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag im_fgmres , & ! size of fgmres Krylov subspace im_pgmres , & ! size of pgmres Krylov subspace @@ -121,15 +121,8 @@ subroutine imp_solver (dt) ! local variables - integer (kind=int_kind) :: & - kOL , & ! outer loop iteration + integer (kind=int_kind) :: & ntot , & ! size of problem for fgmres (for given cpu) - icode , & ! for fgmres - iconvNL , & ! code for NL convergence criterion - its , & ! iteration nb for fgmres - ischmi , & ! Quesse ca!?!?! jfl - fgmres_its , & ! final nb of fgmres_its - ierr , & ! for pgmres precond iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -151,21 +144,9 @@ subroutine imp_solver (dt) forcex , & ! work array: combined atm stress and ocn tilt, x forcey , & ! work array: combined atm stress and ocn tilt, y bxfix , & ! part of bx that is constant during Picard - byfix , & ! part of by that is constant during Picard - bx , & ! b vector - by , & ! b vector - Au , & ! matvec, Fx = Au - bx - Av , & ! matvec, Fy = Av - by - Diagu , & ! Diagonal (u component) of the matrix A - Diagv , & ! Diagonal (v component) of the matrix A - Fx , & ! x residual vector, Fx = Au - bx - Fy , & ! y residual vector, Fy = Av - by + byfix , & ! part of by that is constant during Picard fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k fpresy , & ! y fixed point residual vector, fy = vvel - vprev_k - uprev_k , & ! uvel at previous Picard iteration - vprev_k , & ! vvel at previous Picard iteration - vrel , & ! coeff for tauw - Cb , & ! seabed stress coeff aiu , & ! ice fraction on u-grid umass , & ! total mass of ice and snow (u grid) umassdti ! 
mass of U-cell/dte (kg/m^2 s) @@ -178,13 +159,6 @@ subroutine imp_solver (dt) real (kind=dbl_kind), dimension (max_blocks) :: L2norm real (kind=dbl_kind) :: conv, tolNL - real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? - Dstrtmp - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & - zetaD ! zetaD = 2zeta (viscous coeff) - integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & icetmask, & ! ice extent mask (T-cell) halomask ! generic halo mask @@ -203,12 +177,6 @@ subroutine imp_solver (dt) allocate(fld2(nx_block,ny_block,2,max_blocks)) - !----------------------------------------------------------------- - ! Define a few things for FGMRES and Picard solver - !----------------------------------------------------------------- - - iconvNL=0 ! equals 1 when NL convergence is reached - ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -423,10 +391,283 @@ subroutine imp_solver (dt) enddo endif - do kOL = 1,kmax ! outer loop + + !----------------------------------------------------------------- + ! Start of nonlinear iteration + !----------------------------------------------------------------- + + call picard_solver (icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + fld2, & + aiu, ntot, & + waterx, watery, & + bxfix, byfix, & + umassdti, bvec, & + sol, diagvec, & + fpresx, fpresy, & + halo_info_mask) + + !----------------------------------------------------------------- + ! End of nonlinear iteration + !----------------------------------------------------------------- + + deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) + deallocate(fld2) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call deformations (nx_block, ny_block, & + icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), & + shear (:,:,iblk), divu (:,:,iblk), & + rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) + enddo + !$OMP END PARALLEL DO + + if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) + + ! Force symmetry across the tripole seam + if (trim(grid_type) == 'tripole') then + if (maskhalo_dyn) then + !------------------------------------------------------- + ! set halomask to zero because ice_HaloMask always keeps + ! 
local copies AND tripole zipper communication + !------------------------------------------------------- + halomask = 0 + call ice_HaloMask(halo_info_mask, halo_info, halomask) + + call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info_mask, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info_mask, & + field_loc_center, field_type_scalar) + + call ice_HaloDestroy(halo_info_mask) + else + call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info, & + field_loc_center, field_type_scalar) + + call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info, & + field_loc_center, field_type_scalar) + call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info, & + field_loc_center, field_type_scalar) + endif ! maskhalo + endif ! tripole + + !----------------------------------------------------------------- + ! ice-ocean stress + !----------------------------------------------------------------- + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + call dyn_finish & + (nx_block, ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + aiu (:,:,iblk), fm (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & + strocnxT(:,:,iblk), strocnyT(:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + call u2tgrid_vector(strocnxT) ! 
shift
+ call u2tgrid_vector(strocnyT)
+
+ call ice_timer_stop(timer_dynamics) ! dynamics
+
+ end subroutine imp_solver
+
+!=======================================================================
+
+! Solve nonlinear equation using Picard iterative solver
+!
+! author: JF Lemieux, A. Qaddouri and F. Dupont ECCC
+
+ subroutine picard_solver (icellt, icellu, &
+ indxti, indxtj, &
+ indxui, indxuj, &
+ fld2, &
+ aiu, ntot, &
+ waterx, watery, &
+ bxfix, byfix, &
+ umassdti, bvec, &
+ sol, diagvec, &
+ fpresx, fpresy, &
+ halo_info_mask)
+
+
+ use ice_arrays_column, only: Cdn_ocn
+ use ice_blocks, only: nx_block, ny_block
+ use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, &
+ ice_HaloDestroy, ice_HaloUpdate_stress
+ use ice_domain, only: nblocks, halo_info, maskhalo_dyn
+ use ice_domain_size, only: max_blocks
+ use ice_flux, only: rdg_conv, rdg_shear, strairxT, strairyT, &
+ strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, &
+ strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, &
+ strocnxT, strocnyT, strax, stray, &
+ Tbu, hwater, &
+ stressp_1, stressp_2, stressp_3, stressp_4, &
+ stressm_1, stressm_2, stressm_3, stressm_4, &
+ stress12_1, stress12_2, stress12_3, stress12_4
+ use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, &
+ tarear, uarear, tinyarea
+ use ice_state, only: uvel, vvel, strength
+ use ice_timers, only: timer_dynamics, timer_bound, &
+ ice_timer_start, ice_timer_stop
+
+ integer (kind=int_kind), intent(in) :: &
+ ntot ! size of problem for fgmres (for given cpu)
+
+ integer (kind=int_kind), dimension(max_blocks), intent(in) :: &
+ icellt , & ! no. of cells where icetmask = 1
+ icellu ! no. of cells where iceumask = 1
+
+ integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: &
+ indxti , & ! compressed index in i-direction
+ indxtj , & ! compressed index in j-direction
+ indxui , & ! compressed index in i-direction
+ indxuj ! compressed index in j-direction
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: &
+ fld2 ! work array for boundary updates
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+ aiu , & ! ice fraction on u-grid
+ waterx , & ! for ocean stress calculation, x (m/s)
+ watery , & ! for ocean stress calculation, y (m/s)
+ bxfix , & ! part of bx that is constant during Picard
+ byfix , & ! part of by that is constant during Picard
+ umassdti ! mass of U-cell/dte (kg/m^2 s)
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: &
+ fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k
+ fpresy ! y fixed point residual vector, fy = vvel - vprev_k
+
+ real (kind=dbl_kind), dimension (ntot), intent(inout) :: &
+ bvec , & ! RHS vector for FGMRES
+ sol , & ! solution vector for FGMRES
+ diagvec ! diagonal of matrix A for preconditioners
+
+ type (ice_halo) :: &
+ halo_info_mask ! ghost cell update info for masked halo
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ kOL , & ! outer loop iteration
+ iblk , & ! block index
+ icode , & ! code for fgmres solver
+ ischmi , & ! what's this!?!?! jfl
+ its , & ! iteration nb for fgmres
+ fgmres_its , & ! final nb of fgmres iterations
+ iconvNL , & ! code for NL convergence criterion (equals 1 when NL convergence is reached)
+ ierr ! code for pgmres preconditioner !phb: needed?
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+ uprev_k , & ! uvel at previous Picard iteration
+ vprev_k , & ! 
vvel at previous Picard iteration
+ vrel , & ! coeff for tauw
+ Cb , & ! seabed stress coeff
+ bx , & ! b vector
+ by , & ! b vector
+ Diagu , & ! Diagonal (u component) of the matrix A
+ Diagv , & ! Diagonal (v component) of the matrix A
+ Au , & ! matvec, Fx = Au - bx
+ Av , & ! matvec, Fy = Av - by
+ Fx , & ! x residual vector, Fx = Au - bx
+ Fy ! y residual vector, Fy = Av - by
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: &
+ zetaD ! zetaD = 2zeta (viscous coeff)
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,8):: &
+ stPrtmp, & ! should be (nx_block,ny_block,max_blocks,8)???? no need for both? reuse?
+ Dstrtmp
+
+ real (kind=dbl_kind), dimension (max_blocks) :: &
+ L2norm ! to compute l^2 norm of grid function
+
+ real (kind=dbl_kind), allocatable :: &
+ vv(:,:), ww(:,:) ! work arrays for FGMRES
+
+ real (kind=dbl_kind), allocatable :: &
+ wk11(:), wk22(:) ! work vectors for FGMRES
+
+ real (kind=dbl_kind) :: &
+ conv, & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2
+ tol , & ! tolerance for nonlinear convergence: gammaNL * initial residual norm
+ res_norm ! residual norm for FGMRES
+
+ character(len=*), parameter :: subname = '(picard_solver)'
+
+ ! Allocate space for FGMRES work arrays
+ allocate(wk11(ntot), wk22(ntot))
+ allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres))
+
+ ! Start iterations
+ do kOL = 1,maxits_nonlin ! outer loop
+
 !-----------------------------------------------------------------
- ! Calc zetaD, vrel, Cb and vrel = f(uprev_k, vprev_k)
+ ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k)
 !-----------------------------------------------------------------

 !$OMP PARALLEL DO PRIVATE(iblk)
@@ -456,7 +697,7 @@ subroutine imp_solver (dt)
 uprev_k(:,:,iblk) = uvel(:,:,iblk)
 vprev_k(:,:,iblk) = vvel(:,:,iblk)

 call calc_zeta_Pr (nx_block , ny_block, &
- kOL , icellt(iblk), &
+ icellt(iblk), &
 indxti (:,iblk) , indxtj(:,iblk), &
 uprev_k (:,:,iblk), vprev_k (:,:,iblk), &
 dxt (:,:,iblk), dyt (:,:,iblk), &
@@ -691,7 +693,6 @@ subroutine imp_solver (dt)
 call calc_vrel_Cb (nx_block , ny_block, &
 icellu (iblk), Cdn_ocn (:,:,iblk), &
 indxui (:,iblk), indxuj (:,iblk), &
- kOL , &
 aiu (:,:,iblk), Tbu (:,:,iblk), &
 uocn (:,:,iblk), vocn (:,:,iblk), &
 uprev_k (:,:,iblk), vprev_k (:,:,iblk), &
 vrel (:,:,iblk), Cb (:,:,iblk))

-! prepare b vector (RHS)
+ ! prepare b vector (RHS)
 call calc_bvec (nx_block , ny_block, &
 icellu (iblk), &
 indxui (:,iblk), indxuj (:,iblk), &
- kOL , Cdn_ocn (:,:,iblk), &
+ stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), &
 aiu (:,:,iblk), uarear (:,:,iblk), &
 uocn (:,:,iblk), vocn (:,:,iblk), &
 waterx (:,:,iblk), watery (:,:,iblk), &
 uprev_k (:,:,iblk), vprev_k (:,:,iblk), &
 bxfix (:,:,iblk), byfix (:,:,iblk), &
- bx (:,:,iblk), by (:,:,iblk), &
- stPrtmp (:,:,:))
+ bx (:,:,iblk), by (:,:,iblk))

-! prepare precond matrix
+ ! prepare precond matrix
 if (precond .gt. 1) then

 call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology
@@ -740,9 +740,8 @@ subroutine imp_solver (dt)
 !$OMP PARALLEL DO PRIVATE(iblk)
 do iblk = 1, nblocks
 call matvec (nx_block , ny_block, &
- icellu (iblk) , &
+ icellu (iblk) , icellt (iblk) , &
 indxui (:,iblk) , indxuj (:,iblk) , &
- kOL , icellt (iblk) , &
 indxti (:,iblk) , indxtj (:,iblk) , &
 dxt (:,:,iblk) , dyt (:,:,iblk), &
 dxhy (:,:,iblk) , dyhx (:,:,iblk), &
 cxp (:,:,iblk) , cyp (:,:,iblk), &
 cxm (:,:,iblk) , cym (:,:,iblk), &
 tarear (:,:,iblk) , tinyarea (:,:,iblk), &
@@ -807,10 +806,8 @@ subroutine imp_solver (dt)

 !-----------------------------------------------------------------------
 ! prep F G M R E S
 !-----------------------------------------------------------------------

 icode = 0
-! its = 0
+ ! its = 0
 ischmi = 0

 ! form b vector from matrices (nblocks matrices)
 call arrays_to_vec (nx_block, ny_block, nblocks, &
 max_blocks, icellu (:), ntot, &
 indxui (:,:), indxuj(:,:), &
 bx (:,:,:), by (:,:,:), &
 bvec(:))
 ! form sol vector for fgmres (sol is iniguess at the beginning)
 call arrays_to_vec (nx_block, ny_block, nblocks, &
 max_blocks, icellu (:), ntot, &
 indxui (:,:), indxuj(:,:), &
 uprev_k (:,:,:), vprev_k (:,:,:), &
 sol(:))
 ! form matrix diagonal as a vector from Diagu and Diagv arrays
 call arrays_to_vec (nx_block, ny_block, nblocks, &
 max_blocks, icellu (:), ntot, &
 indxui (:,:), indxuj(:,:), &
 Diagu (:,:,:), Diagv(:,:,:),&
 diagvec(:))

 !-----------------------------------------------------------------------
 ! 
F G M R E S L O O P + !----------------------------------------------------------------------- + 1 continue + !----------------------------------------------------------------------- !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & ! sol_eps, maxits,its,conv,icode ) @@ -655,7 +896,7 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO - + ! form wk2 from Au and Av arrays call arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & @@ -667,9 +908,9 @@ subroutine imp_solver (dt) endif ! icode -!----------------------------------------------------------------------- -! Put vector sol in uvel and vvel arrays -!----------------------------------------------------------------------- + !----------------------------------------------------------------------- + ! Put vector sol in uvel and vvel arrays + !----------------------------------------------------------------------- call vec_to_arrays (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & @@ -678,10 +919,10 @@ subroutine imp_solver (dt) uvel (:,:,:), vvel (:,:,:)) !$OMP PARALLEL DO PRIVATE(iblk) -! do iblk = 1, nblocks -! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) -! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) -! enddo + ! do iblk = 1, nblocks + ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) + ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) + ! enddo !$OMP END PARALLEL DO !$OMP PARALLEL DO PRIVATE(iblk) @@ -730,124 +971,11 @@ subroutine imp_solver (dt) !$OMP END PARALLEL DO enddo ! outer loop - - deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) - deallocate(fld2) - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call deformations (nx_block, ny_block, & - icellt(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), & - shear (:,:,iblk), divu (:,:,iblk), & - rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) - enddo - !$OMP END PARALLEL DO - - if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) - - ! Force symmetry across the tripole seam - if (trim(grid_type) == 'tripole') then - if (maskhalo_dyn) then - !------------------------------------------------------- - ! set halomask to zero because ice_HaloMask always keeps - ! local copies AND tripole zipper communication - !------------------------------------------------------- - halomask = 0 - call ice_HaloMask(halo_info_mask, halo_info, halomask) - call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info_mask, & - field_loc_center, field_type_scalar) + ! 
deallocate FGMRES work arrays + deallocate(wk11, wk22, vv, ww) - call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info_mask, & - field_loc_center, field_type_scalar) - - call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info_mask, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info_mask, & - field_loc_center, field_type_scalar) - - call ice_HaloDestroy(halo_info_mask) - else - call ice_HaloUpdate_stress(stressp_1, stressp_3, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_3, stressp_1, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_2, stressp_4, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressp_4, stressp_2, halo_info, & - field_loc_center, field_type_scalar) - - call ice_HaloUpdate_stress(stressm_1, stressm_3, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_3, stressm_1, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_2, stressm_4, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stressm_4, stressm_2, halo_info, & - field_loc_center, field_type_scalar) - - call ice_HaloUpdate_stress(stress12_1, stress12_3, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_3, stress12_1, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_2, stress12_4, halo_info, & - field_loc_center, field_type_scalar) - call ice_HaloUpdate_stress(stress12_4, stress12_2, halo_info, & - field_loc_center, field_type_scalar) - endif ! maskhalo - endif ! tripole - - !----------------------------------------------------------------- - ! ice-ocean stress - !----------------------------------------------------------------- - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - call dyn_finish & - (nx_block, ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - aiu (:,:,iblk), fm (:,:,iblk), & - strintx (:,:,iblk), strinty (:,:,iblk), & - strairx (:,:,iblk), strairy (:,:,iblk), & - strocnx (:,:,iblk), strocny (:,:,iblk), & - strocnxT(:,:,iblk), strocnyT(:,:,iblk)) - - enddo - !$OMP END PARALLEL DO - - call u2tgrid_vector(strocnxT) ! shift - call u2tgrid_vector(strocnyT) - - call ice_timer_stop(timer_dynamics) ! 
dynamics
-
- end subroutine imp_solver
+ end subroutine picard_solver

!=======================================================================

diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90
index bfffcc084..4c85eab97 100644
--- a/cicecore/cicedynB/general/ice_init.F90
+++ b/cicecore/cicedynB/general/ice_init.F90
@@ -100,7 +100,7 @@ subroutine input_data
 basalstress, k1, k2, alphab, threshold_hw, &
 Ktens, e_ratio, coriolis, ssh_stress, &
 kridge, ktransport, brlx, arlx
- use ice_dyn_vp, only: kmax, precond, im_fgmres, im_pgmres, maxits_fgmres, &
+ use ice_dyn_vp, only: maxits_nonlin, precond, im_fgmres, im_pgmres, maxits_fgmres, &
 maxits_pgmres, monitor_nonlin, monitor_fgmres, &
 monitor_pgmres, gammaNL, gamma, epsprecond
 use ice_transport_driver, only: advection, conserv_check
@@ -197,7 +197,7 @@ subroutine input_data
 advection, coriolis, kridge, ktransport, &
 kstrength, krdg_partic, krdg_redist, mu_rdg, &
 e_ratio, Ktens, Cf, basalstress, &
- k1, kmax, precond, im_fgmres, &
+ k1, maxits_nonlin, precond, im_fgmres, &
 im_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, &
 monitor_fgmres, monitor_pgmres, gammaNL, gamma, &
 epsprecond, &
@@ -330,7 +330,7 @@ subroutine input_data
 threshold_hw = 30.0_dbl_kind ! max water depth for grounding
 Ktens = 0.0_dbl_kind ! T=Ktens*P (tensile strength: see Konig and Holland, 2010)
 e_ratio = 2.0_dbl_kind ! VP ellipse aspect ratio
- kmax = 1000 ! max nb of iteration for nonlinear solver
+ maxits_nonlin = 1000 ! max nb of iteration for nonlinear solver
 precond = 3 ! preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag
 im_fgmres = 50 ! size of fgmres Krylov subspace
 im_pgmres = 5 ! size of pgmres Krylov subspace
@@ -647,7 +647,7 @@ subroutine input_data
 call broadcast_scalar(ssh_stress, master_task)
 call broadcast_scalar(kridge, master_task)
 call broadcast_scalar(ktransport, master_task)
- call broadcast_scalar(kmax, master_task)
+ call broadcast_scalar(maxits_nonlin, master_task)
 call broadcast_scalar(precond, master_task)
 call broadcast_scalar(im_fgmres, master_task)
 call broadcast_scalar(im_pgmres, master_task)
@@ -1557,7 +1557,7 @@ subroutine input_data
 orca_halogrid

 if (kdyn == 3) then
- write(nu_diag,1020) ' kmax = ', kmax
+ write(nu_diag,1020) ' maxits_nonlin = ', maxits_nonlin
 write(nu_diag,1020) ' precond = ', precond
 write(nu_diag,1020) ' im_fgmres = ', im_fgmres
 write(nu_diag,1020) ' im_pgmres = ', im_pgmres

diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in
index f8d116c1c..72d93d842 100644
--- a/configuration/scripts/ice_in
+++ b/configuration/scripts/ice_in
@@ -139,7 +139,7 @@
 kridge = 1
 ktransport = 1
 ssh_stress = 'geostrophic'
- kmax = 1000
+ maxits_nonlin = 1000
 precond = 3
 im_fgmres = 50
 im_pgmres = 5

From e33986c5758789c455095bd912fc2b42458fa743 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Thu, 21 Feb 2019 15:02:22 -0500
Subject: [PATCH 082/196] ice_dyn_vp: add Anderson acceleration for Picard
 iteration

* Remove nonlinear convergence logic from fgmresD.F90 (move to
  picard_solver in ice_dyn_vp.F90)
* Add output of residual norm to fgmresD.F90
* Remove unused variables (fgmresD.F90, pgmres.F90, ice_dyn_vp.F90)
* Fix warning for format 1008 (ice_init.F90)
* Move FGMRES solver to separate subroutine (ice_dyn_vp.F90)
* Remove unused use statements in picard_solver (ice_dyn_vp.F90)
---
 cicecore/cicedynB/dynamics/fgmresD.F90 | 20 +-
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 809 ++++++++++++++++++++--
 cicecore/cicedynB/dynamics/pgmres.F90 | 8 +-
cicecore/cicedynB/general/ice_init.F90 | 36 +-
 configuration/scripts/ice_in | 7 +-
 5 files changed, 787 insertions(+), 93 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90
index ffe11f7ee..5a93e688d 100644
--- a/cicecore/cicedynB/dynamics/fgmresD.F90
+++ b/cicecore/cicedynB/dynamics/fgmresD.F90
@@ -1,5 +1,5 @@
 subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
- gamma,gammaNL,tolNL,maxits,iout,icode,iconv,its,kOL)
+ gamma,maxits,iout,icode,its,ro)

 use ice_fileunits, only: nu_diag

@@ -22,9 +22,9 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 !-----------------------------------------------------------------------
 implicit double precision (a-h,o-z) !jfl modification
- integer n, im, maxits, iout, icode, iconv, kOL, krre
+ integer n, im, maxits, iout, icode
 double precision rhs(*), sol(*), vv(n,im+1),w(n,im)
- double precision wk1(n), wk2(n), gamma, gammaNL
+ double precision wk1(n), wk2(n), gamma, ro
 !-----------------------------------------------------------------------
 ! flexible GMRES routine. This is a version of GMRES which allows a
 ! a variable preconditioner. Implemented with a reverse communication
@@ -114,7 +114,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 ! a value .ne. 0.
 !-----------------------------------------------------------------------
 ! local variables -- !jfl modif
- double precision hh(201,200),c(200),s(200),rs(201),t,ro,ddot,sqrt
+ double precision hh(201,200),c(200),s(200),rs(201),t,ddot,sqrt
 !
 !-------------------------------------------------------------
 ! arnoldi size should not exceed 50 in this version..
@@ -152,15 +152,9 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 if (its .eq. 0) then
 r0 = ro
 eps1=gamma*ro
- if (kOL .eq. 1 .and. krre .eq. 1) tolNL=gammaNL*ro
 endif

- if (ro .lt. tolNL .and. krre .eq. 1) then
- iconv = 1
- goto 999
- endif
-
- if (iout .gt. 0) write(nu_diag, 199) kOL, its, ro!&
+ if (iout .gt. 0) write(nu_diag, 199) its, ro!&
 !
 ! initialize 1-st term of rhs of hessenberg system..
 !
@@ -239,7 +233,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i)
 ro = abs(rs(i1))
 if (iout .gt. 1) &
- write(nu_diag, 199) kOL, its, ro
+ write(nu_diag, 199) its, ro
 if (i .lt. im .and. (ro .gt. eps1)) goto 4
 !
 ! now compute solution. first solve upper triangular system.
@@ -287,7 +281,7 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, &
 goto 20
 999 icode = 0

- 199 format('monitor_fgmres: iter_nonlin=', i4, ' iter_fgmres=', i4, ' L2norm=', d26.16)
+ 199 format('monitor_fgmres: iter_fgmres=', i4, ' L2norm=', d26.16)
 !
 return
!-----end-of-fgmres-----------------------------------------------------
!-----------------------------------------------------------------------
 end

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 7d26c33ff..5ca37f092 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -64,7 +64,11 @@ module ice_dyn_vp
 maxits_fgmres , & ! max nb of iteration for fgmres
 maxits_pgmres , & ! max nb of iteration for pgmres
 monitor_fgmres , & ! print fgmres residual norm
- monitor_pgmres ! print pgmres residual norm
+ monitor_pgmres , & ! print pgmres residual norm
+ algo_nonlin , & ! nonlinear algorithm: 1: Picard iteration, 2: Anderson acceleration (andacc)
+ fpfunc_andacc , & ! fixed point function for Anderson acceleration: 1: g(x) = FGMRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x)
+ im_andacc , & ! size of Anderson minimization matrix (number of saved previous residuals)
+ start_andacc ! 
acceleration delay factor (acceleration starts at this iteration) logical (kind=log_kind), public :: & monitor_nonlin ! print nonlinear residual norm @@ -72,7 +76,9 @@ module ice_dyn_vp real (kind=dbl_kind), public :: & gammaNL , & ! nonlinear stopping criterion: gammaNL*res(k=0) gamma , & ! fgmres stopping criterion: gamma*res(k) - epsprecond ! pgmres stopping criterion: epsprecond*res(k) + epsprecond , & ! pgmres stopping criterion: epsprecond*res(k) + damping_andacc , & ! damping factor for Anderson acceleration + reltol_andacc ! relative tolerance for Anderson acceleration !======================================================================= @@ -342,8 +348,7 @@ subroutine imp_solver (dt) enddo ntot = 2*ntot ! times 2 because of u and v - allocate(bvec(ntot), sol(ntot), diagvec(ntot), wk11(ntot), wk22(ntot)) - allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + allocate(bvec(ntot), sol(ntot), diagvec(ntot)) !----------------------------------------------------------------- @@ -395,24 +400,36 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- ! Start of nonlinear iteration !----------------------------------------------------------------- - - call picard_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - fld2, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & - fpresx, fpresy, & - halo_info_mask) - + if (algo_nonlin == 1) then + call picard_solver (icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + fld2, & + aiu, ntot, & + waterx, watery, & + bxfix, byfix, & + umassdti, bvec, & + sol, diagvec, & + fpresx, fpresy, & + halo_info_mask) + elseif (algo_nonlin == 2) then + call anderson_solver (icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + fld2, & + aiu, ntot, & + waterx, watery, & + bxfix, byfix, & + umassdti, bvec, & + sol, diagvec, & + fpresx, fpresy, & + halo_info_mask) + endif !----------------------------------------------------------------- ! End of nonlinear iteration !----------------------------------------------------------------- - deallocate(bvec, sol, diagvec, wk11, wk22, vv, ww) + deallocate(bvec, sol, diagvec) deallocate(fld2) !$OMP PARALLEL DO PRIVATE(iblk) @@ -430,7 +447,8 @@ subroutine imp_solver (dt) rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) enddo !$OMP END PARALLEL DO - + ! phb: here we do halo updates for stresses (stressp_i, stressm_i, stress12_i, i=1..4), + ! but stresses have not been updated ! (should be done in deformations ?) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! 
Force symmetry across the tripole seam
@@ -548,26 +566,16 @@ subroutine picard_solver (icellt, icellu, &
 fpresx, fpresy, &
 halo_info_mask)

- use ice_arrays_column, only: Cdn_ocn
 use ice_blocks, only: nx_block, ny_block
- use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, &
- ice_HaloDestroy, ice_HaloUpdate_stress
+ use ice_boundary, only: ice_halo, ice_HaloUpdate
 use ice_domain, only: nblocks, halo_info, maskhalo_dyn
 use ice_domain_size, only: max_blocks
- use ice_flux, only: rdg_conv, rdg_shear, strairxT, strairyT, &
- strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, &
- strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, &
- strocnxT, strocnyT, strax, stray, &
- Tbu, hwater, &
- stressp_1, stressp_2, stressp_3, stressp_4, &
- stressm_1, stressm_2, stressm_3, stressm_4, &
- stress12_1, stress12_2, stress12_3, stress12_4
+ use ice_flux, only: uocn, vocn, fm, Tbu
 use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, &
 tarear, uarear, tinyarea
 use ice_state, only: uvel, vvel, strength
- use ice_timers, only: timer_dynamics, timer_bound, &
- ice_timer_start, ice_timer_stop
+ use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop

 integer (kind=int_kind), intent(in) :: &
 ntot ! size of problem for fgmres (for given cpu)
@@ -614,7 +622,6 @@ subroutine picard_solver (icellt, icellu, &
 ischmi , & ! what's this!?!?! jfl
 its , & ! iteration nb for fgmres
 fgmres_its , & ! final nb of fgmres iterations
- iconvNL , & ! code for NL convergence criterion (equals 1 when NL convergence is reached)
 ierr ! code for pgmres preconditioner !phb: needed?

 real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
@@ -648,8 +655,9 @@ subroutine picard_solver (icellt, icellu, &
 wk11(:), wk22(:) ! work vectors for FGMRES

 real (kind=dbl_kind) :: &
- conv, & ! ratio of current residual and initial residual for FGMRES !phb: needed?
- tolNL ! gammaNL * (current residual) !phb: needed?
+ conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2
+ tol , & ! tolerance for nonlinear convergence: gammaNL * initial residual norm
+ res_norm ! residual norm for FGMRES

 character(len=*), parameter :: subname = '(picard_solver)'

@@ -657,12 +665,6 @@ subroutine picard_solver (icellt, icellu, &
 allocate(wk11(ntot), wk22(ntot))
 allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres))

- !-----------------------------------------------------------------
- ! Define a few things for FGMRES and Picard solver
- !-----------------------------------------------------------------
-
- iconvNL=0 ! equals 1 when NL convergence is reached
-
 ! Start iterations
 do kOL = 1,maxits_nonlin ! 
outer loop @@ -677,7 +679,7 @@ subroutine picard_solver (icellt, icellu, & vprev_k(:,:,iblk) = vvel(:,:,iblk) call calc_zeta_Pr (nx_block , ny_block, & - kOL , icellt(iblk), & + icellt(iblk), & indxti (:,iblk) , indxtj(:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & @@ -691,7 +693,6 @@ subroutine picard_solver (icellt, icellu, & call calc_vrel_Cb (nx_block , ny_block, & icellu (iblk), Cdn_ocn (:,:,iblk), & indxui (:,iblk), indxuj (:,iblk), & - kOL , & aiu (:,:,iblk), Tbu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & @@ -701,14 +702,13 @@ subroutine picard_solver (icellt, icellu, & call calc_bvec (nx_block , ny_block, & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & - kOL , Cdn_ocn (:,:,iblk), & + stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & aiu (:,:,iblk), uarear (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - stPrtmp (:,:,:)) + bx (:,:,iblk), by (:,:,iblk)) ! prepare precond matrix if (precond .gt. 1) then @@ -740,9 +740,8 @@ subroutine picard_solver (icellt, icellu, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call matvec (nx_block , ny_block, & - icellu (iblk) , & + icellu (iblk) , icellt (iblk) , & indxui (:,iblk) , indxuj (:,iblk) , & - kOL , icellt (iblk) , & indxti (:,iblk) , indxtj (:,iblk) , & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & @@ -807,10 +806,8 @@ subroutine picard_solver (icellt, icellu, & ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, gammaNL, tolNL, maxits_fgmres,monitor_fgmres, & - icode,iconvNL,fgmres_its,kOL) - - if (iconvNL .eq. 1) exit + gamma, maxits_fgmres, monitor_fgmres, & + icode,fgmres_its, res_norm) if (icode == 1) then @@ -829,7 +826,7 @@ subroutine picard_solver (icellt, icellu, & call pgmres (nx_block, ny_block, nblocks , & max_blocks , icellu (:) , & indxui (:,:) , indxuj (:,:) , & - kOL , icellt (:) , & + icellt (:) , & indxti (:,:) , indxtj (:,:) , & dxt (:,:,:) , dyt (:,:,:) , & dxhy (:,:,:) , dyhx (:,:,:) , & @@ -878,9 +875,8 @@ subroutine picard_solver (icellt, icellu, & do iblk = 1, nblocks call matvec (nx_block , ny_block, & - icellu (iblk) , & + icellu (iblk) , icellt (iblk) , & indxui (:,iblk) , indxuj (:,iblk) , & - kOL , icellt (iblk) , & indxti (:,iblk) , indxtj (:,iblk) , & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & @@ -907,6 +903,15 @@ subroutine picard_solver (icellt, icellu, & goto 1 endif ! icode + + ! Compute relative tolerance at first iteration + if (kOL == 1) then + tol = gammaNL*res_norm + endif + ! Check for nonlinear convergence + if (res_norm < tol) then + exit + endif !----------------------------------------------------------------------- ! Put vector sol in uvel and vvel arrays @@ -979,10 +984,638 @@ end subroutine picard_solver !======================================================================= +! Solve nonlinear equation using fixed point iteration, accelerated with +! Anderson acceleration +! +! author: P. 
Blain ECCC
+
+ subroutine anderson_solver (icellt, icellu, &
+ indxti, indxtj, &
+ indxui, indxuj, &
+ fld2, &
+ aiu, ntot, &
+ waterx, watery, &
+ bxfix, byfix, &
+ umassdti, bvec, &
+ sol, diagvec, &
+ fpresx, fpresy, &
+ halo_info_mask)
+
+ use ice_arrays_column, only: Cdn_ocn
+ use ice_blocks, only: nx_block, ny_block
+ use ice_boundary, only: ice_halo, ice_HaloUpdate
+ use ice_constants, only: c1
+ use ice_domain, only: nblocks, halo_info, maskhalo_dyn
+ use ice_domain_size, only: max_blocks
+ use ice_flux, only: uocn, vocn, fm, Tbu
+ use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, &
+ tarear, uarear, tinyarea
+ use ice_state, only: uvel, vvel, strength
+ use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop
+
+ integer (kind=int_kind), intent(in) :: &
+ ntot ! size of problem for fgmres (for given cpu)
+
+ integer (kind=int_kind), dimension(max_blocks), intent(in) :: &
+ icellt , & ! no. of cells where icetmask = 1
+ icellu ! no. of cells where iceumask = 1
+
+ integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: &
+ indxti , & ! compressed index in i-direction
+ indxtj , & ! compressed index in j-direction
+ indxui , & ! compressed index in i-direction
+ indxuj ! compressed index in j-direction
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: &
+ fld2 ! work array for boundary updates
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+ aiu , & ! ice fraction on u-grid
+ waterx , & ! for ocean stress calculation, x (m/s)
+ watery , & ! for ocean stress calculation, y (m/s)
+ bxfix , & ! part of bx that is constant during Picard
+ byfix , & ! part of by that is constant during Picard
+ umassdti ! mass of U-cell/dte (kg/m^2 s)
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: &
+ fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k
+ fpresy ! y fixed point residual vector, fy = vvel - vprev_k
+
+ real (kind=dbl_kind), dimension (ntot), intent(inout) :: &
+ bvec , & ! RHS vector for FGMRES
+ sol , & ! current approximate solution
+ diagvec ! diagonal of matrix A for preconditioners
+
+ type (ice_halo) :: &
+ halo_info_mask ! ghost cell update info for masked halo
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ it_nl , & ! nonlinear loop iteration index
+ res_num , & ! current number of stored residuals
+ j , & ! iteration index for QR update
+ iblk , & ! block index
+ icode , & ! code for fgmres solver
+ ischmi , & ! what's this!?!?! jfl
+ its , & ! iteration nb for fgmres
+ fgmres_its , & ! final nb of fgmres iterations
+ iconvNL , & ! code for NL convergence criterion (equals 1 when NL convergence is reached)
+ ierr ! code for pgmres preconditioner !phb: needed?
+
+ integer (kind=int_kind), parameter :: &
+ inc = 1 ! increment value for BLAS calls
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+ uprev_k , & ! uvel at previous Picard iteration
+ vprev_k , & ! vvel at previous Picard iteration
+ vrel , & ! coeff for tauw
+ Cb , & ! seabed stress coeff
+ bx , & ! b vector
+ by , & ! b vector
+ Diagu , & ! Diagonal (u component) of the matrix A
+ Diagv , & ! Diagonal (v component) of the matrix A
+ Au , & ! matvec, Fx = Au - bx
+ Av , & ! matvec, Fy = Av - by
+ Fx , & ! x residual vector, Fx = Au - bx
+ Fy ! y residual vector, Fy = Av - by
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: &
+ zetaD ! 
zetaD = 2zeta (viscous coeff) + + real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & + stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? + Dstrtmp + + real (kind=dbl_kind), dimension (max_blocks) :: & + L2norm ! to compute l^2 norm of grid function + + real (kind=dbl_kind), dimension (ntot) :: & + res , & ! current residual + res_old , & ! previous residual + res_diff , & ! difference between current and previous residuals + fpfunc , & ! current value of fixed point function + fpfunc_old , & ! previous value of fixed point function + Fvec , & ! (Fx,Fy) (nonlinear residual) as vector + tmp ! temporary vector for BLAS calls + + real (kind=dbl_kind), dimension(ntot,im_andacc) :: & + Q , & ! Q factor for QR factorization of F (residuals) matrix + G_diff ! Matrix containing the differences of g(x) (fixed point function) evaluations + + real (kind=dbl_kind), dimension(im_andacc,im_andacc) :: & + R ! R factor for QR factorization of F (residuals) matrix + + real (kind=dbl_kind), dimension(im_andacc) :: & + rhs_tri , & ! right hand side vector for matrix-vector product + coeffs ! coeffs used to combine previous solutions + + real (kind=dbl_kind) :: & + conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2 + tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) + tol_nl , & ! tolerance for nonlinear convergence: gammaNL * (initial nonlinear residual norm) + fpres_norm , & ! norm of current fixed point residual + nlres_norm , & ! norm of current nonlinear residual + ddot, dnrm2 ! BLAS functions + + character(len=*), parameter :: subname = '(anderson_solver)' + + ! Initialization + res_num = 0 + + ! Start iterations + do it_nl = 0, maxits_nonlin ! nonlinear iteration loop + ! Compute quantities needed for computing PDE residual and g(x) (fixed point map) + !----------------------------------------------------------------- + ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) + !----------------------------------------------------------------- + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + uprev_k(:,:,iblk) = uvel(:,:,iblk) + vprev_k(:,:,iblk) = vvel(:,:,iblk) + + call calc_zeta_Pr (nx_block , ny_block, & + icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tarear (:,:,iblk), tinyarea (:,:,iblk),& + strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + stPrtmp (:,:,:) ) + + call calc_vrel_Cb (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + aiu (:,:,iblk), Tbu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + vrel (:,:,iblk), Cb (:,:,iblk)) + + ! prepare b vector (RHS) + call calc_bvec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & + aiu (:,:,iblk), uarear (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + bxfix (:,:,iblk), byfix (:,:,iblk), & + bx (:,:,iblk), by (:,:,iblk)) + + ! 
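! Illustrative aside (not part of the solver): the matvec/residual_vec
! pair below computes the nonlinear residual F(u) = A(u)u - b(u) and
! its L2 norm, which drives the stopping test.  A minimal standalone
! sketch, single block, no land mask, hypothetical names:

      subroutine residual_sketch (n, Au, Av, bx, by, Fx, Fy, L2norm)
        integer, intent(in) :: n
        double precision, intent(in)  :: Au(n), Av(n), bx(n), by(n)
        double precision, intent(out) :: Fx(n), Fy(n), L2norm
        Fx = Au - bx                            ! x component of A(u)u - b(u)
        Fy = Av - by                            ! y component
        L2norm = sqrt(sum(Fx**2) + sum(Fy**2))  ! combined L2 norm
      end subroutine residual_sketch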
Compute nonlinear residual norm (PDE residual) + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + uprev_k (:,:,iblk) , vprev_k (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + call residual_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk), & + Fx (:,:,iblk), Fy (:,:,iblk), & + L2norm(iblk)) + nlres_norm = L2norm(iblk) ! phb: change after parallelization + enddo + !$OMP END PARALLEL DO + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + " nonlin_res_L2norm= ", nlres_norm + endif + ! Compute relative tolerance at first iteration + if (it_nl == 0) then + tol_nl = gammaNL*nlres_norm + endif + + ! Check for nonlinear convergence + if (nlres_norm < tol_nl) then + exit + endif + + ! Form b vector from matrices (nblocks matrices) + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + bx (:,:,:), by (:,:,:), & + bvec(:)) + ! Form sol vector for fgmres (sol is iniguess at the beginning) + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + uprev_k (:,:,:), vprev_k (:,:,:), & + sol(:)) + ! Compute fixed point map g(x) + if (fpfunc_andacc == 1) then + ! g_1(x) = FGMRES(A(x), b(x)) + + ! Prepare precond matrix + if (precond .gt. 1) then + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology + icellu (iblk), & + indxui (:,iblk), indxuj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + call formDiag_step2 (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + Dstrtmp (:,:,:) , vrel (:,:,iblk), & + umassdti (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & + Diagu (:,:,iblk), Diagv (:,:,iblk)) + enddo + !$OMP END PARALLEL DO + ! Form matrix diagonal as a vector from Diagu and Diagv arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Diagu (:,:,:), Diagv(:,:,:),& + diagvec(:)) + endif + + ! FGMRES linear solver (solution is in fpfunc) + fpfunc = sol + call fgmres_solver (ntot, bvec, & + fpfunc, diagvec, & + icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + zetaD, & + Cb, vrel, & + aiu, umassdti, & + fld2) + elseif (fpfunc_andacc == 2) then + ! g_2(x) = x - A(x)x + b(x) = x - F(x) + endif + + ! Compute residual + res = fpfunc - sol + fpres_norm = dnrm2(size(res), res, inc) + ! if (monitor_nonlin) then + ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + ! " fixed_point_res_L2norm= ", fpres_norm + ! endif + + ! Store initial residual norm + if (it_nl == 0) then + tol = reltol_andacc*fpres_norm + endif + + ! Check residual + if (fpres_norm < tol) then + exit + endif + + if (im_andacc == 0 .or. it_nl < start_andacc) then + ! 
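! Illustrative aside: the two fixed point maps selectable through
! 'fpfunc_andacc' above, in condensed form.  Both have the VP solution
! as their fixed point g(x) = x:
!   g_1(x): solve the linearized system A(x) y = b(x) for y (via FGMRES)
!   g_2(x): y = x - (A(x)x - b(x)) = x - F(x), one Richardson step
! Minimal sketch of g_2 (hypothetical names; the caller supplies
! Ax = A(x)*x):

      subroutine fp_map_g2 (n, x, Ax, b, y)
        integer, intent(in) :: n
        double precision, intent(in)  :: x(n), Ax(n), b(n)
        double precision, intent(out) :: y(n)
        y = x - (Ax - b)
      end subroutine fp_map_g2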
Simple fixed point (Picard) iteration in this case + sol = fpfunc + else + ! Begin Anderson acceleration + if (it_nl > start_andacc) then + ! Update residual difference vector + res_diff = res - res_old + ! Update fixed point function difference matrix + if (res_num < im_andacc) then + ! Add column + G_diff(:,res_num+1) = fpfunc - fpfunc_old + else + ! Delete first column and add column + G_diff(:,1:res_num-1) = G_diff(:,2:res_num) + G_diff(:,res_num) = fpfunc - fpfunc_old + endif + res_num = res_num + 1 + endif + res_old = res + fpfunc_old = fpfunc + if (res_num == 0) then + sol = fpfunc + else + if (res_num == 1) then + ! Initialize QR factorization + R(1,1) = dnrm2(size(res_diff), res_diff, inc) + Q(:,1) = res_diff/R(1,1) + else + if (res_num > im_andacc) then + ! Update factorization since 1st column was deleted + call qr_delete(Q,R) + res_num = res_num - 1 + endif + ! Update QR factorization for new column + do j = 1, res_num - 1 + R(j,res_num) = ddot(ntot, Q(:,j), inc, res_diff, inc) + res_diff = res_diff - R(j,res_num) * Q(:,j) + enddo + R(res_num, res_num) = dnrm2(size(res_diff) ,res_diff, inc) + Q(:,res_num) = res_diff / R(res_num, res_num) + endif + ! phb: here, drop more columns to improve conditioning + ! if (droptol) then + + ! endif + ! Solve least square problem for coefficients + ! 1. Compute rhs_tri = Q^T * res + call dgemv ('t', size(Q,1), res_num, c1, Q(:,1:res_num), size(Q,1), res, inc, c0, rhs_tri, inc) + ! 2. Solve R*coeffs = rhs_tri, puts result in rhs_tri + call dtrsv ('u', 'n', 'n', res_num, R(1:res_num,1:res_num), res_num, rhs_tri, inc) + coeffs = rhs_tri + ! Update approximate solution: x = fpfunc - G_diff*coeffs, puts result in fpfunc + call dgemv ('n', size(G_diff,1), res_num, -c1, G_diff(:,1:res_num), size(G_diff,1), coeffs, inc, c1, fpfunc, inc) + sol = fpfunc + ! Apply damping + if (damping_andacc > 0 .and. damping_andacc /= 1) then + ! x = x - (1-beta) (res - Q*R*coeffs) + + ! tmp = R*coeffs + call dgemv ('n', res_num, res_num, c1, R(1:res_num,1:res_num), res_num, coeffs, inc, c0, tmp, inc) + ! res = res - Q*tmp + call dgemv ('n', size(Q,1), res_num, -c1, Q(:,1:res_num), size(Q,1), tmp, inc, c1, res, inc) + ! x = x - (1-beta)*res + sol = sol - (1-damping_andacc)*res + endif + endif + endif + + !----------------------------------------------------------------------- + ! Put vector sol in uvel and vvel arrays + !----------------------------------------------------------------------- + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + sol (:), & + uvel (:,:,:), vvel (:,:,:)) + ! Load velocity into array for boundary updates + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + + ! Unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + ! 
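! Illustrative aside: the least-squares step coded above, isolated.
! Given the thin QR factors Q (ntot x k, orthonormal columns) and R
! (k x k, upper triangular) of the residual-difference matrix, the
! minimizer of ||res - Q*R*coeffs||_2 satisfies R*coeffs = Q^T res.
! Standalone sketch (assumes a linked BLAS; names are hypothetical):

      subroutine andacc_coeffs (ntot, k, Q, R, res, coeffs)
        integer, intent(in) :: ntot, k
        double precision, intent(in)  :: Q(ntot,k), R(k,k), res(ntot)
        double precision, intent(out) :: coeffs(k)
        integer, parameter :: inc = 1
        ! coeffs <- Q^T * res
        call dgemv ('t', ntot, k, 1.0d0, Q, ntot, res, inc, &
                    0.0d0, coeffs, inc)
        ! coeffs <- R^{-1} * coeffs (back substitution)
        call dtrsv ('u', 'n', 'n', k, R, k, coeffs, inc)
      end subroutine andacc_coeffs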
Compute fixed point residual norm + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) + fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) + call calc_L2norm (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + " fixed_point_res_L2norm= ", L2norm + endif + enddo + !$OMP END PARALLEL DO + + enddo ! nonlinear iteration loop + + end subroutine anderson_solver + +!======================================================================= + +! Driver for the FGMRES linear solver + + subroutine fgmres_solver (ntot, bvec, & + sol, diagvec, & + icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + zetaD, & + Cb, vrel, & + aiu, umassdti, & + fld2) + + use ice_blocks, only: nx_block, ny_block + use ice_boundary, only: ice_HaloUpdate + use ice_domain, only: nblocks, halo_info, maskhalo_dyn + use ice_domain_size, only: max_blocks + use ice_flux, only: uocn, vocn, fm, Tbu + use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & + tarear, uarear, tinyarea + use ice_state, only: uvel, vvel, strength + + integer (kind=int_kind), intent(in) :: & + ntot ! size of problem for fgmres (for given cpu) + + real (kind=dbl_kind), dimension (ntot), intent(in) :: & + bvec , & ! RHS vector for FGMRES + diagvec ! diagonal of matrix A for preconditioners + + real (kind=dbl_kind), dimension (ntot), intent(inout) :: & + sol ! solution vector for FGMRES + + integer (kind=int_kind), dimension(max_blocks), intent(in) :: & + icellt , & ! no. of cells where icetmask = 1 + icellu ! no. of cells where iceumask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj , & ! compressed index in j-direction + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & + vrel , & ! coeff for tauw + Cb , & ! seabed stress coeff + aiu , & ! ice fraction on u-grid + umassdti ! mass of U-cell/dte (kg/m^2 s) + + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & + zetaD ! zetaD = 2zeta (viscous coeff) + + real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & + fld2 ! work array for boundary updates + + ! local variables + + integer (kind=int_kind) :: & + iblk , & ! block index + icode , & ! code for fgmres solver + its , & ! iteration nb for fgmres + fgmres_its , & ! final nb of fgmres iterations + ierr ! code for pgmres preconditioner !phb: needed? + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & + Au , & ! matvec, Fx = Au - bx + Av ! matvec, Fy = Av - by + + real (kind=dbl_kind), allocatable :: & + vv(:,:), ww(:,:) ! work arrays for FGMRES + + real (kind=dbl_kind), allocatable :: & + wk11(:), wk22(:) ! work vectors for FGMRES + + real (kind=dbl_kind) :: & + res_norm ! residual norm for FGMRES + + character(len=*), parameter :: subname = '(fgmres_solver)' + + ! Allocate space for FGMRES work arrays + allocate(wk11(ntot), wk22(ntot)) + allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + + !----------------------------------------------------------------------- + ! 
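! Note on the allocation above (a general property of flexible GMRES,
! not specific to CICE): the Arnoldi process needs im_fgmres+1
! orthonormal basis vectors, stored in vv, while FGMRES must also keep
! the im_fgmres preconditioned direction vectors in ww, because a
! flexible (changing) preconditioner means the solution has to be
! reconstructed from the preconditioned directions rather than from
! the Krylov basis itself.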
prep F G M R E S + !----------------------------------------------------------------------- + + icode = 0 + + !----------------------------------------------------------------------- + ! F G M R E S L O O P + !----------------------------------------------------------------------- + 1 continue + !----------------------------------------------------------------------- + + !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & + ! sol_eps, maxits,its,conv,icode ) + + call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & + gamma, maxits_fgmres,monitor_fgmres, & + icode, fgmres_its, res_norm) + + if (icode == 1) then + + if (precond .eq. 1) then + + wk22(:)=wk11(:) ! precond=identity + + elseif (precond .eq. 2) then ! use diagonal of A for precond step + + call precond_diag (ntot, & + diagvec (:), & + wk11 (:), wk22 (:) ) + + elseif (precond .eq. 3) then + + call pgmres (nx_block, ny_block, nblocks , & + max_blocks , icellu (:) , & + indxui (:,:) , indxuj (:,:) , & + icellt (:) , & + indxti (:,:) , indxtj (:,:) , & + dxt (:,:,:) , dyt (:,:,:) , & + dxhy (:,:,:) , dyhx (:,:,:) , & + cxp (:,:,:) , cyp (:,:,:) , & + cxm (:,:,:) , cym (:,:,:) , & + tarear (:,:,:) , tinyarea (:,:,:) , & + vrel (:,:,:) , Cb (:,:,:) , & + zetaD (:,:,:,:) , aiu (:,:,:) , & + umassdti (:,:,:) , fm (:,:,:) , & + uarear (:,:,:) , diagvec(:) , & + wk22 (:) , wk11(:) , & + ntot , im_pgmres , & + epsprecond , maxits_pgmres , & + monitor_pgmres , ierr ) + endif ! precond + + goto 1 + + elseif (icode >= 2) then + + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + wk11 (:), & + uvel (:,:,:), vvel (:,:,:)) + + ! JFL halo update could be in subroutine... + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + uvel (:,:,iblk) , vvel (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + ! form wk2 from Au and Av arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Au (:,:,:), Av (:,:,:), & + wk22(:)) + + goto 1 + + endif ! icode + + deallocate(wk11, wk22, vv, ww) + + end subroutine fgmres_solver + +!======================================================================= + ! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. subroutine calc_zeta_Pr (nx_block, ny_block, & - kOL, icellt, & + icellt, & indxti, indxtj, & uvel, vvel, & dxt, dyt, & @@ -995,7 +1628,6 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - kOL , & ! subcycling step icellt ! no. 
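! Illustrative aside: the icode logic above is a classic
! reverse-communication interface: the solver core never calls the
! matrix or preconditioner itself; it returns a request code, and the
! caller performs the operation on the work vectors before
! re-entering.  Self-contained toy analogue (Richardson iteration on a
! fixed 2x2 system; this is not the actual fgmres interface):

      program rc_toy
        implicit none
        double precision :: A(2,2), b(2), x(2), wk11(2), wk22(2)
        double precision, parameter :: omega = 0.4d0
        integer :: icode, it
        A = reshape((/2.0d0, 0.0d0, 0.0d0, 1.0d0/), (/2,2/))
        b = (/2.0d0, 1.0d0/)            ! exact solution is x = (1,1)
        x = 0.0d0
        wk11 = x
        icode = 2                       ! "solver" requests a matvec
        do it = 1, 100
           if (icode == 2) wk22 = matmul(A, wk11)  ! caller-side operation
           x = x + omega*(b - wk22)     ! "solver"-side update
           if (maxval(abs(b - matmul(A, x))) < 1.0d-12) then
              icode = 0                 ! converged
              exit
           endif
           wk11 = x                     ! request the next matvec
           icode = 2
        end do
        print *, 'icode =', icode, '  x =', x
      end program rc_toy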
of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1825,7 +2457,6 @@ end subroutine deformations subroutine calc_vrel_Cb (nx_block, ny_block, & icellu, Cw, & indxui, indxuj, & - kOL, & aiu, Tbu, & uocn, vocn, & uvel, vvel, & @@ -1833,8 +2464,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - kOL ! outer loop iteration + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -1989,9 +2619,8 @@ end subroutine matvecOLD !======================================================================= subroutine matvec (nx_block, ny_block, & - icellu, & + icellu, icellt , & indxui, indxuj, & - kOL, icellt, & indxti, indxtj, & dxt, dyt, & dxhy, dyhx, & @@ -2008,7 +2637,6 @@ subroutine matvec (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! total count when iceumask is true - kOL, & ! outer loop iteration icellt ! no. of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -2349,19 +2977,17 @@ end subroutine calc_bfix subroutine calc_bvec (nx_block, ny_block, & icellu, & indxui, indxuj, & - kOL, Cw, & + stPr, Cw, & aiu, uarear, & uocn, vocn, & waterx, watery, & uvel, vvel, & bxfix, byfix, & - bx, by, & - stPr) + bx, by) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - kOL ! outer loop iteration + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -2470,7 +3096,7 @@ subroutine residual_vec (nx_block, ny_block, & Fx , & ! x residual vector, Fx = Au - bx (N/m^2) Fy ! y residual vector, Fy = Av - by (N/m^2) - real (kind=dbl_kind), intent(inout) :: & + real (kind=dbl_kind), intent(out) :: & L2norm ! L2norm of residual vector integer (kind=int_kind) :: & @@ -3151,6 +3777,51 @@ end subroutine vec_to_arrays ! JFL ROUTINE POUR CALC STRESS OCN POUR COUPLAGE +!======================================================================= + +! Update Q and R factor after deletion of the 1st column of G_diff +! +! author: P. Blain ECCC + subroutine qr_delete(Q, R) + + real (kind=dbl_kind), intent(inout) :: & + Q(:,:), & ! Q factor + R(:,:) ! R factor + + ! local variables + + integer (kind=int_kind) :: & + i, j, k, & ! loop indices + m, n ! 
size of Q matrix + + real (kind=dbl_kind) :: & + temp, c, s + + n = size(Q,1) + m = size(Q,2) + do i = 1, m-1 + temp = sqrt(R(i,i+1)**2 + R(i+1,i+1)**2) + c = R(i,i+1)/temp + s = R(i+1,i+1)/temp + R(i,i+1) = temp + R(i+1,i+1) = 0 + if (i < m-1) then + do j = i+2, m + temp = c*R(i,j) + s*R(i+1,j) + R(i+1,j) = -s*R(i,j) + c*R(i+1,j) + R(i,j) = temp + enddo + endif + do k = 1, n + temp = c*Q(k,i) + s*Q(k,i+1); + Q(k,i+1) = -s*Q(k,i) + c*Q(k,i+1); + Q(k,i) = temp + enddo + enddo + R(:,1:m-1) = R(:,2:m) + + end subroutine qr_delete + !======================================================================= end module ice_dyn_vp diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 2eaa6d01a..073c2a5ce 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -5,7 +5,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & max_blocks, icellu, & indxui, indxuj, & - kOL, icellt, & + icellt, & indxti, indxtj, & dxt, dyt, & dxhy, dyhx, & @@ -36,8 +36,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions nblocks, & ! nb of blocks - max_blocks, & ! max nb of blocks - kOL ! outer loop iteration + max_blocks ! max nb of blocks integer (kind=int_kind), dimension (max_blocks), intent(in) :: & @@ -168,9 +167,8 @@ subroutine pgmres(nx_block, ny_block, nblocks, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call matvec (nx_block , ny_block, & - icellu (iblk) , & + icellu (iblk) , icellt (iblk) , & indxui (:,iblk) , indxuj (:,iblk) , & - kOL , icellt (iblk) , & indxti (:,iblk) , indxtj (:,iblk) , & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 4c85eab97..512b37220 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -102,7 +102,9 @@ subroutine input_data kridge, ktransport, brlx, arlx use ice_dyn_vp, only: maxits_nonlin, precond, im_fgmres, im_pgmres, maxits_fgmres, & maxits_pgmres, monitor_nonlin, monitor_fgmres, & - monitor_pgmres, gammaNL, gamma, epsprecond + monitor_pgmres, gammaNL, gamma, epsprecond, & + algo_nonlin, fpfunc_andacc, im_andacc, reltol_andacc, & + damping_andacc, start_andacc, use_mean_vrel use ice_transport_driver, only: advection, conserv_check use ice_restoring, only: restore_ice #ifdef CESMCOUPLED @@ -200,7 +202,8 @@ subroutine input_data k1, maxits_nonlin, precond, im_fgmres, & im_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, & monitor_fgmres, monitor_pgmres, gammaNL, gamma, & - epsprecond, & + epsprecond, algo_nonlin, im_andacc, reltol_andacc, & + damping_andacc, start_andacc, fpfunc_andacc, use_mean_vrel, & k2, alphab, threshold_hw, & Pstar, Cstar @@ -342,6 +345,13 @@ subroutine input_data gammaNL = 1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) gamma = 1e-2_dbl_kind ! fgmres stopping criterion: gamma*res(k) epsprecond = 1e-6_dbl_kind ! pgmres stopping criterion: epsprecond*res(k) + algo_nonlin = 1 ! nonlinear algorithm: 1: Picard iteration, 2: Anderson acceleration (andacc) + fpfunc_andacc = 1 ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) + im_andacc = 5 ! size of Anderson minimization matrix (number of saved previous residuals) + reltol_andacc = 1e-6_dbl_kind ! relative tolerance for Anderson acceleration + damping_andacc = 0 ! 
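! Illustrative aside on the qr_delete routine above: dropping the
! first column of F = Q*R leaves R upper Hessenberg, and each pass of
! the loop applies a Givens rotation, with t = sqrt(R(i,i+1)**2 +
! R(i+1,i+1)**2), c = R(i,i+1)/t, s = R(i+1,i+1)/t, to zero one
! subdiagonal entry; applying the same rotation to the columns of Q
! leaves the product Q*R unchanged.  The elementary operation,
! isolated (hypothetical helper):

      subroutine apply_givens (c, s, u, v, n)
        ! rotate the paired slices u, v (two rows of R, or two
        ! columns of Q) by the Givens rotation [c s; -s c]
        integer, intent(in) :: n
        double precision, intent(in) :: c, s
        double precision, intent(inout) :: u(n), v(n)
        double precision :: tmp(n)
        tmp = c*u + s*v
        v   = -s*u + c*v
        u   = tmp
      end subroutine apply_givens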
damping factor for Anderson acceleration + start_andacc = 0 ! acceleration delay factor (acceleration starts at this iteration) + use_mean_vrel = .false. ! use mean of previous 2 iterates to compute vrel advection = 'remap' ! incremental remapping transport scheme conserv_check = .false.! tracer conservation check shortwave = 'ccsm3' ! 'ccsm3' or 'dEdd' (delta-Eddington) @@ -653,12 +663,19 @@ subroutine input_data call broadcast_scalar(im_pgmres, master_task) call broadcast_scalar(maxits_fgmres, master_task) call broadcast_scalar(maxits_pgmres, master_task) - call broadcast_scalar(monitor_nonlin, master_task) + call broadcast_scalar(monitor_nonlin, master_task) call broadcast_scalar(monitor_fgmres, master_task) call broadcast_scalar(monitor_pgmres, master_task) - call broadcast_scalar(gammaNL, master_task) + call broadcast_scalar(gammaNL, master_task) call broadcast_scalar(gamma, master_task) call broadcast_scalar(epsprecond, master_task) + call broadcast_scalar(algo_nonlin, master_task) + call broadcast_scalar(fpfunc_andacc, master_task) + call broadcast_scalar(im_andacc, master_task) + call broadcast_scalar(reltol_andacc, master_task) + call broadcast_scalar(damping_andacc, master_task) + call broadcast_scalar(start_andacc, master_task) + call broadcast_scalar(use_mean_vrel, master_task) call broadcast_scalar(conduct, master_task) call broadcast_scalar(R_ice, master_task) call broadcast_scalar(R_pnd, master_task) @@ -1569,6 +1586,15 @@ subroutine input_data write(nu_diag,1008) ' gammaNL = ', gammaNL write(nu_diag,1008) ' gamma = ', gamma write(nu_diag,1008) ' epsprecond = ', epsprecond + write(nu_diag,1020) ' algo_nonlin = ', algo_nonlin + write(nu_diag,1010) ' use_mean_vrel = ', use_mean_vrel + if (algo_nonlin == 2) then + write(nu_diag,1020) ' fpfunc_andacc = ', fpfunc_andacc + write(nu_diag,1020) ' im_andacc = ', im_andacc + write(nu_diag,1008) ' reltol_andacc = ', reltol_andacc + write(nu_diag,1005) ' damping_andacc = ', damping_andacc + write(nu_diag,1020) ' start_andacc = ', start_andacc + endif endif write(nu_diag,1010) ' conserv_check = ', conserv_check @@ -1722,7 +1748,7 @@ subroutine input_data 1005 format (a30,2x,f12.6) ! float 1006 format (a20,2x,f10.6,a) 1007 format (a20,2x,f6.2,a) - 1008 format (a30,2x,d12.6) ! float, exponential notation + 1008 format (a30,2x,d13.6) ! float, exponential notation 1009 format (a20,2x,d13.6,a) ! float, exponential notation 1010 format (a30,2x,l6) ! logical 1012 format (a20,2x,l3,1x,a) ! 
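logical

! Note on the 1008 format change above (standard Fortran edit
! descriptor behaviour, not specific to CICE): a D field needs 13
! characters to hold a sign, '0.', six mantissa digits and a four
! character exponent such as 'D-07', so d12.6 overflows the field and
! prints asterisks for negative values, while d13.6 always fits.
! Quick standalone check:

      program dfmt_check
        implicit none
        double precision :: x = -1.0d-8
        write(*,'(d12.6)') x   ! field too narrow: prints ************
        write(*,'(d13.6)') x   ! fits, e.g. -0.100000D-07
      end program dfmt_check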
diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in
index 72d93d842..871a7bda4 100644
--- a/configuration/scripts/ice_in
+++ b/configuration/scripts/ice_in
@@ -151,7 +151,12 @@
     gammaNL = 1e-8
     gamma = 1e-2
     epsprecond = 1e-6
-
+    algo_nonlin = 1
+    fpfunc_andacc = 1
+    im_andacc = 5
+    reltol_andacc = 1e-6
+    damping_andacc = 0
+    start_andacc = 0
 /

 &shortwave_nml

From 03a152ef3c54952f69555d92dccf253d078c03d0 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 3 May 2019 14:16:29 -0400
Subject: [PATCH 083/196] options: add options files for implicit solver

- diagimp sets all monitoring options (nonlin, {f,p}gmres) to on
- dynanderson sets the implicit solver to 'on' and the nonlinear
  algorithm to Anderson acceleration
- dynpicard sets the implicit solver to 'on' and the nonlinear
  algorithm to Picard iteration
- run3dt runs the model for three time steps
---
 configuration/scripts/options/set_nml.diagimp     | 3 +++
 configuration/scripts/options/set_nml.dynanderson | 3 +++
 configuration/scripts/options/set_nml.dynpicard   | 3 +++
 configuration/scripts/options/set_nml.run3dt      | 6 ++++++
 4 files changed, 15 insertions(+)
 create mode 100644 configuration/scripts/options/set_nml.diagimp
 create mode 100644 configuration/scripts/options/set_nml.dynanderson
 create mode 100644 configuration/scripts/options/set_nml.dynpicard
 create mode 100644 configuration/scripts/options/set_nml.run3dt

diff --git a/configuration/scripts/options/set_nml.diagimp b/configuration/scripts/options/set_nml.diagimp
new file mode 100644
index 000000000..ea875b0cc
--- /dev/null
+++ b/configuration/scripts/options/set_nml.diagimp
@@ -0,0 +1,3 @@
+monitor_nonlin = .true.
+monitor_fgmres = 2
+monitor_pgmres = 1
diff --git a/configuration/scripts/options/set_nml.dynanderson b/configuration/scripts/options/set_nml.dynanderson
new file mode 100644
index 000000000..91a7ab367
--- /dev/null
+++ b/configuration/scripts/options/set_nml.dynanderson
@@ -0,0 +1,3 @@
+kdyn = 3
+algo_nonlin = 2
+maxits_nonlin = 5000
diff --git a/configuration/scripts/options/set_nml.dynpicard b/configuration/scripts/options/set_nml.dynpicard
new file mode 100644
index 000000000..a4453e251
--- /dev/null
+++ b/configuration/scripts/options/set_nml.dynpicard
@@ -0,0 +1,3 @@
+kdyn = 3
+algo_nonlin = 1
+maxits_nonlin = 5000
diff --git a/configuration/scripts/options/set_nml.run3dt b/configuration/scripts/options/set_nml.run3dt
new file mode 100644
index 000000000..102a19d80
--- /dev/null
+++ b/configuration/scripts/options/set_nml.run3dt
@@ -0,0 +1,6 @@
+npt = 3
+dump_last = .true.
+histfreq = '1','x','x','x','x'
+hist_avg = .false.
+f_uvel = '1'
+f_vvel = '1'

From 65c80126268b628956697f0f17365fd7fe47f667 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 4 Mar 2019 14:51:22 -0500
Subject: [PATCH 084/196] ice_dyn_vp: correct residual computations if max_blocks > 1

The computation of the nonlinear residual norm does not take into
account that `L2norm` is an array of size `max_blocks`. Fix that by
summing the squared components and taking the square root of the
result (the components must be squared first because the subroutine
'calc_L2norm' already returns a square root).
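A minimal standalone sketch (illustrative, not part of the patch) of
the corrected reduction: each block contributes a partial norm, so the
global L2 norm must sum the squared partials before taking the square
root.

      program global_norm
        implicit none
        integer, parameter :: max_blocks = 4
        double precision :: L2norm(max_blocks), nlres_norm
        ! pretend per-block norms as returned by calc_L2norm
        L2norm = (/ 3.0d0, 4.0d0, 0.0d0, 0.0d0 /)
        nlres_norm = sqrt(sum(L2norm**2))   ! = 5.0 here, not 3.0 or 4.0
        print *, 'nonlin_res_L2norm =', nlres_norm
      end program global_norm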
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 5ca37f092..8f26257a1 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -764,7 +764,7 @@ subroutine picard_solver (icellt, icellu, & enddo !$OMP END PARALLEL DO write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " nonlin_res_L2norm= ", L2norm + " nonlin_res_L2norm= ", sqrt(sum(L2norm**2)) endif @@ -968,12 +968,12 @@ subroutine picard_solver (icellt, icellu, & indxui (:,iblk), indxuj (:,iblk), & fpresx(:,:,iblk), fpresy(:,:,iblk), & L2norm (iblk)) - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " fixed_point_res_L2norm= ", L2norm - endif enddo !$OMP END PARALLEL DO + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + " fixed_point_res_L2norm= ", sqrt(sum(L2norm**2)) + endif enddo ! outer loop @@ -1190,9 +1190,9 @@ subroutine anderson_solver (icellt, icellu, & Au (:,:,iblk), Av (:,:,iblk), & Fx (:,:,iblk), Fy (:,:,iblk), & L2norm(iblk)) - nlres_norm = L2norm(iblk) ! phb: change after parallelization enddo !$OMP END PARALLEL DO + nlres_norm = sqrt(sum(L2norm**2)) ! phb: change after parallelization if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " nonlin_res_L2norm= ", nlres_norm From 7d2c6d435ada3e851c7f389d460d0477f81d32e9 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 4 Mar 2019 14:55:00 -0500 Subject: [PATCH 085/196] ice_dyn_vp: differentiate fixed point and progress residuals For Anderson acceleration, the fixed point residual (g(x) - x) is not the same as the 'progress residual' (u-u_old). Make this distinction clear in the code and in the monitoring output. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 39 +++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 8f26257a1..bbcd2debc 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1114,8 +1114,9 @@ subroutine anderson_solver (icellt, icellu, & conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2 tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) tol_nl , & ! tolerance for nonlinear convergence: gammaNL * (initial nonlinear residual norm) - fpres_norm , & ! norm of current fixed point residual - nlres_norm , & ! norm of current nonlinear residual + fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x + prog_norm , & ! norm of difference between current and previous solution + nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x) ddot, dnrm2 ! BLAS functions character(len=*), parameter :: subname = '(anderson_solver)' @@ -1270,10 +1271,28 @@ subroutine anderson_solver (icellt, icellu, & ! Compute residual res = fpfunc - sol fpres_norm = dnrm2(size(res), res, inc) - ! if (monitor_nonlin) then - ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - ! " fixed_point_res_L2norm= ", fpres_norm - ! endif + if (monitor_nonlin) then + ! commented code is to compare fixed_point_res_L2norm BFB with progress_res_L2norm + ! 
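! Illustrative aside, restating the distinction this commit draws:
! under plain Picard iteration u_{k+1} = g(u_k), so the fixed point
! residual f(u_k) = g(u_k) - u_k coincides with the progress residual
! u_{k+1} - u_k; under Anderson acceleration u_{k+1} /= g(u_k), so the
! two differ and are monitored separately (variables as in the code):
!
!     res    = fpfunc - sol       ! f(x) = g(x) - x  -> 'fpres_norm'
!     fpresx = uvel - uprev_k     ! progress in u    -> 'prog_norm'
!     fpresy = vvel - vprev_k     ! progress in v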
(should be BFB if Picard iteration is used) + ! call vec_to_arrays (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! res (:), & + ! fpresx (:,:,:), fpresy (:,:,:)) + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! call calc_L2norm (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! fpresx(:,:,iblk), fpresy(:,:,iblk), & + ! L2norm (iblk)) + ! enddo + ! !$OMP END PARALLEL DO + ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm**2)) + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + " fixed_point_res_L2norm= ", fpres_norm + endif ! Store initial residual norm if (it_nl == 0) then @@ -1398,12 +1417,12 @@ subroutine anderson_solver (icellt, icellu, & indxui (:,iblk), indxuj (:,iblk), & fpresx(:,:,iblk), fpresy(:,:,iblk), & L2norm (iblk)) - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - " fixed_point_res_L2norm= ", L2norm - endif enddo !$OMP END PARALLEL DO + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & + " progress_res_L2norm= ", sqrt(sum(L2norm**2)) + endif enddo ! nonlinear iteration loop From 779cf43a817f647aceb33fb60581cda349c87727 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 7 Mar 2019 09:35:15 -0500 Subject: [PATCH 086/196] ice_dyn_vp: fix convergence issues in Picard and Anderson solvers Both solvers currently converge, but the solution (uvel,vvel) is not qualitatively the same as the EVP solution; velocities are too small. For picard_solver, it is due to the fact that the initial residual stored and used to check for convergence is not the initial residual at the start of the nonlinear iteration, but the last residual at the end of the first FGMRES iteration. Since this residual is smaller, convergence is "faster" but the solution is not the same; it is not converged enough in the sense of the nonlinear residual. This is an implementation error that was introduced when the nonlinear convergence logic was removed from the fgmres solver. For anderson_solver, it is due to using the fixed point residual norm to check for convergence. Similarly, this residual is smaller than the nonlinear residual and thus leads to a "faster" convergence, but to a different solution (not converged enough in the sense of the nonlinear residual). Fix both errors. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 93 ++++++++++++----------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index bbcd2debc..795b3b2ad 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -657,6 +657,7 @@ subroutine picard_solver (icellt, icellu, & real (kind=dbl_kind) :: & conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2 tol , & ! tolerance for nonlinear convergence: gammaNL * initial residual norm + nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x) res_norm ! residual norm for FGMRES character(len=*), parameter :: subname = '(picard_solver)' @@ -736,37 +737,45 @@ subroutine picard_solver (icellt, icellu, & !$OMP END PARALLEL DO ! 
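! Illustrative aside on the fix described in this commit: the
! reference norm for the relative test must be the true nonlinear
! residual of the first iterate, measured before any linear solve has
! reduced it.  Condensed form of the corrected logic (variables as in
! the code):
!
!     if (kOL == 1) tol = gammaNL*nlres_norm   ! freeze ||F(u_1)||
!     if (nlres_norm < tol) exit               ! relative criterion
!
! Anchoring the tolerance to a later, already-reduced norm (such as
! the one returned by the first FGMRES solve) makes the test pass too
! early and leaves the velocities under-converged.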
Compute nonlinear residual norm + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + tarear (:,:,iblk) , tinyarea (:,:,iblk), & + uvel (:,:,iblk) , vvel (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), aiu (:,:,iblk), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + call residual_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk), & + Fx (:,:,iblk), Fy (:,:,iblk), & + L2norm(iblk)) + enddo + !$OMP END PARALLEL DO + nlres_norm = sqrt(sum(L2norm**2)) if (monitor_nonlin) then - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - call residual_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - Fx (:,:,iblk), Fy (:,:,iblk), & - L2norm(iblk)) - enddo - !$OMP END PARALLEL DO write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " nonlin_res_L2norm= ", sqrt(sum(L2norm**2)) + " nonlin_res_L2norm= ", nlres_norm + endif + ! Compute relative tolerance at first iteration + if (kOL == 1) then + tol = gammaNL*nlres_norm + endif + ! Check for nonlinear convergence + if (nlres_norm < tol) then + exit endif - !----------------------------------------------------------------------- ! prep F G M R E S @@ -903,15 +912,6 @@ subroutine picard_solver (icellt, icellu, & goto 1 endif ! icode - - ! Compute relative tolerance at first iteration - if (kOL == 1) then - tol = gammaNL*res_norm - endif - ! Check for nonlinear convergence - if (res_norm < tol) then - exit - endif !----------------------------------------------------------------------- ! Put vector sol in uvel and vvel arrays @@ -1294,15 +1294,16 @@ subroutine anderson_solver (icellt, icellu, & " fixed_point_res_L2norm= ", fpres_norm endif - ! Store initial residual norm - if (it_nl == 0) then - tol = reltol_andacc*fpres_norm - endif - - ! Check residual - if (fpres_norm < tol) then - exit - endif + ! Not used for now (only nonlinear residual is checked) + ! ! Store initial residual norm + ! if (it_nl == 0) then + ! tol = reltol_andacc*fpres_norm + ! endif + ! + ! ! Check residual + ! if (fpres_norm < tol) then + ! exit + ! endif if (im_andacc == 0 .or. it_nl < start_andacc) then ! 
Simple fixed point (Picard) iteration in this case From 57e096f2cc8a1daa0b6972eb0f2eaefc715bb20b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 14 Mar 2019 16:00:40 -0400 Subject: [PATCH 087/196] ice_dyn_vp: add (ulin,vlin) to compute vrel based on 2 previous iterates Initial test shows that using the mean of the 2 previous iterates to compute vrel accelerates convergence for Picard iteration (this is robust). For Anderson it is sometimes better, sometimes worse, so by default it is set to false in the namelist. This also removes the oscillation produced by Picard iteration, which is in line with results in doi:10.1029/2008JC005017 --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 29 ++++++++++++++++++----- configuration/scripts/ice_in | 1 + 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 795b3b2ad..6294ff2e0 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -71,7 +71,8 @@ module ice_dyn_vp start_andacc ! acceleration delay factor (acceleration starts at this iteration) logical (kind=log_kind), public :: & - monitor_nonlin ! print nonlinear residual norm + monitor_nonlin , & ! print nonlinear residual norm + use_mean_vrel ! use mean of previous 2 iterates to compute vrel real (kind=dbl_kind), public :: & gammaNL , & ! nonlinear stopping criterion: gammaNL*res(k=0) @@ -675,7 +676,7 @@ subroutine picard_solver (icellt, icellu, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - + uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) @@ -696,7 +697,7 @@ subroutine picard_solver (icellt, icellu, & indxui (:,iblk), indxuj (:,iblk), & aiu (:,:,iblk), Tbu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) ! prepare b vector (RHS) @@ -707,7 +708,7 @@ subroutine picard_solver (icellt, icellu, & aiu (:,:,iblk), uarear (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk)) @@ -1069,6 +1070,8 @@ subroutine anderson_solver (icellt, icellu, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & uprev_k , & ! uvel at previous Picard iteration vprev_k , & ! vvel at previous Picard iteration + ulin , & ! uvel to linearize vrel + vlin , & ! vvel to linearize vrel vrel , & ! coeff for tauw Cb , & ! seabed stress coeff bx , & ! b vector @@ -1124,6 +1127,13 @@ subroutine anderson_solver (icellt, icellu, & ! Initialization res_num = 0 + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uprev_k(:,:,iblk) = uvel(:,:,iblk) + vprev_k(:,:,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + ! Start iterations do it_nl = 0, maxits_nonlin ! nonlinear iteration loop ! 
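! Illustrative aside: a scalar toy analogue (not the VP equations) of
! the averaging this commit introduces.  Evaluating the fixed point
! map at the mean of the two previous iterates damps the oscillation
! of plain Picard iteration:

      program mean_iterate_toy
        implicit none
        double precision :: x, xm1, xlin
        integer :: k
        ! toy map g(x) = -0.9*x + 1.9, fixed point x* = 1; plain
        ! Picard has error factor -0.9 per step (slow, oscillating),
        ! the averaged variant contracts with |lambda| = sqrt(0.45)
        x = 0.0d0
        xm1 = 0.0d0
        do k = 1, 20
           xlin = 0.5d0*x + 0.5d0*xm1   ! mean of two previous iterates
           xm1 = x
           x = -0.9d0*xlin + 1.9d0
           print '(i4,f12.8)', k, x
        end do
      end program mean_iterate_toy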
Compute quantities needed for computing PDE residual and g(x) (fixed point map) @@ -1133,6 +1143,13 @@ subroutine anderson_solver (icellt, icellu, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks + if (use_mean_vrel) then + ulin(:,:,iblk) = p5*uprev_k(:,:,iblk) + p5*uvel(:,:,iblk) + vlin(:,:,iblk) = p5*vprev_k(:,:,iblk) + p5*vvel(:,:,iblk) + else + ulin(:,:,iblk) = uvel(:,:,iblk) + vlin(:,:,iblk) = vvel(:,:,iblk) + endif uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) @@ -1153,7 +1170,7 @@ subroutine anderson_solver (icellt, icellu, & indxui (:,iblk), indxuj (:,iblk), & aiu (:,:,iblk), Tbu (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ulin (:,:,iblk), vlin (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) ! prepare b vector (RHS) @@ -1164,7 +1181,7 @@ subroutine anderson_solver (icellt, icellu, & aiu (:,:,iblk), uarear (:,:,iblk), & uocn (:,:,iblk), vocn (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk)) diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 871a7bda4..7cfdf1505 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -152,6 +152,7 @@ gamma = 1e-2 epsprecond = 1e-6 algo_nonlin = 1 + use_mean_vrel = .false. fpfunc_andacc = 1 im_andacc = 5 reltol_andacc = 1e-6 From 4a96460c71d028dccb13e922fa65f479730a72ca Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 Mar 2019 09:49:36 -0400 Subject: [PATCH 088/196] machines: cesium: add LAPACK and BLAS library to Macros --- configuration/scripts/machines/Macros.cesium_intel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/scripts/machines/Macros.cesium_intel b/configuration/scripts/machines/Macros.cesium_intel index 1bca1ddac..ae112780d 100644 --- a/configuration/scripts/machines/Macros.cesium_intel +++ b/configuration/scripts/machines/Macros.cesium_intel @@ -50,7 +50,7 @@ LIB_PNETCDF := $(PNETCDF_PATH)/lib LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -lgptl -SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf +SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -llapack -lblas ifeq ($(ICE_THREADED), true) LDFLAGS += -openmp From 836e546635c8b06b6f1354aa1d1542a4d0945bbd Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 29 Apr 2019 10:22:16 -0400 Subject: [PATCH 089/196] ice_dyn_vp: add 'subname' variable to each subroutine --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 6294ff2e0..ac77a1255 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -176,6 +176,8 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block + character(len=*), parameter :: subname = '(imp_solver)' + call ice_timer_start(timer_dynamics) ! dynamics !----------------------------------------------------------------- @@ -1712,6 +1714,8 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & logical :: capping ! of the viscous coeff + character(len=*), parameter :: subname = '(calc_zeta_Pr)' + capping = .false. !DIR$ CONCURRENT !Cray @@ -1928,6 +1932,8 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & stressm_1, stressm_2, stressm_3, stressm_4 , & ! 
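! Note on the 'subname' parameters added throughout this commit: they
! follow the convention, already used elsewhere in this module, of
! tagging aborts and diagnostics with the originating routine, e.g.
! (a sketch of the intended pattern, not an added call):
!
!     character(len=*), parameter :: subname = '(calc_L2norm)'
!     ...
!     call abort_ice (error_message=subname//' ERROR: ...', &
!                     file=__FILE__, line=__LINE__)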
sigma11-sigma22 stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + character(len=*), parameter :: subname = '(stress_prime_vpOLD)' + !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- @@ -2188,6 +2194,8 @@ subroutine stress_vp (nx_block, ny_block, & str12ew, str12we, str12ns, str12sn , & strp_tmp, strm_tmp, tmp + character(len=*), parameter :: subname = '(stress_vp)' + !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- @@ -2422,6 +2430,8 @@ subroutine deformations (nx_block, ny_block, & Deltane, Deltanw, Deltase, Deltasw , & ! Delt tmp + character(len=*), parameter :: subname = '(deformations)' + !DIR$ CONCURRENT !Cray !cdir nodep !NEC !ocl novrec !Fujitsu @@ -2540,6 +2550,8 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & real (kind=dbl_kind) :: & u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) + character(len=*), parameter :: subname = '(calc_vrel_Cb)' + !----------------------------------------------------------------- ! integrate the momentum equation !----------------------------------------------------------------- @@ -2621,6 +2633,8 @@ subroutine matvecOLD (nx_block, ny_block, & ccaimp,ccb , & ! intermediate variables strintx, strinty + character(len=*), parameter :: subname = '(matvecOLD)' + !----------------------------------------------------------------- ! integrate the momentum equation !----------------------------------------------------------------- @@ -2749,6 +2763,8 @@ subroutine matvec (nx_block, ny_block, & stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + character(len=*), parameter :: subname = '(matvec)' + !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- @@ -2994,6 +3010,8 @@ subroutine calc_bfix (nx_block, ny_block, & integer (kind=int_kind) :: & i, j, ij + character(len=*), parameter :: subname = '(calc_bfix)' + !----------------------------------------------------------------- ! Define variables for momentum equation !----------------------------------------------------------------- @@ -3065,6 +3083,8 @@ subroutine calc_bvec (nx_block, ny_block, & strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) rhow ! + character(len=*), parameter :: subname = '(calc_bvec)' + !----------------------------------------------------------------- ! calc b vector !----------------------------------------------------------------- @@ -3139,6 +3159,8 @@ subroutine residual_vec (nx_block, ny_block, & integer (kind=int_kind) :: & i, j, ij + character(len=*), parameter :: subname = '(residual_vec)' + !----------------------------------------------------------------- ! calc residual and its L2 norm !----------------------------------------------------------------- @@ -3224,6 +3246,8 @@ subroutine formDiag_step1 (nx_block, ny_block, & str12ew, str12we, str12ns, str12sn , & strp_tmp, strm_tmp, tmp + character(len=*), parameter :: subname = '(formDiag_step1)' + !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- @@ -3578,6 +3602,8 @@ subroutine formDiag_step2 (nx_block, ny_block, & ccaimp , & ! 
intermediate variables strintx, strinty + character(len=*), parameter :: subname = '(formDiag_step2)' + !----------------------------------------------------------------- ! integrate the momentum equation !----------------------------------------------------------------- @@ -3639,6 +3665,8 @@ subroutine precond_diag (ntot, & integer (kind=int_kind) :: & i + character(len=*), parameter :: subname = '(precond_diag)' + !----------------------------------------------------------------- ! form vector (converts from max_blocks arrays to single vector !----------------------------------------------------------------- @@ -3682,6 +3710,8 @@ subroutine calc_L2norm (nx_block, ny_block, & integer (kind=int_kind) :: & i, j, ij + character(len=*), parameter :: subname = '(calc_L2norm)' + !----------------------------------------------------------------- ! compute l^2 norm of vector grid function (tpu,tpv) !----------------------------------------------------------------- @@ -3736,6 +3766,8 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & i, j, iblk, tot, ij + character(len=*), parameter :: subname = '(arrays_to_vec)' + !----------------------------------------------------------------- ! form vector (converts from max_blocks arrays to single vector !----------------------------------------------------------------- @@ -3791,6 +3823,8 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & i, j, iblk, tot, ij + character(len=*), parameter :: subname = '(vec_to_arrays)' + !----------------------------------------------------------------- ! form arrays (converts from vector to the max_blocks arrays !----------------------------------------------------------------- @@ -3834,6 +3868,8 @@ subroutine qr_delete(Q, R) real (kind=dbl_kind) :: & temp, c, s + character(len=*), parameter :: subname = '(qr_delete)' + n = size(Q,1) m = size(Q,2) do i = 1, m-1 From 7640444fe5bd27f08f3072a4cc0942123b14fd4e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 29 Apr 2019 10:30:53 -0400 Subject: [PATCH 090/196] ice_dyn_vp: cleanup 'icepack_warnings_*' calls Some calls to Icepack warning interfaces are not needed; remove them. Another call is misplaced; move it to the right place. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 24 ++++------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ac77a1255..07cbf20f3 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -341,6 +341,10 @@ subroutine imp_solver (dt) enddo ! iblk !$TCXOMP END PARALLEL DO + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + file=__FILE__, line=__LINE__) + !----------------------------------------------------------------- ! calc size of problem (ntot) and allocate arrays and vectors !----------------------------------------------------------------- @@ -355,10 +359,6 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & - file=__FILE__, line=__LINE__) - call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) @@ -2639,10 +2639,6 @@ subroutine matvecOLD (nx_block, ny_block, & ! 
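! Illustrative aside on this commit: icepack_warnings_flush and the
! icepack_warnings_aborted check belong once, directly after the
! threaded region whose body calls into Icepack, not inside low-level
! routines that never do.  The retained pattern, condensed (as in the
! code above):
!
!     !$OMP PARALLEL DO PRIVATE(iblk)
!     do iblk = 1, nblocks
!        ! ... calls into Icepack (icepack_ice_strength, etc.) ...
!     enddo
!     !$OMP END PARALLEL DO
!     call icepack_warnings_flush(nu_diag)
!     if (icepack_warnings_aborted()) call abort_ice(error_message="subname", &
!         file=__FILE__, line=__LINE__)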
integrate the momentum equation !----------------------------------------------------------------- - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & - file=__FILE__, line=__LINE__) - do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -2945,10 +2941,6 @@ subroutine matvec (nx_block, ny_block, & ! Form Au and Av !----------------------------------------------------------------- - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & - file=__FILE__, line=__LINE__) - do ij =1, icellu i = indxui(ij) j = indxuj(ij) @@ -3165,10 +3157,6 @@ subroutine residual_vec (nx_block, ny_block, & ! calc residual and its L2 norm !----------------------------------------------------------------- - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & - file=__FILE__, line=__LINE__) - L2norm=c0 do ij =1, icellu @@ -3608,10 +3596,6 @@ subroutine formDiag_step2 (nx_block, ny_block, & ! integrate the momentum equation !----------------------------------------------------------------- - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & - file=__FILE__, line=__LINE__) - strintx=c0 strinty=c0 From 148668c3f4e478313457c4633be6c9212bed8dd7 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 29 Apr 2019 14:52:37 -0400 Subject: [PATCH 091/196] ice_dyn_vp, pgmres: remove unused arguments, variables and 'use' statements variables --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 108 +++++++--------------- cicecore/cicedynB/dynamics/pgmres.F90 | 15 +-- 2 files changed, 39 insertions(+), 84 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 07cbf20f3..f07fa1ecc 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -37,10 +37,10 @@ module ice_dyn_vp use ice_kinds_mod use ice_constants, only: field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector - use ice_constants, only: c0, c4, p027, p055, p111, p166, & - p2, p222, p25, p333, p5, c1 + use ice_constants, only: c0, p027, p055, p111, p166, & + p222, p25, p333, p5, c1 use ice_dyn_shared, only: dyn_prep1, dyn_prep2, dyn_finish, & - yield_curve, ecci, cosw, sinw, fcor_blk, uvel_init, & + ecci, cosw, sinw, fcor_blk, uvel_init, & vvel_init, basal_stress_coeff, basalstress, Ktens use ice_fileunits, only: nu_diag use ice_exit, only: abort_ice @@ -116,7 +116,7 @@ subroutine imp_solver (dt) stressm_1, stressm_2, stressm_3, stressm_4, & stress12_1, stress12_2, stress12_3, stress12_4 use ice_grid, only: tmask, umask, dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & - tarear, uarear, tinyarea, to_ugrid, t2ugrid_vector, u2tgrid_vector, & + tarear, to_ugrid, t2ugrid_vector, u2tgrid_vector, & grid_type use ice_state, only: aice, vice, vsno, uvel, vvel, divu, shear, & aice_init, aice0, aicen, vicen, strength @@ -160,12 +160,8 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) - real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:), wk11(:), wk22(:) - real (kind=dbl_kind), allocatable :: vv(:,:), ww(:,:) + real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:) - real (kind=dbl_kind), dimension (max_blocks) :: L2norm - real (kind=dbl_kind) :: conv, tolNL - integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & icetmask, & ! 
ice extent mask (T-cell) halomask ! generic halo mask @@ -442,7 +438,6 @@ subroutine imp_solver (dt) indxti (:,iblk), indxtj (:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & tarear (:,:,iblk), & @@ -690,7 +685,7 @@ subroutine picard_solver (icellt, icellu, & dxhy (:,:,iblk), dyhx (:,:,iblk), & cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk),& + tinyarea (:,:,iblk), & strength (:,:,iblk), zetaD (:,:,iblk,:) ,& stPrtmp (:,:,:) ) @@ -750,10 +745,9 @@ subroutine picard_solver (icellt, icellu, & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & uvel (:,:,iblk) , vvel (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & + zetaD (:,:,iblk,:), & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) @@ -844,9 +838,8 @@ subroutine picard_solver (icellt, icellu, & dxhy (:,:,:) , dyhx (:,:,:) , & cxp (:,:,:) , cyp (:,:,:) , & cxm (:,:,:) , cym (:,:,:) , & - tarear (:,:,:) , tinyarea (:,:,:) , & vrel (:,:,:) , Cb (:,:,:) , & - zetaD (:,:,:,:) , aiu (:,:,:) , & + zetaD (:,:,:,:) , & umassdti (:,:,:) , fm (:,:,:) , & uarear (:,:,:) , diagvec(:) , & wk22 (:) , wk11(:) , & @@ -893,11 +886,10 @@ subroutine picard_solver (icellt, icellu, & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & uvel (:,:,iblk) , vvel (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & + zetaD (:,:,iblk,:), & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) @@ -1012,7 +1004,7 @@ subroutine anderson_solver (icellt, icellu, & use ice_domain_size, only: max_blocks use ice_flux, only: uocn, vocn, fm, Tbu use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & - tarear, uarear, tinyarea + uarear, tinyarea use ice_state, only: uvel, vvel, strength use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop @@ -1058,13 +1050,7 @@ subroutine anderson_solver (icellt, icellu, & it_nl , & ! nonlinear loop iteration index res_num , & ! current number of stored residuals j , & ! iteration index for QR update - iblk , & ! block index - icode , & ! code for fgmres solver - ischmi , & ! Quesse ca!?!?! jfl - its , & ! iteration nb for fgmres - fgmres_its , & ! final nb of fgmres iterations - iconvNL , & ! code for NL convergence criterion (equals 1 when NL convergence is reached) - ierr ! code for pgmres preconditioner !phb: needed? + iblk ! block index integer (kind=int_kind), parameter :: & inc = 1 ! 
increment value for BLAS calls @@ -1163,7 +1149,7 @@ subroutine anderson_solver (icellt, icellu, & dxhy (:,:,iblk), dyhx (:,:,iblk), & cxp (:,:,iblk), cyp (:,:,iblk), & cxm (:,:,iblk), cym (:,:,iblk), & - tarear (:,:,iblk), tinyarea (:,:,iblk),& + tinyarea (:,:,iblk), & strength (:,:,iblk), zetaD (:,:,iblk,:) ,& stPrtmp (:,:,:) ) @@ -1196,10 +1182,9 @@ subroutine anderson_solver (icellt, icellu, & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & uprev_k (:,:,iblk) , vprev_k (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & + zetaD (:,:,iblk,:), & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) @@ -1464,12 +1449,12 @@ subroutine fgmres_solver (ntot, bvec, & use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_HaloUpdate - use ice_domain, only: nblocks, halo_info, maskhalo_dyn + use ice_domain, only: nblocks, halo_info use ice_domain_size, only: max_blocks - use ice_flux, only: uocn, vocn, fm, Tbu + use ice_flux, only: fm use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & tarear, uarear, tinyarea - use ice_state, only: uvel, vvel, strength + use ice_state, only: uvel, vvel integer (kind=int_kind), intent(in) :: & ntot ! size of problem for fgmres (for given cpu) @@ -1572,10 +1557,9 @@ subroutine fgmres_solver (ntot, bvec, & dxt (:,:,:) , dyt (:,:,:) , & dxhy (:,:,:) , dyhx (:,:,:) , & cxp (:,:,:) , cyp (:,:,:) , & - cxm (:,:,:) , cym (:,:,:) , & - tarear (:,:,:) , tinyarea (:,:,:) , & + cxm (:,:,:) , cym (:,:,:) , & vrel (:,:,:) , Cb (:,:,:) , & - zetaD (:,:,:,:) , aiu (:,:,:) , & + zetaD (:,:,:,:) , & umassdti (:,:,:) , fm (:,:,:) , & uarear (:,:,:) , diagvec(:) , & wk22 (:) , wk11(:) , & @@ -1622,11 +1606,10 @@ subroutine fgmres_solver (ntot, bvec, & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & uvel (:,:,iblk) , vvel (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & + zetaD (:,:,iblk,:), & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) @@ -1661,7 +1644,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & + tinyarea, & strength, zetaD, & stPr) @@ -1686,7 +1669,6 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea tinyarea ! puny*tarea real (kind=dbl_kind), dimension(nx_block,ny_block,4), & @@ -1863,20 +1845,18 @@ end subroutine calc_zeta_Pr ! Computes VP stress without the rep. pressure Pr (included in b vector) subroutine stress_prime_vpOLD (nx_block, ny_block, & - kOL, icellt, & + icellt, & indxti, indxtj, & uvel, vvel, & dxt, dyt, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & zetaD, & str ) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - kOL , & ! subcycling step icellt ! no. of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -1894,9 +1874,7 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea + cxm ! 
0.5*HTN - 1.5*HTN real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & @@ -1916,7 +1894,6 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing Deltane, Deltanw, Deltase, Deltasw , & ! Delt - puny , & ! puny ssigpn, ssigps, ssigpe, ssigpw , & ssigmn, ssigms, ssigme, ssigmw , & ssig12n, ssig12s, ssig12e, ssig12w , & @@ -1925,7 +1902,7 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & csigmne, csigmnw, csigmse, csigmsw , & csig12ne, csig12nw, csig12se, csig12sw , & str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp + strp_tmp, strm_tmp real (kind=dbl_kind) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) @@ -2118,14 +2095,13 @@ end subroutine stress_prime_vpOLD ! Computes the VP stress (as diagnostic) subroutine stress_vp (nx_block, ny_block, & - kOL, icellt, & + icellt, & indxti, indxtj, & uvel, vvel, & dxt, dyt, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & zetaD, & stressp_1, stressp_2, & stressp_3, stressp_4, & @@ -2137,7 +2113,6 @@ subroutine stress_vp (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - kOL , & ! subcycling step icellt ! no. of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & @@ -2155,9 +2130,7 @@ subroutine stress_vp (nx_block, ny_block, & cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea + cxm ! 0.5*HTN - 1.5*HTN real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & @@ -2183,7 +2156,6 @@ subroutine stress_vp (nx_block, ny_block, & tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing Deltane, Deltanw, Deltase, Deltasw , & ! Delt - puny , & ! puny ssigpn, ssigps, ssigpe, ssigpw , & ssigmn, ssigms, ssigme, ssigmw , & ssig12n, ssig12s, ssig12e, ssig12w , & @@ -2192,7 +2164,7 @@ subroutine stress_vp (nx_block, ny_block, & csigmne, csigmnw, csigmse, csigmsw , & csig12ne, csig12nw, csig12se, csig12sw , & str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp + strp_tmp, strm_tmp character(len=*), parameter :: subname = '(stress_vp)' @@ -2382,7 +2354,6 @@ subroutine deformations (nx_block, ny_block, & indxti, indxtj, & uvel, vvel, & dxt, dyt, & - dxhy, dyhx, & cxp, cyp, & cxm, cym, & tarear, & @@ -2403,8 +2374,6 @@ subroutine deformations (nx_block, ny_block, & vvel , & ! y-component of velocity (m/s) dxt , & ! width of T-cell through the middle (m) dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE @@ -2583,8 +2552,7 @@ end subroutine calc_vrel_Cb subroutine matvecOLD (nx_block, ny_block, & icellu, & indxui, indxuj, & - kOL, & - aiu, str, & + str, & vrel, & umassdti, fm, & uarear, Cb, & @@ -2593,8 +2561,7 @@ subroutine matvecOLD (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu, & ! total count when iceumask is true - kOL ! outer loop iteration + icellu ! total count when iceumask is true integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & @@ -2604,7 +2571,6 @@ subroutine matvecOLD (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & vrel, & ! 
coefficient for tauw Cb, & ! coefficient for basal stress - aiu , & ! ice fraction on u-grid umassdti, & ! mass of U-cell/dt (kg/m^2 s) fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea @@ -2673,10 +2639,9 @@ subroutine matvec (nx_block, ny_block, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & uvel, vvel, & vrel, Cb, & - zetaD, aiu, & + zetaD, & umassdti, fm, & uarear, & Au, Av) @@ -2701,9 +2666,7 @@ subroutine matvec (nx_block, ny_block, & cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea + cxm ! 0.5*HTN - 1.5*HTN real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(in) :: & @@ -2711,7 +2674,6 @@ subroutine matvec (nx_block, ny_block, & vvel , & ! y-component of velocity (m/s) vrel , & ! coefficient for tauw Cb , & ! coefficient for basal stress - aiu , & ! ice fraction on u-grid umassdti, & ! mass of U-cell/dt (kg/m^2 s) fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea @@ -2743,7 +2705,6 @@ subroutine matvec (nx_block, ny_block, & tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing Deltane, Deltanw, Deltase, Deltasw , & ! Delt - puny , & ! puny ssigpn, ssigps, ssigpe, ssigpw , & ssigmn, ssigms, ssigme, ssigmw , & ssig12n, ssig12s, ssig12e, ssig12w , & @@ -2752,7 +2713,7 @@ subroutine matvec (nx_block, ny_block, & csigmne, csigmnw, csigmse, csigmsw , & csig12ne, csig12nw, csig12se, csig12sw , & str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp + strp_tmp, strm_tmp real (kind=dbl_kind) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) @@ -3219,7 +3180,6 @@ subroutine formDiag_step1 (nx_block, ny_block, & divune, divunw, divuse, divusw , & ! divergence tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! c0 or c1 stressp_1, stressp_2, stressp_3, stressp_4, & stressm_1, stressm_2, stressm_3, stressm_4, & @@ -3232,7 +3192,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & csigmne, csigmnw, csigmse, csigmsw , & csig12ne, csig12nw, csig12se, csig12sw , & str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp, tmp + strp_tmp, strm_tmp character(len=*), parameter :: subname = '(formDiag_step1)' diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 index 073c2a5ce..04b277e15 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -11,9 +11,8 @@ subroutine pgmres(nx_block, ny_block, nblocks, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - tarear, tinyarea, & vrel, Cb, & - zetaD, aiu, & + zetaD, & umassdti, fm, & uarear, diagvec, & sol, rhs, & @@ -58,15 +57,12 @@ subroutine pgmres(nx_block, ny_block, nblocks, & cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear , & ! 1/tarea - tinyarea ! puny*tarea + cxm ! 0.5*HTN - 1.5*HTN real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), & intent(in) :: & vrel , & ! coefficient for tauw Cb , & ! coefficient for basal stress - aiu , & ! ice fraction on u-grid umassdti, & ! mass of U-cell/dt (kg/m^2 s) fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea @@ -103,7 +99,7 @@ subroutine pgmres(nx_block, ny_block, nblocks, & ! arnoldi size should not exceed kmax=50 in this version.. ! 
to reset modify paramter kmax accordingly. !------------------------------------------------------------- - real*8 epsmac ,ro,ddot,dnrm2 + real*8 epsmac ,ro,ddot parameter (epsmac=1.d-16) integer l ! character(len= 9) communicate_S @@ -173,11 +169,10 @@ subroutine pgmres(nx_block, ny_block, nblocks, & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - tarear (:,:,iblk) , tinyarea (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & utp (:,:,iblk) , vtp (:,:,iblk), & vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), aiu (:,:,iblk), & + zetaD (:,:,iblk,:), & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) From 6eb86cda87973473dbafe75cbf23605aaa1b1dc8 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 10 Jun 2019 13:34:12 -0400 Subject: [PATCH 092/196] ice_dyn_vp: refactor 'puny' treatment for implicit solver The way 'puny_dyn' is set in the 'cice' driver, and in 'ice_constants' and 'ice_grid' is kind of hacky. Revert those changes. Instead, add an 'init_vp' subroutine that is called from the drivers and that recomputes 'tinyarea' with the new 'puny_vp' value. Call 'init_evp' from 'init_vp', mimicking what is done in 'init_eap'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 59 ++++++++++++++++++- cicecore/cicedynB/infrastructure/ice_grid.F90 | 9 ++- .../drivers/direct/hadgem3/CICE_InitMod.F90 | 3 + cicecore/drivers/mct/cesm1/CICE_InitMod.F90 | 3 + .../drivers/standalone/cice/CICE_InitMod.F90 | 17 +----- cicecore/shared/ice_constants.F90 | 15 ++--- 6 files changed, 75 insertions(+), 31 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index f07fa1ecc..3bab1290e 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -52,7 +52,8 @@ module ice_dyn_vp implicit none private - public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag + public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag, & + init_vp ! namelist parameters @@ -87,6 +88,62 @@ module ice_dyn_vp !======================================================================= +! Initialize parameters and variables needed for the vp dynamics +! author: Philippe Blain, ECCC + + subroutine init_vp (dt) + + use ice_blocks, only: get_block, block + use ice_boundary, only: ice_HaloUpdate + use ice_constants, only: c1, & + field_loc_center, field_type_scalar + use ice_domain, only: nblocks, blocks_ice, halo_info + use ice_dyn_shared, only: init_evp + use ice_grid, only: tarea, tinyarea + + real (kind=dbl_kind), intent(in) :: & + dt ! time step + + ! local variables + + integer (kind=int_kind) :: & + i, j, iblk, & + ilo,ihi,jlo,jhi ! beginning and end of physical domain + + type (block) :: & + this_block ! block information for current block + + real (kind=dbl_kind) :: & + puny_vp = 2e-09_dbl_kind ! special puny value for computing tinyarea + + ! Initialize variables shared with evp + call init_evp(dt) + + ! Redefine tinyarea using a different puny value + + !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) + do iblk = 1, nblocks + this_block = get_block(blocks_ice(iblk),iblk) + ilo = this_block%ilo + ihi = this_block%ihi + jlo = this_block%jlo + jhi = this_block%jhi + + do j = jlo, jhi + do i = ilo, ihi + tinyarea(i,j,iblk) = puny_vp*tarea(i,j,iblk) + enddo + enddo + enddo !
iblk + !$OMP END PARALLEL DO + + call ice_HaloUpdate (tinyarea, halo_info, & + field_loc_center, field_type_scalar, & + fillValue=c1) + + end subroutine init_vp +!======================================================================= + ! Viscous-plastic dynamics driver ! #ifdef CICE_IN_NEMO diff --git a/cicecore/cicedynB/infrastructure/ice_grid.F90 b/cicecore/cicedynB/infrastructure/ice_grid.F90 index bc06f6108..67129c911 100644 --- a/cicecore/cicedynB/infrastructure/ice_grid.F90 +++ b/cicecore/cicedynB/infrastructure/ice_grid.F90 @@ -330,8 +330,7 @@ subroutine init_grid2 use ice_blocks, only: get_block, block, nx_block, ny_block use ice_constants, only: c0, c1, c2, p5, p25, c1p5, & field_loc_center, field_loc_NEcorner, & - field_type_scalar, field_type_vector, field_type_angle, & - puny_dyn + field_type_scalar, field_type_vector, field_type_angle use ice_domain_size, only: max_blocks integer (kind=int_kind) :: & @@ -340,7 +339,7 @@ subroutine init_grid2 real (kind=dbl_kind) :: & angle_0, angle_w, angle_s, angle_sw, & - pi, pi2 + pi, pi2, puny logical (kind=log_kind), dimension(nx_block,ny_block,max_blocks):: & out_of_range @@ -357,7 +356,7 @@ subroutine init_grid2 ! lat, lon, cell widths, angle, land mask !----------------------------------------------------------------- - call icepack_query_parameters(pi_out=pi, pi2_out=pi2) + call icepack_query_parameters(pi_out=pi, pi2_out=pi2, puny_out=puny) call icepack_warnings_flush(nu_diag) if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) @@ -409,7 +408,7 @@ subroutine init_grid2 else uarear(i,j,iblk) = c0 ! possible on boundaries endif - tinyarea(i,j,iblk) = puny_dyn*tarea(i,j,iblk) + tinyarea(i,j,iblk) = puny*tarea(i,j,iblk) dxhy(i,j,iblk) = p5*(HTE(i,j,iblk) - HTE(i-1,j,iblk)) dyhx(i,j,iblk) = p5*(HTN(i,j,iblk) - HTN(i,j-1,iblk)) diff --git a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 index dc41ff9fd..2e88f6334 100644 --- a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 +++ b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 @@ -72,6 +72,7 @@ subroutine cice_init use ice_domain_size, only: ncat, nfsd use ice_dyn_eap, only: init_eap, alloc_dyn_eap use ice_dyn_shared, only: kdyn, init_evp, basalstress, alloc_dyn_shared + use ice_dyn_vp, only: init_vp use ice_flux, only: init_coupler_flux, init_history_therm, & init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux use ice_forcing, only: init_forcing_ocn, init_forcing_atmo, & @@ -123,6 +124,8 @@ subroutine cice_init if (kdyn == 2) then call alloc_dyn_eap ! allocate dyn_eap arrays call init_eap (dt_dyn) ! define eap dynamics parameters, variables + else if (kdyn == 3) then + call init_vp (dt_dyn) ! define vp dynamics parameters, variables else ! for both kdyn = 0 or 1 call init_evp (dt_dyn) ! 
define evp dynamics parameters, variables endif diff --git a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 index 80bb2570e..79d6753b0 100644 --- a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 +++ b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 @@ -72,6 +72,7 @@ subroutine cice_init(mpicom_ice) use ice_domain_size, only: ncat, nfsd use ice_dyn_eap, only: init_eap, alloc_dyn_eap use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared + use ice_dyn_vp, only: init_vp use ice_flux, only: init_coupler_flux, init_history_therm, & init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux use ice_forcing, only: init_forcing_ocn, init_forcing_atmo, & @@ -125,6 +126,8 @@ subroutine cice_init(mpicom_ice) if (kdyn == 2) then call alloc_dyn_eap ! allocate dyn_eap arrays call init_eap (dt_dyn) ! define eap dynamics parameters, variables + else if (kdyn == 3) then + call init_vp (dt_dyn) ! define vp dynamics parameters, variables else ! for both kdyn = 0 or 1 call init_evp (dt_dyn) ! define evp dynamics parameters, variables endif diff --git a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 b/cicecore/drivers/standalone/cice/CICE_InitMod.F90 index 6b2cf8916..1fc64c259 100644 --- a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 +++ b/cicecore/drivers/standalone/cice/CICE_InitMod.F90 @@ -67,12 +67,12 @@ subroutine cice_init use ice_calendar, only: dt, dt_dyn, time, istep, istep1, write_ic, & init_calendar, calendar use ice_communicate, only: init_communicate, my_task, master_task - use ice_constants, only: ice_init_constants use ice_diagnostics, only: init_diags use ice_domain, only: init_domain_blocks use ice_domain_size, only: ncat, nfsd use ice_dyn_eap, only: init_eap, alloc_dyn_eap use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared + use ice_dyn_vp, only: init_vp use ice_flux, only: init_coupler_flux, init_history_therm, & init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux use ice_forcing, only: init_forcing_ocn, init_forcing_atmo, & @@ -91,9 +91,6 @@ subroutine cice_init logical(kind=log_kind) :: tr_aero, tr_zaero, skl_bgc, z_tracers, & tr_iso, tr_fsd, wave_spec - - real (kind=dbl_kind) :: puny - character(len=*), parameter :: subname = '(cice_init)' call init_communicate ! initial setup for message passing @@ -121,16 +118,6 @@ subroutine cice_init call alloc_flux ! allocate flux arrays call init_ice_timers ! initialize all timers call ice_timer_start(timer_total) ! start timing entire run - ! By default, the puny value used for computing tinyarea in init_grid2 (puny_dyn) - ! is set to a special value for use with the implicit solver (kdyn = 3). - ! Thus we reset it back to puny if kdyn .ne. 3 - if (kdyn /= 3) then - call icepack_query_parameters(puny_out=puny) - call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & - file=__FILE__, line=__LINE__) - call ice_init_constants(puny_dyn_in=puny) - endif call init_grid2 ! grid variables call init_zbgc ! vertical biogeochemistry initialization call init_calendar ! initialize some calendar stuff @@ -139,6 +126,8 @@ subroutine cice_init if (kdyn == 2) then call alloc_dyn_eap ! allocate dyn_eap arrays call init_eap (dt_dyn) ! define eap dynamics parameters, variables + else if (kdyn == 3) then + call init_vp (dt_dyn) ! define vp dynamics parameters, variables else ! for both kdyn = 0 or 1 call init_evp (dt_dyn) ! 
define evp dynamics parameters, variables endif diff --git a/cicecore/shared/ice_constants.F90 b/cicecore/shared/ice_constants.F90 index b0370b0b1..c49732e35 100644 --- a/cicecore/shared/ice_constants.F90 +++ b/cicecore/shared/ice_constants.F90 @@ -33,9 +33,6 @@ module ice_constants real (kind=dbl_kind), public :: & shlat = 30.0_dbl_kind ,&! artificial masking edge (deg) nhlat = -30.0_dbl_kind ! artificial masking edge (deg) - - real (kind=dbl_kind), public :: & - puny_dyn = 2e-09_dbl_kind ! special puny value for computing tinyarea for implicit solver !----------------------------------------------------------------- ! numbers used outside the column package @@ -135,7 +132,7 @@ module ice_constants ! subroutine to set the cice constants subroutine ice_init_constants( & - omega_in, radius_in, spval_dbl_in, spval_in, shlat_in, nhlat_in, puny_dyn_in) + omega_in, radius_in, spval_dbl_in, spval_in, shlat_in, nhlat_in) real (kind=dbl_kind), intent(in), optional :: & omega_in , & ! angular velocity of earth (rad/sec) @@ -143,8 +140,7 @@ subroutine ice_init_constants( & spval_dbl_in , & ! special value (double precision) spval_in , & ! special value for netCDF output shlat_in , & ! artificial masking edge (deg) - nhlat_in , & ! artificial masking edge (deg) - puny_dyn_in ! special puny value for computing tinyarea + nhlat_in ! artificial masking edge (deg) character(len=*),parameter :: subname='(ice_init_constants)' @@ -154,7 +150,6 @@ subroutine ice_init_constants( & if (present(spval_in)) spval = spval_in if (present(shlat_in)) shlat = shlat_in if (present(nhlat_in)) nhlat = nhlat_in - if (present(puny_dyn_in)) puny_dyn = puny_dyn_in end subroutine ice_init_constants @@ -163,7 +158,7 @@ end subroutine ice_init_constants ! subroutine to set the cice constants subroutine ice_query_constants( & - omega_out, radius_out, spval_dbl_out, spval_out, shlat_out, nhlat_out, puny_dyn_out) + omega_out, radius_out, spval_dbl_out, spval_out, shlat_out, nhlat_out) real (kind=dbl_kind), intent(out), optional :: & omega_out , & ! angular velocity of earth (rad/sec) @@ -171,8 +166,7 @@ subroutine ice_query_constants( & spval_dbl_out , & ! special value (double precision) spval_out , & ! special value for netCDF output shlat_out , & ! artificial masking edge (deg) - nhlat_out , & ! artificial masking edge (deg) - puny_dyn_out ! special puny value for computing tinyarea + nhlat_out ! artificial masking edge (deg) character(len=*),parameter :: subname='(ice_query_constants)' @@ -182,7 +176,6 @@ subroutine ice_query_constants( & if (present(spval_out)) spval_out = spval if (present(shlat_out)) shlat_out = shlat if (present(nhlat_out)) nhlat_out = nhlat - if (present(puny_dyn_out)) puny_dyn_out = puny_dyn end subroutine ice_query_constants From 58fa7e8a4088eae903ccf8c53dadcc82595eebae Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 10 Jun 2019 13:56:05 -0400 Subject: [PATCH 093/196] ice_dyn_vp: make 'calc_bvec' use already computed 'vrel' Pass the already computed 'vrel' as an argument instead of recomputing it. 
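For reference, 'vrel' is the ice-ocean drag factor (magnitude of the
relative ocean current times rhow*drag*aice) that 'calc_vrel_Cb' already
computes. The intent of the change, sketched with the names used in
ice_dyn_vp.F90 (illustration only, not a verbatim excerpt of the final
code):

    ! computed once in calc_vrel_Cb ...
    vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - uvel(i,j))**2 + &
                                           (vocn(i,j) - vvel(i,j))**2) ! m/s
    ! ... then passed to calc_bvec and reused for the ice/ocean stress
    taux = vrel(i,j)*waterx(i,j) ! NOTE this is not the entire
    tauy = vrel(i,j)*watery(i,j) ! ocn stress term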
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 3bab1290e..4892fa398 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -764,7 +764,8 @@ subroutine picard_solver (icellt, icellu, & waterx (:,:,iblk), watery (:,:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk)) + bx (:,:,iblk), by (:,:,iblk), & + vrel (:,:,iblk)) ! prepare precond matrix if (precond .gt. 1) then @@ -1228,7 +1229,8 @@ subroutine anderson_solver (icellt, icellu, & waterx (:,:,iblk), watery (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk)) + bx (:,:,iblk), by (:,:,iblk), & + vrel (:,:,iblk)) ! Compute nonlinear residual norm (PDE residual) call matvec (nx_block , ny_block, & @@ -3048,7 +3050,8 @@ subroutine calc_bvec (nx_block, ny_block, & waterx, watery, & uvel, vvel, & bxfix, byfix, & - bx, by) + bx, by, & + vrel) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -3070,7 +3073,8 @@ subroutine calc_bvec (nx_block, ny_block, & bxfix , & ! bx = taux + bxfix !jfl byfix , & ! by = tauy + byfix !jfl uocn , & ! ocean current, x-direction (m/s) - vocn ! ocean current, y-direction (m/s) + vocn , & ! ocean current, y-direction (m/s) + vrel ! relative ice-ocean velocity real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(in) :: & @@ -3087,7 +3091,6 @@ subroutine calc_bvec (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - vrel , & ! relative ice-ocean velocity utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? taux, tauy , & ! part of ocean stress term strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) @@ -3098,8 +3101,6 @@ subroutine calc_bvec (nx_block, ny_block, & !----------------------------------------------------------------- ! calc b vector !----------------------------------------------------------------- - - !JFL vrel could be sent here (already calc before... call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) @@ -3113,12 +3114,9 @@ subroutine calc_bvec (nx_block, ny_block, & utp = uvel(i,j) vtp = vvel(i,j) - ! (magnitude of relative ocean current)*rhow*drag*aice - vrel = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & - (vocn(i,j) - vtp)**2) ! m/s ! ice/ocean stress - taux = vrel*waterx(i,j) ! NOTE this is not the entire - tauy = vrel*watery(i,j) ! ocn stress term + taux = vrel(i,j)*waterx(i,j) ! NOTE this is not the entire + tauy = vrel(i,j)*watery(i,j) ! ocn stress term ! divergence of the internal stress tensor (only Pr part, i.e. dPr/dx) strintx = uarear(i,j)* & From b23a94077d55c5379a69c29f2bcb658d67fc3477 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 10 Jun 2019 14:09:53 -0400 Subject: [PATCH 094/196] ice_dyn_vp: remove references to fgmres2 subroutine We still reference 'fgmres2', an MPI implementation of FGMRES that was never completely integrated with the implicit solver. Remove references to it as well as its arguments. 
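With 'fgmres2' gone, the only Krylov driver left is the
reverse-communication 'fgmres' routine steered by 'icode'. Schematically
(argument list and icode protocol abridged from the existing code; the
branch body below is a placeholder, not the actual code):

    icode = 0
 1  continue
    call fgmres (ntot, im_fgmres, bvec, sol, its, vv, ww, wk11, wk22, &
                 gamma, maxits_fgmres, monitor_fgmres, icode)
    if (icode /= 0) then
       ! fgmres returned a work request: apply the preconditioner or
       ! the matrix-vector product to wk11, store the result in wk22,
       ! then re-enter the solver
       goto 1
    endif
    ! icode == 0: converged (or maximum number of iterations reached)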
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 4892fa398..3bb10e2a9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -674,7 +674,6 @@ subroutine picard_solver (icellt, icellu, & kOL , & ! outer loop iteration iblk , & ! block index icode , & ! code for fgmres solver - ischmi , & ! Quesse ca!?!?! jfl its , & ! iteration nb for fgmres fgmres_its , & ! final nb of fgmres iterations ierr ! code for pgmres preconditioner !phb: needed? @@ -838,7 +837,6 @@ subroutine picard_solver (icellt, icellu, & icode = 0 ! its = 0 - ischmi = 0 ! form b vector from matrices (nblocks matrices) call arrays_to_vec (nx_block, ny_block, nblocks, & @@ -865,9 +863,6 @@ subroutine picard_solver (icellt, icellu, & !----------------------------------------------------------------------- 1 continue !----------------------------------------------------------------------- - - !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & - ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & gamma, maxits_fgmres, monitor_fgmres, & @@ -1160,7 +1155,6 @@ subroutine anderson_solver (icellt, icellu, & coeffs ! coeffs used to combine previous solutions real (kind=dbl_kind) :: & - conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2 tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) tol_nl , & ! tolerance for nonlinear convergence: gammaNL * (initial nonlinear residual norm) fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x @@ -1587,8 +1581,6 @@ subroutine fgmres_solver (ntot, bvec, & 1 continue !----------------------------------------------------------------------- - !call fgmres2( ntot,im_fgmres,bvec,sol,ischmi,vv,ww,wk11,wk22, & - ! sol_eps, maxits,its,conv,icode ) call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & gamma, maxits_fgmres,monitor_fgmres, & From c9bf1c78e8074db2b61e6ff1bcd083776feb0bf6 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 9 Jul 2019 13:07:14 -0400 Subject: [PATCH 095/196] dynamics: remove 'BLAS_routines.F90' It is a best practice to use the BLAS library natively available on a system, or install a high performance implementation like ATLAS, OpenBLAS, etc. Remove the file 'BLAS_routines.F90', which contains BLAS routines copied from the reference NETLIB implementation. --- cicecore/cicedynB/dynamics/BLAS_routines.F90 | 195 ------------------- 1 file changed, 195 deletions(-) delete mode 100644 cicecore/cicedynB/dynamics/BLAS_routines.F90 diff --git a/cicecore/cicedynB/dynamics/BLAS_routines.F90 b/cicecore/cicedynB/dynamics/BLAS_routines.F90 deleted file mode 100644 index 5d0cbfe81..000000000 --- a/cicecore/cicedynB/dynamics/BLAS_routines.F90 +++ /dev/null @@ -1,195 +0,0 @@ - subroutine dcopy(n,dx,incx,dy,incy) -! -! copies a vector, x, to a vector, y. -! uses unrolled loops for increments equal to one. -! jack dongarra, linpack, 3/11/78. -! - double precision dx(1),dy(1) - integer i,incx,incy,ix,iy,m,mp1,n -! - if(n.le.0)return - if(incx.eq.1.and.incy.eq.1)go to 20 -! -! code for unequal increments or equal increments -! not equal to 1 -! - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - dy(iy) = dx(ix) - ix = ix + incx - iy = iy + incy - 10 continue - return -! -! 
code for both increments equal to 1 -! -! -! clean-up loop -! - 20 m = mod(n,7) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - dy(i) = dx(i) - 30 continue - if( n .lt. 7 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,7 - dy(i) = dx(i) - dy(i + 1) = dx(i + 1) - dy(i + 2) = dx(i + 2) - dy(i + 3) = dx(i + 3) - dy(i + 4) = dx(i + 4) - dy(i + 5) = dx(i + 5) - dy(i + 6) = dx(i + 6) - 50 continue - return - end - - subroutine daxpy(n,da,dx,incx,dy,incy) -! -! constant times a vector plus a vector. -! uses unrolled loops for increments equal to one. -! jack dongarra, linpack, 3/11/78. -! - double precision dx(1),dy(1),da - integer i,incx,incy,ix,iy,m,mp1,n -! - if(n.le.0)return - if (da .eq. 0.0d0) return - if(incx.eq.1.and.incy.eq.1)go to 20 -! -! code for unequal increments or equal increments -! not equal to 1 -! - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - dy(iy) = dy(iy) + da*dx(ix) - ix = ix + incx - iy = iy + incy - 10 continue - return -! -! code for both increments equal to 1 -! -! -! clean-up loop -! - 20 m = mod(n,4) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - dy(i) = dy(i) + da*dx(i) - 30 continue - if( n .lt. 4 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,4 - dy(i) = dy(i) + da*dx(i) - dy(i + 1) = dy(i + 1) + da*dx(i + 1) - dy(i + 2) = dy(i + 2) + da*dx(i + 2) - dy(i + 3) = dy(i + 3) + da*dx(i + 3) - 50 continue - return - end - - subroutine dscal(n,da,dx,incx) - -! scales a vector by a constant. -! uses unrolled loops for increment equal to one. -! jack dongarra, linpack, 3/11/78. -! modified 3/93 to return if incx .le. 0. -! modified 12/3/93, array(1) declarations changed to array(*) -! - double precision da,dx(*) - integer i,incx,m,mp1,n,nincx -! - if( n .le. 0 .or. incx .le. 0 )return - if(incx==1)go to 20 -! -! code for increment not equal to 1 -! - nincx = n*incx - do 10 i = 1,nincx,incx - dx(i) = da*dx(i) - 10 continue - return -! -! code for increment equal to 1 -! -! -! clean-up loop -! - 20 m = mod(n,5) - if( m == 0 ) go to 40 - do 30 i = 1,m - dx(i) = da*dx(i) - 30 continue - if( n < 5 ) return - 40 mp1 = m + 1 - do 50 i = mp1,n,5 - dx(i) = da*dx(i) - dx(i + 1) = da*dx(i + 1) - dx(i + 2) = da*dx(i + 2) - dx(i + 3) = da*dx(i + 3) - dx(i + 4) = da*dx(i + 4) - 50 continue - return - end - - double precision function ddot(n,dx,incx,dy,incy) -! -! forms the dot product of two vectors. -! uses unrolled loops for increments equal to one. -! jack dongarra, linpack, 3/11/78. -! - double precision dx(1),dy(1),dtemp - integer i,incx,incy,ix,iy,m,mp1,n -! - ddot = 0.0d0 - dtemp = 0.0d0 - if(n.le.0)return - if(incx.eq.1.and.incy.eq.1)go to 20 -! -! code for unequal increments or equal increments -! not equal to 1 -! - ix = 1 - iy = 1 - if(incx.lt.0)ix = (-n+1)*incx + 1 - if(incy.lt.0)iy = (-n+1)*incy + 1 - do 10 i = 1,n - dtemp = dtemp + dx(ix)*dy(iy) - ix = ix + incx - iy = iy + incy - 10 continue - ddot = dtemp - return -! -! code for both increments equal to 1 -! -! -! clean-up loop -! - 20 m = mod(n,5) - if( m .eq. 0 ) go to 40 - do 30 i = 1,m - dtemp = dtemp + dx(i)*dy(i) - 30 continue - if( n .lt. 
5 ) go to 60 - 40 mp1 = m + 1 - do 50 i = mp1,n,5 - dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + & - dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) - 50 continue - 60 ddot = dtemp - - return - end - - - - \ No newline at end of file From 5848a506777126790470ca9ee0b03f9641abefc5 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 9 Jul 2019 16:00:55 -0400 Subject: [PATCH 096/196] ice_dyn_vp: synchronize 'imp_solver' with ice_dyn_evp::evp The 'imp_solver' subroutine is the main driver of the implicit VP solver and was copied from a very old version of the 'evp' EVP driver in ice_dyn_evp.F90. Bring 'imp_solver' in line with changes in 'evp', and move some statements around to keep related statements together. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 79 ++++++++++++----------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 3bb10e2a9..b2cf83a00 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -46,9 +46,6 @@ module ice_dyn_vp use ice_exit, only: abort_ice use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted use icepack_intfc, only: icepack_ice_strength, icepack_query_parameters -#ifdef CICE_IN_NEMO - use icepack_intfc, only: calc_strair -#endif implicit none private @@ -216,9 +213,9 @@ subroutine imp_solver (dt) umassdti ! mass of U-cell/dte (kg/m^2 s) real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) - - real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:) - + + logical (kind=log_kind) :: calc_strair + integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & icetmask, & ! ice extent mask (T-cell) halomask ! generic halo mask @@ -229,6 +226,8 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block + real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:) + character(len=*), parameter :: subname = '(imp_solver)' call ice_timer_start(timer_dynamics) ! dynamics @@ -302,21 +301,22 @@ subroutine imp_solver (dt) call to_ugrid(tmass,umass) call to_ugrid(aice_init, aiu) -#ifdef CICE_IN_NEMO !---------------------------------------------------------------- - ! Set wind stress to values supplied via NEMO + ! Set wind stress to values supplied via NEMO or other forcing ! This wind stress is rotated on u grid and multiplied by aice !---------------------------------------------------------------- + call icepack_query_parameters(calc_strair_out=calc_strair) + call icepack_warnings_flush(nu_diag) + if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & + file=__FILE__, line=__LINE__) + if (.not. calc_strair) then strairx(:,:,:) = strax(:,:,:) strairy(:,:,:) = stray(:,:,:) else -#endif - call t2ugrid_vector(strairx) - call t2ugrid_vector(strairy) -#ifdef CICE_IN_NEMO + call t2ugrid_vector(strairx) + call t2ugrid_vector(strairy) endif -#endif ! tcraig, tcx, threading here leads to some non-reproducbile results and failures in icepack_ice_strength ! need to do more debugging @@ -395,23 +395,9 @@ subroutine imp_solver (dt) !$TCXOMP END PARALLEL DO call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) - !----------------------------------------------------------------- - ! 
calc size of problem (ntot) and allocate arrays and vectors - !----------------------------------------------------------------- - - ntot=0 - do iblk = 1, nblocks - ntot = ntot + icellu(iblk) - enddo - ntot = 2*ntot ! times 2 because of u and v - - allocate(bvec(ntot), sol(ntot), diagvec(ntot)) - - !----------------------------------------------------------------- - call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) @@ -443,16 +429,29 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- if (basalstress) then - do iblk = 1, nblocks - call basal_stress_coeff (nx_block, ny_block, & - icellu (iblk), & - indxui(:,iblk), indxuj(:,iblk), & - vice(:,:,iblk), aice(:,:,iblk), & - hwater(:,:,iblk), Tbu(:,:,iblk)) - enddo + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call basal_stress_coeff (nx_block, ny_block, & + icellu (iblk), & + indxui(:,iblk), indxuj(:,iblk), & + vice(:,:,iblk), aice(:,:,iblk), & + hwater(:,:,iblk), Tbu(:,:,iblk)) + enddo + !$OMP END PARALLEL DO endif - + !----------------------------------------------------------------- + ! calc size of problem (ntot) and allocate arrays and vectors + !----------------------------------------------------------------- + + ntot=0 + do iblk = 1, nblocks + ntot = ntot + icellu(iblk) + enddo + ntot = 2*ntot ! times 2 because of u and v + + allocate(bvec(ntot), sol(ntot), diagvec(ntot)) + !----------------------------------------------------------------- ! Start of nonlinear iteration !----------------------------------------------------------------- @@ -486,8 +485,13 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- deallocate(bvec, sol, diagvec) + deallocate(fld2) + if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) + !----------------------------------------------------------------- + ! Compute deformations + !----------------------------------------------------------------- !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call deformations (nx_block, ny_block, & @@ -502,10 +506,9 @@ subroutine imp_solver (dt) rdg_conv (:,:,iblk), rdg_shear (:,:,iblk)) enddo !$OMP END PARALLEL DO + ! phb: here we do halo updates for stresses (stressp_i, stressm_i, stress12_i, i=1..4), ! but stresses have not been updated ! (should be done in deformations ?) - if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) - ! Force symmetry across the tripole seam if (trim(grid_type) == 'tripole') then if (maskhalo_dyn) then From e0f4c5d4c6b293516073f60ba43ac10c3b49670f Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 10 Jul 2019 09:37:27 -0400 Subject: [PATCH 097/196] dynamics: refactor strain rates and deformation calculations Create subroutines for strain rates and deformations computations, add them to ice_dyn_shared and call them in ice_dyn_evp and ice_dyn_vp. This reduces code duplication. 
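The shared kernel returns, at each of the four cell corners, the
divergence, tension and shearing strain rates together with
Delta = sqrt(div**2 + ecci*(tension**2 + shear**2)). All callers now use
the same per-grid-point call, as in the diff below:

    call strain_rates (nx_block, ny_block, &
                       i, j, &
                       uvel, vvel, &
                       dxt, dyt, &
                       cxp, cyp, &
                       cxm, cym, &
                       divune, divunw, &
                       divuse, divusw, &
                       tensionne, tensionnw, &
                       tensionse, tensionsw, &
                       shearne, shearnw, &
                       shearse, shearsw, &
                       Deltane, Deltanw, &
                       Deltase, Deltasw )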
--- cicecore/cicedynB/dynamics/ice_dyn_evp.F90 | 85 +++---- cicecore/cicedynB/dynamics/ice_dyn_shared.F90 | 197 ++++++++++++++++- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 209 ++++++------------ 3 files changed, 297 insertions(+), 194 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 index 0f8acd547..17c61d083 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 @@ -599,6 +599,8 @@ subroutine stress (nx_block, ny_block, & rdg_conv, rdg_shear, & str ) + use ice_dyn_shared, only: strain_rates, deformations + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions ksub , & ! subcycling step @@ -676,58 +678,20 @@ subroutine stress (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - - !----------------------------------------------------------------- - ! on last subcycle, save quantities for mechanical redistribution - !----------------------------------------------------------------- - if (ksub == ndte) then - divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) - tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) - rdg_conv(i,j) = -min(divu(i,j),c0) - rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) - - ! diagnostic only - ! 
shear = sqrt(tension**2 + shearing**2) - shear(i,j) = p25*tarear(i,j)*sqrt( & - (tensionne + tensionnw + tensionse + tensionsw)**2 & - + (shearne + shearnw + shearse + shearsw)**2) - - endif + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) !----------------------------------------------------------------- ! strength/Delta ! kg/s @@ -902,6 +866,23 @@ subroutine stress (nx_block, ny_block, & enddo ! ij + !----------------------------------------------------------------- + ! on last subcycle, save quantities for mechanical redistribution + !----------------------------------------------------------------- + if (ksub == ndte) then + call deformations (nx_block , ny_block , & + icellt , & + indxti , indxtj , & + uvel , vvel , & + dxt , dyt , & + cxp , cyp , & + cxm , cym , & + tarear , & + shear , divu , & + rdg_conv , rdg_shear ) + + endif + end subroutine stress !======================================================================= diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index c3dc83a24..183783350 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -24,7 +24,7 @@ module ice_dyn_shared private public :: init_evp, set_evp_parameters, stepu, principal_stress, & dyn_prep1, dyn_prep2, dyn_finish, basal_stress_coeff, & - alloc_dyn_shared + alloc_dyn_shared, deformations, strain_rates ! namelist parameters @@ -991,6 +991,201 @@ subroutine principal_stress(nx_block, ny_block, & end subroutine principal_stress +!======================================================================= + +! Compute deformations for mechanical redistribution +! +! author: Elizabeth C. Hunke, LANL +! +! 2019: subroutine created by Philippe Blain, ECCC + + subroutine deformations (nx_block, ny_block, & + icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + tarear, & + shear, divu, & + rdg_conv, rdg_shear ) + + use ice_constants, only: p25, p5 + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellt ! no. of cells where icetmask = 1 + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj ! compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm , & ! 0.5*HTN - 1.5*HTN + tarear ! 1/tarea + + real (kind=dbl_kind), dimension (nx_block,ny_block), & + intent(inout) :: & + shear , & ! strain rate II component (1/s) + divu , & ! strain rate I component, velocity divergence (1/s) + rdg_conv , & ! convergence term for ridging (1/s) + rdg_shear ! shear term for ridging (1/s) + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + real (kind=dbl_kind) :: & ! at each corner : + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw , & ! 
Delta + tmp ! useful combination + + character(len=*), parameter :: subname = '(deformations)' + +!DIR$ CONCURRENT !Cray +!cdir nodep !NEC +!ocl novrec !Fujitsu + do ij = 1, icellt + i = indxti(ij) + j = indxtj(ij) + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) + !----------------------------------------------------------------- + ! deformations for mechanical redistribution + !----------------------------------------------------------------- + divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) + tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) + rdg_conv(i,j) = -min(divu(i,j),c0) + rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) + + ! diagnostic only + ! shear = sqrt(tension**2 + shearing**2) + shear(i,j) = p25*tarear(i,j)*sqrt( & + (tensionne + tensionnw + tensionse + tensionsw)**2 + & + (shearne + shearnw + shearse + shearsw )**2) + + enddo ! ij + + end subroutine deformations + +!======================================================================= + +! Compute strain rates +! +! author: Elizabeth C. Hunke, LANL +! +! 2019: subroutine created by Philippe Blain, ECCC + + subroutine strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block ! block dimensions + + integer (kind=int_kind) :: & + i, j ! indices + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + dxt , & ! width of T-cell through the middle (m) + dyt , & ! height of T-cell through the middle (m) + cyp , & ! 1.5*HTE - 0.5*HTE + cxp , & ! 1.5*HTN - 0.5*HTN + cym , & ! 0.5*HTE - 1.5*HTE + cxm ! 0.5*HTN - 1.5*HTN + + real (kind=dbl_kind), intent(out):: & ! at each corner : + divune, divunw, divuse, divusw , & ! divergence + tensionne, tensionnw, tensionse, tensionsw, & ! tension + shearne, shearnw, shearse, shearsw , & ! shearing + Deltane, Deltanw, Deltase, Deltasw ! Delta + + character(len=*), parameter :: subname = '(strain_rates)' + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) + divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) + divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) + divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) + + ! 
tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & + + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) + tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & + + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) + tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & + + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) + tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) + + ! shearing strain rate = e_12 + shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & + - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) + shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & + - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) + shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & + - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) + shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & + - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) + + ! Delta (in the denominator of zeta, eta) + Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) + Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) + Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) + Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + + end subroutine strain_rates + !======================================================================= end module ice_dyn_shared diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index b2cf83a00..daed6b298 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -226,7 +226,10 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block - real (kind=dbl_kind), allocatable :: bvec(:), sol(:), diagvec(:) + real (kind=dbl_kind), allocatable :: & + bvec(:) , & ! right-hand-side vector + sol(:) , & ! solution vector + diagvec(:) ! diagonal vector character(len=*), parameter :: subname = '(imp_solver)' @@ -1702,6 +1705,8 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & strength, zetaD, & stPr) + use ice_dyn_shared, only: strain_rates + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 @@ -1765,41 +1770,20 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! 
shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) if (capping) then @@ -1909,6 +1893,8 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & zetaD, & str ) + use ice_dyn_shared, only: strain_rates + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 @@ -1983,41 +1969,20 @@ subroutine stress_prime_vpOLD (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! 
Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) !----------------------------------------------------------------- ! the stresses ! kg/s^2 @@ -2165,6 +2130,8 @@ subroutine stress_vp (nx_block, ny_block, & stress12_3, stress12_4, & str ) + use ice_dyn_shared, only: strain_rates + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 @@ -2240,41 +2207,20 @@ subroutine stress_vp (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) !----------------------------------------------------------------- ! the stresses ! kg/s^2 @@ -2700,6 +2646,8 @@ subroutine matvec (nx_block, ny_block, & uarear, & Au, Av) + use ice_dyn_shared, only: strain_rates + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu, & ! 
total count when iceumask is true @@ -2794,41 +2742,20 @@ subroutine matvec (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) + call strain_rates (nx_block, ny_block, & + i, j, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + divune, divunw, & + divuse, divusw, & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne, shearnw, & + shearse, shearsw, & + Deltane, Deltanw, & + Deltase, Deltasw ) !----------------------------------------------------------------- ! the stresses ! kg/s^2 From c41d1e862d4bd38d5013e542e89dcee5a04ca034 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 15 Jul 2019 16:19:27 -0400 Subject: [PATCH 098/196] dynamics: add sol_fgmres2d from GEM as ice_krylov.F90 (does not compile) Copy (verbatim) the 2D FGMRES solver from the GEM atmospheric model as 'ice_krylov.F90'. --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 317 ++++++++++++++++++++++ 1 file changed, 317 insertions(+) create mode 100644 cicecore/cicedynB/dynamics/ice_krylov.F90 diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 new file mode 100644 index 000000000..d91656a7b --- /dev/null +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -0,0 +1,317 @@ +!---------------------------------- LICENCE BEGIN ------------------------------- +! GEM - Library of kernel routines for the GEM numerical atmospheric model +! Copyright (C) 1990-2010 - Division de Recherche en Prevision Numerique +! Environnement Canada +! This library is free software; you can redistribute it and/or modify it +! under the terms of the GNU Lesser General Public License as published by +! the Free Software Foundation, version 2.1 of the License. This library is +! 
distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +! without even the implied warranty of MERCHANTABILITY or FITNESS FOR A +! PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +! You should have received a copy of the GNU Lesser General Public License +! along with this library; if not, write to the Free Software Foundation, Inc., +! 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +!---------------------------------- LICENCE END --------------------------------- + +!** fgmres - Flexible generalized minimum residual method (with restarts). +! + subroutine sol_fgmres2d (solution, matvec, rhs_b, tolerance, maxinner, maxouter, nbiter, conv, level) + use dyn_fisl_options + use glb_ld + use ldnh + use prec + use sol + use HORgrid_options, only: Grd_yinyang_L + + implicit none +#include + + integer, intent(in) :: level + + ! Initial guess on input, approximate solution on output + real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(inout) :: solution + + ! A matrix-vector product routine (A.*v). + interface + subroutine matvec(v, prod, level) + use ldnh, only: ldnh_minx, ldnh_maxx, ldnh_miny, ldnh_maxy + implicit none + integer, intent(in) :: level + real*8, dimension (ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(in) :: v + real*8, dimension (ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(out) :: prod + end subroutine + end interface + + ! Right hand side of the linear system. + real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(in) :: rhs_b + + ! Tolerance to achieve. The algorithm terminates when the relative + ! residual is below tolerance. + real*8, intent(in) :: tolerance + + ! Restarts the method every maxinner inner iterations. + integer, intent(in) :: maxinner + + ! Specifies the maximum number of outer iterations. + ! Iteration will stop after maxinner*maxouter steps + ! even if the specified tolerance has not been achieved. + integer, intent(in) :: maxouter + + ! Total number of iteration performed + integer, intent(out) :: nbiter + + real*8, intent(out) :: conv + + ! Author + ! Stéphane Gaudreault, Abdessamad Qaddouri -- March 2018 + ! + ! References + ! C. T. Kelley, Iterative Methods for Linear and Nonlinear Equations, SIAM, 1995 + ! (https://www.siam.org/books/textbooks/fr16_book.pdf) + ! + ! Y. Saad, Iterative Methods for Sparse Linear Systems. SIAM, 2003. + ! (http://www-users.cs.umn.edu/~saad/IterMethBook_2ndEd.pdf) + ! + integer :: i, j, k, k1, ii, jj, ierr + + integer :: initer, outiter, nextit, it + real*8 :: relative_tolerance, r0 + real*8 :: norm_residual, nu, t + real*8, dimension(maxinner+1, maxinner) :: hessenberg + + real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy) :: work_space + real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy, maxinner) :: ww + real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy, maxinner+1) :: vv + + real*8 :: local_dot, dotprod + real*8, dimension(maxinner) :: dotprod_local + + real*8, dimension(maxinner+1) :: rot_cos, rot_sin, gg + logical almost_zero + + integer i0, in, j0, jn + integer niloc,njloc + character(len=9) :: communicate_S + + communicate_S = "GRID" + if (Grd_yinyang_L) communicate_S = "MULTIGRID" + + + niloc = (l_ni-pil_e)-(1+pil_w)+1 + njloc = (l_nj-pil_n)-(1+pil_s)+1 + + + ! Here we go ! + + i0 = 1 + sol_pil_w + in = l_ni - sol_pil_e + j0 = 1 + sol_pil_s + jn = l_nj - sol_pil_n + + outiter = 0 + nbiter = 0 + + conv = 1.d0 + + ! 
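!
! In outline (cf. the Saad reference above, ch. 9), each restart builds the
! Arnoldi relation
!    A * Z_m = V_{m+1} * \bar{H}_m ,
! where the columns of V_{m+1} are the orthonormal vectors 'vv', the columns
! of Z_m are the preconditioned vectors 'ww', and \bar{H}_m is the
! (maxinner+1) x maxinner matrix 'hessenberg' holding the Gram-Schmidt
! coefficients.  The solution is then updated as x_m = x_0 + Z_m * y_m, with
! y_m minimizing || beta*e_1 - \bar{H}_m * y ||_2 and beta the norm of the
! initial residual computed next.
!
+      ! 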
Residual of the initial iterate + call matvec(solution, work_space, level) + + do j=j0,jn + do i=i0,in + vv(i,j,1) = rhs_b(i,j) - work_space(i,j) + end do + end do + + do + + local_dot = 0.0d0 + do j=j0,jn +!DIR$ SIMD + do i=i0,in + local_dot = local_dot + (vv(i, j, 1) * vv(i, j, 1)) + end do + end do + + call RPN_COMM_allreduce(local_dot, norm_residual, 1, "MPI_double_precision", "MPI_sum", communicate_S, ierr) + norm_residual = sqrt(norm_residual) + + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + nu = 1.0d0 / norm_residual + do j=j0,jn + do i=i0,in + vv(i,j,1) = vv(i,j,1) * nu + end do + end do + + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! initialize 1-st term of rhs of hessenberg system. + gg(1) = norm_residual + gg(2:) = 0.d0 + + initer = 0 + + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 + + if (sol2D_precond_S == 'JACOBI') then + call pre_jacobi2D ( work_space(i0:in,j0:jn), & + vv(i0:in,j0:jn, initer), & + Prec_xevec_8, niloc, njloc,& + Prec_ai_8, Prec_bi_8, Prec_ci_8 ) + else + work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) + endif + + ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) + + call matvec ( work_space, vv(:,:,nextit), level ) + + ! Classical Gram-Schmidt orthogonalisation process + dotprod_local = 0.d0 + do it=1,initer + local_dot = 0.0d0 + do j=j0,jn +!DIR$ SIMD + do i=i0,in + local_dot = local_dot + (vv(i, j, it) * vv(i, j, nextit)) + end do + end do + dotprod_local(it) = local_dot + end do + + call RPN_COMM_allreduce(dotprod_local(:), hessenberg(1,initer), initer, "MPI_double_precision", "MPI_sum", communicate_S, ierr) + + do it=1,initer + do j=j0,jn + do i=i0,in + vv(i, j, nextit) = vv(i, j, nextit) - hessenberg(it,initer) * vv(i, j, it) + end do + end do + end do + + local_dot = 0.d0 + do j=j0,jn +!DIR$ SIMD + do i=i0,in + local_dot = local_dot + (vv(i, j, nextit) * vv(i, j, nextit)) + end do + end do + + call RPN_COMM_allreduce(local_dot,dotprod,1,"MPI_double_precision","MPI_sum",communicate_S,ierr) + + hessenberg(nextit,initer) = sqrt(dotprod) + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + nu = 1.d0 / hessenberg(nextit,initer) + do j=j0,jn + do i=i0,in + vv(i, j, nextit) = vv(i, j, nextit) * nu + end do + end do + end if + + ! Form and store the information for the new Givens rotation + if (initer > 1) then + do k=2,initer + k1 = k-1 + t = hessenberg(k1,initer) + hessenberg(k1,initer) = rot_cos(k1)*t + rot_sin(k1)*hessenberg(k,initer) + hessenberg(k,initer) = -rot_sin(k1)*t + rot_cos(k1)*hessenberg(k,initer) + end do + + end if + + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. almost_zero(nu)) then + rot_cos(initer) = hessenberg(initer,initer) / nu + rot_sin(initer) = hessenberg(nextit,initer) / nu + + gg(nextit) = -rot_sin(initer) * gg(initer) + gg(initer) = rot_cos(initer) * gg(initer) + + hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) + end if + + norm_residual = abs(gg(nextit)) + + conv = norm_residual / r0 + + if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then + exit + endif + + end do + + ! At this point either the maximum number of inner iterations + ! was reached or the absolute residual is below the scaled tolerance. + + ! 
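!
! The Givens rotations applied along the way maintain a QR factorization of
! \bar{H}_m, so the least-squares problem min || beta*e_1 - \bar{H}_m * y ||_2
! has already been reduced to an initer x initer triangular system: the
! triangular factor R_m sits in 'hessenberg' and the rotated right-hand side
! g_m in 'gg', so back substitution yields the coefficients y_m directly.
!
+      ! 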
Solve upper triangular system + gg(initer) = gg(initer) / hessenberg(initer,initer) + do ii=2,initer + k = initer - ii + 1 + k1 = k + 1 + t = gg(k) + do j=k1,initer + t = t - hessenberg(k,j) * gg(j) + end do + gg(k) = t / hessenberg(k,k) + end do + + ! Form linear combination to get solution. + do it=1,initer + t = gg(it) + + do j=j0,jn + do i=i0,in + solution(i, j) = solution(i, j) + t * ww(i, j, it) + end do + end do + + end do + + outiter = outiter + 1 + + if (norm_residual <= relative_tolerance .or. outiter > maxouter) then + return + end if + + ! Solution is not convergent : compute residual vector and continue. + do it=1,initer + jj = nextit - it + 1 + gg(jj-1) = -rot_sin(jj-1) * gg(jj) + gg(jj) = rot_cos(jj-1) * gg(jj) + end do + + do it=1,nextit + t = gg(it) + if (it == 1) then + t = t - 1.d0 + end if + + do j=j0,jn +!DIR$ SIMD + do i=i0,in + vv(i, j, 1) = vv(i, j, 1) + t * vv(i, j, it) + end do + end do + + end do + + end do + + return + end From 8319355458302477b6c1f0d8b062a14c664f020b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 19 Jul 2019 11:14:56 -0400 Subject: [PATCH 099/196] ice_krylov: adapt FGMRES algo up to inner loop (WIP) --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 88 +-- cicecore/cicedynB/dynamics/ice_krylov.F90 | 666 +++++++++++++--------- 2 files changed, 440 insertions(+), 314 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index daed6b298..3aaa703eb 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -50,7 +50,7 @@ module ice_dyn_vp implicit none private public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag, & - init_vp + init_vp, residual_vec, calc_L2norm_squared ! namelist parameters @@ -823,7 +823,7 @@ subroutine picard_solver (icellt, icellu, & L2norm(iblk)) enddo !$OMP END PARALLEL DO - nlres_norm = sqrt(sum(L2norm**2)) + nlres_norm = sqrt(sum(L2norm)) if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & " nonlin_res_L2norm= ", nlres_norm @@ -1017,16 +1017,16 @@ subroutine picard_solver (icellt, icellu, & do iblk = 1, nblocks fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - call calc_L2norm (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - fpresx(:,:,iblk), fpresy(:,:,iblk), & - L2norm (iblk)) + call calc_L2norm_squared (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) enddo !$OMP END PARALLEL DO if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " fixed_point_res_L2norm= ", sqrt(sum(L2norm**2)) + " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) endif enddo ! outer loop @@ -1256,7 +1256,7 @@ subroutine anderson_solver (icellt, icellu, & L2norm(iblk)) enddo !$OMP END PARALLEL DO - nlres_norm = sqrt(sum(L2norm**2)) ! phb: change after parallelization + nlres_norm = sqrt(sum(L2norm)) ! phb: change after parallelization if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " nonlin_res_L2norm= ", nlres_norm @@ -1344,15 +1344,15 @@ subroutine anderson_solver (icellt, icellu, & ! fpresx (:,:,:), fpresy (:,:,:)) ! !$OMP PARALLEL DO PRIVATE(iblk) ! do iblk = 1, nblocks - ! call calc_L2norm (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! 
fpresx(:,:,iblk), fpresy(:,:,iblk), & - ! L2norm (iblk)) + ! call calc_L2norm_squared (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! fpresx(:,:,iblk), fpresy(:,:,iblk), & + ! L2norm (iblk)) ! enddo ! !$OMP END PARALLEL DO ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm**2)) + ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " fixed_point_res_L2norm= ", fpres_norm endif @@ -1476,16 +1476,16 @@ subroutine anderson_solver (icellt, icellu, & do iblk = 1, nblocks fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - call calc_L2norm (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - fpresx(:,:,iblk), fpresy(:,:,iblk), & - L2norm (iblk)) + call calc_L2norm_squared (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) enddo !$OMP END PARALLEL DO if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - " progress_res_L2norm= ", sqrt(sum(L2norm**2)) + " progress_res_L2norm= ", sqrt(sum(L2norm)) endif enddo ! nonlinear iteration loop @@ -3061,7 +3061,7 @@ subroutine residual_vec (nx_block, ny_block, & bx, by, & Au, Av, & Fx, Fy, & - L2norm) + sum_squared) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -3083,8 +3083,8 @@ subroutine residual_vec (nx_block, ny_block, & Fx , & ! x residual vector, Fx = Au - bx (N/m^2) Fy ! y residual vector, Fy = Av - by (N/m^2) - real (kind=dbl_kind), intent(out) :: & - L2norm ! L2norm of residual vector + real (kind=dbl_kind), intent(out), optional :: & + sum_squared ! sum of squared residual vector components integer (kind=int_kind) :: & i, j, ij @@ -3092,10 +3092,12 @@ subroutine residual_vec (nx_block, ny_block, & character(len=*), parameter :: subname = '(residual_vec)' !----------------------------------------------------------------- - ! calc residual and its L2 norm + ! compute residual and sum its squared components !----------------------------------------------------------------- - L2norm=c0 + if (present(sum_squared)) then + sum_squared = c0 + endif do ij =1, icellu i = indxui(ij) @@ -3103,9 +3105,10 @@ subroutine residual_vec (nx_block, ny_block, & Fx(i,j) = Au(i,j) - bx(i,j) Fy(i,j) = Av(i,j) - by(i,j) - L2norm = L2norm + Fx(i,j)**2 + Fy(i,j)**2 + if (present(sum_squared)) then + sum_squared = sum_squared + Fx(i,j)**2 + Fy(i,j)**2 + endif enddo ! ij - L2norm = sqrt(L2norm) end subroutine residual_vec @@ -3604,11 +3607,11 @@ end subroutine precond_diag !======================================================================= - subroutine calc_L2norm (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - tpu, tpv, & - L2norm) + subroutine calc_L2norm_squared (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + tpu, tpv, & + L2norm) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -3620,21 +3623,21 @@ subroutine calc_L2norm (nx_block, ny_block, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - tpu , & ! x-component of vector - tpv ! y-component of vector + tpu , & ! x-component of vector grid function + tpv ! y-component of vector grid function real (kind=dbl_kind), intent(out) :: & - L2norm ! 
l^2 norm of vector grid function (tpu,tpv) + L2norm ! squared l^2 norm of vector grid function (tpu,tpv) ! local variables integer (kind=int_kind) :: & i, j, ij - character(len=*), parameter :: subname = '(calc_L2norm)' + character(len=*), parameter :: subname = '(calc_L2norm_squared)' !----------------------------------------------------------------- - ! compute l^2 norm of vector grid function (tpu,tpv) + ! compute squared l^2 norm of vector grid function (tpu,tpv) !----------------------------------------------------------------- L2norm = c0 @@ -3643,14 +3646,11 @@ subroutine calc_L2norm (nx_block, ny_block, & i = indxui(ij) j = indxuj(ij) - L2norm = L2norm + tpu(i,j)**2 - L2norm = L2norm + tpv(i,j)**2 + L2norm = L2norm + tpu(i,j)**2 + tpv(i,j)**2 - enddo ! ij - - L2norm = sqrt(L2norm) + enddo ! ij - end subroutine calc_L2norm + end subroutine calc_L2norm_squared !======================================================================= diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index d91656a7b..90fc8f80b 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -12,306 +12,432 @@ ! along with this library; if not, write to the Free Software Foundation, Inc., ! 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. !---------------------------------- LICENCE END --------------------------------- - -!** fgmres - Flexible generalized minimum residual method (with restarts). +!======================================================================= +! +! Krylov subspace methods for sea-ice dynamics +! +! See: ! - subroutine sol_fgmres2d (solution, matvec, rhs_b, tolerance, maxinner, maxouter, nbiter, conv, level) - use dyn_fisl_options - use glb_ld - use ldnh - use prec - use sol - use HORgrid_options, only: Grd_yinyang_L +! C. T. Kelley, Iterative Methods for Linear and Nonlinear Equations, SIAM, 1995 +! (https://www.siam.org/books/textbooks/fr16_book.pdf) +! +! Y. Saad, Iterative Methods for Sparse Linear Systems. SIAM, 2003. +! (http://www-users.cs.umn.edu/~saad/IterMethBook_2ndEd.pdf) +! +! author: Philippe Blain, ECCC + + module ice_krylov + + use ice_blocks, only: nx_block, ny_block + use ice_constants, only: field_loc_NEcorner, c0, c1 + use ice_domain, only: distrb_info, nblocks + use ice_domain_size, only: max_blocks + use ice_dyn_vp, only: matvec, residual_vec, calc_L2norm_squared + use ice_flux, only: fm, iceumask + use ice_global_reductions, only: global_sum + use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & + uarear, tinyarea + use ice_kinds_mod implicit none -#include + private + public :: fgmres, pgmres - integer, intent(in) :: level +!======================================================================= - ! Initial guess on input, approximate solution on output - real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(inout) :: solution + contains - ! A matrix-vector product routine (A.*v). - interface - subroutine matvec(v, prod, level) - use ldnh, only: ldnh_minx, ldnh_maxx, ldnh_miny, ldnh_maxy - implicit none - integer, intent(in) :: level - real*8, dimension (ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(in) :: v - real*8, dimension (ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(out) :: prod - end subroutine - end interface +!======================================================================= - ! Right hand side of the linear system. - real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy), intent(in) :: rhs_b +! 
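!
! For orientation, a call from the nonlinear solver of ice_dyn_vp could look
! like the following sketch (the tolerance and iteration counts here are
! only an assumption, not part of the patch):
!
!      call fgmres (icellt, icellu,        &
!                   indxti, indxtj,        &
!                   indxui, indxuj,        &
!                   zetaD,                 &
!                   Cb, vrel,              &
!                   umassdti,              &
!                   uvel, vvel,            &
!                   bx, by,                &
!                   1.e-2_dbl_kind, 50, 1, &
!                   nbiter, conv)
!
! with (uvel,vvel) passed as the initial guess and overwritten by the
! approximate solution.
!
+! 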
FGMRES: Flexible generalized minimum residual method (with restarts). +! Solves A x = b using GMRES with a varying (right) preconditioner +! +! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC + + subroutine fgmres (icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + zetaD, & + Cb, vrel, & + umassdti, & + solx, soly, & + bx, by, & + tolerance, maxinner, maxouter, nbiter, conv) - ! Tolerance to achieve. The algorithm terminates when the relative - ! residual is below tolerance. - real*8, intent(in) :: tolerance + integer (kind=int_kind), dimension(max_blocks), intent(in) :: & + icellt , & ! no. of cells where icetmask = 1 + icellu ! no. of cells where iceumask = 1 - ! Restarts the method every maxinner inner iterations. - integer, intent(in) :: maxinner + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & + indxti , & ! compressed index in i-direction + indxtj , & ! compressed index in j-direction + indxui , & ! compressed index in i-direction + indxuj ! compressed index in j-direction - ! Specifies the maximum number of outer iterations. - ! Iteration will stop after maxinner*maxouter steps - ! even if the specified tolerance has not been achieved. - integer, intent(in) :: maxouter + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & + zetaD ! zetaD = 2*zeta (viscous coefficient) - ! Total number of iteration performed - integer, intent(out) :: nbiter + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + vrel , & ! coefficient for tauw + Cb , & ! seabed stress coefficient + umassdti ! mass of U-cell/dte (kg/m^2 s) - real*8, intent(out) :: conv + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & + solx , & ! Initial guess on input, approximate solution on output (x components) + soly ! Initial guess on input, approximate solution on output (y components) - ! Author - ! Stéphane Gaudreault, Abdessamad Qaddouri -- March 2018 - ! - ! References - ! C. T. Kelley, Iterative Methods for Linear and Nonlinear Equations, SIAM, 1995 - ! (https://www.siam.org/books/textbooks/fr16_book.pdf) - ! - ! Y. Saad, Iterative Methods for Sparse Linear Systems. SIAM, 2003. - ! (http://www-users.cs.umn.edu/~saad/IterMethBook_2ndEd.pdf) - ! - integer :: i, j, k, k1, ii, jj, ierr + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + bx , & ! Right hand side of the linear system (x components) + by ! Right hand side of the linear system (y components) - integer :: initer, outiter, nextit, it - real*8 :: relative_tolerance, r0 - real*8 :: norm_residual, nu, t - real*8, dimension(maxinner+1, maxinner) :: hessenberg + real (kind=dbl_kind), intent(in) :: & + tolerance ! Tolerance to achieve. The algorithm terminates when the relative + ! residual is below tolerance - real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy) :: work_space - real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy, maxinner) :: ww - real*8, dimension(ldnh_minx:ldnh_maxx, ldnh_miny:ldnh_maxy, maxinner+1) :: vv + integer (kind=int_kind), intent(in) :: & + maxinner ! Restart the method every maxinner inner iterations - real*8 :: local_dot, dotprod - real*8, dimension(maxinner) :: dotprod_local + integer (kind=int_kind), intent(in) :: & + maxouter ! Maximum number of outer iterations + ! Iteration will stop after maxinner*maxouter steps + ! 
even if the specified tolerance has not been achieved - real*8, dimension(maxinner+1) :: rot_cos, rot_sin, gg - logical almost_zero + integer (kind=int_kind), intent(out) :: & + nbiter ! Total number of iteration performed - integer i0, in, j0, jn - integer niloc,njloc - character(len=9) :: communicate_S + real (kind=dbl_kind), intent(out) :: & + conv ! !phb DESCRIBE IF WE KEEP - communicate_S = "GRID" - if (Grd_yinyang_L) communicate_S = "MULTIGRID" + ! local variables + integer (kind=int_kind) :: & + iblk , & ! block index + ij , & ! compressed index + i, j ! grid indices + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & + Au , & ! matvec result, Au = Ax * u (N/m^2) + Av , & ! matvec result, Av = Ay * v (N/m^2) + Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) + Fy ! residual vector (y components), Fy = Av - by (N/m^2) +! real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks) :: work_space + ! integer (kind=int_kind) :: i, j, k, k1, ii, jj, ierr - niloc = (l_ni-pil_e)-(1+pil_w)+1 - njloc = (l_nj-pil_n)-(1+pil_s)+1 + real (kind=dbl_kind), dimension (max_blocks) :: & + norm_squared ! array to accumulate squared norm of grid function over blocks + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & + arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv + arnoldi_basis_y ! arnoldi basis (y components) - ! Here we go ! + real (kind=dbl_kind) :: & + norm_residual , & ! current L^2 norm of residual vector + inverse_norm , & ! inverse of the residual norm + t ! + + integer (kind=int_kind) :: & + initer , & ! inner (Arnoldi) loop counter + outiter , & ! outer (restarts) loop counter + nextit , & ! nextit == initer+1 + it ! reusable loop counter + + real (kind=dbl_kind), dimension(maxinner+1) :: & + rot_cos , & ! + rot_sin , & ! + rhs_hess ! right hand side vector of the Hessenberg (least square) system + + real (kind=dbl_kind) :: relative_tolerance, r0 - i0 = 1 + sol_pil_w - in = l_ni - sol_pil_e - j0 = 1 + sol_pil_s - jn = l_nj - sol_pil_n + real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: hessenberg + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: ww + + + real (kind=dbl_kind) :: local_dot, dotprod + real (kind=dbl_kind), dimension(maxinner) :: dotprod_local + + + logical (kind=log_kind) almost_zero + + ! integer (kind=int_kind) i0, in, j0, jn + ! integer (kind=int_kind) niloc,njloc + + ! Here we go ! outiter = 0 nbiter = 0 - conv = 1.d0 + conv = c1 ! Residual of the initial iterate - call matvec(solution, work_space, level) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + solx (:,:,iblk) , soly (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + call residual_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk), & + arnoldi_basis_x (:,:,iblk, 1), & + arnoldi_basis_y (:,:,iblk, 1)) + enddo + !$OMP END PARALLEL DO +! do j=j0,jn +! do i=i0,in +! vv(i,j,1) = rhs_b(i,j) - work_space(i,j) +! end do +! end do +! + ! Start outer (restarts) loop + do + ! 
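!
! The norm is assembled in parallel: each block accumulates a partial sum of
! squares and the global reduction completes it, conceptually
!    || r ||^2 = global_sum( sum over blocks( sum over cells( Fx**2 + Fy**2 ) ) ) ,
! with the square root taken only once, after the reduction; this is why the
! per-block routine returns the squared norm.
!
+         ! 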
Compute norm of initial residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block, ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk), & + arnoldi_basis_x(:,:,iblk, 1), & + arnoldi_basis_y(:,:,iblk, 1), & + norm_squared(iblk)) + + enddo + !$OMP END PARALLEL DO + norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) +! +! local_dot = 0.0d0 +! do j=j0,jn +! !DIR$ SIMD +! do i=i0,in +! local_dot = local_dot + (vv(i, j, 1) * vv(i, j, 1)) +! end do +! end do +! +! call RPN_COMM_allreduce(local_dot, norm_residual, 1, "MPI_double_precision", "MPI_sum", communicate_S, ierr) +! norm_residual = sqrt(norm_residual) + + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + ! Normalize the first Arnoldi vector + inverse_norm = c1 / norm_residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm + arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO +! +! nu = 1.0d0 / norm_residual +! do j=j0,jn +! do i=i0,in +! vv(i,j,1) = vv(i,j,1) * nu +! end do +! end do +! + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! Initialize 1-st term of RHS of hessenberg system + gg(1) = norm_residual + gg(2:) = 0.d0 +! +! initer = 0 +! +! do +! +! nbiter = nbiter + 1 +! initer = initer + 1 +! nextit = initer + 1 +! +! ! here call precond_diag OR PGMRES +! ! if (sol2D_precond_S == 'JACOBI') then +! ! call pre_jacobi2D ( work_space(i0:in,j0:jn), & +! ! vv(i0:in,j0:jn, initer), & +! ! Prec_xevec_8, niloc, njloc,& +! ! Prec_ai_8, Prec_bi_8, Prec_ci_8 ) +! ! else +! ! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) +! ! endif +! +! ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) +! +! call matvec ( work_space, vv(:,:,nextit), level ) +! +! ! Classical Gram-Schmidt orthogonalisation process +! dotprod_local = 0.d0 +! do it=1,initer +! local_dot = 0.0d0 +! do j=j0,jn +! !DIR$ SIMD +! do i=i0,in +! local_dot = local_dot + (vv(i, j, it) * vv(i, j, nextit)) +! end do +! end do +! dotprod_local(it) = local_dot +! end do +! +! call RPN_COMM_allreduce(dotprod_local(:), hessenberg(1,initer), initer, "MPI_double_precision", "MPI_sum", communicate_S, ierr) +! +! do it=1,initer +! do j=j0,jn +! do i=i0,in +! vv(i, j, nextit) = vv(i, j, nextit) - hessenberg(it,initer) * vv(i, j, it) +! end do +! end do +! end do +! +! local_dot = 0.d0 +! do j=j0,jn +! !DIR$ SIMD +! do i=i0,in +! local_dot = local_dot + (vv(i, j, nextit) * vv(i, j, nextit)) +! end do +! end do +! +! call RPN_COMM_allreduce(local_dot,dotprod,1,"MPI_double_precision","MPI_sum",communicate_S,ierr) +! +! hessenberg(nextit,initer) = sqrt(dotprod) +! +! ! Watch out for happy breakdown +! if (.not. almost_zero( hessenberg(nextit,initer) ) ) then +! nu = 1.d0 / hessenberg(nextit,initer) +! do j=j0,jn +! do i=i0,in +! vv(i, j, nextit) = vv(i, j, nextit) * nu +! end do +! end do +! end if +! +! ! Form and store the information for the new Givens rotation +! if (initer > 1) then +! do k=2,initer +! k1 = k-1 +! t = hessenberg(k1,initer) +! hessenberg(k1,initer) = rot_cos(k1)*t + rot_sin(k1)*hessenberg(k,initer) +! hessenberg(k,initer) = -rot_sin(k1)*t + rot_cos(k1)*hessenberg(k,initer) +! end do +! +! end if +! +! 
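!
! For reference: with nu = sqrt(h(i,i)**2 + h(i+1,i)**2), the rotation
!    |  c  s |         c = h(i,i)   / nu ,
!    | -s  c |  with   s = h(i+1,i) / nu ,
! applied to rows (i, i+1) zeroes the subdiagonal entry h(i+1,i) of the new
! Hessenberg column; applied to the right-hand side it makes abs(gg(i+1))
! the current residual norm, at no extra cost.
!
!          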
nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) +! if (.not. almost_zero(nu)) then +! rot_cos(initer) = hessenberg(initer,initer) / nu +! rot_sin(initer) = hessenberg(nextit,initer) / nu +! +! gg(nextit) = -rot_sin(initer) * gg(initer) +! gg(initer) = rot_cos(initer) * gg(initer) +! +! hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) +! end if +! +! norm_residual = abs(gg(nextit)) +! +! conv = norm_residual / r0 +! +! if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then +! exit +! endif +! +! end do +! +! ! At this point either the maximum number of inner iterations +! ! was reached or the absolute residual is below the scaled tolerance. +! +! ! Solve upper triangular system +! gg(initer) = gg(initer) / hessenberg(initer,initer) +! do ii=2,initer +! k = initer - ii + 1 +! k1 = k + 1 +! t = gg(k) +! do j=k1,initer +! t = t - hessenberg(k,j) * gg(j) +! end do +! gg(k) = t / hessenberg(k,k) +! end do +! +! ! Form linear combination to get solution. +! do it=1,initer +! t = gg(it) +! +! do j=j0,jn +! do i=i0,in +! solution(i, j) = solution(i, j) + t * ww(i, j, it) +! end do +! end do +! +! end do +! +! outiter = outiter + 1 +! +! if (norm_residual <= relative_tolerance .or. outiter > maxouter) then +! return +! end if +! +! ! Solution is not convergent : compute residual vector and continue. +! do it=1,initer +! jj = nextit - it + 1 +! gg(jj-1) = -rot_sin(jj-1) * gg(jj) +! gg(jj) = rot_cos(jj-1) * gg(jj) +! end do +! +! do it=1,nextit +! t = gg(it) +! if (it == 1) then +! t = t - 1.d0 +! end if +! +! do j=j0,jn +! !DIR$ SIMD +! do i=i0,in +! vv(i, j, 1) = vv(i, j, 1) + t * vv(i, j, it) +! end do +! end do +! +! end do +! + end do ! end of outer (restarts) loop +! +! return + end subroutine fgmres + +!======================================================================= + +! PGMRES: Right-preconditioned generalized minimum residual method (with restarts). +! Solves A x = b using GMRES with a right preconditioner +! +! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - do j=j0,jn - do i=i0,in - vv(i,j,1) = rhs_b(i,j) - work_space(i,j) - end do - end do + subroutine pgmres() + + end subroutine pgmres - do +!======================================================================= + +end module ice_krylov - local_dot = 0.0d0 - do j=j0,jn -!DIR$ SIMD - do i=i0,in - local_dot = local_dot + (vv(i, j, 1) * vv(i, j, 1)) - end do - end do - - call RPN_COMM_allreduce(local_dot, norm_residual, 1, "MPI_double_precision", "MPI_sum", communicate_S, ierr) - norm_residual = sqrt(norm_residual) - - ! Current guess is a good enough solution - if (norm_residual < tolerance) then - return - end if - - nu = 1.0d0 / norm_residual - do j=j0,jn - do i=i0,in - vv(i,j,1) = vv(i,j,1) * nu - end do - end do - - if (outiter == 0) then - relative_tolerance = tolerance * norm_residual - r0 = norm_residual - end if - - conv = norm_residual / r0 - - ! initialize 1-st term of rhs of hessenberg system. - gg(1) = norm_residual - gg(2:) = 0.d0 - - initer = 0 - - do - - nbiter = nbiter + 1 - initer = initer + 1 - nextit = initer + 1 - - if (sol2D_precond_S == 'JACOBI') then - call pre_jacobi2D ( work_space(i0:in,j0:jn), & - vv(i0:in,j0:jn, initer), & - Prec_xevec_8, niloc, njloc,& - Prec_ai_8, Prec_bi_8, Prec_ci_8 ) - else - work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) - endif - - ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) - - call matvec ( work_space, vv(:,:,nextit), level ) - - ! 
Classical Gram-Schmidt orthogonalisation process - dotprod_local = 0.d0 - do it=1,initer - local_dot = 0.0d0 - do j=j0,jn -!DIR$ SIMD - do i=i0,in - local_dot = local_dot + (vv(i, j, it) * vv(i, j, nextit)) - end do - end do - dotprod_local(it) = local_dot - end do - - call RPN_COMM_allreduce(dotprod_local(:), hessenberg(1,initer), initer, "MPI_double_precision", "MPI_sum", communicate_S, ierr) - - do it=1,initer - do j=j0,jn - do i=i0,in - vv(i, j, nextit) = vv(i, j, nextit) - hessenberg(it,initer) * vv(i, j, it) - end do - end do - end do - - local_dot = 0.d0 - do j=j0,jn -!DIR$ SIMD - do i=i0,in - local_dot = local_dot + (vv(i, j, nextit) * vv(i, j, nextit)) - end do - end do - - call RPN_COMM_allreduce(local_dot,dotprod,1,"MPI_double_precision","MPI_sum",communicate_S,ierr) - - hessenberg(nextit,initer) = sqrt(dotprod) - - ! Watch out for happy breakdown - if (.not. almost_zero( hessenberg(nextit,initer) ) ) then - nu = 1.d0 / hessenberg(nextit,initer) - do j=j0,jn - do i=i0,in - vv(i, j, nextit) = vv(i, j, nextit) * nu - end do - end do - end if - - ! Form and store the information for the new Givens rotation - if (initer > 1) then - do k=2,initer - k1 = k-1 - t = hessenberg(k1,initer) - hessenberg(k1,initer) = rot_cos(k1)*t + rot_sin(k1)*hessenberg(k,initer) - hessenberg(k,initer) = -rot_sin(k1)*t + rot_cos(k1)*hessenberg(k,initer) - end do - - end if - - nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) - if (.not. almost_zero(nu)) then - rot_cos(initer) = hessenberg(initer,initer) / nu - rot_sin(initer) = hessenberg(nextit,initer) / nu - - gg(nextit) = -rot_sin(initer) * gg(initer) - gg(initer) = rot_cos(initer) * gg(initer) - - hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) - end if - - norm_residual = abs(gg(nextit)) - - conv = norm_residual / r0 - - if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then - exit - endif - - end do - - ! At this point either the maximum number of inner iterations - ! was reached or the absolute residual is below the scaled tolerance. - - ! Solve upper triangular system - gg(initer) = gg(initer) / hessenberg(initer,initer) - do ii=2,initer - k = initer - ii + 1 - k1 = k + 1 - t = gg(k) - do j=k1,initer - t = t - hessenberg(k,j) * gg(j) - end do - gg(k) = t / hessenberg(k,k) - end do - - ! Form linear combination to get solution. - do it=1,initer - t = gg(it) - - do j=j0,jn - do i=i0,in - solution(i, j) = solution(i, j) + t * ww(i, j, it) - end do - end do - - end do - - outiter = outiter + 1 - - if (norm_residual <= relative_tolerance .or. outiter > maxouter) then - return - end if - - ! Solution is not convergent : compute residual vector and continue. 
- do it=1,initer - jj = nextit - it + 1 - gg(jj-1) = -rot_sin(jj-1) * gg(jj) - gg(jj) = rot_cos(jj-1) * gg(jj) - end do - - do it=1,nextit - t = gg(it) - if (it == 1) then - t = t - 1.d0 - end if - - do j=j0,jn -!DIR$ SIMD - do i=i0,in - vv(i, j, 1) = vv(i, j, 1) + t * vv(i, j, it) - end do - end do - - end do - - end do - - return - end +!======================================================================= From b7460a2833a1e44e4b514834a033a1e429ceb409 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 19 Jul 2019 15:34:45 -0400 Subject: [PATCH 100/196] ice_krylov: adapt FGMRES algo up to preconditioner call (WIP) --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 183 +++++++++++++++++----- 1 file changed, 142 insertions(+), 41 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index 90fc8f80b..722a063f7 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -32,7 +32,7 @@ module ice_krylov use ice_constants, only: field_loc_NEcorner, c0, c1 use ice_domain, only: distrb_info, nblocks use ice_domain_size, only: max_blocks - use ice_dyn_vp, only: matvec, residual_vec, calc_L2norm_squared + use ice_dyn_vp, only: matvec, residual_vec, calc_L2norm_squared, precond use ice_flux, only: fm, iceumask use ice_global_reductions, only: global_sum use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & @@ -41,7 +41,17 @@ module ice_krylov implicit none private - public :: fgmres, pgmres + public :: fgmres + + integer (kind=int_kind), allocatable :: & + icellt(:) , & ! no. of cells where icetmask = 1 + icellu(:) ! no. of cells where iceumask = 1 + + integer (kind=int_kind), allocatable :: & + indxti(:,:) , & ! compressed index in i-direction + indxtj(:,:) , & ! compressed index in j-direction + indxui(:,:) , & ! compressed index in i-direction + indxuj(:,:) ! compressed index in j-direction !======================================================================= @@ -54,25 +64,28 @@ module ice_krylov ! ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - subroutine fgmres (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & + subroutine fgmres (icellt_in, icellu_in, & + indxti_in, indxtj_in, & + indxui_in, indxuj_in, & zetaD, & - Cb, vrel, & + Cb, vrel, & umassdti, & - solx, soly, & - bx, by, & + solx, soly, & + bx, by, & + diagx, diagy, & tolerance, maxinner, maxouter, nbiter, conv) + use ice_dyn_vp, only: precond + integer (kind=int_kind), dimension(max_blocks), intent(in) :: & - icellt , & ! no. of cells where icetmask = 1 - icellu ! no. of cells where iceumask = 1 + icellt_in, & ! no. of cells where icetmask = 1 + icellu_in ! no. of cells where iceumask = 1 integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj , & ! compressed index in j-direction - indxui , & ! compressed index in i-direction - indxuj ! compressed index in j-direction + indxti_in, & ! compressed index in i-direction + indxtj_in, & ! compressed index in j-direction + indxui_in, & ! compressed index in i-direction + indxuj_in ! compressed index in j-direction real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2*zeta (viscous coefficient) @@ -90,6 +103,10 @@ subroutine fgmres (icellt, icellu, & bx , & ! Right hand side of the linear system (x components) by ! 
Right hand side of the linear system (y components) + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + diagx , & ! Diagonal of the system matrix (x components) + diagy ! Diagonal of the system matrix (y components) + real (kind=dbl_kind), intent(in) :: & tolerance ! Tolerance to achieve. The algorithm terminates when the relative ! residual is below tolerance @@ -112,7 +129,7 @@ subroutine fgmres (icellt, icellu, & integer (kind=int_kind) :: & iblk , & ! block index - ij , & ! compressed index + ij , & ! index for indx[t|u][i|j] i, j ! grid indices real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & @@ -130,6 +147,9 @@ subroutine fgmres (icellt, icellu, & arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv arnoldi_basis_y ! arnoldi basis (y components) + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: & + ww ! !phb FIND BETTER NAME + real (kind=dbl_kind) :: & norm_residual , & ! current L^2 norm of residual vector inverse_norm , & ! inverse of the residual norm @@ -146,11 +166,14 @@ subroutine fgmres (icellt, icellu, & rot_sin , & ! rhs_hess ! right hand side vector of the Hessenberg (least square) system + integer (kind=int_kind) :: & + precond_type ! type of preconditioner + real (kind=dbl_kind) :: relative_tolerance, r0 real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: hessenberg - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: ww + real (kind=dbl_kind) :: local_dot, dotprod @@ -162,13 +185,30 @@ subroutine fgmres (icellt, icellu, & ! integer (kind=int_kind) i0, in, j0, jn ! integer (kind=int_kind) niloc,njloc + character(len=*), parameter :: subname = '(fgmres)' + + ! Initialize module variables + allocate(icellt(max_blocks), icellu(max_blocks)) + allocate(indxti(nx_block*ny_block, max_blocks), & + indxtj(nx_block*ny_block, max_blocks), & + indxui(nx_block*ny_block, max_blocks), & + indxuj(nx_block*ny_block, max_blocks)) + icellt = icellt_in + icellu = icellu_in + indxti = indxti_in + indxtj = indxtj_in + indxui = indxui_in + indxuj = indxuj_in + ! Here we go ! outiter = 0 nbiter = 0 - + conv = c1 - + + precond_type = precond + ! Residual of the initial iterate !$OMP PARALLEL DO PRIVATE(iblk) @@ -263,27 +303,32 @@ subroutine fgmres (icellt, icellu, & conv = norm_residual / r0 ! Initialize 1-st term of RHS of hessenberg system - gg(1) = norm_residual - gg(2:) = 0.d0 -! -! initer = 0 -! -! do -! -! nbiter = nbiter + 1 -! initer = initer + 1 -! nextit = initer + 1 -! -! ! here call precond_diag OR PGMRES -! ! if (sol2D_precond_S == 'JACOBI') then -! ! call pre_jacobi2D ( work_space(i0:in,j0:jn), & -! ! vv(i0:in,j0:jn, initer), & -! ! Prec_xevec_8, niloc, njloc,& -! ! Prec_ai_8, Prec_bi_8, Prec_ci_8 ) -! ! else -! ! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) -! ! endif -! + rhs_hess(1) = norm_residual + rhs_hess(2:) = c0 + + initer = 0 + + ! Start of inner (Arnoldi) loop + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 +! + ! precondition the current Arnoldi vector + call precondition(arnoldi_basis_x(:,:,:,initer), & + arnoldi_basis_y(:,:,:,initer), & + Au, Av, & + precond_type, diagx, diagy) +! if (sol2D_precond_S == 'JACOBI') then +! call pre_jacobi2D ( work_space(i0:in,j0:jn), & +! vv(i0:in,j0:jn, initer), & +! Prec_xevec_8, niloc, njloc,& +! Prec_ai_8, Prec_bi_8, Prec_ci_8 ) +! else +! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) +! endif + ! ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) ! ! 
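!
! ('ww' stores the preconditioned vectors z_j = M_j^{-1} v_j.  FGMRES must
! keep them because the preconditioner is allowed to change from one inner
! iteration to the next, and the final update
!    solution = solution + sum_j y_j * ww_j
! is formed from these vectors rather than from the Arnoldi basis.)
!
! 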
call matvec ( work_space, vv(:,:,nextit), level ) @@ -363,7 +408,7 @@ subroutine fgmres (icellt, icellu, & ! exit ! endif ! -! end do + end do ! end of inner (Arnoldi) loop ! ! ! At this point either the maximum number of inner iterations ! ! was reached or the absolute residual is below the scaled tolerance. @@ -433,11 +478,67 @@ end subroutine fgmres ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC subroutine pgmres() - + + character(len=*), parameter :: subname = '(pgmres)' + end subroutine pgmres !======================================================================= +! Generic routine to precondition a vector +! +! authors: Philippe Blain, ECCC + + subroutine precondition(vx, vy, wx, wy, precond_type, diagx, diagy) + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & + vx , & ! input vector (x components) + vy ! input vector (y components) + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(out) :: & + wx , & ! preconditionned vector (x components) + wy ! preconditionned vector (y components) + + integer (kind=int_kind), intent(in) :: & + precond_type ! type of preconditioner + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & + diagx , & ! diagonal of the system matrix (x components) + diagy ! diagonal of the system matrix (y components) + + ! local variables + + integer (kind=int_kind) :: & + iblk , & ! block index + ij , & ! compressed index + i, j ! grid indices + + character(len=*), parameter :: subname = '(precondition)' + + if (precond_type == 1) then ! identity (no preconditioner) + wx = vx + wy = vy + elseif (precond_type == 2) then ! Jacobi preconditioner + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + wx(i,j,iblk) = vx(i,j,iblk) / diagx(i,j,iblk) + wy(i,j,iblk) = vy(i,j,iblk) / diagy(i,j,iblk) + enddo ! ij + enddo + !$OMP END PARALLEL DO + elseif (precond_type == 3) then ! PGMRES (Jacobi-preconditioned GMRES) + ! !phb TODO!!! + else + + endif + end subroutine precondition + +!======================================================================= + end module ice_krylov !======================================================================= From a8e4ed2523975fce9f04d8f9f49a72b3bb854f8e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 19 Jul 2019 15:59:23 -0400 Subject: [PATCH 101/196] ice_krylov: adapt FGMRES algo up to before CGS (WIP) --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 77 +++++++++++++++-------- 1 file changed, 50 insertions(+), 27 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index 722a063f7..4c5cbd899 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -133,10 +133,10 @@ subroutine fgmres (icellt_in, icellu_in, & i, j ! grid indices real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - Au , & ! matvec result, Au = Ax * u (N/m^2) - Av , & ! matvec result, Av = Ay * v (N/m^2) - Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) - Fy ! residual vector (y components), Fy = Av - by (N/m^2) + workspace_x , & ! work vector (x components) + workspace_y , & ! work vector (y components) + Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) + Fy ! residual vector (y components), Fy = Av - by (N/m^2) ! real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks) :: work_space ! 
integer (kind=int_kind) :: i, j, k, k1, ii, jj, ierr @@ -148,7 +148,8 @@ subroutine fgmres (icellt_in, icellu_in, & arnoldi_basis_y ! arnoldi basis (y components) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: & - ww ! !phb FIND BETTER NAME + wwx ! !phb FIND BETTER NAME (x components) + wwy ! !phb FIND BETTER NAME (y components) real (kind=dbl_kind) :: & norm_residual , & ! current L^2 norm of residual vector @@ -213,26 +214,26 @@ subroutine fgmres (icellt_in, icellu_in, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - solx (:,:,iblk) , soly (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - call residual_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - arnoldi_basis_x (:,:,iblk, 1), & + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + solx (:,:,iblk) , soly (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) + call residual_vec (nx_block , ny_block . & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk) , & + bx (:,:,iblk), by (:,:,iblk) , & + workspace_x(:,:,iblk), workspace_y(:,:,iblk), & + arnoldi_basis_x (:,:,iblk, 1), & arnoldi_basis_y (:,:,iblk, 1)) enddo !$OMP END PARALLEL DO @@ -328,9 +329,31 @@ subroutine fgmres (icellt_in, icellu_in, & ! else ! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) ! endif - + ! !phb DESCRIBE ww + wwx(:,:,initer) = workspace_x + wwy(:,:,initer) = workspace_y ! ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) -! + ! + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + arnoldi_basis_x(:,:,iblk,nextit), & + arnoldi_basis_y(:,:,iblk,nextit)) + enddo + !$OMP END PARALLEL DO + ! call matvec ( work_space, vv(:,:,nextit), level ) ! ! ! 
Classical Gram-Schmidt orthogonalisation process From a1d6d77dad4e2fcaa8a43c03a4d909611902c36f Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 19 Jul 2019 17:13:27 -0400 Subject: [PATCH 102/196] ice_krylov: adapt FGMRES algo up to end of Arnoldi (except 1st GS loop) (WIP) --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 142 ++++++++++++++++++---- 1 file changed, 119 insertions(+), 23 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index 4c5cbd899..3786ff06e 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -148,40 +148,40 @@ subroutine fgmres (icellt_in, icellu_in, & arnoldi_basis_y ! arnoldi basis (y components) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: & - wwx ! !phb FIND BETTER NAME (x components) + wwx , & ! !phb FIND BETTER NAME (x components) wwy ! !phb FIND BETTER NAME (y components) real (kind=dbl_kind) :: & norm_residual , & ! current L^2 norm of residual vector - inverse_norm , & ! inverse of the residual norm - t ! + inverse_norm , & ! inverse of the norm of a vector + nu, t ! local temporary values integer (kind=int_kind) :: & initer , & ! inner (Arnoldi) loop counter outiter , & ! outer (restarts) loop counter nextit , & ! nextit == initer+1 - it ! reusable loop counter + it, k ! reusable loop counters real (kind=dbl_kind), dimension(maxinner+1) :: & - rot_cos , & ! - rot_sin , & ! - rhs_hess ! right hand side vector of the Hessenberg (least square) system + rot_cos , & ! cosine elements of Givens rotations + rot_sin , & ! sine elements of Givens rotations + rhs_hess ! right hand side vector of the Hessenberg (least squares) system + + real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & + hessenberg ! system matrix of the Hessenberg (least squares) system integer (kind=int_kind) :: & precond_type ! type of preconditioner - real (kind=dbl_kind) :: relative_tolerance, r0 - - real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: hessenberg - + real (kind=dbl_kind) :: relative_tolerance, r0 real (kind=dbl_kind) :: local_dot, dotprod real (kind=dbl_kind), dimension(maxinner) :: dotprod_local - logical (kind=log_kind) almost_zero + ! logical (kind=log_kind) almost_zero ! integer (kind=int_kind) i0, in, j0, jn ! integer (kind=int_kind) niloc,njloc @@ -228,7 +228,7 @@ subroutine fgmres (icellt_in, icellu_in, & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) - call residual_vec (nx_block , ny_block . & + call residual_vec (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk) , & bx (:,:,iblk), by (:,:,iblk) , & @@ -319,7 +319,7 @@ subroutine fgmres (icellt_in, icellu_in, & ! precondition the current Arnoldi vector call precondition(arnoldi_basis_x(:,:,:,initer), & arnoldi_basis_y(:,:,:,initer), & - Au, Av, & + workspace_x , workspace_y , & precond_type, diagx, diagy) ! if (sol2D_precond_S == 'JACOBI') then ! call pre_jacobi2D ( work_space(i0:in,j0:jn), & @@ -330,8 +330,8 @@ subroutine fgmres (icellt_in, icellu_in, & ! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) ! endif ! !phb DESCRIBE ww - wwx(:,:,initer) = workspace_x - wwy(:,:,initer) = workspace_y + wwx(:,:,:,initer) = workspace_x + wwy(:,:,:,initer) = workspace_y ! ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) ! !$OMP PARALLEL DO PRIVATE(iblk) @@ -356,7 +356,9 @@ subroutine fgmres (icellt_in, icellu_in, & ! call matvec ( work_space, vv(:,:,nextit), level ) ! -! ! 
Classical Gram-Schmidt orthogonalisation process + ! Classical Gram-Schmidt orthogonalisation process + ! TODO + ! First loop of Gram-Schmidt (compute coefficients) ! dotprod_local = 0.d0 ! do it=1,initer ! local_dot = 0.0d0 @@ -370,7 +372,23 @@ subroutine fgmres (icellt_in, icellu_in, & ! end do ! ! call RPN_COMM_allreduce(dotprod_local(:), hessenberg(1,initer), initer, "MPI_double_precision", "MPI_sum", communicate_S, ierr) -! + + ! Second loop of Gram-Schmidt (orthonormalize) + do it = 1, initer + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do ! do it=1,initer ! do j=j0,jn ! do i=i0,in @@ -378,7 +396,20 @@ subroutine fgmres (icellt_in, icellu_in, & ! end do ! end do ! end do -! + + ! Compute norm of new arnoldi vector and update Hessenberg matrix + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk) , & + arnoldi_basis_x(:,:,iblk, nextit), & + arnoldi_basis_y(:,:,iblk, nextit), & + norm_squared(iblk)) + + enddo + !$OMP END PARALLEL DO + hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) ! local_dot = 0.d0 ! do j=j0,jn ! !DIR$ SIMD @@ -390,7 +421,23 @@ subroutine fgmres (icellt_in, icellu_in, & ! call RPN_COMM_allreduce(local_dot,dotprod,1,"MPI_double_precision","MPI_sum",communicate_S,ierr) ! ! hessenberg(nextit,initer) = sqrt(dotprod) -! + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + ! Normalize next Arnoldi vector + inverse_norm = c1 / hessenberg(nextit,initer) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + end if ! ! Watch out for happy breakdown ! if (.not. almost_zero( hessenberg(nextit,initer) ) ) then ! nu = 1.d0 / hessenberg(nextit,initer) @@ -400,7 +447,15 @@ subroutine fgmres (icellt_in, icellu_in, & ! end do ! end do ! end if -! + + ! Apply previous Givens rotation to the last column of the Hessenberg matrix + if (initer > 1) then + do k = 2, initer + t = hessenberg(k-1, initer) + hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) + hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) + end do + end if ! ! Form and store the information for the new Givens rotation ! if (initer > 1) then ! do k=2,initer @@ -411,7 +466,18 @@ subroutine fgmres (icellt_in, icellu_in, & ! end do ! ! end if -! + + ! Compute new Givens rotation + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. 
almost_zero(nu)) then + rot_cos(initer) = hessenberg(initer,initer) / inverse_norm + rot_sin(initer) = hessenberg(nextit,initer) / inverse_norm + + rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) + rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) + + hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) + end if ! nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) ! if (.not. almost_zero(nu)) then ! rot_cos(initer) = hessenberg(initer,initer) / nu @@ -422,7 +488,12 @@ subroutine fgmres (icellt_in, icellu_in, & ! ! hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) ! end if -! + ! Check for convergence + norm_residual = abs(rhs_hess(nextit)) + conv = norm_residual / r0 + if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then + exit + endif ! norm_residual = abs(gg(nextit)) ! ! conv = norm_residual / r0 @@ -562,6 +633,31 @@ end subroutine precondition !======================================================================= +logical function almost_zero(A) result(retval) + ! Check if value A is close to zero, up to machine precision + ! + !author + ! Stéphane Gaudreault, ECCC -- June 2014 + ! + !revision + ! v4-80 - Gaudreault S. - gfortran compatibility + ! 2019 - Philippe Blain, ECCC - converted to CICE standards + implicit none + + real (kind=dbl_kind), intent(in) :: A + integer (kind=int_kind) :: aBit + integer (kind=int_kind), parameter :: two_complement = int(Z'80000000', kind=int_kind) + aBit = 0 + aBit = transfer(A, aBit) + if (aBit < 0) then + aBit = two_complement - aBit + end if + ! lexicographic order test with a tolerance of 1 adjacent float + retval = (abs(aBit) <= 1) +end function almost_zero + +!======================================================================= + end module ice_krylov !======================================================================= From 34a602f6a1e2432cbc2da31a3e913c22d3118a9c Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 19 Jul 2019 17:24:23 -0400 Subject: [PATCH 103/196] ice_krylov: adapt FGMRES algo up to updating of solution (WIP) --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 38 ++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index 3786ff06e..045cfa68c 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -160,7 +160,7 @@ subroutine fgmres (icellt_in, icellu_in, & initer , & ! inner (Arnoldi) loop counter outiter , & ! outer (restarts) loop counter nextit , & ! nextit == initer+1 - it, k ! reusable loop counters + it, k, ii ! reusable loop counters real (kind=dbl_kind), dimension(maxinner+1) :: & rot_cos , & ! cosine elements of Givens rotations @@ -503,11 +503,21 @@ subroutine fgmres (icellt_in, icellu_in, & ! endif ! end do ! end of inner (Arnoldi) loop -! -! ! At this point either the maximum number of inner iterations -! ! was reached or the absolute residual is below the scaled tolerance. -! -! ! Solve upper triangular system + + ! At this point either the maximum number of inner iterations + ! was reached or the absolute residual is below the scaled tolerance. + + ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" + ! 
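      ! Note: after initer Givens rotations the active (initer x initer)
      ! block of the Hessenberg matrix is upper triangular, so the small
      ! least squares problem
      !   min || rhs_hess - hessenberg * y ||
      ! reduces to back substitution :
      !   y(initer) = rhs_hess(initer) / hessenberg(initer,initer)
      !   y(k)      = (rhs_hess(k) - sum_{j=k+1..initer} hessenberg(k,j)*y(j)) / hessenberg(k,k)
+ ! 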
(sol_hess is stored in rhs_hess)
+ rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer)
+ do ii = 2, initer
+ k = initer - ii + 1
+ t = rhs_hess(k)
+ do j = k + 1, initer
+ t = t - hessenberg(k,j) * rhs_hess(j)
+ end do
+ rhs_hess(k) = t / hessenberg(k,k)
+ end do
! gg(initer) = gg(initer) / hessenberg(initer,initer)
! do ii=2,initer
! k = initer - ii + 1
! k1 = k + 1
! t = gg(k)
! do j=k1,initer
! t = t - hessenberg(k,j) * gg(j)
! end do
! gg(k) = t / hessenberg(k,k)
! end do
!
-! ! Form linear combination to get solution.
+ ! Form linear combination to get solution
+ do it = 1, initer
+ t = rhs_hess(it)
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it)
+ soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
! do it=1,initer
! t = gg(it)
!
From 5925c94977d0085de5ef4a5ab28cdd4264543e0e Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 19 Jul 2019 17:57:28 -0400
Subject: [PATCH 104/196] ice_krylov: adapt FGMRES algo up to residual
 calculation (WIP)

---
 cicecore/cicedynB/dynamics/ice_krylov.F90 | 56 +++++++++++++++++++----
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90
index 045cfa68c..aec7e2597 100644
--- a/cicecore/cicedynB/dynamics/ice_krylov.F90
+++ b/cicecore/cicedynB/dynamics/ice_krylov.F90
@@ -160,7 +160,7 @@ subroutine fgmres (icellt_in, icellu_in, &
 initer , & ! inner (Arnoldi) loop counter
 outiter , & ! outer (restarts) loop counter
 nextit , & ! nextit == initer+1
- it, k, ii ! reusable loop counters
+ it, k, ii, jj ! reusable loop counters
@@ -555,13 +555,51 @@ subroutine fgmres (icellt_in, icellu_in, &
!
! end do
!
-! outiter = outiter + 1
-!
-! if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
-! return
-! end if
-!
-! ! Solution is not convergent : compute residual vector and continue.
+ ! Increment outer loop counter and check for convergence
+ outiter = outiter + 1
+ if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
+ return
+ end if
+
+ ! Solution is not convergent : compute residual vector and continue.
+ ! The residual vector is computed here using (see Saad p. 177) :
+ ! \begin{equation}
+ ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1})
+ ! \end{equation}
+ ! where :
+ ! $r$ is the residual
+ ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1)
+ ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1
+ ! $gamma_{m+1}$ is the last element of rhs_hess
+ ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1}
+
+ ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1},
+ ! store the result in rhs_hess
+ do it = 1, initer
+ jj = nextit - it + 1
+ rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0)
+ rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0)
+ end do
+
+ ! 
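      ! Note: the loop above applies the transposed rotations G_k^T in
      ! reverse order (k = initer, ..., 1), i.e. it multiplies by
      !   Q_m^T = G_1^T * G_2^T * ... * G_m^T
      ! Each G_k^T touches only entries jj-1 and jj, and the terms in
      ! g(jj-1) vanish since g = \gamma_{m+1} * e_{m+1} is zero except in
      ! its last entry.
+ ! 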
Compute the residual by multiplying V_{m+1} and rhs_hess + workspace_x = c0 + workspace_y = c0 + do it = 1, nextit + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it) + workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + arnoldi_basis_x(:,:,:,1) = workspace_x + arnoldi_basis_y(:,:,:,1) = workspace_y + end do + ! do it=1,initer ! jj = nextit - it + 1 ! gg(jj-1) = -rot_sin(jj-1) * gg(jj) @@ -585,7 +623,7 @@ subroutine fgmres (icellt_in, icellu_in, & ! end do ! end of outer (restarts) loop ! -! return + return end subroutine fgmres !======================================================================= From b6fa84210b89bc28549bd4ccd06b1dbbbd920735 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 22 Jul 2019 11:46:51 -0400 Subject: [PATCH 105/196] ice_krylov: correct CICE kinds in function almost_zero --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index aec7e2597..48a5beb26 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -707,8 +707,8 @@ logical function almost_zero(A) result(retval) implicit none real (kind=dbl_kind), intent(in) :: A - integer (kind=int_kind) :: aBit - integer (kind=int_kind), parameter :: two_complement = int(Z'80000000', kind=int_kind) + integer (kind=int8_kind) :: aBit + integer (kind=int8_kind), parameter :: two_complement = int(Z'80000000', kind=int8_kind) aBit = 0 aBit = transfer(A, aBit) if (aBit < 0) then From af5adaaa257a3777e6819766e2a4bb32bbf9c509 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 13 Jul 2020 14:25:45 -0400 Subject: [PATCH 106/196] comm: add 'global_sums' module procedure Add a 'global_sums' module procedure that computes the global sum of several scalars, held in a 1D array ('vector'). Make use of the existing 'compute_sums_dbl' subroutine. --- .../comm/mpi/ice_global_reductions.F90 | 72 ++++++++++++++++++- .../comm/serial/ice_global_reductions.F90 | 72 ++++++++++++++++++- 2 files changed, 142 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 index 2b4172d81..cb926f8dd 100644 --- a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 @@ -22,7 +22,7 @@ module ice_global_reductions #else use ice_communicate, only: my_task, mpiR16, mpiR8, mpiR4, master_task #endif - use ice_constants, only: field_loc_Nface, field_loc_NEcorner + use ice_constants, only: field_loc_Nface, field_loc_NEcorner, c0 use ice_fileunits, only: bfbflag use ice_exit, only: abort_ice use ice_distribution, only: distrb, ice_distributionGet, & @@ -36,6 +36,7 @@ module ice_global_reductions private public :: global_sum, & + global_sums, & global_sum_prod, & global_maxval, & global_minval @@ -55,6 +56,12 @@ module ice_global_reductions global_sum_scalar_int end interface + interface global_sums + module procedure global_sums_dbl!, & + ! module procedure global_sums_real, & ! not yet implemented + ! module procedure global_sums_int ! 
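! Intended use, as in the FGMRES solver: accumulate several local dot
! products into a 1D array, then reduce them all in a single call :
!   hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info)
! 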
not yet implemented + end interface + interface global_sum_prod module procedure global_sum_prod_dbl, & global_sum_prod_real, & @@ -700,6 +707,69 @@ function global_sum_scalar_int(scalar, dist) & end function global_sum_scalar_int +!*********************************************************************** + + function global_sums_dbl(vector, dist) & + result(globalSums) + +! Computes the global sums of sets of scalars (elements of 'vector') +! distributed across a parallel machine. +! +! This is actually the specific interface for the generic global_sums +! function corresponding to double precision vectors. The generic +! interface is identical but will handle real and integer vectors. + + real (dbl_kind), dimension(:), intent(in) :: & + vector ! vector whose components are to be summed + + type (distrb), intent(in) :: & + dist ! block distribution + + real (dbl_kind), dimension(size(vector)) :: & + globalSums ! resulting array of global sums + +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + numElem ! number of elements in vector + + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + character(len=*), parameter :: subname = '(global_sums_dbl)' + +!----------------------------------------------------------------------- +! +! get communicator for MPI calls +! +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + numElem = size(vector) + allocate(work(1,numElem)) + work(1,:) = vector + globalSums = c0 + + call compute_sums_dbl(work,globalSums,communicator,numProcs) + + deallocate(work) + +!----------------------------------------------------------------------- + + end function global_sums_dbl + !*********************************************************************** function global_sum_prod_dbl (array1, array2, dist, field_loc, & diff --git a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 index 1517bd73b..3b37b50d5 100644 --- a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 @@ -23,7 +23,7 @@ module ice_global_reductions #else use ice_communicate, only: my_task, mpiR16, mpiR8, mpiR4, master_task #endif - use ice_constants, only: field_loc_Nface, field_loc_NEcorner + use ice_constants, only: field_loc_Nface, field_loc_NEcorner, c0 use ice_fileunits, only: bfbflag use ice_exit, only: abort_ice use ice_distribution, only: distrb, ice_distributionGet, & @@ -37,6 +37,7 @@ module ice_global_reductions private public :: global_sum, & + global_sums, & global_sum_prod, & global_maxval, & global_minval @@ -56,6 +57,12 @@ module ice_global_reductions global_sum_scalar_int end interface + interface global_sums + module procedure global_sums_dbl!, & + ! module procedure global_sums_real, & ! not yet implemented + ! module procedure global_sums_int ! 
not yet implemented + end interface + interface global_sum_prod module procedure global_sum_prod_dbl, & global_sum_prod_real, & @@ -701,6 +708,69 @@ function global_sum_scalar_int(scalar, dist) & end function global_sum_scalar_int +!*********************************************************************** + + function global_sums_dbl(vector, dist) & + result(globalSums) + +! Computes the global sums of sets of scalars (elements of 'vector') +! distributed across a parallel machine. +! +! This is actually the specific interface for the generic global_sums +! function corresponding to double precision vectors. The generic +! interface is identical but will handle real and integer vectors. + + real (dbl_kind), dimension(:), intent(in) :: & + vector ! vector whose components are to be summed + + type (distrb), intent(in) :: & + dist ! block distribution + + real (dbl_kind), dimension(size(vector)) :: & + globalSums ! resulting array of global sums + +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + numElem ! number of elements in vector + + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + character(len=*), parameter :: subname = '(global_sums_dbl)' + +!----------------------------------------------------------------------- +! +! get communicator for MPI calls +! +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + numElem = size(vector) + allocate(work(1,numElem)) + work(1,:) = vector + globalSums = c0 + + call compute_sums_dbl(work,globalSums,communicator,numProcs) + + deallocate(work) + +!----------------------------------------------------------------------- + + end function global_sums_dbl + !*********************************************************************** function global_sum_prod_dbl (array1, array2, dist, field_loc, & From 0e79c533e8862ab9249a1fa9cb080fc0624694c5 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 22 Jul 2019 14:41:17 -0400 Subject: [PATCH 107/196] ice_krylov: adapt FGMRES including Gram-Schmidt (WIP) --- cicecore/cicedynB/dynamics/ice_krylov.F90 | 612 +++++++++------------- 1 file changed, 241 insertions(+), 371 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90 index 48a5beb26..1d90a7d09 100644 --- a/cicecore/cicedynB/dynamics/ice_krylov.F90 +++ b/cicecore/cicedynB/dynamics/ice_krylov.F90 @@ -34,7 +34,7 @@ module ice_krylov use ice_domain_size, only: max_blocks use ice_dyn_vp, only: matvec, residual_vec, calc_L2norm_squared, precond use ice_flux, only: fm, iceumask - use ice_global_reductions, only: global_sum + use ice_global_reductions, only: global_sum, global_sums use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & uarear, tinyarea use ice_kinds_mod @@ -137,8 +137,6 @@ subroutine fgmres (icellt_in, icellu_in, & workspace_y , & ! work vector (y components) Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) Fy ! residual vector (y components), Fy = Av - by (N/m^2) -! real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks) :: work_space - ! 
integer (kind=int_kind) :: i, j, k, k1, ii, jj, ierr real (kind=dbl_kind), dimension (max_blocks) :: & norm_squared ! array to accumulate squared norm of grid function over blocks @@ -173,18 +171,13 @@ subroutine fgmres (icellt_in, icellu_in, & integer (kind=int_kind) :: & precond_type ! type of preconditioner + real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep - - real (kind=dbl_kind) :: relative_tolerance, r0 - - real (kind=dbl_kind) :: local_dot, dotprod - real (kind=dbl_kind), dimension(maxinner) :: dotprod_local - - - ! logical (kind=log_kind) almost_zero - - ! integer (kind=int_kind) i0, in, j0, jn - ! integer (kind=int_kind) niloc,njloc + real (kind=dbl_kind) :: & + local_dot ! local value to accumulate dot product computations + + real (kind=dbl_kind), dimension(maxinner) :: & + dotprod_local ! local array to accumulate several dot product computations character(len=*), parameter :: subname = '(fgmres)' @@ -237,12 +230,7 @@ subroutine fgmres (icellt_in, icellu_in, & arnoldi_basis_y (:,:,iblk, 1)) enddo !$OMP END PARALLEL DO -! do j=j0,jn -! do i=i0,in -! vv(i,j,1) = rhs_b(i,j) - work_space(i,j) -! end do -! end do -! + ! Start outer (restarts) loop do ! Compute norm of initial residual @@ -258,371 +246,253 @@ subroutine fgmres (icellt_in, icellu_in, & enddo !$OMP END PARALLEL DO norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) -! -! local_dot = 0.0d0 -! do j=j0,jn -! !DIR$ SIMD -! do i=i0,in -! local_dot = local_dot + (vv(i, j, 1) * vv(i, j, 1)) -! end do -! end do -! -! call RPN_COMM_allreduce(local_dot, norm_residual, 1, "MPI_double_precision", "MPI_sum", communicate_S, ierr) -! norm_residual = sqrt(norm_residual) - - ! Current guess is a good enough solution - if (norm_residual < tolerance) then - return - end if - - ! Normalize the first Arnoldi vector - inverse_norm = c1 / norm_residual - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm - arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO -! -! nu = 1.0d0 / norm_residual -! do j=j0,jn -! do i=i0,in -! vv(i,j,1) = vv(i,j,1) * nu -! end do -! end do -! - if (outiter == 0) then - relative_tolerance = tolerance * norm_residual - r0 = norm_residual - end if - - conv = norm_residual / r0 - - ! Initialize 1-st term of RHS of hessenberg system - rhs_hess(1) = norm_residual - rhs_hess(2:) = c0 - - initer = 0 - - ! Start of inner (Arnoldi) loop - do - nbiter = nbiter + 1 - initer = initer + 1 - nextit = initer + 1 -! - ! precondition the current Arnoldi vector - call precondition(arnoldi_basis_x(:,:,:,initer), & - arnoldi_basis_y(:,:,:,initer), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) -! if (sol2D_precond_S == 'JACOBI') then -! call pre_jacobi2D ( work_space(i0:in,j0:jn), & -! vv(i0:in,j0:jn, initer), & -! Prec_xevec_8, niloc, njloc,& -! Prec_ai_8, Prec_bi_8, Prec_ci_8 ) -! else -! work_space(i0:in,j0:jn) = vv(i0:in,j0:jn, initer) -! endif - ! !phb DESCRIBE ww - wwx(:,:,:,initer) = workspace_x - wwy(:,:,:,initer) = workspace_y -! ww(i0:in,j0:jn, initer) = work_space(i0:in,j0:jn) - ! 
- !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk), & - indxui (:,iblk) , indxuj (:,iblk), & - indxti (:,iblk) , indxtj (:,iblk), & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - arnoldi_basis_x(:,:,iblk,nextit), & - arnoldi_basis_y(:,:,iblk,nextit)) - enddo - !$OMP END PARALLEL DO - -! call matvec ( work_space, vv(:,:,nextit), level ) -! - ! Classical Gram-Schmidt orthogonalisation process - ! TODO - ! First loop of Gram-Schmidt (compute coefficients) -! dotprod_local = 0.d0 -! do it=1,initer -! local_dot = 0.0d0 -! do j=j0,jn -! !DIR$ SIMD -! do i=i0,in -! local_dot = local_dot + (vv(i, j, it) * vv(i, j, nextit)) -! end do -! end do -! dotprod_local(it) = local_dot -! end do -! -! call RPN_COMM_allreduce(dotprod_local(:), hessenberg(1,initer), initer, "MPI_double_precision", "MPI_sum", communicate_S, ierr) - - ! Second loop of Gram-Schmidt (orthonormalize) - do it = 1, initer - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do -! do it=1,initer -! do j=j0,jn -! do i=i0,in -! vv(i, j, nextit) = vv(i, j, nextit) - hessenberg(it,initer) * vv(i, j, it) -! end do -! end do -! end do - - ! Compute norm of new arnoldi vector and update Hessenberg matrix - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call calc_L2norm_squared(nx_block , ny_block , & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk) , & - arnoldi_basis_x(:,:,iblk, nextit), & - arnoldi_basis_y(:,:,iblk, nextit), & - norm_squared(iblk)) - - enddo - !$OMP END PARALLEL DO - hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) -! local_dot = 0.d0 -! do j=j0,jn -! !DIR$ SIMD -! do i=i0,in -! local_dot = local_dot + (vv(i, j, nextit) * vv(i, j, nextit)) -! end do -! end do -! -! call RPN_COMM_allreduce(local_dot,dotprod,1,"MPI_double_precision","MPI_sum",communicate_S,ierr) -! -! hessenberg(nextit,initer) = sqrt(dotprod) - - ! Watch out for happy breakdown - if (.not. almost_zero( hessenberg(nextit,initer) ) ) then - ! Normalize next Arnoldi vector - inverse_norm = c1 / hessenberg(nextit,initer) - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO - end if -! ! Watch out for happy breakdown -! if (.not. almost_zero( hessenberg(nextit,initer) ) ) then -! nu = 1.d0 / hessenberg(nextit,initer) -! do j=j0,jn -! do i=i0,in -! vv(i, j, nextit) = vv(i, j, nextit) * nu -! end do -! end do -! end if - - ! 
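      ! Note: the Givens rotation G(k) is chosen to zero the subdiagonal
      ! entry hessenberg(k+1,k) of the Hessenberg matrix; applied to a
      ! column j it computes, with c = rot_cos(k) and s = rot_sin(k),
      !   [ hessenberg(k,j)   ]    [  c  s ] [ hessenberg(k,j)   ]
      !   [ hessenberg(k+1,j) ] := [ -s  c ] [ hessenberg(k+1,j) ]
      ! so every new column must first be rotated by the previous
      ! G(1), ..., G(initer-1) for the system to stay triangular.
- ! 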
Apply previous Givens rotation to the last column of the Hessenberg matrix - if (initer > 1) then - do k = 2, initer - t = hessenberg(k-1, initer) - hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) - hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) - end do - end if -! ! Form and store the information for the new Givens rotation -! if (initer > 1) then -! do k=2,initer -! k1 = k-1 -! t = hessenberg(k1,initer) -! hessenberg(k1,initer) = rot_cos(k1)*t + rot_sin(k1)*hessenberg(k,initer) -! hessenberg(k,initer) = -rot_sin(k1)*t + rot_cos(k1)*hessenberg(k,initer) -! end do -! -! end if - - ! Compute new Givens rotation - nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) - if (.not. almost_zero(nu)) then - rot_cos(initer) = hessenberg(initer,initer) / inverse_norm - rot_sin(initer) = hessenberg(nextit,initer) / inverse_norm - - rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) - rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) - - hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) - end if -! nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) -! if (.not. almost_zero(nu)) then -! rot_cos(initer) = hessenberg(initer,initer) / nu -! rot_sin(initer) = hessenberg(nextit,initer) / nu -! -! gg(nextit) = -rot_sin(initer) * gg(initer) -! gg(initer) = rot_cos(initer) * gg(initer) -! -! hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) -! end if - ! Check for convergence - norm_residual = abs(rhs_hess(nextit)) - conv = norm_residual / r0 - if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then - exit - endif -! norm_residual = abs(gg(nextit)) -! -! conv = norm_residual / r0 -! -! if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then -! exit -! endif -! - end do ! end of inner (Arnoldi) loop - - ! At this point either the maximum number of inner iterations - ! was reached or the absolute residual is below the scaled tolerance. - - ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" - ! (sol_hess is stored in rhs_hess) - rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer) - do ii = 2, initer - k = initer - ii + 1 - t = rhs_hess(k) - do j = k + 1, initer - t = t - hessenberg(k,j) * rhs_hess(j) - end do - rhs_hess(k) = t / hessenberg(k,k) - end do -! gg(initer) = gg(initer) / hessenberg(initer,initer) -! do ii=2,initer -! k = initer - ii + 1 -! k1 = k + 1 -! t = gg(k) -! do j=k1,initer -! t = t - hessenberg(k,j) * gg(j) -! end do -! gg(k) = t / hessenberg(k,k) -! end do -! - ! Form linear combination to get solution - do it = 1, initer - t = rhs_hess(it) + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + ! Normalize the first Arnoldi vector + inverse_norm = c1 / norm_residual !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks do ij =1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) - solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it) - soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it) + arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm + arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm enddo ! ij enddo !$OMP END PARALLEL DO - end do -! do it=1,initer -! t = gg(it) -! -! do j=j0,jn -! do i=i0,in -! solution(i, j) = solution(i, j) + t * ww(i, j, it) -! 
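      ! Note: the new solution iterate is formed from the *preconditioned*
      ! vectors z_it = M^{-1} v_it stored in wwx/wwy, not from the Arnoldi
      ! basis itself :
      !   x_m = x_0 + sum_it y(it) * z_it
      ! Keeping the z_it is what allows FGMRES to change its preconditioner
      ! at every inner iteration.
-! 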
end do -! end do -! -! end do -! - ! Increment outer loop counter and check for convergence - outiter = outiter + 1 - if (norm_residual <= relative_tolerance .or. outiter > maxouter) then - return - end if - - ! Solution is not convergent : compute residual vector and continue. - ! The residual vector is computed here using (see Saad p. 177) : - ! \begin{equation} - ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) - ! \end{equation} - ! where : - ! $r$ is the residual - ! $V_{m+1}$ is a matrix whose columns are the the Arnoldi vectors from 1 to nextit (m+1) - ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 - ! $gamma_{m+1}$ is the last element of rhs_hess - ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} + + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! Initialize 1-st term of RHS of Hessenberg system + rhs_hess(1) = norm_residual + rhs_hess(2:) = c0 + + initer = 0 - ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, - ! store the result in rhs_hess - do it = 1, initer - jj = nextit - it + 1 - rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0) - rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0) - end do + ! Start of inner (Arnoldi) loop + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 + + ! precondition the current Arnoldi vector + call precondition(arnoldi_basis_x(:,:,:,initer), & + arnoldi_basis_y(:,:,:,initer), & + workspace_x , workspace_y , & + precond_type, diagx, diagy) + ! !phb DESCRIBE ww + wwx(:,:,:,initer) = workspace_x + wwy(:,:,:,initer) = workspace_y + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + arnoldi_basis_x(:,:,iblk,nextit), & + arnoldi_basis_y(:,:,iblk,nextit)) + enddo + !$OMP END PARALLEL DO + + ! Classical Gram-Schmidt orthogonalisation process + ! First loop of Gram-Schmidt (compute coefficients) + dotprod_local = c0 + do it=1,initer + local_dot = c0 + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & + (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) + enddo ! ij + enddo + !$OMP END PARALLEL DO + + dotprod_local(it) = local_dot + end do + + hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) + + ! Second loop of Gram-Schmidt (orthonormalize) + do it = 1, initer + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) + enddo ! 
ij + enddo + !$OMP END PARALLEL DO + end do + + ! Compute norm of new Arnoldi vector and update Hessenberg matrix + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk) , & + arnoldi_basis_x(:,:,iblk, nextit), & + arnoldi_basis_y(:,:,iblk, nextit), & + norm_squared(iblk)) + enddo + !$OMP END PARALLEL DO + hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + ! Normalize next Arnoldi vector + inverse_norm = c1 / hessenberg(nextit,initer) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + end if + + ! Apply previous Givens rotation to the last column of the Hessenberg matrix + if (initer > 1) then + do k = 2, initer + t = hessenberg(k-1, initer) + hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) + hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) + end do + end if + + ! Compute new Givens rotation + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. almost_zero(nu)) then + rot_cos(initer) = hessenberg(initer,initer) / nu + rot_sin(initer) = hessenberg(nextit,initer) / nu + + rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) + rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) + + hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) + end if + + ! Check for convergence + norm_residual = abs(rhs_hess(nextit)) + conv = norm_residual / r0 + if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then + exit + endif + + end do ! end of inner (Arnoldi) loop - ! Compute the residual by multiplying V_{m+1} and rhs_hess - workspace_x = c0 - workspace_y = c0 - do it = 1, nextit - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) + ! At this point either the maximum number of inner iterations + ! was reached or the absolute residual is below the scaled tolerance. + + ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" + ! (sol_hess is stored in rhs_hess) + rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer) + do ii = 2, initer + k = initer - ii + 1 + t = rhs_hess(k) + do j = k + 1, initer + t = t - hessenberg(k,j) * rhs_hess(j) + end do + rhs_hess(k) = t / hessenberg(k,k) + end do + + ! Form linear combination to get new solution iterate + do it = 1, initer + t = rhs_hess(it) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it) + soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do + + ! Increment outer loop counter and check for convergence + outiter = outiter + 1 + if (norm_residual <= relative_tolerance .or. 
outiter > maxouter) then
+ return
+ end if
+
+ ! Solution is not convergent : compute residual vector and continue.
+
+ ! The residual vector is computed here using (see Saad p. 177) :
+ ! \begin{equation}
+ ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1})
+ ! \end{equation}
+ ! where :
+ ! $r$ is the residual
+ ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1)
+ ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1
+ ! $gamma_{m+1}$ is the last element of rhs_hess
+ ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1}
+
+ ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1},
+ ! store the result in rhs_hess
+ do it = 1, initer
+ jj = nextit - it + 1
+ rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0)
+ rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0)
+ end do
+
+ ! Compute the residual by multiplying V_{m+1} and rhs_hess
+ workspace_x = c0
+ workspace_y = c0
+ do it = 1, nextit
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
+ workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ arnoldi_basis_x(:,:,:,1) = workspace_x
+ arnoldi_basis_y(:,:,:,1) = workspace_y
+ end do
 end do ! end of outer (restarts) loop
-!
 return
 end subroutine fgmres

!=======================================================================

From c5c3ab18db98848da29bcccb986ad2e349205564 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 22 Jul 2019 15:38:54 -0400
Subject: [PATCH 108/196] ice_krylov: add 'pgmres' subroutine (mostly the same
 as 'fgmres')

The PGMRES algorithm is the same as using FGMRES with the same
preconditioner at each iteration. However, in order to reuse the FGMRES
code the 'fgmres' subroutine would have to be a recursive subroutine,
which might have performance implications. For now, add it as a separate
subroutine.
---
 cicecore/cicedynB/dynamics/ice_krylov.F90 | 407 +++++++++++++++++++++-
 1 file changed, 405 insertions(+), 2 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90
index 1d90a7d09..85812dbfb 100644
--- a/cicecore/cicedynB/dynamics/ice_krylov.F90
+++ b/cicecore/cicedynB/dynamics/ice_krylov.F90
@@ -503,10 +503,413 @@ end subroutine fgmres
!
! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC

- subroutine pgmres()
-
+ subroutine pgmres (zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ solx, soly, &
+ bx, by, &
+ diagx, diagy, &
+ tolerance, maxinner, maxouter, nbiter, conv)
+
+ use ice_dyn_vp, only: precond
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
+ zetaD ! 
zetaD = 2*zeta (viscous coefficient) + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + vrel , & ! coefficient for tauw + Cb , & ! seabed stress coefficient + umassdti ! mass of U-cell/dte (kg/m^2 s) + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & + solx , & ! Initial guess on input, approximate solution on output (x components) + soly ! Initial guess on input, approximate solution on output (y components) + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + bx , & ! Right hand side of the linear system (x components) + by ! Right hand side of the linear system (y components) + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + diagx , & ! Diagonal of the system matrix (x components) + diagy ! Diagonal of the system matrix (y components) + + real (kind=dbl_kind), intent(in) :: & + tolerance ! Tolerance to achieve. The algorithm terminates when the relative + ! residual is below tolerance + + integer (kind=int_kind), intent(in) :: & + maxinner ! Restart the method every maxinner inner iterations + + integer (kind=int_kind), intent(in) :: & + maxouter ! Maximum number of outer iterations + ! Iteration will stop after maxinner*maxouter steps + ! even if the specified tolerance has not been achieved + + integer (kind=int_kind), intent(out) :: & + nbiter ! Total number of iteration performed + + real (kind=dbl_kind), intent(out) :: & + conv ! !phb DESCRIBE IF WE KEEP + + ! local variables + + integer (kind=int_kind) :: & + iblk , & ! block index + ij , & ! index for indx[t|u][i|j] + i, j ! grid indices + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & + workspace_x , & ! work vector (x components) + workspace_y , & ! work vector (y components) + Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) + Fy ! residual vector (y components), Fy = Av - by (N/m^2) + + real (kind=dbl_kind), dimension (max_blocks) :: & + norm_squared ! array to accumulate squared norm of grid function over blocks + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & + arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv + arnoldi_basis_y ! arnoldi basis (y components) + + real (kind=dbl_kind) :: & + norm_residual , & ! current L^2 norm of residual vector + inverse_norm , & ! inverse of the norm of a vector + nu, t ! local temporary values + + integer (kind=int_kind) :: & + initer , & ! inner (Arnoldi) loop counter + outiter , & ! outer (restarts) loop counter + nextit , & ! nextit == initer+1 + it, k, ii, jj ! reusable loop counters + + real (kind=dbl_kind), dimension(maxinner+1) :: & + rot_cos , & ! cosine elements of Givens rotations + rot_sin , & ! sine elements of Givens rotations + rhs_hess ! right hand side vector of the Hessenberg (least squares) system + + real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & + hessenberg ! system matrix of the Hessenberg (least squares) system + + integer (kind=int_kind) :: & + precond_type ! type of preconditioner + + real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep + + real (kind=dbl_kind) :: & + local_dot ! local value to accumulate dot product computations + + real (kind=dbl_kind), dimension(maxinner) :: & + dotprod_local ! local array to accumulate several dot product computations + character(len=*), parameter :: subname = '(pgmres)' + ! Here we go ! + + outiter = 0 + nbiter = 0 + + conv = c1 + + precond_type = precond + + ! 
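      ! Note: as in fgmres, the first Arnoldi vector is built from the
      ! residual of the initial iterate, r_0 = b - A*x_0, evaluated with
      ! one matvec followed by residual_vec :
+ ! 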
Residual of the initial iterate + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + solx (:,:,iblk) , soly (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) + call residual_vec (nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk) , & + bx (:,:,iblk), by (:,:,iblk) , & + workspace_x(:,:,iblk), workspace_y(:,:,iblk), & + arnoldi_basis_x (:,:,iblk, 1), & + arnoldi_basis_y (:,:,iblk, 1)) + enddo + !$OMP END PARALLEL DO + + ! Start outer (restarts) loop + do + ! Compute norm of initial residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block, ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk), & + arnoldi_basis_x(:,:,iblk, 1), & + arnoldi_basis_y(:,:,iblk, 1), & + norm_squared(iblk)) + + enddo + !$OMP END PARALLEL DO + norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + ! Normalize the first Arnoldi vector + inverse_norm = c1 / norm_residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm + arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! Initialize 1-st term of RHS of Hessenberg system + rhs_hess(1) = norm_residual + rhs_hess(2:) = c0 + + initer = 0 + + ! Start of inner (Arnoldi) loop + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 + + ! precondition the current Arnoldi vector + call precondition(arnoldi_basis_x(:,:,:,initer), & + arnoldi_basis_y(:,:,:,initer), & + workspace_x , workspace_y , & + precond_type, diagx, diagy) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + arnoldi_basis_x(:,:,iblk,nextit), & + arnoldi_basis_y(:,:,iblk,nextit)) + enddo + !$OMP END PARALLEL DO + + ! Classical Gram-Schmidt orthogonalisation process + ! First loop of Gram-Schmidt (compute coefficients) + dotprod_local = c0 + do it=1,initer + local_dot = c0 + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & + (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) + enddo ! 
ij + enddo + !$OMP END PARALLEL DO + + dotprod_local(it) = local_dot + end do + + hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) + + ! Second loop of Gram-Schmidt (orthonormalize) + do it = 1, initer + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do + + ! Compute norm of new Arnoldi vector and update Hessenberg matrix + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk) , & + arnoldi_basis_x(:,:,iblk, nextit), & + arnoldi_basis_y(:,:,iblk, nextit), & + norm_squared(iblk)) + enddo + !$OMP END PARALLEL DO + hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + ! Normalize next Arnoldi vector + inverse_norm = c1 / hessenberg(nextit,initer) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + end if + + ! Apply previous Givens rotation to the last column of the Hessenberg matrix + if (initer > 1) then + do k = 2, initer + t = hessenberg(k-1, initer) + hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) + hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) + end do + end if + + ! Compute new Givens rotation + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. almost_zero(nu)) then + rot_cos(initer) = hessenberg(initer,initer) / nu + rot_sin(initer) = hessenberg(nextit,initer) / nu + + rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) + rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) + + hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) + end if + + ! Check for convergence + norm_residual = abs(rhs_hess(nextit)) + conv = norm_residual / r0 + if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then + exit + endif + + end do ! end of inner (Arnoldi) loop + + ! At this point either the maximum number of inner iterations + ! was reached or the absolute residual is below the scaled tolerance. + + ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" + ! (sol_hess is stored in rhs_hess) + rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer) + do ii = 2, initer + k = initer - ii + 1 + t = rhs_hess(k) + do j = k + 1, initer + t = t - hessenberg(k,j) * rhs_hess(j) + end do + rhs_hess(k) = t / hessenberg(k,k) + end do + + ! 
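      ! Note: with the fixed (right) preconditioner the correction can be
      ! formed in the preconditioned variable and mapped back only once per
      ! restart,
      !   x_m = x_0 + M^{-1} * (V_m * y_m)
      ! hence the single call to 'precondition' below instead of storing
      ! preconditioned basis vectors at each iteration as fgmres does :
+ ! 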
Form linear combination to get new solution iterate + workspace_x = c0 + workspace_y = c0 + do it = 1, initer + t = rhs_hess(it) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + t * arnoldi_basis_x(i, j, iblk, it) + workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + t * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do + + ! Call preconditioner + call precondition(workspace_x(:,:,:), & + workspace_y(:,:,:), & + workspace_x , workspace_y , & + precond_type, diagx, diagy) + + solx = solx + workspace_x + soly = soly + workspace_y + + ! Increment outer loop counter and check for convergence + outiter = outiter + 1 + if (norm_residual <= relative_tolerance .or. outiter > maxouter) then + return + end if + + ! Solution is not convergent : compute residual vector and continue. + + ! The residual vector is computed here using (see Saad p. 177) : + ! \begin{equation} + ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) + ! \end{equation} + ! where : + ! $r$ is the residual + ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1) + ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 + ! $gamma_{m+1}$ is the last element of rhs_hess + ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} + + ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, + ! store the result in rhs_hess + do it = 1, initer + jj = nextit - it + 1 + rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0) + rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0) + end do + + ! Compute the residual by multiplying V_{m+1} and rhs_hess + workspace_x = c0 + workspace_y = c0 + do it = 1, nextit + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it) + workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + arnoldi_basis_x(:,:,:,1) = workspace_x + arnoldi_basis_y(:,:,:,1) = workspace_y + end do + end do ! end of outer (restarts) loop + end subroutine pgmres !======================================================================= From be63b4562cb54c3e6ba423a70cd59398ab96c96d Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 23 Jul 2019 12:27:41 -0400 Subject: [PATCH 109/196] ice_krylov: move subroutines to ice_dyn_vp to work around circular deps 'ice_krylov' and 'ice_dyn_vp' modules each 'use' variables from each other, which makes them impossible to compile in a clean build. Transfer the subroutines in 'ice_krylov' to 'ice_dyn_vp'. While at it, update the public interface of module ice_dyn_vp to only expose what is used in other modules. Also, make 'icellu', 'icellt' and 'indxtij' et al. module variables, to reduce subroutine argument counts. In the same spirit, add use statements. 
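(For context, a minimal sketch of the cycle, with simplified, hypothetical
interfaces -- the real subroutines take many more arguments. Neither module
can be compiled first, since each needs the other's .mod file:

    module ice_krylov
       use ice_dyn_vp, only: matvec     ! ice_krylov needs ice_dyn_vp ...
    contains
       subroutine fgmres()
       end subroutine fgmres
    end module ice_krylov

    module ice_dyn_vp
       use ice_krylov, only: fgmres     ! ... and ice_dyn_vp needs ice_krylov
    contains
       subroutine matvec()
       end subroutine matvec
    end module ice_dyn_vp

Moving the Krylov subroutines into ice_dyn_vp breaks the cycle.)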
---
 .../{fgmresD.F90 => fgmresD.F90.unused} | 0
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 1924 +++++++++++++----
 cicecore/cicedynB/dynamics/ice_krylov.F90 | 998 ---------
 .../{pgmres.F90 => pgmres.F90.unused} | 0
 4 files changed, 1450 insertions(+), 1472 deletions(-)
 rename cicecore/cicedynB/dynamics/{fgmresD.F90 => fgmresD.F90.unused} (100%)
 delete mode 100644 cicecore/cicedynB/dynamics/ice_krylov.F90
 rename cicecore/cicedynB/dynamics/{pgmres.F90 => pgmres.F90.unused} (100%)

diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90.unused
similarity index 100%
rename from cicecore/cicedynB/dynamics/fgmresD.F90
rename to cicecore/cicedynB/dynamics/fgmresD.F90.unused
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 3aaa703eb..340b9d502 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -35,22 +35,27 @@ module ice_dyn_vp

 use ice_kinds_mod
+ use ice_blocks, only: nx_block, ny_block
 use ice_constants, only: field_loc_center, field_loc_NEcorner, &
 field_type_scalar, field_type_vector
 use ice_constants, only: c0, p027, p055, p111, p166, &
 p222, p25, p333, p5, c1
+ use ice_domain, only: nblocks, distrb_info
+ use ice_domain_size, only: max_blocks
 use ice_dyn_shared, only: dyn_prep1, dyn_prep2, dyn_finish, &
 ecci, cosw, sinw, fcor_blk, uvel_init, &
 vvel_init, basal_stress_coeff, basalstress, Ktens
 use ice_fileunits, only: nu_diag
+ use ice_flux, only: fm
+ use ice_global_reductions, only: global_sum, global_sums
+ use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, uarear
 use ice_exit, only: abort_ice
 use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted
 use icepack_intfc, only: icepack_ice_strength, icepack_query_parameters

 implicit none
 private
- public :: imp_solver, matvec, arrays_to_vec, vec_to_arrays, precond_diag, &
- init_vp, residual_vec, calc_L2norm_squared
+ public :: imp_solver, init_vp

 ! namelist parameters
@@ -79,6 +84,18 @@ module ice_dyn_vp
 damping_andacc , & ! damping factor for Anderson acceleration
 reltol_andacc ! relative tolerance for Anderson acceleration

+ ! module variables
+
+ integer (kind=int_kind), allocatable :: &
+ icellt(:) , & ! no. of cells where icetmask = 1
+ icellu(:) ! no. of cells where iceumask = 1
+
+ integer (kind=int_kind), allocatable :: &
+ indxti(:,:) , & ! compressed index in i-direction
+ indxtj(:,:) , & ! compressed index in j-direction
+ indxui(:,:) , & ! compressed index in i-direction
+ indxuj(:,:) ! compressed index in j-direction
+
 !=======================================================================

 contains

!=======================================================================
@@ -94,7 +111,7 @@ subroutine init_vp (dt)
 use ice_boundary, only: ice_HaloUpdate
 use ice_constants, only: c1, &
 field_loc_center, field_type_scalar
- use ice_domain, only: nblocks, blocks_ice, halo_info
+ use ice_domain, only: blocks_ice, halo_info
 use ice_dyn_shared, only: init_evp
 use ice_grid, only: tarea, tinyarea
@@ -116,6 +133,13 @@ subroutine init_vp (dt)
 ! Initialize variables shared with evp
 call init_evp(dt)

+ ! Initialize module variables
+ allocate(icellt(max_blocks), icellu(max_blocks))
+ allocate(indxti(nx_block*ny_block, max_blocks), &
+ indxtj(nx_block*ny_block, max_blocks), &
+ indxui(nx_block*ny_block, max_blocks), &
+ indxuj(nx_block*ny_block, max_blocks))
+
 ! 
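      ! Note: the allocation above assumes init_vp runs once, after the
      ! block decomposition is set; the compressed indices themselves are
      ! recomputed at each dynamics time step.
 ! 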
Redefine tinyarea using a different puny value !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) @@ -159,7 +183,7 @@ subroutine imp_solver (dt) use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, & ice_HaloDestroy, ice_HaloUpdate_stress use ice_blocks, only: block, get_block, nx_block, ny_block - use ice_domain, only: nblocks, blocks_ice, halo_info, maskhalo_dyn + use ice_domain, only: blocks_ice, halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks, ncat use ice_flux, only: rdg_conv, rdg_shear, strairxT, strairyT, & strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, & @@ -188,16 +212,6 @@ subroutine imp_solver (dt) ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij - integer (kind=int_kind), dimension(max_blocks) :: & - icellt , & ! no. of cells where icetmask = 1 - icellu ! no. of cells where iceumask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks) :: & - indxti , & ! compressed index in i-direction - indxtj , & ! compressed index in j-direction - indxui , & ! compressed index in i-direction - indxuj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & tmass , & ! total mass of ice and snow (kg/m^2) waterx , & ! for ocean stress calculation, x (m/s) @@ -630,7 +644,7 @@ subroutine picard_solver (icellt, icellu, & use ice_arrays_column, only: Cdn_ocn use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_halo, ice_HaloUpdate - use ice_domain, only: nblocks, halo_info, maskhalo_dyn + use ice_domain, only: halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks use ice_flux, only: uocn, vocn, fm, Tbu use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & @@ -722,317 +736,317 @@ subroutine picard_solver (icellt, icellu, & character(len=*), parameter :: subname = '(picard_solver)' - ! Allocate space for FGMRES work arrays - allocate(wk11(ntot), wk22(ntot)) - allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - - ! Start iterations - do kOL = 1,maxits_nonlin ! outer loop - - !----------------------------------------------------------------- - ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) - !----------------------------------------------------------------- - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - uprev_k(:,:,iblk) = uvel(:,:,iblk) - vprev_k(:,:,iblk) = vvel(:,:,iblk) - - call calc_zeta_Pr (nx_block , ny_block, & - icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tinyarea (:,:,iblk), & - strength (:,:,iblk), zetaD (:,:,iblk,:) ,& - stPrtmp (:,:,:) ) - - call calc_vrel_Cb (nx_block , ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - aiu (:,:,iblk), Tbu (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - vrel (:,:,iblk), Cb (:,:,iblk)) - - ! prepare b vector (RHS) - call calc_bvec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & - aiu (:,:,iblk), uarear (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - vrel (:,:,iblk)) - - ! prepare precond matrix - if (precond .gt. 
1) then - - call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology - icellu (iblk), & - indxui (:,iblk), indxuj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) - - call formDiag_step2 (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - Dstrtmp (:,:,:) , vrel (:,:,iblk), & - umassdti (:,:,iblk), & - uarear (:,:,iblk), Cb (:,:,iblk), & - Diagu (:,:,iblk), Diagv (:,:,iblk)) - - endif - - enddo - !$OMP END PARALLEL DO - - ! Compute nonlinear residual norm - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - call residual_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - Fx (:,:,iblk), Fy (:,:,iblk), & - L2norm(iblk)) - enddo - !$OMP END PARALLEL DO - nlres_norm = sqrt(sum(L2norm)) - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " nonlin_res_L2norm= ", nlres_norm - endif - ! Compute relative tolerance at first iteration - if (kOL == 1) then - tol = gammaNL*nlres_norm - endif - ! Check for nonlinear convergence - if (nlres_norm < tol) then - exit - endif - - !----------------------------------------------------------------------- - ! prep F G M R E S - !----------------------------------------------------------------------- - - icode = 0 - ! its = 0 - - ! form b vector from matrices (nblocks matrices) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - bx (:,:,:), by (:,:,:), & - bvec(:)) - ! form sol vector for fgmres (sol is iniguess at the beginning) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) - - ! form matrix diagonal as a vector from Diagu and Diagv arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Diagu (:,:,:), Diagv(:,:,:),& - diagvec(:)) - - !----------------------------------------------------------------------- - ! F G M R E S L O O P - !----------------------------------------------------------------------- - 1 continue - !----------------------------------------------------------------------- - - call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, maxits_fgmres, monitor_fgmres, & - icode,fgmres_its, res_norm) - - if (icode == 1) then - - if (precond .eq. 1) then - - wk22(:)=wk11(:) ! precond=identity - - elseif (precond .eq. 2) then ! use diagonal of A for precond step - - call precond_diag (ntot, & - diagvec (:), & - wk11 (:), wk22 (:) ) - - elseif (precond .eq. 
3) then - - call pgmres (nx_block, ny_block, nblocks , & - max_blocks , icellu (:) , & - indxui (:,:) , indxuj (:,:) , & - icellt (:) , & - indxti (:,:) , indxtj (:,:) , & - dxt (:,:,:) , dyt (:,:,:) , & - dxhy (:,:,:) , dyhx (:,:,:) , & - cxp (:,:,:) , cyp (:,:,:) , & - cxm (:,:,:) , cym (:,:,:) , & - vrel (:,:,:) , Cb (:,:,:) , & - zetaD (:,:,:,:) , & - umassdti (:,:,:) , fm (:,:,:) , & - uarear (:,:,:) , diagvec(:) , & - wk22 (:) , wk11(:) , & - ntot , im_pgmres , & - epsprecond , maxits_pgmres , & - monitor_pgmres , ierr ) - endif ! precond - - goto 1 - - elseif (icode >= 2) then - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - wk11 (:), & - uvel (:,:,:), vvel (:,:,:)) - - ! JFL halo update could be in subroutine... - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo - !$OMP END PARALLEL DO - - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - - enddo - !$OMP END PARALLEL DO - - ! form wk2 from Au and Av arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Au (:,:,:), Av (:,:,:), & - wk22(:)) - - goto 1 - - endif ! icode - - !----------------------------------------------------------------------- - ! Put vector sol in uvel and vvel arrays - !----------------------------------------------------------------------- - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - sol (:), & - uvel (:,:,:), vvel (:,:,:)) - - !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) - ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) - ! enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - ! load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - - enddo - !$OMP END PARALLEL DO - - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) - - ! unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - - ! 
Compute fixed point residual norm - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) - fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - call calc_L2norm_squared (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - fpresx(:,:,iblk), fpresy(:,:,iblk), & - L2norm (iblk)) - enddo - !$OMP END PARALLEL DO - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) - endif - - enddo ! outer loop - - ! deallocate FGMRES work arrays - deallocate(wk11, wk22, vv, ww) + ! ! Allocate space for FGMRES work arrays + ! allocate(wk11(ntot), wk22(ntot)) + ! allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + ! + ! ! Start iterations + ! do kOL = 1,maxits_nonlin ! outer loop + ! + ! !----------------------------------------------------------------- + ! ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) + ! !----------------------------------------------------------------- + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! + ! uprev_k(:,:,iblk) = uvel(:,:,iblk) + ! vprev_k(:,:,iblk) = vvel(:,:,iblk) + ! + ! call calc_zeta_Pr (nx_block , ny_block, & + ! icellt(iblk), & + ! indxti (:,iblk) , indxtj(:,iblk), & + ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ! dxt (:,:,iblk), dyt (:,:,iblk), & + ! dxhy (:,:,iblk), dyhx (:,:,iblk), & + ! cxp (:,:,iblk), cyp (:,:,iblk), & + ! cxm (:,:,iblk), cym (:,:,iblk), & + ! tinyarea (:,:,iblk), & + ! strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + ! stPrtmp (:,:,:) ) + ! + ! call calc_vrel_Cb (nx_block , ny_block, & + ! icellu (iblk), Cdn_ocn (:,:,iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! aiu (:,:,iblk), Tbu (:,:,iblk), & + ! uocn (:,:,iblk), vocn (:,:,iblk), & + ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ! vrel (:,:,iblk), Cb (:,:,iblk)) + ! + ! ! prepare b vector (RHS) + ! call calc_bvec (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & + ! aiu (:,:,iblk), uarear (:,:,iblk), & + ! uocn (:,:,iblk), vocn (:,:,iblk), & + ! waterx (:,:,iblk), watery (:,:,iblk), & + ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + ! bxfix (:,:,iblk), byfix (:,:,iblk), & + ! bx (:,:,iblk), by (:,:,iblk), & + ! vrel (:,:,iblk)) + ! + ! ! prepare precond matrix + ! if (precond .gt. 1) then + ! + ! call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology + ! icellu (iblk), & + ! indxui (:,iblk), indxuj(:,iblk), & + ! dxt (:,:,iblk), dyt (:,:,iblk), & + ! dxhy (:,:,iblk), dyhx(:,:,iblk), & + ! cxp (:,:,iblk), cyp (:,:,iblk), & + ! cxm (:,:,iblk), cym (:,:,iblk), & + ! zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + ! + ! call formDiag_step2 (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! Dstrtmp (:,:,:) , vrel (:,:,iblk), & + ! umassdti (:,:,iblk), & + ! uarear (:,:,iblk), Cb (:,:,iblk), & + ! Diagu (:,:,iblk), Diagv (:,:,iblk)) + ! + ! endif + ! + ! enddo + ! !$OMP END PARALLEL DO + ! + ! ! Compute nonlinear residual norm + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! call matvec (nx_block , ny_block, & + ! icellu (iblk) , icellt (iblk) , & + ! indxui (:,iblk) , indxuj (:,iblk) , & + ! indxti (:,iblk) , indxtj (:,iblk) , & + ! dxt (:,:,iblk) , dyt (:,:,iblk), & + ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & + ! cxp (:,:,iblk) , cyp (:,:,iblk), & + ! cxm (:,:,iblk) , cym (:,:,iblk), & + ! uvel (:,:,iblk) , vvel (:,:,iblk), & + ! 
vrel (:,:,iblk) , Cb (:,:,iblk), & + ! zetaD (:,:,iblk,:), & + ! umassdti (:,:,iblk) , fm (:,:,iblk), & + ! uarear (:,:,iblk) , & + ! Au (:,:,iblk) , Av (:,:,iblk)) + ! call residual_vec (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! bx (:,:,iblk), by (:,:,iblk), & + ! Au (:,:,iblk), Av (:,:,iblk), & + ! Fx (:,:,iblk), Fy (:,:,iblk), & + ! L2norm(iblk)) + ! enddo + ! !$OMP END PARALLEL DO + ! nlres_norm = sqrt(sum(L2norm)) + ! if (monitor_nonlin) then + ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + ! " nonlin_res_L2norm= ", nlres_norm + ! endif + ! ! Compute relative tolerance at first iteration + ! if (kOL == 1) then + ! tol = gammaNL*nlres_norm + ! endif + ! ! Check for nonlinear convergence + ! if (nlres_norm < tol) then + ! exit + ! endif + ! + ! !----------------------------------------------------------------------- + ! ! prep F G M R E S + ! !----------------------------------------------------------------------- + ! + ! icode = 0 + ! ! its = 0 + ! + ! ! form b vector from matrices (nblocks matrices) + ! call arrays_to_vec (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! bx (:,:,:), by (:,:,:), & + ! bvec(:)) + ! ! form sol vector for fgmres (sol is iniguess at the beginning) + ! call arrays_to_vec (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! uprev_k (:,:,:), vprev_k (:,:,:), & + ! sol(:)) + ! + ! ! form matrix diagonal as a vector from Diagu and Diagv arrays + ! call arrays_to_vec (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! Diagu (:,:,:), Diagv(:,:,:),& + ! diagvec(:)) + ! + ! !----------------------------------------------------------------------- + ! ! F G M R E S L O O P + ! !----------------------------------------------------------------------- + ! 1 continue + ! !----------------------------------------------------------------------- + ! + ! call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & + ! gamma, maxits_fgmres, monitor_fgmres, & + ! icode,fgmres_its, res_norm) + ! + ! if (icode == 1) then + ! + ! if (precond .eq. 1) then + ! + ! wk22(:)=wk11(:) ! precond=identity + ! + ! elseif (precond .eq. 2) then ! use diagonal of A for precond step + ! + ! call precond_diag (ntot, & + ! diagvec (:), & + ! wk11 (:), wk22 (:) ) + ! + ! elseif (precond .eq. 3) then + ! + ! call pgmres (nx_block, ny_block, nblocks , & + ! max_blocks , icellu (:) , & + ! indxui (:,:) , indxuj (:,:) , & + ! icellt (:) , & + ! indxti (:,:) , indxtj (:,:) , & + ! dxt (:,:,:) , dyt (:,:,:) , & + ! dxhy (:,:,:) , dyhx (:,:,:) , & + ! cxp (:,:,:) , cyp (:,:,:) , & + ! cxm (:,:,:) , cym (:,:,:) , & + ! vrel (:,:,:) , Cb (:,:,:) , & + ! zetaD (:,:,:,:) , & + ! umassdti (:,:,:) , fm (:,:,:) , & + ! uarear (:,:,:) , diagvec(:) , & + ! wk22 (:) , wk11(:) , & + ! ntot , im_pgmres , & + ! epsprecond , maxits_pgmres , & + ! monitor_pgmres , ierr ) + ! endif ! precond + ! + ! goto 1 + ! + ! elseif (icode >= 2) then + ! + ! call vec_to_arrays (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! wk11 (:), & + ! uvel (:,:,:), vvel (:,:,:)) + ! + ! ! JFL halo update could be in subroutine... + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! fld2(:,:,1,iblk) = uvel(:,:,iblk) + ! fld2(:,:,2,iblk) = vvel(:,:,iblk) + ! enddo + ! !$OMP END PARALLEL DO + ! + ! call ice_HaloUpdate (fld2, halo_info, & + ! 
field_loc_NEcorner, field_type_vector) + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! uvel(:,:,iblk) = fld2(:,:,1,iblk) + ! vvel(:,:,iblk) = fld2(:,:,2,iblk) + ! enddo + ! !$OMP END PARALLEL DO + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! + ! call matvec (nx_block , ny_block, & + ! icellu (iblk) , icellt (iblk) , & + ! indxui (:,iblk) , indxuj (:,iblk) , & + ! indxti (:,iblk) , indxtj (:,iblk) , & + ! dxt (:,:,iblk) , dyt (:,:,iblk), & + ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & + ! cxp (:,:,iblk) , cyp (:,:,iblk), & + ! cxm (:,:,iblk) , cym (:,:,iblk), & + ! uvel (:,:,iblk) , vvel (:,:,iblk), & + ! vrel (:,:,iblk) , Cb (:,:,iblk), & + ! zetaD (:,:,iblk,:), & + ! umassdti (:,:,iblk) , fm (:,:,iblk), & + ! uarear (:,:,iblk) , & + ! Au (:,:,iblk) , Av (:,:,iblk)) + ! + ! enddo + ! !$OMP END PARALLEL DO + ! + ! ! form wk2 from Au and Av arrays + ! call arrays_to_vec (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! Au (:,:,:), Av (:,:,:), & + ! wk22(:)) + ! + ! goto 1 + ! + ! endif ! icode + ! + ! !----------------------------------------------------------------------- + ! ! Put vector sol in uvel and vvel arrays + ! !----------------------------------------------------------------------- + ! + ! call vec_to_arrays (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! sol (:), & + ! uvel (:,:,:), vvel (:,:,:)) + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! ! do iblk = 1, nblocks + ! ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) + ! ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) + ! ! enddo + ! !$OMP END PARALLEL DO + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! + ! ! load velocity into array for boundary updates + ! fld2(:,:,1,iblk) = uvel(:,:,iblk) + ! fld2(:,:,2,iblk) = vvel(:,:,iblk) + ! + ! enddo + ! !$OMP END PARALLEL DO + ! + ! call ice_timer_start(timer_bound) + ! if (maskhalo_dyn) then + ! call ice_HaloUpdate (fld2, halo_info_mask, & + ! field_loc_NEcorner, field_type_vector) + ! else + ! call ice_HaloUpdate (fld2, halo_info, & + ! field_loc_NEcorner, field_type_vector) + ! endif + ! call ice_timer_stop(timer_bound) + ! + ! ! unload + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! uvel(:,:,iblk) = fld2(:,:,1,iblk) + ! vvel(:,:,iblk) = fld2(:,:,2,iblk) + ! enddo + ! !$OMP END PARALLEL DO + ! + ! ! Compute fixed point residual norm + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) + ! fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) + ! call calc_L2norm_squared (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! fpresx(:,:,iblk), fpresy(:,:,iblk), & + ! L2norm (iblk)) + ! enddo + ! !$OMP END PARALLEL DO + ! if (monitor_nonlin) then + ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) + ! endif + ! + ! enddo ! outer loop + ! + ! ! deallocate FGMRES work arrays + ! 
deallocate(wk11, wk22, vv, ww) end subroutine picard_solver @@ -1059,7 +1073,7 @@ subroutine anderson_solver (icellt, icellu, & use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_halo, ice_HaloUpdate use ice_constants, only: c1 - use ice_domain, only: nblocks, halo_info, maskhalo_dyn + use ice_domain, only: halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks use ice_flux, only: uocn, vocn, fm, Tbu use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & @@ -1109,7 +1123,8 @@ subroutine anderson_solver (icellt, icellu, & it_nl , & ! nonlinear loop iteration index res_num , & ! current number of stored residuals j , & ! iteration index for QR update - iblk ! block index + iblk , & ! block index + nbiter ! number of FGMRES iterations performed integer (kind=int_kind), parameter :: & inc = 1 ! increment value for BLAS calls @@ -1128,7 +1143,9 @@ subroutine anderson_solver (icellt, icellu, & Au , & ! matvec, Fx = Au - bx Av , & ! matvec, Fy = Av - by Fx , & ! x residual vector, Fx = Au - bx - Fy ! y residual vector, Fy = Av - by + Fy , & ! y residual vector, Fy = Av - by + solx , & ! solution of FGMRES (x components) + soly ! solution of FGMRES (y components) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & zetaD ! zetaD = 2zeta (viscous coeff) @@ -1166,7 +1183,8 @@ subroutine anderson_solver (icellt, icellu, & fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x prog_norm , & ! norm of difference between current and previous solution nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x) - ddot, dnrm2 ! BLAS functions + ddot, dnrm2 , & ! BLAS functions + conv ! needed for FGMRES !phb keep ? character(len=*), parameter :: subname = '(anderson_solver)' @@ -1271,18 +1289,10 @@ subroutine anderson_solver (icellt, icellu, & exit endif - ! Form b vector from matrices (nblocks matrices) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - bx (:,:,:), by (:,:,:), & - bvec(:)) - ! Form sol vector for fgmres (sol is iniguess at the beginning) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) + ! Put initial guess for FGMRES in solx,soly + solx = uprev_k + soly = vprev_k + ! Compute fixed point map g(x) if (fpfunc_andacc == 1) then ! g_1(x) = FGMRES(A(x), b(x)) @@ -1308,25 +1318,35 @@ subroutine anderson_solver (icellt, icellu, & Diagu (:,:,iblk), Diagv (:,:,iblk)) enddo !$OMP END PARALLEL DO - ! Form matrix diagonal as a vector from Diagu and Diagv arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Diagu (:,:,:), Diagv(:,:,:),& - diagvec(:)) endif + ! FGMRES linear solver + call fgmres (zetaD, & + Cb, vrel, & + umassdti, & + solx, soly, & + bx, by, & + Diagu, Diagv, & + gamma, im_fgmres, & + maxits_fgmres, nbiter, conv) + ! Put FGMRES solution solx,soly in fpfunc vector + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + solx (:,:,:), soly (:,:,:), & + fpfunc(:)) ! FGMRES linear solver (solution is in fpfunc) - fpfunc = sol - call fgmres_solver (ntot, bvec, & - fpfunc, diagvec, & - icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - zetaD, & - Cb, vrel, & - aiu, umassdti, & - fld2) + ! fpfunc = sol + ! call fgmres_solver (ntot, bvec, & + ! fpfunc, diagvec, & + ! icellt, icellu, & + ! 
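+ !
+ ! Editor's note: a brief recap of the fixed point iteration used in
+ ! anderson_solver (a sketch of the standard Anderson acceleration update,
+ ! not the exact code, which lives in the residual/QR logic of this routine):
+ ! with fixed point map g, here g_1(x) = FGMRES(A(x), b(x)), the accelerated
+ ! iterate combines the last m_k evaluations
+ !
+ ! \begin{equation}
+ ! x_{k+1} = \sum_{i=0}^{m_k} \alpha_i \, g(x_{k-m_k+i}), \qquad \sum_{i} \alpha_i = 1,
+ ! \end{equation}
+ !
+ ! where the weights \alpha_i minimize the norm of the combined fixed point
+ ! residuals f(x) = g(x) - x.
+ ! 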
indxti, indxtj, & + ! indxui, indxuj, & + ! zetaD, & + ! Cb, vrel, & + ! aiu, umassdti, & + ! fld2) + elseif (fpfunc_andacc == 2) then ! g_2(x) = x - A(x)x + b(x) = x - F(x) endif @@ -1508,7 +1528,7 @@ subroutine fgmres_solver (ntot, bvec, & use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_HaloUpdate - use ice_domain, only: nblocks, halo_info + use ice_domain, only: halo_info use ice_domain_size, only: max_blocks use ice_flux, only: fm use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & @@ -1571,121 +1591,121 @@ subroutine fgmres_solver (ntot, bvec, & character(len=*), parameter :: subname = '(fgmres_solver)' - ! Allocate space for FGMRES work arrays - allocate(wk11(ntot), wk22(ntot)) - allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - - !----------------------------------------------------------------------- - ! prep F G M R E S - !----------------------------------------------------------------------- - - icode = 0 - - !----------------------------------------------------------------------- - ! F G M R E S L O O P - !----------------------------------------------------------------------- - 1 continue - !----------------------------------------------------------------------- - - - call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, maxits_fgmres,monitor_fgmres, & - icode, fgmres_its, res_norm) - - if (icode == 1) then - - if (precond .eq. 1) then - - wk22(:)=wk11(:) ! precond=identity - - elseif (precond .eq. 2) then ! use diagonal of A for precond step - - call precond_diag (ntot, & - diagvec (:), & - wk11 (:), wk22 (:) ) - - elseif (precond .eq. 3) then - - call pgmres (nx_block, ny_block, nblocks , & - max_blocks , icellu (:) , & - indxui (:,:) , indxuj (:,:) , & - icellt (:) , & - indxti (:,:) , indxtj (:,:) , & - dxt (:,:,:) , dyt (:,:,:) , & - dxhy (:,:,:) , dyhx (:,:,:) , & - cxp (:,:,:) , cyp (:,:,:) , & - cxm (:,:,:) , cym (:,:,:) , & - vrel (:,:,:) , Cb (:,:,:) , & - zetaD (:,:,:,:) , & - umassdti (:,:,:) , fm (:,:,:) , & - uarear (:,:,:) , diagvec(:) , & - wk22 (:) , wk11(:) , & - ntot , im_pgmres , & - epsprecond , maxits_pgmres , & - monitor_pgmres , ierr ) - endif ! precond - - goto 1 - - elseif (icode >= 2) then - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - wk11 (:), & - uvel (:,:,:), vvel (:,:,:)) - - ! JFL halo update could be in subroutine... - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo - !$OMP END PARALLEL DO - - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - - enddo - !$OMP END PARALLEL DO - - ! 
form wk2 from Au and Av arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Au (:,:,:), Av (:,:,:), & - wk22(:)) - - goto 1 - - endif ! icode - - deallocate(wk11, wk22, vv, ww) + ! ! Allocate space for FGMRES work arrays + ! allocate(wk11(ntot), wk22(ntot)) + ! allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) + ! + ! !----------------------------------------------------------------------- + ! ! prep F G M R E S + ! !----------------------------------------------------------------------- + ! + ! icode = 0 + ! + ! !----------------------------------------------------------------------- + ! ! F G M R E S L O O P + ! !----------------------------------------------------------------------- + ! 1 continue + ! !----------------------------------------------------------------------- + ! + ! + ! call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & + ! gamma, maxits_fgmres,monitor_fgmres, & + ! icode, fgmres_its, res_norm) + ! + ! if (icode == 1) then + ! + ! if (precond .eq. 1) then + ! + ! wk22(:)=wk11(:) ! precond=identity + ! + ! elseif (precond .eq. 2) then ! use diagonal of A for precond step + ! + ! call precond_diag (ntot, & + ! diagvec (:), & + ! wk11 (:), wk22 (:) ) + ! + ! elseif (precond .eq. 3) then + ! + ! call pgmres (nx_block, ny_block, nblocks , & + ! max_blocks , icellu (:) , & + ! indxui (:,:) , indxuj (:,:) , & + ! icellt (:) , & + ! indxti (:,:) , indxtj (:,:) , & + ! dxt (:,:,:) , dyt (:,:,:) , & + ! dxhy (:,:,:) , dyhx (:,:,:) , & + ! cxp (:,:,:) , cyp (:,:,:) , & + ! cxm (:,:,:) , cym (:,:,:) , & + ! vrel (:,:,:) , Cb (:,:,:) , & + ! zetaD (:,:,:,:) , & + ! umassdti (:,:,:) , fm (:,:,:) , & + ! uarear (:,:,:) , diagvec(:) , & + ! wk22 (:) , wk11(:) , & + ! ntot , im_pgmres , & + ! epsprecond , maxits_pgmres , & + ! monitor_pgmres , ierr ) + ! endif ! precond + ! + ! goto 1 + ! + ! elseif (icode >= 2) then + ! + ! call vec_to_arrays (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! wk11 (:), & + ! uvel (:,:,:), vvel (:,:,:)) + ! + ! ! JFL halo update could be in subroutine... + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! fld2(:,:,1,iblk) = uvel(:,:,iblk) + ! fld2(:,:,2,iblk) = vvel(:,:,iblk) + ! enddo + ! !$OMP END PARALLEL DO + ! + ! call ice_HaloUpdate (fld2, halo_info, & + ! field_loc_NEcorner, field_type_vector) + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! uvel(:,:,iblk) = fld2(:,:,1,iblk) + ! vvel(:,:,iblk) = fld2(:,:,2,iblk) + ! enddo + ! !$OMP END PARALLEL DO + ! + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! + ! call matvec (nx_block , ny_block, & + ! icellu (iblk) , icellt (iblk) , & + ! indxui (:,iblk) , indxuj (:,iblk) , & + ! indxti (:,iblk) , indxtj (:,iblk) , & + ! dxt (:,:,iblk) , dyt (:,:,iblk), & + ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & + ! cxp (:,:,iblk) , cyp (:,:,iblk), & + ! cxm (:,:,iblk) , cym (:,:,iblk), & + ! uvel (:,:,iblk) , vvel (:,:,iblk), & + ! vrel (:,:,iblk) , Cb (:,:,iblk), & + ! zetaD (:,:,iblk,:), & + ! umassdti (:,:,iblk) , fm (:,:,iblk), & + ! uarear (:,:,iblk) , & + ! Au (:,:,iblk) , Av (:,:,iblk)) + ! + ! enddo + ! !$OMP END PARALLEL DO + ! + ! ! form wk2 from Au and Av arrays + ! call arrays_to_vec (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! Au (:,:,:), Av (:,:,:), & + ! wk22(:)) + ! + ! goto 1 + ! + ! endif ! icode + ! + ! 
deallocate(wk11, wk22, vv, ww)
 end subroutine fgmres_solver
@@ -3816,6 +3836,962 @@ subroutine qr_delete(Q, R)
 end subroutine qr_delete
+!=======================================================================
+
+! FGMRES: Flexible generalized minimum residual method (with restarts).
+! Solves A x = b using GMRES with a varying (right) preconditioner
+!
+! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC
+
+ subroutine fgmres (zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ solx, soly, &
+ bx, by, &
+ diagx, diagy, &
+ tolerance, maxinner, &
+ maxouter, nbiter, conv)
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
+ zetaD ! zetaD = 2*zeta (viscous coefficient)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ vrel , & ! coefficient for tauw
+ Cb , & ! seabed stress coefficient
+ umassdti ! mass of U-cell/dte (kg/m^2 s)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: &
+ solx , & ! Initial guess on input, approximate solution on output (x components)
+ soly ! Initial guess on input, approximate solution on output (y components)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ bx , & ! Right hand side of the linear system (x components)
+ by ! Right hand side of the linear system (y components)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ diagx , & ! Diagonal of the system matrix (x components)
+ diagy ! Diagonal of the system matrix (y components)
+
+ real (kind=dbl_kind), intent(in) :: &
+ tolerance ! Tolerance to achieve. The algorithm terminates when the relative
+ ! residual is below tolerance
+
+ integer (kind=int_kind), intent(in) :: &
+ maxinner ! Restart the method every maxinner inner iterations
+
+ integer (kind=int_kind), intent(in) :: &
+ maxouter ! Maximum number of outer iterations
+ ! Iteration will stop after maxinner*maxouter steps
+ ! even if the specified tolerance has not been achieved
+
+ integer (kind=int_kind), intent(out) :: &
+ nbiter ! Total number of iterations performed
+
+ real (kind=dbl_kind), intent(out) :: &
+ conv ! !phb DESCRIBE IF WE KEEP
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ iblk , & ! block index
+ ij , & ! index for indx[t|u][i|j]
+ i, j ! grid indices
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+ workspace_x , & ! work vector (x components)
+ workspace_y , & ! work vector (y components)
+ Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2)
+ Fy ! residual vector (y components), Fy = Av - by (N/m^2)
+
+ real (kind=dbl_kind), dimension (max_blocks) :: &
+ norm_squared ! array to accumulate squared norm of grid function over blocks
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: &
+ arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv
+ arnoldi_basis_y ! arnoldi basis (y components)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: &
+ wwx , & ! !phb FIND BETTER NAME (x components)
+ wwy ! !phb FIND BETTER NAME (y components)
+
+ real (kind=dbl_kind) :: &
+ norm_residual , & ! current L^2 norm of residual vector
+ inverse_norm , & ! inverse of the norm of a vector
+ nu, t ! local temporary values
+
+ integer (kind=int_kind) :: &
+ initer , & ! inner (Arnoldi) loop counter
+ outiter , & ! outer (restarts) loop counter
+ nextit , & ! nextit == initer+1
+ it, k, ii, jj ! 
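+ !
+ ! Editor's note: a hedged usage sketch of this new array-based interface,
+ ! mirroring the call added in anderson_solver above (gamma, im_fgmres and
+ ! maxits_fgmres are the existing namelist parameters of this module):
+ !
+ !   call fgmres (zetaD, Cb, vrel, umassdti, &
+ !                solx, soly, bx, by,        &
+ !                Diagu, Diagv,              &
+ !                gamma, im_fgmres,          &
+ !                maxits_fgmres, nbiter, conv)
+ !
+ ! On return, solx/soly hold the approximate solution, nbiter the total number
+ ! of inner iterations and conv the final relative residual (norm_residual / r0).
+ ! 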
reusable loop counters + + real (kind=dbl_kind), dimension(maxinner+1) :: & + rot_cos , & ! cosine elements of Givens rotations + rot_sin , & ! sine elements of Givens rotations + rhs_hess ! right hand side vector of the Hessenberg (least squares) system + + real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & + hessenberg ! system matrix of the Hessenberg (least squares) system + + integer (kind=int_kind) :: & + precond_type ! type of preconditioner + + real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep + + real (kind=dbl_kind) :: & + local_dot ! local value to accumulate dot product computations + + real (kind=dbl_kind), dimension(maxinner) :: & + dotprod_local ! local array to accumulate several dot product computations + + character(len=*), parameter :: subname = '(fgmres)' + + ! Here we go ! + + outiter = 0 + nbiter = 0 + + conv = c1 + + precond_type = precond + + ! Residual of the initial iterate + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + solx (:,:,iblk) , soly (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) + call residual_vec (nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk) , & + bx (:,:,iblk), by (:,:,iblk) , & + workspace_x(:,:,iblk), workspace_y(:,:,iblk), & + arnoldi_basis_x (:,:,iblk, 1), & + arnoldi_basis_y (:,:,iblk, 1)) + enddo + !$OMP END PARALLEL DO + + ! Start outer (restarts) loop + do + ! Compute norm of initial residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block, ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk), & + arnoldi_basis_x(:,:,iblk, 1), & + arnoldi_basis_y(:,:,iblk, 1), & + norm_squared(iblk)) + + enddo + !$OMP END PARALLEL DO + norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + ! Normalize the first Arnoldi vector + inverse_norm = c1 / norm_residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm + arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! Initialize 1-st term of RHS of Hessenberg system + rhs_hess(1) = norm_residual + rhs_hess(2:) = c0 + + initer = 0 + + ! Start of inner (Arnoldi) loop + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 + + ! precondition the current Arnoldi vector + call precondition(zetaD, & + Cb, vrel, & + umassdti, & + arnoldi_basis_x(:,:,:,initer), & + arnoldi_basis_y(:,:,:,initer), & + workspace_x , workspace_y , & + precond_type, diagx, diagy) + ! 
!phb DESCRIBE ww + wwx(:,:,:,initer) = workspace_x + wwy(:,:,:,initer) = workspace_y + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + arnoldi_basis_x(:,:,iblk,nextit), & + arnoldi_basis_y(:,:,iblk,nextit)) + enddo + !$OMP END PARALLEL DO + + ! Classical Gram-Schmidt orthogonalisation process + ! First loop of Gram-Schmidt (compute coefficients) + dotprod_local = c0 + do it=1,initer + local_dot = c0 + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & + (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) + enddo ! ij + enddo + !$OMP END PARALLEL DO + + dotprod_local(it) = local_dot + end do + + hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) + + ! Second loop of Gram-Schmidt (orthonormalize) + do it = 1, initer + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do + + ! Compute norm of new Arnoldi vector and update Hessenberg matrix + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk) , & + arnoldi_basis_x(:,:,iblk, nextit), & + arnoldi_basis_y(:,:,iblk, nextit), & + norm_squared(iblk)) + enddo + !$OMP END PARALLEL DO + hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + ! Normalize next Arnoldi vector + inverse_norm = c1 / hessenberg(nextit,initer) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + end if + + ! Apply previous Givens rotation to the last column of the Hessenberg matrix + if (initer > 1) then + do k = 2, initer + t = hessenberg(k-1, initer) + hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) + hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) + end do + end if + + ! Compute new Givens rotation + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. 
almost_zero(nu)) then
+ rot_cos(initer) = hessenberg(initer,initer) / nu
+ rot_sin(initer) = hessenberg(nextit,initer) / nu
+
+ rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer)
+ rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer)
+
+ hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer)
+ end if
+
+ ! Check for convergence
+ norm_residual = abs(rhs_hess(nextit))
+ conv = norm_residual / r0
+ if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then
+ exit
+ endif
+
+ end do ! end of inner (Arnoldi) loop
+
+ ! At this point either the maximum number of inner iterations
+ ! was reached or the absolute residual is below the scaled tolerance.
+
+ ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess"
+ ! (sol_hess is stored in rhs_hess)
+ rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer)
+ do ii = 2, initer
+ k = initer - ii + 1
+ t = rhs_hess(k)
+ do j = k + 1, initer
+ t = t - hessenberg(k,j) * rhs_hess(j)
+ end do
+ rhs_hess(k) = t / hessenberg(k,k)
+ end do
+
+ ! Form linear combination to get new solution iterate
+ do it = 1, initer
+ t = rhs_hess(it)
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it)
+ soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+
+ ! Increment outer loop counter and check for convergence
+ outiter = outiter + 1
+ if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
+ return
+ end if
+
+ ! Solution has not converged: compute residual vector and continue.
+
+ ! The residual vector is computed here using (see Saad p. 177) :
+ ! \begin{equation}
+ ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1})
+ ! \end{equation}
+ ! where :
+ ! $r$ is the residual
+ ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1)
+ ! $Q_m$ is the product of the Givens rotations : Q_m = G_m G_{m-1} ... G_1
+ ! $\gamma_{m+1}$ is the last element of rhs_hess
+ ! $e_{m+1}$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1}
+
+ ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1},
+ ! store the result in rhs_hess
+ do it = 1, initer
+ jj = nextit - it + 1
+ rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0)
+ rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0)
+ end do
+
+ ! Compute the residual by multiplying V_{m+1} and rhs_hess
+ ! (accumulate over the basis vectors; the copy back into the first basis
+ ! vector is done once, after the loop)
+ workspace_x = c0
+ workspace_y = c0
+ do it = 1, nextit
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
+ workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+ arnoldi_basis_x(:,:,:,1) = workspace_x
+ arnoldi_basis_y(:,:,:,1) = workspace_y
+ end do ! end of outer (restarts) loop
+
+ return
+ end subroutine fgmres
+
+!=======================================================================
+
+! PGMRES: Right-preconditioned generalized minimum residual method (with restarts).
+! Solves A x = b using GMRES with a right preconditioner
+!
+! 
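+ !
+ ! Editor's note: for reference (Saad 2003, ch. 9), the flexible variant above
+ ! stores every preconditioned direction (wwx/wwy) because the preconditioner
+ ! may differ at each inner iteration. With
+ ! Z_m = [M_1^{-1} v_1, ..., M_m^{-1} v_m], the "Form linear combination" loop
+ ! implements
+ !
+ ! \begin{equation}
+ ! x_m = x_0 + Z_m y_m, \qquad y_m = \arg\min_y \| \beta e_1 - \bar{H}_m y \|_2,
+ ! \end{equation}
+ !
+ ! with \beta the initial residual norm and \bar{H}_m the Hessenberg matrix
+ ! reduced to triangular form by the Givens rotations.
+ ! 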
authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC
+
+ subroutine pgmres (zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ solx, soly, &
+ bx, by, &
+ diagx, diagy, &
+ tolerance, maxinner, &
+ maxouter, nbiter, conv)
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
+ zetaD ! zetaD = 2*zeta (viscous coefficient)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ vrel , & ! coefficient for tauw
+ Cb , & ! seabed stress coefficient
+ umassdti ! mass of U-cell/dte (kg/m^2 s)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: &
+ solx , & ! Initial guess on input, approximate solution on output (x components)
+ soly ! Initial guess on input, approximate solution on output (y components)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ bx , & ! Right hand side of the linear system (x components)
+ by ! Right hand side of the linear system (y components)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ diagx , & ! Diagonal of the system matrix (x components)
+ diagy ! Diagonal of the system matrix (y components)
+
+ real (kind=dbl_kind), intent(in) :: &
+ tolerance ! Tolerance to achieve. The algorithm terminates when the relative
+ ! residual is below tolerance
+
+ integer (kind=int_kind), intent(in) :: &
+ maxinner ! Restart the method every maxinner inner iterations
+
+ integer (kind=int_kind), intent(in) :: &
+ maxouter ! Maximum number of outer iterations
+ ! Iteration will stop after maxinner*maxouter steps
+ ! even if the specified tolerance has not been achieved
+
+ integer (kind=int_kind), intent(out) :: &
+ nbiter ! Total number of iterations performed
+
+ real (kind=dbl_kind), intent(out) :: &
+ conv ! !phb DESCRIBE IF WE KEEP
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ iblk , & ! block index
+ ij , & ! index for indx[t|u][i|j]
+ i, j ! grid indices
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
+ workspace_x , & ! work vector (x components)
+ workspace_y , & ! work vector (y components)
+ Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2)
+ Fy ! residual vector (y components), Fy = Av - by (N/m^2)
+
+ real (kind=dbl_kind), dimension (max_blocks) :: &
+ norm_squared ! array to accumulate squared norm of grid function over blocks
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: &
+ arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv
+ arnoldi_basis_y ! arnoldi basis (y components)
+
+ real (kind=dbl_kind) :: &
+ norm_residual , & ! current L^2 norm of residual vector
+ inverse_norm , & ! inverse of the norm of a vector
+ nu, t ! local temporary values
+
+ integer (kind=int_kind) :: &
+ initer , & ! inner (Arnoldi) loop counter
+ outiter , & ! outer (restarts) loop counter
+ nextit , & ! nextit == initer+1
+ it, k, ii, jj ! reusable loop counters
+
+ real (kind=dbl_kind), dimension(maxinner+1) :: &
+ rot_cos , & ! cosine elements of Givens rotations
+ rot_sin , & ! sine elements of Givens rotations
+ rhs_hess ! right hand side vector of the Hessenberg (least squares) system
+
+ real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: &
+ hessenberg ! system matrix of the Hessenberg (least squares) system
+
+ integer (kind=int_kind) :: &
+ precond_type ! type of preconditioner
+
+ real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep
+
+ real (kind=dbl_kind) :: &
+ local_dot ! 
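+ !
+ ! Editor's note: pgmres repeats the fgmres Arnoldi/Givens machinery with one
+ ! difference: the preconditioner is fixed (Jacobi, precond_type = 2 below), so
+ ! the preconditioned basis does not need to be stored. The update becomes
+ !
+ ! \begin{equation}
+ ! x_m = x_0 + M^{-1} V_m y_m,
+ ! \end{equation}
+ !
+ ! applied through the single call to 'precondition' after the
+ ! linear-combination loop, which saves the wwx/wwy arrays used by fgmres.
+ ! 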
local value to accumulate dot product computations + + real (kind=dbl_kind), dimension(maxinner) :: & + dotprod_local ! local array to accumulate several dot product computations + + character(len=*), parameter :: subname = '(pgmres)' + + ! Here we go ! + + outiter = 0 + nbiter = 0 + + conv = c1 + + precond_type = 2 ! Jacobi preconditioner + + ! Residual of the initial iterate + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + solx (:,:,iblk) , soly (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) + call residual_vec (nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk) , & + bx (:,:,iblk), by (:,:,iblk) , & + workspace_x(:,:,iblk), workspace_y(:,:,iblk), & + arnoldi_basis_x (:,:,iblk, 1), & + arnoldi_basis_y (:,:,iblk, 1)) + enddo + !$OMP END PARALLEL DO + + ! Start outer (restarts) loop + do + ! Compute norm of initial residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block, ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk), & + arnoldi_basis_x(:,:,iblk, 1), & + arnoldi_basis_y(:,:,iblk, 1), & + norm_squared(iblk)) + + enddo + !$OMP END PARALLEL DO + norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Current guess is a good enough solution + if (norm_residual < tolerance) then + return + end if + + ! Normalize the first Arnoldi vector + inverse_norm = c1 / norm_residual + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm + arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + + if (outiter == 0) then + relative_tolerance = tolerance * norm_residual + r0 = norm_residual + end if + + conv = norm_residual / r0 + + ! Initialize 1-st term of RHS of Hessenberg system + rhs_hess(1) = norm_residual + rhs_hess(2:) = c0 + + initer = 0 + + ! Start of inner (Arnoldi) loop + do + + nbiter = nbiter + 1 + initer = initer + 1 + nextit = initer + 1 + + ! precondition the current Arnoldi vector + call precondition(zetaD, & + Cb, vrel, & + umassdti, & + arnoldi_basis_x(:,:,:,initer), & + arnoldi_basis_y(:,:,:,initer), & + workspace_x , workspace_y , & + precond_type, diagx, diagy) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + arnoldi_basis_x(:,:,iblk,nextit), & + arnoldi_basis_y(:,:,iblk,nextit)) + enddo + !$OMP END PARALLEL DO + + ! Classical Gram-Schmidt orthogonalisation process + ! 
First loop of Gram-Schmidt (compute coefficients) + dotprod_local = c0 + do it=1,initer + local_dot = c0 + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & + (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) + enddo ! ij + enddo + !$OMP END PARALLEL DO + + dotprod_local(it) = local_dot + end do + + hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) + + ! Second loop of Gram-Schmidt (orthonormalize) + do it = 1, initer + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & + - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) + enddo ! ij + enddo + !$OMP END PARALLEL DO + end do + + ! Compute norm of new Arnoldi vector and update Hessenberg matrix + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:, iblk) , & + arnoldi_basis_x(:,:,iblk, nextit), & + arnoldi_basis_y(:,:,iblk, nextit), & + norm_squared(iblk)) + enddo + !$OMP END PARALLEL DO + hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) + + ! Watch out for happy breakdown + if (.not. almost_zero( hessenberg(nextit,initer) ) ) then + ! Normalize next Arnoldi vector + inverse_norm = c1 / hessenberg(nextit,initer) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + do ij =1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + + arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm + arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm + enddo ! ij + enddo + !$OMP END PARALLEL DO + end if + + ! Apply previous Givens rotation to the last column of the Hessenberg matrix + if (initer > 1) then + do k = 2, initer + t = hessenberg(k-1, initer) + hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) + hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) + end do + end if + + ! Compute new Givens rotation + nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) + if (.not. almost_zero(nu)) then + rot_cos(initer) = hessenberg(initer,initer) / nu + rot_sin(initer) = hessenberg(nextit,initer) / nu + + rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) + rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) + + hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) + end if + + ! Check for convergence + norm_residual = abs(rhs_hess(nextit)) + conv = norm_residual / r0 + if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then + exit + endif + + end do ! end of inner (Arnoldi) loop + + ! At this point either the maximum number of inner iterations + ! was reached or the absolute residual is below the scaled tolerance. + + ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" + ! 
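+ !
+ ! Editor's note: the triangular solve below is plain back-substitution on the
+ ! upper-triangular system produced by the Givens rotations (m = initer):
+ !
+ ! \begin{equation}
+ ! y_k = \frac{1}{H_{k,k}} \Big( g_k - \sum_{j=k+1}^{m} H_{k,j} \, y_j \Big),
+ ! \qquad k = m, m-1, \ldots, 1.
+ ! \end{equation}
+ ! 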
(sol_hess is stored in rhs_hess)
+ rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer)
+ do ii = 2, initer
+ k = initer - ii + 1
+ t = rhs_hess(k)
+ do j = k + 1, initer
+ t = t - hessenberg(k,j) * rhs_hess(j)
+ end do
+ rhs_hess(k) = t / hessenberg(k,k)
+ end do
+
+ ! Form linear combination to get new solution iterate
+ workspace_x = c0
+ workspace_y = c0
+ do it = 1, initer
+ t = rhs_hess(it)
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + t * arnoldi_basis_x(i, j, iblk, it)
+ workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + t * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+
+ ! Call preconditioner
+ call precondition(zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ workspace_x , workspace_y, &
+ workspace_x , workspace_y, &
+ precond_type, diagx, diagy)
+
+ solx = solx + workspace_x
+ soly = soly + workspace_y
+
+ ! Increment outer loop counter and check for convergence
+ outiter = outiter + 1
+ if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
+ return
+ end if
+
+ ! Solution has not converged: compute residual vector and continue.
+
+ ! The residual vector is computed here using (see Saad p. 177) :
+ ! \begin{equation}
+ ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1})
+ ! \end{equation}
+ ! where :
+ ! $r$ is the residual
+ ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1)
+ ! $Q_m$ is the product of the Givens rotations : Q_m = G_m G_{m-1} ... G_1
+ ! $\gamma_{m+1}$ is the last element of rhs_hess
+ ! $e_{m+1}$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1}
+
+ ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1},
+ ! store the result in rhs_hess
+ do it = 1, initer
+ jj = nextit - it + 1
+ rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0)
+ rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0)
+ end do
+
+ ! Compute the residual by multiplying V_{m+1} and rhs_hess
+ ! (accumulate over the basis vectors; the copy back into the first basis
+ ! vector is done once, after the loop)
+ workspace_x = c0
+ workspace_y = c0
+ do it = 1, nextit
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
+ workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+ arnoldi_basis_x(:,:,:,1) = workspace_x
+ arnoldi_basis_y(:,:,:,1) = workspace_y
+ end do ! end of outer (restarts) loop
+
+ end subroutine pgmres
+
+!=======================================================================
+
+! Generic routine to precondition a vector
+!
+! authors: Philippe Blain, ECCC
+
+ subroutine precondition(zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ vx, vy, &
+ wx, wy, &
+ precond_type, &
+ diagx, diagy)
+
+ real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
+ zetaD ! zetaD = 2*zeta (viscous coefficient)
+
+ real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: &
+ vrel , & ! coefficient for tauw
+ Cb , & ! seabed stress coefficient
+ umassdti ! mass of U-cell/dte (kg/m^2 s)
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+ vx , & ! input vector (x components)
+ vy ! input vector (y components)
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(out) :: &
+ wx , & ! 
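+ !
+ ! Editor's note: the diagx/diagy arguments of this routine are the Diagu/Diagv
+ ! diagonals assembled by formDiag_step1/formDiag_step2 in the nonlinear solver,
+ ! so the Jacobi branch (precond_type == 2) applies w = D^{-1} v componentwise,
+ ! while branch 3 wraps that same diagonal inside a nested pgmres solve.
+ ! 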
preconditioned vector (x components)
+ wy ! preconditioned vector (y components)
+
+ integer (kind=int_kind), intent(in) :: &
+ precond_type ! type of preconditioner
+
+ real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: &
+ diagx , & ! diagonal of the system matrix (x components)
+ diagy ! diagonal of the system matrix (y components)
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ iblk , & ! block index
+ ij , & ! compressed index
+ i, j ! grid indices
+
+ real (kind=dbl_kind) :: &
+ tolerance ! Tolerance for pgmres
+
+ integer (kind=int_kind) :: &
+ maxinner ! Restart parameter for pgmres
+
+ integer (kind=int_kind) :: &
+ maxouter ! Maximum number of outer iterations for pgmres
+
+ integer (kind=int_kind) :: &
+ nbiter ! Total number of pgmres iterations performed
+
+ real (kind=dbl_kind) :: &
+ conv ! !phb DESCRIBE IF WE KEEP for pgmres
+
+ character(len=*), parameter :: subname = '(precondition)'
+
+ if (precond_type == 1) then ! identity (no preconditioner)
+ wx = vx
+ wy = vy
+ elseif (precond_type == 2) then ! Jacobi preconditioner (diagonal)
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ wx(i,j,iblk) = vx(i,j,iblk) / diagx(i,j,iblk)
+ wy(i,j,iblk) = vy(i,j,iblk) / diagy(i,j,iblk)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ elseif (precond_type == 3) then ! PGMRES (Jacobi-preconditioned GMRES)
+ ! Initialize preconditioned vector to 0 !phb try with wx = vx or vx/diagx
+ wx = c0
+ wy = c0
+ tolerance = epsprecond
+ maxinner = im_pgmres
+ maxouter = maxits_pgmres
+ call pgmres (zetaD, &
+ Cb, vrel, &
+ umassdti, &
+ wx, wy, &
+ vx, vy, &
+ diagx, diagy, &
+ tolerance, maxinner, &
+ maxouter, nbiter, conv)
+ else
+
+ endif
+ end subroutine precondition
+
+!=======================================================================
+
+logical function almost_zero(A) result(retval)
+ ! Check if value A is close to zero, up to machine precision
+ !
+ !author
+ ! Stéphane Gaudreault, ECCC -- June 2014
+ !
+ !revision
+ ! v4-80 - Gaudreault S. - gfortran compatibility
+ ! 2019 - Philippe Blain, ECCC - converted to CICE standards
+ implicit none
+
+ real (kind=dbl_kind), intent(in) :: A
+ integer (kind=int8_kind) :: aBit
+ integer (kind=int8_kind), parameter :: two_complement = int(Z'80000000', kind=int8_kind)
+ aBit = 0
+ aBit = transfer(A, aBit)
+ if (aBit < 0) then
+ aBit = two_complement - aBit
+ end if
+ ! lexicographic order test with a tolerance of 1 adjacent float
+ retval = (abs(aBit) <= 1)
+end function almost_zero
+
!=======================================================================
 end module ice_dyn_vp
diff --git a/cicecore/cicedynB/dynamics/ice_krylov.F90 b/cicecore/cicedynB/dynamics/ice_krylov.F90
deleted file mode 100644
index 85812dbfb..000000000
--- a/cicecore/cicedynB/dynamics/ice_krylov.F90
+++ /dev/null
@@ -1,998 +0,0 @@
-!---------------------------------- LICENCE BEGIN -------------------------------
-! GEM - Library of kernel routines for the GEM numerical atmospheric model
-! Copyright (C) 1990-2010 - Division de Recherche en Prevision Numerique
-! Environnement Canada
-! This library is free software; you can redistribute it and/or modify it
-! under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation, version 2.1 of the License. This library is
-! distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-! without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-! 
PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. -! You should have received a copy of the GNU Lesser General Public License -! along with this library; if not, write to the Free Software Foundation, Inc., -! 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -!---------------------------------- LICENCE END --------------------------------- -!======================================================================= -! -! Krylov subspace methods for sea-ice dynamics -! -! See: -! -! C. T. Kelley, Iterative Methods for Linear and Nonlinear Equations, SIAM, 1995 -! (https://www.siam.org/books/textbooks/fr16_book.pdf) -! -! Y. Saad, Iterative Methods for Sparse Linear Systems. SIAM, 2003. -! (http://www-users.cs.umn.edu/~saad/IterMethBook_2ndEd.pdf) -! -! author: Philippe Blain, ECCC - - module ice_krylov - - use ice_blocks, only: nx_block, ny_block - use ice_constants, only: field_loc_NEcorner, c0, c1 - use ice_domain, only: distrb_info, nblocks - use ice_domain_size, only: max_blocks - use ice_dyn_vp, only: matvec, residual_vec, calc_L2norm_squared, precond - use ice_flux, only: fm, iceumask - use ice_global_reductions, only: global_sum, global_sums - use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & - uarear, tinyarea - use ice_kinds_mod - - implicit none - private - public :: fgmres - - integer (kind=int_kind), allocatable :: & - icellt(:) , & ! no. of cells where icetmask = 1 - icellu(:) ! no. of cells where iceumask = 1 - - integer (kind=int_kind), allocatable :: & - indxti(:,:) , & ! compressed index in i-direction - indxtj(:,:) , & ! compressed index in j-direction - indxui(:,:) , & ! compressed index in i-direction - indxuj(:,:) ! compressed index in j-direction - -!======================================================================= - - contains - -!======================================================================= - -! FGMRES: Flexible generalized minimum residual method (with restarts). -! Solves A x = b using GMRES with a varying (right) preconditioner -! -! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - - subroutine fgmres (icellt_in, icellu_in, & - indxti_in, indxtj_in, & - indxui_in, indxuj_in, & - zetaD, & - Cb, vrel, & - umassdti, & - solx, soly, & - bx, by, & - diagx, diagy, & - tolerance, maxinner, maxouter, nbiter, conv) - - use ice_dyn_vp, only: precond - - integer (kind=int_kind), dimension(max_blocks), intent(in) :: & - icellt_in, & ! no. of cells where icetmask = 1 - icellu_in ! no. of cells where iceumask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & - indxti_in, & ! compressed index in i-direction - indxtj_in, & ! compressed index in j-direction - indxui_in, & ! compressed index in i-direction - indxuj_in ! compressed index in j-direction - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & - zetaD ! zetaD = 2*zeta (viscous coefficient) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - vrel , & ! coefficient for tauw - Cb , & ! seabed stress coefficient - umassdti ! mass of U-cell/dte (kg/m^2 s) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & - solx , & ! Initial guess on input, approximate solution on output (x components) - soly ! Initial guess on input, approximate solution on output (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - bx , & ! 
Right hand side of the linear system (x components) - by ! Right hand side of the linear system (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - diagx , & ! Diagonal of the system matrix (x components) - diagy ! Diagonal of the system matrix (y components) - - real (kind=dbl_kind), intent(in) :: & - tolerance ! Tolerance to achieve. The algorithm terminates when the relative - ! residual is below tolerance - - integer (kind=int_kind), intent(in) :: & - maxinner ! Restart the method every maxinner inner iterations - - integer (kind=int_kind), intent(in) :: & - maxouter ! Maximum number of outer iterations - ! Iteration will stop after maxinner*maxouter steps - ! even if the specified tolerance has not been achieved - - integer (kind=int_kind), intent(out) :: & - nbiter ! Total number of iteration performed - - real (kind=dbl_kind), intent(out) :: & - conv ! !phb DESCRIBE IF WE KEEP - - ! local variables - - integer (kind=int_kind) :: & - iblk , & ! block index - ij , & ! index for indx[t|u][i|j] - i, j ! grid indices - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - workspace_x , & ! work vector (x components) - workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) - Fy ! residual vector (y components), Fy = Av - by (N/m^2) - - real (kind=dbl_kind), dimension (max_blocks) :: & - norm_squared ! array to accumulate squared norm of grid function over blocks - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & - arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv - arnoldi_basis_y ! arnoldi basis (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: & - wwx , & ! !phb FIND BETTER NAME (x components) - wwy ! !phb FIND BETTER NAME (y components) - - real (kind=dbl_kind) :: & - norm_residual , & ! current L^2 norm of residual vector - inverse_norm , & ! inverse of the norm of a vector - nu, t ! local temporary values - - integer (kind=int_kind) :: & - initer , & ! inner (Arnoldi) loop counter - outiter , & ! outer (restarts) loop counter - nextit , & ! nextit == initer+1 - it, k, ii, jj ! reusable loop counters - - real (kind=dbl_kind), dimension(maxinner+1) :: & - rot_cos , & ! cosine elements of Givens rotations - rot_sin , & ! sine elements of Givens rotations - rhs_hess ! right hand side vector of the Hessenberg (least squares) system - - real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & - hessenberg ! system matrix of the Hessenberg (least squares) system - - integer (kind=int_kind) :: & - precond_type ! type of preconditioner - - real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep - - real (kind=dbl_kind) :: & - local_dot ! local value to accumulate dot product computations - - real (kind=dbl_kind), dimension(maxinner) :: & - dotprod_local ! local array to accumulate several dot product computations - - character(len=*), parameter :: subname = '(fgmres)' - - ! Initialize module variables - allocate(icellt(max_blocks), icellu(max_blocks)) - allocate(indxti(nx_block*ny_block, max_blocks), & - indxtj(nx_block*ny_block, max_blocks), & - indxui(nx_block*ny_block, max_blocks), & - indxuj(nx_block*ny_block, max_blocks)) - icellt = icellt_in - icellu = icellu_in - indxti = indxti_in - indxtj = indxtj_in - indxui = indxui_in - indxuj = indxuj_in - - ! Here we go ! - - outiter = 0 - nbiter = 0 - - conv = c1 - - precond_type = precond - - ! 
Residual of the initial iterate - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk), & - indxui (:,iblk) , indxuj (:,iblk), & - indxti (:,iblk) , indxtj (:,iblk), & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - solx (:,:,iblk) , soly (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) - call residual_vec (nx_block , ny_block , & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk) , & - bx (:,:,iblk), by (:,:,iblk) , & - workspace_x(:,:,iblk), workspace_y(:,:,iblk), & - arnoldi_basis_x (:,:,iblk, 1), & - arnoldi_basis_y (:,:,iblk, 1)) - enddo - !$OMP END PARALLEL DO - - ! Start outer (restarts) loop - do - ! Compute norm of initial residual - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call calc_L2norm_squared(nx_block, ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk), & - arnoldi_basis_x(:,:,iblk, 1), & - arnoldi_basis_y(:,:,iblk, 1), & - norm_squared(iblk)) - - enddo - !$OMP END PARALLEL DO - norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) - - ! Current guess is a good enough solution - if (norm_residual < tolerance) then - return - end if - - ! Normalize the first Arnoldi vector - inverse_norm = c1 / norm_residual - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm - arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO - - if (outiter == 0) then - relative_tolerance = tolerance * norm_residual - r0 = norm_residual - end if - - conv = norm_residual / r0 - - ! Initialize 1-st term of RHS of Hessenberg system - rhs_hess(1) = norm_residual - rhs_hess(2:) = c0 - - initer = 0 - - ! Start of inner (Arnoldi) loop - do - - nbiter = nbiter + 1 - initer = initer + 1 - nextit = initer + 1 - - ! precondition the current Arnoldi vector - call precondition(arnoldi_basis_x(:,:,:,initer), & - arnoldi_basis_y(:,:,:,initer), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) - ! !phb DESCRIBE ww - wwx(:,:,:,initer) = workspace_x - wwy(:,:,:,initer) = workspace_y - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk), & - indxui (:,iblk) , indxuj (:,iblk), & - indxti (:,iblk) , indxtj (:,iblk), & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - arnoldi_basis_x(:,:,iblk,nextit), & - arnoldi_basis_y(:,:,iblk,nextit)) - enddo - !$OMP END PARALLEL DO - - ! Classical Gram-Schmidt orthogonalisation process - ! 
First loop of Gram-Schmidt (compute coefficients) - dotprod_local = c0 - do it=1,initer - local_dot = c0 - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & - (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) - enddo ! ij - enddo - !$OMP END PARALLEL DO - - dotprod_local(it) = local_dot - end do - - hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) - - ! Second loop of Gram-Schmidt (orthonormalize) - do it = 1, initer - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do - - ! Compute norm of new Arnoldi vector and update Hessenberg matrix - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call calc_L2norm_squared(nx_block , ny_block , & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk) , & - arnoldi_basis_x(:,:,iblk, nextit), & - arnoldi_basis_y(:,:,iblk, nextit), & - norm_squared(iblk)) - enddo - !$OMP END PARALLEL DO - hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) - - ! Watch out for happy breakdown - if (.not. almost_zero( hessenberg(nextit,initer) ) ) then - ! Normalize next Arnoldi vector - inverse_norm = c1 / hessenberg(nextit,initer) - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO - end if - - ! Apply previous Givens rotation to the last column of the Hessenberg matrix - if (initer > 1) then - do k = 2, initer - t = hessenberg(k-1, initer) - hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) - hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) - end do - end if - - ! Compute new Givens rotation - nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) - if (.not. almost_zero(nu)) then - rot_cos(initer) = hessenberg(initer,initer) / nu - rot_sin(initer) = hessenberg(nextit,initer) / nu - - rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) - rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) - - hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) - end if - - ! Check for convergence - norm_residual = abs(rhs_hess(nextit)) - conv = norm_residual / r0 - if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then - exit - endif - - end do ! end of inner (Arnoldi) loop - - ! At this point either the maximum number of inner iterations - ! was reached or the absolute residual is below the scaled tolerance. - - ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" - ! 
(sol_hess is stored in rhs_hess) - rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer) - do ii = 2, initer - k = initer - ii + 1 - t = rhs_hess(k) - do j = k + 1, initer - t = t - hessenberg(k,j) * rhs_hess(j) - end do - rhs_hess(k) = t / hessenberg(k,k) - end do - - ! Form linear combination to get new solution iterate - do it = 1, initer - t = rhs_hess(it) - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it) - soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do - - ! Increment outer loop counter and check for convergence - outiter = outiter + 1 - if (norm_residual <= relative_tolerance .or. outiter > maxouter) then - return - end if - - ! Solution is not convergent : compute residual vector and continue. - - ! The residual vector is computed here using (see Saad p. 177) : - ! \begin{equation} - ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) - ! \end{equation} - ! where : - ! $r$ is the residual - ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1) - ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 - ! $gamma_{m+1}$ is the last element of rhs_hess - ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} - - ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, - ! store the result in rhs_hess - do it = 1, initer - jj = nextit - it + 1 - rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0) - rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0) - end do - - ! Compute the residual by multiplying V_{m+1} and rhs_hess - workspace_x = c0 - workspace_y = c0 - do it = 1, nextit - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it) - workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - arnoldi_basis_x(:,:,:,1) = workspace_x - arnoldi_basis_y(:,:,:,1) = workspace_y - end do - end do ! end of outer (restarts) loop - - return - end subroutine fgmres - -!======================================================================= - -! PGMRES: Right-preconditioned generalized minimum residual method (with restarts). -! Solves A x = b using GMRES with a right preconditioner -! -! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - - subroutine pgmres (zetaD, & - Cb, vrel, & - umassdti, & - solx, soly, & - bx, by, & - diagx, diagy, & - tolerance, maxinner, maxouter, nbiter, conv) - - use ice_dyn_vp, only: precond - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & - zetaD ! zetaD = 2*zeta (viscous coefficient) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - vrel , & ! coefficient for tauw - Cb , & ! seabed stress coefficient - umassdti ! mass of U-cell/dte (kg/m^2 s) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & - solx , & ! Initial guess on input, approximate solution on output (x components) - soly ! Initial guess on input, approximate solution on output (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - bx , & ! 
Right hand side of the linear system (x components) - by ! Right hand side of the linear system (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - diagx , & ! Diagonal of the system matrix (x components) - diagy ! Diagonal of the system matrix (y components) - - real (kind=dbl_kind), intent(in) :: & - tolerance ! Tolerance to achieve. The algorithm terminates when the relative - ! residual is below tolerance - - integer (kind=int_kind), intent(in) :: & - maxinner ! Restart the method every maxinner inner iterations - - integer (kind=int_kind), intent(in) :: & - maxouter ! Maximum number of outer iterations - ! Iteration will stop after maxinner*maxouter steps - ! even if the specified tolerance has not been achieved - - integer (kind=int_kind), intent(out) :: & - nbiter ! Total number of iteration performed - - real (kind=dbl_kind), intent(out) :: & - conv ! !phb DESCRIBE IF WE KEEP - - ! local variables - - integer (kind=int_kind) :: & - iblk , & ! block index - ij , & ! index for indx[t|u][i|j] - i, j ! grid indices - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - workspace_x , & ! work vector (x components) - workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) - Fy ! residual vector (y components), Fy = Av - by (N/m^2) - - real (kind=dbl_kind), dimension (max_blocks) :: & - norm_squared ! array to accumulate squared norm of grid function over blocks - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & - arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv - arnoldi_basis_y ! arnoldi basis (y components) - - real (kind=dbl_kind) :: & - norm_residual , & ! current L^2 norm of residual vector - inverse_norm , & ! inverse of the norm of a vector - nu, t ! local temporary values - - integer (kind=int_kind) :: & - initer , & ! inner (Arnoldi) loop counter - outiter , & ! outer (restarts) loop counter - nextit , & ! nextit == initer+1 - it, k, ii, jj ! reusable loop counters - - real (kind=dbl_kind), dimension(maxinner+1) :: & - rot_cos , & ! cosine elements of Givens rotations - rot_sin , & ! sine elements of Givens rotations - rhs_hess ! right hand side vector of the Hessenberg (least squares) system - - real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & - hessenberg ! system matrix of the Hessenberg (least squares) system - - integer (kind=int_kind) :: & - precond_type ! type of preconditioner - - real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep - - real (kind=dbl_kind) :: & - local_dot ! local value to accumulate dot product computations - - real (kind=dbl_kind), dimension(maxinner) :: & - dotprod_local ! local array to accumulate several dot product computations - - character(len=*), parameter :: subname = '(pgmres)' - - ! Here we go ! - - outiter = 0 - nbiter = 0 - - conv = c1 - - precond_type = precond - - ! 
Residual of the initial iterate - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk), & - indxui (:,iblk) , indxuj (:,iblk), & - indxti (:,iblk) , indxtj (:,iblk), & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - solx (:,:,iblk) , soly (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) - call residual_vec (nx_block , ny_block , & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk) , & - bx (:,:,iblk), by (:,:,iblk) , & - workspace_x(:,:,iblk), workspace_y(:,:,iblk), & - arnoldi_basis_x (:,:,iblk, 1), & - arnoldi_basis_y (:,:,iblk, 1)) - enddo - !$OMP END PARALLEL DO - - ! Start outer (restarts) loop - do - ! Compute norm of initial residual - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call calc_L2norm_squared(nx_block, ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk), & - arnoldi_basis_x(:,:,iblk, 1), & - arnoldi_basis_y(:,:,iblk, 1), & - norm_squared(iblk)) - - enddo - !$OMP END PARALLEL DO - norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) - - ! Current guess is a good enough solution - if (norm_residual < tolerance) then - return - end if - - ! Normalize the first Arnoldi vector - inverse_norm = c1 / norm_residual - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, 1) = arnoldi_basis_x(i, j, iblk, 1) * inverse_norm - arnoldi_basis_y(i, j, iblk, 1) = arnoldi_basis_y(i, j, iblk, 1) * inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO - - if (outiter == 0) then - relative_tolerance = tolerance * norm_residual - r0 = norm_residual - end if - - conv = norm_residual / r0 - - ! Initialize 1-st term of RHS of Hessenberg system - rhs_hess(1) = norm_residual - rhs_hess(2:) = c0 - - initer = 0 - - ! Start of inner (Arnoldi) loop - do - - nbiter = nbiter + 1 - initer = initer + 1 - nextit = initer + 1 - - ! precondition the current Arnoldi vector - call precondition(arnoldi_basis_x(:,:,:,initer), & - arnoldi_basis_y(:,:,:,initer), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk), & - indxui (:,iblk) , indxuj (:,iblk), & - indxti (:,iblk) , indxtj (:,iblk), & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - workspace_x(:,:,iblk) , workspace_y(:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - arnoldi_basis_x(:,:,iblk,nextit), & - arnoldi_basis_y(:,:,iblk,nextit)) - enddo - !$OMP END PARALLEL DO - - ! Classical Gram-Schmidt orthogonalisation process - ! First loop of Gram-Schmidt (compute coefficients) - dotprod_local = c0 - do it=1,initer - local_dot = c0 - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & - (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) - enddo ! 
ij - enddo - !$OMP END PARALLEL DO - - dotprod_local(it) = local_dot - end do - - hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) - - ! Second loop of Gram-Schmidt (orthonormalize) - do it = 1, initer - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do - - ! Compute norm of new Arnoldi vector and update Hessenberg matrix - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call calc_L2norm_squared(nx_block , ny_block , & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk) , & - arnoldi_basis_x(:,:,iblk, nextit), & - arnoldi_basis_y(:,:,iblk, nextit), & - norm_squared(iblk)) - enddo - !$OMP END PARALLEL DO - hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) - - ! Watch out for happy breakdown - if (.not. almost_zero( hessenberg(nextit,initer) ) ) then - ! Normalize next Arnoldi vector - inverse_norm = c1 / hessenberg(nextit,initer) - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit)*inverse_norm - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm - enddo ! ij - enddo - !$OMP END PARALLEL DO - end if - - ! Apply previous Givens rotation to the last column of the Hessenberg matrix - if (initer > 1) then - do k = 2, initer - t = hessenberg(k-1, initer) - hessenberg(k-1, initer) = rot_cos(k-1)*t + rot_sin(k-1)*hessenberg(k, initer) - hessenberg(k, initer) = -rot_sin(k-1)*t + rot_cos(k-1)*hessenberg(k, initer) - end do - end if - - ! Compute new Givens rotation - nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2) - if (.not. almost_zero(nu)) then - rot_cos(initer) = hessenberg(initer,initer) / nu - rot_sin(initer) = hessenberg(nextit,initer) / nu - - rhs_hess(nextit) = -rot_sin(initer) * rhs_hess(initer) - rhs_hess(initer) = rot_cos(initer) * rhs_hess(initer) - - hessenberg(initer,initer) = rot_cos(initer) * hessenberg(initer,initer) + rot_sin(initer) * hessenberg(nextit,initer) - end if - - ! Check for convergence - norm_residual = abs(rhs_hess(nextit)) - conv = norm_residual / r0 - if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then - exit - endif - - end do ! end of inner (Arnoldi) loop - - ! At this point either the maximum number of inner iterations - ! was reached or the absolute residual is below the scaled tolerance. - - ! Solve the (now upper triangular) system "hessenberg * sol_hess = rhs_hess" - ! (sol_hess is stored in rhs_hess) - rhs_hess(initer) = rhs_hess(initer) / hessenberg(initer,initer) - do ii = 2, initer - k = initer - ii + 1 - t = rhs_hess(k) - do j = k + 1, initer - t = t - hessenberg(k,j) * rhs_hess(j) - end do - rhs_hess(k) = t / hessenberg(k,k) - end do - - ! 
Form linear combination to get new solution iterate - workspace_x = c0 - workspace_y = c0 - do it = 1, initer - t = rhs_hess(it) - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + t * arnoldi_basis_x(i, j, iblk, it) - workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + t * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do - - ! Call preconditioner - call precondition(workspace_x(:,:,:), & - workspace_y(:,:,:), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) - - solx = solx + workspace_x - soly = soly + workspace_y - - ! Increment outer loop counter and check for convergence - outiter = outiter + 1 - if (norm_residual <= relative_tolerance .or. outiter > maxouter) then - return - end if - - ! Solution is not convergent : compute residual vector and continue. - - ! The residual vector is computed here using (see Saad p. 177) : - ! \begin{equation} - ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) - ! \end{equation} - ! where : - ! $r$ is the residual - ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1) - ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 - ! $gamma_{m+1}$ is the last element of rhs_hess - ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} - - ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, - ! store the result in rhs_hess - do it = 1, initer - jj = nextit - it + 1 - rhs_hess(jj-1) = -rot_sin(jj-1) * rhs_hess(jj) ! + rot_cos(jj-1) * g(jj-1) (== 0) - rhs_hess(jj) = rot_cos(jj-1) * rhs_hess(jj) ! + rot_sin(jj-1) * g(jj-1) (== 0) - end do - - ! Compute the residual by multiplying V_{m+1} and rhs_hess - workspace_x = c0 - workspace_y = c0 - do it = 1, nextit - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it) - workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - arnoldi_basis_x(:,:,:,1) = workspace_x - arnoldi_basis_y(:,:,:,1) = workspace_y - end do - end do ! end of outer (restarts) loop - - end subroutine pgmres - -!======================================================================= - -! Generic routine to precondition a vector -! -! authors: Philippe Blain, ECCC - - subroutine precondition(vx, vy, wx, wy, precond_type, diagx, diagy) - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - vx , & ! input vector (x components) - vy ! input vector (y components) - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(out) :: & - wx , & ! preconditionned vector (x components) - wy ! preconditionned vector (y components) - - integer (kind=int_kind), intent(in) :: & - precond_type ! type of preconditioner - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - diagx , & ! diagonal of the system matrix (x components) - diagy ! diagonal of the system matrix (y components) - - ! local variables - - integer (kind=int_kind) :: & - iblk , & ! block index - ij , & ! compressed index - i, j ! grid indices - - character(len=*), parameter :: subname = '(precondition)' - - if (precond_type == 1) then ! identity (no preconditioner) - wx = vx - wy = vy - elseif (precond_type == 2) then ! 
Jacobi preconditioner - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - wx(i,j,iblk) = vx(i,j,iblk) / diagx(i,j,iblk) - wy(i,j,iblk) = vy(i,j,iblk) / diagy(i,j,iblk) - enddo ! ij - enddo - !$OMP END PARALLEL DO - elseif (precond_type == 3) then ! PGMRES (Jacobi-preconditioned GMRES) - ! !phb TODO!!! - else - - endif - end subroutine precondition - -!======================================================================= - -logical function almost_zero(A) result(retval) - ! Check if value A is close to zero, up to machine precision - ! - !author - ! Stéphane Gaudreault, ECCC -- June 2014 - ! - !revision - ! v4-80 - Gaudreault S. - gfortran compatibility - ! 2019 - Philippe Blain, ECCC - converted to CICE standards - implicit none - - real (kind=dbl_kind), intent(in) :: A - integer (kind=int8_kind) :: aBit - integer (kind=int8_kind), parameter :: two_complement = int(Z'80000000', kind=int8_kind) - aBit = 0 - aBit = transfer(A, aBit) - if (aBit < 0) then - aBit = two_complement - aBit - end if - ! lexicographic order test with a tolerance of 1 adjacent float - retval = (abs(aBit) <= 1) -end function almost_zero - -!======================================================================= - -end module ice_krylov - -!======================================================================= diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90.unused similarity index 100% rename from cicecore/cicedynB/dynamics/pgmres.F90 rename to cicecore/cicedynB/dynamics/pgmres.F90.unused From 17a290ef5db9002bdd5faafae54b1ff999e221a0 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 24 Jul 2019 10:58:22 -0400 Subject: [PATCH 110/196] ice_dyn_vp: add 'subname' to subroutine 'almost_zero' and correct indentation --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 50 +++++++++++++---------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 340b9d502..79c4158e0 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -4769,28 +4769,34 @@ end subroutine precondition !======================================================================= -logical function almost_zero(A) result(retval) - ! Check if value A is close to zero, up to machine precision - ! - !author - ! Stéphane Gaudreault, ECCC -- June 2014 - ! - !revision - ! v4-80 - Gaudreault S. - gfortran compatibility - ! 2019 - Philippe Blain, ECCC - converted to CICE standards - implicit none - - real (kind=dbl_kind), intent(in) :: A - integer (kind=int8_kind) :: aBit - integer (kind=int8_kind), parameter :: two_complement = int(Z'80000000', kind=int8_kind) - aBit = 0 - aBit = transfer(A, aBit) - if (aBit < 0) then - aBit = two_complement - aBit - end if - ! lexicographic order test with a tolerance of 1 adjacent float - retval = (abs(aBit) <= 1) -end function almost_zero +! Check if value A is close to zero, up to machine precision +! +!author +! Stéphane Gaudreault, ECCC -- June 2014 +! +!revision +! v4-80 - Gaudreault S. - gfortran compatibility +! 2019 - Philippe Blain, ECCC - converted to CICE standards + + logical function almost_zero(A) result(retval) + + real (kind=dbl_kind), intent(in) :: A + + ! 
local variables + + character(len=*), parameter :: subname = '(almost_zero)' + + integer (kind=int8_kind) :: aBit + integer (kind=int8_kind), parameter :: two_complement = int(Z'80000000', kind=int8_kind) + aBit = 0 + aBit = transfer(A, aBit) + if (aBit < 0) then + aBit = two_complement - aBit + end if + ! lexicographic order test with a tolerance of 1 adjacent float + retval = (abs(aBit) <= 1) + + end function almost_zero !======================================================================= From 9fd638d9d303f0ca9414bba7b2ad6ea4ef6793ea Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 24 Jul 2019 10:59:30 -0400 Subject: [PATCH 111/196] ice_dyn_vp: add 'ice_HaloUpdate_vel' subroutine Introduce the subroutine 'ice_HaloUpdate_vel', which can be used to do halo updates for vector field, such as the ice velocity (uvel,vvel). --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 79c4158e0..4bded41e9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -998,6 +998,8 @@ subroutine picard_solver (icellt, icellu, & ! ! enddo ! !$OMP END PARALLEL DO ! + + ! ! phb NOT SURE IF THIS HALO UPDATE IS NEEDED ! !$OMP PARALLEL DO PRIVATE(iblk) ! do iblk = 1, nblocks ! @@ -1465,6 +1467,8 @@ subroutine anderson_solver (icellt, icellu, & indxui (:,:), indxuj(:,:), & sol (:), & uvel (:,:,:), vvel (:,:,:)) + + ! phb NOT SURE IF THIS HALO UPDATE IS ACTUALLY NEEDED ! Load velocity into array for boundary updates !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks @@ -4798,6 +4802,63 @@ logical function almost_zero(A) result(retval) end function almost_zero +!======================================================================= + +! Perform a halo update for the velocity field +! author: Philippe Blain, ECCC + + subroutine ice_HaloUpdate_vel(uvel, vvel, fld2, halo_info_mask) + + use ice_boundary, only: ice_halo, ice_HaloUpdate + use ice_constants, only: field_loc_NEcorner, field_type_vector + use ice_domain, only: halo_info, maskhalo_dyn + use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & + uvel , & ! u components of velocity vector + vvel ! v components of velocity vector + + real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & + fld2 ! work array to perform halo update + + type (ice_halo), intent(in) :: & + halo_info_mask ! ghost cell update info for masked halo + + ! local variables + + integer (kind=int_kind) :: & + iblk ! block index + + character(len=*), parameter :: subname = '(ice_HaloUpdate_vel)' + + ! load velocity into array for boundary updates + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + + ! 
Unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + end subroutine ice_HaloUpdate_vel + !======================================================================= end module ice_dyn_vp From 865a71643702200d9b423444dae8fd6c849087ca Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 24 Jul 2019 12:49:03 -0400 Subject: [PATCH 112/196] ice_dyn_vp: make 'fld2' a module variable Reduce subroutine argument counts by making 'fld2' a module variable, and allocate it in 'init_vp'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 35 ++++++----------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 4bded41e9..f6eaa0b4e 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -95,6 +95,9 @@ module ice_dyn_vp indxtj(:,:) , & ! compressed index in j-direction indxui(:,:) , & ! compressed index in i-direction indxuj(:,:) ! compressed index in j-direction + + real (kind=dbl_kind), allocatable :: & + fld2(:,:,:,:) ! work array for boundary updates !======================================================================= @@ -139,6 +142,7 @@ subroutine init_vp (dt) indxtj(nx_block*ny_block, max_blocks), & indxui(nx_block*ny_block, max_blocks), & indxuj(nx_block*ny_block, max_blocks)) + allocate(fld2(nx_block,ny_block,2,max_blocks)) ! Redefine tinyarea using a different puny value @@ -225,8 +229,6 @@ subroutine imp_solver (dt) aiu , & ! ice fraction on u-grid umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) - - real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) logical (kind=log_kind) :: calc_strair @@ -252,8 +254,6 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- ! Initialize !----------------------------------------------------------------- - - allocate(fld2(nx_block,ny_block,2,max_blocks)) ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -476,7 +476,6 @@ subroutine imp_solver (dt) call picard_solver (icellt, icellu, & indxti, indxtj, & indxui, indxuj, & - fld2, & aiu, ntot, & waterx, watery, & bxfix, byfix, & @@ -488,7 +487,6 @@ subroutine imp_solver (dt) call anderson_solver (icellt, icellu, & indxti, indxtj, & indxui, indxuj, & - fld2, & aiu, ntot, & waterx, watery, & bxfix, byfix, & @@ -503,7 +501,6 @@ subroutine imp_solver (dt) deallocate(bvec, sol, diagvec) - deallocate(fld2) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) !----------------------------------------------------------------- @@ -632,7 +629,6 @@ end subroutine imp_solver subroutine picard_solver (icellt, icellu, & indxti, indxtj, & indxui, indxuj, & - fld2, & aiu, ntot, & waterx, watery, & bxfix, byfix, & @@ -665,9 +661,6 @@ subroutine picard_solver (icellt, icellu, & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & - fld2 ! work array for boundary updates - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & aiu , & ! ice fraction on u-grid waterx , & ! 
for ocean stress calculation, x (m/s) @@ -1062,7 +1055,6 @@ end subroutine picard_solver subroutine anderson_solver (icellt, icellu, & indxti, indxtj, & indxui, indxuj, & - fld2, & aiu, ntot, & waterx, watery, & bxfix, byfix, & @@ -1096,9 +1088,6 @@ subroutine anderson_solver (icellt, icellu, & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & - fld2 ! work array for boundary updates - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & aiu , & ! ice fraction on u-grid waterx , & ! for ocean stress calculation, x (m/s) @@ -1346,8 +1335,7 @@ subroutine anderson_solver (icellt, icellu, & ! indxui, indxuj, & ! zetaD, & ! Cb, vrel, & - ! aiu, umassdti, & - ! fld2) + ! aiu, umassdti) elseif (fpfunc_andacc == 2) then ! g_2(x) = x - A(x)x + b(x) = x - F(x) @@ -1527,8 +1515,7 @@ subroutine fgmres_solver (ntot, bvec, & indxui, indxuj, & zetaD, & Cb, vrel, & - aiu, umassdti, & - fld2) + aiu, umassdti ) use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_HaloUpdate @@ -1567,10 +1554,7 @@ subroutine fgmres_solver (ntot, bvec, & real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2zeta (viscous coeff) - - real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & - fld2 ! work array for boundary updates - + ! local variables integer (kind=int_kind) :: & @@ -4807,7 +4791,7 @@ end function almost_zero ! Perform a halo update for the velocity field ! author: Philippe Blain, ECCC - subroutine ice_HaloUpdate_vel(uvel, vvel, fld2, halo_info_mask) + subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) use ice_boundary, only: ice_halo, ice_HaloUpdate use ice_constants, only: field_loc_NEcorner, field_type_vector @@ -4818,9 +4802,6 @@ subroutine ice_HaloUpdate_vel(uvel, vvel, fld2, halo_info_mask) uvel , & ! u components of velocity vector vvel ! v components of velocity vector - real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(inout) :: & - fld2 ! work array to perform halo update - type (ice_halo), intent(in) :: & halo_info_mask ! 
ghost cell update info for masked halo From 93c03354da0dfee576e521b96fb4039c4b2190b0 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 24 Jul 2019 13:38:58 -0400 Subject: [PATCH 113/196] ice_dyn_vp: move 'ice_halo' to module 'use' statement --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index f6eaa0b4e..6921ce1da 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -36,6 +36,7 @@ module ice_dyn_vp use ice_kinds_mod use ice_blocks, only: nx_block, ny_block + use ice_boundary, only: ice_halo use ice_constants, only: field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector use ice_constants, only: c0, p027, p055, p111, p166, & @@ -184,7 +185,7 @@ end subroutine init_vp subroutine imp_solver (dt) use ice_arrays_column, only: Cdn_ocn - use ice_boundary, only: ice_halo, ice_HaloMask, ice_HaloUpdate, & + use ice_boundary, only: ice_HaloMask, ice_HaloUpdate, & ice_HaloDestroy, ice_HaloUpdate_stress use ice_blocks, only: block, get_block, nx_block, ny_block use ice_domain, only: blocks_ice, halo_info, maskhalo_dyn @@ -639,7 +640,7 @@ subroutine picard_solver (icellt, icellu, & use ice_arrays_column, only: Cdn_ocn use ice_blocks, only: nx_block, ny_block - use ice_boundary, only: ice_halo, ice_HaloUpdate + use ice_boundary, only: ice_HaloUpdate use ice_domain, only: halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks use ice_flux, only: uocn, vocn, fm, Tbu @@ -1065,7 +1066,7 @@ subroutine anderson_solver (icellt, icellu, & use ice_arrays_column, only: Cdn_ocn use ice_blocks, only: nx_block, ny_block - use ice_boundary, only: ice_halo, ice_HaloUpdate + use ice_boundary, only: ice_HaloUpdate use ice_constants, only: c1 use ice_domain, only: halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks @@ -4793,7 +4794,7 @@ end function almost_zero subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) - use ice_boundary, only: ice_halo, ice_HaloUpdate + use ice_boundary, only: ice_HaloUpdate use ice_constants, only: field_loc_NEcorner, field_type_vector use ice_domain, only: halo_info, maskhalo_dyn use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop From 282edf91ca570d30ef2ca1e2350fed9437ddafcb Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 24 Jul 2019 13:57:16 -0400 Subject: [PATCH 114/196] ice_dyn_vp: add workspace initialization to 0 and haloupdate before matvec in fgmres (WIP) --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 25 ++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 6921ce1da..a00c6cf2c 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1281,9 +1281,15 @@ subroutine anderson_solver (icellt, icellu, & exit endif - ! Put initial guess for FGMRES in solx,soly + ! Put initial guess for FGMRES in solx,soly and sol (needed for anderson) solx = uprev_k soly = vprev_k + ! Form sol vector for fgmres (sol is iniguess at the beginning) + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + uprev_k (:,:,:), vprev_k (:,:,:), & + sol(:)) ! 
Compute fixed point map g(x) if (fpfunc_andacc == 1) then @@ -1316,6 +1322,7 @@ subroutine anderson_solver (icellt, icellu, & call fgmres (zetaD, & Cb, vrel, & umassdti, & + halo_info_mask, & solx, soly, & bx, by, & Diagu, Diagv, & @@ -3835,6 +3842,7 @@ end subroutine qr_delete subroutine fgmres (zetaD, & Cb, vrel, & umassdti, & + halo_info_mask, & solx, soly, & bx, by, & diagx, diagy, & @@ -3849,6 +3857,9 @@ subroutine fgmres (zetaD, & Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) + type (ice_halo) :: & + halo_info_mask ! ghost cell update info for masked halo + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & solx , & ! Initial guess on input, approximate solution on output (x components) soly ! Initial guess on input, approximate solution on output (y components) @@ -3937,6 +3948,7 @@ subroutine fgmres (zetaD, & ! Here we go ! + ! Initialize outiter = 0 nbiter = 0 @@ -3944,6 +3956,9 @@ subroutine fgmres (zetaD, & precond_type = precond + ! workspace_x = c0 + ! workspace_y = c0 + ! Residual of the initial iterate !$OMP PARALLEL DO PRIVATE(iblk) @@ -4039,6 +4054,9 @@ subroutine fgmres (zetaD, & wwx(:,:,:,initer) = workspace_x wwy(:,:,:,initer) = workspace_y + ! Update workspace with boundary values + call ice_HaloUpdate_vel(workspace_x, workspace_y, & + halo_info_mask) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call matvec (nx_block , ny_block, & @@ -4348,6 +4366,7 @@ subroutine pgmres (zetaD, & ! Here we go ! + ! Initialize outiter = 0 nbiter = 0 @@ -4355,6 +4374,9 @@ subroutine pgmres (zetaD, & precond_type = 2 ! Jacobi preconditioner + workspace_x = c0 + workspace_y = c0 + ! Residual of the initial iterate !$OMP PARALLEL DO PRIVATE(iblk) @@ -4447,6 +4469,7 @@ subroutine pgmres (zetaD, & workspace_x , workspace_y , & precond_type, diagx, diagy) + ! !phb haloUpdate would go here (for workspace_x, _y) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call matvec (nx_block , ny_block, & From aaf9a8943e91befdb01fa721c4140f44a0c625dd Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 4 Nov 2019 13:41:25 -0500 Subject: [PATCH 115/196] ice_dyn_vp: add choice between classical and modified Gram-Schmidt orthogonalization The FGMRES solver from GEM only has classical Gram-Schmidt (CGS), but modified Gram-Schmidt (MGS) is more robust. Preliminary tests show that the convergence of the non-linear solver is seriously degraded when CGS is used instead of MGS inside FGMRES. This was tested by changing MGS to CGS in the Saad FMGRES implementation : diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 index 5a93e68..c7d49fa 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -192,12 +192,13 @@ subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & ! if (icode .eq. 3) goto 11 call dcopy (n, wk2, 1, vv(1,i1), 1) !jfl modification ! -! modified gram - schmidt... +! classical gram - schmidt... ! do j=1, i - t = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification - hh(j,i) = t - call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) !jfl modification + hh(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification + enddo + do j=1, i + call daxpy(n, -hh(j,i), vv(1,j), 1, vv(1,i1), 1) !jfl modification enddo t = sqrt(ddot(n, vv(1,i1), 1, vv(1,i1), 1)) !jfl modification hh(i1,i) = t Add an 'orthogonalize' subroutine that can do either CGS or MGS to abstract away the orthogonalization procedure. 
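For reference, the two procedures differ in when the vector being
orthogonalized is updated. A minimal sketch of the idea on a plain 1D
vector ('v', 'w', 'h', 'n' and 'nvec' are illustrative names here, not
the block-decomposed CICE arrays):

    ! Sketch only: orthogonalize a new vector w against an existing
    ! orthonormal basis v(:,1:nvec)
    integer, parameter :: n = 100, nvec = 5
    real (kind=dbl_kind) :: v(n,nvec), w(n), h(nvec)
    integer :: j

    ! Classical Gram-Schmidt (CGS): every coefficient is computed
    ! against the *original* w, so the nvec dot products are
    ! independent and can be batched into a single global reduction
    do j = 1, nvec
       h(j) = dot_product(v(:,j), w)
    enddo
    do j = 1, nvec
       w = w - h(j)*v(:,j)
    enddo

    ! Modified Gram-Schmidt (MGS): w is updated before the next
    ! coefficient is computed, giving nvec dependent dot products
    ! and hence nvec sequential global reductions
    do j = 1, nvec
       h(j) = dot_product(v(:,j), w)
       w = w - h(j)*v(:,j)
    enddo

In exact arithmetic both variants produce the same h and w, but in
floating point MGS loses much less orthogonality; the price in a
distributed-memory code is one global reduction per basis vector
instead of a single batched one per Arnoldi step, which is why CGS is
kept available as the cheaper option.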
Add a namelist parameter 'ortho_type' (defaulting to 'mgs') so that the user can change at run time if the FGMRES solver should use CGS or MGS as an orthogonalization method. Note: at the moment the 'pgmres' subroutine is still hard-coded to use CGS. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 169 +++++++++++++++++----- cicecore/cicedynB/general/ice_init.F90 | 6 +- configuration/scripts/ice_in | 1 + 3 files changed, 137 insertions(+), 39 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a00c6cf2c..aac9d6b18 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -85,6 +85,9 @@ module ice_dyn_vp damping_andacc , & ! damping factor for Anderson acceleration reltol_andacc ! relative tolerance for Anderson acceleration + character (len=char_len), public :: & + ortho_type ! type of orthogonalization for FGMRES ('cgs' or 'mgs') + ! mmodule variables integer (kind=int_kind), allocatable :: & @@ -4077,45 +4080,10 @@ subroutine fgmres (zetaD, & enddo !$OMP END PARALLEL DO - ! Classical Gram-Schmidt orthogonalisation process - ! First loop of Gram-Schmidt (compute coefficients) - dotprod_local = c0 - do it=1,initer - local_dot = c0 - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & - (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) - enddo ! ij - enddo - !$OMP END PARALLEL DO - - dotprod_local(it) = local_dot - end do + ! Orthogonalize the new vector + call orthogonalize(arnoldi_basis_x, arnoldi_basis_y, & + hessenberg, initer, nextit, maxinner, ortho_type) - hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) - - ! Second loop of Gram-Schmidt (orthonormalize) - do it = 1, initer - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij, iblk) - j = indxuj(ij, iblk) - - arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it) - arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) & - - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) - enddo ! ij - enddo - !$OMP END PARALLEL DO - end do ! Compute norm of new Arnoldi vector and update Hessenberg matrix !$OMP PARALLEL DO PRIVATE(iblk) @@ -4781,6 +4749,131 @@ end subroutine precondition !======================================================================= +! Generic routine to orthogonalize a vector (arnoldi_basis_y(:, :, :, nextit)) +! against a set of vectors (arnoldi_basis_y(:, :, :, 1:initer)) +! +! authors: Philippe Blain, ECCC + + subroutine orthogonalize(arnoldi_basis_x, arnoldi_basis_y, & + hessenberg, initer, nextit, maxinner, ortho_type) + + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1), intent(inout) :: & + arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv + arnoldi_basis_y ! arnoldi basis (y components) + + integer (kind=int_kind), intent(in) :: & + initer , & ! inner (Arnoldi) loop counter + nextit ! nextit == initer+1 + + real (kind=dbl_kind), dimension(maxinner+1, maxinner), intent(inout) :: & + hessenberg ! system matrix of the Hessenberg (least squares) system + !phb: removing this parameter and argument makes ifort error in the .i90 file + integer (kind=int_kind), intent(in) :: & + maxinner ! 
Restart the method every maxinner inner iterations
+
+ character(len=*), intent(in) :: &
+ ortho_type ! type of orthogonalization
+
+ ! local variables
+
+ integer (kind=int_kind) :: &
+ it , & ! reusable loop counter
+ iblk , & ! block index
+ ij , & ! compressed index
+ i, j ! grid indices
+
+ real (kind=dbl_kind), dimension (max_blocks) :: &
+ local_dot ! local array to accumulate dot product of grid function over blocks
+
+ real (kind=dbl_kind), dimension(maxinner) :: &
+ dotprod_local ! local array to accumulate several dot product computations
+
+ character(len=*), parameter :: subname = '(orthogonalize)'
+
+ if (trim(ortho_type) == 'cgs') then ! Classical Gram-Schmidt
+ ! Classical Gram-Schmidt orthogonalisation process
+ ! First loop of Gram-Schmidt (compute coefficients)
+ dotprod_local = c0
+ do it=1,initer
+ local_dot = c0
+
+ !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ local_dot(iblk) = local_dot(iblk) + &
+ (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
+ (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+
+ dotprod_local(it) = sum(local_dot)
+ end do
+
+ hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info)
+
+ ! Second loop of Gram-Schmidt (orthonormalize)
+ do it = 1, initer
+ !$OMP PARALLEL DO PRIVATE(iblk)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) &
+ - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it)
+ arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) &
+ - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+ elseif (trim(ortho_type) == 'mgs') then ! Modified Gram-Schmidt
+ ! Modified Gram-Schmidt orthogonalisation process
+ do it=1,initer
+ local_dot = c0
+
+ !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ local_dot(iblk) = local_dot(iblk) + &
+ (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
+ (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+
+ hessenberg(it,initer) = global_sum(sum(local_dot), distrb_info)
+
+ !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j)
+ do iblk = 1, nblocks
+ do ij =1, icellu(iblk)
+ i = indxui(ij, iblk)
+ j = indxuj(ij, iblk)
+
+ arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) &
+ - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it)
+ arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) &
+ - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it)
+ enddo ! ij
+ enddo
+ !$OMP END PARALLEL DO
+ end do
+ else
+ call abort_ice(error_message='wrong orthogonalization in ' // subname, &
+ file=__FILE__, line=__LINE__)
+ endif
+
+ end subroutine orthogonalize
+
+!=======================================================================
+
! Check if value A is close to zero, up to machine precision
! 
!author diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 512b37220..0c862f0e2 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -104,7 +104,7 @@ subroutine input_data maxits_pgmres, monitor_nonlin, monitor_fgmres, & monitor_pgmres, gammaNL, gamma, epsprecond, & algo_nonlin, fpfunc_andacc, im_andacc, reltol_andacc, & - damping_andacc, start_andacc, use_mean_vrel + damping_andacc, start_andacc, use_mean_vrel, ortho_type use ice_transport_driver, only: advection, conserv_check use ice_restoring, only: restore_ice #ifdef CESMCOUPLED @@ -204,6 +204,7 @@ subroutine input_data monitor_fgmres, monitor_pgmres, gammaNL, gamma, & epsprecond, algo_nonlin, im_andacc, reltol_andacc, & damping_andacc, start_andacc, fpfunc_andacc, use_mean_vrel, & + ortho_type, & k2, alphab, threshold_hw, & Pstar, Cstar @@ -342,6 +343,7 @@ subroutine input_data monitor_nonlin = .false. ! print nonlinear solver info monitor_fgmres = 1 ! print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations) monitor_pgmres = 1 ! print pgmres info (0: nothing printed, 1: all iterations) + ortho_type = 'mgs' ! orthogonalization procedure 'cgs' or 'mgs' gammaNL = 1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) gamma = 1e-2_dbl_kind ! fgmres stopping criterion: gamma*res(k) epsprecond = 1e-6_dbl_kind ! pgmres stopping criterion: epsprecond*res(k) @@ -666,6 +668,7 @@ subroutine input_data call broadcast_scalar(monitor_nonlin, master_task) call broadcast_scalar(monitor_fgmres, master_task) call broadcast_scalar(monitor_pgmres, master_task) + call broadcast_scalar(ortho_type, master_task) call broadcast_scalar(gammaNL, master_task) call broadcast_scalar(gamma, master_task) call broadcast_scalar(epsprecond, master_task) @@ -1583,6 +1586,7 @@ subroutine input_data write(nu_diag,1010) ' monitor_nonlin = ', monitor_nonlin write(nu_diag,1020) ' monitor_fgmres = ', monitor_fgmres write(nu_diag,1020) ' monitor_pgmres = ', monitor_pgmres + write(nu_diag,1030) ' ortho_type = ', ortho_type write(nu_diag,1008) ' gammaNL = ', gammaNL write(nu_diag,1008) ' gamma = ', gamma write(nu_diag,1008) ' epsprecond = ', epsprecond diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 7cfdf1505..7adc7701f 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -148,6 +148,7 @@ monitor_nonlin = .false. monitor_fgmres = 1 monitor_pgmres = 1 + ortho_type = 'mgs' gammaNL = 1e-8 gamma = 1e-2 epsprecond = 1e-6 From 79749ad6e568013e5441029205945c0f29e7c30d Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 5 Nov 2019 12:40:11 -0500 Subject: [PATCH 116/196] ice_dyn_vp: change order of arguments for 'precondition' subroutine The arguments with 'intent(out)' and 'intent(inout)' are usually listed last in the argument list of subroutines and functions. This is in line with [1]. Change 'precondition' to be in line with this standard. 
[1] European Standards for Writing and Documenting Exchangeable Fortran 90 Code, https://wiki.c2sm.ethz.ch/pub/COSMO/CosmoRules/europ_sd.pdf --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 50 ++++++++++++----------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index aac9d6b18..6d12590b3 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -4044,15 +4044,15 @@ subroutine fgmres (zetaD, & nbiter = nbiter + 1 initer = initer + 1 nextit = initer + 1 - ! precondition the current Arnoldi vector - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD, & + Cb, vrel, & + umassdti, & arnoldi_basis_x(:,:,:,initer), & arnoldi_basis_y(:,:,:,initer), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) + diagx, diagy, & + precond_type, & + workspace_x , workspace_y) ! !phb DESCRIBE ww wwx(:,:,:,initer) = workspace_x wwy(:,:,:,initer) = workspace_y @@ -4429,13 +4429,14 @@ subroutine pgmres (zetaD, & nextit = initer + 1 ! precondition the current Arnoldi vector - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD, & + Cb, vrel, & + umassdti, & arnoldi_basis_x(:,:,:,initer), & arnoldi_basis_y(:,:,:,initer), & - workspace_x , workspace_y , & - precond_type, diagx, diagy) + diagx, diagy, & + precond_type, & + workspace_x , workspace_y) ! !phb haloUpdate would go here (for workspace_x, _y) !$OMP PARALLEL DO PRIVATE(iblk) @@ -4592,12 +4593,13 @@ subroutine pgmres (zetaD, & end do ! Call preconditioner - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD, & + Cb, vrel, & + umassdti, & workspace_x , workspace_y, & - workspace_x , workspace_y, & - precond_type, diagx, diagy) + diagx, diagy, & + precond_type, & + workspace_x , workspace_y) solx = solx + workspace_x soly = soly + workspace_y @@ -4661,9 +4663,9 @@ subroutine precondition(zetaD, & Cb, vrel, & umassdti, & vx, vy, & - wx, wy, & + diagx, diagy, & precond_type, & - diagx, diagy) + wx, wy) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2*zeta (viscous coefficient) @@ -4677,16 +4679,16 @@ subroutine precondition(zetaD, & vx , & ! input vector (x components) vy ! input vector (y components) - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(out) :: & - wx , & ! preconditionned vector (x components) - wy ! preconditionned vector (y components) + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & + diagx , & ! diagonal of the system matrix (x components) + diagy ! diagonal of the system matrix (y components) integer (kind=int_kind), intent(in) :: & precond_type ! type of preconditioner - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - diagx , & ! diagonal of the system matrix (x components) - diagy ! diagonal of the system matrix (y components) + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & + wx , & ! preconditionned vector (x components) + wy ! preconditionned vector (y components) ! local variables From b6769e99fcb8a6a6dad9407ebecf2c2416140c00 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 5 Nov 2019 12:59:22 -0500 Subject: [PATCH 117/196] ice_dyn_vp: fgmres: correctly initialize workspace_[xy] and arnoldi_basis_[xy] A previous commit only initialied them in pgmres. 
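For illustration, here is a minimal standalone sketch of why this
zero-initialization matters; it is not part of the patch, and all names
in it are invented. The solver kernels only write the entries listed in
the ice-point index arrays, so every entry outside that set keeps
whatever value the array held before, unless it is explicitly zeroed
first:

    program masked_update_demo
       ! A kernel that writes only "ice" cells leaves the other cells
       ! untouched, so the arrays must be zero-initialized beforehand.
       implicit none
       integer, parameter :: dbl_kind = selected_real_kind(13)
       real (kind=dbl_kind) :: workspace(4)
       logical :: icemask(4)
       integer :: i

       icemask = (/ .true., .false., .true., .false. /)

       workspace = 0.0_dbl_kind   ! cells with no ice zero-initialized
       do i = 1, 4
          if (icemask(i)) workspace(i) = 1.0_dbl_kind ! write ice cells only
       enddo

       ! Without the explicit zeroing above, workspace(2) and workspace(4)
       ! would keep stale values, contaminating any norm or dot product
       ! taken over the full array.
       print *, 'squared norm =', sum(workspace**2)   ! prints 2.0
    end program masked_update_demo
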
This should be squashed with 1c1b5cf (Add workspace initialization to 0 and
haloupdate before matvec in fgmres (WIP), 2019-07-24)
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 6d12590b3..bc2478650 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -3959,8 +3959,11 @@ subroutine fgmres (zetaD, &

       precond_type = precond

-      ! workspace_x = c0
-      ! workspace_y = c0
+      ! Cells with no ice should be zero-initialized
+      workspace_x = c0
+      workspace_y = c0
+      arnoldi_basis_x = c0
+      arnoldi_basis_y = c0

       ! Residual of the initial iterate

@@ -4342,8 +4345,11 @@ subroutine pgmres (zetaD, &

       precond_type = 2 ! Jacobi preconditioner

+      ! Cells with no ice should be zero-initialized
       workspace_x = c0
       workspace_y = c0
+      arnoldi_basis_x = c0
+      arnoldi_basis_y = c0

       ! Residual of the initial iterate

From 9e0a27a13b0a6d32ea22117d31a87ac032a9ef0b Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 5 Nov 2019 13:05:04 -0500
Subject: [PATCH 118/196] ice_dyn_vp: matvec: change Au,Av from 'intent(out)'
 to 'intent(inout)'

According to The Fortran 2003 Handbook [1], the Fortran standard states
that dummy arguments with 'intent(out)' become undefined on entry to the
procedure, so any element the procedure does not explicitly set is left
undefined. This is dangerous here since 'matvec' sets the 'Au', 'Av'
arguments only for grid cells where there is ice: grid cells with no ice
that were previously initialized (e.g. to zero) in the actual arguments
could be left with garbage values after the call to 'matvec'. This does
not happen with the Intel compiler, but it is still better to follow the
standard.

[1] J. C. Adams, W. S. Brainerd, R. A. Hendrickson, R. E. Maine, J. T.
Martin, and B. T. Smith, The Fortran 2003 Handbook. London: Springer
London, 2009.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index bc2478650..05a1fc257 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -2704,7 +2704,7 @@ subroutine matvec (nx_block, ny_block, &
          zetaD ! 2*zeta

       real (kind=dbl_kind), dimension (nx_block,ny_block), &
-         intent(out) :: &
+         intent(inout) :: &
          Au       , & ! matvec, Fx = Au - bx (N/m^2)! jfl
          Av           ! matvec, Fy = Av - by (N/m^2)! jfl

From 1e35c9ac6f13ae76d60151d59bb9986e57af7d5f Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Thu, 7 Nov 2019 13:58:42 -0500
Subject: [PATCH 119/196] ice_dyn_vp: clarify descriptions of FGMRES variables

Use 'Arnoldi' and 'restarts' in addition to 'inner' and 'outer'.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 05a1fc257..207cfc0fe 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -3880,15 +3880,15 @@ subroutine fgmres (zetaD, &
          ! residual is below tolerance

       integer (kind=int_kind), intent(in) :: &
-         maxinner ! Restart the method every maxinner inner iterations
+         maxinner ! Restart the method every maxinner inner (Arnoldi) iterations

       integer (kind=int_kind), intent(in) :: &
-         maxouter ! Maximum number of outer iterations
-                  ! Iteration will stop after maxinner*maxouter steps
+         maxouter ! 
Maximum number of outer (restarts) iterations + ! Iteration will stop after maxinner*maxouter Arnoldi steps ! even if the specified tolerance has not been achieved integer (kind=int_kind), intent(out) :: & - nbiter ! Total number of iteration performed + nbiter ! Total number of Arnoldi iterations performed real (kind=dbl_kind), intent(out) :: & conv ! !phb DESCRIBE IF WE KEEP From 07c2b016da0d28e2794aeba6f6cfa9a1bff9b83f Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 8 Nov 2019 13:48:05 -0500 Subject: [PATCH 120/196] ice_dyn_vp: add subroutines description and clarify existing ones --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 64 ++++++++++++++++++----- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 207cfc0fe..fedd59afa 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1051,8 +1051,9 @@ end subroutine picard_solver !======================================================================= -! Solve nonlinear equation using fixed point iteration, accelerated with -! Anderson acceleration +! Solve the nonlinear equation F(u,v) = 0, where +! F(u,v) := A(u,v) * (u,v) - b(u,v) +! using Anderson acceleration (accelerated fixed point iteration) ! ! author: P. Blain ECCC @@ -2366,7 +2367,7 @@ end subroutine stress_vp !======================================================================= -! calc deformations for mechanical redistribution +! Compute deformations for mechanical redistribution subroutine deformations (nx_block, ny_block, & icellt, & @@ -2489,6 +2490,8 @@ end subroutine deformations !======================================================================= +! Compute vrel and basal stress coefficients + subroutine calc_vrel_Cb (nx_block, ny_block, & icellu, Cw, & indxui, indxuj, & @@ -2568,6 +2571,10 @@ end subroutine calc_vrel_Cb !======================================================================= +! OLD matrix vector product A(u,v) * (u,v) +! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) +! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) + subroutine matvecOLD (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -2650,6 +2657,10 @@ end subroutine matvecOLD !======================================================================= +! Computes the matrix vector product A(u,v) * (u,v) +! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) +! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) + subroutine matvec (nx_block, ny_block, & icellu, icellt , & indxui, indxuj, & @@ -2927,7 +2938,10 @@ subroutine matvec (nx_block, ny_block, & end subroutine matvec !======================================================================= - + +! Compute the constant component of b(u,v) i.e. the part of b(u,v) that +! does not depend on (u,v) and thus do not change during the nonlinear iteration + subroutine calc_bfix (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -2982,6 +2996,10 @@ end subroutine calc_bfix !======================================================================= +! Compute the vector b(u,v), i.e. the part of the nonlinear function F(u,v) +! that cannot be written as A(u,v)*(u,v), where A(u,v) is a matrix with entries +! 
depending on (u,v) + subroutine calc_bvec (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -3072,7 +3090,12 @@ subroutine calc_bvec (nx_block, ny_block, & end subroutine calc_bvec - !======================================================================= +!======================================================================= + +! Compute the non linear residual F(u,v) = A(u,v) * (u,v) - b(u,v), +! with Au, Av precomputed as +! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) +! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) subroutine residual_vec (nx_block, ny_block, & icellu, & @@ -3133,6 +3156,9 @@ end subroutine residual_vec !======================================================================= +! Form the diagonal of the matrix A(u,v) (first part of the computation) +! Part 1: compute the contributions of the diagonal to the rheology term + subroutine formDiag_step1 (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -3508,6 +3534,9 @@ end subroutine formDiag_step1 !======================================================================= +! Form the diagonal of the matrix A(u,v) (second part of the computation) +! Part 2: compute diagonal + subroutine formDiag_step2 (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -3589,7 +3618,9 @@ subroutine formDiag_step2 (nx_block, ny_block, & end subroutine formDiag_step2 !======================================================================= - + +! Diagonal (Jacobi) preconditioner for the legacy FGMRES driver + subroutine precond_diag (ntot, & diagvec, & wk1, wk2) @@ -3626,6 +3657,8 @@ end subroutine precond_diag !======================================================================= +! Compute squared l^2 norm of a grid function (tpu,tpv) + subroutine calc_L2norm_squared (nx_block, ny_block, & icellu, & indxui, indxuj, & @@ -3671,7 +3704,10 @@ subroutine calc_L2norm_squared (nx_block, ny_block, & end subroutine calc_L2norm_squared - !======================================================================= +!======================================================================= + +! Convert a grid function (tpu,tpv) to a one dimensional vector +! to be passed to the legacy FGMRES driver subroutine arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu, ntot, & @@ -3728,7 +3764,10 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & end subroutine arrays_to_vec - !======================================================================= +!======================================================================= + +! Convert one dimensional vector received from the legacy FGMRES driver +! to a grid function (tpu,tpv) subroutine vec_to_arrays (nx_block, ny_block, nblocks, & max_blocks, icellu, ntot, & @@ -3793,6 +3832,7 @@ end subroutine vec_to_arrays ! Update Q and R factor after deletion of the 1st column of G_diff ! ! author: P. Blain ECCC + subroutine qr_delete(Q, R) real (kind=dbl_kind), intent(inout) :: & @@ -3838,7 +3878,7 @@ end subroutine qr_delete !======================================================================= ! FGMRES: Flexible generalized minimum residual method (with restarts). -! Solves A x = b using GMRES with a varying (right) preconditioner +! Solves the linear system A x = b using GMRES with a varying (right) preconditioner ! ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC @@ -4232,7 +4272,7 @@ end subroutine fgmres !======================================================================= ! 
PGMRES: Right-preconditioned generalized minimum residual method (with restarts).
-! Solves A x = b using GMRES with a right preconditioner
+! Solves the linear system A x = b using GMRES with a right preconditioner
 !
 ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC

From d1e9804dfe28cdebe2c156186620469c49f8476c Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 8 Nov 2019 13:48:44 -0500
Subject: [PATCH 121/196] ice_dyn_vp: fix comments in FGMRES algorithm

The next block not only computes the new Givens rotation, it also
applies it, so tweak the comment to match the code.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index fedd59afa..1b790a48c 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -4167,7 +4167,7 @@ subroutine fgmres (zetaD, &
          end do
       end if

-      ! Compute new Givens rotation
+      ! Compute and apply new Givens rotation
       nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2)
       if (.not. almost_zero(nu)) then
          rot_cos(initer) = hessenberg(initer,initer) / nu

From 867318a9e27950544acdc7c73afd62d027e5991a Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 8 Nov 2019 15:44:04 -0500
Subject: [PATCH 122/196] ice_dyn_vp: fgmres: fix misleading check for return

Using `outiter > maxouter` meant that the algorithm would always perform
at most `maxouter+1` outer (restarts) iterations: at the end of the
`maxouter`-th iteration we would have `maxouter > maxouter`, which is
false, so the algorithm would do one last outer iteration before
returning.

Fix this by using ">=" so that the `maxouter` value really is the
maximum number of outer iterations permitted.

This error was also present in the GEM version.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 1b790a48c..f18f353b3 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -4221,7 +4221,7 @@ subroutine fgmres (zetaD, &
       ! Increment outer loop counter and check for convergence
       outiter = outiter + 1

-      if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
+      if (norm_residual <= relative_tolerance .or. outiter >= maxouter) then
          return
       end if

From 10fc1ea07665e0bd77f80ee36a9a32ee518dd057 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 8 Nov 2019 16:02:37 -0500
Subject: [PATCH 123/196] ice_dyn_vp: fgmres: fix error in the last loop computing the residual

workspace_[xy] was overwritten at each iteration instead of being
incremented. Fix that.
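For illustration, a minimal standalone sketch (not part of the patch;
all names are invented) of the linear combination this loop computes:
the (F)GMRES correction is x = x0 + sum over it of
rhs_hess(it) * basis(:,it), so the workspace must be accumulated across
'it'; a plain assignment keeps only the last term of the sum:

    program accumulate_demo
       ! Accumulating a linear combination of basis vectors, as the
       ! residual-reconstruction loop in fgmres is meant to do.
       implicit none
       integer, parameter :: dbl_kind = selected_real_kind(13)
       integer, parameter :: n = 3, m = 2
       real (kind=dbl_kind) :: basis(n,m), rhs_hess(m), workspace(n)
       integer :: it

       basis(:,1) = (/ 1.0_dbl_kind, 0.0_dbl_kind, 0.0_dbl_kind /)
       basis(:,2) = (/ 0.0_dbl_kind, 1.0_dbl_kind, 0.0_dbl_kind /)
       rhs_hess   = (/ 2.0_dbl_kind, 3.0_dbl_kind /)

       workspace = 0.0_dbl_kind
       do it = 1, m
          ! correct: increment across 'it'; writing
          !    workspace = rhs_hess(it) * basis(:,it)
          ! instead would discard every earlier term
          workspace = workspace + rhs_hess(it) * basis(:,it)
       enddo

       print *, workspace   ! prints 2.0 3.0 0.0: both terms retained
    end program accumulate_demo
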
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index f18f353b3..b328e772c 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -4256,8 +4256,8 @@ subroutine fgmres (zetaD, &
             i = indxui(ij, iblk)
             j = indxuj(ij, iblk)

-            workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
-            workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+            workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
+            workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
          enddo ! ij
       enddo
       !$OMP END PARALLEL DO

From 9c8667fe6e18984c1507322ac897ff4226d9b6f8 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 8 Nov 2019 16:18:52 -0500
Subject: [PATCH 124/196] ice_dyn_vp: change the definition of F(x) from 'A.x - b' to 'b - A.x'

This makes the result of the 'residual_vec' subroutine conform with the
definition of the residual in the FGMRES algorithm (r := b - A.x).

This should have no other impact than making the new fgmres subroutine
actually work.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 54 +++++++++++------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index b328e772c..db17e7d51 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -704,10 +704,10 @@ subroutine picard_solver (icellt, icellu, &
          by       , & ! b vector
          Diagu    , & ! Diagonal (u component) of the matrix A
          Diagv    , & ! Diagonal (v component) of the matrix A
-         Au       , & ! matvec, Fx = Au - bx
-         Av       , & ! matvec, Fy = Av - by
-         Fx       , & ! x residual vector, Fx = Au - bx
-         Fy           ! y residual vector, Fy = Av - by
+         Au       , & ! matvec, Fx = bx - Au
+         Av       , & ! matvec, Fy = by - Av
+         Fx       , & ! x residual vector, Fx = bx - Au
+         Fy           ! y residual vector, Fy = by - Av

       real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: &
          zetaD ! zetaD = 2zeta (viscous coeff)
@@ -1136,10 +1136,10 @@ subroutine anderson_solver (icellt, icellu, &
          by       , & ! b vector
          Diagu    , & ! Diagonal (u component) of the matrix A
          Diagv    , & ! Diagonal (v component) of the matrix A
-         Au       , & ! matvec, Fx = Au - bx
-         Av       , & ! matvec, Fy = Av - by
-         Fx       , & ! x residual vector, Fx = Au - bx
-         Fy       , & ! y residual vector, Fy = Av - by
+         Au       , & ! matvec, Fx = bx - Au
+         Av       , & ! matvec, Fy = by - Av
+         Fx       , & ! x residual vector, Fx = bx - Au
+         Fy       , & ! y residual vector, Fy = by - Av
          solx     , & ! solution of FGMRES (x components)
          soly         ! solution of FGMRES (y components)
@@ -1577,8 +1577,8 @@ subroutine fgmres_solver (ntot, bvec, &
         ierr ! code for pgmres preconditioner !phb: needed?

      real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: &
-        Au       , & ! matvec, Fx = Au - bx
-        Av           ! matvec, Fy = Av - by
+        Au       , & ! matvec, Fx = bx - Au
+        Av           ! matvec, Fy = by - Av

      real (kind=dbl_kind), allocatable :: &
         vv(:,:), ww(:,:) ! work arrays for FGMRES
@@ -2612,8 +2612,8 @@ subroutine matvecOLD (nx_block, ny_block, &

       real (kind=dbl_kind), dimension (nx_block,ny_block), &
         intent(inout) :: &
-        Au       , & ! matvec, Fx = Au - bx (N/m^2)! jfl
-        Av           ! matvec, Fy = Av - by (N/m^2)! jfl
+        Au       , & ! matvec, Fx = bx - Au (N/m^2)! jfl
+        Av           ! matvec, Fy = by - Av (N/m^2)! jfl

      ! 
local variables @@ -2716,8 +2716,8 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl - Av ! matvec, Fy = Av - by (N/m^2)! jfl + Au , & ! matvec, Fx = bx - Au (N/m^2)! jfl + Av ! matvec, Fy = by - Av (N/m^2)! jfl ! local variables @@ -3092,7 +3092,7 @@ end subroutine calc_bvec !======================================================================= -! Compute the non linear residual F(u,v) = A(u,v) * (u,v) - b(u,v), +! Compute the non linear residual F(u,v) = b(u,v) - A(u,v) * (u,v), ! with Au, Av precomputed as ! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) ! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) @@ -3117,13 +3117,13 @@ subroutine residual_vec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl by , & ! b vector, by = tauy + byfix (N/m^2) !jfl - Au , & ! matvec, Fx = Au - bx (N/m^2) ! jfl - Av ! matvec, Fy = Av - by (N/m^2) ! jfl + Au , & ! matvec, Fx = bx - Au (N/m^2) ! jfl + Av ! matvec, Fy = by - Av (N/m^2) ! jfl real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Fx , & ! x residual vector, Fx = Au - bx (N/m^2) - Fy ! y residual vector, Fy = Av - by (N/m^2) + Fx , & ! x residual vector, Fx = bx - Au (N/m^2) + Fy ! y residual vector, Fy = by - Av (N/m^2) real (kind=dbl_kind), intent(out), optional :: & sum_squared ! sum of squared residual vector components @@ -3145,8 +3145,8 @@ subroutine residual_vec (nx_block, ny_block, & i = indxui(ij) j = indxuj(ij) - Fx(i,j) = Au(i,j) - bx(i,j) - Fy(i,j) = Av(i,j) - by(i,j) + Fx(i,j) = bx(i,j) - Au(i,j) + Fy(i,j) = by(i,j) - Av(i,j) if (present(sum_squared)) then sum_squared = sum_squared + Fx(i,j)**2 + Fy(i,j)**2 endif @@ -3566,8 +3566,8 @@ subroutine formDiag_step2 (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Diagu , & ! matvec, Fx = Au - bx (N/m^2) - Diagv ! matvec, Fy = Av - by (N/m^2) + Diagu , & ! matvec, Fx = bx - Au (N/m^2) + Diagv ! matvec, Fy = by - Av (N/m^2) ! local variables @@ -3943,8 +3943,8 @@ subroutine fgmres (zetaD, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) - Fy ! residual vector (y components), Fy = Av - by (N/m^2) + Fx , & ! residual vector (x components), Fx = bx - Au (N/m^2) + Fy ! residual vector (y components), Fy = by - Av (N/m^2) real (kind=dbl_kind), dimension (max_blocks) :: & norm_squared ! array to accumulate squared norm of grid function over blocks @@ -4333,8 +4333,8 @@ subroutine pgmres (zetaD, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = Au - bx (N/m^2) - Fy ! residual vector (y components), Fy = Av - by (N/m^2) + Fx , & ! residual vector (x components), Fx = bx - Au (N/m^2) + Fy ! residual vector (y components), Fy = by - Av (N/m^2) real (kind=dbl_kind), dimension (max_blocks) :: & norm_squared ! 
array to accumulate squared norm of grid function over blocks

From 829f2c4072186e34923895afee0ab96dff87d628 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Wed, 13 Nov 2019 11:48:32 -0500
Subject: [PATCH 125/196] ice_dyn_vp: fgmres: comment out early return (WIP)

This early return differs from the legacy FGMRES implementation, so it
needs to be commented out in order to compare both versions of the
algorithm.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index db17e7d51..64bcd71ab 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -4050,9 +4050,9 @@ subroutine fgmres (zetaD, &
       norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))

       ! Current guess is a good enough solution
-      if (norm_residual < tolerance) then
-         return
-      end if
+      ! if (norm_residual < tolerance) then
+      !    return
+      ! end if

       ! Normalize the first Arnoldi vector
       inverse_norm = c1 / norm_residual

From 3ddc81cd05470fba0035c8781ba12c5bfd2f080b Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Wed, 13 Nov 2019 12:28:50 -0500
Subject: [PATCH 126/196] ice_dyn_vp: pgmres: synchronize with fgmres (WIP)

Also remove an unnecessary return statement at the end of fgmres.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 65 +++++------------------
 1 file changed, 14 insertions(+), 51 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 64bcd71ab..fe829bdcb 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -4266,7 +4266,6 @@ subroutine fgmres (zetaD, &
          end do
       end do ! end of outer (restarts) loop

-      return
      end subroutine fgmres

 !=======================================================================
@@ -4310,15 +4309,15 @@ subroutine pgmres (zetaD, &
          ! residual is below tolerance

       integer (kind=int_kind), intent(in) :: &
-         maxinner ! Restart the method every maxinner inner iterations
+         maxinner ! Restart the method every maxinner inner (Arnoldi) iterations

       integer (kind=int_kind), intent(in) :: &
-         maxouter ! Maximum number of outer iterations
-                  ! Iteration will stop after maxinner*maxouter steps
+         maxouter ! Maximum number of outer (restarts) iterations
+                  ! Iteration will stop after maxinner*maxouter Arnoldi steps
                   ! even if the specified tolerance has not been achieved

       integer (kind=int_kind), intent(out) :: &
-         nbiter ! Total number of iteration performed
+         nbiter ! Total number of Arnoldi iterations performed

       real (kind=dbl_kind), intent(out) :: &
          conv ! !phb DESCRIBE IF WE KEEP
@@ -4436,9 +4435,9 @@ subroutine pgmres (zetaD, &
       norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info))

       ! Current guess is a good enough solution
-      if (norm_residual < tolerance) then
-         return
-      end if
+      ! if (norm_residual < tolerance) then
+      !    return
+      ! end if

       ! Normalize the first Arnoldi vector
       inverse_norm = c1 / norm_residual
@@ -4505,45 +4504,9 @@ subroutine pgmres (zetaD, &
          enddo
         !$OMP END PARALLEL DO

-         ! Classical Gram-Schmidt orthogonalisation process
-         ! 
First loop of Gram-Schmidt (compute coefficients)
-         dotprod_local = c0
-         do it = 1, initer
-            local_dot = c0
-
-            !$OMP PARALLEL DO PRIVATE(iblk)
-            do iblk = 1, nblocks
-               do ij = 1, icellu(iblk)
-                  i = indxui(ij, iblk)
-                  j = indxuj(ij, iblk)
-
-                  local_dot = local_dot + (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + &
-                                          (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit))
-               enddo ! ij
-            enddo
-            !$OMP END PARALLEL DO
-
-            dotprod_local(it) = local_dot
-         end do
-
-         hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info)
-
-         ! Second loop of Gram-Schmidt (orthonormalize)
-         do it = 1, initer
-            !$OMP PARALLEL DO PRIVATE(iblk)
-            do iblk = 1, nblocks
-               do ij = 1, icellu(iblk)
-                  i = indxui(ij, iblk)
-                  j = indxuj(ij, iblk)
-
-                  arnoldi_basis_x(i, j, iblk, nextit) = arnoldi_basis_x(i, j, iblk, nextit) &
-                     - hessenberg(it,initer) * arnoldi_basis_x(i, j, iblk, it)
-                  arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit) &
-                     - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it)
-               enddo ! ij
-            enddo
-            !$OMP END PARALLEL DO
-         end do
+         ! Orthogonalize the new vector
+         call orthogonalize(arnoldi_basis_x, arnoldi_basis_y, &
+                            hessenberg, initer, nextit, maxinner, ortho_type)

       ! Compute norm of new Arnoldi vector and update Hessenberg matrix
       !$OMP PARALLEL DO PRIVATE(iblk)
@@ -4584,7 +4547,7 @@ subroutine pgmres (zetaD, &
          end do
       end if

-      ! Compute new Givens rotation
+      ! Compute and apply new Givens rotation
       nu = sqrt(hessenberg(initer,initer)**2 + hessenberg(nextit,initer)**2)
       if (.not. almost_zero(nu)) then
          rot_cos(initer) = hessenberg(initer,initer) / nu
@@ -4652,7 +4615,7 @@ subroutine pgmres (zetaD, &
       ! Increment outer loop counter and check for convergence
       outiter = outiter + 1

-      if (norm_residual <= relative_tolerance .or. outiter > maxouter) then
+      if (norm_residual <= relative_tolerance .or. outiter >= maxouter) then
          return
       end if
@@ -4687,8 +4650,8 @@ subroutine pgmres (zetaD, &
             i = indxui(ij, iblk)
             j = indxuj(ij, iblk)

-            workspace_x(i, j, iblk) = rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
-            workspace_y(i, j, iblk) = rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
+            workspace_x(i, j, iblk) = workspace_x(i, j, iblk) + rhs_hess(it) * arnoldi_basis_x(i, j, iblk, it)
+            workspace_y(i, j, iblk) = workspace_y(i, j, iblk) + rhs_hess(it) * arnoldi_basis_y(i, j, iblk, it)
          enddo ! ij
       enddo
       !$OMP END PARALLEL DO

From fc4fa1ce135147c909b3b2bc1ca0985c949e0198 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 26 Nov 2019 10:47:47 -0500
Subject: [PATCH 127/196] ice_dyn_vp: picard_solver: re-enable legacy solver

Rename the subroutines 'fgmres' and 'pgmres' in the standalone files
with a '_legacy' suffix.
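The legacy driver re-enabled below uses a reverse-communication
interface: the caller repeatedly re-enters the solver, which signals
through 'icode' whether it needs a preconditioner application
(icode == 1) or a matrix-vector product (icode >= 2) before continuing,
as the 'goto 1' loop in the restored code shows. A minimal standalone
sketch of that control flow, with invented routine names (not the
actual legacy API):

    program revcom_demo
       ! Reverse communication: the "solver" returns control to the
       ! caller whenever it needs an operation it cannot do itself.
       implicit none
       integer :: icode
       icode = 0
       do
          call fake_solver_step(icode)  ! stands in for the legacy call
          if (icode == 1) then
             print *, 'caller applies preconditioner to work vector'
          else if (icode >= 2) then
             print *, 'caller computes matrix-vector product'
          else
             exit                       ! icode == 0: converged, leave loop
          end if
       end do
    contains
       subroutine fake_solver_step(icode)
          integer, intent(inout) :: icode
          integer, save :: calls = 0
          calls = calls + 1
          select case (calls)           ! scripted: precond, matvec, done
          case (1)
             icode = 1
          case (2)
             icode = 2
          case default
             icode = 0
          end select
       end subroutine fake_solver_step
    end program revcom_demo
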
--- .../{fgmresD.F90.unused => fgmresD.F90} | 2 +- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 624 +++++++++--------- .../{pgmres.F90.unused => pgmres.F90} | 2 +- 3 files changed, 314 insertions(+), 314 deletions(-) rename cicecore/cicedynB/dynamics/{fgmresD.F90.unused => fgmresD.F90} (99%) rename cicecore/cicedynB/dynamics/{pgmres.F90.unused => pgmres.F90} (99%) diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90.unused b/cicecore/cicedynB/dynamics/fgmresD.F90 similarity index 99% rename from cicecore/cicedynB/dynamics/fgmresD.F90.unused rename to cicecore/cicedynB/dynamics/fgmresD.F90 index 5a93e688d..d65e8cba0 100644 --- a/cicecore/cicedynB/dynamics/fgmresD.F90.unused +++ b/cicecore/cicedynB/dynamics/fgmresD.F90 @@ -1,4 +1,4 @@ - subroutine fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2, & + subroutine fgmres_legacy (n,im,rhs,sol,i,vv,w,wk1, wk2, & gamma,maxits,iout,icode,its,ro) use ice_fileunits, only: nu_diag diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index fe829bdcb..3f2a4b3ab 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -733,319 +733,319 @@ subroutine picard_solver (icellt, icellu, & character(len=*), parameter :: subname = '(picard_solver)' - ! ! Allocate space for FGMRES work arrays - ! allocate(wk11(ntot), wk22(ntot)) - ! allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - ! - ! ! Start iterations - ! do kOL = 1,maxits_nonlin ! outer loop - ! - ! !----------------------------------------------------------------- - ! ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) - ! !----------------------------------------------------------------- - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! - ! uprev_k(:,:,iblk) = uvel(:,:,iblk) - ! vprev_k(:,:,iblk) = vvel(:,:,iblk) - ! - ! call calc_zeta_Pr (nx_block , ny_block, & - ! icellt(iblk), & - ! indxti (:,iblk) , indxtj(:,iblk), & - ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - ! dxt (:,:,iblk), dyt (:,:,iblk), & - ! dxhy (:,:,iblk), dyhx (:,:,iblk), & - ! cxp (:,:,iblk), cyp (:,:,iblk), & - ! cxm (:,:,iblk), cym (:,:,iblk), & - ! tinyarea (:,:,iblk), & - ! strength (:,:,iblk), zetaD (:,:,iblk,:) ,& - ! stPrtmp (:,:,:) ) - ! - ! call calc_vrel_Cb (nx_block , ny_block, & - ! icellu (iblk), Cdn_ocn (:,:,iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! aiu (:,:,iblk), Tbu (:,:,iblk), & - ! uocn (:,:,iblk), vocn (:,:,iblk), & - ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - ! vrel (:,:,iblk), Cb (:,:,iblk)) - ! - ! ! prepare b vector (RHS) - ! call calc_bvec (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & - ! aiu (:,:,iblk), uarear (:,:,iblk), & - ! uocn (:,:,iblk), vocn (:,:,iblk), & - ! waterx (:,:,iblk), watery (:,:,iblk), & - ! uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - ! bxfix (:,:,iblk), byfix (:,:,iblk), & - ! bx (:,:,iblk), by (:,:,iblk), & - ! vrel (:,:,iblk)) - ! - ! ! prepare precond matrix - ! if (precond .gt. 1) then - ! - ! call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology - ! icellu (iblk), & - ! indxui (:,iblk), indxuj(:,iblk), & - ! dxt (:,:,iblk), dyt (:,:,iblk), & - ! dxhy (:,:,iblk), dyhx(:,:,iblk), & - ! cxp (:,:,iblk), cyp (:,:,iblk), & - ! cxm (:,:,iblk), cym (:,:,iblk), & - ! zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) - ! - ! call formDiag_step2 (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! Dstrtmp (:,:,:) , vrel (:,:,iblk), & - ! 
umassdti (:,:,iblk), & - ! uarear (:,:,iblk), Cb (:,:,iblk), & - ! Diagu (:,:,iblk), Diagv (:,:,iblk)) - ! - ! endif - ! - ! enddo - ! !$OMP END PARALLEL DO - ! - ! ! Compute nonlinear residual norm - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! call matvec (nx_block , ny_block, & - ! icellu (iblk) , icellt (iblk) , & - ! indxui (:,iblk) , indxuj (:,iblk) , & - ! indxti (:,iblk) , indxtj (:,iblk) , & - ! dxt (:,:,iblk) , dyt (:,:,iblk), & - ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & - ! cxp (:,:,iblk) , cyp (:,:,iblk), & - ! cxm (:,:,iblk) , cym (:,:,iblk), & - ! uvel (:,:,iblk) , vvel (:,:,iblk), & - ! vrel (:,:,iblk) , Cb (:,:,iblk), & - ! zetaD (:,:,iblk,:), & - ! umassdti (:,:,iblk) , fm (:,:,iblk), & - ! uarear (:,:,iblk) , & - ! Au (:,:,iblk) , Av (:,:,iblk)) - ! call residual_vec (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! bx (:,:,iblk), by (:,:,iblk), & - ! Au (:,:,iblk), Av (:,:,iblk), & - ! Fx (:,:,iblk), Fy (:,:,iblk), & - ! L2norm(iblk)) - ! enddo - ! !$OMP END PARALLEL DO - ! nlres_norm = sqrt(sum(L2norm)) - ! if (monitor_nonlin) then - ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - ! " nonlin_res_L2norm= ", nlres_norm - ! endif - ! ! Compute relative tolerance at first iteration - ! if (kOL == 1) then - ! tol = gammaNL*nlres_norm - ! endif - ! ! Check for nonlinear convergence - ! if (nlres_norm < tol) then - ! exit - ! endif - ! - ! !----------------------------------------------------------------------- - ! ! prep F G M R E S - ! !----------------------------------------------------------------------- - ! - ! icode = 0 - ! ! its = 0 - ! - ! ! form b vector from matrices (nblocks matrices) - ! call arrays_to_vec (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! bx (:,:,:), by (:,:,:), & - ! bvec(:)) - ! ! form sol vector for fgmres (sol is iniguess at the beginning) - ! call arrays_to_vec (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! uprev_k (:,:,:), vprev_k (:,:,:), & - ! sol(:)) - ! - ! ! form matrix diagonal as a vector from Diagu and Diagv arrays - ! call arrays_to_vec (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! Diagu (:,:,:), Diagv(:,:,:),& - ! diagvec(:)) - ! - ! !----------------------------------------------------------------------- - ! ! F G M R E S L O O P - ! !----------------------------------------------------------------------- - ! 1 continue - ! !----------------------------------------------------------------------- - ! - ! call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - ! gamma, maxits_fgmres, monitor_fgmres, & - ! icode,fgmres_its, res_norm) - ! - ! if (icode == 1) then - ! - ! if (precond .eq. 1) then - ! - ! wk22(:)=wk11(:) ! precond=identity - ! - ! elseif (precond .eq. 2) then ! use diagonal of A for precond step - ! - ! call precond_diag (ntot, & - ! diagvec (:), & - ! wk11 (:), wk22 (:) ) - ! - ! elseif (precond .eq. 3) then - ! - ! call pgmres (nx_block, ny_block, nblocks , & - ! max_blocks , icellu (:) , & - ! indxui (:,:) , indxuj (:,:) , & - ! icellt (:) , & - ! indxti (:,:) , indxtj (:,:) , & - ! dxt (:,:,:) , dyt (:,:,:) , & - ! dxhy (:,:,:) , dyhx (:,:,:) , & - ! cxp (:,:,:) , cyp (:,:,:) , & - ! cxm (:,:,:) , cym (:,:,:) , & - ! vrel (:,:,:) , Cb (:,:,:) , & - ! zetaD (:,:,:,:) , & - ! umassdti (:,:,:) , fm (:,:,:) , & - ! uarear (:,:,:) , diagvec(:) , & - ! 
wk22 (:) , wk11(:) , & - ! ntot , im_pgmres , & - ! epsprecond , maxits_pgmres , & - ! monitor_pgmres , ierr ) - ! endif ! precond - ! - ! goto 1 - ! - ! elseif (icode >= 2) then - ! - ! call vec_to_arrays (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! wk11 (:), & - ! uvel (:,:,:), vvel (:,:,:)) - ! - ! ! JFL halo update could be in subroutine... - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! fld2(:,:,1,iblk) = uvel(:,:,iblk) - ! fld2(:,:,2,iblk) = vvel(:,:,iblk) - ! enddo - ! !$OMP END PARALLEL DO - ! - ! call ice_HaloUpdate (fld2, halo_info, & - ! field_loc_NEcorner, field_type_vector) - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! uvel(:,:,iblk) = fld2(:,:,1,iblk) - ! vvel(:,:,iblk) = fld2(:,:,2,iblk) - ! enddo - ! !$OMP END PARALLEL DO - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! - ! call matvec (nx_block , ny_block, & - ! icellu (iblk) , icellt (iblk) , & - ! indxui (:,iblk) , indxuj (:,iblk) , & - ! indxti (:,iblk) , indxtj (:,iblk) , & - ! dxt (:,:,iblk) , dyt (:,:,iblk), & - ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & - ! cxp (:,:,iblk) , cyp (:,:,iblk), & - ! cxm (:,:,iblk) , cym (:,:,iblk), & - ! uvel (:,:,iblk) , vvel (:,:,iblk), & - ! vrel (:,:,iblk) , Cb (:,:,iblk), & - ! zetaD (:,:,iblk,:), & - ! umassdti (:,:,iblk) , fm (:,:,iblk), & - ! uarear (:,:,iblk) , & - ! Au (:,:,iblk) , Av (:,:,iblk)) - ! - ! enddo - ! !$OMP END PARALLEL DO - ! - ! ! form wk2 from Au and Av arrays - ! call arrays_to_vec (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! Au (:,:,:), Av (:,:,:), & - ! wk22(:)) - ! - ! goto 1 - ! - ! endif ! icode - ! - ! !----------------------------------------------------------------------- - ! ! Put vector sol in uvel and vvel arrays - ! !----------------------------------------------------------------------- - ! - ! call vec_to_arrays (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! sol (:), & - ! uvel (:,:,:), vvel (:,:,:)) - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! ! do iblk = 1, nblocks - ! ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) - ! ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) - ! ! enddo - ! !$OMP END PARALLEL DO - ! + ! Allocate space for FGMRES work arrays + allocate(wk11(ntot), wk22(ntot)) + allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - ! ! phb NOT SURE IF THIS HALO UPDATE IS NEEDED - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! - ! ! load velocity into array for boundary updates - ! fld2(:,:,1,iblk) = uvel(:,:,iblk) - ! fld2(:,:,2,iblk) = vvel(:,:,iblk) - ! - ! enddo - ! !$OMP END PARALLEL DO - ! - ! call ice_timer_start(timer_bound) - ! if (maskhalo_dyn) then - ! call ice_HaloUpdate (fld2, halo_info_mask, & - ! field_loc_NEcorner, field_type_vector) - ! else - ! call ice_HaloUpdate (fld2, halo_info, & - ! field_loc_NEcorner, field_type_vector) - ! endif - ! call ice_timer_stop(timer_bound) - ! - ! ! unload - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! uvel(:,:,iblk) = fld2(:,:,1,iblk) - ! vvel(:,:,iblk) = fld2(:,:,2,iblk) - ! enddo - ! !$OMP END PARALLEL DO - ! - ! ! Compute fixed point residual norm - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) - ! fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - ! 
call calc_L2norm_squared (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! fpresx(:,:,iblk), fpresy(:,:,iblk), & - ! L2norm (iblk)) - ! enddo - ! !$OMP END PARALLEL DO - ! if (monitor_nonlin) then - ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) - ! endif - ! - ! enddo ! outer loop - ! - ! ! deallocate FGMRES work arrays - ! deallocate(wk11, wk22, vv, ww) + ! Start iterations + do kOL = 1,maxits_nonlin ! outer loop + + !----------------------------------------------------------------- + ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) + !----------------------------------------------------------------- + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + uprev_k(:,:,iblk) = uvel(:,:,iblk) + vprev_k(:,:,iblk) = vvel(:,:,iblk) + + call calc_zeta_Pr (nx_block , ny_block, & + icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tinyarea (:,:,iblk), & + strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + stPrtmp (:,:,:) ) + + call calc_vrel_Cb (nx_block , ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + aiu (:,:,iblk), Tbu (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + vrel (:,:,iblk), Cb (:,:,iblk)) + + ! prepare b vector (RHS) + call calc_bvec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & + aiu (:,:,iblk), uarear (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + bxfix (:,:,iblk), byfix (:,:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + vrel (:,:,iblk)) + + ! prepare precond matrix + if (precond .gt. 1) then + + call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology + icellu (iblk), & + indxui (:,iblk), indxuj(:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + + call formDiag_step2 (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + Dstrtmp (:,:,:) , vrel (:,:,iblk), & + umassdti (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & + Diagu (:,:,iblk), Diagv (:,:,iblk)) + + endif + + enddo + !$OMP END PARALLEL DO + + ! 
Compute nonlinear residual norm + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + uvel (:,:,iblk) , vvel (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + call residual_vec (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & + Au (:,:,iblk), Av (:,:,iblk), & + Fx (:,:,iblk), Fy (:,:,iblk), & + L2norm(iblk)) + enddo + !$OMP END PARALLEL DO + nlres_norm = sqrt(sum(L2norm)) + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + " nonlin_res_L2norm= ", nlres_norm + endif + ! Compute relative tolerance at first iteration + if (kOL == 1) then + tol = gammaNL*nlres_norm + endif + ! Check for nonlinear convergence + if (nlres_norm < tol) then + exit + endif + + !----------------------------------------------------------------------- + ! prep F G M R E S + !----------------------------------------------------------------------- + + icode = 0 + ! its = 0 + + ! form b vector from matrices (nblocks matrices) + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + bx (:,:,:), by (:,:,:), & + bvec(:)) + ! form sol vector for fgmres (sol is iniguess at the beginning) + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + uprev_k (:,:,:), vprev_k (:,:,:), & + sol(:)) + + ! form matrix diagonal as a vector from Diagu and Diagv arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Diagu (:,:,:), Diagv(:,:,:),& + diagvec(:)) + + !----------------------------------------------------------------------- + ! F G M R E S L O O P + !----------------------------------------------------------------------- + 1 continue + !----------------------------------------------------------------------- + + call fgmres_legacy (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & + gamma, maxits_fgmres, monitor_fgmres, & + icode,fgmres_its, res_norm) + + if (icode == 1) then + + if (precond .eq. 1) then + + wk22(:)=wk11(:) ! precond=identity + + elseif (precond .eq. 2) then ! use diagonal of A for precond step + + call precond_diag (ntot, & + diagvec (:), & + wk11 (:), wk22 (:) ) + + elseif (precond .eq. 3) then + + call pgmres_legacy (nx_block, ny_block, nblocks , & + max_blocks , icellu (:) , & + indxui (:,:) , indxuj (:,:) , & + icellt (:) , & + indxti (:,:) , indxtj (:,:) , & + dxt (:,:,:) , dyt (:,:,:) , & + dxhy (:,:,:) , dyhx (:,:,:) , & + cxp (:,:,:) , cyp (:,:,:) , & + cxm (:,:,:) , cym (:,:,:) , & + vrel (:,:,:) , Cb (:,:,:) , & + zetaD (:,:,:,:) , & + umassdti (:,:,:) , fm (:,:,:) , & + uarear (:,:,:) , diagvec(:) , & + wk22 (:) , wk11(:) , & + ntot , im_pgmres , & + epsprecond , maxits_pgmres , & + monitor_pgmres , ierr ) + endif ! precond + + goto 1 + + elseif (icode >= 2) then + + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + wk11 (:), & + uvel (:,:,:), vvel (:,:,:)) + + ! 
JFL halo update could be in subroutine... + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + call matvec (nx_block , ny_block, & + icellu (iblk) , icellt (iblk) , & + indxui (:,iblk) , indxuj (:,iblk) , & + indxti (:,iblk) , indxtj (:,iblk) , & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx (:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + uvel (:,:,iblk) , vvel (:,:,iblk), & + vrel (:,:,iblk) , Cb (:,:,iblk), & + zetaD (:,:,iblk,:), & + umassdti (:,:,iblk) , fm (:,:,iblk), & + uarear (:,:,iblk) , & + Au (:,:,iblk) , Av (:,:,iblk)) + + enddo + !$OMP END PARALLEL DO + + ! form wk2 from Au and Av arrays + call arrays_to_vec (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + Au (:,:,:), Av (:,:,:), & + wk22(:)) + + goto 1 + + endif ! icode + + !----------------------------------------------------------------------- + ! Put vector sol in uvel and vvel arrays + !----------------------------------------------------------------------- + + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + sol (:), & + uvel (:,:,:), vvel (:,:,:)) + + !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) + ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) + ! enddo + !$OMP END PARALLEL DO + + + ! phb NOT SURE IF THIS HALO UPDATE IS NEEDED + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + + ! load velocity into array for boundary updates + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + + enddo + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + + ! unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + ! Compute fixed point residual norm + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) + fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) + call calc_L2norm_squared (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) + enddo + !$OMP END PARALLEL DO + if (monitor_nonlin) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & + " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) + endif + + enddo ! outer loop + + ! 
deallocate FGMRES work arrays + deallocate(wk11, wk22, vv, ww) end subroutine picard_solver diff --git a/cicecore/cicedynB/dynamics/pgmres.F90.unused b/cicecore/cicedynB/dynamics/pgmres.F90 similarity index 99% rename from cicecore/cicedynB/dynamics/pgmres.F90.unused rename to cicecore/cicedynB/dynamics/pgmres.F90 index 04b277e15..5799f209d 100644 --- a/cicecore/cicedynB/dynamics/pgmres.F90.unused +++ b/cicecore/cicedynB/dynamics/pgmres.F90 @@ -2,7 +2,7 @@ !**s/r pgmres - preconditionner for GEM_H : PGmres ! - subroutine pgmres(nx_block, ny_block, nblocks, & + subroutine pgmres_legacy(nx_block, ny_block, nblocks, & max_blocks, icellu, & indxui, indxuj, & icellt, & From 71b463c506da5bf35ddcd79b8655233150482374 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 11 Feb 2020 16:50:29 -0500 Subject: [PATCH 128/196] cicecore: remove remaining 'SVN:$Id:' lines --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 1 - cicecore/shared/ice_fileunits.F90 | 1 - 2 files changed, 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 3f2a4b3ab..edf28a390 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1,4 +1,3 @@ -! SVN:$Id: ice_dyn_evp.F90 1228 2017-05-23 21:33:34Z tcraig $ !======================================================================= ! ! Elastic-viscous-plastic sea ice dynamics model diff --git a/cicecore/shared/ice_fileunits.F90 b/cicecore/shared/ice_fileunits.F90 index 4c91fdb2a..b6b30d47a 100644 --- a/cicecore/shared/ice_fileunits.F90 +++ b/cicecore/shared/ice_fileunits.F90 @@ -1,4 +1,3 @@ -! SVN:$Id: ice_fileunits.F90 1228 2017-05-23 21:33:34Z tcraig $ !======================================================================= ! ! This module contains an I/O unit manager for tracking, assigning From 9877fd7ba62773a35c27addc35d91b43c2478fa9 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 12 Feb 2020 15:58:09 -0500 Subject: [PATCH 129/196] ice_dyn_vp: remove 'picard_solver' subroutine The 'picard_solver' subroutine has been abandoned for quite a while and the Picard algorithm can be used by using the Anderson solver with `im_andacc = 0`, i.e. not saving any residuals. Remove the subroutine and move the logic around the namelist variable 'algo_nonlin' from 'imp_solver' to 'anderson_solver'. This way, the new developments made in 'anderson_solver' (using the FGMRES solver from GEM, choosing the orthogonalization method, the addition of the precondition subroutine) can also be used with `algo_nonlin = 1`. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 468 +--------------------- 1 file changed, 18 insertions(+), 450 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index edf28a390..520f1469d 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -475,29 +475,16 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- ! 
Start of nonlinear iteration !----------------------------------------------------------------- - if (algo_nonlin == 1) then - call picard_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & - fpresx, fpresy, & - halo_info_mask) - elseif (algo_nonlin == 2) then - call anderson_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & - fpresx, fpresy, & - halo_info_mask) - endif + call anderson_solver (icellt, icellu, & + indxti, indxtj, & + indxui, indxuj, & + aiu, ntot, & + waterx, watery, & + bxfix, byfix, & + umassdti, bvec, & + sol, diagvec, & + fpresx, fpresy, & + halo_info_mask) !----------------------------------------------------------------- ! End of nonlinear iteration !----------------------------------------------------------------- @@ -625,436 +612,11 @@ end subroutine imp_solver !======================================================================= -! Solve nonlinear equation using Picard iterative solver -! -! author: JF Lemieux, A. Qaddouri and F. Dupont ECCC - - subroutine picard_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & - fpresx, fpresy, & - halo_info_mask) - - use ice_arrays_column, only: Cdn_ocn - use ice_blocks, only: nx_block, ny_block - use ice_boundary, only: ice_HaloUpdate - use ice_domain, only: halo_info, maskhalo_dyn - use ice_domain_size, only: max_blocks - use ice_flux, only: uocn, vocn, fm, Tbu - use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & - tarear, uarear, tinyarea - use ice_state, only: uvel, vvel, strength - use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop - - integer (kind=int_kind), intent(in) :: & - ntot ! size of problem for fgmres (for given cpu) - - integer (kind=int_kind), dimension(max_blocks), intent(in) :: & - icellt , & ! no. of cells where icetmask = 1 - icellu ! no. of cells where iceumask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj , & ! compressed index in j-direction - indxui , & ! compressed index in i-direction - indxuj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - aiu , & ! ice fraction on u-grid - waterx , & ! for ocean stress calculation, x (m/s) - watery , & ! for ocean stress calculation, y (m/s) - bxfix , & ! part of bx that is constant during Picard - byfix , & ! part of by that is constant during Picard - umassdti ! mass of U-cell/dte (kg/m^2 s) - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & - fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k - fpresy ! y fixed point residual vector, fy = vvel - vprev_k - - real (kind=dbl_kind), dimension (ntot), intent(inout) :: & - bvec , & ! RHS vector for FGMRES - sol , & ! solution vector for FGMRES - diagvec ! diagonal of matrix A for preconditioners - - type (ice_halo) :: & - halo_info_mask ! ghost cell update info for masked halo - - ! local variables - - integer (kind=int_kind) :: & - kOL , & ! outer loop iteration - iblk , & ! block index - icode , & ! code for fgmres solver - its , & ! iteration nb for fgmres - fgmres_its , & ! final nb of fgmres iterations - ierr ! 
code for pgmres preconditioner !phb: needed? - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - uprev_k , & ! uvel at previous Picard iteration - vprev_k , & ! vvel at previous Picard iteration - vrel , & ! coeff for tauw - Cb , & ! seabed stress coeff - bx , & ! b vector - by , & ! b vector - Diagu , & ! Diagonal (u component) of the matrix A - Diagv , & ! Diagonal (v component) of the matrix A - Au , & ! matvec, Fx = bx - Au - Av , & ! matvec, Fy = by - Av - Fx , & ! x residual vector, Fx = bx - Au - Fy ! y residual vector, Fy = by - Av - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & - zetaD ! zetaD = 2zeta (viscous coeff) - - real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? - Dstrtmp - - real (kind=dbl_kind), dimension (max_blocks) :: & - L2norm ! to compute l^2 norm of grid function - - real (kind=dbl_kind), allocatable :: & - vv(:,:), ww(:,:) ! work arrays for FGMRES - - real (kind=dbl_kind), allocatable :: & - wk11(:), wk22(:) ! work vectors for FGMRES - - real (kind=dbl_kind) :: & - conv , & ! ratio of current residual and initial residual for FGMRES !phb: needed for fgmres2 - tol , & ! tolerance for nonlinear convergence: gammaNL * initial residual norm - nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x) - res_norm ! residual norm for FGMRES - - character(len=*), parameter :: subname = '(picard_solver)' - - ! Allocate space for FGMRES work arrays - allocate(wk11(ntot), wk22(ntot)) - allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - - ! Start iterations - do kOL = 1,maxits_nonlin ! outer loop - - !----------------------------------------------------------------- - ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) - !----------------------------------------------------------------- - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - uprev_k(:,:,iblk) = uvel(:,:,iblk) - vprev_k(:,:,iblk) = vvel(:,:,iblk) - - call calc_zeta_Pr (nx_block , ny_block, & - icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tinyarea (:,:,iblk), & - strength (:,:,iblk), zetaD (:,:,iblk,:) ,& - stPrtmp (:,:,:) ) - - call calc_vrel_Cb (nx_block , ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - aiu (:,:,iblk), Tbu (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - vrel (:,:,iblk), Cb (:,:,iblk)) - - ! prepare b vector (RHS) - call calc_bvec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & - aiu (:,:,iblk), uarear (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - vrel (:,:,iblk)) - - ! prepare precond matrix - if (precond .gt. 1) then - - call formDiag_step1 (nx_block , ny_block, & ! 
D term due to rheology - icellu (iblk), & - indxui (:,iblk), indxuj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) - - call formDiag_step2 (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - Dstrtmp (:,:,:) , vrel (:,:,iblk), & - umassdti (:,:,iblk), & - uarear (:,:,iblk), Cb (:,:,iblk), & - Diagu (:,:,iblk), Diagv (:,:,iblk)) - - endif - - enddo - !$OMP END PARALLEL DO - - ! Compute nonlinear residual norm - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - call residual_vec (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - bx (:,:,iblk), by (:,:,iblk), & - Au (:,:,iblk), Av (:,:,iblk), & - Fx (:,:,iblk), Fy (:,:,iblk), & - L2norm(iblk)) - enddo - !$OMP END PARALLEL DO - nlres_norm = sqrt(sum(L2norm)) - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " nonlin_res_L2norm= ", nlres_norm - endif - ! Compute relative tolerance at first iteration - if (kOL == 1) then - tol = gammaNL*nlres_norm - endif - ! Check for nonlinear convergence - if (nlres_norm < tol) then - exit - endif - - !----------------------------------------------------------------------- - ! prep F G M R E S - !----------------------------------------------------------------------- - - icode = 0 - ! its = 0 - - ! form b vector from matrices (nblocks matrices) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - bx (:,:,:), by (:,:,:), & - bvec(:)) - ! form sol vector for fgmres (sol is iniguess at the beginning) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) - - ! form matrix diagonal as a vector from Diagu and Diagv arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Diagu (:,:,:), Diagv(:,:,:),& - diagvec(:)) - - !----------------------------------------------------------------------- - ! F G M R E S L O O P - !----------------------------------------------------------------------- - 1 continue - !----------------------------------------------------------------------- - - call fgmres_legacy (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - gamma, maxits_fgmres, monitor_fgmres, & - icode,fgmres_its, res_norm) - - if (icode == 1) then - - if (precond .eq. 1) then - - wk22(:)=wk11(:) ! precond=identity - - elseif (precond .eq. 2) then ! use diagonal of A for precond step - - call precond_diag (ntot, & - diagvec (:), & - wk11 (:), wk22 (:) ) - - elseif (precond .eq. 
3) then - - call pgmres_legacy (nx_block, ny_block, nblocks , & - max_blocks , icellu (:) , & - indxui (:,:) , indxuj (:,:) , & - icellt (:) , & - indxti (:,:) , indxtj (:,:) , & - dxt (:,:,:) , dyt (:,:,:) , & - dxhy (:,:,:) , dyhx (:,:,:) , & - cxp (:,:,:) , cyp (:,:,:) , & - cxm (:,:,:) , cym (:,:,:) , & - vrel (:,:,:) , Cb (:,:,:) , & - zetaD (:,:,:,:) , & - umassdti (:,:,:) , fm (:,:,:) , & - uarear (:,:,:) , diagvec(:) , & - wk22 (:) , wk11(:) , & - ntot , im_pgmres , & - epsprecond , maxits_pgmres , & - monitor_pgmres , ierr ) - endif ! precond - - goto 1 - - elseif (icode >= 2) then - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - wk11 (:), & - uvel (:,:,:), vvel (:,:,:)) - - ! JFL halo update could be in subroutine... - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo - !$OMP END PARALLEL DO - - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - uvel (:,:,iblk) , vvel (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - - enddo - !$OMP END PARALLEL DO - - ! form wk2 from Au and Av arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - Au (:,:,:), Av (:,:,:), & - wk22(:)) - - goto 1 - - endif ! icode - - !----------------------------------------------------------------------- - ! Put vector sol in uvel and vvel arrays - !----------------------------------------------------------------------- - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - sol (:), & - uvel (:,:,:), vvel (:,:,:)) - - !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! uvel(:,:,iblk) = (c1-krelax)*uprev_k(:,:,iblk) + krelax*uvel(:,:,iblk) - ! vvel(:,:,iblk) = (c1-krelax)*vprev_k(:,:,iblk) + krelax*vvel(:,:,iblk) - ! enddo - !$OMP END PARALLEL DO - - - ! phb NOT SURE IF THIS HALO UPDATE IS NEEDED - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - - ! load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - - enddo - !$OMP END PARALLEL DO - - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) - - ! unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - - ! 
Compute fixed point residual norm - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) - fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - call calc_L2norm_squared (nx_block , ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk), & - fpresx(:,:,iblk), fpresy(:,:,iblk), & - L2norm (iblk)) - enddo - !$OMP END PARALLEL DO - if (monitor_nonlin) then - write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", kOL, & - " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) - endif - - enddo ! outer loop - - ! deallocate FGMRES work arrays - deallocate(wk11, wk22, vv, ww) - - end subroutine picard_solver - -!======================================================================= - ! Solve the nonlinear equation F(u,v) = 0, where ! F(u,v) := A(u,v) * (u,v) - b(u,v) -! using Anderson acceleration (accelerated fixed point iteration) +! using Anderson acceleration (accelerated fixed point (Picard) iteration) ! -! author: P. Blain ECCC +! author: JF Lemieux, A. Qaddouri, F. Dupont and P. Blain ECCC subroutine anderson_solver (icellt, icellu, & indxti, indxtj, & @@ -1186,6 +748,11 @@ subroutine anderson_solver (icellt, icellu, & ! Initialization res_num = 0 + ! If Picard iteration chosen, set number of saved residuals to zero + if (algo_nonlin == 1) then + im_andacc = 0 + endif + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks uprev_k(:,:,iblk) = uvel(:,:,iblk) @@ -1468,6 +1035,7 @@ subroutine anderson_solver (icellt, icellu, & uvel (:,:,:), vvel (:,:,:)) ! phb NOT SURE IF THIS HALO UPDATE IS ACTUALLY NEEDED + ! Should use ice_Haloupdate_vel ! Load velocity into array for boundary updates !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks From d67e722a908020f1a999490085cded7a87622475 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 17 Feb 2020 13:56:28 -0500 Subject: [PATCH 130/196] ice_dyn_vp: remove legacy FGMRES solver The new FGMRES solver adapted from GEM was already verified to give the same results as the legacy implementation. Remove the legacy implementation. --- cicecore/cicedynB/dynamics/fgmresD.F90 | 289 ------------------- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 206 +------------- cicecore/cicedynB/dynamics/pgmres.F90 | 325 ---------------------- 3 files changed, 1 insertion(+), 819 deletions(-) delete mode 100644 cicecore/cicedynB/dynamics/fgmresD.F90 delete mode 100644 cicecore/cicedynB/dynamics/pgmres.F90 diff --git a/cicecore/cicedynB/dynamics/fgmresD.F90 b/cicecore/cicedynB/dynamics/fgmresD.F90 deleted file mode 100644 index d65e8cba0..000000000 --- a/cicecore/cicedynB/dynamics/fgmresD.F90 +++ /dev/null @@ -1,289 +0,0 @@ - subroutine fgmres_legacy (n,im,rhs,sol,i,vv,w,wk1, wk2, & - gamma,maxits,iout,icode,its,ro) - - use ice_fileunits, only: nu_diag - -!----------------------------------------------------------------------- -! jfl Dec 1st 2006. We modified the routine so that it is double precison. -! Here are the modifications: -! 1) implicit real (a-h,o-z) becomes implicit real*8 (a-h,o-z) -! 2) real bocomes real*8 -! 3) subroutine scopy.f has been changed for dcopy.f -! 4) subroutine saxpy.f has been changed for daxpy.f -! 5) function sdot.f has been changed for ddot.f -! 6) 1e-08 becomes 1d-08 -! -! Be careful with the dcopy, daxpy and ddot code...there is a slight -! difference with the single precision versions (scopy, saxpy and sdot). -! In the single precision versions, the array are declared sightly differently. -! It is written for single precision: -! -! 
modified 12/3/93, array(1) declarations changed to array(*) -!----------------------------------------------------------------------- - - implicit double precision (a-h,o-z) !jfl modification - integer n, im, maxits, iout, icode - double precision rhs(*), sol(*), vv(n,im+1),w(n,im) - double precision wk1(n), wk2(n), gamma, ro -!----------------------------------------------------------------------- -! flexible GMRES routine. This is a version of GMRES which allows a -! a variable preconditioner. Implemented with a reverse communication -! protocole for flexibility - -! DISTRIBUTED VERSION (USES DISTDOT FOR DDOT) -! explicit (exact) residual norms for restarts -! written by Y. Saad, modified by A. Malevsky, version February 1, 1995 -!----------------------------------------------------------------------- -! This Is A Reverse Communication Implementation. -!------------------------------------------------- -! USAGE: (see also comments for icode below). FGMRES -! should be put in a loop and the loop should be active for as -! long as icode is not equal to 0. On return fgmres will -! 1) either be requesting the new preconditioned vector applied -! to wk1 in case icode.eq.1 (result should be put in wk2) -! 2) or be requesting the product of A applied to the vector wk1 -! in case icode.eq.2 (result should be put in wk2) -! 3) or be terminated in case icode .eq. 0. -! on entry always set icode = 0. So icode should be set back to zero -! upon convergence. -!----------------------------------------------------------------------- -! Here is a typical way of running fgmres: -! -! icode = 0 -! 1 continue -! call fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,iout,icode) -! -! if (icode .eq. 1) then -! call precon(n, wk1, wk2) <--- user's variable preconditioning -! goto 1 -! else if (icode .ge. 2) then -! call matvec (n,wk1, wk2) <--- user's matrix vector product. -! goto 1 -! else -! ----- done ---- -! ......... -!----------------------------------------------------------------------- -! list of parameters -!------------------- -! -! n == integer. the dimension of the problem -! im == size of Krylov subspace: should not exceed 50 in this -! version (can be reset in code. looking at comment below) -! rhs == vector of length n containing the right hand side -! sol == initial guess on input, approximate solution on output -! vv == work space of size n x (im+1) -! w == work space of length n x im -! wk1, -! wk2, == two work vectors of length n each used for the reverse -! communication protocole. When on return (icode .ne. 1) -! the user should call fgmres again with wk2 = precon * wk1 -! and icode untouched. When icode.eq.1 then it means that -! convergence has taken place. -! -! eps == tolerance for stopping criterion. process is stopped -! as soon as ( ||.|| is the euclidean norm): -! || current residual||/||initial residual|| <= eps -! -! maxits== maximum number of iterations allowed -! -! iout == output unit number number for printing intermediate results -! if (iout .le. 0) no statistics are printed. -! if (iout .eq. 1) L2norm of 1st ite is printed. -! if (iout .gt. 1) L2norm of all ite are printed. -! -! icode = integer. indicator for the reverse communication protocole. -! ON ENTRY : icode should be set to icode = 0. -! ON RETURN: -! * icode .eq. 1 value means that fgmres has not finished -! and that it is requesting a preconditioned vector before -! continuing. The user must compute M**(-1) wk1, where M is -! the preconditioing matrix (may vary at each call) and wk1 is -! 
the vector as provided by fgmres upun return, and put the -! result in wk2. Then fgmres must be called again without -! changing any other argument. -! * icode .eq. 2 value means that fgmres has not finished -! and that it is requesting a matrix vector product before -! continuing. The user must compute A * wk1, where A is the -! coefficient matrix and wk1 is the vector provided by -! upon return. The result of the operation is to be put in -! the vector wk2. Then fgmres must be called again without -! changing any other argument. -! * icode .eq. 0 means that fgmres has finished and sol contains -! the approximate solution. -! comment: typically fgmres must be implemented in a loop -! with fgmres being called as long icode is returned with -! a value .ne. 0. -!----------------------------------------------------------------------- -! local variables -- !jfl modif - double precision hh(201,200),c(200),s(200),rs(201),t,ddot,sqrt -! -!------------------------------------------------------------- -! arnoldi size should not exceed 50 in this version.. -! to reset modify sizes of hh, c, s, rs -!------------------------------------------------------------- - - save - data epsmac/1.d-16/ -! -! computed goto -! - goto (100,200,300,11) icode +1 - 100 continue - n1 = n + 1 - its = 0 -!------------------------------------------------------------- -! ** outer loop starts here.. -!--------------compute initial residual vector -------------- -! 10 continue - call dcopy (n, sol, 1, wk1, 1) !jfl modification - icode = 3 - return - 11 continue - do j=1,n - vv(j,1) = rhs(j) - wk2(j) - enddo - 20 ro = ddot(n, vv, 1, vv,1) !jfl modification - ro = sqrt(ro) - if (ro .eq. 0.0d0) goto 999 - t = 1.0d0/ ro - do j=1, n - vv(j,1) = vv(j,1)*t - enddo -! if (its .eq. 0) eps1=eps - if (its .eq. 0) then - r0 = ro - eps1=gamma*ro - endif - - if (iout .gt. 0) write(nu_diag, 199) its, ro!& -! -! initialize 1-st term of rhs of hessenberg system.. -! - rs(1) = ro - i = 0 - 4 i=i+1 - its = its + 1 - i1 = i + 1 - do k=1, n - wk1(k) = vv(k,i) - enddo -! -! return -! - icode = 1 - - return - 200 continue - do k=1, n - w(k,i) = wk2(k) - enddo -! -! call matvec operation -! - icode = 2 - call dcopy(n, wk2, 1, wk1, 1) !jfl modification -! -! return -! - return - 300 continue -! -! first call to ope corresponds to intialization goto back to 11. -! -! if (icode .eq. 3) goto 11 - call dcopy (n, wk2, 1, vv(1,i1), 1) !jfl modification -! -! modified gram - schmidt... -! - do j=1, i - t = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification - hh(j,i) = t - call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) !jfl modification - enddo - t = sqrt(ddot(n, vv(1,i1), 1, vv(1,i1), 1)) !jfl modification - hh(i1,i) = t - if (t .eq. 0.0d0) goto 58 - t = 1.0d0 / t - do k=1,n - vv(k,i1) = vv(k,i1)*t - enddo -! -! done with modified gram schimd and arnoldi step. -! now update factorization of hh -! - 58 if (i .eq. 1) goto 121 -! -! perfrom previous transformations on i-th column of h -! - do k=2,i - k1 = k-1 - t = hh(k1,i) - hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) - hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) - enddo - 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) - if (gam .eq. 0.0d0) gam = epsmac -!-----------#determine next plane rotation #------------------- - c(i) = hh(i,i)/gam - s(i) = hh(i1,i)/gam - rs(i1) = -s(i)*rs(i) - rs(i) = c(i)*rs(i) -! -! determine res. norm. and test for convergence- -! - hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) - ro = abs(rs(i1)) - if (iout .gt. 1) & - write(nu_diag, 199) its, ro - if (i .lt. im .and. (ro .gt. eps1)) goto 4 -! -! 
now compute solution. first solve upper triangular system. -! - rs(i) = rs(i)/hh(i,i) - do ii=2,i - k=i-ii+1 - k1 = k+1 - t=rs(k) - do j=k1,i - t = t-hh(k,j)*rs(j) - enddo - rs(k) = t/hh(k,k) - enddo -! -! done with back substitution.. -! now form linear combination to get solution -! - do j=1, i - t = rs(j) - call daxpy(n, t, w(1,j), 1, sol,1) !jfl modification - enddo -! -! test for return -! - if (ro .le. eps1 .or. its .ge. maxits) goto 999 -! -! else compute residual vector and continue.. -! -! goto 10 - - do j=1,i - jj = i1-j+1 - rs(jj-1) = -s(jj-1)*rs(jj) - rs(jj) = c(jj-1)*rs(jj) - enddo - do j=1,i1 - t = rs(j) - if (j .eq. 1) t = t-1.0d0 - call daxpy (n, t, vv(1,j), 1, vv, 1) - enddo -! -! restart outer loop. -! - goto 20 - 999 icode = 0 - - 199 format('monitor_fgmres: iter_fmgres=', i4, ' L2norm=', d26.16) -! - return -!-----end-of-fgmres----------------------------------------------------- -!----------------------------------------------------------------------- - end diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 520f1469d..b0a969114 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -898,23 +898,12 @@ subroutine anderson_solver (icellt, icellu, & Diagu, Diagv, & gamma, im_fgmres, & maxits_fgmres, nbiter, conv) - ! Put FGMRES solution solx,soly in fpfunc vector + ! Put FGMRES solution solx,soly in fpfunc vector (needed for anderson) call arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & solx (:,:,:), soly (:,:,:), & fpfunc(:)) - ! FGMRES linear solver (solution is in fpfunc) - ! fpfunc = sol - ! call fgmres_solver (ntot, bvec, & - ! fpfunc, diagvec, & - ! icellt, icellu, & - ! indxti, indxtj, & - ! indxui, indxuj, & - ! zetaD, & - ! Cb, vrel, & - ! aiu, umassdti) - elseif (fpfunc_andacc == 2) then ! g_2(x) = x - A(x)x + b(x) = x - F(x) endif @@ -1085,199 +1074,6 @@ end subroutine anderson_solver !======================================================================= -! Driver for the FGMRES linear solver - - subroutine fgmres_solver (ntot, bvec, & - sol, diagvec, & - icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - zetaD, & - Cb, vrel, & - aiu, umassdti ) - - use ice_blocks, only: nx_block, ny_block - use ice_boundary, only: ice_HaloUpdate - use ice_domain, only: halo_info - use ice_domain_size, only: max_blocks - use ice_flux, only: fm - use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & - tarear, uarear, tinyarea - use ice_state, only: uvel, vvel - - integer (kind=int_kind), intent(in) :: & - ntot ! size of problem for fgmres (for given cpu) - - real (kind=dbl_kind), dimension (ntot), intent(in) :: & - bvec , & ! RHS vector for FGMRES - diagvec ! diagonal of matrix A for preconditioners - - real (kind=dbl_kind), dimension (ntot), intent(inout) :: & - sol ! solution vector for FGMRES - - integer (kind=int_kind), dimension(max_blocks), intent(in) :: & - icellt , & ! no. of cells where icetmask = 1 - icellu ! no. of cells where iceumask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj , & ! compressed index in j-direction - indxui , & ! compressed index in i-direction - indxuj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - vrel , & ! coeff for tauw - Cb , & ! seabed stress coeff - aiu , & ! 
ice fraction on u-grid - umassdti ! mass of U-cell/dte (kg/m^2 s) - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & - zetaD ! zetaD = 2zeta (viscous coeff) - - ! local variables - - integer (kind=int_kind) :: & - iblk , & ! block index - icode , & ! code for fgmres solver - its , & ! iteration nb for fgmres - fgmres_its , & ! final nb of fgmres iterations - ierr ! code for pgmres preconditioner !phb: needed? - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - Au , & ! matvec, Fx = bx - Au - Av ! matvec, Fy = by - Av - - real (kind=dbl_kind), allocatable :: & - vv(:,:), ww(:,:) ! work arrays for FGMRES - - real (kind=dbl_kind), allocatable :: & - wk11(:), wk22(:) ! work vectors for FGMRES - - real (kind=dbl_kind) :: & - res_norm ! residual norm for FGMRES - - character(len=*), parameter :: subname = '(fgmres_solver)' - - ! ! Allocate space for FGMRES work arrays - ! allocate(wk11(ntot), wk22(ntot)) - ! allocate(vv(ntot,im_fgmres+1), ww(ntot,im_fgmres)) - ! - ! !----------------------------------------------------------------------- - ! ! prep F G M R E S - ! !----------------------------------------------------------------------- - ! - ! icode = 0 - ! - ! !----------------------------------------------------------------------- - ! ! F G M R E S L O O P - ! !----------------------------------------------------------------------- - ! 1 continue - ! !----------------------------------------------------------------------- - ! - ! - ! call fgmres (ntot,im_fgmres,bvec,sol,its,vv,ww,wk11,wk22, & - ! gamma, maxits_fgmres,monitor_fgmres, & - ! icode, fgmres_its, res_norm) - ! - ! if (icode == 1) then - ! - ! if (precond .eq. 1) then - ! - ! wk22(:)=wk11(:) ! precond=identity - ! - ! elseif (precond .eq. 2) then ! use diagonal of A for precond step - ! - ! call precond_diag (ntot, & - ! diagvec (:), & - ! wk11 (:), wk22 (:) ) - ! - ! elseif (precond .eq. 3) then - ! - ! call pgmres (nx_block, ny_block, nblocks , & - ! max_blocks , icellu (:) , & - ! indxui (:,:) , indxuj (:,:) , & - ! icellt (:) , & - ! indxti (:,:) , indxtj (:,:) , & - ! dxt (:,:,:) , dyt (:,:,:) , & - ! dxhy (:,:,:) , dyhx (:,:,:) , & - ! cxp (:,:,:) , cyp (:,:,:) , & - ! cxm (:,:,:) , cym (:,:,:) , & - ! vrel (:,:,:) , Cb (:,:,:) , & - ! zetaD (:,:,:,:) , & - ! umassdti (:,:,:) , fm (:,:,:) , & - ! uarear (:,:,:) , diagvec(:) , & - ! wk22 (:) , wk11(:) , & - ! ntot , im_pgmres , & - ! epsprecond , maxits_pgmres , & - ! monitor_pgmres , ierr ) - ! endif ! precond - ! - ! goto 1 - ! - ! elseif (icode >= 2) then - ! - ! call vec_to_arrays (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! wk11 (:), & - ! uvel (:,:,:), vvel (:,:,:)) - ! - ! ! JFL halo update could be in subroutine... - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! fld2(:,:,1,iblk) = uvel(:,:,iblk) - ! fld2(:,:,2,iblk) = vvel(:,:,iblk) - ! enddo - ! !$OMP END PARALLEL DO - ! - ! call ice_HaloUpdate (fld2, halo_info, & - ! field_loc_NEcorner, field_type_vector) - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! uvel(:,:,iblk) = fld2(:,:,1,iblk) - ! vvel(:,:,iblk) = fld2(:,:,2,iblk) - ! enddo - ! !$OMP END PARALLEL DO - ! - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! - ! call matvec (nx_block , ny_block, & - ! icellu (iblk) , icellt (iblk) , & - ! indxui (:,iblk) , indxuj (:,iblk) , & - ! indxti (:,iblk) , indxtj (:,iblk) , & - ! dxt (:,:,iblk) , dyt (:,:,iblk), & - ! dxhy (:,:,iblk) , dyhx (:,:,iblk), & - ! 
cxp (:,:,iblk) , cyp (:,:,iblk), & - ! cxm (:,:,iblk) , cym (:,:,iblk), & - ! uvel (:,:,iblk) , vvel (:,:,iblk), & - ! vrel (:,:,iblk) , Cb (:,:,iblk), & - ! zetaD (:,:,iblk,:), & - ! umassdti (:,:,iblk) , fm (:,:,iblk), & - ! uarear (:,:,iblk) , & - ! Au (:,:,iblk) , Av (:,:,iblk)) - ! - ! enddo - ! !$OMP END PARALLEL DO - ! - ! ! form wk2 from Au and Av arrays - ! call arrays_to_vec (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! Au (:,:,:), Av (:,:,:), & - ! wk22(:)) - ! - ! goto 1 - ! - ! endif ! icode - ! - ! deallocate(wk11, wk22, vv, ww) - - end subroutine fgmres_solver - -!======================================================================= - ! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. subroutine calc_zeta_Pr (nx_block, ny_block, & diff --git a/cicecore/cicedynB/dynamics/pgmres.F90 b/cicecore/cicedynB/dynamics/pgmres.F90 deleted file mode 100644 index 5799f209d..000000000 --- a/cicecore/cicedynB/dynamics/pgmres.F90 +++ /dev/null @@ -1,325 +0,0 @@ - -!**s/r pgmres - preconditionner for GEM_H : PGmres -! - - subroutine pgmres_legacy(nx_block, ny_block, nblocks, & - max_blocks, icellu, & - indxui, indxuj, & - icellt, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - vrel, Cb, & - zetaD, & - umassdti, fm, & - uarear, diagvec, & - sol, rhs, & - n, im, & - eps, maxits, & - iout, ierr) - -!----------------------------------------------------------------------- - -! use grid_options -! use prec - use ice_kinds_mod - use ice_dyn_vp, only: matvec, arrays_to_vec, vec_to_arrays, precond_diag - use ice_fileunits, only: nu_diag - - implicit none - -!#include - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - nblocks, & ! nb of blocks - max_blocks ! max nb of blocks - - - integer (kind=int_kind), dimension (max_blocks), intent(in) :: & - icellu , & - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & - intent(in) :: & - indxui , & ! compressed index in i-direction - indxuj , & ! compressed index in j-direction - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), & - intent(in) :: & - vrel , & ! coefficient for tauw - Cb , & ! coefficient for basal stress - umassdti, & ! mass of U-cell/dt (kg/m^2 s) - fm , & ! Coriolis param. * mass in U-cell (kg/s) - uarear ! 1/uarea - - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), & - intent(in) :: & - zetaD ! 2*zeta - - real (kind=dbl_kind), dimension (n), intent(in) :: & - diagvec - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & - utp , & ! x-component of velocity (m/s) - vtp , & ! y-component of velocity (m/s) - Au , & ! matvec, Fx = Au - bx (N/m^2)! jfl - Av ! matvec, Fy = Av - by (N/m^2)! jfl - - integer n, im, maxits, iout, ierr, iblk - real*8 rhs(n), sol(n) ,eps ! wk11, wk22, eps -! Abdessamad Qaddouri - 2018 -! -!revision -! v5.0 - Qaddouri A. 
- initial version - - real*8 vv(n,im+1), gam,eps1 - real*8 wk(n),r0 -!----------------------------------------------------------------------* - integer kmax,ii,i,j,n1,its,k1,i1,jj,k - parameter (kmax=50) - - real*8 hh(kmax+1,kmax), c(kmax), s(kmax), rs(kmax+1),t - real*8 hhloc(kmax+1,kmax) -!------------------------------------------------------------- -! arnoldi size should not exceed kmax=50 in this version.. -! to reset modify paramter kmax accordingly. -!------------------------------------------------------------- - real*8 epsmac ,ro,ddot - parameter (epsmac=1.d-16) - integer l -! character(len= 9) communicate_S -! communicate_S = "GRID" -! if (Grd_yinyang_L) communicate_S = "MULTIGRID" - - - n1 = n + 1 - its = 0 - sol=0.0 !JFL ...veut-on vraiment mettre sol = 0 ici?????? -!------------------------------------------------------------- -! outer loop starts here.. -!-------------- compute initial residual vector -------------- - do 21 j=1,n - vv(j,1) = rhs(j) - 21 continue - -!------------------------------------------------------------- - 20 continue - ro = ddot(n, vv,1,vv,1) - ro = dsqrt(ro) - - r0=ro - - if (ro .eq. 0.0d0) goto 999 - t = 1.0d0/ ro - do 210 j=1, n - vv(j,1) = vv(j,1)*t - 210 continue - if (its .eq. 0) eps1=eps*ro - if (iout .gt. 0 .and. its .eq. 0)& - write(nu_diag, 199) its, ro ,eps1 -! ** initialize 1-st term of rhs of hessenberg system.. - rs(1) = ro - i = 0 - 4 i=i+1 - its = its + 1 - i1 = i + 1 - - do l=1,n - rhs(l)= 0.0 - wk(l)= vv(l,i) - enddo -! precond - call precond_diag (n, & - diagvec (:), & - wk (:), rhs (:) ) - -! rhs = wk !!! JFL - -! matrix-vector -! call sol_matvec_H JFL - - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), n, & - indxui (:,:), indxuj(:,:), & - rhs (:), & - utp (:,:,:), vtp (:,:,:)) - - - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & - dxt (:,:,iblk) , dyt (:,:,iblk), & - dxhy (:,:,iblk) , dyhx (:,:,iblk), & - cxp (:,:,iblk) , cyp (:,:,iblk), & - cxm (:,:,iblk) , cym (:,:,iblk), & - utp (:,:,iblk) , vtp (:,:,iblk), & - vrel (:,:,iblk) , Cb (:,:,iblk), & - zetaD (:,:,iblk,:), & - umassdti (:,:,iblk) , fm (:,:,iblk), & - uarear (:,:,iblk) , & - Au (:,:,iblk) , Av (:,:,iblk)) - - enddo - !$OMP END PARALLEL DO - - ! form wk2 from Au and Av arrays - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), n, & - indxui (:,:), indxuj(:,:), & - Au (:,:,:), Av (:,:,:), & - vv(1,i1)) - -! classical gram - schmidt... -! - do 55 j=1, i - hhloc(j,i) = ddot(n, vv(1,j), 1, vv(1,i1), 1) - hh(j,i) = hhloc(j,i) - 55 continue - - do 56 j=1, i - call daxpy(n, -hh(j,i), vv(1,j), 1, vv(1,i1), 1) - 56 continue - t = ddot(n, vv(1,i1), 1, vv(1,i1), 1) -! - t=dsqrt(t) -! - - hh(i1,i) = t - if ( t .eq. 0.0d0) goto 58 - t = 1.0d0/t - do 57 k=1,n - vv(k,i1) = vv(k,i1)*t - 57 continue -! -! done with modified gram schimd and arnoldi step.. -! now update factorization of hh -! - 58 if (i == 1) goto 121 -! -! perfrom previous transformations on i-th column of h -! - do 66 k=2,i - k1 = k-1 - t = hh(k1,i) - hh(k1,i) = c(k1)*t + s(k1)*hh(k,i) - hh(k,i) = -s(k1)*t + c(k1)*hh(k,i) - 66 continue - 121 gam = sqrt(hh(i,i)**2 + hh(i1,i)**2) - -! if gamma is zero then any small value will do... -! will affect only residual estimate -! 
- if (gam == 0.0d0) gam = epsmac -!-----------#determinenextplane rotation #------------------- - c(i) = hh(i,i)/gam - s(i) = hh(i1,i)/gam - rs(i1) = -s(i)*rs(i) - rs(i) = c(i)*rs(i) - -! -! detrermine residual norm and test for convergence- -! - hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i) - ro = abs(rs(i1)) - if (iout .gt. 0) & - write(nu_diag, 199) its, ro , eps1 - if (i .lt. im .and. (ro .gt. eps1)) goto 4 -! -! now compute solution. first solve upper triangular system. -! - rs(i) = rs(i)/hh(i,i) - do 30 ii=2,i - k=i-ii+1 - k1 = k+1 - t=rs(k) - do 40 j=k1,i - t = t-hh(k,j)*rs(j) - 40 continue - rs(k) = t/hh(k,k) - 30 continue -! -! form linear combination of -!,i)'s to get solution -! - t = rs(1) - do 15 k=1, n - rhs(k) = vv(k,1)*t - 15 continue - do 16 j=2, i - t = rs(j) - do 161 k=1, n - rhs(k) = rhs(k)+t*vv(k,j) - 161 continue - 16 continue -! -! call preconditioner. -! - - do l=1,n - wk(l)= rhs(l) - rhs(l)=0.0 - enddo -! precond - call precond_diag (n, & - diagvec (:), & - wk (:), rhs (:) ) -! rhs = wk !!! JFL - - do 17 k=1, n - sol(k) = sol(k) + rhs(k) - 17 continue -! -! restart outer loop when necessary -! - if (ro .le. eps1) goto 990 - if (its .ge. maxits) goto 991 -! -! else compute residual vector and continue.. -! - do 24 j=1,i - jj = i1-j+1 - rs(jj-1) = -s(jj-1)*rs(jj) - rs(jj) = c(jj-1)*rs(jj) - 24 continue - do 25 j=1,i1 - t = rs(j) - if (j .eq. 1) t = t-1.0d0 - call daxpy (n, t, vv(1,j), 1, vv, 1) - 25 continue - 199 format('monitor_pgmres: iter_pmgres=', i4, ' L2norm=', d26.16, ' epsprecond*initial_L2norm=', d26.6) -! restart outer loop. - goto 20 - 990 ierr = 0 -! write(iout, 198) its, ro/r0 - 198 format(' its =', i4, ' conv =', d20.6) - return - 991 ierr = 1 -! write(iout, 198) its, ro/r0 - - return - 999 continue - ierr = -1 -! write(iout, 198) its, ro/r0 - - return -!-----------------end of pgmres --------------------------------------- -!----------------------------------------------------------------------- - end From 829d6ed9ef0b5737c0a42073d4c9b7ea9ca612f6 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 2 Mar 2020 14:55:24 -0500 Subject: [PATCH 131/196] ice_dyn_vp: pgmres: use Classical Gram-Schmidt The legacy PGMRES preconditioner uses Classical Gram-Schmidt for orthogonalization, so in order to compare the new solver with the old one we must use CGS in PGMRES. A following commit will add a namelist variable to control this behavior. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index b0a969114..ce08da292 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -3727,6 +3727,9 @@ subroutine pgmres (zetaD, & integer (kind=int_kind) :: & precond_type ! type of preconditioner + character(len=char_len) :: & + ortho_type ! type of orthogonalization + real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep real (kind=dbl_kind) :: & @@ -3746,6 +3749,7 @@ subroutine pgmres (zetaD, & conv = c1 precond_type = 2 ! Jacobi preconditioner + ortho_type = 'cgs' ! classical gram-schmidt ! 
Cells with no ice should be zero-initialized workspace_x = c0 From 5eaf2e6f98acaca19475a0515faf5e8df6278c0d Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 2 Mar 2020 15:27:52 -0500 Subject: [PATCH 132/196] ice_in: change maxits_{f,p}gmres to be in line with new meaning Change the reference namelist to conform with the new meaning of `maxits_{f,p}gmres`, which is different between the old and the new {F,P}GMRES solvers: - in the old solvers 'maxits_*' is the total number of inner (Arnoldi) iterations - in the new solvers 'maxits_*' is given as the 'maxouter' argument, i.e. the total number of outer (restarts) iterations and so 'maxits' from the old solvers correspond to 'maxouter*maxinner' This means that in the namelist 'maxits_*' must be set to 1 if it was previously set to the same thing as 'im_*' (i.e., do not do any restarts). --- configuration/scripts/ice_in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 7adc7701f..ce7e33d52 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -143,8 +143,8 @@ precond = 3 im_fgmres = 50 im_pgmres = 5 - maxits_fgmres = 50 - maxits_pgmres = 5 + maxits_fgmres = 1 + maxits_pgmres = 1 monitor_nonlin = .false. monitor_fgmres = 1 monitor_pgmres = 1 From 668cad8f2aa6cbb1536e87b2fe5821c02bc9ea8d Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 3 Mar 2020 09:00:37 -0500 Subject: [PATCH 133/196] ice_dyn_vp: uniformize naming of namelist parameters for solver tolerances 'gammaNL', 'gamma' and 'epsprecond' refer respectively to the relative tolerances of the nonlinear solver, linear solver (FGMRES) and preconditioner (PGMRES). For consistency and to use meaningful names in the code, rename them to 'reltol_nonlin', 'reltol_fgmres' and 'reltol_pgmres'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 28 +++++++++++------------ cicecore/cicedynB/general/ice_init.F90 | 24 +++++++++---------- configuration/scripts/ice_in | 6 ++--- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ce08da292..55c7067d8 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -78,9 +78,9 @@ module ice_dyn_vp use_mean_vrel ! use mean of previous 2 iterates to compute vrel real (kind=dbl_kind), public :: & - gammaNL , & ! nonlinear stopping criterion: gammaNL*res(k=0) - gamma , & ! fgmres stopping criterion: gamma*res(k) - epsprecond , & ! pgmres stopping criterion: epsprecond*res(k) + reltol_nonlin , & ! nonlinear stopping criterion: reltol_nonlin*res(k=0) + reltol_fgmres , & ! fgmres stopping criterion: reltol_fgmres*res(k) + reltol_pgmres , & ! pgmres stopping criterion: reltol_pgmres*res(k) damping_andacc , & ! damping factor for Anderson acceleration reltol_andacc ! relative tolerance for Anderson acceleration @@ -736,7 +736,7 @@ subroutine anderson_solver (icellt, icellu, & real (kind=dbl_kind) :: & tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) - tol_nl , & ! tolerance for nonlinear convergence: gammaNL * (initial nonlinear residual norm) + tol_nl , & ! tolerance for nonlinear convergence: reltol_nonlin * (initial nonlinear residual norm) fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x prog_norm , & ! norm of difference between current and previous solution nlres_norm , & ! 
norm of current nonlinear residual : F(x) = A(x)x -b(x) @@ -843,7 +843,7 @@ subroutine anderson_solver (icellt, icellu, & endif ! Compute relative tolerance at first iteration if (it_nl == 0) then - tol_nl = gammaNL*nlres_norm + tol_nl = reltol_nonlin*nlres_norm endif ! Check for nonlinear convergence @@ -889,14 +889,14 @@ subroutine anderson_solver (icellt, icellu, & endif ! FGMRES linear solver - call fgmres (zetaD, & - Cb, vrel, & - umassdti, & - halo_info_mask, & - solx, soly, & - bx, by, & - Diagu, Diagv, & - gamma, im_fgmres, & + call fgmres (zetaD, & + Cb, vrel, & + umassdti, & + halo_info_mask, & + solx, soly, & + bx, by, & + Diagu, Diagv, & + reltol_fgmres, im_fgmres, & maxits_fgmres, nbiter, conv) ! Put FGMRES solution solx,soly in fpfunc vector (needed for anderson) call arrays_to_vec (nx_block, ny_block, nblocks, & @@ -4109,7 +4109,7 @@ subroutine precondition(zetaD, & ! Initialize preconditioned vector to 0 !phb try with wx = vx or vx/diagx wx = c0 wy = c0 - tolerance = epsprecond + tolerance = reltol_pgmres maxinner = im_pgmres maxouter = maxits_pgmres call pgmres (zetaD, & diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 0c862f0e2..9ba45bff9 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -102,7 +102,7 @@ subroutine input_data kridge, ktransport, brlx, arlx use ice_dyn_vp, only: maxits_nonlin, precond, im_fgmres, im_pgmres, maxits_fgmres, & maxits_pgmres, monitor_nonlin, monitor_fgmres, & - monitor_pgmres, gammaNL, gamma, epsprecond, & + monitor_pgmres, reltol_nonlin, reltol_fgmres, reltol_pgmres, & algo_nonlin, fpfunc_andacc, im_andacc, reltol_andacc, & damping_andacc, start_andacc, use_mean_vrel, ortho_type use ice_transport_driver, only: advection, conserv_check @@ -201,8 +201,8 @@ subroutine input_data e_ratio, Ktens, Cf, basalstress, & k1, maxits_nonlin, precond, im_fgmres, & im_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, & - monitor_fgmres, monitor_pgmres, gammaNL, gamma, & - epsprecond, algo_nonlin, im_andacc, reltol_andacc, & + monitor_fgmres, monitor_pgmres, reltol_nonlin, reltol_fgmres, & + reltol_pgmres, algo_nonlin, im_andacc, reltol_andacc, & damping_andacc, start_andacc, fpfunc_andacc, use_mean_vrel, & ortho_type, & k2, alphab, threshold_hw, & @@ -344,9 +344,9 @@ subroutine input_data monitor_fgmres = 1 ! print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations) monitor_pgmres = 1 ! print pgmres info (0: nothing printed, 1: all iterations) ortho_type = 'mgs' ! orthogonalization procedure 'cgs' or 'mgs' - gammaNL = 1e-8_dbl_kind ! nonlinear stopping criterion: gammaNL*res(k=0) - gamma = 1e-2_dbl_kind ! fgmres stopping criterion: gamma*res(k) - epsprecond = 1e-6_dbl_kind ! pgmres stopping criterion: epsprecond*res(k) + reltol_nonlin = 1e-8_dbl_kind ! nonlinear stopping criterion: reltol_nonlin*res(k=0) + reltol_fgmres = 1e-2_dbl_kind ! fgmres stopping criterion: reltol_fgmres*res(k) + reltol_pgmres = 1e-6_dbl_kind ! pgmres stopping criterion: reltol_pgmres*res(k) algo_nonlin = 1 ! nonlinear algorithm: 1: Picard iteration, 2: Anderson acceleration (andacc) fpfunc_andacc = 1 ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) im_andacc = 5 ! 
size of Anderson minimization matrix (number of saved previous residuals) @@ -669,9 +669,9 @@ subroutine input_data call broadcast_scalar(monitor_fgmres, master_task) call broadcast_scalar(monitor_pgmres, master_task) call broadcast_scalar(ortho_type, master_task) - call broadcast_scalar(gammaNL, master_task) - call broadcast_scalar(gamma, master_task) - call broadcast_scalar(epsprecond, master_task) + call broadcast_scalar(reltol_nonlin, master_task) + call broadcast_scalar(reltol_fgmres, master_task) + call broadcast_scalar(reltol_pgmres, master_task) call broadcast_scalar(algo_nonlin, master_task) call broadcast_scalar(fpfunc_andacc, master_task) call broadcast_scalar(im_andacc, master_task) @@ -1587,9 +1587,9 @@ subroutine input_data write(nu_diag,1020) ' monitor_fgmres = ', monitor_fgmres write(nu_diag,1020) ' monitor_pgmres = ', monitor_pgmres write(nu_diag,1030) ' ortho_type = ', ortho_type - write(nu_diag,1008) ' gammaNL = ', gammaNL - write(nu_diag,1008) ' gamma = ', gamma - write(nu_diag,1008) ' epsprecond = ', epsprecond + write(nu_diag,1008) ' reltol_nonlin = ', reltol_nonlin + write(nu_diag,1008) ' reltol_fgmres = ', reltol_fgmres + write(nu_diag,1008) ' reltol_pgmres = ', reltol_pgmres write(nu_diag,1020) ' algo_nonlin = ', algo_nonlin write(nu_diag,1010) ' use_mean_vrel = ', use_mean_vrel if (algo_nonlin == 2) then diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index ce7e33d52..e0ee87054 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -149,9 +149,9 @@ monitor_fgmres = 1 monitor_pgmres = 1 ortho_type = 'mgs' - gammaNL = 1e-8 - gamma = 1e-2 - epsprecond = 1e-6 + reltol_nonlin = 1e-8 + reltol_fgmres = 1e-2 + reltol_pgmres = 1e-6 algo_nonlin = 1 use_mean_vrel = .false. fpfunc_andacc = 1 From 4e55a33ba48b3e24d470773933561710be19041b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 3 Mar 2020 10:40:09 -0500 Subject: [PATCH 134/196] ice_dyn_vp: anderson_solver: adapt residual norm computations for MPI The norms of the different residuals need to be computed using the squared value of all components across MPI processes. We keep the 'res' and 'fpfunc' vectors as 1D vectors for now, to minimize code changes. This will be revised when the Anderson solver is parallelized. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 55c7067d8..eb0141a48 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -836,7 +836,7 @@ subroutine anderson_solver (icellt, icellu, & L2norm(iblk)) enddo !$OMP END PARALLEL DO - nlres_norm = sqrt(sum(L2norm)) ! phb: change after parallelization + nlres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " nonlin_res_L2norm= ", nlres_norm @@ -854,7 +854,6 @@ subroutine anderson_solver (icellt, icellu, & ! Put initial guess for FGMRES in solx,soly and sol (needed for anderson) solx = uprev_k soly = vprev_k - ! Form sol vector for fgmres (sol is iniguess at the beginning) call arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & @@ -910,7 +909,7 @@ subroutine anderson_solver (icellt, icellu, & ! 
Compute residual res = fpfunc - sol - fpres_norm = dnrm2(size(res), res, inc) + fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) if (monitor_nonlin) then ! commented code is to compare fixed_point_res_L2norm BFB with progress_res_L2norm ! (should be BFB if Picard iteration is used) @@ -929,7 +928,7 @@ subroutine anderson_solver (icellt, icellu, & ! enddo ! !$OMP END PARALLEL DO ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - ! " fixed_point_res_L2norm= ", sqrt(sum(L2norm)) + ! " fixed_point_res_L2norm= ", sqrt(global_sum(sum(L2norm), distrb_info)) write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " fixed_point_res_L2norm= ", fpres_norm endif @@ -1065,7 +1064,7 @@ subroutine anderson_solver (icellt, icellu, & !$OMP END PARALLEL DO if (monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - " progress_res_L2norm= ", sqrt(sum(L2norm)) + " progress_res_L2norm= ", sqrt(global_sum(sum(L2norm), distrb_info)) endif enddo ! nonlinear iteration loop From 7608081bd3570584640c7cd5383eea02ef00e400 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 3 Mar 2020 11:13:38 -0500 Subject: [PATCH 135/196] ice_dyn_vp: write solver diagnostics only for master task Note that care must be taken not to call 'global_sum' inside an `if (my_task == master_task)` construct, as this will create an incorrect communication pattern and ultimately an MPI failure. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 45 ++++++++++++----------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index eb0141a48..618a78353 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -36,6 +36,7 @@ module ice_dyn_vp use ice_kinds_mod use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_halo + use ice_communicate, only: my_task, master_task use ice_constants, only: field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector use ice_constants, only: c0, p027, p055, p111, p166, & @@ -837,7 +838,7 @@ subroutine anderson_solver (icellt, icellu, & enddo !$OMP END PARALLEL DO nlres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) - if (monitor_nonlin) then + if (my_task == master_task .and. monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " nonlin_res_L2norm= ", nlres_norm endif @@ -910,25 +911,24 @@ subroutine anderson_solver (icellt, icellu, & ! Compute residual res = fpfunc - sol fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) - if (monitor_nonlin) then - ! commented code is to compare fixed_point_res_L2norm BFB with progress_res_L2norm - ! (should be BFB if Picard iteration is used) - ! call vec_to_arrays (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! res (:), & - ! fpresx (:,:,:), fpresy (:,:,:)) - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! call calc_L2norm_squared (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! fpresx(:,:,iblk), fpresy(:,:,iblk), & - ! L2norm (iblk)) - ! enddo - ! !$OMP END PARALLEL DO - ! write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - ! " fixed_point_res_L2norm= ", sqrt(global_sum(sum(L2norm), distrb_info)) + ! commented code is to compare fixed_point_res_L2norm BFB with progress_res_L2norm + ! 
(should be BFB if Picard iteration is used) + ! call vec_to_arrays (nx_block, ny_block, nblocks, & + ! max_blocks, icellu (:), ntot, & + ! indxui (:,:), indxuj(:,:), & + ! res (:), & + ! fpresx (:,:,:), fpresy (:,:,:)) + ! !$OMP PARALLEL DO PRIVATE(iblk) + ! do iblk = 1, nblocks + ! call calc_L2norm_squared (nx_block , ny_block, & + ! icellu (iblk), & + ! indxui (:,iblk), indxuj (:,iblk), & + ! fpresx(:,:,iblk), fpresy(:,:,iblk), & + ! L2norm (iblk)) + ! enddo + ! !$OMP END PARALLEL DO + ! fpres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) + if (my_task == master_task .and. monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " fixed_point_res_L2norm= ", fpres_norm endif @@ -1062,9 +1062,10 @@ subroutine anderson_solver (icellt, icellu, & L2norm (iblk)) enddo !$OMP END PARALLEL DO - if (monitor_nonlin) then + prog_norm = sqrt(global_sum(sum(L2norm), distrb_info)) + if (my_task == master_task .and. monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & - " progress_res_L2norm= ", sqrt(global_sum(sum(L2norm), distrb_info)) + " progress_res_L2norm= ", prog_norm endif enddo ! nonlinear iteration loop From f233baf397f25d0af34bec59388173ace79d421f Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 6 Mar 2020 11:33:12 -0500 Subject: [PATCH 136/196] ice_dyn_vp: remove references to EVP Remove leftover references from the initial copy of ice_dyn_evp.F90 --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 25 ++--------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 618a78353..ad8e47b61 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1,35 +1,14 @@ !======================================================================= ! -! Elastic-viscous-plastic sea ice dynamics model +! Viscous-plastic sea ice dynamics model ! Computes ice velocity and deformation ! ! See: ! -! Hunke, E. C., and J. K. Dukowicz (1997). An elastic-viscous-plastic model -! for sea ice dynamics. {\em J. Phys. Oceanogr.}, {\bf 27}, 1849--1867. ! -! Hunke, E. C. (2001). Viscous-Plastic Sea Ice Dynamics with the EVP Model: -! Linearization Issues. {\em Journal of Computational Physics}, {\bf 170}, -! 18--38. ! -! Hunke, E. C., and J. K. Dukowicz (2002). The Elastic-Viscous-Plastic -! Sea Ice Dynamics Model in General Orthogonal Curvilinear Coordinates -! on a Sphere---Incorporation of Metric Terms. {\em Monthly Weather Review}, -! {\bf 130}, 1848--1865. +! authors: JF Lemieux, ECCC, Philippe Blain, ECCC ! -! Hunke, E. C., and J. K. Dukowicz (2003). The sea ice momentum -! equation in the free drift regime. Los Alamos Tech. Rep. LA-UR-03-2219. -! -! Bouillon, S., T. Fichefet, V. Legat and G. Madec (submitted 2013). The -! revised elastic-viscous-plastic method. Ocean Modelling. -! -! author: Elizabeth C. Hunke, LANL -! -! 2003: Vectorized by Clifford Chen (Fujitsu) and William Lipscomb (LANL) -! 2004: Block structure added by William Lipscomb -! 2005: Removed boundary calls for stress arrays (WHL) -! 2006: Streamlined for efficiency by Elizabeth Hunke -! Converted to free source form (F90) module ice_dyn_vp use ice_kinds_mod From 7608081bd3570584640c7cd5383eea02ef00e400 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 6 Mar 2020 14:37:43 -0500 Subject: [PATCH 137/196] ice_dyn_vp: rename 'puny_vp' to 'min_strain_rate' This value really is a minimum strain rate, so name it as such.
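For context, 'tinyarea' is the lower bound applied when the viscous coefficients are formed in 'calc_zeta_Pr': bounding the deformation measure Delta (a strain rate, in 1/s) from below keeps 'zetaD' finite as the strain rates vanish. A conceptual sketch of the regularization (not the exact code):

    ! Delta = sqrt(div**2 + (tension**2 + shear**2)/e**2) is a strain rate (1/s);
    ! multiplying by the cell area keeps the bound area-weighted:
    !   zetaD = 2*zeta ~ P / max(Delta, min_strain_rate)
    tinyarea(i,j,iblk) = min_strain_rate*tarea(i,j,iblk)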
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ad8e47b61..522ad772c 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -114,7 +114,7 @@ subroutine init_vp (dt) this_block ! block information for current block real (kind=dbl_kind) :: & - puny_vp = 2e-09_dbl_kind ! special puny value for computing tinyarea + min_strain_rate = 2e-09_dbl_kind ! used for recomputing tinyarea ! Initialize variables shared with evp call init_evp(dt) @@ -127,7 +127,7 @@ subroutine init_vp (dt) indxuj(nx_block*ny_block, max_blocks)) allocate(fld2(nx_block,ny_block,2,max_blocks)) - ! Redefine tinyarea using a different puny value + ! Redefine tinyarea using min_strain_rate !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) do iblk = 1, nblocks @@ -139,7 +139,7 @@ subroutine init_vp (dt) do j = jlo, jhi do i = ilo, ihi - tinyarea(i,j,iblk) = puny_vp*tarea(i,j,iblk) + tinyarea(i,j,iblk) = min_strain_rate*tarea(i,j,iblk) enddo enddo enddo ! iblk @@ -1090,7 +1090,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm , & ! 0.5*HTN - 1.5*HTN - tinyarea ! puny*tarea + tinyarea ! min_strain_rate*tarea real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(out) :: & From 2baf569e4f6798c75f48abb37a1f54ca9e929d96 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 16 Mar 2020 15:15:54 -0400 Subject: [PATCH 138/196] ice_dyn_vp: remove "signature" comments from JF These comments do not add value to the code; remove them. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 41 +++++++++++------------ 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 522ad772c..ee35851be 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -387,7 +387,7 @@ subroutine imp_solver (dt) strength(i,j, iblk) ) enddo ! ij - ! load velocity into array for boundary updates JFL move? + ! load velocity into array for boundary updates fld2(:,:,1,iblk) = uvel(:,:,iblk) fld2(:,:,2,iblk) = vvel(:,:,iblk) @@ -401,7 +401,7 @@ subroutine imp_solver (dt) call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) - ! velocities may have changed in dyn_prep2 ! JFL prends en compte la grille spherique qui se referme sur elle meme... + ! velocities may have changed in dyn_prep2 call ice_HaloUpdate (fld2, halo_info, & field_loc_NEcorner, field_type_vector) call ice_timer_stop(timer_bound) @@ -1864,8 +1864,8 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - vrel , & ! coeff for tauw ! jfl - Cb ! seabed stress coeff ! jfl + vrel , & ! coeff for tauw + Cb ! seabed stress coeff real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -1954,8 +1954,8 @@ subroutine matvecOLD (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Au , & ! matvec, Fx = bx - Au (N/m^2)! jfl - Av ! matvec, Fy = by - Av (N/m^2)! jfl + Au , & ! matvec, Fx = bx - Au (N/m^2) + Av ! matvec, Fy = by - Av (N/m^2) ! 
local variables @@ -2058,8 +2058,8 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - Au , & ! matvec, Fx = bx - Au (N/m^2)! jfl - Av ! matvec, Fy = by - Av (N/m^2)! jfl + Au , & ! matvec, Fx = bx - Au (N/m^2) + Av ! matvec, Fy = by - Av (N/m^2) ! local variables @@ -2311,8 +2311,8 @@ subroutine calc_bfix (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(out) :: & - bxfix , & ! bx = taux + bxfix !jfl - byfix ! by = tauy + byfix !jfl + bxfix , & ! bx = taux + bxfix + byfix ! by = tauy + byfix ! local variables @@ -2371,8 +2371,8 @@ subroutine calc_bvec (nx_block, ny_block, & uarear , & ! 1/uarea waterx , & ! for ocean stress calculation, x (m/s) watery , & ! for ocean stress calculation, y (m/s) - bxfix , & ! bx = taux + bxfix !jfl - byfix , & ! by = tauy + byfix !jfl + bxfix , & ! bx = taux + bxfix + byfix , & ! by = tauy + byfix uocn , & ! ocean current, x-direction (m/s) vocn , & ! ocean current, y-direction (m/s) vrel ! relative ice-ocean velocity @@ -2383,8 +2383,8 @@ subroutine calc_bvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & - bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl - by ! b vector, by = tauy + byfix (N/m^2) !jfl + bx , & ! b vector, bx = taux + bxfix (N/m^2) + by ! b vector, by = tauy + byfix (N/m^2) ! local variables @@ -2457,10 +2457,10 @@ subroutine residual_vec (nx_block, ny_block, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - bx , & ! b vector, bx = taux + bxfix (N/m^2) !jfl - by , & ! b vector, by = tauy + byfix (N/m^2) !jfl - Au , & ! matvec, Fx = bx - Au (N/m^2) ! jfl - Av ! matvec, Fy = by - Av (N/m^2) ! jfl + bx , & ! b vector, bx = taux + bxfix (N/m^2) + by , & ! b vector, by = tauy + byfix (N/m^2) + Au , & ! matvec, Fx = bx - Au (N/m^2) + Av ! matvec, Fy = by - Av (N/m^2) real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -2512,11 +2512,11 @@ subroutine formDiag_step1 (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions - icellu ! no. of cells where icetmask = 1 JFL + icellu ! no. of cells where icetmask = 1 integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & - indxui , & ! compressed index in i-direction JFL + indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & @@ -3167,7 +3167,6 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & end subroutine vec_to_arrays -! JFL ROUTINE POUR CALC STRESS OCN POUR COUPLAGE !======================================================================= From 5ed406d7d4cc9496f2bd525af56672ff50b8607e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 14 Apr 2020 13:35:26 -0400 Subject: [PATCH 139/196] ice_dyn_vp: write stresses at end of time step In the EVP solver, stresses are subcycled using the global 'stress{p,m,12}_[1-4]' arrays, so these arrays contain the subcycled stresses at the end of the EVP iterations. These global arrays are used to write the stresses to the history tape if the stress variables are enabled in the namelist. In the VP solver, stresses are not explicitly computed to solve the momentum equation, so they need to be computed separately at the end of the time step so that they can be properly written to the history tape.
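In outline, the end-of-step diagnostic is a single pass over the T cells that evaluates the VP constitutive law at the converged velocities, reusing the viscous coefficients (zetaD) of the last nonlinear iteration, so no subcycling is involved, in contrast with EVP. For example, one of the shear components in the subroutine below reads:

    stress12_3(i,j) = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci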
Use the existing, but unused, 'stress_vp' subroutine to compute the stresses, and call it at the end of the 'imp_solver' driver. Remove unneeded computations from 'stress_vp'. Move the definition of the 'zetaD' array to 'imp_solver' so it can be passed to 'stress_vp'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 195 ++++++---------------- 1 file changed, 47 insertions(+), 148 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ee35851be..74eeed54b 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -213,6 +213,9 @@ subroutine imp_solver (dt) umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & + zetaD ! zetaD = 2zeta (viscous coeff) + logical (kind=log_kind) :: calc_strair integer (kind=int_kind), dimension (nx_block,ny_block,max_blocks) :: & @@ -464,6 +467,7 @@ subroutine imp_solver (dt) umassdti, bvec, & sol, diagvec, & fpresx, fpresy, & + zetaD, & halo_info_mask) !----------------------------------------------------------------- ! End of nonlinear iteration @@ -472,7 +476,29 @@ subroutine imp_solver (dt) deallocate(bvec, sol, diagvec) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) - + + !----------------------------------------------------------------- + ! Compute stresses + !----------------------------------------------------------------- + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call stress_vp (nx_block, ny_block, & + icellt(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk)) + enddo ! iblk + !$OMP END PARALLEL DO + !----------------------------------------------------------------- ! Compute deformations !----------------------------------------------------------------- @@ -491,8 +517,6 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO - ! phb: here we do halo updates for stresses (stressp_i, stressm_i, stress12_i, i=1..4), - ! but stresses have not been updated ! (should be done in deformations ?) ! Force symmetry across the tripole seam if (trim(grid_type) == 'tripole') then if (maskhalo_dyn) then @@ -607,6 +631,7 @@ subroutine anderson_solver (icellt, icellu, & umassdti, bvec, & sol, diagvec, & fpresx, fpresy, & + zetaD, & halo_info_mask) use ice_arrays_column, only: Cdn_ocn @@ -642,6 +667,9 @@ subroutine anderson_solver (icellt, icellu, & byfix , & ! part of by that is constant during Picard umassdti ! mass of U-cell/dte (kg/m^2 s) + real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(out) :: & + zetaD ! zetaD = 2zeta (viscous coeff) + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k fpresy ! y fixed point residual vector, fy = vvel - vprev_k @@ -684,9 +712,6 @@ subroutine anderson_solver (icellt, icellu, & solx , & ! solution of FGMRES (x components) soly !
solution of FGMRES (y components) - real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4):: & - zetaD ! zetaD = 2zeta (viscous coeff) - real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse? Dstrtmp @@ -1475,22 +1500,20 @@ end subroutine stress_prime_vpOLD ! Computes the VP stress (as diagnostic) - subroutine stress_vp (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - zetaD, & - stressp_1, stressp_2, & - stressp_3, stressp_4, & - stressm_1, stressm_2, & - stressm_3, stressm_4, & - stress12_1, stress12_2, & - stress12_3, stress12_4, & - str ) + subroutine stress_vp (nx_block, ny_block, & + icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + cxp, cyp, & + cxm, cym, & + zetaD, & + stressp_1, stressp_2, & + stressp_3, stressp_4, & + stressm_1, stressm_2, & + stressm_3, stressm_4, & + stress12_1, stress12_2, & + stress12_3, stress12_4) use ice_dyn_shared, only: strain_rates @@ -1508,8 +1531,6 @@ subroutine stress_vp (nx_block, ny_block, & vvel , & ! y-component of velocity (m/s) dxt , & ! width of T-cell through the middle (m) dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) cyp , & ! 1.5*HTE - 0.5*HTE cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE @@ -1525,10 +1546,6 @@ subroutine stress_vp (nx_block, ny_block, & stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - str ! stress combinations - ! local variables integer (kind=int_kind) :: & @@ -1538,28 +1555,9 @@ subroutine stress_vp (nx_block, ny_block, & divune, divunw, divuse, divusw , & ! divergence tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - ssigpn, ssigps, ssigpe, ssigpw , & - ssigmn, ssigms, ssigme, ssigmw , & - ssig12n, ssig12s, ssig12e, ssig12w , & - ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & - csigpne, csigpnw, csigpse, csigpsw , & - csigmne, csigmnw, csigmse, csigmsw , & - csig12ne, csig12nw, csig12se, csig12sw , & - str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp + Deltane, Deltanw, Deltase, Deltasw ! Delt - character(len=*), parameter :: subname = '(stress_vp)' - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - - str(:,:,:) = c0 - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu + character(len=*), parameter :: subname = '(stress_vp)' do ij = 1, icellt i = indxti(ij) @@ -1604,105 +1602,6 @@ subroutine stress_vp (nx_block, ny_block, & stress12_3(i,j) = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci stress12_4(i,j) = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - !----------------------------------------------------------------- - ! combinations of the stresses for the momentum equation ! 
kg/s^2 - !----------------------------------------------------------------- - - ssigpn = stressp_1(i,j) + stressp_2(i,j) - ssigps = stressp_3(i,j) + stressp_4(i,j) - ssigpe = stressp_1(i,j) + stressp_4(i,j) - ssigpw = stressp_2(i,j) + stressp_3(i,j) - ssigp1 =(stressp_1(i,j) + stressp_3(i,j))*p055 - ssigp2 =(stressp_2(i,j) + stressp_4(i,j))*p055 - - ssigmn = stressm_1(i,j) + stressm_2(i,j) - ssigms = stressm_3(i,j) + stressm_4(i,j) - ssigme = stressm_1(i,j) + stressm_4(i,j) - ssigmw = stressm_2(i,j) + stressm_3(i,j) - ssigm1 =(stressm_1(i,j) + stressm_3(i,j))*p055 - ssigm2 =(stressm_2(i,j) + stressm_4(i,j))*p055 - - ssig12n = stress12_1(i,j) + stress12_2(i,j) - ssig12s = stress12_3(i,j) + stress12_4(i,j) - ssig12e = stress12_1(i,j) + stress12_4(i,j) - ssig12w = stress12_2(i,j) + stress12_3(i,j) - ssig121 =(stress12_1(i,j) + stress12_3(i,j))*p111 - ssig122 =(stress12_2(i,j) + stress12_4(i,j))*p111 - - csigpne = p111*stressp_1(i,j) + ssigp2 + p027*stressp_3(i,j) - csigpnw = p111*stressp_2(i,j) + ssigp1 + p027*stressp_4(i,j) - csigpsw = p111*stressp_3(i,j) + ssigp2 + p027*stressp_1(i,j) - csigpse = p111*stressp_4(i,j) + ssigp1 + p027*stressp_2(i,j) - - csigmne = p111*stressm_1(i,j) + ssigm2 + p027*stressm_3(i,j) - csigmnw = p111*stressm_2(i,j) + ssigm1 + p027*stressm_4(i,j) - csigmsw = p111*stressm_3(i,j) + ssigm2 + p027*stressm_1(i,j) - csigmse = p111*stressm_4(i,j) + ssigm1 + p027*stressm_2(i,j) - - csig12ne = p222*stress12_1(i,j) + ssig122 & - + p055*stress12_3(i,j) - csig12nw = p222*stress12_2(i,j) + ssig121 & - + p055*stress12_4(i,j) - csig12sw = p222*stress12_3(i,j) + ssig122 & - + p055*stress12_1(i,j) - csig12se = p222*stress12_4(i,j) + ssig121 & - + p055*stress12_2(i,j) - - str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) - str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) - str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) - str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) - - !----------------------------------------------------------------- - ! for dF/dx (u momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) - - ! northeast (i,j) - str(i,j,1) = -strp_tmp - strm_tmp - str12ew & - + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne - - ! northwest (i+1,j) - str(i,j,2) = strp_tmp + strm_tmp - str12we & - + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw - - strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) - - ! southeast (i,j+1) - str(i,j,3) = -strp_tmp - strm_tmp + str12ew & - + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se - - ! southwest (i+1,j+1) - str(i,j,4) = strp_tmp + strm_tmp + str12we & - + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw - - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) - - ! northeast (i,j) - str(i,j,5) = -strp_tmp + strm_tmp - str12ns & - - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne - - ! southeast (i,j+1) - str(i,j,6) = strp_tmp - strm_tmp - str12sn & - - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se - - strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) - - ! 
northwest (i+1,j) - str(i,j,7) = -strp_tmp + strm_tmp + str12ns & - - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw - - ! southwest (i+1,j+1) - str(i,j,8) = strp_tmp - strm_tmp + str12sn & - - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw - enddo ! ij end subroutine stress_vp From ca7c4d27b21ade5b5b625e6ff9d7fb9eb6fccdb0 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Apr 2020 15:24:39 -0400 Subject: [PATCH 140/196] ice_dyn_vp: cleanup 'intent's - move 'intent(out)' and 'intent(inout)' arguments to the end of the argument list (ice_HaloUpdate_vel is an exception, for consistency with other ice_HaloUpdate subroutines). - move 'intent(in)' before any 'intent(out)' or 'intent(inout)' - change 'intent(inout)' to 'intent(out)' if the argument is rewritten inside the subroutine --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 120 +++++++++++----------- 1 file changed, 61 insertions(+), 59 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 74eeed54b..51975cdff 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -670,6 +670,9 @@ subroutine anderson_solver (icellt, icellu, & real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(out) :: & zetaD ! zetaD = 2zeta (viscous coeff) + type (ice_halo), intent(in) :: & + halo_info_mask ! ghost cell update info for masked halo + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k fpresy ! y fixed point residual vector, fy = vvel - vprev_k @@ -679,9 +682,6 @@ subroutine anderson_solver (icellt, icellu, & sol , & ! current approximate solution diagvec ! diagonal of matrix A for preconditioners - type (ice_halo) :: & - halo_info_mask ! ghost cell update info for masked halo - ! local variables integer (kind=int_kind) :: & @@ -897,11 +897,12 @@ subroutine anderson_solver (icellt, icellu, & Cb, vrel, & umassdti, & halo_info_mask, & - solx, soly, & bx, by, & Diagu, Diagv, & reltol_fgmres, im_fgmres, & - maxits_fgmres, nbiter, conv) + maxits_fgmres, & + solx, soly, & + nbiter, conv) ! Put FGMRES solution solx,soly in fpfunc vector (needed for anderson) call arrays_to_vec (nx_block, ny_block, nblocks, & max_blocks, icellu (:), ntot, & @@ -1754,7 +1755,8 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & Tbu, & ! coefficient for basal stress (N/m^2) aiu , & ! ice fraction on u-grid uocn , & ! ocean current, x-direction (m/s) - vocn ! ocean current, y-direction (m/s) + vocn , & ! ocean current, y-direction (m/s) + Cw ! ocean-ice neutral drag coefficient real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(in) :: & @@ -1765,10 +1767,6 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & intent(inout) :: & vrel , & ! coeff for tauw Cb ! seabed stress coeff - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - Cw ! ocean-ice neutral drag coefficient ! local variables @@ -2281,7 +2279,7 @@ subroutine calc_bvec (nx_block, ny_block, & stPr real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + intent(out) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) by ! b vector, by = tauy + byfix (N/m^2) @@ -2365,10 +2363,12 @@ subroutine residual_vec (nx_block, ny_block, & intent(inout) :: & Fx , & ! x residual vector, Fx = bx - Au (N/m^2) Fy ! y residual vector, Fy = by - Av (N/m^2) - + real (kind=dbl_kind), intent(out), optional :: & sum_squared ! 
sum of squared residual vector components - + + ! local variables + integer (kind=int_kind) :: & i, j, ij @@ -2433,7 +2433,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & zetaD ! 2*zeta real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(inout) :: & + intent(out) :: & Dstr ! intermediate calc for diagonal components of matrix A associated ! with rheology term @@ -2806,7 +2806,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & Dstr real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + intent(out) :: & Diagu , & ! matvec, Fx = bx - Au (N/m^2) Diagv ! matvec, Fy = by - Av (N/m^2) @@ -2974,7 +2974,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & tpu , & ! x-component of vector tpv ! y-component of vector - real (kind=dbl_kind), dimension (ntot), intent(inout) :: & + real (kind=dbl_kind), dimension (ntot), intent(out) :: & outvec ! local variables @@ -3033,7 +3033,7 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & real (kind=dbl_kind), dimension (ntot), intent(in) :: & invec - real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(out) :: & tpu , & ! x-component of vector tpv ! y-component of vector @@ -3126,11 +3126,12 @@ subroutine fgmres (zetaD, & Cb, vrel, & umassdti, & halo_info_mask, & - solx, soly, & bx, by, & diagx, diagy, & tolerance, maxinner, & - maxouter, nbiter, conv) + maxouter, & + solx, soly, & + nbiter, conv) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2*zeta (viscous coefficient) @@ -3140,18 +3141,12 @@ subroutine fgmres (zetaD, & Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) - type (ice_halo) :: & + type (ice_halo), intent(in) :: & halo_info_mask ! ghost cell update info for masked halo - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & - solx , & ! Initial guess on input, approximate solution on output (x components) - soly ! Initial guess on input, approximate solution on output (y components) - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & bx , & ! Right hand side of the linear system (x components) - by ! Right hand side of the linear system (y components) - - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & + by , & ! Right hand side of the linear system (y components) diagx , & ! Diagonal of the system matrix (x components) diagy ! Diagonal of the system matrix (y components) @@ -3160,13 +3155,15 @@ subroutine fgmres (zetaD, & ! residual is below tolerance integer (kind=int_kind), intent(in) :: & - maxinner ! Restart the method every maxinner inner (Arnoldi) iterations - - integer (kind=int_kind), intent(in) :: & + maxinner, & ! Restart the method every maxinner inner (Arnoldi) iterations maxouter ! Maximum number of outer (restarts) iterations ! Iteration will stop after maxinner*maxouter Arnoldi steps ! even if the specified tolerance has not been achieved + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & + solx , & ! Initial guess on input, approximate solution on output (x components) + soly ! Initial guess on input, approximate solution on output (y components) + integer (kind=int_kind), intent(out) :: & nbiter ! Total number of Arnoldi iterations performed @@ -3364,8 +3361,10 @@ subroutine fgmres (zetaD, & !$OMP END PARALLEL DO ! 
Orthogonalize the new vector - call orthogonalize(arnoldi_basis_x, arnoldi_basis_y, & - hessenberg, initer, nextit, maxinner, ortho_type) + call orthogonalize(ortho_type , initer , & + nextit , maxinner , & + arnoldi_basis_x, arnoldi_basis_y, & + hessenberg) ! Compute norm of new Arnoldi vector and update Hessenberg matrix @@ -3518,11 +3517,12 @@ end subroutine fgmres subroutine pgmres (zetaD, & Cb, vrel, & umassdti, & - solx, soly, & bx, by, & diagx, diagy, & tolerance, maxinner, & - maxouter, nbiter, conv) + maxouter, & + solx, soly, & + nbiter, conv) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2*zeta (viscous coefficient) @@ -3532,10 +3532,6 @@ subroutine pgmres (zetaD, & Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & - solx , & ! Initial guess on input, approximate solution on output (x components) - soly ! Initial guess on input, approximate solution on output (y components) - real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & bx , & ! Right hand side of the linear system (x components) by ! Right hand side of the linear system (y components) @@ -3549,13 +3545,15 @@ subroutine pgmres (zetaD, & ! residual is below tolerance integer (kind=int_kind), intent(in) :: & - maxinner ! Restart the method every maxinner inner (Arnoldi) iterations - - integer (kind=int_kind), intent(in) :: & + maxinner, & ! Restart the method every maxinner inner (Arnoldi) iterations maxouter ! Maximum number of outer (restarts) iterations ! Iteration will stop after maxinner*maxouter Arnoldi steps ! even if the specified tolerance has not been achieved + real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(inout) :: & + solx , & ! Initial guess on input, approximate solution on output (x components) + soly ! Initial guess on input, approximate solution on output (y components) + integer (kind=int_kind), intent(out) :: & nbiter ! Total number of Arnoldi iterations performed @@ -3749,8 +3747,10 @@ subroutine pgmres (zetaD, & !$OMP END PARALLEL DO ! Orthogonalize the new vector - call orthogonalize(arnoldi_basis_x, arnoldi_basis_y, & - hessenberg, initer, nextit, maxinner, ortho_type) + call orthogonalize(ortho_type , initer , & + nextit , maxinner , & + arnoldi_basis_x, arnoldi_basis_y, & + hessenberg) ! Compute norm of new Arnoldi vector and update Hessenberg matrix !$OMP PARALLEL DO PRIVATE(iblk) @@ -3992,11 +3992,12 @@ subroutine precondition(zetaD, & call pgmres (zetaD, & Cb, vrel, & umassdti, & - wx, wy, & vx, vy, & diagx, diagy, & tolerance, maxinner, & - maxouter, nbiter, conv) + maxouter, & + wx, wy, & + nbiter, conv) else endif @@ -4009,25 +4010,26 @@ end subroutine precondition ! ! authors: Philippe Blain, ECCC - subroutine orthogonalize(arnoldi_basis_x, arnoldi_basis_y, & - hessenberg, initer, nextit, maxinner, ortho_type) + subroutine orthogonalize(ortho_type , initer , & + nextit , maxinner , & + arnoldi_basis_x, arnoldi_basis_y, & + hessenberg) + + character(len=*), intent(in) :: & + ortho_type ! type of orthogonalization + + integer (kind=int_kind), intent(in) :: & + initer , & ! inner (Arnoldi) loop counter + nextit , & ! nextit == initer+1 + maxinner ! Restart the method every maxinner inner iterations real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1), intent(inout) :: & arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv arnoldi_basis_y ! 
arnoldi basis (y components) - integer (kind=int_kind), intent(in) :: & - initer , & ! inner (Arnoldi) loop counter - nextit ! nextit == initer+1 - real (kind=dbl_kind), dimension(maxinner+1, maxinner), intent(inout) :: & hessenberg ! system matrix of the Hessenberg (least squares) system !phb: removing this parameter and argument makes ifort error in the .i90 file - integer (kind=int_kind), intent(in) :: & - maxinner ! Restart the method every maxinner inner iterations - - character(len=*), intent(in) :: & - ortho_type ! type of orthogonalization ! local variables @@ -4170,13 +4172,13 @@ subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) use ice_domain, only: halo_info, maskhalo_dyn use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop + type (ice_halo), intent(in) :: & + halo_info_mask ! ghost cell update info for masked halo + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & uvel , & ! u components of velocity vector vvel ! v components of velocity vector - type (ice_halo), intent(in) :: & - halo_info_mask ! ghost cell update info for masked halo - ! local variables integer (kind=int_kind) :: & From 97fb09752e95fcf9d7b1e1a8086a7e543a4be425 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 17 Apr 2020 11:07:24 -0400 Subject: [PATCH 141/196] dynamics: correct the definition of 'shearing strain rate' in comments All occurrences of 'shearing strain rate' as a comment before the computation of the quantities `shear{n,s}{e,w}` define these quantities as 'shearing strain rate = e_12'. However, the correct definition of these quantities is 2*e_12. Correct these comments, bringing the code in line with the definition of the shearing strain rate in the documentation (see the 'Internal stress' section in doc/source/science_guide/sg_dynamics.rst), which defines D_S = 2\dot{\epsilon}_{12}. --- cicecore/cicedynB/dynamics/ice_dyn_eap.F90 | 2 +- cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 | 4 ++-- cicecore/cicedynB/dynamics/ice_dyn_shared.F90 | 2 +- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 index 3b31fa8cd..9e16d2cc2 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 @@ -1321,7 +1321,7 @@ subroutine stress_eap (nx_block, ny_block, & tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - ! shearing strain rate = e_12 + ! shearing strain rate = 2*e_12 shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 index c88a7de3a..9fac97a89 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 @@ -326,7 +326,7 @@ subroutine stress_i(NA_len, & ! tension strain rate = e_11 - e_22 tensionne = -cym*uvel(iw) - dyt(iw)*tmp_uvel_ee & + cxm*vvel(iw) + dxt(iw)*tmp_vvel_se - ! shearing strain rate = e_12 + ! shearing strain rate = 2*e_12 shearne = -cym*vvel(iw) - dyt(iw)*tmp_vvel_ee & - cxm*uvel(iw) - dxt(iw)*tmp_uvel_se ! Delta (in the denominator of zeta, eta) @@ -614,7 +614,7 @@ subroutine stress_l(NA_len, tarear, & tensionse = -cym*tmp_uvel_se - dyt(iw)*tmp_uvel_ne & + cxp*tmp_vvel_se - dxt(iw)*vvel(iw) - ! shearing strain rate = e_12 + !
shearing strain rate = 2*e_12 shearne = -cym*vvel(iw) - dyt(iw)*tmp_vvel_ee & - cxm*uvel(iw) - dxt(iw)*tmp_uvel_se shearnw = -cyp*tmp_vvel_ee + dyt(iw)*vvel(iw) & diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index 183783350..d6bb13e93 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -1168,7 +1168,7 @@ subroutine strain_rates (nx_block, ny_block, & tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - ! shearing strain rate = e_12 + ! shearing strain rate = 2*e_12 shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 51975cdff..ff0d59dda 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1695,7 +1695,7 @@ subroutine deformations (nx_block, ny_block, & tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - ! shearing strain rate = e_12 + ! shearing strain rate = 2*e_12 shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & @@ -2605,7 +2605,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & + cxp(i,j)*vij1 - dxt(i,j)*vij - ! shearing strain rate = e_12 + ! shearing strain rate = 2*e_12 shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & - cxm(i,j)*uij - dxt(i,j)*uij1 shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & From 22eb6a1b2e7b45c906e9179270bc14ec2ce03661 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 14 May 2020 14:38:39 -0400 Subject: [PATCH 142/196] ice_dyn_vp: correctly use 'subname' in 'abort_ice' calls --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ff0d59dda..1d2e44297 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1788,7 +1788,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) do ij =1, icellu @@ -2302,7 +2302,7 @@ subroutine calc_bvec (nx_block, ny_block, & call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) - if (icepack_warnings_aborted()) call abort_ice(error_message="subname", & + if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) do ij =1, icellu From e0b0818f88c7d80287642e6aa5bf0e4528713e47 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 14 May 2020 14:39:06 -0400 Subject: [PATCH 143/196] ice_dyn_vp: add 'calc_seabed_stress' subroutine Add a subroutine to compute the diagnostic seabed stress ('taubx' and 'tauby'), and call it from the 'imp_solver' driver. Note that in EVP, this is done in the 'stepu' subroutine in ice_dyn_shared. 
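As an illustration (assembled from the formulas visible in the diffs of this and the following patches), the diagnostic is simply

    taubx(i,j) = -uvel(i,j)*Cb(i,j)
    tauby(i,j) = -vvel(i,j)*Cb(i,j)

where Cb = Tbu/(sqrt(uvel**2 + vvel**2) + u0) is the seabed stress coefficient computed in 'calc_vrel_Cb', with u0 a small residual velocity that keeps the denominator from vanishing.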
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 76 +++++++++++++++++++++-- 1 file changed, 70 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 1d2e44297..104b31485 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -207,6 +207,7 @@ subroutine imp_solver (dt) forcey , & ! work array: combined atm stress and ocn tilt, y bxfix , & ! part of bx that is constant during Picard byfix , & ! part of by that is constant during Picard + Cb , & ! seabed stress coefficient fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k fpresy , & ! y fixed point residual vector, fy = vvel - vprev_k aiu , & ! ice fraction on u-grid @@ -467,7 +468,7 @@ subroutine imp_solver (dt) umassdti, bvec, & sol, diagvec, & fpresx, fpresy, & - zetaD, & + zetaD, Cb, & halo_info_mask) !----------------------------------------------------------------- ! End of nonlinear iteration @@ -517,6 +518,22 @@ subroutine imp_solver (dt) enddo !$OMP END PARALLEL DO + !----------------------------------------------------------------- + ! Compute seabed stress (diagnostic) + !----------------------------------------------------------------- + if (basalstress) then + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_seabed_stress (nx_block , ny_block , & + icellu(iblk) , & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + Cb (:,:,iblk), & + taubx (:,:,iblk), tauby (:,:,iblk)) + enddo + !$OMP END PARALLEL DO + endif + ! Force symmetry across the tripole seam if (trim(grid_type) == 'tripole') then if (maskhalo_dyn) then @@ -631,7 +648,7 @@ subroutine anderson_solver (icellt, icellu, & umassdti, bvec, & sol, diagvec, & fpresx, fpresy, & - zetaD, & + zetaD, Cb, & halo_info_mask) use ice_arrays_column, only: Cdn_ocn @@ -675,7 +692,8 @@ subroutine anderson_solver (icellt, icellu, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k - fpresy ! y fixed point residual vector, fy = vvel - vprev_k + fpresy , & ! y fixed point residual vector, fy = vvel - vprev_k + Cb ! seabed stress coefficient real (kind=dbl_kind), dimension (ntot), intent(inout) :: & bvec , & ! RHS vector for FGMRES @@ -700,7 +718,6 @@ subroutine anderson_solver (icellt, icellu, & ulin , & ! uvel to linearize vrel vlin , & ! vvel to linearize vrel vrel , & ! coeff for tauw - Cb , & ! seabed stress coeff bx , & ! b vector by , & ! b vector Diagu , & ! Diagonal (u component) of the matrix A @@ -1800,13 +1817,60 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & ! (magnitude of relative ocean current)*rhow*drag*aice vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & - (vocn(i,j) - vtp)**2) ! m/s + (vocn(i,j) - vtp)**2) ! m/s Cb(i,j) = Tbu(i,j) / (sqrt(utp**2 + vtp**2) + u0) ! for basal stress enddo ! ij - end subroutine calc_vrel_Cb + end subroutine calc_vrel_Cb + +!======================================================================= + +! Compute seabed stress (diagnostic) + + subroutine calc_seabed_stress (nx_block , ny_block, & + icellu , & + indxui , indxuj , & + uvel , vvel , & + Cb , & + taubx , tauby) + + integer (kind=int_kind), intent(in) :: & + nx_block, ny_block, & ! block dimensions + icellu ! total count when iceumask is true + + integer (kind=int_kind), dimension (nx_block*ny_block), & + intent(in) :: & + indxui , & ! compressed index in i-direction + indxuj ! 
compressed index in j-direction + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & + uvel , & ! x-component of velocity (m/s) + vvel , & ! y-component of velocity (m/s) + Cb ! seabed stress coefficient + + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: & + taubx , & ! seabed stress, x-direction (N/m^2) + tauby ! seabed stress, y-direction (N/m^2) + + ! local variables + + integer (kind=int_kind) :: & + i, j, ij + + character(len=*), parameter :: subname = '(calc_seabed_stress)' + + do ij =1, icellu + i = indxui(ij) + j = indxuj(ij) + + taubx(i,j) = -uvel(i,j)*Cb(i,j) + tauby(i,j) = -vvel(i,j)*Cb(i,j) + + enddo ! ij + + end subroutine calc_seabed_stress !======================================================================= From 13257750c0991eca422a4e5ab767b7559730c011 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 14 May 2020 14:47:50 -0400 Subject: [PATCH 144/196] dynamics: move basal stress residual velocity ('u0') to ice_dyn_shared Make 'u0' a module variable in ice_dyn_shared, and use it for both EVP and VP without duplicating the hard-coded value in the code. --- cicecore/cicedynB/dynamics/ice_dyn_shared.F90 | 8 +++----- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index d6bb13e93..9c88fe79b 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -91,8 +91,9 @@ module ice_dyn_shared k1, & ! 1st free parameter for landfast parameterization k2, & ! second free parameter (N/m^3) for landfast parametrization alphab, & ! alphab=Cb factor in Lemieux et al 2015 - threshold_hw ! max water depth for grounding - ! see keel data from Amundrud et al. 2004 (JGR) + threshold_hw, & ! max water depth for grounding + ! see keel data from Amundrud et al. 2004 (JGR) + u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) !======================================================================= @@ -690,9 +691,6 @@ subroutine stepu (nx_block, ny_block, & Cb , & ! complete basal stress coeff rhow ! - real (kind=dbl_kind) :: & - u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) - character(len=*), parameter :: subname = '(stepu)' !----------------------------------------------------------------- diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 104b31485..2078e2dc8 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1759,6 +1759,8 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & uvel, vvel, & vrel, Cb) + use ice_dyn_shared, only: u0 ! residual velocity for basal stress (m/s) + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu ! total count when iceumask is true @@ -1793,9 +1795,6 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? rhow ! - - real (kind=dbl_kind) :: & - u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) character(len=*), parameter :: subname = '(calc_vrel_Cb)' From 945d87a814a62e61e4ea7bdb24e8a8dc4f9bd316 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2020 14:01:55 -0400 Subject: [PATCH 145/196] ice_dyn_vp: use 'deformations' from ice_dyn_shared The 'deformations' subroutine from ice_dyn_shared is used in ice_dyn_evp, but not in ice_dyn_vp, which carries its own duplicated copy; remove that copy and use the shared subroutine instead.
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 124 +--------------------- 1 file changed, 1 insertion(+), 123 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 2078e2dc8..561a9067a 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -172,6 +172,7 @@ subroutine imp_solver (dt) use ice_blocks, only: block, get_block, nx_block, ny_block use ice_domain, only: blocks_ice, halo_info, maskhalo_dyn use ice_domain_size, only: max_blocks, ncat + use ice_dyn_shared, only: deformations use ice_flux, only: rdg_conv, rdg_shear, strairxT, strairyT, & strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, & strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, & @@ -1626,129 +1627,6 @@ end subroutine stress_vp !======================================================================= -! Compute deformations for mechanical redistribution - - subroutine deformations (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - tarear, & - shear, divu, & - rdg_conv, rdg_shear ) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm , & ! 0.5*HTN - 1.5*HTN - tarear ! 1/tarea - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - shear , & ! strain rate II component (1/s) - divu , & ! strain rate I component, velocity divergence (1/s) - rdg_conv , & ! convergence term for ridging (1/s) - rdg_shear ! shear term for ridging (1/s) - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - tmp - - character(len=*), parameter :: subname = '(deformations)' - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! strain rates - ! NOTE these are actually strain rates * area (m^2/s) - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxp(i,j)*vvel(i ,j ) - dxt(i,j)*vvel(i ,j-1) - divunw = cym(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxp(i,j)*vvel(i-1,j ) - dxt(i,j)*vvel(i-1,j-1) - divusw = cym(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxm(i,j)*vvel(i-1,j-1) + dxt(i,j)*vvel(i-1,j ) - divuse = cyp(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxm(i,j)*vvel(i ,j-1) + dxt(i,j)*vvel(i ,j ) - - ! 
tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uvel(i ,j ) - dyt(i,j)*uvel(i-1,j ) & - + cxm(i,j)*vvel(i ,j ) + dxt(i,j)*vvel(i ,j-1) - tensionnw = -cyp(i,j)*uvel(i-1,j ) + dyt(i,j)*uvel(i ,j ) & - + cxm(i,j)*vvel(i-1,j ) + dxt(i,j)*vvel(i-1,j-1) - tensionsw = -cyp(i,j)*uvel(i-1,j-1) + dyt(i,j)*uvel(i ,j-1) & - + cxp(i,j)*vvel(i-1,j-1) - dxt(i,j)*vvel(i-1,j ) - tensionse = -cym(i,j)*uvel(i ,j-1) - dyt(i,j)*uvel(i-1,j-1) & - + cxp(i,j)*vvel(i ,j-1) - dxt(i,j)*vvel(i ,j ) - - ! shearing strain rate = 2*e_12 - shearne = -cym(i,j)*vvel(i ,j ) - dyt(i,j)*vvel(i-1,j ) & - - cxm(i,j)*uvel(i ,j ) - dxt(i,j)*uvel(i ,j-1) - shearnw = -cyp(i,j)*vvel(i-1,j ) + dyt(i,j)*vvel(i ,j ) & - - cxm(i,j)*uvel(i-1,j ) - dxt(i,j)*uvel(i-1,j-1) - shearsw = -cyp(i,j)*vvel(i-1,j-1) + dyt(i,j)*vvel(i ,j-1) & - - cxp(i,j)*uvel(i-1,j-1) + dxt(i,j)*uvel(i-1,j ) - shearse = -cym(i,j)*vvel(i ,j-1) - dyt(i,j)*vvel(i-1,j-1) & - - cxp(i,j)*uvel(i ,j-1) + dxt(i,j)*uvel(i ,j ) - - ! Delta (in the denominator of zeta, eta) - Deltane = sqrt(divune**2 + ecci*(tensionne**2 + shearne**2)) - Deltanw = sqrt(divunw**2 + ecci*(tensionnw**2 + shearnw**2)) - Deltasw = sqrt(divusw**2 + ecci*(tensionsw**2 + shearsw**2)) - Deltase = sqrt(divuse**2 + ecci*(tensionse**2 + shearse**2)) - - !----------------------------------------------------------------- - ! deformations for mechanical redistribution - !----------------------------------------------------------------- - - divu(i,j) = p25*(divune + divunw + divuse + divusw) * tarear(i,j) - tmp = p25*(Deltane + Deltanw + Deltase + Deltasw) * tarear(i,j) - rdg_conv(i,j) = -min(divu(i,j),c0) - rdg_shear(i,j) = p5*(tmp-abs(divu(i,j))) - - ! diagnostic only - ! shear = sqrt(tension**2 + shearing**2) - shear(i,j) = p25*tarear(i,j)*sqrt( & - (tensionne + tensionnw + tensionse + tensionsw)**2 & - + (shearne + shearnw + shearse + shearsw)**2) - - enddo ! ij - - end subroutine deformations - -!======================================================================= - ! Compute vrel and basal stress coefficients subroutine calc_vrel_Cb (nx_block, ny_block, & From de4678235e8735f698f438f0781c6e08b717f5d8 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2020 14:02:17 -0400 Subject: [PATCH 146/196] ice_dyn_vp: simplify 'calc_vrel_Cb' subroutine Remove the unneeded variables 'utp', 'vtp' and directly use 'uvel' and 'vvel' instead. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 561a9067a..c2284316d 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1671,7 +1671,6 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? rhow ! character(len=*), parameter :: subname = '(calc_vrel_Cb)' @@ -1689,14 +1688,11 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & i = indxui(ij) j = indxuj(ij) - utp = uvel(i,j) - vtp = vvel(i,j) - ! (magnitude of relative ocean current)*rhow*drag*aice - vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - utp)**2 + & - (vocn(i,j) - vtp)**2) ! m/s + vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - uvel(i,j))**2 + & + (vocn(i,j) - vvel(i,j))**2) ! m/s - Cb(i,j) = Tbu(i,j) / (sqrt(utp**2 + vtp**2) + u0) ! for basal stress + Cb(i,j) = Tbu(i,j) / (sqrt(uvel(i,j)**2 + vvel(i,j)**2) + u0) ! for basal stress enddo !
ij From 189703aac4cb2847c095bcf3d2ab603ec714cf4f Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2020 14:07:14 -0400 Subject: [PATCH 147/196] ice_dyn_vp: rename 'Diag[uv]' to 'diag[xy]' for consistency The rest of the code uses 'x' ('y') as a suffix for the x- and y- components of "vector field"-type arrays. Bring 'Diag[uv]' in line with this convention, and lose the capitalization. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index c2284316d..d2f3eb8b9 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -721,8 +721,8 @@ subroutine anderson_solver (icellt, icellu, & vrel , & ! coeff for tauw bx , & ! b vector by , & ! b vector - Diagu , & ! Diagonal (u component) of the matrix A - Diagv , & ! Diagonal (v component) of the matrix A + diagx , & ! Diagonal (x component) of the matrix A + diagy , & ! Diagonal (y component) of the matrix A Au , & ! matvec, Fx = bx - Au Av , & ! matvec, Fy = by - Av Fx , & ! x residual vector, Fx = bx - Au @@ -905,7 +905,7 @@ subroutine anderson_solver (icellt, icellu, & Dstrtmp (:,:,:) , vrel (:,:,iblk), & umassdti (:,:,iblk), & uarear (:,:,iblk), Cb (:,:,iblk), & - Diagu (:,:,iblk), Diagv (:,:,iblk)) + diagx (:,:,iblk), diagy (:,:,iblk)) enddo !$OMP END PARALLEL DO endif @@ -916,7 +916,7 @@ subroutine anderson_solver (icellt, icellu, & umassdti, & halo_info_mask, & bx, by, & - Diagu, Diagv, & + diagx, diagy, & reltol_fgmres, im_fgmres, & maxits_fgmres, & solx, soly, & @@ -2721,7 +2721,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & Dstr, vrel, & umassdti, & uarear, Cb, & - Diagu, Diagv ) + diagx, diagy ) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2744,8 +2744,8 @@ subroutine formDiag_step2 (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(out) :: & - Diagu , & ! matvec, Fx = bx - Au (N/m^2) - Diagv ! matvec, Fy = by - Av (N/m^2) + diagx , & ! Diagonal (x component) of the matrix A + diagy ! Diagonal (y component) of the matrix A ! local variables @@ -2788,8 +2788,8 @@ subroutine formDiag_step2 (nx_block, ny_block, & strinty = uarear(i,j)* & (Dstr(i,j,5) + Dstr(i,j,6) + Dstr(i,j,7) + Dstr(i,j,8)) - Diagu(i,j) = ccaimp - strintx - Diagv(i,j) = ccaimp - strinty + diagx(i,j) = ccaimp - strintx + diagy(i,j) = ccaimp - strinty enddo ! ij From 62ac6a4a890f3316c3b0445d871a4dd86ad6b6d2 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 15 May 2020 17:33:20 -0400 Subject: [PATCH 148/196] ice_dyn_vp: introduce 'CICE_USE_LAPACK' preprocessor macro The 'anderson_solver' subroutine uses calls from the LAPACK library, but these are strictly needed only for the Anderson solver, and not for the Picard solver. Introduce a preprocessor macro, 'CICE_USE_LAPACK', that can be used to "preprocess out" any code that uses LAPACK calls. Refactor the code so that the Picard solver works without LAPACK (this is easy since we only have one call to 'dnrm2' to remove, and an alternative implementation already exists, but is commented out). Uncomment it and add an 'ifdef' so that this code is used if 'CICE_USE_LAPACK' is not defined. Also, make the Anderson solver abort if CICE was compiled without LAPACK. These two changes make the model compile if LAPACK is not available.
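For machines that do provide LAPACK, the machine's Macros file must also link the library; schematically (a hypothetical example, the actual library names and paths are machine-dependent):

    # hypothetical Macros addition for a LAPACK-enabled machine
    SLIBS := $(SLIBS) -llapack -lblas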
Add an option file 'set_env.lapack' to automatically configure the model compilation upon 'cice.setup' to use LAPACK (note that Macros and env files for the chosen machine need to be modified to add LAPACK support; at the moment only 'cesium' is supported). --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 45 ++++++++++++-------- configuration/scripts/options/set_env.lapack | 1 + 2 files changed, 28 insertions(+), 18 deletions(-) create mode 100644 configuration/scripts/options/set_env.lapack diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d2f3eb8b9..f4590e2c8 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -763,7 +763,9 @@ subroutine anderson_solver (icellt, icellu, & fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x prog_norm , & ! norm of difference between current and previous solution nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x) +#ifdef CICE_USE_LAPACK ddot, dnrm2 , & ! BLAS functions +#endif conv ! needed for FGMRES !phb keep ? character(len=*), parameter :: subname = '(anderson_solver)' @@ -931,26 +933,27 @@ subroutine anderson_solver (icellt, icellu, & ! g_2(x) = x - A(x)x + b(x) = x - F(x) endif - ! Compute residual + ! Compute fixed point residual res = fpfunc - sol +#ifdef CICE_USE_LAPACK fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) - ! commented code is to compare fixed_point_res_L2norm BFB with progress_res_L2norm - ! (should be BFB if Picard iteration is used) - ! call vec_to_arrays (nx_block, ny_block, nblocks, & - ! max_blocks, icellu (:), ntot, & - ! indxui (:,:), indxuj(:,:), & - ! res (:), & - ! fpresx (:,:,:), fpresy (:,:,:)) - ! !$OMP PARALLEL DO PRIVATE(iblk) - ! do iblk = 1, nblocks - ! call calc_L2norm_squared (nx_block , ny_block, & - ! icellu (iblk), & - ! indxui (:,iblk), indxuj (:,iblk), & - ! fpresx(:,:,iblk), fpresy(:,:,iblk), & - ! L2norm (iblk)) - ! enddo - ! !$OMP END PARALLEL DO - ! fpres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) +#else + call vec_to_arrays (nx_block, ny_block, nblocks, & + max_blocks, icellu (:), ntot, & + indxui (:,:), indxuj(:,:), & + res (:), & + fpresx (:,:,:), fpresy (:,:,:)) + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + call calc_L2norm_squared (nx_block , ny_block, & + icellu (iblk), & + indxui (:,iblk), indxuj (:,iblk), & + fpresx(:,:,iblk), fpresy(:,:,iblk), & + L2norm (iblk)) + enddo + !$OMP END PARALLEL DO + fpres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) +#endif if (my_task == master_task .and. monitor_nonlin) then write(nu_diag, '(a,i4,a,d26.16)') "monitor_nonlin: iter_nonlin= ", it_nl, & " fixed_point_res_L2norm= ", fpres_norm @@ -971,6 +974,7 @@ subroutine anderson_solver (icellt, icellu, & ! Simple fixed point (Picard) iteration in this case sol = fpfunc else +#ifdef CICE_USE_LAPACK ! Begin Anderson acceleration if (it_nl > start_andacc) then ! Update residual difference vector @@ -1034,6 +1038,11 @@ subroutine anderson_solver (icellt, icellu, & sol = sol - (1-damping_andacc)*res endif endif +#else + ! 
Anderson solver is not usable without LAPACK; abort + call abort_ice(error_message=subname // " CICE was not compiled with LAPACK, and Anderson solver was chosen (algo_nonlin = 2)" , & + file=__FILE__, line=__LINE__) +#endif endif !----------------------------------------------------------------------- diff --git a/configuration/scripts/options/set_env.lapack b/configuration/scripts/options/set_env.lapack new file mode 100644 index 000000000..3571cef75 --- /dev/null +++ b/configuration/scripts/options/set_env.lapack @@ -0,0 +1 @@ +setenv ICE_CPPDEFS -DCICE_USE_LAPACK From 6057adab54753cb80d258e9166f46251ffc2be96 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 25 May 2020 11:47:54 -0400 Subject: [PATCH 149/196] ice_dyn_vp: convert 'algo_nonlin' to string Make the reference namelist easier to use by changing the type of the 'algo_nonlin' variable to a string. This way, we can use 'picard' and 'anderson' instead of '1' and '2'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- cicecore/cicedynB/general/ice_init.F90 | 6 +++--- configuration/scripts/ice_in | 2 +- configuration/scripts/options/set_nml.dynanderson | 2 +- configuration/scripts/options/set_nml.dynpicard | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index f4590e2c8..c19292a6a 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -48,7 +48,6 @@ module ice_dyn_vp maxits_pgmres , & ! max nb of iteration for pgmres monitor_fgmres , & ! print fgmres residual norm monitor_pgmres , & ! print pgmres residual norm - algo_nonlin , & ! nonlinear algorithm: 1: Picard iteration, 2: Anderson acceleration (andacc) fpfunc_andacc , & ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) im_andacc , & ! size of Anderson minimization matrix (number of saved previous residuals) start_andacc ! acceleration delay factor (acceleration starts at this iteration) @@ -65,6 +64,7 @@ module ice_dyn_vp reltol_andacc ! relative tolerance for Anderson acceleration character (len=char_len), public :: & + algo_nonlin , & ! nonlinear algorithm: 'picard' (Picard iteration), 'anderson' (Anderson acceleration) ortho_type ! type of orthogonalization for FGMRES ('cgs' or 'mgs') ! mmodule variables @@ -774,7 +774,7 @@ subroutine anderson_solver (icellt, icellu, & res_num = 0 ! If Picard iteration chosen, set number of saved residuals to zero - if (algo_nonlin == 1) then + if (algo_nonlin == 'picard') then im_andacc = 0 endif @@ -1040,7 +1040,7 @@ subroutine anderson_solver (icellt, icellu, & endif #else ! Anderson solver is not usable without LAPACK; abort - call abort_ice(error_message=subname // " CICE was not compiled with LAPACK, and Anderson solver was chosen (algo_nonlin = 2)" , & + call abort_ice(error_message=subname // " CICE was not compiled with LAPACK, and Anderson solver was chosen (algo_nonlin = 'anderson')" , & file=__FILE__, line=__LINE__) #endif endif diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 9ba45bff9..6a51dd9f1 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -347,7 +347,7 @@ subroutine input_data reltol_nonlin = 1e-8_dbl_kind ! nonlinear stopping criterion: reltol_nonlin*res(k=0) reltol_fgmres = 1e-2_dbl_kind ! fgmres stopping criterion: reltol_fgmres*res(k) reltol_pgmres = 1e-6_dbl_kind ! 
pgmres stopping criterion: reltol_pgmres*res(k) - algo_nonlin = 1 ! nonlinear algorithm: 1: Picard iteration, 2: Anderson acceleration (andacc) + algo_nonlin = 'picard' ! nonlinear algorithm: 'picard' (Picard iteration), 'anderson' (Anderson acceleration) fpfunc_andacc = 1 ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) im_andacc = 5 ! size of Anderson minimization matrix (number of saved previous residuals) reltol_andacc = 1e-6_dbl_kind ! relative tolerance for Anderson acceleration @@ -1590,9 +1590,9 @@ subroutine input_data write(nu_diag,1008) ' reltol_nonlin = ', reltol_nonlin write(nu_diag,1008) ' reltol_fgmres = ', reltol_fgmres write(nu_diag,1008) ' reltol_pgmres = ', reltol_pgmres - write(nu_diag,1020) ' algo_nonlin = ', algo_nonlin + write(nu_diag,1030) ' algo_nonlin = ', algo_nonlin write(nu_diag,1010) ' use_mean_vrel = ', use_mean_vrel - if (algo_nonlin == 2) then + if (algo_nonlin == 'anderson') then write(nu_diag,1020) ' fpfunc_andacc = ', fpfunc_andacc write(nu_diag,1020) ' im_andacc = ', im_andacc write(nu_diag,1008) ' reltol_andacc = ', reltol_andacc diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index e0ee87054..7bda8a34a 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -152,7 +152,7 @@ reltol_nonlin = 1e-8 reltol_fgmres = 1e-2 reltol_pgmres = 1e-6 - algo_nonlin = 1 + algo_nonlin = 'picard' use_mean_vrel = .false. fpfunc_andacc = 1 im_andacc = 5 diff --git a/configuration/scripts/options/set_nml.dynanderson b/configuration/scripts/options/set_nml.dynanderson index 91a7ab367..7be744eb9 100644 --- a/configuration/scripts/options/set_nml.dynanderson +++ b/configuration/scripts/options/set_nml.dynanderson @@ -1,3 +1,3 @@ kdyn = 3 -algo_nonlin = 2 +algo_nonlin = 'anderson' maxits_nonlin = 5000 diff --git a/configuration/scripts/options/set_nml.dynpicard b/configuration/scripts/options/set_nml.dynpicard index a4453e251..1f78fccfb 100644 --- a/configuration/scripts/options/set_nml.dynpicard +++ b/configuration/scripts/options/set_nml.dynpicard @@ -1,3 +1,3 @@ kdyn = 3 -algo_nonlin = 1 +algo_nonlin = 'picard' maxits_nonlin = 5000 From 9b5613a6ceb8f5edc100fb90da5c049e58ce914c Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 25 May 2020 15:53:14 -0400 Subject: [PATCH 150/196] ice_dyn_vp: convert 'precond' to string Make the namelist easier to use by converting the 'precond' namelist variable to a string. This way we can use 'ident', 'diag' or 'pgmres' instead of '1', '2' or '3'. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 24 +++++++++++------------ cicecore/cicedynB/general/ice_init.F90 | 4 ++-- configuration/scripts/ice_in | 2 +- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index c19292a6a..d8ba0efe0 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -41,7 +41,6 @@ module ice_dyn_vp integer (kind=int_kind), public :: & maxits_nonlin , & ! max nb of iteration for nonlinear solver - precond , & ! preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag im_fgmres , & ! size of fgmres Krylov subspace im_pgmres , & ! size of pgmres Krylov subspace maxits_fgmres , & ! max nb of iteration for fgmres @@ -64,6 +63,7 @@ module ice_dyn_vp reltol_andacc ! relative tolerance for Anderson acceleration character (len=char_len), public :: & + precond , & ! 
preconditioner for fgmres: 'ident' (identity), 'diag' (diagonal), 'pgmres' (Jacobi-preconditioned GMRES) algo_nonlin , & ! nonlinear algorithm: 'picard' (Picard iteration), 'anderson' (Anderson acceleration) ortho_type ! type of orthogonalization for FGMRES ('cgs' or 'mgs') @@ -890,7 +890,7 @@ subroutine anderson_solver (icellt, icellu, & ! g_1(x) = FGMRES(A(x), b(x)) ! Prepare precond matrix - if (precond .gt. 1) then + if (precond == 'diag' .or. precond == 'pgmres') then !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology @@ -3159,7 +3159,7 @@ subroutine fgmres (zetaD, & real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & hessenberg ! system matrix of the Hessenberg (least squares) system - integer (kind=int_kind) :: & + character (len=char_len) :: & precond_type ! type of preconditioner real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep @@ -3543,13 +3543,11 @@ subroutine pgmres (zetaD, & rhs_hess ! right hand side vector of the Hessenberg (least squares) system real (kind=dbl_kind), dimension(maxinner+1, maxinner) :: & - hessenberg ! system matrix of the Hessenberg (least squares) system - - integer (kind=int_kind) :: & - precond_type ! type of preconditioner + hessenberg ! system matrix of the Hessenberg (least squares) system character(len=char_len) :: & - ortho_type ! type of orthogonalization + precond_type , & ! type of preconditioner + ortho_type ! type of orthogonalization real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep @@ -3569,7 +3567,7 @@ subroutine pgmres (zetaD, & conv = c1 - precond_type = 2 ! Jacobi preconditioner + precond_type = 'diag' ! Jacobi preconditioner ortho_type = 'cgs' ! classical gram-schmidt ! Cells with no ice should be zero-initialized @@ -3882,7 +3880,7 @@ subroutine precondition(zetaD, & diagx , & ! diagonal of the system matrix (x components) diagy ! diagonal of the system matrix (y components) - integer (kind=int_kind), intent(in) :: & + character (len=char_len), intent(in) :: & precond_type ! type of preconditioner real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & @@ -3913,10 +3911,10 @@ subroutine precondition(zetaD, & character(len=*), parameter :: subname = '(precondition)' - if (precond_type == 1) then ! identity (no preconditioner) + if (precond_type == 'ident') then ! identity (no preconditioner) wx = vx wy = vy - elseif (precond_type == 2) then ! Jacobi preconditioner (diagonal) + elseif (precond_type == 'diag') then ! Jacobi preconditioner (diagonal) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks do ij =1, icellu(iblk) @@ -3928,7 +3926,7 @@ subroutine precondition(zetaD, & enddo ! ij enddo !$OMP END PARALLEL DO - elseif (precond_type == 3) then ! PGMRES (Jacobi-preconditioned GMRES) + elseif (precond_type == 'pgmres') then ! PGMRES (Jacobi-preconditioned GMRES) ! Initialize preconditioned vector to 0 !phb try with wx = vx or vx/diagx wx = c0 wy = c0 diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 6a51dd9f1..c612e6998 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -335,7 +335,7 @@ subroutine input_data Ktens = 0.0_dbl_kind ! T=Ktens*P (tensile strength: see Konig and Holland, 2010) e_ratio = 2.0_dbl_kind ! VP ellipse aspect ratio maxits_nonlin = 1000 ! max nb of iteration for nonlinear solver - precond = 3 ! 
preconditioner for fgmres: 1: identity, 2: diagonal 3: pgmres + diag + precond = 'pgmres' ! preconditioner for fgmres: 'ident' (identity), 'diag' (diagonal), 'pgmres' (Jacobi-preconditioned GMRES) im_fgmres = 50 ! size of fgmres Krylov subspace im_pgmres = 5 ! size of pgmres Krylov subspace maxits_fgmres = 50 ! max nb of iteration for fgmres @@ -1578,7 +1578,7 @@ subroutine input_data if (kdyn == 3) then write(nu_diag,1020) ' maxits_nonlin = ', maxits_nonlin - write(nu_diag,1020) ' precond = ', precond + write(nu_diag,1030) ' precond = ', precond write(nu_diag,1020) ' im_fgmres = ', im_fgmres write(nu_diag,1020) ' im_pgmres = ', im_pgmres write(nu_diag,1020) ' maxits_fgmres = ', maxits_fgmres diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 7bda8a34a..39df86b2d 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -140,7 +140,7 @@ ktransport = 1 ssh_stress = 'geostrophic' maxits_nonlin = 1000 - precond = 3 + precond = 'pgmres' im_fgmres = 50 im_pgmres = 5 maxits_fgmres = 1 From 811ba5a34ed3b3c400c55854536ac1b0dcd3814a Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 25 May 2020 16:24:16 -0400 Subject: [PATCH 151/196] ice_dyn_vp: convert 'monitor_{f,p}gmres' to logical Convert the namelist variables 'monitor_{f,p}gmres', which are used as logical values, to actual logical values. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++-- cicecore/cicedynB/general/ice_init.F90 | 10 +++++----- configuration/scripts/ice_in | 4 ++-- configuration/scripts/options/set_nml.diagimp | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d8ba0efe0..e175da028 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -45,14 +45,14 @@ module ice_dyn_vp im_pgmres , & ! size of pgmres Krylov subspace maxits_fgmres , & ! max nb of iteration for fgmres maxits_pgmres , & ! max nb of iteration for pgmres - monitor_fgmres , & ! print fgmres residual norm - monitor_pgmres , & ! print pgmres residual norm fpfunc_andacc , & ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) im_andacc , & ! size of Anderson minimization matrix (number of saved previous residuals) start_andacc ! acceleration delay factor (acceleration starts at this iteration) logical (kind=log_kind), public :: & monitor_nonlin , & ! print nonlinear residual norm + monitor_fgmres , & ! print fgmres residual norm + monitor_pgmres , & ! print pgmres residual norm use_mean_vrel ! use mean of previous 2 iterates to compute vrel real (kind=dbl_kind), public :: & diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index c612e6998..286d8aae7 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -340,9 +340,9 @@ subroutine input_data im_pgmres = 5 ! size of pgmres Krylov subspace maxits_fgmres = 50 ! max nb of iteration for fgmres maxits_pgmres = 5 ! max nb of iteration for pgmres - monitor_nonlin = .false. ! print nonlinear solver info - monitor_fgmres = 1 ! print fgmres info (0: nothing printed, 1: 1st ite only, 2: all iterations) - monitor_pgmres = 1 ! print pgmres info (0: nothing printed, 1: all iterations) + monitor_nonlin = .false. ! print nonlinear residual norm + monitor_fgmres = .false. ! print fgmres residual norm + monitor_pgmres = .false. ! print pgmres residual norm ortho_type = 'mgs' ! 
orthogonalization procedure 'cgs' or 'mgs' reltol_nonlin = 1e-8_dbl_kind ! nonlinear stopping criterion: reltol_nonlin*res(k=0) reltol_fgmres = 1e-2_dbl_kind ! fgmres stopping criterion: reltol_fgmres*res(k) @@ -1584,8 +1584,8 @@ subroutine input_data write(nu_diag,1020) ' maxits_fgmres = ', maxits_fgmres write(nu_diag,1020) ' maxits_pgmres = ', maxits_pgmres write(nu_diag,1010) ' monitor_nonlin = ', monitor_nonlin - write(nu_diag,1020) ' monitor_fgmres = ', monitor_fgmres - write(nu_diag,1020) ' monitor_pgmres = ', monitor_pgmres + write(nu_diag,1010) ' monitor_fgmres = ', monitor_fgmres + write(nu_diag,1010) ' monitor_pgmres = ', monitor_pgmres write(nu_diag,1030) ' ortho_type = ', ortho_type write(nu_diag,1008) ' reltol_nonlin = ', reltol_nonlin write(nu_diag,1008) ' reltol_fgmres = ', reltol_fgmres diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 39df86b2d..954fca9e0 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -146,8 +146,8 @@ maxits_fgmres = 1 maxits_pgmres = 1 monitor_nonlin = .false. - monitor_fgmres = 1 - monitor_pgmres = 1 + monitor_fgmres = .false. + monitor_pgmres = .false. ortho_type = 'mgs' reltol_nonlin = 1e-8 reltol_fgmres = 1e-2 diff --git a/configuration/scripts/options/set_nml.diagimp b/configuration/scripts/options/set_nml.diagimp index ea875b0cc..940754157 100644 --- a/configuration/scripts/options/set_nml.diagimp +++ b/configuration/scripts/options/set_nml.diagimp @@ -1,3 +1,3 @@ monitor_nonlin = .true. -monitor_fgmres = 2 -monitor_pgmres = 1 +monitor_fgmres = .true. +monitor_pgmres = .true. From bf6a0157b341a9120df2205475d19671d09beae0 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 25 May 2020 16:56:11 -0400 Subject: [PATCH 152/196] ice_dyn_vp: remove unimplemented 'fpfunc_andacc' from the namelist In the future, we might add a second fixed point function, g_2(x), instead of the regular Picard fixed point function currently implemented, g_1(x) = FGMRES(A(x), x0, b(x)) - x Remove the namelist flag 'fpfunc_andacc' from the reference namelist, and make the model abort if this flag is used, since only g_1(x) is currently implemented. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 2 ++ configuration/scripts/ice_in | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index e175da028..d92497d24 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -931,6 +931,8 @@ subroutine anderson_solver (icellt, icellu, & fpfunc(:)) elseif (fpfunc_andacc == 2) then ! g_2(x) = x - A(x)x + b(x) = x - F(x) + call abort_ice(error_message=subname // " Fixed point function g_2(x) not yet implemented (fpfunc_andacc = 2)" , & + file=__FILE__, line=__LINE__) endif ! Compute fixed point residual diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 954fca9e0..ddecea6f4 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -154,7 +154,6 @@ reltol_pgmres = 1e-6 algo_nonlin = 'picard' use_mean_vrel = .false. - fpfunc_andacc = 1 im_andacc = 5 reltol_andacc = 1e-6 damping_andacc = 0 From d99912a299436915b2b167e4f86d218b3e14eab1 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 25 May 2020 17:32:41 -0400 Subject: [PATCH 153/196] ice_dyn_vp: add input validation for string namelist variables Add input validation in ice_init for 'algo_nonlin', 'precond' and 'ortho_type'. 
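As a minimal, self-contained sketch of this kind of validation for a
string-valued namelist variable (the program name and hard-coded value
below are hypothetical, for illustration only; only the
trim/compare/abort pattern mirrors what is added to ice_init):

    ! Hypothetical sketch of string namelist validation, illustration only
    program demo_validate
      implicit none
      character(len=80) :: algo_nonlin
      algo_nonlin = 'picardd'   ! pretend this was read from a namelist
      if (.not. (trim(algo_nonlin) == 'picard' .or. &
                 trim(algo_nonlin) == 'anderson')) then
         write(*,*) 'ERROR: unknown algo_nonlin: '//trim(algo_nonlin)
         write(*,*) 'ERROR: allowed values: picard, anderson'
         stop 1
      endif
    end program demo_validate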
Currently, the code that resets 'im_andacc' to zero if the Picard
solver is chosen (algo_nonlin = 'picard') is inside the 'anderson_solver'
subroutine, which is not clean because 'im_andacc' is used as a
dimension for several local variables. These variables would thus have
whatever size 'im_andacc' is set to in the namelist, even if the Picard
solver is chosen.

Fix that by moving the code that sets 'im_andacc' to zero if the Picard
solver is chosen (algo_nonlin = 'picard') to ice_init.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 10 +++-----
 cicecore/cicedynB/general/ice_init.F90    | 30 +++++++++++++++++++++++
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index d92497d24..878c27c00 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -67,7 +67,7 @@ module ice_dyn_vp
       algo_nonlin   , & ! nonlinear algorithm: 'picard' (Picard iteration), 'anderson' (Anderson acceleration)
       ortho_type        ! type of orthogonalization for FGMRES ('cgs' or 'mgs')
 
-  ! mmodule variables
+  ! module variables
 
   integer (kind=int_kind), allocatable :: &
      icellt(:)   , & ! no. of cells where icetmask = 1
@@ -773,11 +773,6 @@ subroutine anderson_solver (icellt, icellu, &
       ! Initialization
       res_num = 0
 
-      ! If Picard iteration chosen, set number of saved residuals to zero
-      if (algo_nonlin == 'picard') then
-         im_andacc = 0
-      endif
-
       !$OMP PARALLEL DO PRIVATE(iblk)
       do iblk = 1, nblocks
          uprev_k(:,:,iblk) = uvel(:,:,iblk)
@@ -3945,7 +3940,8 @@ subroutine precondition(zetaD, &
                       wx, wy, &
                       nbiter, conv)
       else
-
+         call abort_ice(error_message='wrong preconditioner in ' // subname, &
+            file=__FILE__, line=__LINE__)
       endif
 
   end subroutine precondition

diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90
index 286d8aae7..c9f78ba40 100644
--- a/cicecore/cicedynB/general/ice_init.F90
+++ b/cicecore/cicedynB/general/ice_init.F90
@@ -1084,6 +1084,36 @@ subroutine input_data
          endif
       endif
 
+      ! Implicit solver input validation
+      if (.not. (trim(algo_nonlin) == 'picard' .or. trim(algo_nonlin) == 'anderson')) then
+         if (my_task == master_task) then
+            write(nu_diag,*) subname//' ERROR: unknown algo_nonlin: '//algo_nonlin
+            write(nu_diag,*) subname//' ERROR: allowed values: ''picard'', ''anderson'''
+         endif
+         abort_list = trim(abort_list)//":60"
+      endif
+
+      if (trim(algo_nonlin) == 'picard') then
+         ! Picard solver is implemented in the Anderson solver; reset number of saved residuals to zero
+         im_andacc = 0
+      endif
+
+      if (.not. (trim(precond) == 'ident' .or. trim(precond) == 'diag' .or. trim(precond) == 'pgmres')) then
+         if (my_task == master_task) then
+            write(nu_diag,*) subname//' ERROR: unknown precond: '//precond
+            write(nu_diag,*) subname//' ERROR: allowed values: ''ident'', ''diag'', ''pgmres'''
+         endif
+         abort_list = trim(abort_list)//":61"
+      endif
+
+      if (.not. (trim(ortho_type) == 'cgs' .or. 
trim(ortho_type) == 'mgs')) then + if (my_task == master_task) then + write(nu_diag,*) subname//' ERROR: unknown ortho_type: '//ortho_type + write(nu_diag,*) subname//' ERROR: allowed values: ''cgs'', ''mgs''' + endif + abort_list = trim(abort_list)//":62" + endif + ice_IOUnitsMinUnit = numin ice_IOUnitsMaxUnit = numax From 63bdac12031c0ed24c75a3fff818700d0954e312 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 29 May 2020 13:57:29 -0400 Subject: [PATCH 154/196] ice_dyn_vp: abort if Anderson solver is used in parallel Since the Anderson solver is not yet parallelized, abort the model if users attempt to run it on more than one proc. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 878c27c00..a53f65960 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -15,7 +15,7 @@ module ice_dyn_vp use ice_kinds_mod use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_halo - use ice_communicate, only: my_task, master_task + use ice_communicate, only: my_task, master_task, get_num_procs use ice_constants, only: field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector use ice_constants, only: c0, p027, p055, p111, p166, & @@ -973,6 +973,13 @@ subroutine anderson_solver (icellt, icellu, & else #ifdef CICE_USE_LAPACK ! Begin Anderson acceleration + if (get_num_procs() > 1) then + ! Anderson solver is not yet parallelized; abort + if (my_task == master_task) then + call abort_ice(error_message=subname // " Anderson solver (algo_nonlin = 'anderson') is not yet parallelized, and nprocs > 1 " , & + file=__FILE__, line=__LINE__) + endif + endif if (it_nl > start_andacc) then ! Update residual difference vector res_diff = res - res_old From 238162540fa661df927be677ba0652aa5638a0bd Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 1 Jun 2020 13:41:27 -0400 Subject: [PATCH 155/196] ice_dyn_vp: reimplement monitor_{f,p}gmres The ability to monitor the residual norm of the FGMRES solver and the PGMRES preconditioner was lost when we transitioned to the new solver implementation. Re-add this capability. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a53f65960..264b65da0 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -3236,6 +3236,11 @@ subroutine fgmres (zetaD, & !$OMP END PARALLEL DO norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) + if (my_task == master_task .and. monitor_fgmres) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_fgmres: iter_fgmres= ", nbiter, & + " fgmres_L2norm= ", norm_residual + endif + ! Current guess is a good enough solution ! if (norm_residual < tolerance) then ! return @@ -3371,6 +3376,12 @@ subroutine fgmres (zetaD, & ! Check for convergence norm_residual = abs(rhs_hess(nextit)) conv = norm_residual / r0 + + if (my_task == master_task .and. monitor_fgmres) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_fgmres: iter_fgmres= ", nbiter, & + " fgmres_L2norm= ", norm_residual + endif + if ((initer >= maxinner) .or. 
(norm_residual <= relative_tolerance)) then exit endif @@ -3624,6 +3635,11 @@ subroutine pgmres (zetaD, & !$OMP END PARALLEL DO norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) + if (my_task == master_task .and. monitor_pgmres) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_pgmres: iter_pgmres= ", nbiter, & + " pgmres_L2norm= ", norm_residual + endif + ! Current guess is a good enough solution ! if (norm_residual < tolerance) then ! return @@ -3753,6 +3769,12 @@ subroutine pgmres (zetaD, & ! Check for convergence norm_residual = abs(rhs_hess(nextit)) + + if (my_task == master_task .and. monitor_pgmres) then + write(nu_diag, '(a,i4,a,d26.16)') "monitor_pgmres: iter_pgmres= ", nbiter, & + " pgmres_L2norm= ", norm_residual + endif + conv = norm_residual / r0 if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then exit From c23bf69e653140f75e634a7daf5e2ff8180d8b5e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 1 Jun 2020 15:49:05 -0400 Subject: [PATCH 156/196] ice_dyn_vp: use 'ice_HaloUpdate_vel' everywhere By performing the halo update after creating the halo_info_mask in imp_solver, we can use the ice_HaloUpdate_vel subroutine, increasing code reuse. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 45 ++--------------------- 1 file changed, 4 insertions(+), 41 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 264b65da0..2208e63ca 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -392,10 +392,6 @@ subroutine imp_solver (dt) strength(i,j, iblk) ) enddo ! ij - ! load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo ! iblk !$TCXOMP END PARALLEL DO @@ -406,19 +402,8 @@ subroutine imp_solver (dt) call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) - ! velocities may have changed in dyn_prep2 - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) call ice_timer_stop(timer_bound) - ! unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - if (maskhalo_dyn) then call ice_timer_start(timer_bound) halomask = 0 @@ -429,6 +414,9 @@ subroutine imp_solver (dt) call ice_HaloMask(halo_info_mask, halo_info, halomask) endif + ! velocities may have changed in dyn_prep2 + call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + !----------------------------------------------------------------- ! basal stress coefficients (landfast ice) !----------------------------------------------------------------- @@ -1059,32 +1047,7 @@ subroutine anderson_solver (icellt, icellu, & uvel (:,:,:), vvel (:,:,:)) ! phb NOT SURE IF THIS HALO UPDATE IS ACTUALLY NEEDED - ! Should use ice_Haloupdate_vel - ! Load velocity into array for boundary updates - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo - !$OMP END PARALLEL DO - - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) - - ! 
Unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO + call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) ! Compute fixed point residual norm !$OMP PARALLEL DO PRIVATE(iblk) From 64d7bcc0a73f7c36983e39de207a086b92e21b2e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 1 Jun 2020 16:35:54 -0400 Subject: [PATCH 157/196] ice_dyn_vp: move ice_HaloUpdate_vel subroutine to ice_dyn_shared Move the subroutine to ice_dyn_shared so it can be used in ice_dyn_evp and ice_dyn_eap. --- cicecore/cicedynB/dynamics/ice_dyn_shared.F90 | 61 ++++++++++++++++++- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 60 +----------------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index 9c88fe79b..8670812e6 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -24,7 +24,7 @@ module ice_dyn_shared private public :: init_evp, set_evp_parameters, stepu, principal_stress, & dyn_prep1, dyn_prep2, dyn_finish, basal_stress_coeff, & - alloc_dyn_shared, deformations, strain_rates + alloc_dyn_shared, deformations, strain_rates, ice_HaloUpdate_vel ! namelist parameters @@ -78,7 +78,7 @@ module ice_dyn_shared real (kind=dbl_kind), dimension (:,:,:), allocatable, public :: & uvel_init, & ! x-component of velocity (m/s), beginning of timestep vvel_init ! y-component of velocity (m/s), beginning of timestep - + ! ice isotropic tensile strength parameter real (kind=dbl_kind), public :: & Ktens ! T=Ktens*P (tensile strength: see Konig and Holland, 2010) @@ -95,6 +95,8 @@ module ice_dyn_shared ! see keel data from Amundrud et al. 2004 (JGR) u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) + real (kind=dbl_kind), allocatable :: & + fld2(:,:,:,:) ! work array for boundary updates !======================================================================= @@ -111,6 +113,7 @@ subroutine alloc_dyn_shared allocate( & uvel_init (nx_block,ny_block,max_blocks), & ! x-component of velocity (m/s), beginning of timestep vvel_init (nx_block,ny_block,max_blocks), & ! y-component of velocity (m/s), beginning of timestep + fld2 (nx_block,ny_block,2,max_blocks), & ! work array for boundary updates stat=ierr) if (ierr/=0) call abort_ice('(alloc_dyn_shared): Out of memory') @@ -1184,6 +1187,60 @@ subroutine strain_rates (nx_block, ny_block, & end subroutine strain_rates +!======================================================================= + +! Perform a halo update for the velocity field +! author: Philippe Blain, ECCC + + subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + + use ice_boundary, only: ice_HaloUpdate, ice_halo + use ice_constants, only: field_loc_NEcorner, field_type_vector + use ice_domain, only: halo_info, maskhalo_dyn, nblocks + use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop + + type (ice_halo), intent(in) :: & + halo_info_mask ! ghost cell update info for masked halo + + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & + uvel , & ! u components of velocity vector + vvel ! v components of velocity vector + + ! local variables + + integer (kind=int_kind) :: & + iblk ! block index + + character(len=*), parameter :: subname = '(ice_HaloUpdate_vel)' + + ! 
load velocity into array for boundary updates + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + fld2(:,:,1,iblk) = uvel(:,:,iblk) + fld2(:,:,2,iblk) = vvel(:,:,iblk) + enddo + !$OMP END PARALLEL DO + + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + + ! Unload + !$OMP PARALLEL DO PRIVATE(iblk) + do iblk = 1, nblocks + uvel(:,:,iblk) = fld2(:,:,1,iblk) + vvel(:,:,iblk) = fld2(:,:,2,iblk) + enddo + !$OMP END PARALLEL DO + + end subroutine ice_HaloUpdate_vel + !======================================================================= end module ice_dyn_shared diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 2208e63ca..ecb718e93 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -24,7 +24,7 @@ module ice_dyn_vp use ice_domain_size, only: max_blocks use ice_dyn_shared, only: dyn_prep1, dyn_prep2, dyn_finish, & ecci, cosw, sinw, fcor_blk, uvel_init, & - vvel_init, basal_stress_coeff, basalstress, Ktens + vvel_init, basal_stress_coeff, basalstress, Ktens, ice_HaloUpdate_vel use ice_fileunits, only: nu_diag use ice_flux, only: fm use ice_global_reductions, only: global_sum, global_sums @@ -78,9 +78,6 @@ module ice_dyn_vp indxtj(:,:) , & ! compressed index in j-direction indxui(:,:) , & ! compressed index in i-direction indxuj(:,:) ! compressed index in j-direction - - real (kind=dbl_kind), allocatable :: & - fld2(:,:,:,:) ! work array for boundary updates !======================================================================= @@ -125,7 +122,6 @@ subroutine init_vp (dt) indxtj(nx_block*ny_block, max_blocks), & indxui(nx_block*ny_block, max_blocks), & indxuj(nx_block*ny_block, max_blocks)) - allocate(fld2(nx_block,ny_block,2,max_blocks)) ! Redefine tinyarea using min_strain_rate @@ -4094,60 +4090,6 @@ logical function almost_zero(A) result(retval) end function almost_zero -!======================================================================= - -! Perform a halo update for the velocity field -! author: Philippe Blain, ECCC - - subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) - - use ice_boundary, only: ice_HaloUpdate - use ice_constants, only: field_loc_NEcorner, field_type_vector - use ice_domain, only: halo_info, maskhalo_dyn - use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop - - type (ice_halo), intent(in) :: & - halo_info_mask ! ghost cell update info for masked halo - - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & - uvel , & ! u components of velocity vector - vvel ! v components of velocity vector - - ! local variables - - integer (kind=int_kind) :: & - iblk ! block index - - character(len=*), parameter :: subname = '(ice_HaloUpdate_vel)' - - ! load velocity into array for boundary updates - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo - !$OMP END PARALLEL DO - - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) - - ! 
Unload
-      !$OMP PARALLEL DO PRIVATE(iblk)
-      do iblk = 1, nblocks
-         uvel(:,:,iblk) = fld2(:,:,1,iblk)
-         vvel(:,:,iblk) = fld2(:,:,2,iblk)
-      enddo
-      !$OMP END PARALLEL DO
-
-  end subroutine ice_HaloUpdate_vel
-
 !=======================================================================
 
       end module ice_dyn_vp

From e85bca8ade2abefa9de732effbe77cc940457ec8 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 1 Jun 2020 16:47:49 -0400
Subject: [PATCH 158/196] ice_dyn_evp: use ice_HaloUpdate_vel

Now that the subroutine is in ice_dyn_shared, use it to increase code
reuse.
---
 cicecore/cicedynB/dynamics/ice_dyn_evp.F90 | 47 +++-------------------
 1 file changed, 5 insertions(+), 42 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90
index 17c61d083..af06e5d70 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90
@@ -94,7 +94,7 @@ subroutine evp (dt)
           ice_timer_start, ice_timer_stop, timer_evp_1d, timer_evp_2d
       use ice_dyn_evp_1d, only: ice_dyn_evp_1d_copyin, ice_dyn_evp_1d_kernel, &
           ice_dyn_evp_1d_copyout
-      use ice_dyn_shared, only: kevp_kernel
+      use ice_dyn_shared, only: kevp_kernel, ice_HaloUpdate_vel
 
       real (kind=dbl_kind), intent(in) :: &
          dt      ! time step
@@ -127,8 +127,6 @@ subroutine evp (dt)
          umass    , & ! total mass of ice and snow (u grid)
          umassdti     ! mass of U-cell/dte (kg/m^2 s)
 
-      real (kind=dbl_kind), allocatable :: fld2(:,:,:,:)
-
       real (kind=dbl_kind), dimension(nx_block,ny_block,8):: &
          strtmp   ! stress combinations for momentum equation
 
@@ -154,8 +152,6 @@ subroutine evp (dt)
       ! Initialize
       !-----------------------------------------------------------------
 
-      allocate(fld2(nx_block,ny_block,2,max_blocks))
-
       ! This call is needed only if dt changes during runtime.
!      call set_evp_parameters (dt)
 
@@ -297,10 +293,6 @@ subroutine evp (dt)
                          strength = strength(i,j, iblk) )
          enddo  ! ij
 
-         ! 
load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) enddo !$TCXOMP END PARALLEL DO - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) - - ! unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO + call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) enddo ! subcycling endif ! kevp_kernel call ice_timer_stop(timer_evp_2d) - deallocate(fld2) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! Force symmetry across the tripole seam From becf7e5f9cb7a6b3e8c36569dba73a6d1eae8623 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 1 Jun 2020 16:48:31 -0400 Subject: [PATCH 159/196] ice_dyn_eap: use ice_HaloUpdate_vel Now that the subroutine is in ice_dyn_shared, use it to increase code reuse. --- cicecore/cicedynB/dynamics/ice_dyn_eap.F90 | 48 +++------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 index 9e16d2cc2..ad99f8482 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 @@ -122,7 +122,7 @@ subroutine eap (dt) use ice_dyn_shared, only: fcor_blk, ndte, dtei, & denom1, uvel_init, vvel_init, arlx1i, & dyn_prep1, dyn_prep2, stepu, dyn_finish, & - basal_stress_coeff, basalstress + basal_stress_coeff, basalstress, ice_HaloUpdate_vel use ice_flux, only: rdg_conv, strairxT, strairyT, & strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, & strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, & @@ -172,8 +172,6 @@ subroutine eap (dt) umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) - real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) - real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp ! stress combinations for momentum equation @@ -197,8 +195,6 @@ subroutine eap (dt) ! Initialize !----------------------------------------------------------------- - allocate(fld2(nx_block,ny_block,2,max_blocks)) - ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -354,11 +350,6 @@ subroutine eap (dt) vicen = vicen (i,j,:,iblk), & strength = strength(i,j, iblk) ) enddo ! ij - - ! load velocity into array for boundary updates - fld2(:,:,1,iblk) = uvel(:,:,iblk) - fld2(:,:,2,iblk) = vvel(:,:,iblk) - enddo ! iblk !$TCXOMP END PARALLEL DO @@ -369,19 +360,8 @@ subroutine eap (dt) call ice_timer_start(timer_bound) call ice_HaloUpdate (strength, halo_info, & field_loc_center, field_type_scalar) - ! velocities may have changed in dyn_prep2 - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) call ice_timer_stop(timer_bound) - ! unload - !$OMP PARALLEL DO PRIVATE(iblk) - do iblk = 1, nblocks - uvel(:,:,iblk) = fld2(:,:,1,iblk) - vvel(:,:,iblk) = fld2(:,:,2,iblk) - enddo - !$OMP END PARALLEL DO - if (maskhalo_dyn) then call ice_timer_start(timer_bound) halomask = 0 @@ -392,6 +372,9 @@ subroutine eap (dt) call ice_HaloMask(halo_info_mask, halo_info, halomask) endif + ! 
velocities may have changed in dyn_prep2
+      call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask)
+
       !-----------------------------------------------------------------
       ! basal stress coefficients (landfast ice)
       !-----------------------------------------------------------------
@@ -472,10 +455,6 @@ subroutine eap (dt)
                            uvel      (:,:,iblk), vvel      (:,:,iblk), &
                            Tbu       (:,:,iblk))
 
-         ! load velocity into array for boundary updates
-         fld2(:,:,1,iblk) = uvel(:,:,iblk)
-         fld2(:,:,2,iblk) = vvel(:,:,iblk)
-
          !-----------------------------------------------------------------
          ! evolution of structure tensor A
          !-----------------------------------------------------------------
@@ -501,27 +480,10 @@ subroutine eap (dt)
          enddo
          !$TCXOMP END PARALLEL DO
 
-         call ice_timer_start(timer_bound)
-         if (maskhalo_dyn) then
-            call ice_HaloUpdate (fld2,               halo_info_mask, &
-                                 field_loc_NEcorner, field_type_vector)
-         else
-            call ice_HaloUpdate (fld2,               halo_info, &
-                                 field_loc_NEcorner, field_type_vector)
-         endif
-         call ice_timer_stop(timer_bound)
-
-         ! unload
-         !$OMP PARALLEL DO PRIVATE(iblk)
-         do iblk = 1, nblocks
-            uvel(:,:,iblk) = fld2(:,:,1,iblk)
-            vvel(:,:,iblk) = fld2(:,:,2,iblk)
-         enddo
-         !$OMP END PARALLEL DO
+         call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask)
 
      enddo                     ! subcycling
 
-      deallocate(fld2)
      if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask)
 
      !-----------------------------------------------------------------

From dc4174cae05e96a12be8ea7f37a87b2ee41d06bb Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 2 Jun 2020 15:48:00 -0400
Subject: [PATCH 160/196] options: set 'use_mean_vrel' in 'dynpicard'

Preliminary testing suggests convergence is improved when using the
mean of the two previous estimates to compute 'vrel' when the Picard
solver is used. Set this as the default with option 'dynpicard'.
---
 configuration/scripts/options/set_nml.dynpicard | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configuration/scripts/options/set_nml.dynpicard b/configuration/scripts/options/set_nml.dynpicard
index 1f78fccfb..8e21b0e0d 100644
--- a/configuration/scripts/options/set_nml.dynpicard
+++ b/configuration/scripts/options/set_nml.dynpicard
@@ -1,3 +1,4 @@
 kdyn = 3
 algo_nonlin = 'picard'
 maxits_nonlin = 5000
+use_mean_vrel = .true.

From c6e377b95178a4d6ef798cb523d442d96e471cd4 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 2 Jun 2020 15:53:38 -0400
Subject: [PATCH 161/196] conda: add 'liblapack' to environment spec and Macros

Add the necessary conda package to build CICE with LAPACK, so that the
Anderson solver can be tested in the conda environment.
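The Anderson solver needs LAPACK because each acceleration step solves
a small least-squares problem. A minimal sketch of the kind of LAPACK
call involved, assuming a program linked with '-llapack' (the 3x2
system below is made up for illustration, and the actual routines used
by the solver may differ; only the DGELS interface itself is standard
LAPACK):

    ! Hypothetical example: minimize ||A x - b||_2 with LAPACK's DGELS
    program demo_dgels
      implicit none
      integer, parameter :: dp = kind(1.d0)
      real(dp) :: a(3,2), b(3,1), work(64)
      integer  :: info
      a = reshape([1._dp, 1._dp, 1._dp, 0._dp, 1._dp, 2._dp], [3,2])
      b = reshape([6._dp, 0._dp, 0._dp], [3,1])
      call dgels('N', 3, 2, 1, a, 3, b, 3, work, size(work), info)
      if (info /= 0) stop 1
      write(*,*) 'least-squares solution:', b(1:2,1)  ! solution overwrites b
    end program demo_dgels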
---
 configuration/scripts/machines/Macros.conda_linux | 2 +-
 configuration/scripts/machines/Macros.conda_macos | 2 +-
 configuration/scripts/machines/environment.yml    | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/configuration/scripts/machines/Macros.conda_linux b/configuration/scripts/machines/Macros.conda_linux
index 32c5ae012..c821a4592 100644
--- a/configuration/scripts/machines/Macros.conda_linux
+++ b/configuration/scripts/machines/Macros.conda_linux
@@ -40,7 +40,7 @@ LD:= $(FC)
 MODDIR += -I$(CONDA_PREFIX)/include
 
 # Libraries to be passed to the linker
-SLIBS   := -L$(CONDA_PREFIX)/lib -lnetcdf -lnetcdff
+SLIBS   := -L$(CONDA_PREFIX)/lib -lnetcdf -lnetcdff -llapack
 
 # Necessary flag to compile with OpenMP support
 ifeq ($(ICE_THREADED), true)
diff --git a/configuration/scripts/machines/Macros.conda_macos b/configuration/scripts/machines/Macros.conda_macos
index 0d866d9a2..4acc4d3ba 100644
--- a/configuration/scripts/machines/Macros.conda_macos
+++ b/configuration/scripts/machines/Macros.conda_macos
@@ -48,7 +48,7 @@ else
 endif
 
 # Libraries to be passed to the linker
-SLIBS   := -L$(CONDA_PREFIX)/lib -lnetcdf -lnetcdff
+SLIBS   := -L$(CONDA_PREFIX)/lib -lnetcdf -lnetcdff -llapack
 
 # Necessary flag to compile with OpenMP support
 ifeq ($(ICE_THREADED), true)
diff --git a/configuration/scripts/machines/environment.yml b/configuration/scripts/machines/environment.yml
index aab90d23c..57bdacfec 100644
--- a/configuration/scripts/machines/environment.yml
+++ b/configuration/scripts/machines/environment.yml
@@ -8,6 +8,7 @@ dependencies:
   - netcdf-fortran
   - openmpi
   - make
+  - liblapack
 # Python dependencies for plotting scripts
   - numpy
   - matplotlib-base

From 2b5b45ab3d6bbfab66d2f628cc0e304d552d19b7 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 12 Jun 2020 09:18:14 -0400
Subject: [PATCH 162/196] ice_dyn_vp: initialize 'L2norm' and 'norm_squared' to zero

If we don't initialize these arrays, using any MPI decomposition for
which nblocks /= max_blocks on some process will cause uninitialized
values to be summed when computing the local sum for each block, like
in

    nlres_norm = sqrt(global_sum(sum(L2norm), distrb_info))

Initialize them to zero.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index ecb718e93..5809ca0db 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -756,6 +756,7 @@ subroutine anderson_solver (icellt, icellu, &
 
       ! Initialization
       res_num = 0
+      L2norm  = c0
 
       !$OMP PARALLEL DO PRIVATE(iblk)
       do iblk = 1, nblocks
@@ -3142,7 +3143,7 @@ subroutine fgmres (zetaD, &
 
       nbiter = 0
       conv = c1
-
+      norm_squared = c0
       precond_type = precond
 
       ! Cells with no ice should be zero-initialized
@@ -3540,7 +3541,7 @@ subroutine pgmres (zetaD, &
 
       nbiter = 0
      conv = c1
-
+      norm_squared = c0
       precond_type = 'diag' ! Jacobi preconditioner
       ortho_type = 'cgs' ! 
classical gram-schmidt

From a8e420b8efae4b2b73655e490ebaa1f394d75ff1 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Thu, 25 Jun 2020 12:21:01 -0400
Subject: [PATCH 163/196] ice_dyn_vp: initialize 'stPr' and 'zetaD' to zero

Since 'stPr' and 'zetaD' are initialized with a loop over 'icellt',
but are used in loops over 'icellu', it is possible that these latter
loops use uninitialized values since 'iceumask' and 'icetmask' can
differ at some grid points since they are not defined using the same
criteria (see ice_dyn_shared::dyn_prep1 and ice_dyn_shared::dyn_prep2).

To avoid using uninitialized values, initialize the 'stPr' and 'zetaD'
arrays to zero, so that any index corresponding to a cell with no ice
is set to zero. This mimics what is done for EVP, where the 'str' array
is initialized to zero in ice_dyn_evp::stress.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 5809ca0db..cadc331f4 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -1136,8 +1136,14 @@ subroutine calc_zeta_Pr (nx_block, ny_block, &
 
       character(len=*), parameter :: subname = '(calc_zeta_Pr)'
 
+      ! Initialize
+
       capping = .false.
 
+      ! Initialize stPr and zetaD to zero (for cells where icetmask is false)
+      stPr  = c0
+      zetaD = c0
+
 !DIR$ CONCURRENT !Cray
 !cdir nodep      !NEC
 !ocl novrec      !Fujitsu

From 0986749a290ad08bfa32b702a0b217d14624f88b Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 7 Jul 2020 18:46:38 -0400
Subject: [PATCH 164/196] doc: document implicit solver

---
 doc/source/cice_index.rst                  |   2 +-
 doc/source/developer_guide/dg_dynamics.rst |  10 +-
 doc/source/master_list.bib                 |  27 +++
 doc/source/science_guide/sg_dynamics.rst   | 226 +++++++++++++++------
 doc/source/user_guide/ug_case_settings.rst |  19 ++
 5 files changed, 214 insertions(+), 70 deletions(-)

diff --git a/doc/source/cice_index.rst b/doc/source/cice_index.rst
index 229fa92d5..ebcbee77d 100644
--- a/doc/source/cice_index.rst
+++ b/doc/source/cice_index.rst
@@ -336,7 +336,7 @@ either Celsius or Kelvin units).
    "kalg", ":math:`\bullet` absorption coefficient for algae", ""
    "kappav", "visible extinction coefficient in ice, wavelength\ :math:`<`\ 700nm", "1.4 m\ :math:`^{-1}`"
    "kcatbound", ":math:`\bullet` category boundary formula", ""
-   "kdyn", ":math:`\bullet` type of dynamics (1 = EVP, 0 = off)", "1"
+   "kdyn", ":math:`\bullet` type of dynamics (1 = EVP, 2 = EAP, 3 = VP, 0,-1 = off)", "1"
    "kg_to_g", "kg to g conversion factor", "1000."
    "kice", "thermal conductivity of fresh ice (:cite:`Bitz99`)", "2.03 W/m/deg"
    "kitd", ":math:`\bullet` type of itd conversions (0 = delta function, 1 = linear remap)", "1"
diff --git a/doc/source/developer_guide/dg_dynamics.rst b/doc/source/developer_guide/dg_dynamics.rst
index 0a48513dc..03f40011d 100644
--- a/doc/source/developer_guide/dg_dynamics.rst
+++ b/doc/source/developer_guide/dg_dynamics.rst
@@ -30,13 +30,13 @@ Dynamical Solvers
 --------------------
 
 The dynamics solvers are found in **cicecore/cicedynB/dynamics/**. A couple of different solvers are
-available including EVP, revised EVP, and EAP. The dynamics solver is specified in namelist with the
-``kdyn`` variable. ``kdyn=1`` is evp, ``kdyn=2`` is eap, and revised evp requires the ``revised_evp``
-namelist flag be set to true.
+available including EVP, revised EVP, EAP and VP. The dynamics solver is specified in namelist with the
+``kdyn`` variable. 
``kdyn=1`` is EVP, ``kdyn=2`` is EAP, ``kdyn=3`` is VP, and revised EVP requires
+the ``revised_evp`` namelist flag be set to true.
 
-Multiple evp solvers are supported thru the namelist flag ``kevp_kernel``. The standard implementation
+Multiple EVP solvers are supported through the namelist flag ``kevp_kernel``. The standard implementation
 and current default is ``kevp_kernel=0``. In this case, the stress is solved on the regular decomposition
-via subcycling and calls to subroutine stress and subroutine stepu with MPI global sums required in each
+via subcycling and calls to subroutine ``stress`` and subroutine ``stepu`` with MPI global sums required in each
 subcycling call. With ``kevp_kernel=2``, the data required to compute the stress is gathered to the root
 MPI process and the stress calculation is performed on the root task without any MPI global sums.
 OpenMP parallelism is supported in ``kevp_kernel=2``. The solutions with ``kevp_kernel`` set to 0 or 2 will
diff --git a/doc/source/master_list.bib b/doc/source/master_list.bib
index caa93ec06..0b928d012 100644
--- a/doc/source/master_list.bib
+++ b/doc/source/master_list.bib
@@ -59,6 +59,8 @@ @string{GMD
 @string{CRST = {Cold Reg. Sci. Technol.}}
 @string{IJHPCA={Int. J High Perform. Comput. Appl}}
 @string{PTRSA={Philos. Trans. Royal Soc. A}}
+@string{SIAMJCP={SIAM J. Sci. Comput.}}
+
 
 % **********************************************
@@ -977,6 +979,31 @@ @Article{Tsujino18
   pages = {79-139},
   url = {http://dx.doi.org/10.1016/j.ocemod.2018.07.002}
 }
+@Article{Lemieux08,
+  author = "J.-F. Lemieux and B. Tremblay and S. Thomas and J. Sedláček and L. A. Mysak",
+  title = "{Using the preconditioned Generalized Minimum RESidual (GMRES) method to solve the sea-ice momentum equation}",
+  journal = JGRO,
+  volume = {113},
+  number = {C10},
+  pages = {},
+  keywords = {Sea ice, GMRES, Krylov subspace},
+  doi = {10.1029/2007JC004680},
+  url = {https://agupubs.onlinelibrary.wiley.com/doi/abs/10.1029/2007JC004680},
+  eprint = {https://agupubs.onlinelibrary.wiley.com/doi/pdf/10.1029/2007JC004680},
+  year = {2008}
+}
+@Article{Saad93,
+  author = "Y. Saad",
+  title = "{A Flexible Inner-Outer Preconditioned GMRES Algorithm}",
+  journal = SIAMJCP,
+  volume = {14},
+  number = {2},
+  year = {1993},
+  pages = {461-469},
+  doi = {10.1137/0914028},
+  URL = {https://doi.org/10.1137/0914028}
+}
+
 % **********************************************
 % For new entries, see example entry in BIB_TEMPLATE.txt
 % **********************************************
diff --git a/doc/source/science_guide/sg_dynamics.rst b/doc/source/science_guide/sg_dynamics.rst
index 4c9b6d502..c88fd728a 100644
--- a/doc/source/science_guide/sg_dynamics.rst
+++ b/doc/source/science_guide/sg_dynamics.rst
@@ -5,15 +5,19 @@
 Dynamics
 ========
 
-There are now different rheologies available in the CICE code. The
+There are different approaches in the CICE code for representing sea ice
+rheology and for solving the sea ice momentum equation. The
 elastic-viscous-plastic (EVP) model represents a modification of
 the standard viscous-plastic (VP) model for sea ice dynamics
 :cite:`Hibler79`. The elastic-anisotropic-plastic (EAP) model,
 on the other hand, explicitly accounts for the observed sub-continuum
 anisotropy of the sea ice cover :cite:`Wilchinsky06,Weiss09`. If
-`kdyn` = 1 in the namelist then the EVP rheology is used (module
-**ice\_dyn\_evp.F90**), while `kdyn` = 2 is associated with the EAP
-rheology (**ice\_dyn\_eap.F90**). 
At times scales associated with the
+``kdyn`` = 1 in the namelist then the EVP model is used (module
+**ice\_dyn\_evp.F90**), while ``kdyn`` = 2 is associated with the EAP
+model (**ice\_dyn\_eap.F90**), and ``kdyn`` = 3 is associated with the
+VP model (**ice\_dyn\_vp.F90**).
+
+At time scales associated with the
 wind forcing, the EVP model reduces to the VP model while the EAP model
 reduces to the anisotropic rheology described in detail in
 :cite:`Wilchinsky06,Tsamados13`. At shorter time scales the
@@ -29,14 +33,21 @@ dynamics in :cite:`Tsamados13`. Simulation results and performance of
 the EVP and EAP models have been compared with the VP model and with
 each other in realistic simulations of the Arctic respectively in
 :cite:`Hunke99` and
-:cite:`Tsamados13`. Here we summarize the equations and
-direct the reader to the above references for details. The numerical
+:cite:`Tsamados13`.
+
+The EVP numerical
 implementation in this code release is that of :cite:`Hunke02`
 and :cite:`Hunke03`, with revisions to the numerical solver as
 in :cite:`Bouillon13`. The implementation of the EAP sea ice
 dynamics into CICE is described in detail in
 :cite:`Tsamados13`.
+
+The VP solver implementation mostly follows :cite:`Lemieux08`, with
+FGMRES :cite:`Saad93` as the linear solver and GMRES as the preconditioner.
+
+Here we summarize the equations and
+direct the reader to the above references for details.
+
 .. _momentum:
 
 ********
@@ -67,20 +78,36 @@ concentration regions. A careful explanation of the issue and its continuum
 solution is provided in :cite:`Hunke03` and
 :cite:`Connolley04`.
 
-The momentum equation is discretized in time as follows, for the classic
-EVP approach. First, for clarity, the two components of Equation :eq:`vpmom` are
+For clarity, the two components of Equation :eq:`vpmom` are
 
 .. math::
    \begin{aligned}
-   m{\partial u\over\partial t} &=& {\partial\sigma_{1j}\over\partial x_j} + \tau_{ax} + 
+   m{\partial u\over\partial t} &= {\partial\sigma_{1j}\over\partial x_j} + \tau_{ax} + 
     a_i c_w \rho_w \left|{\bf U}_w - {\bf u}\right| \left[\left(U_w-u\right)\cos\theta - \left(V_w-v\right)\sin\theta\right]
    -C_bu +mfv - mg{\partial H_\circ\over\partial x}, \\
-   m{\partial v\over\partial t} &=& {\partial\sigma_{2j}\over\partial x_j} + \tau_{ay} + 
+   m{\partial v\over\partial t} &= {\partial\sigma_{2j}\over\partial x_j} + \tau_{ay} + 
     a_i c_w \rho_w \left|{\bf U}_w - {\bf u}\right| \left[\left(U_w-u\right)\sin\theta + \left(V_w-v\right)\cos\theta\right]
    -C_bv-mfu - mg{\partial H_\circ\over\partial y}.
    \end{aligned}
+   :label: momsys
+
+
+A bilinear discretization is used for the stress terms
+:math:`\partial\sigma_{ij}/\partial x_j`,
+which enables the discrete equations to be derived from the
+continuous equations written in curvilinear coordinates. In this
+manner, metric terms associated with the curvature of the grid are
+incorporated into the discretization explicitly. Details pertaining to
+the spatial discretization are found in :cite:`Hunke02`.
+
+.. _evp-momentum:
+
+Elastic-Viscous-Plastic
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The momentum equation is discretized in time as follows, for the classic
+EVP approach.
 In the code,
 :math:`{\tt vrel}=a_i c_w \rho_w\left|{\bf U}_w - {\bf u}^k\right|` and
 :math:`C_b=T_b \left( \sqrt{(u^k)^2+(v^k)^2}+u_0 \right)^{-1}`,
 where :math:`k` denotes the subcycling step. The following equations
 illustrate the time discretization and define some of the other
 variables used in the code.
 
 .. 
math:: \underbrace{\left({m\over\Delta t_e}+{\tt vrel} \cos\theta\ + C_b \right)}_{\tt cca} u^{k+1} - \underbrace{\left(mf+{\tt vrel}\sin\theta\right)}_{\tt ccb}v^{k+1} - = \underbrace{{\partial\sigma_{1j}^{k+1}\over\partial x_j}}_{\tt strintx} - + \underbrace{\tau_{ax} - mg{\partial H_\circ\over\partial x} }_{\tt forcex} - + {\tt vrel}\underbrace{\left(U_w\cos\theta-V_w\sin\theta\right)}_{\tt waterx} + {m\over\Delta t_e}u^k, + = &\underbrace{{\partial\sigma_{1j}^{k+1}\over\partial x_j}}_{\tt strintx} + + \underbrace{\tau_{ax} - mg{\partial H_\circ\over\partial x} }_{\tt forcex} \\ + &+ {\tt vrel}\underbrace{\left(U_w\cos\theta-V_w\sin\theta\right)}_{\tt waterx} + {m\over\Delta t_e}u^k, :label: umom .. math:: \underbrace{\left(mf+{\tt vrel}\sin\theta\right)}_{\tt ccb} u^{k+1} + \underbrace{\left({m\over\Delta t_e}+{\tt vrel} \cos\theta + C_b \right)}_{\tt cca}v^{k+1} - = \underbrace{{\partial\sigma_{2j}^{k+1}\over\partial x_j}}_{\tt strinty} - + \underbrace{\tau_{ay} - mg{\partial H_\circ\over\partial y} }_{\tt forcey} - + {\tt vrel}\underbrace{\left(U_w\sin\theta+V_w\cos\theta\right)}_{\tt watery} + {m\over\Delta t_e}v^k, + = &\underbrace{{\partial\sigma_{2j}^{k+1}\over\partial x_j}}_{\tt strinty} + + \underbrace{\tau_{ay} - mg{\partial H_\circ\over\partial y} }_{\tt forcey} \\ + &+ {\tt vrel}\underbrace{\left(U_w\sin\theta+V_w\cos\theta\right)}_{\tt watery} + {m\over\Delta t_e}v^k, :label: vmom -and vrel\ :math:`\cdot`\ waterx(y) = taux(y). +and :math:`{\tt vrel}\ \cdot\ {\tt waterx(y)}= {\tt taux(y)}`. We solve this system of equations analytically for :math:`u^{k+1}` and :math:`v^{k+1}`. Define @@ -121,8 +148,8 @@ where :math:`{\bf F} = \nabla\cdot\sigma^{k+1}`. Then .. math:: \begin{aligned} - \left({m\over\Delta t_e} +{\tt vrel}\cos\theta\ + C_b \right)u^{k+1} - \left(mf + {\tt vrel}\sin\theta\right) v^{k+1} &=& \hat{u} \\ - \left(mf + {\tt vrel}\sin\theta\right) u^{k+1} + \left({m\over\Delta t_e} +{\tt vrel}\cos\theta + C_b \right)v^{k+1} &=& \hat{v}.\end{aligned} + \left({m\over\Delta t_e} +{\tt vrel}\cos\theta\ + C_b \right)u^{k+1} - \left(mf + {\tt vrel}\sin\theta\right) v^{k+1} &= \hat{u} \\ + \left(mf + {\tt vrel}\sin\theta\right) u^{k+1} + \left({m\over\Delta t_e} +{\tt vrel}\cos\theta + C_b \right)v^{k+1} &= \hat{v}.\end{aligned} Solving simultaneously for :math:`u^{k+1}` and :math:`v^{k+1}`, @@ -140,10 +167,62 @@ where .. math:: b = mf + {\tt vrel}\sin\theta. :label: cevpb + +.. _vp-momentum: + +Viscous-Plastic +~~~~~~~~~~~~~~~ + +In the VP approach, equation :eq:`momsys` is discretized implicitly using a Backward Euler approach, +and stresses are not computed explicitly: -When the subcycling is finished for each (thermodynamic) time step, the -ice–ocean stress must be constructed from `taux(y)` and the terms -containing `vrel` on the left hand side of the equations. +.. math:: + \begin{align} + m\frac{(u^{n}-u^{n-1})}{\Delta t} &= \frac{\partial \sigma_{1j}^n}{\partial x_j} + - \tau_{w,x}^n + \tau_{b,x}^n + mfv^n + + r_{x}^n, + \\ + m\frac{(v^{n}-v^{n-1})}{\Delta t} &= \frac{\partial \sigma^{n} _{2j}}{\partial x_j} + - \tau_{w,y}^n + \tau_{b,y}^n -mfu^{n} + + r_{y}^n + \end{align} + :label: u_sit + +where :math:`r = (r_x,r_y)` contains all terms that do not depend on the velocities :math:`u^n, v^n` (namely the sea surface tilt and the wind stress). +As the water drag, seabed stress and rheology term depend on the velocity field, the only unknowns in equation :eq:`u_sit` are :math:`u^n` and :math:`v^n`. 
+ +Once discretized in space, equation :eq:`u_sit` leads to a system of :math:`N` nonlinear equations with :math:`N` unknowns that can be concisely written as + +.. math:: + \mathbf{A}(\mathbf{u})\mathbf{u} = \mathbf{b}(\mathbf{u}), + :label: nonlin_sys + +where :math:`\mathbf{A}` is an :math:`N\times N` matrix and :math:`\mathbf{u}` and :math:`\mathbf{b}` are vectors of size :math:`N`. +Note that we have dropped the time level index :math:`n`. +The vector :math:`\mathbf{u}` is formed by stacking first the :math:`u` components, followed by the :math:`v` components of the discretized ice velocity. +The vector :math:`\mathbf{b}` is a function of the velocity vector :math:`\mathbf{u}` because of the water and seabed stress terms as well as parts of the rheology term that depend non-linearly on :math:`\mathbf{u}`. + +The nonlinear system :eq:`nonlin_sys` is solved using a Picard iteration method. +Starting from a previous iterate :math:`\mathbf{u}_{k-1}`, the nonlinear system is linearized by substituting :math:`\mathbf{u}_{k-1}` in the expression of the matrix :math:`\mathbf{A}` and the vector :math:`\mathbf{b}`: + +.. math:: + \mathbf{A}(\mathbf{u}_{k-1})\mathbf{u}_{k} = \mathbf{b}(\mathbf{u}_{k-1}) + :label: picard + +The resulting linear system is solved using the Flexible Generalized Minimum RESidual (FGMRES, :cite:`Saad93`) method and this process is repeated iteratively. + +The maximum number of Picard iterations can be set using the namelist flag ``maxits_nonlin``. +The relative tolerance for the Picard solver can be set using the namelist flag ``reltol_nonlin``. +The Picard iterative process stops when :math:`\left\lVert \mathbf{u}_{k} \right\rVert_2 < {\tt reltol\_nonlin} \cdot \left\lVert\mathbf{u}_{0}\right\rVert_2` or when ``maxits_nonlin`` is reached. + +Parameters for the FGMRES linear solver and the preconditioner can be controlled using additional namelist flags (see :ref:`dynamics_nml`). + +Ice-Ocean stress +~~~~~~~~~~~~~~~~ + +At the end of each (thermodynamic) time step, the +ice–ocean stress must be constructed from :math:`{\tt taux(y)}` and the terms +containing :math:`{\tt vrel}` on the left hand side of the equations. The Hibler-Bryan form for the ice-ocean stress :cite:`Hibler87` is included in **ice\_dyn\_shared.F90** but is currently commented out, @@ -185,7 +264,7 @@ where the :math:`a_i` and :math:`v_i` are the total ice concentrations and ice v ridge(s) reaches the seafloor for a water depth :math:`h_{wu}=\min[h_w(i,j),h_w(i+1,j),h_w(i,j+1),h_w(i+1,j+1)]`. Given the formulation of :math:`C_b` in equation :eq:`Cb`, the seabed stress components are non-zero only when :math:`h_u > h_{cu}`. -The maximum seabed stress depends on the weigth of the ridge +The maximum seabed stress depends on the weight of the ridge above hydrostatic balance and the value of :math:`k_2`. It is, however, the parameter :math:`k_1` that has the most notable impact on the simulated extent of landfast ice. The value of :math:`k_1` can be changed at runtime using the namelist variable ``k1``. The grounding scheme can be turned on or off using the namelist logical basalstress. @@ -207,47 +286,44 @@ For convenience we formulate the stress tensor :math:`\bf \sigma` in terms of :math:`\sigma_1=\sigma_{11}+\sigma_{22}`, :math:`\sigma_2=\sigma_{11}-\sigma_{22}`, and introduce the divergence, :math:`D_D`, and the horizontal tension and shearing -strain rates, :math:`D_T` and :math:`D_S` respectively. 
- -CICE now outputs the internal ice pressure which is an important field to support navigation in ice-infested water. -The internal ice pressure :math:`(sigP)` is the average of the normal stresses multiplied by :math:`-1` and -is therefore simply equal to :math:`-\sigma_1/2`. - -*Elastic-Viscous-Plastic* - -In the EVP model the internal stress tensor is determined from a -regularized version of the VP constitutive law. Following the approach of :cite:`Konig10` (see also :cite:`Lemieux16`), the -elliptical yield curve can be modified such that the ice has isotropic tensile strength. -The tensile strength :math:`T_p` is expressed as a fraction of the ice strength :math:`P`, that is :math:`T_p=k_t P` -where :math:`k_t` should be set to a value between 0 and 1 (this can be changed at runtime with the namelist parameter ``Ktens``). The constitutive law is therefore +strain rates, :math:`D_T` and :math:`D_S` respectively: .. math:: - {1\over E}{\partial\sigma_1\over\partial t} + {\sigma_1\over 2\zeta} - + {P_R(1-k_t)\over 2\zeta} = D_D, \\ - :label: sig1 + D_D = \dot{\epsilon}_{11} + \dot{\epsilon}_{22}, .. math:: - {1\over E}{\partial\sigma_2\over\partial t} + {\sigma_2\over 2\eta} = D_T, - :label: sig2 + D_T = \dot{\epsilon}_{11} - \dot{\epsilon}_{22}, .. math:: - {1\over E}{\partial\sigma_{12}\over\partial t} + {\sigma_{12}\over - 2\eta} = {1\over 2}D_S, - :label: sig12 + D_S = 2\dot{\epsilon}_{12}, where .. math:: - D_D = \dot{\epsilon}_{11} + \dot{\epsilon}_{22}, + \dot{\epsilon}_{ij} = {1\over 2}\left({{\partial u_i}\over{\partial x_j}} + {{\partial u_j}\over{\partial x_i}}\right) -.. math:: - D_T = \dot{\epsilon}_{11} - \dot{\epsilon}_{22}, +CICE can output the internal ice pressure which is an important field to support navigation in ice-infested water. +The internal ice pressure (``sigP``) is the average of the normal stresses multiplied by :math:`-1` and +is therefore simply equal to :math:`-\sigma_1/2`. -.. math:: - D_S = 2\dot{\epsilon}_{12}, +Following the approach of :cite:`Konig10` (see also :cite:`Lemieux16`), the +elliptical yield curve can be modified such that the ice has isotropic tensile strength. +The tensile strength :math:`T_p` is expressed as a fraction of the ice strength :math:`P`, that is :math:`T_p=k_t P` +where :math:`k_t` should be set to a value between 0 and 1 (this can be changed at runtime with the namelist parameter ``Ktens``). + +.. _stress-vp: + +Viscous-Plastic +~~~~~~~~~~~~~~~ + +The VP constitutive law is given by .. math:: - \dot{\epsilon}_{ij} = {1\over 2}\left({{\partial u_i}\over{\partial x_j}} + {{\partial u_j}\over{\partial x_i}}\right), + \sigma_{ij} = 2 \eta \dot{\epsilon}_{ij} + (\zeta - \eta) D_D - P_R(1 - k_t)\frac{\delta_{ij}}{2} + :label: vp-const + +where :math:`\eta` and :math:`\zeta` are the bulk and shear viscosities. +An elliptical yield curve is used, with the viscosities given by .. math:: \zeta = {P(1+k_t)\over 2\Delta}, @@ -255,14 +331,41 @@ where .. math:: \eta = {P(1+k_t)\over {2\Delta e^2}}, +where + .. math:: - \Delta = \left[D_D^2 + {1\over e^2}\left(D_T^2 + D_S^2\right)\right]^{1/2}, + \Delta = \left[D_D^2 + {1\over e^2}\left(D_T^2 + D_S^2\right)\right]^{1/2} and :math:`P_R` is a “replacement pressure” (see :cite:`Geiger98`, for example), which serves to prevent residual ice motion due to spatial -variations of :math:`P` when the rates of strain are exactly zero. The ice strength :math:`P` +variations of :math:`P` when the rates of strain are exactly zero. 
+ +The ice strength :math:`P` is a function of the ice thickness and concentration -as it is described in the `Icepack Documentation `_. The parameteter :math:`e` is the ratio of the major and minor axes of the elliptical yield curve, also called the ellipse aspect ratio. It can be changed using the namelist parameter ``e_ratio``. +as described in the `Icepack Documentation `_. The parameter :math:`e` is the ratio of the major and minor axes of the elliptical yield curve, also called the ellipse aspect ratio. It can be changed using the namelist parameter ``e_ratio``. + +.. _stress-evp: + +Elastic-Viscous-Plastic +~~~~~~~~~~~~~~~~~~~~~~~ + +In the EVP model the internal stress tensor is determined from a +regularized version of the VP constitutive law :eq:`vp-const`. The constitutive law is therefore + +.. math:: + {1\over E}{\partial\sigma_1\over\partial t} + {\sigma_1\over 2\zeta} + + {P_R(1-k_t)\over 2\zeta} = D_D, \\ + :label: sig1 + +.. math:: + {1\over E}{\partial\sigma_2\over\partial t} + {\sigma_2\over 2\eta} = D_T, + :label: sig2 + +.. math:: + {1\over E}{\partial\sigma_{12}\over\partial t} + {\sigma_{12}\over + 2\eta} = {1\over 2}D_S, + :label: sig12 + Viscosities are updated during the subcycling, so that the entire dynamics component is subcycled within the time step, and the elastic @@ -304,15 +407,10 @@ appear explicitly.) Choices of the parameters used to define :math:`E`, :math:`T` and :math:`\Delta t_e` are discussed in Sections :ref:`revp` and :ref:`parameters`. -The bilinear discretization used for the stress terms -:math:`\partial\sigma_{ij}/\partial x_j` in the momentum equation is -now used, which enabled the discrete equations to be derived from the -continuous equations written in curvilinear coordinates. In this -manner, metric terms associated with the curvature of the grid are -incorporated into the discretization explicitly. Details pertaining to -the spatial discretization are found in :cite:`Hunke02`. +.. _stress-eap: -*Elastic-Anisotropic-Plastic* +Elastic-Anisotropic-Plastic +~~~~~~~~~~~~~~~~~~~~~~~~~~~ In the EAP model the internal stress tensor is related to the geometrical properties and orientation of underlying virtual diamond @@ -558,6 +656,6 @@ Introducing another numerical parameter :math:`\alpha=2T \Delta t_e ^{-1}` :cite where as opposed to the classic EVP, the second term in each equation is at iteration :math:`k` :cite:`Bouillon13`. Also, as opposed to the classic EVP, :math:`\Delta t_e` times the number of subcycles (or iterations) does not need to be equal to the advective time step :math:`\Delta t`. Finally, as with the classic EVP approach, the stresses are initialized using the previous time level values. -The revised EVP is activated by setting the namelist parameter `revised\_evp` = true. -In the code :math:`\alpha = arlx` and :math:`\beta = brlx`. The values of :math:`arlx` and :math:`brlx` can be set in the namelist. -It is recommended to use large values of these parameters and to set :math:`arlx=brlx` :cite:`Kimmritz15`. +The revised EVP is activated by setting the namelist parameter ``revised_evp = true``. +In the code :math:`\alpha` is ``arlx`` and :math:`\beta` is ``brlx``. The values of ``arlx`` and ``brlx`` can be set in the namelist. +It is recommended to use large values of these parameters and to set :math:`\alpha=\beta` :cite:`Kimmritz15`. 
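As a toy illustration of the Picard fixed-point iteration documented in
the VP section above, here is a scalar stand-in for solving
A(u) u = b(u); the functions amat and bvec below are made up for the
example and are not the CICE operators:

    ! Hypothetical scalar Picard iteration, illustration only
    program demo_picard
      implicit none
      integer, parameter :: dp = kind(1.d0)
      integer  :: k
      real(dp) :: u, u_old
      u = 1._dp                         ! initial iterate u_0
      do k = 1, 100                     ! cf. maxits_nonlin
         u_old = u
         u = bvec(u_old) / amat(u_old)  ! solve A(u_{k-1}) u_k = b(u_{k-1})
         if (abs(u - u_old) < 1.e-12_dp) exit
      enddo
      write(*,*) 'converged u =', u
    contains
      real(dp) function amat(v)  ! stands in for A(u): drag grows with |u|
         real(dp), intent(in) :: v
         amat = 1._dp + abs(v)
      end function amat
      real(dp) function bvec(v)  ! stands in for b(u)
         real(dp), intent(in) :: v
         bvec = 2._dp + 0.1_dp*sin(v)
      end function bvec
    end program demo_picard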
diff --git a/doc/source/user_guide/ug_case_settings.rst b/doc/source/user_guide/ug_case_settings.rst
index 550162515..c91187b27 100644
--- a/doc/source/user_guide/ug_case_settings.rst
+++ b/doc/source/user_guide/ug_case_settings.rst
@@ -349,6 +349,8 @@ thermo_nml
    "``sw_dtemp``", "real", "temperature difference from melt to start redistributing", "0.02"
    "", "", "", ""
 
+.. _dynamics_nml:
+
 dynamics_nml
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -369,10 +371,13 @@ dynamics_nml
    "", "``zero``", "zero coriolis", ""
    "``Cstar``", "real", "constant in Hibler strength formula", "20"
    "``e_ratio``", "real", "EVP ellipse aspect ratio", "2.0"
+   "``im_fgmres``", "integer", "maximum number of Arnoldi iterations for FGMRES solver", "50"
+   "``im_pgmres``", "integer", "maximum number of Arnoldi iterations for PGMRES preconditioner", "5"
    "``kdyn``", "``-1``", "dynamics algorithm OFF", "1"
    "", "``0``", "dynamics OFF", ""
    "", "``1``", "EVP dynamics", ""
    "", "``2``", "EAP dynamics", ""
+   "", "``3``", "VP dynamics", ""
    "``kevp_kernel``", "``0``", "standard 2D EVP memory parallel solver", "0"
    "", "``2``", "1D shared memory solver (not fully validated)", ""
    "``kstrength``", "``0``", "ice strength formulation :cite:`Hibler79`", "1"
@@ -388,9 +393,23 @@ dynamics_nml
    "``Ktens``", "real", "Tensile strength factor (see :cite:`Konig10`)", "0.0"
    "``k1``", "real", "1st free parameter for landfast parameterization", "8.0"
    "``k2``", "real", "2nd free parameter (N/m\ :math:`^3`) for landfast parameterization", "15.0"
+   "``maxits_nonlin``", "integer", "maximum number of nonlinear iterations for VP solver", "1000"
+   "``maxits_fgmres``", "integer", "maximum number of restarts for FGMRES solver", "1"
+   "``maxits_pgmres``", "integer", "maximum number of restarts for PGMRES preconditioner", "1"
+   "``monitor_nonlin``", "logical", "write velocity norm at each nonlinear iteration", "``.false.``"
+   "``monitor_fgmres``", "logical", "write velocity norm at each FGMRES iteration", "``.false.``"
+   "``monitor_pgmres``", "logical", "write velocity norm at each PGMRES iteration", "``.false.``"
    "``mu_rdg``", "real", "e-folding scale of ridged ice for ``krdg_partic`` = 1 in m^0.5", "3.0"
    "``ndte``", "integer", "number of EVP subcycles", "120"
+   "``ortho_type``", "``mgs``", "Use modified Gram-Schmidt in FGMRES solver", "``mgs``"
+   "", "``cgs``", "Use classical Gram-Schmidt in FGMRES solver", ""
+   "``precond``", "``pgmres``", "Use GMRES as preconditioner for FGMRES solver", "``pgmres``"
+   "", "``diag``", "Use Jacobi preconditioner for the FGMRES solver", ""
+   "", "``ident``", "Don't use a preconditioner for the FGMRES solver", ""
    "``Pstar``", "real", "constant in Hibler strength formula (N/m\ :math:`^2`)", "2.75e4"
+   "``reltol_nonlin``", "real", "relative tolerance for nonlinear solver", "1e-8"
+   "``reltol_fgmres``", "real", "relative tolerance for FGMRES solver", "1e-2"
+   "``reltol_pgmres``", "real", "relative tolerance for PGMRES preconditioner", "1e-6"
    "``revised_evp``", "logical", "use revised EVP formulation", "``.false.``"
    "``ssh_stress``", "``coupled``", "computed from coupled sea surface height gradient", "``geostrophic``"
    "", "``geostrophic``", "computed from ocean velocity", ""

From 37deff5bbd52aa2d4e411022f19b3b29d201c5a6 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 14 Jul 2020 16:58:14 -0400
Subject: [PATCH 165/196] ice_dyn_vp: default 'use_mean_vrel' to 'true'

Previous testing revealed that it is better to set 'use_mean_vrel' to true
(i.e., use the mean of the 2 previous nonlinear iterates to compute
`vrel`) for the Picard solver, but the 
picture is less clear for the Anderson solver. Since we are only advertising the Picard solver for now, make this flag default to 'true'. --- cicecore/cicedynB/general/ice_init.F90 | 2 +- configuration/scripts/ice_in | 2 +- configuration/scripts/options/set_nml.dynanderson | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index c9f78ba40..635bb0626 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -353,7 +353,7 @@ subroutine input_data reltol_andacc = 1e-6_dbl_kind ! relative tolerance for Anderson acceleration damping_andacc = 0 ! damping factor for Anderson acceleration start_andacc = 0 ! acceleration delay factor (acceleration starts at this iteration) - use_mean_vrel = .false. ! use mean of previous 2 iterates to compute vrel + use_mean_vrel = .true. ! use mean of previous 2 iterates to compute vrel advection = 'remap' ! incremental remapping transport scheme conserv_check = .false.! tracer conservation check shortwave = 'ccsm3' ! 'ccsm3' or 'dEdd' (delta-Eddington) diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index ddecea6f4..f683b8094 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -153,7 +153,7 @@ reltol_fgmres = 1e-2 reltol_pgmres = 1e-6 algo_nonlin = 'picard' - use_mean_vrel = .false. + use_mean_vrel = .true. im_andacc = 5 reltol_andacc = 1e-6 damping_andacc = 0 diff --git a/configuration/scripts/options/set_nml.dynanderson b/configuration/scripts/options/set_nml.dynanderson index 7be744eb9..fbfd16af0 100644 --- a/configuration/scripts/options/set_nml.dynanderson +++ b/configuration/scripts/options/set_nml.dynanderson @@ -1,3 +1,4 @@ kdyn = 3 algo_nonlin = 'anderson' maxits_nonlin = 5000 +use_mean_vrel = .false. From 60299086d5a0de3fe1047316b41fc0dd9dbccc33 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 14 Jul 2020 17:09:09 -0400 Subject: [PATCH 166/196] ice_in: remove Anderson solver parameters Since we are only advertising the Picard solver for now, remove the namelist parameters for the Anderson solver from the reference namelist. --- configuration/scripts/ice_in | 4 ---- 1 file changed, 4 deletions(-) diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index f683b8094..42b66b47e 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -154,10 +154,6 @@ reltol_pgmres = 1e-6 algo_nonlin = 'picard' use_mean_vrel = .true. - im_andacc = 5 - reltol_andacc = 1e-6 - damping_andacc = 0 - start_andacc = 0 / &shortwave_nml From 845e15a3194ff0b9515476e1c6dd6624f1089e10 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 15 Jul 2020 12:44:00 -0400 Subject: [PATCH 167/196] ice_dyn_vp: add article references --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 33 ++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index cadc331f4..a2ce00f59 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -5,7 +5,30 @@ ! ! See: ! +! Lemieux, J.‐F., Tremblay, B., Thomas, S., Sedláček, J., and Mysak, L. A. (2008), +! Using the preconditioned Generalized Minimum RESidual (GMRES) method to solve +! the sea‐ice momentum equation, J. Geophys. Res., 113, C10004, doi:10.1029/2007JC004680. ! +! Hibler, W. D., and Ackley, S. F. (1983), Numerical simulation of the Weddell Sea pack ice, +! 
J. Geophys. Res., 88(C5), 2873–2887, doi:10.1029/JC088iC05p02873.
+!
+! Y. Saad. A Flexible Inner-Outer Preconditioned GMRES Algorithm. SIAM J. Sci. Comput.,
+! 14(2):461–469, 1993. URL: https://doi.org/10.1137/0914028, doi:10.1137/0914028.
+!
+! C. T. Kelley, Iterative Methods for Linear and Nonlinear Equations, SIAM, 1995.
+! (https://www.siam.org/books/textbooks/fr16_book.pdf)
+!
+! Y. Saad, Iterative Methods for Sparse Linear Systems. SIAM, 2003.
+! (http://www-users.cs.umn.edu/~saad/IterMethBook_2ndEd.pdf)
+!
+! Walker, H. F., & Ni, P. (2011). Anderson Acceleration for Fixed-Point Iterations.
+! SIAM Journal on Numerical Analysis, 49(4), 1715–1735. https://doi.org/10.1137/10078356X
+!
+! Fang, H., & Saad, Y. (2009). Two classes of multisecant methods for nonlinear acceleration.
+! Numerical Linear Algebra with Applications, 16(3), 197–221. https://doi.org/10.1002/nla.617
+!
+! Birken, P. (2015) Termination criteria for inexact fixed‐point schemes.
+! Numer. Linear Algebra Appl., 22: 702–716. doi: 10.1002/nla.1982.
 !
 ! authors: JF Lemieux, ECCC, Philippe Blain, ECCC
 !
@@ -53,7 +76,7 @@ module ice_dyn_vp
       monitor_nonlin , & ! print nonlinear residual norm
       monitor_fgmres , & ! print fgmres residual norm
       monitor_pgmres , & ! print pgmres residual norm
-      use_mean_vrel      ! use mean of previous 2 iterates to compute vrel
+      use_mean_vrel      ! use mean of previous 2 iterates to compute vrel (see Hibler and Ackley 1983)
 
    real (kind=dbl_kind), public :: &
       reltol_nonlin  , & ! nonlinear stopping criterion: reltol_nonlin*res(k=0)
@@ -623,6 +646,10 @@ end subroutine imp_solver
 ! using Anderson acceleration (accelerated fixed point (Picard) iteration)
 !
 ! author: JF Lemieux, A. Qaddouri, F. Dupont and P. Blain ECCC
+!
+! Anderson algorithm adapted from:
+! H. F. Walker, “Anderson Acceleration: Algorithms and Implementations”
+! [Online]. Available: https://users.wpi.edu/~walker/Papers/anderson_accn_algs_imps.pdf
 
 subroutine anderson_solver (icellt, icellu, &
                             indxti, indxtj, &
@@ -2988,6 +3015,10 @@ end subroutine vec_to_arrays
 ! Update Q and R factor after deletion of the 1st column of G_diff
 !
 ! author: P. Blain ECCC
+!
+! adapted from:
+! H. F. Walker, “Anderson Acceleration: Algorithms and Implementations”
+! [Online]. Available: https://users.wpi.edu/~walker/Papers/anderson_accn_algs_imps.pdf
 
 subroutine qr_delete(Q, R)
 
From acc06276227073935a83907e22569424cd1fe509 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Wed, 15 Jul 2020 17:11:20 -0400
Subject: [PATCH 168/196] ice_dyn_vp: remove unused subroutines

Remove subroutines 'stress_prime_vpOLD', 'matvecOLD' and 'precond_diag'.

'stress_prime_vpOLD' and 'matvecOLD' have been unused since the creation
of the 'matvec' subroutine.

'precond_diag' has been unused since we deactivated the legacy FGMRES
solver.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 355 ----------------------
 1 file changed, 355 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index a2ce00f59..76163b867 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -1292,237 +1292,6 @@ end subroutine calc_zeta_Pr
 
 !=======================================================================
 
-! Computes VP stress without the rep. 
pressure Pr (included in b vector) - - subroutine stress_prime_vpOLD (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - zetaD, & - str ) - - use ice_dyn_shared, only: strain_rates - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - icellt ! no. of cells where icetmask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxti , & ! compressed index in i-direction - indxtj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) - dxt , & ! width of T-cell through the middle (m) - dyt , & ! height of T-cell through the middle (m) - dxhy , & ! 0.5*(HTE - HTE) - dyhx , & ! 0.5*(HTN - HTN) - cyp , & ! 1.5*HTE - 0.5*HTE - cxp , & ! 1.5*HTN - 0.5*HTN - cym , & ! 0.5*HTE - 1.5*HTE - cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & - zetaD ! 2*zeta - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & - str ! stress combinations - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - divune, divunw, divuse, divusw , & ! divergence - tensionne, tensionnw, tensionse, tensionsw, & ! tension - shearne, shearnw, shearse, shearsw , & ! shearing - Deltane, Deltanw, Deltase, Deltasw , & ! Delt - ssigpn, ssigps, ssigpe, ssigpw , & - ssigmn, ssigms, ssigme, ssigmw , & - ssig12n, ssig12s, ssig12e, ssig12w , & - ssigp1, ssigp2, ssigm1, ssigm2, ssig121, ssig122, & - csigpne, csigpnw, csigpse, csigpsw , & - csigmne, csigmnw, csigmse, csigmsw , & - csig12ne, csig12nw, csig12se, csig12sw , & - str12ew, str12we, str12ns, str12sn , & - strp_tmp, strm_tmp - - real (kind=dbl_kind) :: & - stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) - stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 - stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 - - character(len=*), parameter :: subname = '(stress_prime_vpOLD)' - - !----------------------------------------------------------------- - ! Initialize - !----------------------------------------------------------------- - - str(:,:,:) = c0 - -!DIR$ CONCURRENT !Cray -!cdir nodep !NEC -!ocl novrec !Fujitsu - - do ij = 1, icellt - i = indxti(ij) - j = indxtj(ij) - - !----------------------------------------------------------------- - ! strain rates - ! NOTE these are actually strain rates * area (m^2/s) - !----------------------------------------------------------------- - call strain_rates (nx_block, ny_block, & - i, j, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - divune, divunw, & - divuse, divusw, & - tensionne, tensionnw, & - tensionse, tensionsw, & - shearne, shearnw, & - shearse, shearsw, & - Deltane, Deltanw, & - Deltase, Deltasw ) - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - ! JFL commented part of stressp is for the rep pressure Pr - !----------------------------------------------------------------- - - stressp_1 = zetaD(i,j,1)*(divune*(c1+Ktens))! - Deltane*(c1-Ktens)) - stressp_2 = zetaD(i,j,2)*(divunw*(c1+Ktens))! - Deltanw*(c1-Ktens)) - stressp_3 = zetaD(i,j,3)*(divusw*(c1+Ktens))! - Deltasw*(c1-Ktens)) - stressp_4 = zetaD(i,j,4)*(divuse*(c1+Ktens))! 
- Deltase*(c1-Ktens)) - - stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci - stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci - stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci - stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci - - stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci - stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci - stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci - stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - - !----------------------------------------------------------------- - ! combinations of the stresses for the momentum equation ! kg/s^2 - !----------------------------------------------------------------- - - ssigpn = stressp_1 + stressp_2 - ssigps = stressp_3 + stressp_4 - ssigpe = stressp_1 + stressp_4 - ssigpw = stressp_2 + stressp_3 - ssigp1 =(stressp_1 + stressp_3)*p055 - ssigp2 =(stressp_2 + stressp_4)*p055 - - ssigmn = stressm_1 + stressm_2 - ssigms = stressm_3 + stressm_4 - ssigme = stressm_1 + stressm_4 - ssigmw = stressm_2 + stressm_3 - ssigm1 =(stressm_1 + stressm_3)*p055 - ssigm2 =(stressm_2 + stressm_4)*p055 - - ssig12n = stress12_1 + stress12_2 - ssig12s = stress12_3 + stress12_4 - ssig12e = stress12_1 + stress12_4 - ssig12w = stress12_2 + stress12_3 - ssig121 =(stress12_1 + stress12_3)*p111 - ssig122 =(stress12_2 + stress12_4)*p111 - - csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 - csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 - csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 - csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 - - csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 - csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 - csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 - csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 - - csig12ne = p222*stress12_1 + ssig122 & - + p055*stress12_3 - csig12nw = p222*stress12_2 + ssig121 & - + p055*stress12_4 - csig12sw = p222*stress12_3 + ssig122 & - + p055*stress12_1 - csig12se = p222*stress12_4 + ssig121 & - + p055*stress12_2 - - str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) - str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) - str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) - str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) - - !----------------------------------------------------------------- - ! for dF/dx (u momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) - - ! northeast (i,j) - str(i,j,1) = -strp_tmp - strm_tmp - str12ew & - + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne - - ! northwest (i+1,j) - str(i,j,2) = strp_tmp + strm_tmp - str12we & - + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw - - strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) - - ! southeast (i,j+1) - str(i,j,3) = -strp_tmp - strm_tmp + str12ew & - + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se - - ! southwest (i+1,j+1) - str(i,j,4) = strp_tmp + strm_tmp + str12we & - + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw - - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) - - ! northeast (i,j) - str(i,j,5) = -strp_tmp + strm_tmp - str12ns & - - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne - - ! 
southeast (i,j+1) - str(i,j,6) = strp_tmp - strm_tmp - str12sn & - - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se - - strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) - - ! northwest (i+1,j) - str(i,j,7) = -strp_tmp + strm_tmp + str12ns & - - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw - - ! southwest (i+1,j+1) - str(i,j,8) = strp_tmp - strm_tmp + str12sn & - - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw - - enddo ! ij - - end subroutine stress_prime_vpOLD - - -!======================================================================= - ! Computes the VP stress (as diagnostic) subroutine stress_vp (nx_block, ny_block, & @@ -1753,92 +1522,6 @@ end subroutine calc_seabed_stress !======================================================================= -! OLD matrix vector product A(u,v) * (u,v) -! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) -! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) - - subroutine matvecOLD (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - str, & - vrel, & - umassdti, fm, & - uarear, Cb, & - uvel, vvel, & - Au, Av) - - integer (kind=int_kind), intent(in) :: & - nx_block, ny_block, & ! block dimensions - icellu ! total count when iceumask is true - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & - indxui , & ! compressed index in i-direction - indxuj ! compressed index in j-direction - - real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - vrel, & ! coefficient for tauw - Cb, & ! coefficient for basal stress - umassdti, & ! mass of U-cell/dt (kg/m^2 s) - fm , & ! Coriolis param. * mass in U-cell (kg/s) - uarear ! 1/uarea - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(in) :: & - str - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(in) :: & - uvel , & ! x-component of velocity (m/s) - vvel ! y-component of velocity (m/s) - - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & - Au , & ! matvec, Fx = bx - Au (N/m^2) - Av ! matvec, Fy = by - Av (N/m^2) - - ! local variables - - integer (kind=int_kind) :: & - i, j, ij - - real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel - ccaimp,ccb , & ! intermediate variables - strintx, strinty - - character(len=*), parameter :: subname = '(matvecOLD)' - - !----------------------------------------------------------------- - ! integrate the momentum equation - !----------------------------------------------------------------- - - do ij =1, icellu - i = indxui(ij) - j = indxuj(ij) - - utp = uvel(i,j) - vtp = vvel(i,j) - - ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s - - ccb = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s - - ! divergence of the internal stress tensor - strintx = uarear(i,j)* & - (str(i,j,1) + str(i+1,j,2) + str(i,j+1,3) + str(i+1,j+1,4)) - strinty = uarear(i,j)* & - (str(i,j,5) + str(i,j+1,6) + str(i+1,j,7) + str(i+1,j+1,8)) - - Au(i,j) = ccaimp*utp - ccb*vtp - strintx - Av(i,j) = ccaimp*vtp + ccb*utp - strinty - - enddo ! ij - - end subroutine matvecOLD - -!======================================================================= - ! Computes the matrix vector product A(u,v) * (u,v) ! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) ! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) @@ -2803,44 +2486,6 @@ end subroutine formDiag_step2 !======================================================================= -! 
Diagonal (Jacobi) preconditioner for the legacy FGMRES driver - - subroutine precond_diag (ntot, & - diagvec, & - wk1, wk2) - - integer (kind=int_kind), intent(in) :: & - ntot ! size of problem for fgmres - - real (kind=dbl_kind), dimension (ntot), intent(in) :: & - diagvec, wk1 - - real (kind=dbl_kind), dimension (ntot), intent(out) :: & - wk2 - - ! local variables - - integer (kind=int_kind) :: & - i - - character(len=*), parameter :: subname = '(precond_diag)' - - !----------------------------------------------------------------- - ! form vector (converts from max_blocks arrays to single vector - !----------------------------------------------------------------- - - wk2(:)=c0 - - do i=1, ntot - - wk2(i) = wk1(i)/diagvec(i) - - enddo! i - - end subroutine precond_diag - -!======================================================================= - ! Compute squared l^2 norm of a grid function (tpu,tpv) subroutine calc_L2norm_squared (nx_block, ny_block, & From c065deac8b31f4047490d875a8ad2d7d4153385b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 15 Jul 2020 17:19:27 -0400 Subject: [PATCH 169/196] ice_dyn_vp: remove unused local variables --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 76163b867..88fdac3d7 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -754,7 +754,6 @@ subroutine anderson_solver (icellt, icellu, & res_diff , & ! difference between current and previous residuals fpfunc , & ! current value of fixed point function fpfunc_old , & ! previous value of fixed point function - Fvec , & ! (Fx,Fy) (nonlinear residual) as vector tmp ! temporary vector for BLAS calls real (kind=dbl_kind), dimension(ntot,im_andacc) :: & @@ -769,7 +768,7 @@ subroutine anderson_solver (icellt, icellu, & coeffs ! coeffs used to combine previous solutions real (kind=dbl_kind) :: & - tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) + ! tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) [unused for now] tol_nl , & ! tolerance for nonlinear convergence: reltol_nonlin * (initial nonlinear residual norm) fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x prog_norm , & ! norm of difference between current and previous solution @@ -2771,9 +2770,7 @@ subroutine fgmres (zetaD, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) - workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = bx - Au (N/m^2) - Fy ! residual vector (y components), Fy = by - Av (N/m^2) + workspace_y ! work vector (y components) real (kind=dbl_kind), dimension (max_blocks) :: & norm_squared ! array to accumulate squared norm of grid function over blocks @@ -2810,12 +2807,6 @@ subroutine fgmres (zetaD, & real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep - real (kind=dbl_kind) :: & - local_dot ! local value to accumulate dot product computations - - real (kind=dbl_kind), dimension(maxinner) :: & - dotprod_local ! local array to accumulate several dot product computations - character(len=*), parameter :: subname = '(fgmres)' ! Here we go ! 
@@ -3172,9 +3163,7 @@ subroutine pgmres (zetaD, & real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) - workspace_y , & ! work vector (y components) - Fx , & ! residual vector (x components), Fx = bx - Au (N/m^2) - Fy ! residual vector (y components), Fy = by - Av (N/m^2) + workspace_y ! work vector (y components) real (kind=dbl_kind), dimension (max_blocks) :: & norm_squared ! array to accumulate squared norm of grid function over blocks @@ -3208,12 +3197,6 @@ subroutine pgmres (zetaD, & real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep - real (kind=dbl_kind) :: & - local_dot ! local value to accumulate dot product computations - - real (kind=dbl_kind), dimension(maxinner) :: & - dotprod_local ! local array to accumulate several dot product computations - character(len=*), parameter :: subname = '(pgmres)' ! Here we go ! From 5e946a4740453787aeac9631ac390607f64115bf Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 15 Jul 2020 17:40:07 -0400 Subject: [PATCH 170/196] ice_dyn_vp: calc_bvec: remove unused arguments This completes 56d55c7 (ice_dyn_vp: make 'calc_bvec' use already computed 'vrel', 2019-06-10) --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 88fdac3d7..7476967f7 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -834,9 +834,7 @@ subroutine anderson_solver (icellt, icellu, & call calc_bvec (nx_block , ny_block, & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & - stPrtmp (:,:,:) , Cdn_ocn (:,:,iblk), & - aiu (:,:,iblk), uarear (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & + stPrtmp (:,:,:) , uarear (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & @@ -1867,9 +1865,7 @@ end subroutine calc_bfix subroutine calc_bvec (nx_block, ny_block, & icellu, & indxui, indxuj, & - stPr, Cw, & - aiu, uarear, & - uocn, vocn, & + stPr, uarear, & waterx, watery, & uvel, vvel, & bxfix, byfix, & @@ -1888,15 +1884,11 @@ subroutine calc_bvec (nx_block, ny_block, & real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & uvel , & ! x-component of velocity (m/s) vvel , & ! y-component of velocity (m/s) - Cw , & ! ocean-ice neutral drag coefficient - aiu , & ! ice fraction on u-grid uarear , & ! 1/uarea waterx , & ! for ocean stress calculation, x (m/s) watery , & ! for ocean stress calculation, y (m/s) bxfix , & ! bx = taux + bxfix byfix , & ! by = tauy + byfix - uocn , & ! ocean current, x-direction (m/s) - vocn , & ! ocean current, y-direction (m/s) vrel ! relative ice-ocean velocity real (kind=dbl_kind), dimension(nx_block,ny_block,8), & From 9cfa74b717ddb03fcf9193fe847b58a0a4c7bd00 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 15 Jul 2020 17:47:02 -0400 Subject: [PATCH 171/196] ice_dyn_vp: anderson_solver: remove unused arguments This completes 4616628 (ice_dyn_vp: remove legacy FGMRES solver, 2020-02-17). 
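For reference, after this change the driver only needs to allocate the
solution vector before calling the solver. Sketch (abbreviated from the
'imp_solver' changes below; not a complete listing):

    real (kind=dbl_kind), allocatable :: sol(:)  ! solution vector

    allocate(sol(ntot))
    call anderson_solver (icellt, icellu,  &
                          indxti, indxtj,  &
                          indxui, indxuj,  &
                          aiu,    ntot,    &
                          waterx, watery,  &
                          bxfix,  byfix,   &
                          umassdti, sol,   &
                          fpresx, fpresy,  &
                          zetaD,  Cb,      &
                          halo_info_mask)
    deallocate(sol)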
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 7476967f7..50fb8e41a 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -250,9 +250,7 @@ subroutine imp_solver (dt) this_block ! block information for current block real (kind=dbl_kind), allocatable :: & - bvec(:) , & ! right-hand-side vector - sol(:) , & ! solution vector - diagvec(:) ! diagonal vector + sol(:) ! solution vector character(len=*), parameter :: subname = '(imp_solver)' @@ -462,7 +460,7 @@ subroutine imp_solver (dt) enddo ntot = 2*ntot ! times 2 because of u and v - allocate(bvec(ntot), sol(ntot), diagvec(ntot)) + allocate(sol(ntot)) !----------------------------------------------------------------- ! Start of nonlinear iteration @@ -473,8 +471,7 @@ subroutine imp_solver (dt) aiu, ntot, & waterx, watery, & bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & + umassdti, sol, & fpresx, fpresy, & zetaD, Cb, & halo_info_mask) @@ -482,7 +479,7 @@ subroutine imp_solver (dt) ! End of nonlinear iteration !----------------------------------------------------------------- - deallocate(bvec, sol, diagvec) + deallocate(sol) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) @@ -657,8 +654,7 @@ subroutine anderson_solver (icellt, icellu, & aiu, ntot, & waterx, watery, & bxfix, byfix, & - umassdti, bvec, & - sol, diagvec, & + umassdti, sol, & fpresx, fpresy, & zetaD, Cb, & halo_info_mask) @@ -708,9 +704,7 @@ subroutine anderson_solver (icellt, icellu, & Cb ! seabed stress coefficient real (kind=dbl_kind), dimension (ntot), intent(inout) :: & - bvec , & ! RHS vector for FGMRES - sol , & ! current approximate solution - diagvec ! diagonal of matrix A for preconditioners + sol ! current approximate solution ! 
local variables From 809fb36674fbca610cf67fc57a2696e954ba3332 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Wed, 15 Jul 2020 17:55:40 -0400 Subject: [PATCH 172/196] ice_dyn_vp: remove unused 'use'-imported variables in subroutines --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 50fb8e41a..df7e3d3ec 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -200,7 +200,7 @@ subroutine imp_solver (dt) stressp_1, stressp_2, stressp_3, stressp_4, & stressm_1, stressm_2, stressm_3, stressm_4, & stress12_1, stress12_2, stress12_3, stress12_4 - use ice_grid, only: tmask, umask, dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & + use ice_grid, only: tmask, umask, dxt, dyt, cxp, cyp, cxm, cym, & tarear, to_ugrid, t2ugrid_vector, u2tgrid_vector, & grid_type use ice_state, only: aice, vice, vsno, uvel, vvel, divu, shear, & @@ -663,13 +663,13 @@ subroutine anderson_solver (icellt, icellu, & use ice_blocks, only: nx_block, ny_block use ice_boundary, only: ice_HaloUpdate use ice_constants, only: c1 - use ice_domain, only: halo_info, maskhalo_dyn + use ice_domain, only: maskhalo_dyn use ice_domain_size, only: max_blocks use ice_flux, only: uocn, vocn, fm, Tbu use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, & uarear, tinyarea use ice_state, only: uvel, vvel, strength - use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop + use ice_timers, only: ice_timer_start, ice_timer_stop integer (kind=int_kind), intent(in) :: & ntot ! size of problem for fgmres (for given cpu) From b060c372ac781bfc47f90cad6cb4f55518fda88e Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 10:53:35 -0400 Subject: [PATCH 173/196] ice_dyn_vp: fix trailing whitespace errors --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 412 +++++++++++----------- 1 file changed, 205 insertions(+), 207 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index df7e3d3ec..f45a9158f 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -175,9 +175,9 @@ end subroutine init_vp ! #ifdef CICE_IN_NEMO ! Wind stress is set during this routine from the values supplied -! via NEMO (unless calc_strair is true). These values are supplied -! rotated on u grid and multiplied by aice. strairxT = 0 in this -! case so operations in dyn_prep1 are pointless but carried out to +! via NEMO (unless calc_strair is true). These values are supplied +! rotated on u grid and multiplied by aice. strairxT = 0 in this +! case so operations in dyn_prep1 are pointless but carried out to ! minimise code changes. #endif ! @@ -225,7 +225,7 @@ subroutine imp_solver (dt) watery , & ! for ocean stress calculation, y (m/s) forcex , & ! work array: combined atm stress and ocn tilt, x forcey , & ! work array: combined atm stress and ocn tilt, y - bxfix , & ! part of bx that is constant during Picard + bxfix , & ! part of bx that is constant during Picard byfix , & ! part of by that is constant during Picard Cb , & ! seabed stress coefficient fpresx , & ! x fixed point residual vector, fx = uvel - uprev_k @@ -265,7 +265,7 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- ! boundary updates - ! commented out because the ghost cells are freshly + ! 
commented out because the ghost cells are freshly ! updated after cleanup_itd !----------------------------------------------------------------- @@ -281,31 +281,31 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk,i,j,ilo,ihi,jlo,jhi,this_block) do iblk = 1, nblocks - do j = 1, ny_block - do i = 1, nx_block - rdg_conv (i,j,iblk) = c0 - rdg_shear(i,j,iblk) = c0 - divu (i,j,iblk) = c0 - shear(i,j,iblk) = c0 + do j = 1, ny_block + do i = 1, nx_block + rdg_conv (i,j,iblk) = c0 + rdg_shear(i,j,iblk) = c0 + divu (i,j,iblk) = c0 + shear(i,j,iblk) = c0 enddo enddo !----------------------------------------------------------------- - ! preparation for dynamics + ! preparation for dynamics !----------------------------------------------------------------- - this_block = get_block(blocks_ice(iblk),iblk) + this_block = get_block(blocks_ice(iblk),iblk) ilo = this_block%ilo ihi = this_block%ihi jlo = this_block%jlo jhi = this_block%jhi - call dyn_prep1 (nx_block, ny_block, & + call dyn_prep1 (nx_block, ny_block, & ilo, ihi, jlo, jhi, & - aice (:,:,iblk), vice (:,:,iblk), & - vsno (:,:,iblk), tmask (:,:,iblk), & - strairxT(:,:,iblk), strairyT(:,:,iblk), & - strairx (:,:,iblk), strairy (:,:,iblk), & + aice (:,:,iblk), vice (:,:,iblk), & + vsno (:,:,iblk), tmask (:,:,iblk), & + strairxT(:,:,iblk), strairyT(:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & tmass (:,:,iblk), icetmask(:,:,iblk)) enddo ! iblk @@ -332,13 +332,13 @@ subroutine imp_solver (dt) if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) - if (.not. calc_strair) then + if (.not. calc_strair) then strairx(:,:,:) = strax(:,:,:) strairy(:,:,:) = stray(:,:,:) else call t2ugrid_vector(strairx) call t2ugrid_vector(strairy) - endif + endif ! tcraig, tcx, threading here leads to some non-reproducbile results and failures in icepack_ice_strength ! need to do more debugging @@ -349,49 +349,49 @@ subroutine imp_solver (dt) ! 
more preparation for dynamics !----------------------------------------------------------------- - this_block = get_block(blocks_ice(iblk),iblk) + this_block = get_block(blocks_ice(iblk),iblk) ilo = this_block%ilo ihi = this_block%ihi jlo = this_block%jlo jhi = this_block%jhi - call dyn_prep2 (nx_block, ny_block, & + call dyn_prep2 (nx_block, ny_block, & ilo, ihi, jlo, jhi, & - icellt(iblk), icellu(iblk), & - indxti (:,iblk), indxtj (:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - aiu (:,:,iblk), umass (:,:,iblk), & - umassdti (:,:,iblk), fcor_blk (:,:,iblk), & - umask (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - strairx (:,:,iblk), strairy (:,:,iblk), & - ss_tltx (:,:,iblk), ss_tlty (:,:,iblk), & - icetmask (:,:,iblk), iceumask (:,:,iblk), & - fm (:,:,iblk), dt, & - strtltx (:,:,iblk), strtlty (:,:,iblk), & - strocnx (:,:,iblk), strocny (:,:,iblk), & - strintx (:,:,iblk), strinty (:,:,iblk), & - taubx (:,:,iblk), tauby (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & - forcex (:,:,iblk), forcey (:,:,iblk), & - stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & - stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & - stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & - stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & - stress12_1(:,:,iblk), stress12_2(:,:,iblk), & - stress12_3(:,:,iblk), stress12_4(:,:,iblk), & + icellt(iblk), icellu(iblk), & + indxti (:,iblk), indxtj (:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + aiu (:,:,iblk), umass (:,:,iblk), & + umassdti (:,:,iblk), fcor_blk (:,:,iblk), & + umask (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + strairx (:,:,iblk), strairy (:,:,iblk), & + ss_tltx (:,:,iblk), ss_tlty (:,:,iblk), & + icetmask (:,:,iblk), iceumask (:,:,iblk), & + fm (:,:,iblk), dt, & + strtltx (:,:,iblk), strtlty (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & + strintx (:,:,iblk), strinty (:,:,iblk), & + taubx (:,:,iblk), tauby (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & + forcex (:,:,iblk), forcey (:,:,iblk), & + stressp_1 (:,:,iblk), stressp_2 (:,:,iblk), & + stressp_3 (:,:,iblk), stressp_4 (:,:,iblk), & + stressm_1 (:,:,iblk), stressm_2 (:,:,iblk), & + stressm_3 (:,:,iblk), stressm_4 (:,:,iblk), & + stress12_1(:,:,iblk), stress12_2(:,:,iblk), & + stress12_3(:,:,iblk), stress12_4(:,:,iblk), & uvel_init (:,:,iblk), vvel_init (:,:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & Tbu (:,:,iblk)) - call calc_bfix (nx_block , ny_block, & - icellu(iblk) , & - indxui (:,iblk), indxuj (:,iblk), & - umassdti (:,:,iblk), & - forcex (:,:,iblk), forcey (:,:,iblk), & + call calc_bfix (nx_block , ny_block, & + icellu(iblk) , & + indxui (:,iblk), indxuj (:,iblk), & + umassdti (:,:,iblk), & + forcex (:,:,iblk), forcey (:,:,iblk), & uvel_init (:,:,iblk), vvel_init (:,:,iblk), & - bxfix (:,:,iblk), byfix (:,:,iblk)) - + bxfix (:,:,iblk), byfix (:,:,iblk)) + !----------------------------------------------------------------- ! ice strength !----------------------------------------------------------------- @@ -401,11 +401,11 @@ subroutine imp_solver (dt) i = indxti(ij, iblk) j = indxtj(ij, iblk) call icepack_ice_strength (ncat, & - aice (i,j, iblk), & - vice (i,j, iblk), & - aice0 (i,j, iblk), & - aicen (i,j,:,iblk), & - vicen (i,j,:,iblk), & + aice (i,j, iblk), & + vice (i,j, iblk), & + aice0 (i,j, iblk), & + aicen (i,j,:,iblk), & + vicen (i,j,:,iblk), & strength(i,j, iblk) ) enddo ! ij @@ -456,7 +456,7 @@ subroutine imp_solver (dt) ntot=0 do iblk = 1, nblocks - ntot = ntot + icellu(iblk) + ntot = ntot + icellu(iblk) enddo ntot = 2*ntot ! 
times 2 because of u and v @@ -469,7 +469,7 @@ subroutine imp_solver (dt) indxti, indxtj, & indxui, indxuj, & aiu, ntot, & - waterx, watery, & + waterx, watery, & bxfix, byfix, & umassdti, sol, & fpresx, fpresy, & @@ -614,16 +614,16 @@ subroutine imp_solver (dt) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call dyn_finish & - (nx_block, ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & - indxui (:,iblk), indxuj (:,iblk), & - uvel (:,:,iblk), vvel (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & - aiu (:,:,iblk), fm (:,:,iblk), & + call dyn_finish & + (nx_block, ny_block, & + icellu (iblk), Cdn_ocn (:,:,iblk), & + indxui (:,iblk), indxuj (:,iblk), & + uvel (:,:,iblk), vvel (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & + aiu (:,:,iblk), fm (:,:,iblk), & strintx (:,:,iblk), strinty (:,:,iblk), & strairx (:,:,iblk), strairy (:,:,iblk), & - strocnx (:,:,iblk), strocny (:,:,iblk), & + strocnx (:,:,iblk), strocny (:,:,iblk), & strocnxT(:,:,iblk), strocnyT(:,:,iblk)) enddo @@ -652,7 +652,7 @@ subroutine anderson_solver (icellt, icellu, & indxti, indxtj, & indxui, indxuj, & aiu, ntot, & - waterx, watery, & + waterx, watery, & bxfix, byfix, & umassdti, sol, & fpresx, fpresy, & @@ -671,10 +671,10 @@ subroutine anderson_solver (icellt, icellu, & use ice_state, only: uvel, vvel, strength use ice_timers, only: ice_timer_start, ice_timer_stop - integer (kind=int_kind), intent(in) :: & + integer (kind=int_kind), intent(in) :: & ntot ! size of problem for fgmres (for given cpu) - integer (kind=int_kind), dimension(max_blocks), intent(in) :: & + integer (kind=int_kind), dimension(max_blocks), intent(in) :: & icellt , & ! no. of cells where icetmask = 1 icellu ! no. of cells where iceumask = 1 @@ -708,7 +708,7 @@ subroutine anderson_solver (icellt, icellu, & ! local variables - integer (kind=int_kind) :: & + integer (kind=int_kind) :: & it_nl , & ! nonlinear loop iteration index res_num , & ! current number of stored residuals j , & ! iteration index for QR update @@ -723,7 +723,7 @@ subroutine anderson_solver (icellt, icellu, & vprev_k , & ! vvel at previous Picard iteration ulin , & ! uvel to linearize vrel vlin , & ! vvel to linearize vrel - vrel , & ! coeff for tauw + vrel , & ! coeff for tauw bx , & ! b vector by , & ! b vector diagx , & ! Diagonal (x component) of the matrix A @@ -761,7 +761,7 @@ subroutine anderson_solver (icellt, icellu, & rhs_tri , & ! right hand side vector for matrix-vector product coeffs ! coeffs used to combine previous solutions - real (kind=dbl_kind) :: & + real (kind=dbl_kind) :: & ! tol , & ! tolerance for fixed point convergence: reltol_andacc * (initial fixed point residual norm) [unused for now] tol_nl , & ! tolerance for nonlinear convergence: reltol_nonlin * (initial nonlinear residual norm) fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x @@ -786,7 +786,7 @@ subroutine anderson_solver (icellt, icellu, & !$OMP END PARALLEL DO ! Start iterations - do it_nl = 0, maxits_nonlin ! nonlinear iteration loop + do it_nl = 0, maxits_nonlin ! nonlinear iteration loop ! Compute quantities needed for computing PDE residual and g(x) (fixed point map) !----------------------------------------------------------------- ! 
Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k) @@ -805,31 +805,31 @@ subroutine anderson_solver (icellt, icellu, & vprev_k(:,:,iblk) = vvel(:,:,iblk) call calc_zeta_Pr (nx_block , ny_block, & - icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & - uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - tinyarea (:,:,iblk), & + icellt(iblk), & + indxti (:,iblk) , indxtj(:,iblk), & + uprev_k (:,:,iblk), vprev_k (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + tinyarea (:,:,iblk), & strength (:,:,iblk), zetaD (:,:,iblk,:) ,& - stPrtmp (:,:,:) ) + stPrtmp (:,:,:)) call calc_vrel_Cb (nx_block , ny_block, & - icellu (iblk), Cdn_ocn (:,:,iblk), & + icellu (iblk), Cdn_ocn (:,:,iblk), & indxui (:,iblk), indxuj (:,iblk), & aiu (:,:,iblk), Tbu (:,:,iblk), & - uocn (:,:,iblk), vocn (:,:,iblk), & + uocn (:,:,iblk), vocn (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & vrel (:,:,iblk), Cb (:,:,iblk)) ! prepare b vector (RHS) call calc_bvec (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & stPrtmp (:,:,:) , uarear (:,:,iblk), & - waterx (:,:,iblk), watery (:,:,iblk), & + waterx (:,:,iblk), watery (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & @@ -851,7 +851,7 @@ subroutine anderson_solver (icellt, icellu, & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) call residual_vec (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & bx (:,:,iblk), by (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk), & @@ -894,17 +894,17 @@ subroutine anderson_solver (icellt, icellu, & call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology icellu (iblk), & indxui (:,iblk), indxuj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:) ) + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx(:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & + zetaD (:,:,iblk,:) , Dstrtmp (:,:,:)) call formDiag_step2 (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & Dstrtmp (:,:,:) , vrel (:,:,iblk), & - umassdti (:,:,iblk), & - uarear (:,:,iblk), Cb (:,:,iblk), & + umassdti (:,:,iblk), & + uarear (:,:,iblk), Cb (:,:,iblk), & diagx (:,:,iblk), diagy (:,:,iblk)) enddo !$OMP END PARALLEL DO @@ -965,12 +965,12 @@ subroutine anderson_solver (icellt, icellu, & ! tol = reltol_andacc*fpres_norm ! endif ! - ! ! Check residual + ! ! Check residual ! if (fpres_norm < tol) then ! exit ! endif - if (im_andacc == 0 .or. it_nl < start_andacc) then + if (im_andacc == 0 .or. it_nl < start_andacc) then ! Simple fixed point (Picard) iteration in this case sol = fpfunc else @@ -1056,7 +1056,7 @@ subroutine anderson_solver (icellt, icellu, & ! 
Put vector sol in uvel and vvel arrays !----------------------------------------------------------------------- call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & + max_blocks, icellu (:), ntot, & indxui (:,:), indxuj(:,:), & sol (:), & uvel (:,:,:), vvel (:,:,:)) @@ -1070,7 +1070,7 @@ subroutine anderson_solver (icellt, icellu, & fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) call calc_L2norm_squared (nx_block , ny_block, & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & fpresx(:,:,iblk), fpresy(:,:,iblk), & L2norm (iblk)) @@ -1088,27 +1088,27 @@ end subroutine anderson_solver !======================================================================= -! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. - - subroutine calc_zeta_Pr (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - tinyarea, & +! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. + + subroutine calc_zeta_Pr (nx_block, ny_block, & + icellt, & + indxti, indxtj, & + uvel, vvel, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & + tinyarea, & strength, zetaD, & stPr) use ice_dyn_shared, only: strain_rates - integer (kind=int_kind), intent(in) :: & + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & + integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & indxti , & ! compressed index in i-direction indxtj ! compressed index in j-direction @@ -1127,11 +1127,11 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & cxm , & ! 0.5*HTN - 1.5*HTN tinyarea ! min_strain_rate*tarea - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(out) :: & zetaD ! 2*zeta - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(out) :: & stPr ! stress Pr combinations @@ -1150,7 +1150,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & stressp_1, stressp_2, stressp_3, stressp_4 , & strp_tmp - logical :: capping ! of the viscous coeff + logical :: capping ! of the viscous coeff character(len=*), parameter :: subname = '(calc_zeta_Pr)' @@ -1201,7 +1201,6 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & zetaD(i,j,2) = strength(i,j)/(Deltanw + tinyarea(i,j)) zetaD(i,j,3) = strength(i,j)/(Deltasw + tinyarea(i,j)) zetaD(i,j,4) = strength(i,j)/(Deltase + tinyarea(i,j)) - endif @@ -1279,7 +1278,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & enddo ! ij - end subroutine calc_zeta_Pr + end subroutine calc_zeta_Pr !======================================================================= @@ -1302,11 +1301,11 @@ subroutine stress_vp (nx_block, ny_block, & use ice_dyn_shared, only: strain_rates - integer (kind=int_kind), intent(in) :: & + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & + integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & indxti , & ! compressed index in i-direction indxtj ! compressed index in j-direction @@ -1321,11 +1320,11 @@ subroutine stress_vp (nx_block, ny_block, & cym , & ! 0.5*HTE - 1.5*HTE cxm ! 
0.5*HTN - 1.5*HTN - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & - zetaD ! 2*zeta + zetaD ! 2*zeta - real (kind=dbl_kind), dimension (nx_block,ny_block), & + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 @@ -1521,13 +1520,13 @@ subroutine matvec (nx_block, ny_block, & icellu, icellt , & indxui, indxuj, & indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & uvel, vvel, & vrel, Cb, & - zetaD, & + zetaD, & umassdti, fm, & uarear, & Au, Av) @@ -1566,9 +1565,9 @@ subroutine matvec (nx_block, ny_block, & fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & - zetaD ! 2*zeta + zetaD ! 2*zeta real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & @@ -1581,7 +1580,7 @@ subroutine matvec (nx_block, ny_block, & i, j, ij real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & - str + str real (kind=dbl_kind) :: & utp, vtp , & ! utp = uvel, vtp = vvel @@ -1606,7 +1605,7 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 - stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 + stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 character(len=*), parameter :: subname = '(matvec)' @@ -1793,15 +1792,15 @@ subroutine matvec (nx_block, ny_block, & end subroutine matvec -!======================================================================= +!======================================================================= ! Compute the constant component of b(u,v) i.e. the part of b(u,v) that ! does not depend on (u,v) and thus do not change during the nonlinear iteration - subroutine calc_bfix (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - umassdti, & + subroutine calc_bfix (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + umassdti, & forcex, forcey, & uvel_init, vvel_init, & bxfix, byfix) @@ -1810,12 +1809,12 @@ subroutine calc_bfix (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! no. of cells where iceumask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & + integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block), & + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(in) :: & uvel_init,& ! x-component of velocity (m/s), beginning of time step vvel_init,& ! y-component of velocity (m/s), beginning of time step @@ -1823,7 +1822,7 @@ subroutine calc_bfix (nx_block, ny_block, & forcex , & ! work array: combined atm stress and ocn tilt, x forcey ! work array: combined atm stress and ocn tilt, y - real (kind=dbl_kind), dimension (nx_block,ny_block), & + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(out) :: & bxfix , & ! bx = taux + bxfix byfix ! by = tauy + byfix @@ -2011,20 +2010,20 @@ end subroutine residual_vec ! Form the diagonal of the matrix A(u,v) (first part of the computation) ! 
Part 1: compute the contributions of the diagonal to the rheology term - subroutine formDiag_step1 (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & + subroutine formDiag_step1 (nx_block, ny_block, & + icellu, & + indxui, indxuj, & + dxt, dyt, & + dxhy, dyhx, & + cxp, cyp, & + cxm, cym, & zetaD, Dstr ) - integer (kind=int_kind), intent(in) :: & + integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & + integer (kind=int_kind), dimension (nx_block*ny_block), & intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -2039,14 +2038,14 @@ subroutine formDiag_step1 (nx_block, ny_block, & cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & - zetaD ! 2*zeta + zetaD ! 2*zeta - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(out) :: & - Dstr ! intermediate calc for diagonal components of matrix A associated - ! with rheology term + Dstr ! intermediate calc for diagonal components of matrix A associated + ! with rheology term ! local variables @@ -2081,16 +2080,16 @@ subroutine formDiag_step1 (nx_block, ny_block, & !cdir nodep !NEC !ocl novrec !Fujitsu - Dstr(:,:,:) = c0 ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 - ! come from the surrounding T cells but are all refrerenced to the i,j (u point) + Dstr(:,:,:) = c0 ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 + ! come from the surrounding T cells but are all refrerenced to the i,j (u point) ! Dstr(i,j,1) corresponds to str(i,j,1) ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) + ! Dstr(i,j,3) corresponds to str(i,j+1,3) ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) + ! Dstr(i,j,5) corresponds to str(i,j,5) ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) + ! Dstr(i,j,7) corresponds to str(i+1,j,7) ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) do cc=1, 8 ! 4 for u and 4 for v @@ -2183,7 +2182,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c1 di = 1 dj = 1 - endif + endif do ij = 1, icellu @@ -2308,7 +2307,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne - elseif (cc .eq. 2) then ! T cell i+1,j + elseif (cc .eq. 2) then ! T cell i+1,j strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) @@ -2317,7 +2316,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw - elseif (cc .eq. 3) then ! T cell i,j+1 + elseif (cc .eq. 3) then ! T cell i,j+1 strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) @@ -2326,10 +2325,10 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se - elseif (cc .eq. 4) then ! T cell i+1,j+1 + elseif (cc .eq. 4) then ! 
T cell i+1,j+1 strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) ! southwest (i+1,j+1) Dstr(iu,ju,4) = strp_tmp + strm_tmp + str12we & @@ -2339,7 +2338,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & ! for dF/dy (v momentum) !----------------------------------------------------------------- - elseif (cc .eq. 5) then ! T cell i,j + elseif (cc .eq. 5) then ! T cell i,j strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) @@ -2348,16 +2347,16 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne - elseif (cc .eq. 6) then ! T cell i,j+1 + elseif (cc .eq. 6) then ! T cell i,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) ! southeast (i,j+1) Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se - elseif (cc .eq. 7) then ! T cell i,j+1 + elseif (cc .eq. 7) then ! T cell i,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) @@ -2366,22 +2365,22 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw - elseif (cc .eq. 8) then ! T cell i+1,j+1 + elseif (cc .eq. 8) then ! T cell i+1,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) ! southwest (i+1,j+1) Dstr(iu,ju,8) = strp_tmp - strm_tmp + str12sn & - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw - endif + endif enddo ! ij enddo ! cc - end subroutine formDiag_step1 + end subroutine formDiag_step1 !======================================================================= @@ -2439,16 +2438,16 @@ subroutine formDiag_step2 (nx_block, ny_block, & strintx=c0 strinty=c0 -! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 -! come from the surrounding T cells but are all refrerenced to the i,j (u point) +! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 +! come from the surrounding T cells but are all refrerenced to the i,j (u point) ! Dstr(i,j,1) corresponds to str(i,j,1) ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) + ! Dstr(i,j,3) corresponds to str(i,j+1,3) ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) + ! Dstr(i,j,5) corresponds to str(i,j,5) ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) + ! Dstr(i,j,7) corresponds to str(i+1,j,7) ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) do ij =1, icellu @@ -2467,7 +2466,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & enddo ! ij - end subroutine formDiag_step2 + end subroutine formDiag_step2 !======================================================================= @@ -2536,7 +2535,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & ntot ! 
size of problem for fgmres integer (kind=int_kind), dimension (max_blocks), intent(in) :: & - icellu + icellu integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & intent(in) :: & @@ -2545,7 +2544,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(in) :: & tpu , & ! x-component of vector - tpv ! y-component of vector + tpv ! y-component of vector real (kind=dbl_kind), dimension (ntot), intent(out) :: & outvec @@ -2596,7 +2595,7 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & ntot ! size of problem for fgmres integer (kind=int_kind), dimension (max_blocks), intent(in) :: & - icellu + icellu integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & intent(in) :: & @@ -2604,7 +2603,7 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (ntot), intent(in) :: & - invec + invec real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(out) :: & tpu , & ! x-component of vector @@ -2639,7 +2638,6 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & end subroutine vec_to_arrays - !======================================================================= ! Update Q and R factor after deletion of the 1st column of G_diff @@ -2694,7 +2692,7 @@ end subroutine qr_delete !======================================================================= -! FGMRES: Flexible generalized minimum residual method (with restarts). +! FGMRES: Flexible generalized minimum residual method (with restarts). ! Solves the linear system A x = b using GMRES with a varying (right) preconditioner ! ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC @@ -2706,7 +2704,7 @@ subroutine fgmres (zetaD, & bx, by, & diagx, diagy, & tolerance, maxinner, & - maxouter, & + maxouter, & solx, soly, & nbiter, conv) @@ -2714,7 +2712,7 @@ subroutine fgmres (zetaD, & zetaD ! zetaD = 2*zeta (viscous coefficient) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - vrel , & ! coefficient for tauw + vrel , & ! coefficient for tauw Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) @@ -2781,7 +2779,7 @@ subroutine fgmres (zetaD, & it, k, ii, jj ! reusable loop counters real (kind=dbl_kind), dimension(maxinner+1) :: & - rot_cos , & ! cosine elements of Givens rotations + rot_cos , & ! cosine elements of Givens rotations rot_sin , & ! sine elements of Givens rotations rhs_hess ! right hand side vector of the Hessenberg (least squares) system @@ -2830,7 +2828,7 @@ subroutine fgmres (zetaD, & uarear (:,:,iblk) , & workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) call residual_vec (nx_block , ny_block , & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk) , & bx (:,:,iblk), by (:,:,iblk) , & workspace_x(:,:,iblk), workspace_y(:,:,iblk), & @@ -2852,7 +2850,7 @@ subroutine fgmres (zetaD, & norm_squared(iblk)) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) if (my_task == master_task .and. monitor_fgmres) then @@ -2951,7 +2949,7 @@ subroutine fgmres (zetaD, & arnoldi_basis_y(:,:,iblk, nextit), & norm_squared(iblk)) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) ! 
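!-----------------------------------------------------------------------
! Sketch of the QR "downdate" performed by qr_delete: deleting the first
! column of the QR-factored matrix leaves R upper Hessenberg, and one
! sweep of Givens rotations restores the triangular form while keeping Q
! orthonormal.  The 2x2 rotation below is the same one built inside
! qr_delete (c = R(i,i+1)/temp, s = R(i+1,i+1)/temp); the values are
! illustrative only.
!-----------------------------------------------------------------------
      program givens_downdate
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp) :: a, b, temp, c, s
         a = 3.0_dp                 ! R(i  ,i+1) after the column shift
         b = 4.0_dp                 ! R(i+1,i+1), entry to be zeroed
         temp = sqrt(a**2 + b**2)
         c = a/temp
         s = b/temp
         ! rotating (a,b) by [c s; -s c] zeroes the subdiagonal entry
         print *, c*a + s*b, -s*a + c*b   ! prints 5.0 and 0.0
      end program givens_downdate
!-----------------------------------------------------------------------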
Watch out for happy breakdown @@ -2968,7 +2966,7 @@ subroutine fgmres (zetaD, & arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm enddo ! ij enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO end if ! Apply previous Givens rotation to the last column of the Hessenberg matrix @@ -3050,14 +3048,14 @@ subroutine fgmres (zetaD, & ! \begin{equation} ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) ! \end{equation} - ! where : + ! where : ! $r$ is the residual ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1) ! $Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 ! $gamma_{m+1}$ is the last element of rhs_hess ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} - ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, + ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, ! store the result in rhs_hess do it = 1, initer jj = nextit - it + 1 @@ -3089,7 +3087,7 @@ end subroutine fgmres !======================================================================= -! PGMRES: Right-preconditioned generalized minimum residual method (with restarts). +! PGMRES: Right-preconditioned generalized minimum residual method (with restarts). ! Solves the linear A x = b using GMRES with a right preconditioner ! ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC @@ -3108,7 +3106,7 @@ subroutine pgmres (zetaD, & zetaD ! zetaD = 2*zeta (viscous coefficient) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - vrel , & ! coefficient for tauw + vrel , & ! coefficient for tauw Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) @@ -3170,7 +3168,7 @@ subroutine pgmres (zetaD, & it, k, ii, jj ! reusable loop counters real (kind=dbl_kind), dimension(maxinner+1) :: & - rot_cos , & ! cosine elements of Givens rotations + rot_cos , & ! cosine elements of Givens rotations rot_sin , & ! sine elements of Givens rotations rhs_hess ! right hand side vector of the Hessenberg (least squares) system @@ -3221,7 +3219,7 @@ subroutine pgmres (zetaD, & uarear (:,:,iblk) , & workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) call residual_vec (nx_block , ny_block , & - icellu (iblk), & + icellu (iblk), & indxui (:,iblk), indxuj (:,iblk) , & bx (:,:,iblk), by (:,:,iblk) , & workspace_x(:,:,iblk), workspace_y(:,:,iblk), & @@ -3243,7 +3241,7 @@ subroutine pgmres (zetaD, & norm_squared(iblk)) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO norm_residual = sqrt(global_sum(sum(norm_squared), distrb_info)) if (my_task == master_task .and. monitor_pgmres) then @@ -3337,7 +3335,7 @@ subroutine pgmres (zetaD, & arnoldi_basis_y(:,:,iblk, nextit), & norm_squared(iblk)) enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO hessenberg(nextit,initer) = sqrt(global_sum(sum(norm_squared), distrb_info)) ! Watch out for happy breakdown @@ -3354,7 +3352,7 @@ subroutine pgmres (zetaD, & arnoldi_basis_y(i, j, iblk, nextit) = arnoldi_basis_y(i, j, iblk, nextit)*inverse_norm enddo ! ij enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO end if ! Apply previous Givens rotation to the last column of the Hessenberg matrix @@ -3450,14 +3448,14 @@ subroutine pgmres (zetaD, & ! \begin{equation} ! r = V_{m+1} * Q_m^T * (\gamma_{m+1} * e_{m+1}) ! \end{equation} - ! where : + ! where : ! $r$ is the residual ! $V_{m+1}$ is a matrix whose columns are the Arnoldi vectors from 1 to nextit (m+1) ! 
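!-----------------------------------------------------------------------
! Hedged sketch of the Givens machinery that both fgmres and pgmres run
! on each new Hessenberg column: previously stored rotations are applied
! first, one new rotation zeroes the subdiagonal entry, and the same
! rotation applied to rhs_hess gives the updated residual norm for free
! (|rhs_hess(k+1)|).  Names mirror the solver's (rot_cos, rot_sin,
! rhs_hess) but this routine is an illustration, not the production
! code path.
!-----------------------------------------------------------------------
      subroutine givens_update(k, h, rot_cos, rot_sin, rhs_hess)
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer,  intent(in)    :: k          ! current inner iteration
         real(dp), intent(inout) :: h(k+1)     ! column k of Hessenberg
         real(dp), intent(inout) :: rot_cos(k), rot_sin(k)
         real(dp), intent(inout) :: rhs_hess(k+1)
         integer  :: it
         real(dp) :: t, nu
         do it = 1, k-1                        ! apply previous rotations
            t       =  rot_cos(it)*h(it) + rot_sin(it)*h(it+1)
            h(it+1) = -rot_sin(it)*h(it) + rot_cos(it)*h(it+1)
            h(it)   =  t
         enddo
         nu = sqrt(h(k)**2 + h(k+1)**2)        ! new rotation zeroes h(k+1)
         if (nu > 0.0_dp) then
            rot_cos(k) = h(k)/nu
            rot_sin(k) = h(k+1)/nu
            h(k)   = rot_cos(k)*h(k) + rot_sin(k)*h(k+1)
            h(k+1) = 0.0_dp
            rhs_hess(k+1) = -rot_sin(k)*rhs_hess(k)  ! residual norm update
            rhs_hess(k)   =  rot_cos(k)*rhs_hess(k)
         endif
      end subroutine givens_update
!-----------------------------------------------------------------------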
$Q_m$ is the product of the Givens rotation : Q_m = G_m G_{m-1} ... G_1 ! $gamma_{m+1}$ is the last element of rhs_hess ! $e_{m+1})$ is the unit vector (0, 0, ..., 1)^T \in \reals^{m+1} - ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, + ! Apply the Givens rotation in reverse order to g := \gamma_{m+1} * e_{m+1}, ! store the result in rhs_hess do it = 1, initer jj = nextit - it + 1 @@ -3505,7 +3503,7 @@ subroutine precondition(zetaD, & zetaD ! zetaD = 2*zeta (viscous coefficient) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks), intent(in) :: & - vrel , & ! coefficient for tauw + vrel , & ! coefficient for tauw Cb , & ! seabed stress coefficient umassdti ! mass of U-cell/dte (kg/m^2 s) @@ -3562,7 +3560,7 @@ subroutine precondition(zetaD, & wy(i,j,iblk) = vy(i,j,iblk) / diagy(i,j,iblk) enddo ! ij enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO elseif (precond_type == 'pgmres') then ! PGMRES (Jacobi-preconditioned GMRES) ! Initialize preconditioned vector to 0 !phb try with wx = vx or vx/diagx wx = c0 @@ -3642,7 +3640,7 @@ subroutine orthogonalize(ortho_type , initer , & i = indxui(ij, iblk) j = indxuj(ij, iblk) - local_dot(iblk) = local_dot(iblk) + & + local_dot(iblk) = local_dot(iblk) + & (arnoldi_basis_x(i, j, iblk, it) * arnoldi_basis_x(i, j, iblk, nextit)) + & (arnoldi_basis_y(i, j, iblk, it) * arnoldi_basis_y(i, j, iblk, nextit)) enddo ! ij @@ -3668,7 +3666,7 @@ subroutine orthogonalize(ortho_type , initer , & - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) enddo ! ij enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO end do elseif (trim(ortho_type) == 'mgs') then ! Modified Gram-Schmidt ! Modified Gram-Schmidt orthogonalisation process @@ -3702,7 +3700,7 @@ subroutine orthogonalize(ortho_type , initer , & - hessenberg(it,initer) * arnoldi_basis_y(i, j, iblk, it) enddo ! ij enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO end do else call abort_ice(error_message='wrong orthonalization in ' // subname, & From 0c3677f6fbe5654398836976f36cb74c072b7eec Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 12:18:05 -0400 Subject: [PATCH 174/196] ice_dyn_vp: uniformize indentation and whitespace --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 896 +++++++++++----------- 1 file changed, 441 insertions(+), 455 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index f45a9158f..da10fd011 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -112,7 +112,7 @@ module ice_dyn_vp ! author: Philippe Blain, ECCC subroutine init_vp (dt) - + use ice_blocks, only: get_block, block use ice_boundary, only: ice_HaloUpdate use ice_constants, only: c1, & @@ -120,22 +120,22 @@ subroutine init_vp (dt) use ice_domain, only: blocks_ice, halo_info use ice_dyn_shared, only: init_evp use ice_grid, only: tarea, tinyarea - + real (kind=dbl_kind), intent(in) :: & dt ! time step - + ! local variables - + integer (kind=int_kind) :: & i, j, iblk, & ilo,ihi,jlo,jhi ! beginning and end of physical domain type (block) :: & this_block ! block information for current block - + real (kind=dbl_kind) :: & min_strain_rate = 2e-09_dbl_kind ! used for recomputing tinyarea - + ! Initialize variables shared with evp call init_evp(dt) @@ -169,6 +169,7 @@ subroutine init_vp (dt) fillValue=c1) end subroutine init_vp + !======================================================================= ! 
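!-----------------------------------------------------------------------
! Hedged sketch of the 'diag' branch of subroutine precondition: the
! Jacobi preconditioner is a point-wise division by the matrix diagonal
! assembled by formDiag_step1/formDiag_step2.  Shapes are simplified to
! flat arrays; the model applies this only at the (indxui,indxuj)
! points of each block.
!-----------------------------------------------------------------------
      subroutine jacobi_apply(n, vx, vy, diagx, diagy, wx, wy)
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer,  intent(in)  :: n
         real(dp), intent(in)  :: vx(n), vy(n), diagx(n), diagy(n)
         real(dp), intent(out) :: wx(n), wy(n)
         wx = vx / diagx               ! u component
         wy = vy / diagy               ! v component
      end subroutine jacobi_apply
!-----------------------------------------------------------------------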
Viscous-plastic dynamics driver @@ -248,12 +249,12 @@ subroutine imp_solver (dt) type (block) :: & this_block ! block information for current block - + real (kind=dbl_kind), allocatable :: & sol(:) ! solution vector - + character(len=*), parameter :: subname = '(imp_solver)' - + call ice_timer_start(timer_dynamics) ! dynamics !----------------------------------------------------------------- @@ -401,12 +402,12 @@ subroutine imp_solver (dt) i = indxti(ij, iblk) j = indxtj(ij, iblk) call icepack_ice_strength (ncat, & - aice (i,j, iblk), & - vice (i,j, iblk), & - aice0 (i,j, iblk), & - aicen (i,j,:,iblk), & - vicen (i,j,:,iblk), & - strength(i,j, iblk) ) + aice (i,j, iblk), & + vice (i,j, iblk), & + aice0 (i,j, iblk), & + aicen (i,j,:,iblk), & + vicen (i,j,:,iblk), & + strength(i,j, iblk)) enddo ! ij enddo ! iblk @@ -454,11 +455,11 @@ subroutine imp_solver (dt) ! calc size of problem (ntot) and allocate arrays and vectors !----------------------------------------------------------------- - ntot=0 + ntot = 0 do iblk = 1, nblocks - ntot = ntot + icellu(iblk) + ntot = ntot + icellu(iblk) enddo - ntot = 2*ntot ! times 2 because of u and v + ntot = 2 * ntot ! times 2 because of u and v allocate(sol(ntot)) @@ -749,14 +750,14 @@ subroutine anderson_solver (icellt, icellu, & fpfunc , & ! current value of fixed point function fpfunc_old , & ! previous value of fixed point function tmp ! temporary vector for BLAS calls - + real (kind=dbl_kind), dimension(ntot,im_andacc) :: & Q , & ! Q factor for QR factorization of F (residuals) matrix G_diff ! Matrix containing the differences of g(x) (fixed point function) evaluations - + real (kind=dbl_kind), dimension(im_andacc,im_andacc) :: & R ! R factor for QR factorization of F (residuals) matrix - + real (kind=dbl_kind), dimension(im_andacc) :: & rhs_tri , & ! right hand side vector for matrix-vector product coeffs ! coeffs used to combine previous solutions @@ -773,7 +774,7 @@ subroutine anderson_solver (icellt, icellu, & conv ! needed for FGMRES !phb keep ? character(len=*), parameter :: subname = '(anderson_solver)' - + ! Initialization res_num = 0 L2norm = c0 @@ -1126,11 +1127,11 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & cym , & ! 0.5*HTE - 1.5*HTE cxm , & ! 0.5*HTN - 1.5*HTN tinyarea ! min_strain_rate*tarea - + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(out) :: & zetaD ! 2*zeta - + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(out) :: & stPr ! stress Pr combinations @@ -1149,7 +1150,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & csigpne, csigpnw, csigpsw, csigpse , & stressp_1, stressp_2, stressp_3, stressp_4 , & strp_tmp - + logical :: capping ! 
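!-----------------------------------------------------------------------
! How the solver sizes its 1-D vectors: one u and one v unknown per
! active velocity point, summed over the local blocks, as in the ntot
! computation above.  A hedged sketch with made-up block counts:
!-----------------------------------------------------------------------
      program size_problem
         implicit none
         integer, parameter :: nblocks = 3
         integer :: icellu(nblocks), ntot, iblk
         icellu = (/ 10, 7, 12 /)    ! active u-points per block (made up)
         ntot = 0
         do iblk = 1, nblocks
            ntot = ntot + icellu(iblk)
         enddo
         ntot = 2 * ntot             ! times 2 because of u and v
         print *, 'ntot =', ntot     ! prints 58
      end program size_problem
!-----------------------------------------------------------------------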
of the viscous coeff character(len=*), parameter :: subname = '(calc_zeta_Pr)' @@ -1189,19 +1190,15 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & Deltase, Deltasw ) if (capping) then - - zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) - zetaD(i,j,2) = strength(i,j)/max(Deltanw,tinyarea(i,j)) - zetaD(i,j,3) = strength(i,j)/max(Deltasw,tinyarea(i,j)) - zetaD(i,j,4) = strength(i,j)/max(Deltase,tinyarea(i,j)) - + zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) + zetaD(i,j,2) = strength(i,j)/max(Deltanw,tinyarea(i,j)) + zetaD(i,j,3) = strength(i,j)/max(Deltasw,tinyarea(i,j)) + zetaD(i,j,4) = strength(i,j)/max(Deltase,tinyarea(i,j)) else - - zetaD(i,j,1) = strength(i,j)/(Deltane + tinyarea(i,j)) - zetaD(i,j,2) = strength(i,j)/(Deltanw + tinyarea(i,j)) - zetaD(i,j,3) = strength(i,j)/(Deltasw + tinyarea(i,j)) - zetaD(i,j,4) = strength(i,j)/(Deltase + tinyarea(i,j)) - + zetaD(i,j,1) = strength(i,j)/(Deltane + tinyarea(i,j)) + zetaD(i,j,2) = strength(i,j)/(Deltanw + tinyarea(i,j)) + zetaD(i,j,3) = strength(i,j)/(Deltasw + tinyarea(i,j)) + zetaD(i,j,4) = strength(i,j)/(Deltase + tinyarea(i,j)) endif !----------------------------------------------------------------- @@ -1279,7 +1276,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & enddo ! ij end subroutine calc_zeta_Pr - + !======================================================================= ! Computes the VP stress (as diagnostic) @@ -1319,7 +1316,7 @@ subroutine stress_vp (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - + real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & zetaD ! 2*zeta @@ -1389,7 +1386,7 @@ subroutine stress_vp (nx_block, ny_block, & enddo ! ij end subroutine stress_vp - + !======================================================================= ! Compute vrel and basal stress coefficients @@ -1437,7 +1434,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & real (kind=dbl_kind) :: & rhow ! - + character(len=*), parameter :: subname = '(calc_vrel_Cb)' !----------------------------------------------------------------- @@ -1449,7 +1446,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) - do ij =1, icellu + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -1458,7 +1455,6 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & (vocn(i,j) - vvel(i,j))**2) ! m/s Cb(i,j) = Tbu(i,j) / (sqrt(uvel(i,j)**2 + vvel(i,j)**2) + u0) ! for basal stress - enddo ! ij end subroutine calc_vrel_Cb @@ -1499,17 +1495,16 @@ subroutine calc_seabed_stress (nx_block , ny_block, & character(len=*), parameter :: subname = '(calc_seabed_stress)' - do ij =1, icellu + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) taubx(i,j) = -uvel(i,j)*Cb(i,j) tauby(i,j) = -vvel(i,j)*Cb(i,j) - enddo ! ij end subroutine calc_seabed_stress - + !======================================================================= ! Computes the matrix vector product A(u,v) * (u,v) @@ -1554,7 +1549,7 @@ subroutine matvec (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(in) :: & uvel , & ! x-component of velocity (m/s) @@ -1568,7 +1563,7 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension(nx_block,ny_block,4), & intent(in) :: & zetaD ! 2*zeta - + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(inout) :: & Au , & ! 
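!-----------------------------------------------------------------------
! The two regularizations selected by 'capping' in calc_zeta_Pr, reduced
! to scalars: the capped form strength/max(Delta, Delta_min) freezes the
! viscous coefficient once Delta drops below Delta_min, while the smooth
! form strength/(Delta + Delta_min) keeps it differentiable, which can
! help the nonlinear solver.  Values below are illustrative; in the
! model Delta and Delta_min (tinyarea) carry a factor of the cell area.
!-----------------------------------------------------------------------
      program capping_demo
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp), parameter :: strength  = 2.75e4_dp ! N/m (illustrative)
         real(dp), parameter :: delta_min = 2.0e-9_dp ! 1/s (illustrative)
         real(dp) :: delta
         delta = 1.0e-10_dp                           ! nearly rigid ice
         print *, 'capped:', strength/max(delta, delta_min)
         print *, 'smooth:', strength/(delta + delta_min)
      end program capping_demo
!-----------------------------------------------------------------------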
matvec, Fx = bx - Au (N/m^2) @@ -1581,7 +1576,7 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & str - + real (kind=dbl_kind) :: & utp, vtp , & ! utp = uvel, vtp = vvel ccaimp,ccb , & ! intermediate variables @@ -1601,7 +1596,7 @@ subroutine matvec (nx_block, ny_block, & csig12ne, csig12nw, csig12se, csig12sw , & str12ew, str12we, str12ns, str12sn , & strp_tmp, strm_tmp - + real (kind=dbl_kind) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 (without Pr) stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 @@ -1761,14 +1756,14 @@ subroutine matvec (nx_block, ny_block, & ! southwest (i+1,j+1) str(i,j,8) = strp_tmp - strm_tmp + str12sn & - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw - + enddo ! ij - icellt - + !----------------------------------------------------------------- ! Form Au and Av !----------------------------------------------------------------- - do ij =1, icellu + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -1776,7 +1771,7 @@ subroutine matvec (nx_block, ny_block, & vtp = vvel(i,j) ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s - + ccb = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s ! divergence of the internal stress tensor @@ -1787,8 +1782,7 @@ subroutine matvec (nx_block, ny_block, & Au(i,j) = ccaimp*utp - ccb*vtp - strintx Av(i,j) = ccaimp*vtp + ccb*utp - strinty - - enddo ! ij - icellu + enddo ! ij - icellu end subroutine matvec @@ -1844,11 +1838,10 @@ subroutine calc_bfix (nx_block, ny_block, & bxfix(i,j) = umassdti(i,j)*uvel_init(i,j) + forcex(i,j) byfix(i,j) = umassdti(i,j)*vvel_init(i,j) + forcey(i,j) - enddo - end subroutine calc_bfix - + end subroutine calc_bfix + !======================================================================= ! Compute the vector b(u,v), i.e. the part of the nonlinear function F(u,v) @@ -1856,14 +1849,14 @@ end subroutine calc_bfix ! depending on (u,v) subroutine calc_bvec (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - stPr, uarear, & - waterx, watery, & - uvel, vvel, & - bxfix, byfix, & - bx, by, & - vrel) + icellu, & + indxui, indxuj, & + stPr, uarear, & + waterx, watery, & + uvel, vvel, & + bxfix, byfix, & + bx, by, & + vrel) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1883,16 +1876,16 @@ subroutine calc_bvec (nx_block, ny_block, & bxfix , & ! bx = taux + bxfix byfix , & ! by = tauy + byfix vrel ! relative ice-ocean velocity - + real (kind=dbl_kind), dimension(nx_block,ny_block,8), & intent(in) :: & stPr - + real (kind=dbl_kind), dimension (nx_block,ny_block), & intent(out) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) by ! b vector, by = tauy + byfix (N/m^2) - + ! local variables integer (kind=int_kind) :: & @@ -1903,7 +1896,7 @@ subroutine calc_bvec (nx_block, ny_block, & taux, tauy , & ! part of ocean stress term strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) rhow ! - + character(len=*), parameter :: subname = '(calc_bvec)' !----------------------------------------------------------------- @@ -1915,7 +1908,7 @@ subroutine calc_bvec (nx_block, ny_block, & if (icepack_warnings_aborted()) call abort_ice(error_message=subname, & file=__FILE__, line=__LINE__) - do ij =1, icellu + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -1934,11 +1927,10 @@ subroutine calc_bvec (nx_block, ny_block, & bx(i,j) = bxfix(i,j) + taux + strintx by(i,j) = byfix(i,j) + tauy + strinty - enddo ! 
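!-----------------------------------------------------------------------
! The point-wise part of matvec couples u and v through a 2x2 block,
!    Au = ccaimp*u - ccb*v - strintx
!    Av = ccaimp*v + ccb*u - strinty
! with ccaimp collecting inertia, water drag and seabed drag, and ccb
! the Coriolis/turning terms.  A hedged scalar sketch with made-up
! inputs, just to make that structure explicit:
!-----------------------------------------------------------------------
      program matvec_point
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp) :: umassdti, vrel, Cb, fm, cosw, sinw
         real(dp) :: ccaimp, ccb, u, v, strintx, strinty, Au, Av
         umassdti = 1.0e3_dp ;  vrel = 5.0e-2_dp ;  Cb = 0.0_dp
         fm = 1.4e-4_dp      ;  cosw = 1.0_dp    ;  sinw = 0.0_dp
         u  = 0.1_dp         ;  v = -0.05_dp
         strintx = 0.0_dp    ;  strinty = 0.0_dp
         ccaimp = umassdti + vrel*cosw + Cb          ! kg/m^2 s
         ccb    = fm + sign(1.0_dp, fm)*vrel*sinw    ! kg/m^2 s
         Au = ccaimp*u - ccb*v - strintx
         Av = ccaimp*v + ccb*u - strinty
         print *, Au, Av
      end program matvec_point
!-----------------------------------------------------------------------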
ij end subroutine calc_bvec - + !======================================================================= ! Compute the non linear residual F(u,v) = b(u,v) - A(u,v) * (u,v), @@ -1991,8 +1983,8 @@ subroutine residual_vec (nx_block, ny_block, & if (present(sum_squared)) then sum_squared = c0 endif - - do ij =1, icellu + + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -2004,7 +1996,7 @@ subroutine residual_vec (nx_block, ny_block, & enddo ! ij end subroutine residual_vec - + !======================================================================= ! Form the diagonal of the matrix A(u,v) (first part of the computation) @@ -2017,7 +2009,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & dxhy, dyhx, & cxp, cyp, & cxm, cym, & - zetaD, Dstr ) + zetaD, Dstr) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2082,307 +2074,306 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(:,:,:) = c0 ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 ! come from the surrounding T cells but are all refrerenced to the i,j (u point) - - ! Dstr(i,j,1) corresponds to str(i,j,1) - ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) - ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) - ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) - ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) - - do cc=1, 8 ! 4 for u and 4 for v - if (cc .eq. 1) then ! u comp, T cell i,j - uij = c1 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - di = 0 - dj = 0 - elseif (cc .eq. 2) then ! u comp, T cell i+1,j - uij = c0 - ui1j = c1 - uij1 = c0 - ui1j1 = c0 - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - di = 1 - dj = 0 - elseif (cc .eq. 3) then ! u comp, T cell i,j+1 - uij = c0 - ui1j = c0 - uij1 = c1 - ui1j1 = c0 - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - di = 0 - dj = 1 - elseif (cc .eq. 4) then ! u comp, T cell i+1,j+1 - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c1 - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - di = 1 - dj = 1 - elseif (cc .eq. 5) then ! v comp, T cell i,j - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - vij = c1 - vi1j = c0 - vij1 = c0 - vi1j1 = c0 - di = 0 - dj = 0 - elseif (cc .eq. 6) then ! v comp, T cell i,j+1 - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - vij = c0 - vi1j = c0 - vij1 = c1 - vi1j1 = c0 - di = 0 - dj = 1 - elseif (cc .eq. 7) then ! v comp, T cell i+1,j - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - vij = c0 - vi1j = c1 - vij1 = c0 - vi1j1 = c0 - di = 1 - dj = 0 - elseif (cc .eq. 8) then ! v comp, T cell i+1,j+1 - uij = c0 - ui1j = c0 - uij1 = c0 - ui1j1 = c0 - vij = c0 - vi1j = c0 - vij1 = c0 - vi1j1 = c1 - di = 1 - dj = 1 - endif - - do ij = 1, icellu - - iu = indxui(ij) - ju = indxuj(ij) - i=iu+di - j=ju+dj - - !----------------------------------------------------------------- - ! strain rates - ! NOTE these are actually strain rates * area (m^2/s) - !----------------------------------------------------------------- - ! divergence = e_11 + e_22 - divune = cyp(i,j)*uij - dyt(i,j)*ui1j & - + cxp(i,j)*vij - dxt(i,j)*vij1 - divunw = cym(i,j)*ui1j + dyt(i,j)*uij & - + cxp(i,j)*vi1j - dxt(i,j)*vi1j1 - divusw = cym(i,j)*ui1j1 + dyt(i,j)*uij1 & - + cxm(i,j)*vi1j1 + dxt(i,j)*vi1j - divuse = cyp(i,j)*uij1 - dyt(i,j)*ui1j1 & - + cxm(i,j)*vij1 + dxt(i,j)*vij - - ! 
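!-----------------------------------------------------------------------
! residual_vec evaluates F(u,v) = b(u,v) - A(u,v)*(u,v) with vrel, Cb
! and zetaD lagged at the current iterate, which is what turns the outer
! iteration into a fixed-point (Picard) loop that Anderson acceleration
! can then extrapolate.  A hedged 1-D caricature of that loop, with g(x)
! a toy fixed-point map (everything here is made up):
!-----------------------------------------------------------------------
      program picard_caricature
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp) :: x, res
         integer  :: k
         x = 1.0_dp
         do k = 1, 20
            res = g(x) - x             ! fixed-point residual
            if (abs(res) < 1.0e-12_dp) exit
            x = g(x)                   ! Picard update
         enddo
         print *, 'iterations:', k, '  x =', x  ! converges to sqrt(2)
      contains
         real(dp) function g(x)        ! toy contraction
            real(dp), intent(in) :: x
            g = 0.5_dp*(x + 2.0_dp/x)
         end function g
      end program picard_caricature
!-----------------------------------------------------------------------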
tension strain rate = e_11 - e_22 - tensionne = -cym(i,j)*uij - dyt(i,j)*ui1j & - + cxm(i,j)*vij + dxt(i,j)*vij1 - tensionnw = -cyp(i,j)*ui1j + dyt(i,j)*uij & - + cxm(i,j)*vi1j + dxt(i,j)*vi1j1 - tensionsw = -cyp(i,j)*ui1j1 + dyt(i,j)*uij1 & - + cxp(i,j)*vi1j1 - dxt(i,j)*vi1j - tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & - + cxp(i,j)*vij1 - dxt(i,j)*vij - - ! shearing strain rate = 2*e_12 - shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & - - cxm(i,j)*uij - dxt(i,j)*uij1 - shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & - - cxm(i,j)*ui1j - dxt(i,j)*ui1j1 - shearsw = -cyp(i,j)*vi1j1 + dyt(i,j)*vij1 & - - cxp(i,j)*ui1j1 + dxt(i,j)*ui1j - shearse = -cym(i,j)*vij1 - dyt(i,j)*vi1j1 & - - cxp(i,j)*uij1 + dxt(i,j)*uij - - !----------------------------------------------------------------- - ! the stresses ! kg/s^2 - ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - !----------------------------------------------------------------- - - stressp_1 = zetaD(i,j,1)*divune*(c1+Ktens) - stressp_2 = zetaD(i,j,2)*divunw*(c1+Ktens) - stressp_3 = zetaD(i,j,3)*divusw*(c1+Ktens) - stressp_4 = zetaD(i,j,4)*divuse*(c1+Ktens) - - stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci - stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci - stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci - stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci - - stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci - stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci - stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci - stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - - !----------------------------------------------------------------- - ! combinations of the stresses for the momentum equation ! kg/s^2 - !----------------------------------------------------------------- + ! Dstr(i,j,1) corresponds to str(i,j,1) + ! Dstr(i,j,2) corresponds to str(i+1,j,2) + ! Dstr(i,j,3) corresponds to str(i,j+1,3) + ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) + ! Dstr(i,j,5) corresponds to str(i,j,5) + ! Dstr(i,j,6) corresponds to str(i,j+1,6) + ! Dstr(i,j,7) corresponds to str(i+1,j,7) + ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) + + do cc = 1, 8 ! 4 for u and 4 for v + + if (cc .eq. 1) then ! u comp, T cell i,j + uij = c1 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 0 + elseif (cc .eq. 2) then ! u comp, T cell i+1,j + uij = c0 + ui1j = c1 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 0 + elseif (cc .eq. 3) then ! u comp, T cell i,j+1 + uij = c0 + ui1j = c0 + uij1 = c1 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 1 + elseif (cc .eq. 4) then ! u comp, T cell i+1,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c1 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 1 + elseif (cc .eq. 5) then ! v comp, T cell i,j + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c1 + vi1j = c0 + vij1 = c0 + vi1j1 = c0 + di = 0 + dj = 0 + elseif (cc .eq. 6) then ! v comp, T cell i,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c1 + vi1j1 = c0 + di = 0 + dj = 1 + elseif (cc .eq. 7) then ! v comp, T cell i+1,j + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c1 + vij1 = c0 + vi1j1 = c0 + di = 1 + dj = 0 + elseif (cc .eq. 8) then ! 
v comp, T cell i+1,j+1 + uij = c0 + ui1j = c0 + uij1 = c0 + ui1j1 = c0 + vij = c0 + vi1j = c0 + vij1 = c0 + vi1j1 = c1 + di = 1 + dj = 1 + endif - ssigpn = stressp_1 + stressp_2 - ssigps = stressp_3 + stressp_4 - ssigpe = stressp_1 + stressp_4 - ssigpw = stressp_2 + stressp_3 - ssigp1 =(stressp_1 + stressp_3)*p055 - ssigp2 =(stressp_2 + stressp_4)*p055 + do ij = 1, icellu + + iu = indxui(ij) + ju = indxuj(ij) + i = iu + di + j = ju + dj + + !----------------------------------------------------------------- + ! strain rates + ! NOTE these are actually strain rates * area (m^2/s) + !----------------------------------------------------------------- + ! divergence = e_11 + e_22 + divune = cyp(i,j)*uij - dyt(i,j)*ui1j & + + cxp(i,j)*vij - dxt(i,j)*vij1 + divunw = cym(i,j)*ui1j + dyt(i,j)*uij & + + cxp(i,j)*vi1j - dxt(i,j)*vi1j1 + divusw = cym(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxm(i,j)*vi1j1 + dxt(i,j)*vi1j + divuse = cyp(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxm(i,j)*vij1 + dxt(i,j)*vij + + ! tension strain rate = e_11 - e_22 + tensionne = -cym(i,j)*uij - dyt(i,j)*ui1j & + + cxm(i,j)*vij + dxt(i,j)*vij1 + tensionnw = -cyp(i,j)*ui1j + dyt(i,j)*uij & + + cxm(i,j)*vi1j + dxt(i,j)*vi1j1 + tensionsw = -cyp(i,j)*ui1j1 + dyt(i,j)*uij1 & + + cxp(i,j)*vi1j1 - dxt(i,j)*vi1j + tensionse = -cym(i,j)*uij1 - dyt(i,j)*ui1j1 & + + cxp(i,j)*vij1 - dxt(i,j)*vij + + ! shearing strain rate = 2*e_12 + shearne = -cym(i,j)*vij - dyt(i,j)*vi1j & + - cxm(i,j)*uij - dxt(i,j)*uij1 + shearnw = -cyp(i,j)*vi1j + dyt(i,j)*vij & + - cxm(i,j)*ui1j - dxt(i,j)*ui1j1 + shearsw = -cyp(i,j)*vi1j1 + dyt(i,j)*vij1 & + - cxp(i,j)*ui1j1 + dxt(i,j)*ui1j + shearse = -cym(i,j)*vij1 - dyt(i,j)*vi1j1 & + - cxp(i,j)*uij1 + dxt(i,j)*uij + + !----------------------------------------------------------------- + ! the stresses ! kg/s^2 + ! (1) northeast, (2) northwest, (3) southwest, (4) southeast + !----------------------------------------------------------------- + + stressp_1 = zetaD(i,j,1)*divune*(c1+Ktens) + stressp_2 = zetaD(i,j,2)*divunw*(c1+Ktens) + stressp_3 = zetaD(i,j,3)*divusw*(c1+Ktens) + stressp_4 = zetaD(i,j,4)*divuse*(c1+Ktens) + + stressm_1 = zetaD(i,j,1)*tensionne*(c1+Ktens)*ecci + stressm_2 = zetaD(i,j,2)*tensionnw*(c1+Ktens)*ecci + stressm_3 = zetaD(i,j,3)*tensionsw*(c1+Ktens)*ecci + stressm_4 = zetaD(i,j,4)*tensionse*(c1+Ktens)*ecci + + stress12_1 = zetaD(i,j,1)*shearne*p5*(c1+Ktens)*ecci + stress12_2 = zetaD(i,j,2)*shearnw*p5*(c1+Ktens)*ecci + stress12_3 = zetaD(i,j,3)*shearsw*p5*(c1+Ktens)*ecci + stress12_4 = zetaD(i,j,4)*shearse*p5*(c1+Ktens)*ecci - ssigmn = stressm_1 + stressm_2 - ssigms = stressm_3 + stressm_4 - ssigme = stressm_1 + stressm_4 - ssigmw = stressm_2 + stressm_3 - ssigm1 =(stressm_1 + stressm_3)*p055 - ssigm2 =(stressm_2 + stressm_4)*p055 + !----------------------------------------------------------------- + ! combinations of the stresses for the momentum equation ! 
kg/s^2 + !----------------------------------------------------------------- - ssig12n = stress12_1 + stress12_2 - ssig12s = stress12_3 + stress12_4 - ssig12e = stress12_1 + stress12_4 - ssig12w = stress12_2 + stress12_3 - ssig121 =(stress12_1 + stress12_3)*p111 - ssig122 =(stress12_2 + stress12_4)*p111 + ssigpn = stressp_1 + stressp_2 + ssigps = stressp_3 + stressp_4 + ssigpe = stressp_1 + stressp_4 + ssigpw = stressp_2 + stressp_3 + ssigp1 =(stressp_1 + stressp_3)*p055 + ssigp2 =(stressp_2 + stressp_4)*p055 + + ssigmn = stressm_1 + stressm_2 + ssigms = stressm_3 + stressm_4 + ssigme = stressm_1 + stressm_4 + ssigmw = stressm_2 + stressm_3 + ssigm1 =(stressm_1 + stressm_3)*p055 + ssigm2 =(stressm_2 + stressm_4)*p055 + + ssig12n = stress12_1 + stress12_2 + ssig12s = stress12_3 + stress12_4 + ssig12e = stress12_1 + stress12_4 + ssig12w = stress12_2 + stress12_3 + ssig121 =(stress12_1 + stress12_3)*p111 + ssig122 =(stress12_2 + stress12_4)*p111 + + csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 + csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 + csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 + csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 + + csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 + csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 + csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 + csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + + csig12ne = p222*stress12_1 + ssig122 & + + p055*stress12_3 + csig12nw = p222*stress12_2 + ssig121 & + + p055*stress12_4 + csig12sw = p222*stress12_3 + ssig122 & + + p055*stress12_1 + csig12se = p222*stress12_4 + ssig121 & + + p055*stress12_2 + + str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) + str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) + str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) + str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) - csigpne = p111*stressp_1 + ssigp2 + p027*stressp_3 - csigpnw = p111*stressp_2 + ssigp1 + p027*stressp_4 - csigpsw = p111*stressp_3 + ssigp2 + p027*stressp_1 - csigpse = p111*stressp_4 + ssigp1 + p027*stressp_2 - - csigmne = p111*stressm_1 + ssigm2 + p027*stressm_3 - csigmnw = p111*stressm_2 + ssigm1 + p027*stressm_4 - csigmsw = p111*stressm_3 + ssigm2 + p027*stressm_1 - csigmse = p111*stressm_4 + ssigm1 + p027*stressm_2 + !----------------------------------------------------------------- + ! for dF/dx (u momentum) + !----------------------------------------------------------------- - csig12ne = p222*stress12_1 + ssig122 & - + p055*stress12_3 - csig12nw = p222*stress12_2 + ssig121 & - + p055*stress12_4 - csig12sw = p222*stress12_3 + ssig122 & - + p055*stress12_1 - csig12se = p222*stress12_4 + ssig121 & - + p055*stress12_2 - - str12ew = p5*dxt(i,j)*(p333*ssig12e + p166*ssig12w) - str12we = p5*dxt(i,j)*(p333*ssig12w + p166*ssig12e) - str12ns = p5*dyt(i,j)*(p333*ssig12n + p166*ssig12s) - str12sn = p5*dyt(i,j)*(p333*ssig12s + p166*ssig12n) + if (cc .eq. 1) then ! T cell i,j + + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) - !----------------------------------------------------------------- - ! for dF/dx (u momentum) - !----------------------------------------------------------------- - - if (cc .eq. 1) then ! T cell i,j - - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + ! northeast (i,j) + Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne + + elseif (cc .eq. 2) then ! 
T cell i+1,j + + strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) + strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) + + ! northwest (i+1,j) + Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & + + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw - ! northeast (i,j) - Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & - + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne - - elseif (cc .eq. 2) then ! T cell i+1,j - - strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) - strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) - - ! northwest (i+1,j) - Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & - + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw + elseif (cc .eq. 3) then ! T cell i,j+1 + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) - elseif (cc .eq. 3) then ! T cell i,j+1 - - strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + ! southeast (i,j+1) + Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se - ! southeast (i,j+1) - Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & - + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se + elseif (cc .eq. 4) then ! T cell i+1,j+1 + + strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) + strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) + + ! southwest (i+1,j+1) + Dstr(iu,ju,4) = strp_tmp + strm_tmp + str12we & + + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw - elseif (cc .eq. 4) then ! T cell i+1,j+1 - - strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) - strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) - - ! southwest (i+1,j+1) - Dstr(iu,ju,4) = strp_tmp + strm_tmp + str12we & - + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw + !----------------------------------------------------------------- + ! for dF/dy (v momentum) + !----------------------------------------------------------------- + + elseif (cc .eq. 5) then ! T cell i,j + + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) - !----------------------------------------------------------------- - ! for dF/dy (v momentum) - !----------------------------------------------------------------- - - elseif (cc .eq. 5) then ! T cell i,j - - strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + ! northeast (i,j) + Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & + - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne - ! northeast (i,j) - Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & - - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne + elseif (cc .eq. 6) then ! T cell i,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) + strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) + + ! southeast (i,j+1) + Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & + - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se - elseif (cc .eq. 6) then ! T cell i,j+1 - - strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) - strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) - - ! southeast (i,j+1) - Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & - - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se + elseif (cc .eq. 7) then ! T cell i,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) - elseif (cc .eq. 7) then ! T cell i,j+1 - - strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + ! 
northwest (i+1,j) + Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & + - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw - ! northwest (i+1,j) - Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & - - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw + elseif (cc .eq. 8) then ! T cell i+1,j+1 + + strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) + strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) + + ! southwest (i+1,j+1) + Dstr(iu,ju,8) = strp_tmp - strm_tmp + str12sn & + - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw + + endif - elseif (cc .eq. 8) then ! T cell i+1,j+1 - - strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) - strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) - - ! southwest (i+1,j+1) - Dstr(iu,ju,8) = strp_tmp - strm_tmp + str12sn & - - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw - - endif + enddo ! ij - enddo ! ij - enddo ! cc end subroutine formDiag_step1 - - + !======================================================================= ! Form the diagonal of the matrix A(u,v) (second part of the computation) @@ -2394,7 +2385,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & Dstr, vrel, & umassdti, & uarear, Cb, & - diagx, diagy ) + diagx, diagy) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2435,22 +2426,22 @@ subroutine formDiag_step2 (nx_block, ny_block, & ! integrate the momentum equation !----------------------------------------------------------------- - strintx=c0 - strinty=c0 - -! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 -! come from the surrounding T cells but are all refrerenced to the i,j (u point) - - ! Dstr(i,j,1) corresponds to str(i,j,1) - ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) - ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) - ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) - ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) - - do ij =1, icellu + strintx = c0 + strinty = c0 + + ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 + ! come from the surrounding T cells but are all refrerenced to the i,j (u point) + + ! Dstr(i,j,1) corresponds to str(i,j,1) + ! Dstr(i,j,2) corresponds to str(i+1,j,2) + ! Dstr(i,j,3) corresponds to str(i,j+1,3) + ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) + ! Dstr(i,j,5) corresponds to str(i,j,5) + ! Dstr(i,j,6) corresponds to str(i,j+1,6) + ! Dstr(i,j,7) corresponds to str(i+1,j,7) + ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) + + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -2463,11 +2454,10 @@ subroutine formDiag_step2 (nx_block, ny_block, & diagx(i,j) = ccaimp - strintx diagy(i,j) = ccaimp - strinty - enddo ! ij end subroutine formDiag_step2 - + !======================================================================= ! Compute squared l^2 norm of a grid function (tpu,tpv) @@ -2505,18 +2495,17 @@ subroutine calc_L2norm_squared (nx_block, ny_block, & ! compute squared l^2 norm of vector grid function (tpu,tpv) !----------------------------------------------------------------- - L2norm = c0 + L2norm = c0 - do ij =1, icellu + do ij = 1, icellu i = indxui(ij) j = indxuj(ij) L2norm = L2norm + tpu(i,j)**2 + tpv(i,j)**2 - enddo ! ij end subroutine calc_L2norm_squared - + !======================================================================= ! 
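!-----------------------------------------------------------------------
! calc_L2norm_squared returns only the local (per-block) partial sum;
! callers take sqrt(global_sum(...)) afterwards so that a single scalar
! crosses the processor boundary.  A hedged two-"block" sketch of that
! split:
!-----------------------------------------------------------------------
      program split_norm
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp) :: block1(2), block2(2), partial(2)
         block1 = (/ 3.0_dp, 0.0_dp /)
         block2 = (/ 0.0_dp, 4.0_dp /)
         partial(1) = sum(block1**2)       ! local squared norm, block 1
         partial(2) = sum(block2**2)       ! local squared norm, block 2
         print *, 'norm =', sqrt(sum(partial)) ! plays the global_sum role
      end program split_norm
!-----------------------------------------------------------------------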
Convert a grid function (tpu,tpv) to a one dimensional vector @@ -2536,7 +2525,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu - + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & intent(in) :: & indxui , & ! compressed index in i-direction @@ -2545,7 +2534,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(in) :: & tpu , & ! x-component of vector tpv ! y-component of vector - + real (kind=dbl_kind), dimension (ntot), intent(out) :: & outvec @@ -2553,7 +2542,6 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & integer (kind=int_kind) :: & i, j, iblk, tot, ij - character(len=*), parameter :: subname = '(arrays_to_vec)' @@ -2561,22 +2549,22 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & ! form vector (converts from max_blocks arrays to single vector !----------------------------------------------------------------- - outvec(:)=c0 - tot=0 + outvec(:) = c0 + tot = 0 - do iblk=1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij,iblk) - j = indxuj(ij,iblk) - tot=tot+1 - outvec(tot)=tpu(i,j,iblk) - tot=tot+1 - outvec(tot)=tpv(i,j,iblk) - enddo - enddo! ij + do iblk = 1, nblocks + do ij = 1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + tot = tot + 1 + outvec(tot) = tpu(i, j, iblk) + tot = tot + 1 + outvec(tot) = tpv(i, j, iblk) + enddo + enddo ! ij end subroutine arrays_to_vec - + !======================================================================= ! Convert one dimensional vector received from the legacy FGMRES driver @@ -2596,7 +2584,7 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu - + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & intent(in) :: & indxui , & ! compressed index in i-direction @@ -2607,13 +2595,12 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(out) :: & tpu , & ! x-component of vector - tpv ! y-component of vector - + tpv ! y-component of vector + ! local variables integer (kind=int_kind) :: & i, j, iblk, tot, ij - character(len=*), parameter :: subname = '(vec_to_arrays)' @@ -2621,23 +2608,23 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & ! form arrays (converts from vector to the max_blocks arrays !----------------------------------------------------------------- - tpu(:,:,:)=c0 - tpv(:,:,:)=c0 - tot=0 + tpu(:,:,:) = c0 + tpv(:,:,:) = c0 + tot = 0 - do iblk=1, nblocks - do ij =1, icellu(iblk) - i = indxui(ij,iblk) - j = indxuj(ij,iblk) - tot=tot+1 - tpu(i,j,iblk)=invec(tot) - tot=tot+1 - tpv(i,j,iblk)=invec(tot) - enddo + do iblk = 1, nblocks + do ij = 1, icellu(iblk) + i = indxui(ij, iblk) + j = indxuj(ij, iblk) + tot = tot + 1 + tpu(i, j, iblk) = invec(tot) + tot = tot + 1 + tpv(i, j, iblk) = invec(tot) + enddo enddo! ij end subroutine vec_to_arrays - + !======================================================================= ! Update Q and R factor after deletion of the 1st column of G_diff @@ -2649,7 +2636,7 @@ end subroutine vec_to_arrays ! [Online]. Available: https://users.wpi.edu/~walker/Papers/anderson_accn_algs_imps.pdf subroutine qr_delete(Q, R) - + real (kind=dbl_kind), intent(inout) :: & Q(:,:), & ! Q factor R(:,:) ! R factor @@ -2659,34 +2646,34 @@ subroutine qr_delete(Q, R) integer (kind=int_kind) :: & i, j, k, & ! loop indices m, n ! 
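!-----------------------------------------------------------------------
! arrays_to_vec and vec_to_arrays flatten the masked (u,v) fields into
! one interleaved vector (u1, v1, u2, v2, ...) for the Krylov solver and
! back again; walking the same index lists in the same order makes the
! round trip exact.  A hedged single-block sketch with made-up indices:
!-----------------------------------------------------------------------
      program pack_unpack
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer, parameter :: n  = 4              ! active u-points
         integer  :: indxui(n), indxuj(n), ij, i, j, tot
         real(dp) :: tpu(4,4), tpv(4,4), u2(4,4), v2(4,4), vec(2*n)
         indxui = (/ 2, 3, 2, 4 /)                 ! made-up masked points
         indxuj = (/ 1, 1, 2, 3 /)
         call random_number(tpu)
         call random_number(tpv)
         tot = 0
         do ij = 1, n                              ! like arrays_to_vec
            i = indxui(ij) ;  j = indxuj(ij)
            tot = tot + 1  ;  vec(tot) = tpu(i,j)
            tot = tot + 1  ;  vec(tot) = tpv(i,j)
         enddo
         u2 = 0.0_dp ;  v2 = 0.0_dp
         tot = 0
         do ij = 1, n                              ! like vec_to_arrays
            i = indxui(ij) ;  j = indxuj(ij)
            tot = tot + 1  ;  u2(i,j) = vec(tot)
            tot = tot + 1  ;  v2(i,j) = vec(tot)
         enddo
         print *, vec(1), u2(indxui(1),indxuj(1))  ! identical values
      end program pack_unpack
!-----------------------------------------------------------------------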
size of Q matrix - + real (kind=dbl_kind) :: & temp, c, s - + character(len=*), parameter :: subname = '(qr_delete)' - n = size(Q,1) - m = size(Q,2) + n = size(Q, 1) + m = size(Q, 2) do i = 1, m-1 - temp = sqrt(R(i,i+1)**2 + R(i+1,i+1)**2) - c = R(i,i+1)/temp - s = R(i+1,i+1)/temp - R(i,i+1) = temp - R(i+1,i+1) = 0 + temp = sqrt(R(i, i+1)**2 + R(i+1, i+1)**2) + c = R(i , i+1) / temp + s = R(i+1, i+1) / temp + R(i , i+1) = temp + R(i+1, i+1) = 0 if (i < m-1) then do j = i+2, m - temp = c*R(i,j) + s*R(i+1,j) - R(i+1,j) = -s*R(i,j) + c*R(i+1,j) - R(i,j) = temp + temp = c*R(i, j) + s*R(i+1, j) + R(i+1, j) = -s*R(i, j) + c*R(i+1, j) + R(i , j) = temp enddo endif do k = 1, n - temp = c*Q(k,i) + s*Q(k,i+1); - Q(k,i+1) = -s*Q(k,i) + c*Q(k,i+1); - Q(k,i) = temp + temp = c*Q(k, i) + s*Q(k, i+1); + Q(k, i+1) = -s*Q(k, i) + c*Q(k, i+1); + Q(k, i) = temp enddo enddo - R(:,1:m-1) = R(:,2:m) + R(:, 1:m-1) = R(:, 2:m) end subroutine qr_delete @@ -2751,7 +2738,7 @@ subroutine fgmres (zetaD, & iblk , & ! block index ij , & ! index for indx[t|u][i|j] i, j ! grid indices - + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) workspace_y ! work vector (y components) @@ -2867,7 +2854,7 @@ subroutine fgmres (zetaD, & inverse_norm = c1 / norm_residual !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -2889,7 +2876,7 @@ subroutine fgmres (zetaD, & rhs_hess(2:) = c0 initer = 0 - + ! Start of inner (Arnoldi) loop do @@ -2938,7 +2925,6 @@ subroutine fgmres (zetaD, & arnoldi_basis_x, arnoldi_basis_y, & hessenberg) - ! Compute norm of new Arnoldi vector and update Hessenberg matrix !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks @@ -2958,7 +2944,7 @@ subroutine fgmres (zetaD, & inverse_norm = c1 / hessenberg(nextit,initer) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3004,7 +2990,7 @@ subroutine fgmres (zetaD, & endif end do ! end of inner (Arnoldi) loop - + ! At this point either the maximum number of inner iterations ! was reached or the absolute residual is below the scaled tolerance. @@ -3025,7 +3011,7 @@ subroutine fgmres (zetaD, & t = rhs_hess(it) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3069,7 +3055,7 @@ subroutine fgmres (zetaD, & do it = 1, nextit !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3144,7 +3130,7 @@ subroutine pgmres (zetaD, & iblk , & ! block index ij , & ! index for indx[t|u][i|j] i, j ! grid indices - + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks) :: & workspace_x , & ! work vector (x components) workspace_y ! work vector (y components) @@ -3258,7 +3244,7 @@ subroutine pgmres (zetaD, & inverse_norm = c1 / norm_residual !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3280,7 +3266,7 @@ subroutine pgmres (zetaD, & rhs_hess(2:) = c0 initer = 0 - + ! 
Start of inner (Arnoldi) loop do @@ -3344,7 +3330,7 @@ subroutine pgmres (zetaD, & inverse_norm = c1 / hessenberg(nextit,initer) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3390,7 +3376,7 @@ subroutine pgmres (zetaD, & endif end do ! end of inner (Arnoldi) loop - + ! At this point either the maximum number of inner iterations ! was reached or the absolute residual is below the scaled tolerance. @@ -3413,7 +3399,7 @@ subroutine pgmres (zetaD, & t = rhs_hess(it) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3469,7 +3455,7 @@ subroutine pgmres (zetaD, & do it = 1, nextit !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3552,7 +3538,7 @@ subroutine precondition(zetaD, & elseif (precond_type == 'diag') then ! Jacobi preconditioner (diagonal) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3579,7 +3565,7 @@ subroutine precondition(zetaD, & nbiter, conv) else call abort_ice(error_message='wrong preconditioner in ' // subname, & - file=__FILE__, line=__LINE__) + file=__FILE__, line=__LINE__) endif end subroutine precondition @@ -3631,12 +3617,12 @@ subroutine orthogonalize(ortho_type , initer , & ! Classical Gram-Schmidt orthogonalisation process ! First loop of Gram-Schmidt (compute coefficients) dotprod_local = c0 - do it=1,initer + do it = 1, initer local_dot = c0 !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3650,13 +3636,13 @@ subroutine orthogonalize(ortho_type , initer , & dotprod_local(it) = sum(local_dot) end do - hessenberg(1:initer,initer) = global_sums(dotprod_local(1:initer), distrb_info) + hessenberg(1:initer, initer) = global_sums(dotprod_local(1:initer), distrb_info) ! Second loop of Gram-Schmidt (orthonormalize) do it = 1, initer !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3670,12 +3656,12 @@ subroutine orthogonalize(ortho_type , initer , & end do elseif (trim(ortho_type) == 'mgs') then ! Modified Gram-Schmidt ! 
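!-----------------------------------------------------------------------
! The two branches of orthogonalize trade robustness for communication:
! classical Gram-Schmidt (CGS, above) computes all initer dot products
! first and needs a single global_sums call per Arnoldi step, while
! modified Gram-Schmidt (MGS, below) re-projects after each subtraction,
! costing initer reductions but losing less orthogonality in finite
! precision.  A hedged serial sketch of one MGS step on plain vectors
! (basis(:,1:k) assumed orthonormal):
!-----------------------------------------------------------------------
      subroutine mgs_step(n, k, basis, w, h)
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         integer,  intent(in)    :: n, k
         real(dp), intent(in)    :: basis(n,k)
         real(dp), intent(inout) :: w(n)  ! new direction to orthogonalize
         real(dp), intent(out)   :: h(k)  ! Hessenberg column entries
         integer :: it
         do it = 1, k
            h(it) = dot_product(basis(:,it), w) ! one reduction per column
            w = w - h(it)*basis(:,it)           ! subtract before next dot
         enddo
      end subroutine mgs_step
!-----------------------------------------------------------------------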
Modified Gram-Schmidt orthogonalisation process - do it=1,initer + do it = 1, initer local_dot = c0 !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) @@ -3690,7 +3676,7 @@ subroutine orthogonalize(ortho_type , initer , & !$OMP PARALLEL DO PRIVATE(iblk, ij, i, j) do iblk = 1, nblocks - do ij =1, icellu(iblk) + do ij = 1, icellu(iblk) i = indxui(ij, iblk) j = indxuj(ij, iblk) From ba6eb00dd46ab2189ae3d91519109fd3ce11c35a Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 12:42:54 -0400 Subject: [PATCH 175/196] ice_dyn_vp: put 'intent's on same lines as types --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 99 ++++++++--------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index da10fd011..2436e2d31 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1109,8 +1109,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxti , & ! compressed index in i-direction indxtj ! compressed index in j-direction @@ -1128,12 +1127,10 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & cxm , & ! 0.5*HTN - 1.5*HTN tinyarea ! min_strain_rate*tarea - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(out) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), intent(out) :: & zetaD ! 2*zeta - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: & stPr ! stress Pr combinations ! local variables @@ -1302,8 +1299,7 @@ subroutine stress_vp (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxti , & ! compressed index in i-direction indxtj ! compressed index in j-direction @@ -1317,12 +1313,10 @@ subroutine stress_vp (nx_block, ny_block, & cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), intent(in) :: & zetaD ! 2*zeta - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & stressp_1, stressp_2, stressp_3, stressp_4 , & ! sigma11+sigma22 stressm_1, stressm_2, stressm_3, stressm_4 , & ! sigma11-sigma22 stress12_1,stress12_2,stress12_3,stress12_4 ! sigma12 @@ -1405,8 +1399,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -1417,13 +1410,11 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & vocn , & ! ocean current, y-direction (m/s) Cw ! 
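!-----------------------------------------------------------------------
! calc_vrel_Cb linearizes the water and seabed drags around the current
! iterate: vrel multiplies the relative ocean velocity in the stress
! terms, and Cb = Tbu/(|u| + u0) keeps the basal stress bounded as the
! ice speed goes to zero (u0 is a small regularization speed).  A hedged
! scalar sketch with made-up inputs:
!-----------------------------------------------------------------------
      program drag_coeffs
         implicit none
         integer, parameter :: dp = kind(1.0d0)
         real(dp), parameter :: u0 = 5.0e-5_dp    ! m/s, regularization
         real(dp) :: aiu, rhow, Cw, uocn, vocn, u, v, Tbu, vrel, Cb
         aiu = 0.9_dp   ;  rhow = 1026.0_dp ;  Cw = 5.5e-3_dp
         uocn = 0.05_dp ;  vocn = 0.0_dp
         u = 0.1_dp     ;  v = 0.02_dp
         Tbu = 0.0_dp                             ! no grounded ice here
         vrel = aiu*rhow*Cw*sqrt((uocn - u)**2 + (vocn - v)**2)
         Cb   = Tbu/(sqrt(u**2 + v**2) + u0)      ! for basal stress
         print *, vrel, Cb
      end program drag_coeffs
!-----------------------------------------------------------------------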
ocean-ice neutral drag coefficient - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(in) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & uvel , & ! x-component of velocity (m/s) vvel ! y-component of velocity (m/s) - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & vrel , & ! coeff for tauw Cb ! seabed stress coeff @@ -1474,8 +1465,7 @@ subroutine calc_seabed_stress (nx_block , ny_block, & nx_block, ny_block, & ! block dimensions icellu ! total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -1533,8 +1523,7 @@ subroutine matvec (nx_block, ny_block, & icellu, & ! total count when iceumask is true icellt ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj , & ! compressed index in j-direction indxti , & ! compressed index in i-direction @@ -1550,8 +1539,7 @@ subroutine matvec (nx_block, ny_block, & cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(in) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & uvel , & ! x-component of velocity (m/s) vvel , & ! y-component of velocity (m/s) vrel , & ! coefficient for tauw @@ -1560,12 +1548,10 @@ subroutine matvec (nx_block, ny_block, & fm , & ! Coriolis param. * mass in U-cell (kg/s) uarear ! 1/uarea - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,4), intent(in) :: & zetaD ! 2*zeta - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & Au , & ! matvec, Fx = bx - Au (N/m^2) Av ! matvec, Fy = by - Av (N/m^2) @@ -1802,22 +1788,19 @@ subroutine calc_bfix (nx_block, ny_block, & integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions icellu ! no. of cells where iceumask = 1 - - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(in) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & uvel_init,& ! x-component of velocity (m/s), beginning of time step vvel_init,& ! y-component of velocity (m/s), beginning of time step umassdti, & ! mass of U-cell/dt (kg/m^2 s) forcex , & ! work array: combined atm stress and ocn tilt, x forcey ! work array: combined atm stress and ocn tilt, y - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(out) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: & bxfix , & ! bx = taux + bxfix byfix ! by = tauy + byfix @@ -1862,8 +1845,7 @@ subroutine calc_bvec (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! 
total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -1877,12 +1859,10 @@ subroutine calc_bvec (nx_block, ny_block, & byfix , & ! by = tauy + byfix vrel ! relative ice-ocean velocity - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(in) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(in) :: & stPr - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(out) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: & bx , & ! b vector, bx = taux + bxfix (N/m^2) by ! b vector, by = tauy + byfix (N/m^2) @@ -1950,8 +1930,7 @@ subroutine residual_vec (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -1961,8 +1940,7 @@ subroutine residual_vec (nx_block, ny_block, & Au , & ! matvec, Fx = bx - Au (N/m^2) Av ! matvec, Fy = by - Av (N/m^2) - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(inout) :: & Fx , & ! x residual vector, Fx = bx - Au (N/m^2) Fy ! y residual vector, Fy = by - Av (N/m^2) @@ -2015,8 +1993,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! no. of cells where icetmask = 1 - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -2029,13 +2006,11 @@ subroutine formDiag_step1 (nx_block, ny_block, & cxp , & ! 1.5*HTN - 0.5*HTN cym , & ! 0.5*HTE - 1.5*HTE cxm ! 0.5*HTN - 1.5*HTN - - real (kind=dbl_kind), dimension(nx_block,ny_block,4), & - intent(in) :: & + + real (kind=dbl_kind), dimension(nx_block,ny_block,4), intent(in) :: & zetaD ! 2*zeta - - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(out) :: & + + real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: & Dstr ! intermediate calc for diagonal components of matrix A associated ! with rheology term @@ -2391,8 +2366,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & nx_block, ny_block, & ! block dimensions icellu ! total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -2402,12 +2376,10 @@ subroutine formDiag_step2 (nx_block, ny_block, & umassdti, & ! mass of U-cell/dt (kg/m^2 s) uarear ! 1/uarea - real (kind=dbl_kind), dimension(nx_block,ny_block,8), & - intent(in) :: & + real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(in) :: & Dstr - real (kind=dbl_kind), dimension (nx_block,ny_block), & - intent(out) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: & diagx , & ! Diagonal (x component) of the matrix A diagy ! Diagonal (y component) of the matrix A @@ -2472,8 +2444,7 @@ subroutine calc_L2norm_squared (nx_block, ny_block, & nx_block, ny_block, & ! 
block dimensions icellu ! total count when iceumask is true - integer (kind=int_kind), dimension (nx_block*ny_block), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -2526,8 +2497,7 @@ subroutine arrays_to_vec (nx_block, ny_block, nblocks, & integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction @@ -2585,8 +2555,7 @@ subroutine vec_to_arrays (nx_block, ny_block, nblocks, & integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu - integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), & - intent(in) :: & + integer (kind=int_kind), dimension (nx_block*ny_block, max_blocks), intent(in) :: & indxui , & ! compressed index in i-direction indxuj ! compressed index in j-direction From 12e97b240d8653ac6dd4df60d4f636c89eeb4cb8 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 12:43:30 -0400 Subject: [PATCH 176/196] ice_dyn_vp: use '==' instead of '.eq.' --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 32 +++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 2436e2d31..21f8b842b 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -2061,7 +2061,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & do cc = 1, 8 ! 4 for u and 4 for v - if (cc .eq. 1) then ! u comp, T cell i,j + if (cc == 1) then ! u comp, T cell i,j uij = c1 ui1j = c0 uij1 = c0 @@ -2072,7 +2072,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 0 dj = 0 - elseif (cc .eq. 2) then ! u comp, T cell i+1,j + elseif (cc == 2) then ! u comp, T cell i+1,j uij = c0 ui1j = c1 uij1 = c0 @@ -2083,7 +2083,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 1 dj = 0 - elseif (cc .eq. 3) then ! u comp, T cell i,j+1 + elseif (cc == 3) then ! u comp, T cell i,j+1 uij = c0 ui1j = c0 uij1 = c1 @@ -2094,7 +2094,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 0 dj = 1 - elseif (cc .eq. 4) then ! u comp, T cell i+1,j+1 + elseif (cc == 4) then ! u comp, T cell i+1,j+1 uij = c0 ui1j = c0 uij1 = c0 @@ -2105,7 +2105,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 1 dj = 1 - elseif (cc .eq. 5) then ! v comp, T cell i,j + elseif (cc == 5) then ! v comp, T cell i,j uij = c0 ui1j = c0 uij1 = c0 @@ -2116,7 +2116,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 0 dj = 0 - elseif (cc .eq. 6) then ! v comp, T cell i,j+1 + elseif (cc == 6) then ! v comp, T cell i,j+1 uij = c0 ui1j = c0 uij1 = c0 @@ -2127,7 +2127,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 0 dj = 1 - elseif (cc .eq. 7) then ! v comp, T cell i+1,j + elseif (cc == 7) then ! v comp, T cell i+1,j uij = c0 ui1j = c0 uij1 = c0 @@ -2138,7 +2138,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & vi1j1 = c0 di = 1 dj = 0 - elseif (cc .eq. 8) then ! v comp, T cell i+1,j+1 + elseif (cc == 8) then ! v comp, T cell i+1,j+1 uij = c0 ui1j = c0 uij1 = c0 @@ -2265,7 +2265,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & ! 
for dF/dx (u momentum) !----------------------------------------------------------------- - if (cc .eq. 1) then ! T cell i,j + if (cc == 1) then ! T cell i,j strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) @@ -2274,7 +2274,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne - elseif (cc .eq. 2) then ! T cell i+1,j + elseif (cc == 2) then ! T cell i+1,j strp_tmp = p25*dyt(i,j)*(p333*ssigpn + p166*ssigps) strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) @@ -2283,7 +2283,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw - elseif (cc .eq. 3) then ! T cell i,j+1 + elseif (cc == 3) then ! T cell i,j+1 strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) @@ -2292,7 +2292,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se - elseif (cc .eq. 4) then ! T cell i+1,j+1 + elseif (cc == 4) then ! T cell i+1,j+1 strp_tmp = p25*dyt(i,j)*(p333*ssigps + p166*ssigpn) strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) @@ -2305,7 +2305,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & ! for dF/dy (v momentum) !----------------------------------------------------------------- - elseif (cc .eq. 5) then ! T cell i,j + elseif (cc == 5) then ! T cell i,j strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) @@ -2314,7 +2314,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne - elseif (cc .eq. 6) then ! T cell i,j+1 + elseif (cc == 6) then ! T cell i,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpe + p166*ssigpw) strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) @@ -2323,7 +2323,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se - elseif (cc .eq. 7) then ! T cell i,j+1 + elseif (cc == 7) then ! T cell i,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) @@ -2332,7 +2332,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw - elseif (cc .eq. 8) then ! T cell i+1,j+1 + elseif (cc == 8) then ! 
T cell i+1,j+1 strp_tmp = p25*dxt(i,j)*(p333*ssigpw + p166*ssigpe) strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) From 2dfe55dd7355e22b784122d92a22979e61d53317 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 15:01:32 -0400 Subject: [PATCH 177/196] ice_dyn_vp: uniformize whitespace in subroutine arguments --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 576 +++++++++++----------- 1 file changed, 291 insertions(+), 285 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 21f8b842b..a9eab9da5 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -385,7 +385,7 @@ subroutine imp_solver (dt) uvel (:,:,iblk), vvel (:,:,iblk), & Tbu (:,:,iblk)) - call calc_bfix (nx_block , ny_block, & + call calc_bfix (nx_block , ny_block , & icellu(iblk) , & indxui (:,iblk), indxuj (:,iblk), & umassdti (:,:,iblk), & @@ -442,11 +442,11 @@ subroutine imp_solver (dt) if (basalstress) then !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call basal_stress_coeff (nx_block, ny_block, & - icellu (iblk), & - indxui(:,iblk), indxuj(:,iblk), & - vice(:,:,iblk), aice(:,:,iblk), & - hwater(:,:,iblk), Tbu(:,:,iblk)) + call basal_stress_coeff (nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:,iblk), & + vice (:,:,iblk), aice(:,:,iblk), & + hwater(:,:,iblk), Tbu (:,:,iblk)) enddo !$OMP END PARALLEL DO endif @@ -466,15 +466,15 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- ! Start of nonlinear iteration !----------------------------------------------------------------- - call anderson_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, sol, & - fpresx, fpresy, & - zetaD, Cb, & + call anderson_solver (icellt , icellu, & + indxti , indxtj, & + indxui , indxuj, & + aiu , ntot , & + waterx , watery, & + bxfix , byfix , & + umassdti, sol , & + fpresx , fpresy, & + zetaD , Cb , & halo_info_mask) !----------------------------------------------------------------- ! End of nonlinear iteration @@ -489,8 +489,8 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call stress_vp (nx_block, ny_block, & - icellt(iblk), & + call stress_vp (nx_block , ny_block , & + icellt(iblk) , & indxti (:,iblk), indxtj (:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & @@ -511,8 +511,8 @@ subroutine imp_solver (dt) !----------------------------------------------------------------- !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call deformations (nx_block, ny_block, & - icellt(iblk), & + call deformations (nx_block , ny_block , & + icellt(iblk) , & indxti (:,iblk), indxtj (:,iblk), & uvel (:,:,iblk), vvel (:,:,iblk), & dxt (:,:,iblk), dyt (:,:,iblk), & @@ -649,15 +649,15 @@ end subroutine imp_solver ! H. F. Walker, “Anderson Acceleration: Algorithms and Implementations” ! [Online]. 
Available: https://users.wpi.edu/~walker/Papers/anderson_accn_algs_imps.pdf - subroutine anderson_solver (icellt, icellu, & - indxti, indxtj, & - indxui, indxuj, & - aiu, ntot, & - waterx, watery, & - bxfix, byfix, & - umassdti, sol, & - fpresx, fpresy, & - zetaD, Cb, & + subroutine anderson_solver (icellt , icellu, & + indxti , indxtj, & + indxui , indxuj, & + aiu , ntot , & + waterx , watery, & + bxfix , byfix , & + umassdti, sol , & + fpresx , fpresy, & + zetaD , Cb , & halo_info_mask) use ice_arrays_column, only: Cdn_ocn @@ -805,19 +805,19 @@ subroutine anderson_solver (icellt, icellu, & uprev_k(:,:,iblk) = uvel(:,:,iblk) vprev_k(:,:,iblk) = vvel(:,:,iblk) - call calc_zeta_Pr (nx_block , ny_block, & - icellt(iblk), & - indxti (:,iblk) , indxtj(:,iblk), & + call calc_zeta_Pr (nx_block , ny_block , & + icellt (iblk), & + indxti (:,iblk), indxtj (:,iblk), & uprev_k (:,:,iblk), vprev_k (:,:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx (:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & + dxt (:,:,iblk), dyt (:,:,iblk), & + dxhy (:,:,iblk), dyhx (:,:,iblk), & + cxp (:,:,iblk), cyp (:,:,iblk), & + cxm (:,:,iblk), cym (:,:,iblk), & tinyarea (:,:,iblk), & - strength (:,:,iblk), zetaD (:,:,iblk,:) ,& + strength (:,:,iblk), zetaD (:,:,iblk,:), & stPrtmp (:,:,:)) - call calc_vrel_Cb (nx_block , ny_block, & + call calc_vrel_Cb (nx_block , ny_block , & icellu (iblk), Cdn_ocn (:,:,iblk), & indxui (:,iblk), indxuj (:,iblk), & aiu (:,:,iblk), Tbu (:,:,iblk), & @@ -826,10 +826,10 @@ subroutine anderson_solver (icellt, icellu, & vrel (:,:,iblk), Cb (:,:,iblk)) ! prepare b vector (RHS) - call calc_bvec (nx_block , ny_block, & + call calc_bvec (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & - stPrtmp (:,:,:) , uarear (:,:,iblk), & + stPrtmp (:,:,:), uarear (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & @@ -837,10 +837,10 @@ subroutine anderson_solver (icellt, icellu, & vrel (:,:,iblk)) ! Compute nonlinear residual norm (PDE residual) - call matvec (nx_block , ny_block, & - icellu (iblk) , icellt (iblk) , & - indxui (:,iblk) , indxuj (:,iblk) , & - indxti (:,iblk) , indxtj (:,iblk) , & + call matvec (nx_block , ny_block , & + icellu (iblk) , icellt (iblk), & + indxui (:,iblk) , indxuj (:,iblk), & + indxti (:,iblk) , indxtj (:,iblk), & dxt (:,:,iblk) , dyt (:,:,iblk), & dxhy (:,:,iblk) , dyhx (:,:,iblk), & cxp (:,:,iblk) , cyp (:,:,iblk), & @@ -851,13 +851,13 @@ subroutine anderson_solver (icellt, icellu, & umassdti (:,:,iblk) , fm (:,:,iblk), & uarear (:,:,iblk) , & Au (:,:,iblk) , Av (:,:,iblk)) - call residual_vec (nx_block , ny_block, & + call residual_vec (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & bx (:,:,iblk), by (:,:,iblk), & Au (:,:,iblk), Av (:,:,iblk), & Fx (:,:,iblk), Fy (:,:,iblk), & - L2norm(iblk)) + L2norm (iblk)) enddo !$OMP END PARALLEL DO nlres_norm = sqrt(global_sum(sum(L2norm), distrb_info)) @@ -878,11 +878,12 @@ subroutine anderson_solver (icellt, icellu, & ! Put initial guess for FGMRES in solx,soly and sol (needed for anderson) solx = uprev_k soly = vprev_k - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & + call arrays_to_vec (nx_block , ny_block , & + nblocks , max_blocks , & + icellu (:), ntot , & + indxui (:,:), indxuj (:,:), & uprev_k (:,:,:), vprev_k (:,:,:), & - sol(:)) + sol (:)) ! 
Compute fixed point map g(x) if (fpfunc_andacc == 1) then @@ -892,18 +893,18 @@ subroutine anderson_solver (icellt, icellu, & if (precond == 'diag' .or. precond == 'pgmres') then !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call formDiag_step1 (nx_block , ny_block, & ! D term due to rheology - icellu (iblk), & - indxui (:,iblk), indxuj(:,iblk), & - dxt (:,:,iblk), dyt (:,:,iblk), & - dxhy (:,:,iblk), dyhx(:,:,iblk), & - cxp (:,:,iblk), cyp (:,:,iblk), & - cxm (:,:,iblk), cym (:,:,iblk), & - zetaD (:,:,iblk,:) , Dstrtmp (:,:,:)) - call formDiag_step2 (nx_block , ny_block, & + call formDiag_step1 (nx_block , ny_block , & ! D term due to rheology + icellu (iblk) , & + indxui (:,iblk) , indxuj(:,iblk), & + dxt (:,:,iblk) , dyt (:,:,iblk), & + dxhy (:,:,iblk) , dyhx(:,:,iblk), & + cxp (:,:,iblk) , cyp (:,:,iblk), & + cxm (:,:,iblk) , cym (:,:,iblk), & + zetaD (:,:,iblk,:), Dstrtmp (:,:,:)) + call formDiag_step2 (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & - Dstrtmp (:,:,:) , vrel (:,:,iblk), & + Dstrtmp (:,:,:), vrel (:,:,iblk), & umassdti (:,:,iblk), & uarear (:,:,iblk), Cb (:,:,iblk), & diagx (:,:,iblk), diagy (:,:,iblk)) @@ -912,22 +913,23 @@ subroutine anderson_solver (icellt, icellu, & endif ! FGMRES linear solver - call fgmres (zetaD, & - Cb, vrel, & - umassdti, & - halo_info_mask, & - bx, by, & - diagx, diagy, & - reltol_fgmres, im_fgmres, & - maxits_fgmres, & - solx, soly, & - nbiter, conv) + call fgmres (zetaD , & + Cb , vrel , & + umassdti , & + halo_info_mask, & + bx , by , & + diagx , diagy , & + reltol_fgmres , im_fgmres, & + maxits_fgmres , & + solx , soly , & + nbiter , conv) ! Put FGMRES solution solx,soly in fpfunc vector (needed for anderson) - call arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - solx (:,:,:), soly (:,:,:), & - fpfunc(:)) + call arrays_to_vec (nx_block , ny_block , & + nblocks , max_blocks , & + icellu (:), ntot , & + indxui (:,:), indxuj (:,:), & + solx (:,:,:), soly (:,:,:), & + fpfunc (:)) elseif (fpfunc_andacc == 2) then ! g_2(x) = x - A(x)x + b(x) = x - F(x) call abort_ice(error_message=subname // " Fixed point function g_2(x) not yet implemented (fpfunc_andacc = 2)" , & @@ -939,14 +941,15 @@ subroutine anderson_solver (icellt, icellu, & #ifdef CICE_USE_LAPACK fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) #else - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - res (:), & + call vec_to_arrays (nx_block , ny_block , & + nblocks , max_blocks , & + icellu (:), ntot , & + indxui (:,:), indxuj(:,:) , & + res (:), & fpresx (:,:,:), fpresy (:,:,:)) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call calc_L2norm_squared (nx_block , ny_block, & + call calc_L2norm_squared (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & fpresx(:,:,iblk), fpresy(:,:,iblk), & @@ -1056,10 +1059,11 @@ subroutine anderson_solver (icellt, icellu, & !----------------------------------------------------------------------- ! Put vector sol in uvel and vvel arrays !----------------------------------------------------------------------- - call vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu (:), ntot, & - indxui (:,:), indxuj(:,:), & - sol (:), & + call vec_to_arrays (nx_block , ny_block , & + nblocks , max_blocks , & + icellu (:), ntot , & + indxui (:,:), indxuj (:,:), & + sol (:), & uvel (:,:,:), vvel (:,:,:)) ! 
phb NOT SURE IF THIS HALO UPDATE IS ACTUALLY NEEDED @@ -1070,7 +1074,7 @@ subroutine anderson_solver (icellt, icellu, & do iblk = 1, nblocks fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) fpresy(:,:,iblk) = vvel(:,:,iblk) - vprev_k(:,:,iblk) - call calc_L2norm_squared (nx_block , ny_block, & + call calc_L2norm_squared (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & fpresx(:,:,iblk), fpresy(:,:,iblk), & @@ -1091,16 +1095,16 @@ end subroutine anderson_solver ! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. - subroutine calc_zeta_Pr (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - tinyarea, & - strength, zetaD, & + subroutine calc_zeta_Pr (nx_block, ny_block, & + icellt , & + indxti , indxtj , & + uvel , vvel , & + dxt , dyt , & + dxhy , dyhx , & + cxp , cyp , & + cxm , cym , & + tinyarea, & + strength, zetaD , & stPr) use ice_dyn_shared, only: strain_rates @@ -1171,20 +1175,20 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - call strain_rates (nx_block, ny_block, & - i, j, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - divune, divunw, & - divuse, divusw, & - tensionne, tensionnw, & - tensionse, tensionsw, & - shearne, shearnw, & - shearse, shearsw, & - Deltane, Deltanw, & - Deltase, Deltasw ) + call strain_rates (nx_block , ny_block , & + i , j , & + uvel , vvel , & + dxt , dyt , & + cxp , cyp , & + cxm , cym , & + divune , divunw , & + divuse , divusw , & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne , shearnw , & + shearse , shearsw , & + Deltane , Deltanw , & + Deltase , Deltasw) if (capping) then zetaD(i,j,1) = strength(i,j)/max(Deltane,tinyarea(i,j)) @@ -1278,18 +1282,18 @@ end subroutine calc_zeta_Pr ! Computes the VP stress (as diagnostic) - subroutine stress_vp (nx_block, ny_block, & - icellt, & - indxti, indxtj, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - zetaD, & - stressp_1, stressp_2, & - stressp_3, stressp_4, & - stressm_1, stressm_2, & - stressm_3, stressm_4, & + subroutine stress_vp (nx_block , ny_block , & + icellt , & + indxti , indxtj , & + uvel , vvel , & + dxt , dyt , & + cxp , cyp , & + cxm , cym , & + zetaD , & + stressp_1 , stressp_2 , & + stressp_3 , stressp_4 , & + stressm_1 , stressm_2 , & + stressm_3 , stressm_4 , & stress12_1, stress12_2, & stress12_3, stress12_4) @@ -1342,20 +1346,20 @@ subroutine stress_vp (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - call strain_rates (nx_block, ny_block, & - i, j, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - divune, divunw, & - divuse, divusw, & - tensionne, tensionnw, & - tensionse, tensionsw, & - shearne, shearnw, & - shearse, shearsw, & - Deltane, Deltanw, & - Deltase, Deltasw ) + call strain_rates (nx_block , ny_block , & + i , j , & + uvel , vvel , & + dxt , dyt , & + cxp , cyp , & + cxm , cym , & + divune , divunw , & + divuse , divusw , & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne , shearnw , & + shearse , shearsw , & + Deltane , Deltanw , & + Deltase , Deltasw) !----------------------------------------------------------------- ! the stresses ! kg/s^2 @@ -1385,13 +1389,13 @@ end subroutine stress_vp ! 
Compute vrel and basal stress coefficients - subroutine calc_vrel_Cb (nx_block, ny_block, & - icellu, Cw, & - indxui, indxuj, & - aiu, Tbu, & - uocn, vocn, & - uvel, vvel, & - vrel, Cb) + subroutine calc_vrel_Cb (nx_block, ny_block, & + icellu , Cw , & + indxui , indxuj , & + aiu , Tbu , & + uocn , vocn , & + uvel , vvel , & + vrel , Cb) use ice_dyn_shared, only: u0 ! residual velocity for basal stress (m/s) @@ -1454,12 +1458,12 @@ end subroutine calc_vrel_Cb ! Compute seabed stress (diagnostic) - subroutine calc_seabed_stress (nx_block , ny_block, & - icellu , & - indxui , indxuj , & - uvel , vvel , & - Cb , & - taubx , tauby) + subroutine calc_seabed_stress (nx_block, ny_block, & + icellu , & + indxui , indxuj , & + uvel , vvel , & + Cb , & + taubx , tauby) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1501,20 +1505,20 @@ end subroutine calc_seabed_stress ! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) ! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) - subroutine matvec (nx_block, ny_block, & - icellu, icellt , & - indxui, indxuj, & - indxti, indxtj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - uvel, vvel, & - vrel, Cb, & - zetaD, & - umassdti, fm, & - uarear, & - Au, Av) + subroutine matvec (nx_block, ny_block, & + icellu , icellt , & + indxui , indxuj , & + indxti , indxtj , & + dxt , dyt , & + dxhy , dyhx , & + cxp , cyp , & + cxm , cym , & + uvel , vvel , & + vrel , Cb , & + zetaD , & + umassdti, fm , & + uarear , & + Au , Av) use ice_dyn_shared, only: strain_rates @@ -1608,20 +1612,20 @@ subroutine matvec (nx_block, ny_block, & ! strain rates ! NOTE these are actually strain rates * area (m^2/s) !----------------------------------------------------------------- - call strain_rates (nx_block, ny_block, & - i, j, & - uvel, vvel, & - dxt, dyt, & - cxp, cyp, & - cxm, cym, & - divune, divunw, & - divuse, divusw, & - tensionne, tensionnw, & - tensionse, tensionsw, & - shearne, shearnw, & - shearse, shearsw, & - Deltane, Deltanw, & - Deltase, Deltasw ) + call strain_rates (nx_block , ny_block , & + i , j , & + uvel , vvel , & + dxt , dyt , & + cxp , cyp , & + cxm , cym , & + divune , divunw , & + divuse , divusw , & + tensionne, tensionnw, & + tensionse, tensionsw, & + shearne , shearnw , & + shearse , shearsw , & + Deltane , Deltanw , & + Deltase , Deltasw) !----------------------------------------------------------------- ! the stresses ! kg/s^2 @@ -1777,13 +1781,13 @@ end subroutine matvec ! Compute the constant component of b(u,v) i.e. the part of b(u,v) that ! does not depend on (u,v) and thus do not change during the nonlinear iteration - subroutine calc_bfix (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - umassdti, & - forcex, forcey, & - uvel_init, vvel_init, & - bxfix, byfix) + subroutine calc_bfix (nx_block , ny_block , & + icellu , & + indxui , indxuj , & + umassdti , & + forcex , forcey , & + uvel_init, vvel_init, & + bxfix , byfix) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -1831,14 +1835,14 @@ end subroutine calc_bfix ! that cannot be written as A(u,v)*(u,v), where A(u,v) is a matrix with entries ! 
depending on (u,v) - subroutine calc_bvec (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - stPr, uarear, & - waterx, watery, & - uvel, vvel, & - bxfix, byfix, & - bx, by, & + subroutine calc_bvec (nx_block, ny_block, & + icellu , & + indxui , indxuj , & + stPr , uarear , & + waterx , watery , & + uvel , vvel , & + bxfix , byfix , & + bx , by , & vrel) integer (kind=int_kind), intent(in) :: & @@ -1918,12 +1922,12 @@ end subroutine calc_bvec ! Au = A(u,v)_[x] * uvel (x components of A(u,v) * (u,v)) ! Av = A(u,v)_[y] * vvel (y components of A(u,v) * (u,v)) - subroutine residual_vec (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - bx, by, & - Au, Av, & - Fx, Fy, & + subroutine residual_vec (nx_block , ny_block, & + icellu , & + indxui , indxuj , & + bx , by , & + Au , Av , & + Fx , Fy , & sum_squared) integer (kind=int_kind), intent(in) :: & @@ -1980,14 +1984,14 @@ end subroutine residual_vec ! Form the diagonal of the matrix A(u,v) (first part of the computation) ! Part 1: compute the contributions of the diagonal to the rheology term - subroutine formDiag_step1 (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - dxt, dyt, & - dxhy, dyhx, & - cxp, cyp, & - cxm, cym, & - zetaD, Dstr) + subroutine formDiag_step1 (nx_block, ny_block, & + icellu , & + indxui , indxuj , & + dxt , dyt , & + dxhy , dyhx , & + cxp , cyp , & + cxm , cym , & + zetaD , Dstr) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2354,13 +2358,13 @@ end subroutine formDiag_step1 ! Form the diagonal of the matrix A(u,v) (second part of the computation) ! Part 2: compute diagonal - subroutine formDiag_step2 (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - Dstr, vrel, & - umassdti, & - uarear, Cb, & - diagx, diagy) + subroutine formDiag_step2 (nx_block, ny_block, & + icellu , & + indxui , indxuj , & + Dstr , vrel , & + umassdti, & + uarear , Cb , & + diagx , diagy) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2434,10 +2438,10 @@ end subroutine formDiag_step2 ! Compute squared l^2 norm of a grid function (tpu,tpv) - subroutine calc_L2norm_squared (nx_block, ny_block, & - icellu, & - indxui, indxuj, & - tpu, tpv, & + subroutine calc_L2norm_squared (nx_block, ny_block, & + icellu , & + indxui , indxuj , & + tpu , tpv , & L2norm) integer (kind=int_kind), intent(in) :: & @@ -2482,10 +2486,11 @@ end subroutine calc_L2norm_squared ! Convert a grid function (tpu,tpv) to a one dimensional vector ! to be passed to the legacy FGMRES driver - subroutine arrays_to_vec (nx_block, ny_block, nblocks, & - max_blocks, icellu, ntot, & - indxui, indxuj, & - tpu, tpv, & + subroutine arrays_to_vec (nx_block, ny_block , & + nblocks , max_blocks, & + icellu , ntot , & + indxui , indxuj , & + tpu , tpv , & outvec) integer (kind=int_kind), intent(in) :: & @@ -2540,11 +2545,12 @@ end subroutine arrays_to_vec ! Convert one dimensional vector received from the legacy FGMRES driver ! to a grid function (tpu,tpv) - subroutine vec_to_arrays (nx_block, ny_block, nblocks, & - max_blocks, icellu, ntot, & - indxui, indxuj, & - invec, & - tpu, tpv) + subroutine vec_to_arrays (nx_block, ny_block , & + nblocks , max_blocks, & + icellu , ntot , & + indxui , indxuj , & + invec , & + tpu , tpv) integer (kind=int_kind), intent(in) :: & nx_block, ny_block, & ! block dimensions @@ -2653,16 +2659,16 @@ end subroutine qr_delete ! ! 
authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - subroutine fgmres (zetaD, & - Cb, vrel, & - umassdti, & - halo_info_mask, & - bx, by, & - diagx, diagy, & + subroutine fgmres (zetaD , & + Cb , vrel , & + umassdti , & + halo_info_mask , & + bx , by , & + diagx , diagy , & tolerance, maxinner, & - maxouter, & - solx, soly, & - nbiter, conv) + maxouter , & + solx , soly , & + nbiter , conv) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! zetaD = 2*zeta (viscous coefficient) @@ -2769,7 +2775,7 @@ subroutine fgmres (zetaD, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call matvec (nx_block , ny_block, & + call matvec (nx_block , ny_block , & icellu (iblk) , icellt (iblk), & indxui (:,iblk) , indxuj (:,iblk), & indxti (:,iblk) , indxtj (:,iblk), & @@ -2785,8 +2791,8 @@ subroutine fgmres (zetaD, & workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) call residual_vec (nx_block , ny_block , & icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk) , & - bx (:,:,iblk), by (:,:,iblk) , & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & workspace_x(:,:,iblk), workspace_y(:,:,iblk), & arnoldi_basis_x (:,:,iblk, 1), & arnoldi_basis_y (:,:,iblk, 1)) @@ -2798,11 +2804,11 @@ subroutine fgmres (zetaD, & ! Compute norm of initial residual !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call calc_L2norm_squared(nx_block, ny_block, & - icellu (iblk), & - indxui (:,iblk), indxuj(:, iblk), & - arnoldi_basis_x(:,:,iblk, 1), & - arnoldi_basis_y(:,:,iblk, 1), & + call calc_L2norm_squared(nx_block , ny_block , & + icellu (iblk), & + indxui (:,iblk), indxuj(:,iblk), & + arnoldi_basis_x(:,:,iblk, 1) , & + arnoldi_basis_y(:,:,iblk, 1) , & norm_squared(iblk)) enddo @@ -2853,12 +2859,12 @@ subroutine fgmres (zetaD, & initer = initer + 1 nextit = initer + 1 ! precondition the current Arnoldi vector - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD , & + Cb , vrel , & + umassdti , & arnoldi_basis_x(:,:,:,initer), & arnoldi_basis_y(:,:,:,initer), & - diagx, diagy, & + diagx , diagy , & precond_type, & workspace_x , workspace_y) ! !phb DESCRIBE ww @@ -2870,7 +2876,7 @@ subroutine fgmres (zetaD, & halo_info_mask) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call matvec (nx_block , ny_block, & + call matvec (nx_block , ny_block , & icellu (iblk) , icellt (iblk), & indxui (:,iblk) , indxuj (:,iblk), & indxti (:,iblk) , indxtj (:,iblk), & @@ -3047,15 +3053,15 @@ end subroutine fgmres ! ! authors: Stéphane Gaudreault, Abdessamad Qaddouri, Philippe Blain, ECCC - subroutine pgmres (zetaD, & - Cb, vrel, & - umassdti, & - bx, by, & - diagx, diagy, & - tolerance, maxinner, & - maxouter, & - solx, soly, & - nbiter, conv) + subroutine pgmres (zetaD , & + Cb , vrel , & + umassdti , & + bx , by , & + diagx , diagy , & + tolerance, maxinner, & + maxouter , & + solx , soly , & + nbiter , conv) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! 
zetaD = 2*zeta (viscous coefficient) @@ -3159,7 +3165,7 @@ subroutine pgmres (zetaD, & !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call matvec (nx_block , ny_block, & + call matvec (nx_block , ny_block , & icellu (iblk) , icellt (iblk), & indxui (:,iblk) , indxuj (:,iblk), & indxti (:,iblk) , indxtj (:,iblk), & @@ -3175,8 +3181,8 @@ subroutine pgmres (zetaD, & workspace_x(:,:,iblk) , workspace_y(:,:,iblk)) call residual_vec (nx_block , ny_block , & icellu (iblk), & - indxui (:,iblk), indxuj (:,iblk) , & - bx (:,:,iblk), by (:,:,iblk) , & + indxui (:,iblk), indxuj (:,iblk), & + bx (:,:,iblk), by (:,:,iblk), & workspace_x(:,:,iblk), workspace_y(:,:,iblk), & arnoldi_basis_x (:,:,iblk, 1), & arnoldi_basis_y (:,:,iblk, 1)) @@ -3188,7 +3194,7 @@ subroutine pgmres (zetaD, & ! Compute norm of initial residual !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call calc_L2norm_squared(nx_block, ny_block, & + call calc_L2norm_squared(nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj(:, iblk), & arnoldi_basis_x(:,:,iblk, 1), & @@ -3244,19 +3250,19 @@ subroutine pgmres (zetaD, & nextit = initer + 1 ! precondition the current Arnoldi vector - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD , & + Cb , vrel , & + umassdti , & arnoldi_basis_x(:,:,:,initer), & arnoldi_basis_y(:,:,:,initer), & - diagx, diagy, & + diagx , diagy , & precond_type, & workspace_x , workspace_y) ! !phb haloUpdate would go here (for workspace_x, _y) !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call matvec (nx_block , ny_block, & + call matvec (nx_block , ny_block , & icellu (iblk) , icellt (iblk), & indxui (:,iblk) , indxuj (:,iblk), & indxti (:,iblk) , indxtj (:,iblk), & @@ -3283,7 +3289,7 @@ subroutine pgmres (zetaD, & ! Compute norm of new Arnoldi vector and update Hessenberg matrix !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks - call calc_L2norm_squared(nx_block , ny_block , & + call calc_L2norm_squared(nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj(:, iblk) , & arnoldi_basis_x(:,:,iblk, nextit), & @@ -3380,11 +3386,11 @@ subroutine pgmres (zetaD, & end do ! Call preconditioner - call precondition(zetaD, & - Cb, vrel, & - umassdti, & + call precondition(zetaD , & + Cb , vrel , & + umassdti , & workspace_x , workspace_y, & - diagx, diagy, & + diagx , diagy , & precond_type, & workspace_x , workspace_y) @@ -3446,13 +3452,13 @@ end subroutine pgmres ! ! authors: Philippe Blain, ECCC - subroutine precondition(zetaD, & - Cb, vrel, & - umassdti, & - vx, vy, & - diagx, diagy, & - precond_type, & - wx, wy) + subroutine precondition(zetaD , & + Cb , vrel , & + umassdti , & + vx , vy , & + diagx , diagy, & + precond_type, & + wx , wy) real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: & zetaD ! 
zetaD = 2*zeta (viscous coefficient) @@ -3523,15 +3529,15 @@ subroutine precondition(zetaD, & tolerance = reltol_pgmres maxinner = im_pgmres maxouter = maxits_pgmres - call pgmres (zetaD, & - Cb, vrel, & - umassdti, & - vx, vy, & - diagx, diagy, & - tolerance, maxinner, & - maxouter, & - wx, wy, & - nbiter, conv) + call pgmres (zetaD, & + Cb , vrel , & + umassdti , & + vx , vy , & + diagx , diagy , & + tolerance, maxinner, & + maxouter , & + wx , wy , & + nbiter , conv) else call abort_ice(error_message='wrong preconditioner in ' // subname, & file=__FILE__, line=__LINE__) From 7355c580fb5290a7f0c1d49ba0802d495c1fdbc6 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 15:56:16 -0400 Subject: [PATCH 178/196] ice_dyn_vp: calc_bvec: remove unneeded arguments --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a9eab9da5..ec5a9f0da 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -831,7 +831,6 @@ subroutine anderson_solver (icellt , icellu, & indxui (:,iblk), indxuj (:,iblk), & stPrtmp (:,:,:), uarear (:,:,iblk), & waterx (:,:,iblk), watery (:,:,iblk), & - ulin (:,:,iblk), vlin (:,:,iblk), & bxfix (:,:,iblk), byfix (:,:,iblk), & bx (:,:,iblk), by (:,:,iblk), & vrel (:,:,iblk)) @@ -1840,7 +1839,6 @@ subroutine calc_bvec (nx_block, ny_block, & indxui , indxuj , & stPr , uarear , & waterx , watery , & - uvel , vvel , & bxfix , byfix , & bx , by , & vrel) @@ -1854,8 +1852,6 @@ subroutine calc_bvec (nx_block, ny_block, & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (nx_block,ny_block), intent(in) :: & - uvel , & ! x-component of velocity (m/s) - vvel , & ! y-component of velocity (m/s) uarear , & ! 1/uarea waterx , & ! for ocean stress calculation, x (m/s) watery , & ! for ocean stress calculation, y (m/s) @@ -1876,7 +1872,6 @@ subroutine calc_bvec (nx_block, ny_block, & i, j, ij real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel !jfl needed? taux, tauy , & ! part of ocean stress term strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) rhow ! @@ -1896,9 +1891,6 @@ subroutine calc_bvec (nx_block, ny_block, & i = indxui(ij) j = indxuj(ij) - utp = uvel(i,j) - vtp = vvel(i,j) - ! ice/ocean stress taux = vrel(i,j)*waterx(i,j) ! NOTE this is not the entire tauy = vrel(i,j)*watery(i,j) ! ocn stress term From 210bf6271b5054559385897d6357fb9e27b4f307 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 18:23:37 -0400 Subject: [PATCH 179/196] ice_dyn_vp: clean up and clarify code comments --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 141 +++++++++++----------- 1 file changed, 69 insertions(+), 72 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index ec5a9f0da..a1f2a9efe 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -215,7 +215,7 @@ subroutine imp_solver (dt) ! local variables integer (kind=int_kind) :: & - ntot , & ! size of problem for fgmres (for given cpu) + ntot , & ! size of problem for Anderson iblk , & ! block index ilo,ihi,jlo,jhi, & ! beginning and end of physical domain i, j, ij @@ -229,8 +229,8 @@ subroutine imp_solver (dt) bxfix , & ! part of bx that is constant during Picard byfix , & ! part of by that is constant during Picard Cb , & ! 
seabed stress coefficient
-         fpresx   , & ! x fixed point residual vector, fx = uvel - uprev_k
-         fpresy   , & ! y fixed point residual vector, fy = vvel - vprev_k
+         fpresx   , & ! fixed point residual vector, x components: fx = uvel - uprev_k
+         fpresy   , & ! fixed point residual vector, y components: fy = vvel - vprev_k
          aiu      , & ! ice fraction on u-grid
          umass    , & ! total mass of ice and snow (u grid)
          umassdti     ! mass of U-cell/dte (kg/m^2 s)
@@ -452,7 +452,7 @@ subroutine imp_solver (dt)
       endif
 
       !-----------------------------------------------------------------
-      ! calc size of problem (ntot) and allocate arrays and vectors
+      ! calc size of problem (ntot) and allocate solution vector
       !-----------------------------------------------------------------
 
       ntot = 0
@@ -673,7 +673,7 @@ subroutine anderson_solver (icellt , icellu, &
       use ice_timers, only: ice_timer_start, ice_timer_stop
 
       integer (kind=int_kind), intent(in) :: &
-         ntot      ! size of problem for fgmres (for given cpu)
+         ntot      ! size of problem for Anderson
 
       integer (kind=int_kind), dimension(max_blocks), intent(in) :: &
          icellt , & ! no. of cells where icetmask = 1
@@ -700,8 +700,8 @@ subroutine anderson_solver (icellt , icellu, &
          halo_info_mask ! ghost cell update info for masked halo
 
       real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: &
-         fpresx  , & ! x fixed point residual vector, fx = uvel - uprev_k
-         fpresy  , & ! y fixed point residual vector, fy = vvel - vprev_k
+         fpresx  , & ! fixed point residual vector, x components: fx = uvel - uprev_k
+         fpresy  , & ! fixed point residual vector, y components: fy = vvel - vprev_k
         Cb           ! seabed stress coefficient
 
      real (kind=dbl_kind), dimension (ntot), intent(inout) :: &
@@ -737,11 +737,11 @@ subroutine anderson_solver (icellt , icellu, &
         soly        ! solution of FGMRES (y components)
 
      real (kind=dbl_kind), dimension(nx_block,ny_block,8):: &
-        stPrtmp, & ! doit etre (nx_block,ny_block,max_blocks,8)???? PAs besoin des 2? reuse?
-        Dstrtmp
+        stPrtmp, & ! x,y-derivatives of the replacement pressure
+        Dstrtmp    ! contributions of the rheology term to the diagonal
 
      real (kind=dbl_kind), dimension (max_blocks) :: &
-        L2norm      ! to compute l^2 norm of grid function
+        L2norm      ! array used to compute l^2 norm of grid function
 
      real (kind=dbl_kind), dimension (ntot) :: &
        res       , & ! current residual
@@ -769,7 +769,7 @@ subroutine anderson_solver (icellt , icellu, &
        prog_norm  , & ! norm of difference between current and previous solution
        nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x)
 #ifdef CICE_USE_LAPACK
-       ddot, dnrm2 , & ! BLAS functions
+       ddot, dnrm2 , & ! external BLAS functions
 #endif
        conv           ! needed for FGMRES !phb keep ?
@@ -790,7 +790,7 @@ subroutine anderson_solver (icellt , icellu, &
      do it_nl = 0, maxits_nonlin   ! nonlinear iteration loop
         ! Compute quantities needed for computing PDE residual and g(x) (fixed point map)
         !-----------------------------------------------------------------
-        ! Calc zetaD, Pr, Cb and vrel = f(uprev_k, vprev_k)
+        ! Calc zetaD, dPr/dx, dPr/dy, Cb and vrel = f(uprev_k, vprev_k)
         !-----------------------------------------------------------------
         !$OMP PARALLEL DO PRIVATE(iblk)
         do iblk = 1, nblocks
@@ -888,11 +888,12 @@ subroutine anderson_solver (icellt , icellu, &
         if (fpfunc_andacc == 1) then
            ! g_1(x) = FGMRES(A(x), b(x))
 
-           ! Prepare precond matrix
+           ! Prepare diagonal for preconditioner
            if (precond == 'diag' .or. precond == 'pgmres') then
               !$OMP PARALLEL DO PRIVATE(iblk)
               do iblk = 1, nblocks
-                 call formDiag_step1 (nx_block , ny_block, & ! 
D term due to rheology + ! first compute diagonal contributions due to rheology term + call formDiag_step1 (nx_block , ny_block , & icellu (iblk) , & indxui (:,iblk) , indxuj(:,iblk), & dxt (:,:,iblk) , dyt (:,:,iblk), & @@ -900,6 +901,7 @@ subroutine anderson_solver (icellt , icellu, & cxp (:,:,iblk) , cyp (:,:,iblk), & cxm (:,:,iblk) , cym (:,:,iblk), & zetaD (:,:,iblk,:), Dstrtmp (:,:,:)) + ! second compute the full diagonal call formDiag_step2 (nx_block , ny_block , & icellu (iblk), & indxui (:,iblk), indxuj (:,iblk), & @@ -922,7 +924,7 @@ subroutine anderson_solver (icellt , icellu, & maxits_fgmres , & solx , soly , & nbiter , conv) - ! Put FGMRES solution solx,soly in fpfunc vector (needed for anderson) + ! Put FGMRES solution solx,soly in fpfunc vector (needed for Anderson) call arrays_to_vec (nx_block , ny_block , & nblocks , max_blocks , & icellu (:), ntot , & @@ -935,7 +937,7 @@ subroutine anderson_solver (icellt , icellu, & file=__FILE__, line=__LINE__) endif - ! Compute fixed point residual + ! Compute fixed point residual f(x) = g(x) - x res = fpfunc - sol #ifdef CICE_USE_LAPACK fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) @@ -1023,17 +1025,17 @@ subroutine anderson_solver (icellt , icellu, & R(res_num, res_num) = dnrm2(size(res_diff) ,res_diff, inc) Q(:,res_num) = res_diff / R(res_num, res_num) endif - ! phb: here, drop more columns to improve conditioning + ! TODO: here, drop more columns to improve conditioning ! if (droptol) then ! endif ! Solve least square problem for coefficients ! 1. Compute rhs_tri = Q^T * res call dgemv ('t', size(Q,1), res_num, c1, Q(:,1:res_num), size(Q,1), res, inc, c0, rhs_tri, inc) - ! 2. Solve R*coeffs = rhs_tri, puts result in rhs_tri + ! 2. Solve R*coeffs = rhs_tri, put result in rhs_tri call dtrsv ('u', 'n', 'n', res_num, R(1:res_num,1:res_num), res_num, rhs_tri, inc) coeffs = rhs_tri - ! Update approximate solution: x = fpfunc - G_diff*coeffs, puts result in fpfunc + ! Update approximate solution: x = fpfunc - G_diff*coeffs, put result in fpfunc call dgemv ('n', size(G_diff,1), res_num, -c1, G_diff(:,1:res_num), size(G_diff,1), coeffs, inc, c1, fpfunc, inc) sol = fpfunc ! Apply damping @@ -1065,10 +1067,10 @@ subroutine anderson_solver (icellt , icellu, & sol (:), & uvel (:,:,:), vvel (:,:,:)) - ! phb NOT SURE IF THIS HALO UPDATE IS ACTUALLY NEEDED + ! Do halo update so that halo cells contain up to date info for advection call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) - ! Compute fixed point residual norm + ! Compute "progress" residual norm !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks fpresx(:,:,iblk) = uvel(:,:,iblk) - uprev_k(:,:,iblk) @@ -1092,7 +1094,7 @@ end subroutine anderson_solver !======================================================================= -! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx. +! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx, dPr/dy subroutine calc_zeta_Pr (nx_block, ny_block, & icellt , & @@ -1134,7 +1136,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, & zetaD ! 2*zeta real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: & - stPr ! stress Pr combinations + stPr ! stress combinations from replacement pressure ! local variables @@ -1279,7 +1281,7 @@ end subroutine calc_zeta_Pr !======================================================================= -! Computes the VP stress (as diagnostic) +! 
Computes the VP stresses (as diagnostic) subroutine stress_vp (nx_block , ny_block , & icellt , & @@ -1386,7 +1388,7 @@ end subroutine stress_vp !======================================================================= -! Compute vrel and basal stress coefficients +! Compute vrel and seabed stress coefficients subroutine calc_vrel_Cb (nx_block, ny_block, & icellu , Cw , & @@ -1430,10 +1432,6 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & rhow ! character(len=*), parameter :: subname = '(calc_vrel_Cb)' - - !----------------------------------------------------------------- - ! integrate the momentum equation - !----------------------------------------------------------------- call icepack_query_parameters(rhow_out=rhow) call icepack_warnings_flush(nu_diag) @@ -1448,7 +1446,7 @@ subroutine calc_vrel_Cb (nx_block, ny_block, & vrel(i,j) = aiu(i,j)*rhow*Cw(i,j)*sqrt((uocn(i,j) - uvel(i,j))**2 + & (vocn(i,j) - vvel(i,j))**2) ! m/s - Cb(i,j) = Tbu(i,j) / (sqrt(uvel(i,j)**2 + vvel(i,j)**2) + u0) ! for basal stress + Cb(i,j) = Tbu(i,j) / (sqrt(uvel(i,j)**2 + vvel(i,j)**2) + u0) ! for seabed stress enddo ! ij end subroutine calc_vrel_Cb @@ -1569,7 +1567,7 @@ subroutine matvec (nx_block, ny_block, & real (kind=dbl_kind) :: & utp, vtp , & ! utp = uvel, vtp = vvel ccaimp,ccb , & ! intermediate variables - strintx, strinty + strintx, strinty ! divergence of the internal stress tensor real (kind=dbl_kind) :: & divune, divunw, divuse, divusw , & ! divergence @@ -1629,7 +1627,7 @@ subroutine matvec (nx_block, ny_block, & !----------------------------------------------------------------- ! the stresses ! kg/s^2 ! (1) northeast, (2) northwest, (3) southwest, (4) southeast - ! JFL commented part of stressp is for the rep pressure Pr + ! NOTE: commented part of stressp is from the replacement pressure Pr !----------------------------------------------------------------- stressp_1 = zetaD(i,j,1)*(divune*(c1+Ktens))! - Deltane*(c1-Ktens)) @@ -1814,10 +1812,6 @@ subroutine calc_bfix (nx_block , ny_block , & character(len=*), parameter :: subname = '(calc_bfix)' - !----------------------------------------------------------------- - ! Define variables for momentum equation - !----------------------------------------------------------------- - do ij = 1, icellu i = indxui(ij) j = indxuj(ij) @@ -1873,7 +1867,7 @@ subroutine calc_bvec (nx_block, ny_block, & real (kind=dbl_kind) :: & taux, tauy , & ! part of ocean stress term - strintx, strinty , & ! divergence of the internal stress tensor (only Pr part) + strintx, strinty , & ! divergence of the internal stress tensor (only Pr contributions) rhow ! character(len=*), parameter :: subname = '(calc_bvec)' @@ -1895,7 +1889,7 @@ subroutine calc_bvec (nx_block, ny_block, & taux = vrel(i,j)*waterx(i,j) ! NOTE this is not the entire tauy = vrel(i,j)*watery(i,j) ! ocn stress term - ! divergence of the internal stress tensor (only Pr part, i.e. dPr/dx) + ! divergence of the internal stress tensor (only Pr part, i.e. dPr/dx, dPr/dy) strintx = uarear(i,j)* & (stPr(i,j,1) + stPr(i+1,j,2) + stPr(i,j+1,3) + stPr(i+1,j+1,4)) strinty = uarear(i,j)* & @@ -1974,7 +1968,7 @@ end subroutine residual_vec !======================================================================= ! Form the diagonal of the matrix A(u,v) (first part of the computation) -! Part 1: compute the contributions of the diagonal to the rheology term +! 
Part 1: compute the contributions to the diagonal from the rheology term subroutine formDiag_step1 (nx_block, ny_block, & icellu , & @@ -2007,7 +2001,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & zetaD ! 2*zeta real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: & - Dstr ! intermediate calc for diagonal components of matrix A associated + Dstr ! intermediate value for diagonal components of matrix A associated ! with rheology term ! local variables @@ -2019,7 +2013,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & divune, divunw, divuse, divusw , & ! divergence tensionne, tensionnw, tensionse, tensionsw, & ! tension shearne, shearnw, shearse, shearsw , & ! shearing - uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! c0 or c1 + uij,ui1j,uij1,ui1j1,vij,vi1j,vij1,vi1j1 , & ! == c0 or c1 stressp_1, stressp_2, stressp_3, stressp_4, & stressm_1, stressm_2, stressm_3, stressm_4, & stress12_1,stress12_2,stress12_3,stress12_4,& @@ -2043,8 +2037,11 @@ subroutine formDiag_step1 (nx_block, ny_block, & !cdir nodep !NEC !ocl novrec !Fujitsu - Dstr(:,:,:) = c0 ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 - ! come from the surrounding T cells but are all refrerenced to the i,j (u point) + Dstr(:,:,:) = c0 + + ! Be careful: Dstr contains 4 terms for u and 4 terms for v. + ! These 8 terms come from the surrounding T cells but are all + ! refrerenced to the i,j (u point) : ! Dstr(i,j,1) corresponds to str(i,j,1) ! Dstr(i,j,2) corresponds to str(i+1,j,2) @@ -2386,7 +2383,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & real (kind=dbl_kind) :: & ccaimp , & ! intermediate variables - strintx, strinty + strintx, strinty ! diagonal contributions to the divergence character(len=*), parameter :: subname = '(formDiag_step2)' @@ -2397,8 +2394,9 @@ subroutine formDiag_step2 (nx_block, ny_block, & strintx = c0 strinty = c0 - ! BE careful: Dstr contains 4 terms for u and 4 terms for v. These 8 - ! come from the surrounding T cells but are all refrerenced to the i,j (u point) + ! Be careful: Dstr contains 4 terms for u and 4 terms for v. + ! These 8 terms come from the surrounding T cells but are all + ! refrerenced to the i,j (u point) : ! Dstr(i,j,1) corresponds to str(i,j,1) ! Dstr(i,j,2) corresponds to str(i+1,j,2) @@ -2476,7 +2474,6 @@ end subroutine calc_L2norm_squared !======================================================================= ! Convert a grid function (tpu,tpv) to a one dimensional vector -! to be passed to the legacy FGMRES driver subroutine arrays_to_vec (nx_block, ny_block , & nblocks , max_blocks, & @@ -2489,7 +2486,7 @@ subroutine arrays_to_vec (nx_block, ny_block , & nx_block, ny_block, & ! block dimensions nblocks, & ! nb of blocks max_blocks, & ! max nb of blocks - ntot ! size of problem for fgmres + ntot ! size of problem for Anderson integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu @@ -2503,7 +2500,7 @@ subroutine arrays_to_vec (nx_block, ny_block , & tpv ! y-component of vector real (kind=dbl_kind), dimension (ntot), intent(out) :: & - outvec + outvec ! output 1D vector ! local variables @@ -2513,7 +2510,7 @@ subroutine arrays_to_vec (nx_block, ny_block , & character(len=*), parameter :: subname = '(arrays_to_vec)' !----------------------------------------------------------------- - ! form vector (converts from max_blocks arrays to single vector + ! 
form vector (converts from max_blocks arrays to single vector) !----------------------------------------------------------------- outvec(:) = c0 @@ -2534,8 +2531,7 @@ end subroutine arrays_to_vec !======================================================================= -! Convert one dimensional vector received from the legacy FGMRES driver -! to a grid function (tpu,tpv) +! Convert one dimensional vector to a grid function (tpu,tpv) subroutine vec_to_arrays (nx_block, ny_block , & nblocks , max_blocks, & @@ -2548,7 +2544,7 @@ subroutine vec_to_arrays (nx_block, ny_block , & nx_block, ny_block, & ! block dimensions nblocks, & ! nb of blocks max_blocks, & ! max nb of blocks - ntot ! size of problem for fgmres + ntot ! size of problem for Anderson integer (kind=int_kind), dimension (max_blocks), intent(in) :: & icellu @@ -2558,7 +2554,7 @@ subroutine vec_to_arrays (nx_block, ny_block , & indxuj ! compressed index in j-direction real (kind=dbl_kind), dimension (ntot), intent(in) :: & - invec + invec ! input 1D vector real (kind=dbl_kind), dimension (nx_block,ny_block, max_blocks), intent(out) :: & tpu , & ! x-component of vector @@ -2572,7 +2568,7 @@ subroutine vec_to_arrays (nx_block, ny_block , & character(len=*), parameter :: subname = '(vec_to_arrays)' !----------------------------------------------------------------- - ! form arrays (converts from vector to the max_blocks arrays + ! form arrays (converts from vector to the max_blocks arrays) !----------------------------------------------------------------- tpu(:,:,:) = c0 @@ -2594,7 +2590,7 @@ end subroutine vec_to_arrays !======================================================================= -! Update Q and R factor after deletion of the 1st column of G_diff +! Update Q and R factors after deletion of the 1st column of G_diff ! ! author: P. Blain ECCC ! @@ -2714,8 +2710,8 @@ subroutine fgmres (zetaD , & norm_squared ! array to accumulate squared norm of grid function over blocks real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & - arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv - arnoldi_basis_y ! arnoldi basis (y components) + arnoldi_basis_x , & ! Arnoldi basis (x components) + arnoldi_basis_y ! Arnoldi basis (y components) real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: & wwx , & ! !phb FIND BETTER NAME (x components) @@ -2812,7 +2808,7 @@ subroutine fgmres (zetaD , & " fgmres_L2norm= ", norm_residual endif - ! Current guess is a good enough solution + ! Current guess is a good enough solution TODO: reactivate and test this ! if (norm_residual < tolerance) then ! return ! end if @@ -3106,8 +3102,8 @@ subroutine pgmres (zetaD , & norm_squared ! array to accumulate squared norm of grid function over blocks real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1) :: & - arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv - arnoldi_basis_y ! arnoldi basis (y components) + arnoldi_basis_x , & ! Arnoldi basis (x components) + arnoldi_basis_y ! Arnoldi basis (y components) real (kind=dbl_kind) :: & norm_residual , & ! current L^2 norm of residual vector @@ -3145,7 +3141,7 @@ subroutine pgmres (zetaD , & conv = c1 norm_squared = c0 precond_type = 'diag' ! Jacobi preconditioner - ortho_type = 'cgs' ! classical gram-schmidt + ortho_type = 'cgs' ! classical gram-schmidt TODO: try with MGS ! 
Cells with no ice should be zero-initialized workspace_x = c0 @@ -3251,7 +3247,9 @@ subroutine pgmres (zetaD , & precond_type, & workspace_x , workspace_y) - ! !phb haloUpdate would go here (for workspace_x, _y) + ! NOTE: halo updates for (workspace_x, workspace_y) + ! are skipped here for efficiency since this is just a preconditioner + !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks call matvec (nx_block , ny_block , & @@ -3483,19 +3481,19 @@ subroutine precondition(zetaD , & i, j ! grid indices real (kind=dbl_kind) :: & - tolerance ! Tolerance for pgmres + tolerance ! Tolerance for PGMRES integer (kind=int_kind) :: & - maxinner ! Restart parameter for pgmres + maxinner ! Restart parameter for PGMRES integer (kind=int_kind) :: & - maxouter ! Maximum number of outer iterations for pgmres + maxouter ! Maximum number of outer iterations for PGMRES integer (kind=int_kind) :: & - nbiter ! Total number of iteration pgmres performed + nbiter ! Total number of iteration PGMRES performed real (kind=dbl_kind) :: & - conv ! !phb DESCRIBE IF WE KEEP for pgmres + conv ! !phb DESCRIBE IF WE KEEP for PGMRES character(len=*), parameter :: subname = '(precondition)' @@ -3515,7 +3513,7 @@ subroutine precondition(zetaD , & enddo !$OMP END PARALLEL DO elseif (precond_type == 'pgmres') then ! PGMRES (Jacobi-preconditioned GMRES) - ! Initialize preconditioned vector to 0 !phb try with wx = vx or vx/diagx + ! Initialize preconditioned vector to 0 ! TODO: try with wx = vx or vx/diagx wx = c0 wy = c0 tolerance = reltol_pgmres @@ -3557,12 +3555,11 @@ subroutine orthogonalize(ortho_type , initer , & maxinner ! Restart the method every maxinner inner iterations real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner+1), intent(inout) :: & - arnoldi_basis_x , & ! arnoldi basis (x components) !phb == vv + arnoldi_basis_x , & ! arnoldi basis (x components) arnoldi_basis_y ! arnoldi basis (y components) real (kind=dbl_kind), dimension(maxinner+1, maxinner), intent(inout) :: & hessenberg ! system matrix of the Hessenberg (least squares) system - !phb: removing this parameter and argument makes ifort error in the .i90 file ! local variables @@ -3576,7 +3573,7 @@ subroutine orthogonalize(ortho_type , initer , & local_dot ! local array value to accumulate dot product of grid function over blocks real (kind=dbl_kind), dimension(maxinner) :: & - dotprod_local ! local array to accumulate several dot product computations + dotprod_local ! local array to accumulate several dot product computations character(len=*), parameter :: subname = '(orthogonalize)' From 5edf0fdee3b86eea2b17c4f7c614a9edba4266c2 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 16 Jul 2020 18:24:46 -0400 Subject: [PATCH 180/196] ice_dyn_vp: matvec: remove unneeded local variable --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a1f2a9efe..054e2030c 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -1565,7 +1565,6 @@ subroutine matvec (nx_block, ny_block, & str real (kind=dbl_kind) :: & - utp, vtp , & ! utp = uvel, vtp = vvel ccaimp,ccb , & ! intermediate variables strintx, strinty ! divergence of the internal stress tensor @@ -1754,9 +1753,6 @@ subroutine matvec (nx_block, ny_block, & i = indxui(ij) j = indxuj(ij) - utp = uvel(i,j) - vtp = vvel(i,j) - ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! 
kg/m^2 s
 
          ccb  = fm(i,j) + sign(c1,fm(i,j)) * vrel(i,j) * sinw ! kg/m^2 s
@@ -1767,8 +1763,8 @@ subroutine matvec (nx_block, ny_block, &
          strinty = uarear(i,j)* &
             (str(i,j,5) + str(i,j+1,6) + str(i+1,j,7) + str(i+1,j+1,8))
 
-         Au(i,j) = ccaimp*utp - ccb*vtp - strintx
-         Av(i,j) = ccaimp*vtp + ccb*utp - strinty
+         Au(i,j) = ccaimp*uvel(i,j) - ccb*vvel(i,j) - strintx
+         Av(i,j) = ccaimp*vvel(i,j) + ccb*uvel(i,j) - strinty
 
       enddo ! ij - icellu
 
 end subroutine matvec

From 892040affc63e06bf810481e0ae8b6f6f4a3ed8c Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Thu, 16 Jul 2020 18:41:50 -0400
Subject: [PATCH 181/196] ice_dyn_vp: rename 'stPrtmp' to 'stress_Pr' and
 'Dstrtmp' to 'diag_rheo'

Also rename 'Dstr' to 'Drheo' in formDiag_step[12]
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 78 +++++++++++------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 054e2030c..98298c9c5 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -737,8 +737,8 @@ subroutine anderson_solver (icellt , icellu, &
          soly        ! solution of FGMRES (y components)
 
       real (kind=dbl_kind), dimension(nx_block,ny_block,8):: &
-         stPrtmp, & ! x,y-derivatives of the replacement pressure
-         Dstrtmp    ! contributions of the rheology term to the diagonal
+         stress_Pr, & ! x,y-derivatives of the replacement pressure
+         diag_rheo    ! contributions of the rheology term to the diagonal
 
       real (kind=dbl_kind), dimension (max_blocks) :: &
          L2norm ! array used to compute l^2 norm of grid function
@@ -815,7 +815,7 @@ subroutine anderson_solver (icellt , icellu, &
                             cxm      (:,:,iblk), cym       (:,:,iblk), &
                             tinyarea (:,:,iblk),                       &
                             strength (:,:,iblk), zetaD   (:,:,iblk,:), &
-                            stPrtmp  (:,:,:))
+                            stress_Pr (:,:,:))
 
          call calc_vrel_Cb (nx_block           , ny_block          , &
                             icellu       (iblk), Cdn_ocn (:,:,iblk), &
                             indxui     (:,iblk), indxuj    (:,iblk), &
                             aiu      (:,:,iblk), Tbu     (:,:,iblk), &
@@ -829,7 +829,7 @@ subroutine anderson_solver (icellt , icellu, &
         call calc_bvec (nx_block           , ny_block          , &
                         icellu       (iblk),                     &
                         indxui     (:,iblk), indxuj    (:,iblk), &
-                        stPrtmp     (:,:,:), uarear  (:,:,iblk), &
+                        stress_Pr   (:,:,:), uarear  (:,:,iblk), &
                         waterx   (:,:,iblk), watery  (:,:,iblk), &
                         bxfix    (:,:,iblk), byfix   (:,:,iblk), &
                         bx       (:,:,iblk), by      (:,:,iblk), &
                         vrel     (:,:,iblk))
@@ -900,12 +900,12 @@ subroutine anderson_solver (icellt , icellu, &
                                      dxhy   (:,:,iblk) , dyhx (:,:,iblk), &
                                      cxp    (:,:,iblk) , cyp  (:,:,iblk), &
                                      cxm    (:,:,iblk) , cym  (:,:,iblk), &
-                                     zetaD  (:,:,iblk,:), Dstrtmp  (:,:,:))
+                                     zetaD  (:,:,iblk,:), diag_rheo(:,:,:))
                 ! second compute the full diagonal
                 call formDiag_step2 (nx_block           , ny_block          , &
                                      icellu       (iblk),                     &
                                      indxui     (:,iblk), indxuj    (:,iblk), &
-                                     Dstrtmp     (:,:,:), vrel    (:,:,iblk), &
+                                     diag_rheo   (:,:,:), vrel    (:,:,iblk), &
                                      umassdti (:,:,iblk),                     &
                                      uarear   (:,:,iblk), Cb      (:,:,iblk), &
                                      diagx    (:,:,iblk), diagy   (:,:,iblk))
@@ -1973,7 +1973,7 @@ subroutine formDiag_step1 (nx_block, ny_block, &
                              dxhy    , dyhx    , &
                              cxp     , cyp     , &
                              cxm     , cym     , &
-                             zetaD   , Dstr)
+                             zetaD   , Drheo)
 
       integer (kind=int_kind), intent(in) :: &
          nx_block, ny_block, & ! block dimensions
         icellu                 ! no. of cells where icetmask = 1
@@ -1997,7 +1997,7 @@ subroutine formDiag_step1 (nx_block, ny_block, &
          zetaD ! 2*zeta
 
       real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(out) :: &
-         Dstr  ! intermediate value for diagonal components of matrix A associated
+         Drheo ! intermediate value for diagonal components of matrix A associated
                ! with rheology term
 
       ! local variables
@@ -2033,20 +2033,20 @@ subroutine formDiag_step1 (nx_block, ny_block, &
 !cdir nodep      !NEC
 !ocl novrec      !Fujitsu
 
-      Dstr(:,:,:) = c0
+      Drheo(:,:,:) = c0
 
-      ! Be careful: Dstr contains 4 terms for u and 4 terms for v.
+      ! 
Be careful: Dstr contains 4 terms for u and 4 terms for v. + ! Be careful: Drheo contains 4 terms for u and 4 terms for v. ! These 8 terms come from the surrounding T cells but are all ! refrerenced to the i,j (u point) : - ! Dstr(i,j,1) corresponds to str(i,j,1) - ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) - ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) - ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) - ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) + ! Drheo(i,j,1) corresponds to str(i,j,1) + ! Drheo(i,j,2) corresponds to str(i+1,j,2) + ! Drheo(i,j,3) corresponds to str(i,j+1,3) + ! Drheo(i,j,4) corresponds to str(i+1,j+1,4)) + ! Drheo(i,j,5) corresponds to str(i,j,5) + ! Drheo(i,j,6) corresponds to str(i,j+1,6) + ! Drheo(i,j,7) corresponds to str(i+1,j,7) + ! Drheo(i,j,8) corresponds to str(i+1,j+1,8)) do cc = 1, 8 ! 4 for u and 4 for v @@ -2260,7 +2260,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) ! northeast (i,j) - Dstr(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + Drheo(iu,ju,1) = -strp_tmp - strm_tmp - str12ew & + dxhy(i,j)*(-csigpne + csigmne) + dyhx(i,j)*csig12ne elseif (cc == 2) then ! T cell i+1,j @@ -2269,7 +2269,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dyt(i,j)*(p333*ssigmn + p166*ssigms) ! northwest (i+1,j) - Dstr(iu,ju,2) = strp_tmp + strm_tmp - str12we & + Drheo(iu,ju,2) = strp_tmp + strm_tmp - str12we & + dxhy(i,j)*(-csigpnw + csigmnw) + dyhx(i,j)*csig12nw elseif (cc == 3) then ! T cell i,j+1 @@ -2278,7 +2278,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) ! southeast (i,j+1) - Dstr(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + Drheo(iu,ju,3) = -strp_tmp - strm_tmp + str12ew & + dxhy(i,j)*(-csigpse + csigmse) + dyhx(i,j)*csig12se elseif (cc == 4) then ! T cell i+1,j+1 @@ -2287,7 +2287,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dyt(i,j)*(p333*ssigms + p166*ssigmn) ! southwest (i+1,j+1) - Dstr(iu,ju,4) = strp_tmp + strm_tmp + str12we & + Drheo(iu,ju,4) = strp_tmp + strm_tmp + str12we & + dxhy(i,j)*(-csigpsw + csigmsw) + dyhx(i,j)*csig12sw !----------------------------------------------------------------- @@ -2300,7 +2300,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) ! northeast (i,j) - Dstr(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & + Drheo(iu,ju,5) = -strp_tmp + strm_tmp - str12ns & - dyhx(i,j)*(csigpne + csigmne) + dxhy(i,j)*csig12ne elseif (cc == 6) then ! T cell i,j+1 @@ -2309,7 +2309,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dxt(i,j)*(p333*ssigme + p166*ssigmw) ! southeast (i,j+1) - Dstr(iu,ju,6) = strp_tmp - strm_tmp - str12sn & + Drheo(iu,ju,6) = strp_tmp - strm_tmp - str12sn & - dyhx(i,j)*(csigpse + csigmse) + dxhy(i,j)*csig12se elseif (cc == 7) then ! T cell i,j+1 @@ -2318,7 +2318,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) ! northwest (i+1,j) - Dstr(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & + Drheo(iu,ju,7) = -strp_tmp + strm_tmp + str12ns & - dyhx(i,j)*(csigpnw + csigmnw) + dxhy(i,j)*csig12nw elseif (cc == 8) then ! T cell i+1,j+1 @@ -2327,7 +2327,7 @@ subroutine formDiag_step1 (nx_block, ny_block, & strm_tmp = p25*dxt(i,j)*(p333*ssigmw + p166*ssigme) ! 
southwest (i+1,j+1) - Dstr(iu,ju,8) = strp_tmp - strm_tmp + str12sn & + Drheo(iu,ju,8) = strp_tmp - strm_tmp + str12sn & - dyhx(i,j)*(csigpsw + csigmsw) + dxhy(i,j)*csig12sw endif @@ -2346,7 +2346,7 @@ end subroutine formDiag_step1 subroutine formDiag_step2 (nx_block, ny_block, & icellu , & indxui , indxuj , & - Dstr , vrel , & + Drheo , vrel , & umassdti, & uarear , Cb , & diagx , diagy) @@ -2366,7 +2366,7 @@ subroutine formDiag_step2 (nx_block, ny_block, & uarear ! 1/uarea real (kind=dbl_kind), dimension(nx_block,ny_block,8), intent(in) :: & - Dstr + Drheo real (kind=dbl_kind), dimension (nx_block,ny_block), intent(out) :: & diagx , & ! Diagonal (x component) of the matrix A @@ -2390,18 +2390,18 @@ subroutine formDiag_step2 (nx_block, ny_block, & strintx = c0 strinty = c0 - ! Be careful: Dstr contains 4 terms for u and 4 terms for v. + ! Be careful: Drheo contains 4 terms for u and 4 terms for v. ! These 8 terms come from the surrounding T cells but are all ! refrerenced to the i,j (u point) : - ! Dstr(i,j,1) corresponds to str(i,j,1) - ! Dstr(i,j,2) corresponds to str(i+1,j,2) - ! Dstr(i,j,3) corresponds to str(i,j+1,3) - ! Dstr(i,j,4) corresponds to str(i+1,j+1,4)) - ! Dstr(i,j,5) corresponds to str(i,j,5) - ! Dstr(i,j,6) corresponds to str(i,j+1,6) - ! Dstr(i,j,7) corresponds to str(i+1,j,7) - ! Dstr(i,j,8) corresponds to str(i+1,j+1,8)) + ! Drheo(i,j,1) corresponds to str(i,j,1) + ! Drheo(i,j,2) corresponds to str(i+1,j,2) + ! Drheo(i,j,3) corresponds to str(i,j+1,3) + ! Drheo(i,j,4) corresponds to str(i+1,j+1,4)) + ! Drheo(i,j,5) corresponds to str(i,j,5) + ! Drheo(i,j,6) corresponds to str(i,j+1,6) + ! Drheo(i,j,7) corresponds to str(i+1,j,7) + ! Drheo(i,j,8) corresponds to str(i+1,j+1,8)) do ij = 1, icellu i = indxui(ij) @@ -2410,9 +2410,9 @@ subroutine formDiag_step2 (nx_block, ny_block, & ccaimp = umassdti(i,j) + vrel(i,j) * cosw + Cb(i,j) ! kg/m^2 s strintx = uarear(i,j)* & - (Dstr(i,j,1) + Dstr(i,j,2) + Dstr(i,j,3) + Dstr(i,j,4)) + (Drheo(i,j,1) + Drheo(i,j,2) + Drheo(i,j,3) + Drheo(i,j,4)) strinty = uarear(i,j)* & - (Dstr(i,j,5) + Dstr(i,j,6) + Dstr(i,j,7) + Dstr(i,j,8)) + (Drheo(i,j,5) + Drheo(i,j,6) + Drheo(i,j,7) + Drheo(i,j,8)) diagx(i,j) = ccaimp - strintx diagy(i,j) = ccaimp - strinty From 88db326e02afa2c584a94d57958fccdf2b821248 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 17 Jul 2020 15:35:42 -0400 Subject: [PATCH 182/196] ice_dyn_vp: rename 'calc_zeta_Pr' to 'calc_zeta_dPr' The subroutine 'calc_zeta_Pr' computes the derivatives of the replacement pressure, so rename it to 'calc_zeta_dPr' to bring its name in line with its computation. 
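
For reference, the quantities computed here are related as follows
(schematic only, using the standard VP notation of the literature
rather than the exact code):

    ! zetaD = 2*zeta is the (doubled) bulk viscosity computed here;
    ! with the replacement pressure closure,
    !     Pr = 2 * zeta * Delta = zetaD * Delta
    ! and 'stress_Pr' holds the per-corner contributions from which
    ! 'calc_bvec' later assembles dPr/dx, dPr/dy in the RHS vector b

hence the added 'd' in the new name.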
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 28 +++++++++++-------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 98298c9c5..5cb6807ee 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -805,17 +805,17 @@ subroutine anderson_solver (icellt , icellu, &
          uprev_k(:,:,iblk) = uvel(:,:,iblk)
          vprev_k(:,:,iblk) = vvel(:,:,iblk)
 
-         call calc_zeta_Pr (nx_block           , ny_block          , &
-                            icellt       (iblk),                     &
-                            indxti     (:,iblk), indxtj    (:,iblk), &
-                            uprev_k  (:,:,iblk), vprev_k (:,:,iblk), &
-                            dxt      (:,:,iblk), dyt     (:,:,iblk), &
-                            dxhy     (:,:,iblk), dyhx    (:,:,iblk), &
-                            cxp      (:,:,iblk), cyp     (:,:,iblk), &
-                            cxm      (:,:,iblk), cym     (:,:,iblk), &
-                            tinyarea (:,:,iblk),                     &
-                            strength (:,:,iblk), zetaD (:,:,iblk,:), &
-                            stress_Pr  (:,:,:))
+         call calc_zeta_dPr (nx_block           , ny_block          , &
+                             icellt       (iblk),                     &
+                             indxti     (:,iblk), indxtj    (:,iblk), &
+                             uprev_k  (:,:,iblk), vprev_k (:,:,iblk), &
+                             dxt      (:,:,iblk), dyt     (:,:,iblk), &
+                             dxhy     (:,:,iblk), dyhx    (:,:,iblk), &
+                             cxp      (:,:,iblk), cyp     (:,:,iblk), &
+                             cxm      (:,:,iblk), cym     (:,:,iblk), &
+                             tinyarea (:,:,iblk),                     &
+                             strength (:,:,iblk), zetaD (:,:,iblk,:), &
+                             stress_Pr  (:,:,:))
 
          call calc_vrel_Cb (nx_block           , ny_block          , &
                             icellu       (iblk), Cdn_ocn (:,:,iblk), &
@@ -1096,7 +1096,7 @@ end subroutine anderson_solver
 
 ! Computes the viscous coefficients (in fact zetaD=2*zeta) and dPr/dx, dPr/dy
 
-      subroutine calc_zeta_Pr (nx_block, ny_block, &
+      subroutine calc_zeta_dPr (nx_block, ny_block, &
                                icellt  ,           &
                                indxti  , indxtj  , &
                                uvel    , vvel    , &
@@ -1155,7 +1155,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, &
 
      logical :: capping ! of the viscous coeff
 
-      character(len=*), parameter :: subname = '(calc_zeta_Pr)'
+      character(len=*), parameter :: subname = '(calc_zeta_dPr)'
 
       ! Initialize
 
@@ -1277,7 +1277,7 @@ subroutine calc_zeta_Pr (nx_block, ny_block, &
 
       enddo ! ij
 
-      end subroutine calc_zeta_Pr
+      end subroutine calc_zeta_dPr
 
 !=======================================================================
 

From 235133df2e8de4822f86f3df4ebb2bf927acc2be Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 17 Jul 2020 16:51:31 -0400
Subject: [PATCH 183/196] ice_dyn_vp: rename 'ww[xy]' to 'orig_basis_[xy]'

The arrays 'ww[xy]' are used to keep the values of the preconditioned
vectors, in order to update the solution at the end of the iteration.
As such, rename them to 'orig_basis_[xy]', since they form a basis of
the solution space and this name describes their purpose better.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 5cb6807ee..b7b0e6f39 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -2710,8 +2710,8 @@ subroutine fgmres (zetaD , &
         arnoldi_basis_y ! Arnoldi basis (y components)
 
      real (kind=dbl_kind), dimension(nx_block, ny_block, max_blocks, maxinner) :: &
-        wwx , & ! !phb FIND BETTER NAME (x components)
-        wwy ! !phb FIND BETTER NAME (y components)
+        orig_basis_x , & ! original basis (x components)
+        orig_basis_y ! original basis (y components)
 
      real (kind=dbl_kind) :: &
         norm_residual , & ! current L^2 norm of residual vector
@@ -2851,9 +2851,8 @@ subroutine fgmres (zetaD , &
                            diagx , diagy , &
                            precond_type, &
                            workspace_x , workspace_y)
-         ! !phb DESCRIBE ww
-         wwx(:,:,:,initer) = workspace_x
-         wwy(:,:,:,initer) = workspace_y
+         orig_basis_x(:,:,:,initer) = workspace_x
+         orig_basis_y(:,:,:,initer) = workspace_y
 
          ! Update workspace with boundary values
          call ice_HaloUpdate_vel(workspace_x, workspace_y, &
@@ -2974,8 +2973,8 @@ subroutine fgmres (zetaD , &
             i = indxui(ij, iblk)
             j = indxuj(ij, iblk)
 
-            solx(i, j, iblk) = solx(i, j, iblk) + t * wwx(i, j, iblk, it)
-            soly(i, j, iblk) = soly(i, j, iblk) + t * wwy(i, j, iblk, it)
+            solx(i, j, iblk) = solx(i, j, iblk) + t * orig_basis_x(i, j, iblk, it)
+            soly(i, j, iblk) = soly(i, j, iblk) + t * orig_basis_y(i, j, iblk, it)
          enddo ! ij
       enddo
       !$OMP END PARALLEL DO

From 0d4e4c3eb217d555d80853f60f99d227f2e8219b Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Fri, 17 Jul 2020 18:00:36 -0400
Subject: [PATCH 184/196] ice_dyn_vp: [fp]gmres: remove unused 'r0' variable and 'conv' argument

Both of these are either leftovers of the conversion from the legacy
FGMRES implementation or of the GEM implementation, but are unused in
our implementation. Remove them.

Move the declaration of the BLAS functions into their own type
declaration to avoid having to deal with the trailing `, &`.
---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 40 +++++++----------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index b7b0e6f39..f6a220067 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -767,11 +767,12 @@ subroutine anderson_solver (icellt , icellu, &
       tol_nl , & ! tolerance for nonlinear convergence: reltol_nonlin * (initial nonlinear residual norm)
       fpres_norm , & ! norm of current fixed point residual : f(x) = g(x) - x
       prog_norm , & ! norm of difference between current and previous solution
-      nlres_norm , & ! norm of current nonlinear residual : F(x) = A(x)x -b(x)
+      nlres_norm ! norm of current nonlinear residual : F(x) = A(x)x -b(x)
+
 #ifdef CICE_USE_LAPACK
-      ddot, dnrm2 , & ! external BLAS functions
+      real (kind=dbl_kind) :: &
+         ddot, dnrm2 ! external BLAS functions
 #endif
-      conv ! needed for FGMRES !phb keep ?
 
       character(len=*), parameter :: subname = '(anderson_solver)'
 
@@ -923,7 +924,7 @@ subroutine anderson_solver (icellt , icellu, &
                         reltol_fgmres , im_fgmres, &
                         maxits_fgmres , &
                         solx , soly , &
-                        nbiter , conv)
+                        nbiter)
          ! Put FGMRES solution solx,soly in fpfunc vector (needed for Anderson)
          call arrays_to_vec (nx_block , ny_block , &
                              nblocks , max_blocks , &
@@ -2652,7 +2653,7 @@ subroutine fgmres (zetaD , &
                      tolerance, maxinner, &
                      maxouter , &
                      solx , soly , &
-                     nbiter , conv)
+                     nbiter)
 
      real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
         zetaD ! zetaD = 2*zeta (viscous coefficient)
@@ -2688,9 +2689,6 @@ subroutine fgmres (zetaD , &
      integer (kind=int_kind), intent(out) :: &
         nbiter ! Total number of Arnoldi iterations performed
 
-     real (kind=dbl_kind), intent(out) :: &
-        conv ! !phb DESCRIBE IF WE KEEP
-
      ! local variables
 
      integer (kind=int_kind) :: &
@@ -2735,7 +2733,8 @@ subroutine fgmres (zetaD , &
      character (len=char_len) :: &
         precond_type ! type of preconditioner
 
-     real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep
+     real (kind=dbl_kind) :: &
+        relative_tolerance ! relative tolerance, i.e. tolerance*norm(initial residual)
 
      character(len=*), parameter :: subname = '(fgmres)'
 
@@ -2745,7 +2744,6 @@ subroutine fgmres (zetaD , &
 
      outiter = 0
      nbiter = 0
-     conv = c1
 
      norm_squared = c0
      precond_type = precond
@@ -2825,11 +2823,8 @@ subroutine fgmres (zetaD , &
 
      if (outiter == 0) then
         relative_tolerance = tolerance * norm_residual
-        r0 = norm_residual
      end if
 
-     conv = norm_residual / r0
-
      ! Initialize 1-st term of RHS of Hessenberg system
      rhs_hess(1) = norm_residual
      rhs_hess(2:) = c0
@@ -2936,7 +2931,6 @@ subroutine fgmres (zetaD , &
 
      ! Check for convergence
      norm_residual = abs(rhs_hess(nextit))
-     conv = norm_residual / r0
 
      if (my_task == master_task .and. monitor_fgmres) then
         write(nu_diag, '(a,i4,a,d26.16)') "monitor_fgmres: iter_fgmres= ", nbiter, &
@@ -3044,7 +3038,7 @@ subroutine pgmres (zetaD , &
                      tolerance, maxinner, &
                      maxouter , &
                      solx , soly , &
-                     nbiter , conv)
+                     nbiter)
 
      real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
        zetaD ! zetaD = 2*zeta (viscous coefficient)
@@ -3079,9 +3073,6 @@ subroutine pgmres (zetaD , &
      integer (kind=int_kind), intent(out) :: &
        nbiter ! Total number of Arnoldi iterations performed
 
-     real (kind=dbl_kind), intent(out) :: &
-       conv ! !phb DESCRIBE IF WE KEEP
-
      ! local variables
 
      integer (kind=int_kind) :: &
@@ -3123,7 +3114,8 @@ subroutine pgmres (zetaD , &
        precond_type , & ! type of preconditioner
        ortho_type ! type of orthogonalization
 
-     real (kind=dbl_kind) :: relative_tolerance, r0 !phb DESCRIBE if we keep
+     real (kind=dbl_kind) :: &
+        relative_tolerance ! relative tolerance, i.e. tolerance*norm(initial residual)
 
      character(len=*), parameter :: subname = '(pgmres)'
 
@@ -3133,7 +3125,6 @@ subroutine pgmres (zetaD , &
 
      outiter = 0
      nbiter = 0
-     conv = c1
 
      norm_squared = c0
      precond_type = 'diag' ! Jacobi preconditioner
      ortho_type = 'cgs' ! classical gram-schmidt TODO: try with MGS
@@ -3214,11 +3205,8 @@ subroutine pgmres (zetaD , &
 
      if (outiter == 0) then
        relative_tolerance = tolerance * norm_residual
-       r0 = norm_residual
     end if
 
-    conv = norm_residual / r0
-
     ! Initialize 1-st term of RHS of Hessenberg system
     rhs_hess(1) = norm_residual
     rhs_hess(2:) = c0
@@ -3330,7 +3318,6 @@ subroutine pgmres (zetaD , &
                " pgmres_L2norm= ", norm_residual
         endif
 
-        conv = norm_residual / r0
        if ((initer >= maxinner) .or. (norm_residual <= relative_tolerance)) then
            exit
        endif
@@ -3487,9 +3474,6 @@ subroutine precondition(zetaD , &
    integer (kind=int_kind) :: &
       nbiter ! Total number of iteration PGMRES performed
 
-   real (kind=dbl_kind) :: &
-      conv ! !phb DESCRIBE IF WE KEEP for PGMRES
-
    character(len=*), parameter :: subname = '(precondition)'
 
    if (precond_type == 'ident') then ! identity (no preconditioner)
@@ -3522,7 +3506,7 @@ subroutine precondition(zetaD , &
                   tolerance, maxinner, &
                   maxouter , &
                   wx , wy , &
-                  nbiter , conv)
+                  nbiter)
    else
       call abort_ice(error_message='wrong preconditioner in ' // subname, &
         file=__FILE__, line=__LINE__)

From ea840c768155af6f50b14395f9a4f4d7887cd4d4 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 20 Jul 2020 10:04:33 -0400
Subject: [PATCH 185/196] ice_dyn_vp: make 'maxits_nonlin' default to 4

Let's use a small number of nonlinear iterations by default, since
other models with VP solvers are often used with a small number of
iterations.

Add an option file 'set_nml.nonlin5000' to set 'maxits_nonlin' to 5000,
effectively letting the VP solver iterate until 'reltol_nonlin' is
reached.
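
For example (assuming the usual 'cice.setup' workflow; the case name
and machine/env values below are placeholders), the previous behavior
of iterating until 'reltol_nonlin' is satisfied can be recovered by
combining this new option with the existing 'dynpicard' option:

    ./cice.setup --case vp_converged --mach <machine> --env <env> \
                 --set dynpicard,nonlin5000

which results in kdyn = 3, algo_nonlin = 'picard' and
maxits_nonlin = 5000 in ice_in.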
--- cicecore/cicedynB/general/ice_init.F90 | 2 +- configuration/scripts/ice_in | 2 +- configuration/scripts/options/set_nml.dynanderson | 1 - configuration/scripts/options/set_nml.dynpicard | 1 - configuration/scripts/options/set_nml.nonlin5000 | 1 + 5 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 configuration/scripts/options/set_nml.nonlin5000 diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 635bb0626..862974e14 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -334,7 +334,7 @@ subroutine input_data threshold_hw = 30.0_dbl_kind ! max water depth for grounding Ktens = 0.0_dbl_kind ! T=Ktens*P (tensile strength: see Konig and Holland, 2010) e_ratio = 2.0_dbl_kind ! VP ellipse aspect ratio - maxits_nonlin = 1000 ! max nb of iteration for nonlinear solver + maxits_nonlin = 4 ! max nb of iteration for nonlinear solver precond = 'pgmres' ! preconditioner for fgmres: 'ident' (identity), 'diag' (diagonal), 'pgmres' (Jacobi-preconditioned GMRES) im_fgmres = 50 ! size of fgmres Krylov subspace im_pgmres = 5 ! size of pgmres Krylov subspace diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 42b66b47e..b783a48ab 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -139,7 +139,7 @@ kridge = 1 ktransport = 1 ssh_stress = 'geostrophic' - maxits_nonlin = 1000 + maxits_nonlin = 4 precond = 'pgmres' im_fgmres = 50 im_pgmres = 5 diff --git a/configuration/scripts/options/set_nml.dynanderson b/configuration/scripts/options/set_nml.dynanderson index fbfd16af0..566c53a09 100644 --- a/configuration/scripts/options/set_nml.dynanderson +++ b/configuration/scripts/options/set_nml.dynanderson @@ -1,4 +1,3 @@ kdyn = 3 algo_nonlin = 'anderson' -maxits_nonlin = 5000 use_mean_vrel = .false. diff --git a/configuration/scripts/options/set_nml.dynpicard b/configuration/scripts/options/set_nml.dynpicard index 8e21b0e0d..b81f4d4e6 100644 --- a/configuration/scripts/options/set_nml.dynpicard +++ b/configuration/scripts/options/set_nml.dynpicard @@ -1,4 +1,3 @@ kdyn = 3 algo_nonlin = 'picard' -maxits_nonlin = 5000 use_mean_vrel = .true. diff --git a/configuration/scripts/options/set_nml.nonlin5000 b/configuration/scripts/options/set_nml.nonlin5000 new file mode 100644 index 000000000..f767a3d0d --- /dev/null +++ b/configuration/scripts/options/set_nml.nonlin5000 @@ -0,0 +1 @@ +maxits_nonlin = 5000 From 294fe1db1cda7016cb9e37817f4afb28bf2b0595 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 20 Jul 2020 10:16:09 -0400 Subject: [PATCH 186/196] ice_dyn_vp: rename 'im_{fgmres,pgmres,andacc}' to 'dim_*' Since these three parameters denote dimensions (of the FGMRES, PGMRES and Anderson acceleration solution spaces), name them as such. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 38 +++++++++++----------- cicecore/cicedynB/general/ice_init.F90 | 30 ++++++++--------- configuration/scripts/ice_in | 4 +-- doc/source/user_guide/ug_case_settings.rst | 4 +-- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index f6a220067..c13341ff8 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -64,12 +64,12 @@ module ice_dyn_vp integer (kind=int_kind), public :: & maxits_nonlin , & ! max nb of iteration for nonlinear solver - im_fgmres , & ! size of fgmres Krylov subspace - im_pgmres , & ! 
size of pgmres Krylov subspace + dim_fgmres , & ! size of fgmres Krylov subspace + dim_pgmres , & ! size of pgmres Krylov subspace maxits_fgmres , & ! max nb of iteration for fgmres maxits_pgmres , & ! max nb of iteration for pgmres fpfunc_andacc , & ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) - im_andacc , & ! size of Anderson minimization matrix (number of saved previous residuals) + dim_andacc , & ! size of Anderson minimization matrix (number of saved previous residuals) start_andacc ! acceleration delay factor (acceleration starts at this iteration) logical (kind=log_kind), public :: & @@ -751,14 +751,14 @@ subroutine anderson_solver (icellt , icellu, & fpfunc_old , & ! previous value of fixed point function tmp ! temporary vector for BLAS calls - real (kind=dbl_kind), dimension(ntot,im_andacc) :: & + real (kind=dbl_kind), dimension(ntot,dim_andacc) :: & Q , & ! Q factor for QR factorization of F (residuals) matrix G_diff ! Matrix containing the differences of g(x) (fixed point function) evaluations - real (kind=dbl_kind), dimension(im_andacc,im_andacc) :: & + real (kind=dbl_kind), dimension(dim_andacc,dim_andacc) :: & R ! R factor for QR factorization of F (residuals) matrix - real (kind=dbl_kind), dimension(im_andacc) :: & + real (kind=dbl_kind), dimension(dim_andacc) :: & rhs_tri , & ! right hand side vector for matrix-vector product coeffs ! coeffs used to combine previous solutions @@ -915,15 +915,15 @@ subroutine anderson_solver (icellt , icellu, & endif ! FGMRES linear solver - call fgmres (zetaD , & - Cb , vrel , & - umassdti , & - halo_info_mask, & - bx , by , & - diagx , diagy , & - reltol_fgmres , im_fgmres, & - maxits_fgmres , & - solx , soly , & + call fgmres (zetaD , & + Cb , vrel , & + umassdti , & + halo_info_mask, & + bx , by , & + diagx , diagy , & + reltol_fgmres , dim_fgmres, & + maxits_fgmres , & + solx , soly , & nbiter) ! Put FGMRES solution solx,soly in fpfunc vector (needed for Anderson) call arrays_to_vec (nx_block , ny_block , & @@ -976,7 +976,7 @@ subroutine anderson_solver (icellt , icellu, & ! exit ! endif - if (im_andacc == 0 .or. it_nl < start_andacc) then + if (dim_andacc == 0 .or. it_nl < start_andacc) then ! Simple fixed point (Picard) iteration in this case sol = fpfunc else @@ -993,7 +993,7 @@ subroutine anderson_solver (icellt , icellu, & ! Update residual difference vector res_diff = res - res_old ! Update fixed point function difference matrix - if (res_num < im_andacc) then + if (res_num < dim_andacc) then ! Add column G_diff(:,res_num+1) = fpfunc - fpfunc_old else @@ -1013,7 +1013,7 @@ subroutine anderson_solver (icellt , icellu, & R(1,1) = dnrm2(size(res_diff), res_diff, inc) Q(:,1) = res_diff/R(1,1) else - if (res_num > im_andacc) then + if (res_num > dim_andacc) then ! 
Update factorization since 1st column was deleted call qr_delete(Q,R) res_num = res_num - 1 @@ -3496,7 +3496,7 @@ subroutine precondition(zetaD , & wx = c0 wy = c0 tolerance = reltol_pgmres - maxinner = im_pgmres + maxinner = dim_pgmres maxouter = maxits_pgmres call pgmres (zetaD, & Cb , vrel , & diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90 index 862974e14..fb12a3b45 100644 --- a/cicecore/cicedynB/general/ice_init.F90 +++ b/cicecore/cicedynB/general/ice_init.F90 @@ -100,10 +100,10 @@ subroutine input_data basalstress, k1, k2, alphab, threshold_hw, & Ktens, e_ratio, coriolis, ssh_stress, & kridge, ktransport, brlx, arlx - use ice_dyn_vp, only: maxits_nonlin, precond, im_fgmres, im_pgmres, maxits_fgmres, & + use ice_dyn_vp, only: maxits_nonlin, precond, dim_fgmres, dim_pgmres, maxits_fgmres, & maxits_pgmres, monitor_nonlin, monitor_fgmres, & monitor_pgmres, reltol_nonlin, reltol_fgmres, reltol_pgmres, & - algo_nonlin, fpfunc_andacc, im_andacc, reltol_andacc, & + algo_nonlin, fpfunc_andacc, dim_andacc, reltol_andacc, & damping_andacc, start_andacc, use_mean_vrel, ortho_type use ice_transport_driver, only: advection, conserv_check use ice_restoring, only: restore_ice @@ -199,10 +199,10 @@ subroutine input_data advection, coriolis, kridge, ktransport, & kstrength, krdg_partic, krdg_redist, mu_rdg, & e_ratio, Ktens, Cf, basalstress, & - k1, maxits_nonlin, precond, im_fgmres, & - im_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, & + k1, maxits_nonlin, precond, dim_fgmres, & + dim_pgmres, maxits_fgmres, maxits_pgmres, monitor_nonlin, & monitor_fgmres, monitor_pgmres, reltol_nonlin, reltol_fgmres, & - reltol_pgmres, algo_nonlin, im_andacc, reltol_andacc, & + reltol_pgmres, algo_nonlin, dim_andacc, reltol_andacc, & damping_andacc, start_andacc, fpfunc_andacc, use_mean_vrel, & ortho_type, & k2, alphab, threshold_hw, & @@ -336,8 +336,8 @@ subroutine input_data e_ratio = 2.0_dbl_kind ! VP ellipse aspect ratio maxits_nonlin = 4 ! max nb of iteration for nonlinear solver precond = 'pgmres' ! preconditioner for fgmres: 'ident' (identity), 'diag' (diagonal), 'pgmres' (Jacobi-preconditioned GMRES) - im_fgmres = 50 ! size of fgmres Krylov subspace - im_pgmres = 5 ! size of pgmres Krylov subspace + dim_fgmres = 50 ! size of fgmres Krylov subspace + dim_pgmres = 5 ! size of pgmres Krylov subspace maxits_fgmres = 50 ! max nb of iteration for fgmres maxits_pgmres = 5 ! max nb of iteration for pgmres monitor_nonlin = .false. ! print nonlinear residual norm @@ -349,7 +349,7 @@ subroutine input_data reltol_pgmres = 1e-6_dbl_kind ! pgmres stopping criterion: reltol_pgmres*res(k) algo_nonlin = 'picard' ! nonlinear algorithm: 'picard' (Picard iteration), 'anderson' (Anderson acceleration) fpfunc_andacc = 1 ! fixed point function for Anderson acceleration: 1: g(x) = FMGRES(A(x),b(x)), 2: g(x) = x - A(x)x + b(x) - im_andacc = 5 ! size of Anderson minimization matrix (number of saved previous residuals) + dim_andacc = 5 ! size of Anderson minimization matrix (number of saved previous residuals) reltol_andacc = 1e-6_dbl_kind ! relative tolerance for Anderson acceleration damping_andacc = 0 ! damping factor for Anderson acceleration start_andacc = 0 ! 
acceleration delay factor (acceleration starts at this iteration) @@ -661,8 +661,8 @@ subroutine input_data call broadcast_scalar(ktransport, master_task) call broadcast_scalar(maxits_nonlin, master_task) call broadcast_scalar(precond, master_task) - call broadcast_scalar(im_fgmres, master_task) - call broadcast_scalar(im_pgmres, master_task) + call broadcast_scalar(dim_fgmres, master_task) + call broadcast_scalar(dim_pgmres, master_task) call broadcast_scalar(maxits_fgmres, master_task) call broadcast_scalar(maxits_pgmres, master_task) call broadcast_scalar(monitor_nonlin, master_task) @@ -674,7 +674,7 @@ subroutine input_data call broadcast_scalar(reltol_pgmres, master_task) call broadcast_scalar(algo_nonlin, master_task) call broadcast_scalar(fpfunc_andacc, master_task) - call broadcast_scalar(im_andacc, master_task) + call broadcast_scalar(dim_andacc, master_task) call broadcast_scalar(reltol_andacc, master_task) call broadcast_scalar(damping_andacc, master_task) call broadcast_scalar(start_andacc, master_task) @@ -1095,7 +1095,7 @@ subroutine input_data if (trim(algo_nonlin) == 'picard') then ! Picard solver is implemented in the Anderson solver; reset number of saved residuals to zero - im_andacc = 0 + dim_andacc = 0 endif if (.not. (trim(precond) == 'ident' .or. trim(precond) == 'diag' .or. trim(precond) == 'pgmres')) then @@ -1609,8 +1609,8 @@ subroutine input_data if (kdyn == 3) then write(nu_diag,1020) ' maxits_nonlin = ', maxits_nonlin write(nu_diag,1030) ' precond = ', precond - write(nu_diag,1020) ' im_fgmres = ', im_fgmres - write(nu_diag,1020) ' im_pgmres = ', im_pgmres + write(nu_diag,1020) ' dim_fgmres = ', dim_fgmres + write(nu_diag,1020) ' dim_pgmres = ', dim_pgmres write(nu_diag,1020) ' maxits_fgmres = ', maxits_fgmres write(nu_diag,1020) ' maxits_pgmres = ', maxits_pgmres write(nu_diag,1010) ' monitor_nonlin = ', monitor_nonlin @@ -1624,7 +1624,7 @@ subroutine input_data write(nu_diag,1010) ' use_mean_vrel = ', use_mean_vrel if (algo_nonlin == 'anderson') then write(nu_diag,1020) ' fpfunc_andacc = ', fpfunc_andacc - write(nu_diag,1020) ' im_andacc = ', im_andacc + write(nu_diag,1020) ' dim_andacc = ', dim_andacc write(nu_diag,1008) ' reltol_andacc = ', reltol_andacc write(nu_diag,1005) ' damping_andacc = ', damping_andacc write(nu_diag,1020) ' start_andacc = ', start_andacc diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index b783a48ab..3139726f5 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -141,8 +141,8 @@ ssh_stress = 'geostrophic' maxits_nonlin = 4 precond = 'pgmres' - im_fgmres = 50 - im_pgmres = 5 + dim_fgmres = 50 + dim_pgmres = 5 maxits_fgmres = 1 maxits_pgmres = 1 monitor_nonlin = .false. 
diff --git a/doc/source/user_guide/ug_case_settings.rst b/doc/source/user_guide/ug_case_settings.rst
index c91187b27..d04a3af9d 100644
--- a/doc/source/user_guide/ug_case_settings.rst
+++ b/doc/source/user_guide/ug_case_settings.rst
@@ -371,8 +371,8 @@ dynamics_nml
    "", "``zero``", "zero coriolis", ""
    "``Cstar``", "real", "constant in Hibler strength formula", "20"
    "``e_ratio``", "real", "EVP ellipse aspect ratio", "2.0"
-   "``im_fgmres``", "integer", "maximum number of Arnoldi iterations for FGMRES solver", "50"
-   "``im_pgmres``", "integer", "maximum number of Arnoldi iterations for PGMRES preconditioner", "5"
+   "``dim_fgmres``", "integer", "maximum number of Arnoldi iterations for FGMRES solver", "50"
+   "``dim_pgmres``", "integer", "maximum number of Arnoldi iterations for PGMRES preconditioner", "5"
    "``kdyn``", "``-1``", "dynamics algorithm OFF", "1"
    "", "``0``", "dynamics OFF", ""
    "", "``1``", "EVP dynamics", ""

From bda247bec9628d9deb2fd3513e8f75006235c94f Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 20 Jul 2020 10:22:08 -0400
Subject: [PATCH 187/196] tests: add 'dynpicard' test to base_suite

Add a test using the VP implicit solver to the base_suite.

Let's use the gx3 grid because it is less computationally intensive,
and use a 4x1 decomposition to test the solver in parallel.
---
 configuration/scripts/tests/base_suite.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configuration/scripts/tests/base_suite.ts b/configuration/scripts/tests/base_suite.ts
index e96b07622..386c29e41 100755
--- a/configuration/scripts/tests/base_suite.ts
+++ b/configuration/scripts/tests/base_suite.ts
@@ -50,3 +50,4 @@ restart gx3 4x4 iobinary
 restart gx3 4x4 histall,precision8,cdf64
 smoke gx3 30x1 bgcz,histall
 smoke gx3 14x2 fsd12,histall
+smoke gx3 4x1 dynpicard,medium

From 6d59f650f7c0931d3fe5efea64753cd94203deb2 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Mon, 20 Jul 2020 12:43:29 -0400
Subject: [PATCH 188/196] dynamics: remove proprietary vector directives

---
 cicecore/cicedynB/dynamics/ice_dyn_shared.F90 |  3 ---
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90     | 11 -----------
 2 files changed, 14 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
index 8670812e6..4cd08bad0 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
@@ -1054,9 +1054,6 @@ subroutine deformations (nx_block, ny_block, &
 
       character(len=*), parameter :: subname = '(deformations)'
 
-!DIR$ CONCURRENT !Cray
-!cdir nodep !NEC
-!ocl novrec !Fujitsu
       do ij = 1, icellt
          i = indxti(ij)
          j = indxtj(ij)
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index c13341ff8..e91b3dc31 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -1166,9 +1166,6 @@ subroutine calc_zeta_dPr (nx_block, ny_block, &
       stPr = c0
       zetaD = c0
 
-!DIR$ CONCURRENT !Cray
-!cdir nodep !NEC
-!ocl novrec !Fujitsu
       do ij = 1, icellt
          i = indxti(ij)
         j = indxtj(ij)
@@ -1597,10 +1594,6 @@ subroutine matvec (nx_block, ny_block, &
 
       str(:,:,:) = c0
 
-!DIR$ CONCURRENT !Cray
-!cdir nodep !NEC
-!ocl novrec !Fujitsu
-
       do ij = 1, icellt
          i = indxti(ij)
          j = indxtj(ij)
@@ -2030,10 +2023,6 @@ subroutine formDiag_step1 (nx_block, ny_block, &
       ! Initialize
       !-----------------------------------------------------------------
 
-!DIR$ CONCURRENT !Cray
-!cdir nodep !NEC
-!ocl novrec !Fujitsu
-
       Drheo(:,:,:) = c0
 
       ! Be careful: Drheo contains 4 terms for u and 4 terms for v.

From 13b3045eda3ad37995a6ab22bd51610dbe156278 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 21 Jul 2020 09:02:35 -0400
Subject: [PATCH 189/196] ice_init: validate implicit solver arguments only if
 solver is active

'algo_nonlin', 'precond' and 'ortho_type' are only active if
'kdyn == 3', so wrap the code validating the values of these flags in
an if block.
---
 cicecore/cicedynB/general/ice_init.F90 | 50 +++++++++++++-------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/cicecore/cicedynB/general/ice_init.F90 b/cicecore/cicedynB/general/ice_init.F90
index fb12a3b45..e9cdd8fb7 100644
--- a/cicecore/cicedynB/general/ice_init.F90
+++ b/cicecore/cicedynB/general/ice_init.F90
@@ -1085,33 +1085,35 @@ subroutine input_data
       endif
 
       ! Implicit solver input validation
-      if (.not. (trim(algo_nonlin) == 'picard' .or. trim(algo_nonlin) == 'anderson')) then
-         if (my_task == master_task) then
-            write(nu_diag,*) subname//' ERROR: unknown algo_nonlin: '//algo_nonlin
-            write(nu_diag,*) subname//' ERROR: allowed values: ''picard'', ''anderson'''
+      if (kdyn == 3) then
+         if (.not. (trim(algo_nonlin) == 'picard' .or. trim(algo_nonlin) == 'anderson')) then
+            if (my_task == master_task) then
+               write(nu_diag,*) subname//' ERROR: unknown algo_nonlin: '//algo_nonlin
+               write(nu_diag,*) subname//' ERROR: allowed values: ''picard'', ''anderson'''
+            endif
+            abort_list = trim(abort_list)//":60"
          endif
-         abort_list = trim(abort_list)//":60"
-      endif
-
-      if (trim(algo_nonlin) == 'picard') then
-         ! Picard solver is implemented in the Anderson solver; reset number of saved residuals to zero
-         dim_andacc = 0
-      endif
-
-      if (.not. (trim(precond) == 'ident' .or. trim(precond) == 'diag' .or. trim(precond) == 'pgmres')) then
-         if (my_task == master_task) then
-            write(nu_diag,*) subname//' ERROR: unknown precond: '//precond
-            write(nu_diag,*) subname//' ERROR: allowed values: ''ident'', ''diag'', ''pgmres'''
+
+         if (trim(algo_nonlin) == 'picard') then
+            ! Picard solver is implemented in the Anderson solver; reset number of saved residuals to zero
+            dim_andacc = 0
          endif
-         abort_list = trim(abort_list)//":61"
-      endif
-
-      if (.not. (trim(ortho_type) == 'cgs' .or. trim(ortho_type) == 'mgs')) then
-         if (my_task == master_task) then
-            write(nu_diag,*) subname//' ERROR: unknown ortho_type: '//ortho_type
-            write(nu_diag,*) subname//' ERROR: allowed values: ''cgs'', ''mgs'''
+
+         if (.not. (trim(precond) == 'ident' .or. trim(precond) == 'diag' .or. trim(precond) == 'pgmres')) then
+            if (my_task == master_task) then
+               write(nu_diag,*) subname//' ERROR: unknown precond: '//precond
+               write(nu_diag,*) subname//' ERROR: allowed values: ''ident'', ''diag'', ''pgmres'''
+            endif
+            abort_list = trim(abort_list)//":61"
+         endif
+
+         if (.not. (trim(ortho_type) == 'cgs' .or. trim(ortho_type) == 'mgs')) then
+            if (my_task == master_task) then
+               write(nu_diag,*) subname//' ERROR: unknown ortho_type: '//ortho_type
+               write(nu_diag,*) subname//' ERROR: allowed values: ''cgs'', ''mgs'''
+            endif
+            abort_list = trim(abort_list)//":62"
          endif
-         abort_list = trim(abort_list)//":62"
       endif
 
       ice_IOUnitsMinUnit = numin

From 3c2c5c7091b200b12341af4cff78234f7cc44675 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Tue, 21 Jul 2020 09:34:35 -0400
Subject: [PATCH 190/196] ice_dyn_vp: rename 'imp_solver' to 'implicit_solver'

Use a more explicit subroutine name, which additionally does not have
any negative connotations.
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 8 ++++---- cicecore/cicedynB/general/ice_step_mod.F90 | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index e91b3dc31..d45de93f2 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -58,7 +58,7 @@ module ice_dyn_vp implicit none private - public :: imp_solver, init_vp + public :: implicit_solver, init_vp ! namelist parameters @@ -184,7 +184,7 @@ end subroutine init_vp ! ! author: JF Lemieux, A. Qaddouri and F. Dupont ECCC - subroutine imp_solver (dt) + subroutine implicit_solver (dt) use ice_arrays_column, only: Cdn_ocn use ice_boundary, only: ice_HaloMask, ice_HaloUpdate, & @@ -253,7 +253,7 @@ subroutine imp_solver (dt) real (kind=dbl_kind), allocatable :: & sol(:) ! solution vector - character(len=*), parameter :: subname = '(imp_solver)' + character(len=*), parameter :: subname = '(implicit_solver)' call ice_timer_start(timer_dynamics) ! dynamics @@ -635,7 +635,7 @@ subroutine imp_solver (dt) call ice_timer_stop(timer_dynamics) ! dynamics - end subroutine imp_solver + end subroutine implicit_solver !======================================================================= diff --git a/cicecore/cicedynB/general/ice_step_mod.F90 b/cicecore/cicedynB/general/ice_step_mod.F90 index cf2a6b17f..deaca386c 100644 --- a/cicecore/cicedynB/general/ice_step_mod.F90 +++ b/cicecore/cicedynB/general/ice_step_mod.F90 @@ -850,7 +850,7 @@ subroutine step_dyn_horiz (dt) use ice_dyn_evp, only: evp use ice_dyn_eap, only: eap - use ice_dyn_vp, only: imp_solver + use ice_dyn_vp, only: implicit_solver use ice_dyn_shared, only: kdyn, ktransport use ice_flux, only: init_history_dyn use ice_transport_driver, only: advection, transport_upwind, transport_remap @@ -868,7 +868,7 @@ subroutine step_dyn_horiz (dt) if (kdyn == 1) call evp (dt) if (kdyn == 2) call eap (dt) - if (kdyn == 3) call imp_solver (dt) + if (kdyn == 3) call implicit_solver (dt) !----------------------------------------------------------------- ! Horizontal ice transport From ab21aadaab8d0ca03ad1a7c12e5f8dc418e34a73 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Thu, 23 Jul 2020 16:58:48 -0400 Subject: [PATCH 191/196] doc: add caveat for VP solver (tx1 grid, threading) --- doc/source/science_guide/sg_dynamics.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/science_guide/sg_dynamics.rst b/doc/source/science_guide/sg_dynamics.rst index c88fd728a..e7f214ff7 100644 --- a/doc/source/science_guide/sg_dynamics.rst +++ b/doc/source/science_guide/sg_dynamics.rst @@ -44,6 +44,8 @@ dynamics into CICE is described in detail in The VP solver implementation mostly follows :cite:`Lemieux08`, with FGMRES :cite:`Saad93` as the linear solver and GMRES as the preconditioner. +Note that the VP solver has not yet been tested on the ``tx1`` grid or with +threading enabled. Here we summarize the equations and direct the reader to the above references for details. From 99782678b05e12c497d190775a2f4536ba5a364a Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Tue, 11 Aug 2020 14:58:29 -0400 Subject: [PATCH 192/196] ice_dyn_vp: rename 'CICE_USE_LAPACK' macro to 'USE_LAPACK' Bring the name of the new macro 'CICE_USE_LAPACK' more in line with the changes to the CPP macros in 819eedd (Update CPP implementation (#490), 2020-07-31) by renaming it to 'USE_LAPACK'. 
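
Note that this macro guards the LAPACK-dependent code paths (e.g. the
BLAS functions 'ddot' and 'dnrm2' used by the Anderson solver), so
builds that need them must define it, typically through the existing
'lapack' option, whose updated content is simply:

    setenv ICE_CPPDEFS -DUSE_LAPACK

(see the change to 'configuration/scripts/options/set_env.lapack'
below).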
--- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 6 +++--- configuration/scripts/options/set_env.lapack | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index d45de93f2..416e3beac 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -769,7 +769,7 @@ subroutine anderson_solver (icellt , icellu, & prog_norm , & ! norm of difference between current and previous solution nlres_norm ! norm of current nonlinear residual : F(x) = A(x)x -b(x) -#ifdef CICE_USE_LAPACK +#ifdef USE_LAPACK real (kind=dbl_kind) :: & ddot, dnrm2 ! external BLAS functions #endif @@ -940,7 +940,7 @@ subroutine anderson_solver (icellt , icellu, & ! Compute fixed point residual f(x) = g(x) - x res = fpfunc - sol -#ifdef CICE_USE_LAPACK +#ifdef USE_LAPACK fpres_norm = global_sum(dnrm2(size(res), res, inc)**2, distrb_info) #else call vec_to_arrays (nx_block , ny_block , & @@ -980,7 +980,7 @@ subroutine anderson_solver (icellt , icellu, & ! Simple fixed point (Picard) iteration in this case sol = fpfunc else -#ifdef CICE_USE_LAPACK +#ifdef USE_LAPACK ! Begin Anderson acceleration if (get_num_procs() > 1) then ! Anderson solver is not yet parallelized; abort diff --git a/configuration/scripts/options/set_env.lapack b/configuration/scripts/options/set_env.lapack index 3571cef75..cf52ad1b0 100644 --- a/configuration/scripts/options/set_env.lapack +++ b/configuration/scripts/options/set_env.lapack @@ -1 +1 @@ -setenv ICE_CPPDEFS -DCICE_USE_LAPACK +setenv ICE_CPPDEFS -DUSE_LAPACK From 2ef7e31bb2ce29e803236968ca59d5d4b30e5ca4 Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Fri, 14 Aug 2020 12:41:20 -0400 Subject: [PATCH 193/196] comm: rename 'global_sums' to 'global_allreduce_sum' Make the purpose of the 'global_sums' interface, i.e. reducing a distributed variable to a variable of the same shape (MPI_ALLREDUCE) clearer by renaming it to 'global_allreduce_sum'. This makes it clearer that in contrast to the 'global_sum' interface, the resulting variable is of the same shape as the distributed variable. --- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 4 ++-- .../comm/mpi/ice_global_reductions.F90 | 18 +++++++++--------- .../comm/serial/ice_global_reductions.F90 | 18 +++++++++--------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index 416e3beac..a448eb6e3 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -50,7 +50,7 @@ module ice_dyn_vp vvel_init, basal_stress_coeff, basalstress, Ktens, ice_HaloUpdate_vel use ice_fileunits, only: nu_diag use ice_flux, only: fm - use ice_global_reductions, only: global_sum, global_sums + use ice_global_reductions, only: global_sum, global_allreduce_sum use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, uarear use ice_exit, only: abort_ice use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted @@ -3568,7 +3568,7 @@ subroutine orthogonalize(ortho_type , initer , & dotprod_local(it) = sum(local_dot) end do - hessenberg(1:initer, initer) = global_sums(dotprod_local(1:initer), distrb_info) + hessenberg(1:initer, initer) = global_allreduce_sum(dotprod_local(1:initer), distrb_info) ! 
Second loop of Gram-Schmidt (orthonormalize) do it = 1, initer diff --git a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 index cb926f8dd..1d724fb39 100644 --- a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 @@ -36,7 +36,7 @@ module ice_global_reductions private public :: global_sum, & - global_sums, & + global_allreduce_sum, & global_sum_prod, & global_maxval, & global_minval @@ -56,10 +56,10 @@ module ice_global_reductions global_sum_scalar_int end interface - interface global_sums - module procedure global_sums_dbl!, & - ! module procedure global_sums_real, & ! not yet implemented - ! module procedure global_sums_int ! not yet implemented + interface global_allreduce_sum + module procedure global_allreduce_sum_vector_dbl!, & + ! module procedure global_allreduce_sum_vector_real, & ! not yet implemented + ! module procedure global_allreduce_sum_vector_int ! not yet implemented end interface interface global_sum_prod @@ -709,13 +709,13 @@ end function global_sum_scalar_int !*********************************************************************** - function global_sums_dbl(vector, dist) & + function global_allreduce_sum_vector_dbl(vector, dist) & result(globalSums) ! Computes the global sums of sets of scalars (elements of 'vector') ! distributed across a parallel machine. ! -! This is actually the specific interface for the generic global_sums +! This is actually the specific interface for the generic global_allreduce_sum ! function corresponding to double precision vectors. The generic ! interface is identical but will handle real and integer vectors. @@ -744,7 +744,7 @@ function global_sums_dbl(vector, dist) & real (dbl_kind), dimension(:,:), allocatable :: & work ! temporary local array - character(len=*), parameter :: subname = '(global_sums_dbl)' + character(len=*), parameter :: subname = '(global_allreduce_sum_vector_dbl)' !----------------------------------------------------------------------- ! @@ -768,7 +768,7 @@ function global_sums_dbl(vector, dist) & !----------------------------------------------------------------------- - end function global_sums_dbl + end function global_allreduce_sum_vector_dbl !*********************************************************************** diff --git a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 index 3b37b50d5..4d53e873e 100644 --- a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 @@ -37,7 +37,7 @@ module ice_global_reductions private public :: global_sum, & - global_sums, & + global_allreduce_sum, & global_sum_prod, & global_maxval, & global_minval @@ -57,10 +57,10 @@ module ice_global_reductions global_sum_scalar_int end interface - interface global_sums - module procedure global_sums_dbl!, & - ! module procedure global_sums_real, & ! not yet implemented - ! module procedure global_sums_int ! not yet implemented + interface global_allreduce_sum + module procedure global_allreduce_sum_vector_dbl!, & + ! module procedure global_allreduce_sum_vector_real, & ! not yet implemented + ! module procedure global_allreduce_sum_vector_int ! 
not yet implemented end interface interface global_sum_prod @@ -710,13 +710,13 @@ end function global_sum_scalar_int !*********************************************************************** - function global_sums_dbl(vector, dist) & + function global_allreduce_sum_vector_dbl(vector, dist) & result(globalSums) ! Computes the global sums of sets of scalars (elements of 'vector') ! distributed across a parallel machine. ! -! This is actually the specific interface for the generic global_sums +! This is actually the specific interface for the generic global_allreduce_sum ! function corresponding to double precision vectors. The generic ! interface is identical but will handle real and integer vectors. @@ -745,7 +745,7 @@ function global_sums_dbl(vector, dist) & real (dbl_kind), dimension(:,:), allocatable :: & work ! temporary local array - character(len=*), parameter :: subname = '(global_sums_dbl)' + character(len=*), parameter :: subname = '(global_allreduce_sum_vector_dbl)' !----------------------------------------------------------------------- ! @@ -769,7 +769,7 @@ function global_sums_dbl(vector, dist) & !----------------------------------------------------------------------- - end function global_sums_dbl + end function global_allreduce_sum_vector_dbl !*********************************************************************** From 3ba4a2e4d805004f6919d31642f9ef1e82c3682b Mon Sep 17 00:00:00 2001 From: Philippe Blain Date: Mon, 24 Aug 2020 16:45:32 -0400 Subject: [PATCH 194/196] dynamics: remove 'ice_HaloUpdate_vel' and introduce '(un)stack_velocity_field' The 'ice_HaloUpdate_vel' subroutine feels out of place in 'ice_dyn_shared', and moving it to 'ice_boundary' introduces other problems, including circular dependencies. Remove it, and introduce two simple subroutines, 'stack_velocity_field' and 'unstack_velocity_field', that are responsible to load the 'uvel' and 'vvel' arrays into the 'fld2' array used for the halo updates. Use these new subroutines in ice_dyn_{evp,eap,vp}. --- cicecore/cicedynB/dynamics/ice_dyn_eap.F90 | 32 ++++++++- cicecore/cicedynB/dynamics/ice_dyn_evp.F90 | 31 ++++++++- cicecore/cicedynB/dynamics/ice_dyn_shared.F90 | 65 ++++++++++--------- cicecore/cicedynB/dynamics/ice_dyn_vp.F90 | 53 +++++++++++++-- 4 files changed, 139 insertions(+), 42 deletions(-) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 index ad99f8482..bc5823be8 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 @@ -122,7 +122,8 @@ subroutine eap (dt) use ice_dyn_shared, only: fcor_blk, ndte, dtei, & denom1, uvel_init, vvel_init, arlx1i, & dyn_prep1, dyn_prep2, stepu, dyn_finish, & - basal_stress_coeff, basalstress, ice_HaloUpdate_vel + basal_stress_coeff, basalstress, & + stack_velocity_field, unstack_velocity_field use ice_flux, only: rdg_conv, strairxT, strairyT, & strairx, strairy, uocn, vocn, ss_tltx, ss_tlty, iceumask, fm, & strtltx, strtlty, strocnx, strocny, strintx, strinty, taubx, tauby, & @@ -172,6 +173,8 @@ subroutine eap (dt) umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) + real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) + real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp ! stress combinations for momentum equation @@ -195,6 +198,8 @@ subroutine eap (dt) ! Initialize !----------------------------------------------------------------- + allocate(fld2(nx_block,ny_block,2,max_blocks)) + ! 
This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -373,7 +378,17 @@ subroutine eap (dt) endif ! velocities may have changed in dyn_prep2 - call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + call stack_velocity_field(uvel, vvel, fld2) + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + call unstack_velocity_field(fld2, uvel, vvel) !----------------------------------------------------------------- ! basal stress coefficients (landfast ice) @@ -480,10 +495,21 @@ subroutine eap (dt) enddo !$TCXOMP END PARALLEL DO - call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + call stack_velocity_field(uvel, vvel, fld2) + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + call unstack_velocity_field(fld2, uvel, vvel) enddo ! subcycling + deallocate(fld2) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) !----------------------------------------------------------------- diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 index af06e5d70..c4312c325 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 @@ -94,7 +94,7 @@ subroutine evp (dt) ice_timer_start, ice_timer_stop, timer_evp_1d, timer_evp_2d use ice_dyn_evp_1d, only: ice_dyn_evp_1d_copyin, ice_dyn_evp_1d_kernel, & ice_dyn_evp_1d_copyout - use ice_dyn_shared, only: kevp_kernel, ice_HaloUpdate_vel + use ice_dyn_shared, only: kevp_kernel, stack_velocity_field, unstack_velocity_field real (kind=dbl_kind), intent(in) :: & dt ! time step @@ -127,6 +127,8 @@ subroutine evp (dt) umass , & ! total mass of ice and snow (u grid) umassdti ! mass of U-cell/dte (kg/m^2 s) + real (kind=dbl_kind), allocatable :: fld2(:,:,:,:) + real (kind=dbl_kind), dimension(nx_block,ny_block,8):: & strtmp ! stress combinations for momentum equation @@ -152,6 +154,8 @@ subroutine evp (dt) ! Initialize !----------------------------------------------------------------- + allocate(fld2(nx_block,ny_block,2,max_blocks)) + ! This call is needed only if dt changes during runtime. ! call set_evp_parameters (dt) @@ -316,7 +320,17 @@ subroutine evp (dt) endif ! velocities may have changed in dyn_prep2 - call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + call stack_velocity_field(uvel, vvel, fld2) + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + call unstack_velocity_field(fld2, uvel, vvel) !----------------------------------------------------------------- ! 
basal stress coefficients (landfast ice) @@ -429,12 +443,23 @@ subroutine evp (dt) enddo !$TCXOMP END PARALLEL DO - call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) + call stack_velocity_field(uvel, vvel, fld2) + call ice_timer_start(timer_bound) + if (maskhalo_dyn) then + call ice_HaloUpdate (fld2, halo_info_mask, & + field_loc_NEcorner, field_type_vector) + else + call ice_HaloUpdate (fld2, halo_info, & + field_loc_NEcorner, field_type_vector) + endif + call ice_timer_stop(timer_bound) + call unstack_velocity_field(fld2, uvel, vvel) enddo ! subcycling endif ! kevp_kernel call ice_timer_stop(timer_evp_2d) + deallocate(fld2) if (maskhalo_dyn) call ice_HaloDestroy(halo_info_mask) ! Force symmetry across the tripole seam diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index 4cd08bad0..af66a5413 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -24,7 +24,8 @@ module ice_dyn_shared private public :: init_evp, set_evp_parameters, stepu, principal_stress, & dyn_prep1, dyn_prep2, dyn_finish, basal_stress_coeff, & - alloc_dyn_shared, deformations, strain_rates, ice_HaloUpdate_vel + alloc_dyn_shared, deformations, strain_rates, & + stack_velocity_field, unstack_velocity_field ! namelist parameters @@ -95,9 +96,6 @@ module ice_dyn_shared ! see keel data from Amundrud et al. 2004 (JGR) u0 = 5e-5_dbl_kind ! residual velocity for basal stress (m/s) - real (kind=dbl_kind), allocatable :: & - fld2(:,:,:,:) ! work array for boundary updates - !======================================================================= contains @@ -113,7 +111,6 @@ subroutine alloc_dyn_shared allocate( & uvel_init (nx_block,ny_block,max_blocks), & ! x-component of velocity (m/s), beginning of timestep vvel_init (nx_block,ny_block,max_blocks), & ! y-component of velocity (m/s), beginning of timestep - fld2 (nx_block,ny_block,2,max_blocks), & ! work array for boundary updates stat=ierr) if (ierr/=0) call abort_ice('(alloc_dyn_shared): Out of memory') @@ -1186,29 +1183,25 @@ end subroutine strain_rates !======================================================================= -! Perform a halo update for the velocity field -! author: Philippe Blain, ECCC - - subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) +! Load velocity components into array for boundary updates - use ice_boundary, only: ice_HaloUpdate, ice_halo - use ice_constants, only: field_loc_NEcorner, field_type_vector - use ice_domain, only: halo_info, maskhalo_dyn, nblocks - use ice_timers, only: timer_bound, ice_timer_start, ice_timer_stop + subroutine stack_velocity_field(uvel, vvel, fld2) - type (ice_halo), intent(in) :: & - halo_info_mask ! ghost cell update info for masked halo + use ice_domain, only: nblocks - real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(inout) :: & + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(in) :: & uvel , & ! u components of velocity vector vvel ! v components of velocity vector + real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(out) :: & + fld2 ! work array for boundary updates + ! local variables integer (kind=int_kind) :: & iblk ! block index - character(len=*), parameter :: subname = '(ice_HaloUpdate_vel)' + character(len=*), parameter :: subname = '(stack_velocity_field)' ! 
load velocity into array for boundary updates !$OMP PARALLEL DO PRIVATE(iblk) @@ -1218,17 +1211,31 @@ subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) enddo !$OMP END PARALLEL DO - call ice_timer_start(timer_bound) - if (maskhalo_dyn) then - call ice_HaloUpdate (fld2, halo_info_mask, & - field_loc_NEcorner, field_type_vector) - else - call ice_HaloUpdate (fld2, halo_info, & - field_loc_NEcorner, field_type_vector) - endif - call ice_timer_stop(timer_bound) + end subroutine stack_velocity_field + +!======================================================================= + +! Unload velocity components from array after boundary updates + + subroutine unstack_velocity_field(fld2, uvel, vvel) + + use ice_domain, only: nblocks + + real (kind=dbl_kind), dimension (nx_block,ny_block,2,max_blocks), intent(in) :: & + fld2 ! work array for boundary updates - ! Unload + real (kind=dbl_kind), dimension (nx_block,ny_block,max_blocks), intent(out) :: & + uvel , & ! u components of velocity vector + vvel ! v components of velocity vector + + ! local variables + + integer (kind=int_kind) :: & + iblk ! block index + + character(len=*), parameter :: subname = '(unstack_velocity_field)' + + ! Unload velocity from array after boundary updates !$OMP PARALLEL DO PRIVATE(iblk) do iblk = 1, nblocks uvel(:,:,iblk) = fld2(:,:,1,iblk) @@ -1236,10 +1243,10 @@ subroutine ice_HaloUpdate_vel(uvel, vvel, halo_info_mask) enddo !$OMP END PARALLEL DO - end subroutine ice_HaloUpdate_vel + end subroutine unstack_velocity_field !======================================================================= - + end module ice_dyn_shared !======================================================================= diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 index a448eb6e3..6f43b2fe1 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 @@ -47,7 +47,8 @@ module ice_dyn_vp use ice_domain_size, only: max_blocks use ice_dyn_shared, only: dyn_prep1, dyn_prep2, dyn_finish, & ecci, cosw, sinw, fcor_blk, uvel_init, & - vvel_init, basal_stress_coeff, basalstress, Ktens, ice_HaloUpdate_vel + vvel_init, basal_stress_coeff, basalstress, Ktens, & + stack_velocity_field, unstack_velocity_field use ice_fileunits, only: nu_diag use ice_flux, only: fm use ice_global_reductions, only: global_sum, global_allreduce_sum @@ -102,6 +103,9 @@ module ice_dyn_vp indxui(:,:) , & ! compressed index in i-direction indxuj(:,:) ! compressed index in j-direction + real (kind=dbl_kind), allocatable :: & + fld2(:,:,:,:) ! work array for boundary updates + !======================================================================= contains @@ -145,6 +149,7 @@ subroutine init_vp (dt) indxtj(nx_block*ny_block, max_blocks), & indxui(nx_block*ny_block, max_blocks), & indxuj(nx_block*ny_block, max_blocks)) + allocate(fld2(nx_block,ny_block,2,max_blocks)) ! Redefine tinyarea using min_strain_rate @@ -433,7 +438,17 @@ subroutine implicit_solver (dt) endif ! 
      ! velocities may have changed in dyn_prep2
-      call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask)
+      call stack_velocity_field(uvel, vvel, fld2)
+      call ice_timer_start(timer_bound)
+      if (maskhalo_dyn) then
+         call ice_HaloUpdate (fld2, halo_info_mask, &
+                              field_loc_NEcorner, field_type_vector)
+      else
+         call ice_HaloUpdate (fld2, halo_info, &
+                              field_loc_NEcorner, field_type_vector)
+      endif
+      call ice_timer_stop(timer_bound)
+      call unstack_velocity_field(fld2, uvel, vvel)

      !-----------------------------------------------------------------
      ! basal stress coefficients (landfast ice)
@@ -664,13 +679,13 @@ subroutine anderson_solver (icellt , icellu, &
      use ice_blocks, only: nx_block, ny_block
      use ice_boundary, only: ice_HaloUpdate
      use ice_constants, only: c1
-      use ice_domain, only: maskhalo_dyn
+      use ice_domain, only: maskhalo_dyn, halo_info
      use ice_domain_size, only: max_blocks
      use ice_flux, only: uocn, vocn, fm, Tbu
      use ice_grid, only: dxt, dyt, dxhy, dyhx, cxp, cyp, cxm, cym, &
          uarear, tinyarea
      use ice_state, only: uvel, vvel, strength
-      use ice_timers, only: ice_timer_start, ice_timer_stop
+      use ice_timers, only: ice_timer_start, ice_timer_stop, timer_bound

      integer (kind=int_kind), intent(in) :: &
         ntot ! size of problem for Anderson
@@ -1069,7 +1084,17 @@ subroutine anderson_solver (icellt , icellu, &
                              uvel (:,:,:), vvel (:,:,:))

         ! Do halo update so that halo cells contain up to date info for advection
-         call ice_HaloUpdate_vel(uvel, vvel, halo_info_mask)
+         call stack_velocity_field(uvel, vvel, fld2)
+         call ice_timer_start(timer_bound)
+         if (maskhalo_dyn) then
+            call ice_HaloUpdate (fld2, halo_info_mask, &
+                                 field_loc_NEcorner, field_type_vector)
+         else
+            call ice_HaloUpdate (fld2, halo_info, &
+                                 field_loc_NEcorner, field_type_vector)
+         endif
+         call ice_timer_stop(timer_bound)
+         call unstack_velocity_field(fld2, uvel, vvel)

         ! Compute "progress" residual norm
         !$OMP PARALLEL DO PRIVATE(iblk)
@@ -2644,6 +2669,10 @@ subroutine fgmres (zetaD , &
                         solx , soly , &
                         nbiter)

+      use ice_boundary, only: ice_HaloUpdate
+      use ice_domain, only: maskhalo_dyn, halo_info
+      use ice_timers, only: ice_timer_start, ice_timer_stop, timer_bound
+
      real (kind=dbl_kind), dimension(nx_block,ny_block,max_blocks,4), intent(in) :: &
         zetaD ! zetaD = 2*zeta (viscous coefficient)
@@ -2839,8 +2868,18 @@ subroutine fgmres (zetaD , &
         orig_basis_y(:,:,:,initer) = workspace_y

         ! Update workspace with boundary values
-         call ice_HaloUpdate_vel(workspace_x, workspace_y, &
-                                 halo_info_mask)
+         call stack_velocity_field(workspace_x, workspace_y, fld2)
+         call ice_timer_start(timer_bound)
+         if (maskhalo_dyn) then
+            call ice_HaloUpdate (fld2, halo_info_mask, &
+                                 field_loc_NEcorner, field_type_vector)
+         else
+            call ice_HaloUpdate (fld2, halo_info, &
+                                 field_loc_NEcorner, field_type_vector)
+         endif
+         call ice_timer_stop(timer_bound)
+         call unstack_velocity_field(fld2, workspace_x, workspace_y)
+
        !$OMP PARALLEL DO PRIVATE(iblk)
        do iblk = 1, nblocks
           call matvec (nx_block , ny_block , &

From e96bdda7d48e9a9a3d39c3b9614d30a0ae8542e1 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Wed, 26 Aug 2020 15:31:43 -0400
Subject: [PATCH 195/196] dynamics: rename 'init_evp' to 'init_dyn'

The 'init_evp' subroutine initializes arrays that are used in the EVP,
EAP or VP dynamics. Rename it to 'init_dyn' to emphasize that it is not
specific to EVP.
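For illustration, a call site that previously read:

    use ice_dyn_shared, only: init_evp
    ...
    call init_evp (dt_dyn)   ! define evp dynamics parameters, variables

now reads (a sketch of the pattern, not an exact excerpt from any one
caller):

    use ice_dyn_shared, only: init_dyn
    ...
    call init_dyn (dt_dyn)   ! define dynamics parameters, variables

The interface is unchanged; only the name differs.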
---
 cicecore/cicedynB/dynamics/ice_dyn_eap.F90        |  6 +++---
 cicecore/cicedynB/dynamics/ice_dyn_shared.F90     | 10 +++++-----
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90         |  4 ++--
 cicecore/drivers/direct/hadgem3/CICE_InitMod.F90  |  4 ++--
 cicecore/drivers/mct/cesm1/CICE_InitMod.F90       |  4 ++--
 cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90     |  4 ++--
 cicecore/drivers/nuopc/dmi/CICE_InitMod.F90       |  4 ++--
 cicecore/drivers/standalone/cice/CICE_InitMod.F90 |  4 ++--
 doc/source/developer_guide/dg_driver.rst          |  2 +-
 9 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
index bc5823be8..ebaf226fa 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
@@ -544,13 +544,13 @@ end subroutine eap
 !=======================================================================

 ! Initialize parameters and variables needed for the eap dynamics
-! (based on init_evp)
+! (based on init_dyn)

       subroutine init_eap (dt)

      use ice_blocks, only: nx_block, ny_block
      use ice_domain, only: nblocks
-      use ice_dyn_shared, only: init_evp
+      use ice_dyn_shared, only: init_dyn

      real (kind=dbl_kind), intent(in) :: &
         dt ! time step
@@ -583,7 +583,7 @@ subroutine init_eap (dt)
            file=__FILE__, line=__LINE__)
      phi = pi/c12 ! diamond shaped floe smaller angle (default phi = 30 deg)

-      call init_evp (dt)
+      call init_dyn (dt)

      !$OMP PARALLEL DO PRIVATE(iblk,i,j)
      do iblk = 1, nblocks
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
index af66a5413..d9a0919e6 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90
@@ -22,7 +22,7 @@ module ice_dyn_shared
      implicit none
      private
-      public :: init_evp, set_evp_parameters, stepu, principal_stress, &
+      public :: init_dyn, set_evp_parameters, stepu, principal_stress, &
                dyn_prep1, dyn_prep2, dyn_finish, basal_stress_coeff, &
                alloc_dyn_shared, deformations, strain_rates, &
                stack_velocity_field, unstack_velocity_field
@@ -118,10 +118,10 @@ end subroutine alloc_dyn_shared

 !=======================================================================

-! Initialize parameters and variables needed for the evp dynamics
+! Initialize parameters and variables needed for the dynamics
 ! author: Elizabeth C. Hunke, LANL

-      subroutine init_evp (dt)
+      subroutine init_dyn (dt)

      use ice_blocks, only: nx_block, ny_block
      use ice_domain, only: nblocks
@@ -142,7 +142,7 @@ subroutine init_evp (dt)
         i, j, &
         iblk ! block index

-      character(len=*), parameter :: subname = '(init_evp)'
+      character(len=*), parameter :: subname = '(init_dyn)'

      call set_evp_parameters (dt)
@@ -200,7 +200,7 @@ subroutine init_evp (dt)
      enddo ! iblk
      !$OMP END PARALLEL DO

-      end subroutine init_evp
+      end subroutine init_dyn

 !=======================================================================
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index 6f43b2fe1..f3226e679 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -122,7 +122,7 @@ subroutine init_vp (dt)
      use ice_constants, only: c1, &
          field_loc_center, field_type_scalar
      use ice_domain, only: blocks_ice, halo_info
-      use ice_dyn_shared, only: init_evp
+      use ice_dyn_shared, only: init_dyn
      use ice_grid, only: tarea, tinyarea

      real (kind=dbl_kind), intent(in) :: &
@@ -141,7 +141,7 @@ subroutine init_vp (dt)
         min_strain_rate = 2e-09_dbl_kind ! used for recomputing tinyarea

      ! Initialize variables shared with evp
-      call init_evp(dt)
+      call init_dyn(dt)

      ! Initialize module variables
      allocate(icellt(max_blocks), icellu(max_blocks))
diff --git a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
index 2e88f6334..0bbeaaf16 100644
--- a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
+++ b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
@@ -71,7 +71,7 @@ subroutine cice_init
      use ice_domain, only: init_domain_blocks
      use ice_domain_size, only: ncat, nfsd
      use ice_dyn_eap, only: init_eap, alloc_dyn_eap
-      use ice_dyn_shared, only: kdyn, init_evp, basalstress, alloc_dyn_shared
+      use ice_dyn_shared, only: kdyn, init_dyn, basalstress, alloc_dyn_shared
      use ice_dyn_vp, only: init_vp
      use ice_flux, only: init_coupler_flux, init_history_therm, &
          init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux
@@ -127,7 +127,7 @@ subroutine cice_init
      else if (kdyn == 3) then
         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
      else ! for both kdyn = 0 or 1
-         call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
index 79d6753b0..3052af79d 100644
--- a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
+++ b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
@@ -71,7 +71,7 @@ subroutine cice_init(mpicom_ice)
      use ice_domain, only: init_domain_blocks
      use ice_domain_size, only: ncat, nfsd
      use ice_dyn_eap, only: init_eap, alloc_dyn_eap
-      use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared
+      use ice_dyn_shared, only: kdyn, init_dyn, alloc_dyn_shared
      use ice_dyn_vp, only: init_vp
      use ice_flux, only: init_coupler_flux, init_history_therm, &
          init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux
@@ -129,7 +129,7 @@ subroutine cice_init(mpicom_ice)
      else if (kdyn == 3) then
         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
      else ! for both kdyn = 0 or 1
-         call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90 b/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
index 917774908..db5239978 100644
--- a/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
+++ b/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
@@ -52,7 +52,7 @@ subroutine cice_init
      use ice_domain, only: init_domain_blocks
      use ice_domain_size, only: ncat, nfsd
      use ice_dyn_eap, only: init_eap, alloc_dyn_eap
-      use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared
+      use ice_dyn_shared, only: kdyn, init_dyn, alloc_dyn_shared
      use ice_flux, only: init_coupler_flux, init_history_therm, &
          init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux
      use ice_forcing, only: init_forcing_ocn
@@ -102,7 +102,7 @@ subroutine cice_init
         call alloc_dyn_eap ! allocate dyn_eap arrays
         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
      else ! for both kdyn = 0 or 1
-         call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90 b/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
index 4e236bb11..6f1b94e58 100644
--- a/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
+++ b/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
@@ -76,7 +76,7 @@ subroutine cice_init(mpi_comm)
      use ice_domain, only: init_domain_blocks
      use ice_domain_size, only: ncat, nfsd
      use ice_dyn_eap, only: init_eap, alloc_dyn_eap
-      use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared
+      use ice_dyn_shared, only: kdyn, init_dyn, alloc_dyn_shared
      use ice_flux, only: init_coupler_flux, init_history_therm, &
          init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux
      use ice_forcing, only: init_forcing_ocn, init_forcing_atmo, &
@@ -138,7 +138,7 @@ subroutine cice_init(mpi_comm)
         call alloc_dyn_eap ! allocate dyn_eap arrays
         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
      else ! for both kdyn = 0 or 1
-         call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 b/cicecore/drivers/standalone/cice/CICE_InitMod.F90
index 1fc64c259..16531f043 100644
--- a/cicecore/drivers/standalone/cice/CICE_InitMod.F90
+++ b/cicecore/drivers/standalone/cice/CICE_InitMod.F90
@@ -71,7 +71,7 @@ subroutine cice_init
      use ice_domain, only: init_domain_blocks
      use ice_domain_size, only: ncat, nfsd
      use ice_dyn_eap, only: init_eap, alloc_dyn_eap
-      use ice_dyn_shared, only: kdyn, init_evp, alloc_dyn_shared
+      use ice_dyn_shared, only: kdyn, init_dyn, alloc_dyn_shared
      use ice_dyn_vp, only: init_vp
      use ice_flux, only: init_coupler_flux, init_history_therm, &
          init_history_dyn, init_flux_atm, init_flux_ocn, alloc_flux
@@ -129,7 +129,7 @@ subroutine cice_init
      else if (kdyn == 3) then
         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
      else ! for both kdyn = 0 or 1
-         call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/doc/source/developer_guide/dg_driver.rst b/doc/source/developer_guide/dg_driver.rst
index dd560a17c..d7fefa82d 100644
--- a/doc/source/developer_guide/dg_driver.rst
+++ b/doc/source/developer_guide/dg_driver.rst
@@ -58,7 +58,7 @@ The initialize calling sequence looks something like::
    if (kdyn == 2) then
       call init_eap (dt_dyn) ! define eap dynamics parameters, variables
    else ! for both kdyn = 0 or 1
-      call init_evp (dt_dyn) ! define evp dynamics parameters, variables
+      call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
    endif
    call init_coupler_flux ! initialize fluxes exchanged with coupler
    call init_thermo_vertical ! initialize vertical thermodynamics

From 2fb99d1a8f2f196f05d081be6deb7231a61cd3f4 Mon Sep 17 00:00:00 2001
From: Philippe Blain
Date: Wed, 26 Aug 2020 15:53:39 -0400
Subject: [PATCH 196/196] drivers: call 'init_dyn' unconditionally

'init_dyn' is called in both 'init_eap' and 'init_vp', so it makes more
sense for this subroutine to be called unconditionally in the drivers,
and then call 'init_eap' or 'init_vp' if the EAP or VP dynamics are
chosen.
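The resulting initialization sequence in each driver is then roughly
the following (a sketch; following the existing convention, kdyn = 2
selects the EAP dynamics and kdyn = 3 the VP dynamics):

    call init_dyn (dt_dyn)      ! define dynamics parameters, variables
    if (kdyn == 2) then
       call alloc_dyn_eap       ! allocate dyn_eap arrays
       call init_eap            ! define eap dynamics parameters, variables
    else if (kdyn == 3) then
       call init_vp             ! define vp dynamics parameters, variables
    endif

Since 'init_eap' and 'init_vp' no longer call 'init_dyn' themselves,
their 'dt' argument becomes unused and is removed.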
---
 cicecore/cicedynB/dynamics/ice_dyn_eap.F90        | 8 +-------
 cicecore/cicedynB/dynamics/ice_dyn_vp.F90         | 9 +--------
 cicecore/drivers/direct/hadgem3/CICE_InitMod.F90  | 7 +++----
 cicecore/drivers/mct/cesm1/CICE_InitMod.F90       | 7 +++----
 cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90     | 7 ++++---
 cicecore/drivers/nuopc/dmi/CICE_InitMod.F90       | 7 ++++---
 cicecore/drivers/standalone/cice/CICE_InitMod.F90 | 7 +++----
 doc/source/developer_guide/dg_driver.rst          | 7 ++++---
 8 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90 b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
index ebaf226fa..8d4f5cccf 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_eap.F90
@@ -546,14 +546,10 @@ end subroutine eap
 ! Initialize parameters and variables needed for the eap dynamics
 ! (based on init_dyn)

-      subroutine init_eap (dt)
+      subroutine init_eap

      use ice_blocks, only: nx_block, ny_block
      use ice_domain, only: nblocks
-      use ice_dyn_shared, only: init_dyn
-
-      real (kind=dbl_kind), intent(in) :: &
-         dt ! time step

      ! local variables
@@ -583,8 +579,6 @@ subroutine init_eap (dt)
            file=__FILE__, line=__LINE__)
      phi = pi/c12 ! diamond shaped floe smaller angle (default phi = 30 deg)

-      call init_dyn (dt)
-
      !$OMP PARALLEL DO PRIVATE(iblk,i,j)
      do iblk = 1, nblocks
         do j = 1, ny_block
diff --git a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
index f3226e679..773d76440 100644
--- a/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
+++ b/cicecore/cicedynB/dynamics/ice_dyn_vp.F90
@@ -115,19 +115,15 @@ module ice_dyn_vp
 ! Initialize parameters and variables needed for the vp dynamics
 ! author: Philippe Blain, ECCC

-      subroutine init_vp (dt)
+      subroutine init_vp

      use ice_blocks, only: get_block, block
      use ice_boundary, only: ice_HaloUpdate
      use ice_constants, only: c1, &
          field_loc_center, field_type_scalar
      use ice_domain, only: blocks_ice, halo_info
-      use ice_dyn_shared, only: init_dyn
      use ice_grid, only: tarea, tinyarea

-      real (kind=dbl_kind), intent(in) :: &
-         dt ! time step
-
      ! local variables

      integer (kind=int_kind) :: &
         i, j, iblk, &
         ilo,ihi,jlo,jhi ! beginning and end of physical domain

      type (block) :: &
         this_block ! block information for current block

      real (kind=dbl_kind) :: &
         min_strain_rate = 2e-09_dbl_kind ! used for recomputing tinyarea
-
-      ! Initialize variables shared with evp
-      call init_dyn(dt)

      ! Initialize module variables
      allocate(icellt(max_blocks), icellu(max_blocks))
diff --git a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90 b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
index 0bbeaaf16..49cf12ce1 100644
--- a/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
+++ b/cicecore/drivers/direct/hadgem3/CICE_InitMod.F90
@@ -121,13 +121,12 @@ subroutine cice_init
      call init_calendar ! initialize some calendar stuff
      call init_hist (dt) ! initialize output history file

+      call init_dyn (dt_dyn) ! define dynamics parameters, variables
      if (kdyn == 2) then
         call alloc_dyn_eap ! allocate dyn_eap arrays
-         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
+         call init_eap ! define eap dynamics parameters, variables
      else if (kdyn == 3) then
-         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
-      else ! for both kdyn = 0 or 1
-         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+         call init_vp ! define vp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90 b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
index 3052af79d..da745d965 100644
--- a/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
+++ b/cicecore/drivers/mct/cesm1/CICE_InitMod.F90
@@ -123,13 +123,12 @@ subroutine cice_init(mpicom_ice)
      call init_calendar ! initialize some calendar stuff
      call init_hist (dt) ! initialize output history file

+      call init_dyn (dt_dyn) ! define dynamics parameters, variables
      if (kdyn == 2) then
         call alloc_dyn_eap ! allocate dyn_eap arrays
-         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
+         call init_eap ! define eap dynamics parameters, variables
      else if (kdyn == 3) then
-         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
-      else ! for both kdyn = 0 or 1
-         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+         call init_vp ! define vp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90 b/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
index db5239978..cb70c9b4a 100644
--- a/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
+++ b/cicecore/drivers/nuopc/cmeps/CICE_InitMod.F90
@@ -98,11 +98,12 @@ subroutine cice_init
      call init_calendar ! initialize some calendar stuff
      call init_hist (dt) ! initialize output history file

+      call init_dyn (dt_dyn) ! define dynamics parameters, variables
      if (kdyn == 2) then
         call alloc_dyn_eap ! allocate dyn_eap arrays
-         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
-      else ! for both kdyn = 0 or 1
-         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+         call init_eap ! define eap dynamics parameters, variables
+      else if (kdyn == 3) then
+         call init_vp ! define vp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90 b/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
index 6f1b94e58..70ef5f895 100644
--- a/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
+++ b/cicecore/drivers/nuopc/dmi/CICE_InitMod.F90
@@ -134,11 +134,12 @@ subroutine cice_init(mpi_comm)
      call init_calendar ! initialize some calendar stuff
      call init_hist (dt) ! initialize output history file

+      call init_dyn (dt_dyn) ! define dynamics parameters, variables
      if (kdyn == 2) then
         call alloc_dyn_eap ! allocate dyn_eap arrays
-         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
-      else ! for both kdyn = 0 or 1
-         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+         call init_eap ! define eap dynamics parameters, variables
+      else if (kdyn == 3) then
+         call init_vp ! define vp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/cicecore/drivers/standalone/cice/CICE_InitMod.F90 b/cicecore/drivers/standalone/cice/CICE_InitMod.F90
index 16531f043..8b507740d 100644
--- a/cicecore/drivers/standalone/cice/CICE_InitMod.F90
+++ b/cicecore/drivers/standalone/cice/CICE_InitMod.F90
@@ -123,13 +123,12 @@ subroutine cice_init
      call init_calendar ! initialize some calendar stuff
      call init_hist (dt) ! initialize output history file

+      call init_dyn (dt_dyn) ! define dynamics parameters, variables
      if (kdyn == 2) then
         call alloc_dyn_eap ! allocate dyn_eap arrays
-         call init_eap (dt_dyn) ! define eap dynamics parameters, variables
+         call init_eap ! define eap dynamics parameters, variables
      else if (kdyn == 3) then
-         call init_vp (dt_dyn) ! define vp dynamics parameters, variables
-      else ! for both kdyn = 0 or 1
-         call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+         call init_vp ! define vp dynamics parameters, variables
      endif

      call init_coupler_flux ! initialize fluxes exchanged with coupler
diff --git a/doc/source/developer_guide/dg_driver.rst b/doc/source/developer_guide/dg_driver.rst
index d7fefa82d..a10cb319a 100644
--- a/doc/source/developer_guide/dg_driver.rst
+++ b/doc/source/developer_guide/dg_driver.rst
@@ -55,10 +55,11 @@ The initialize calling sequence looks something like::
    call init_zbgc ! vertical biogeochemistry initialization
    call init_calendar ! initialize some calendar stuff
    call init_hist (dt) ! initialize output history file
+   call init_dyn (dt_dyn) ! define dynamics parameters, variables
    if (kdyn == 2) then
-      call init_eap (dt_dyn) ! define eap dynamics parameters, variables
-   else ! for both kdyn = 0 or 1
-      call init_dyn (dt_dyn) ! define evp dynamics parameters, variables
+      call init_eap ! define eap dynamics parameters, variables
+   else if (kdyn == 3) then
+      call init_vp ! define vp dynamics parameters, variables
    endif
    call init_coupler_flux ! initialize fluxes exchanged with coupler
    call init_thermo_vertical ! initialize vertical thermodynamics