From 032e43cad5b12a145c4ec1b2eeb83eafb181d07b Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 10:01:55 -0400
Subject: [PATCH 1/4] guard omp directives with _OPENMP. Use compiler flag when OpenMP is found

---
 CMakeLists.txt        | 31 +++++++++++++++++++--------------
 atmosphere_stub.F90   | 50 +++++++++++++++++++++++++++++---------------------
 four_to_grid_stochy.F | 12 ++++++++++++
 spectral_layout.F90   |  4 ++++
 sumfln_stochy.f       | 12 ++++++++++++
 5 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d94ec2..e00c228 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,16 +1,16 @@
 if(32BIT)
 remove_definitions(-DOVERLOAD_R4)
 remove_definitions(-DOVERLOAD_R8)
-message ("Force 64 bits in stochastic_physics")
-if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
-  if(REPRO)
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  else()
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  endif()
-elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
-  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
-endif()
+  message ("Force 64 bits in stochastic_physics")
+  if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
+  if(REPRO)
+    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+  else()
+    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+  endif()
+  elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
+  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
+  endif()
 endif()
 
 add_library(
@@ -53,9 +53,12 @@ add_library(
   ./cellular_automata_global.F90
   ./cellular_automata_sgs.F90
   ./update_ca.F90
-  ./lndp_apply_perts.F90 
+  ./lndp_apply_perts.F90
 )
+add_dependencies(stochastic_physics fms)
 
-target_link_libraries(stochastic_physics sp::sp_d)
-target_link_libraries(stochastic_physics fms)
-
+target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
+target_link_libraries(stochastic_physics PUBLIC fms)
+if(OpenMP_Fortran_FOUND)
+  target_link_libraries(stochastic_physics PUBLIC OpenMP::OpenMP_Fortran)
+endif()
diff --git a/atmosphere_stub.F90 b/atmosphere_stub.F90
index 38a7fd7..d204e7a 100644
--- a/atmosphere_stub.F90
+++ b/atmosphere_stub.F90
@@ -87,10 +87,10 @@ module atmosphere_stub_mod
 !>@brief The subroutine 'atmosphere_init' is an API to initialize the FV3 dynamical core,
-!! including the grid structures, memory, initial state (self-initialization or restart), 
-!! and diagnostics. 
+!! including the grid structures, memory, initial state (self-initialization or restart),
+!! and diagnostics.
 subroutine atmosphere_init_stub (Grid_box, area)
-#ifdef OPENMP
+#ifdef _OPENMP
   use omp_lib
 #endif
   type(grid_box_type), intent(inout) :: Grid_box
   real*8, pointer, dimension(:,:), intent(inout) :: area
 !--- local variables ---
@@ -172,11 +172,11 @@ subroutine atmosphere_init_stub (Grid_box, area)
 
                          call timing_off('ATMOS_INIT')
 
-        
+
  end subroutine atmosphere_init_stub
 
 ! subroutine atmosphere_smooth_noise (wnoise,npass,ns_type,renorm_type)
-! 
+!
 !    !--- interface variables ---
 !    real,intent(inout) :: wnoise(isd:ied,jsd:jed,1)
 !    integer, intent(in) :: npass,ns_type,renorm_type
@@ -184,7 +184,7 @@ end subroutine atmosphere_init_stub
 !    integer:: i,j,nloops,nlast
 !    real ::inflation(isc:iec,jsc:jec),inflation2
 ! ! scale factor for restoring inflation
-! ! logic: 
+! ! logic:
 ! ! if box mean:   scalar get basic scaling, vector gets 1/grid dependent scaling 0-0 ; 0 - 1
 ! ! if box mean2:  no scaling
 ! ! if del2     :  scalar gets grid dependent scaling,vector get basic scaling 1 0; 1 1
@@ -202,14 +202,14 @@ end subroutine atmosphere_init_stub
 !          inflation(i,j)=inflation2*Atm(mytile)%gridstruct%dxAV/(0.5*(Atm(mytile)%gridstruct%dx(i,j)+Atm(mytile)%gridstruct%dy(i,j)))
 !       enddo
 !    enddo
-! else 
+! else
 !    if ( renorm_type.EQ.1) then ! box smooth does not need scaling for scalar
 !       do j=jsc,jec
 !          do i=isc,iec
 !             inflation(i,j)=inflation2
 !          enddo
 !       enddo
-!    else 
+!    else
 !    ! box mean needs inversize grid-size scaling for vector
 !       do j=jsc,jec
 !          do i=isc,iec
@@ -221,7 +221,7 @@ end subroutine atmosphere_init_stub
 !    endif
 !    nloops=npass/3
 !    nlast=mod(npass,3)
-!    do j=1,nloops 
+!    do j=1,nloops
 !       if (ns_type.EQ.1) then
 !    !call del2_cubed(wnoise , 0.25*Atm(mytile)%gridstruct%da_min, Atm(mytile)%gridstruct, &
 !          call del2_cubed(wnoise , 0.20*Atm(mytile)%gridstruct%da_min, Atm(mytile)%gridstruct, &
@@ -272,8 +272,8 @@ end subroutine atmosphere_init_stub
 !    call make_c_winds(ua, va, psi,Atm(mytile)%ng,Atm(mytile)%gridstruct,Atm(mytile)%bd,Atm(mytile)%npx,Atm(mytile)%npy)
 !! populate wind perturbations right here
 !    do k=1,km
-!       Atm(mytile)%urandom_c(isc:iec+edge,jsc:jec     ,k)=ua*vwts(k) 
-!       Atm(mytile)%vrandom_c(isc:iec     ,jsc:jec+edge,k)=va*vwts(k) 
+!       Atm(mytile)%urandom_c(isc:iec+edge,jsc:jec     ,k)=ua*vwts(k)
+!       Atm(mytile)%vrandom_c(isc:iec     ,jsc:jec+edge,k)=va*vwts(k)
 !    enddo
 !    !call mpp_update_domains(Atm(mytile)%urandom_c, Atm(mytile)%domain, complete=.true.)
 !    !call mpp_update_domains(Atm(mytile)%vrandom_c, Atm(mytile)%domain, complete=.true.)
@@ -320,11 +320,13 @@ subroutine del2_cubed(q, cd, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes - n
-      
+
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,q,is,ie,js,je,npx,npy, &
 !$OMP                                  nt,isd,jsd,gridstruct,bd, &
 !$OMP                                  cd) &
 !$OMP                          private(fx, fy)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -423,9 +425,11 @@ subroutine box_mean(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -519,9 +523,11 @@ subroutine box_mean2(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -575,7 +581,7 @@ subroutine make_a_winds(ua, va, psi, ng, gridstruct, bd, npx, npy)
 type(fv_grid_type), intent(IN), target :: gridstruct
 ! Local:
 real, dimension(bd%isd:bd%ied,bd%jsd:bd%jed) :: wk
-real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v 
+real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v
 integer i,j
 integer :: is,  ie,  js,  je
 
@@ -614,7 +620,7 @@ subroutine make_c_winds(uc, vc, psi, ng, gridstruct, bd, npx, npy)
 type(fv_grid_type), intent(IN), target :: gridstruct
 ! Local:
 real, dimension(bd%isd:bd%ied,bd%jsd:bd%jed) :: wk
-real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v 
+real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v
 integer i,j
 integer :: is,  ie,  js,  je
 
@@ -637,8 +643,8 @@ subroutine make_c_winds(uc, vc, psi, ng, gridstruct, bd, npx, npy)
 
 end subroutine make_c_winds
 
-!>@brief The subroutine 'atmospehre_resolution' is an API to return the local 
-!! extents of the current MPI-rank or the global extents of the current 
+!>@brief The subroutine 'atmospehre_resolution' is an API to return the local
+!! extents of the current MPI-rank or the global extents of the current
 !! cubed-sphere tile.
 subroutine atmosphere_resolution (i_size, j_size, global)
   integer, intent(out) :: i_size, j_size
@@ -657,7 +663,7 @@ subroutine atmosphere_resolution (i_size, j_size, global)
    end if
 end subroutine atmosphere_resolution
 !>@brief The subroutine 'atmosphere_domain' is an API to return
-!! the "domain2d" variable associated with the coupling grid and the 
+!! the "domain2d" variable associated with the coupling grid and the
 !! decomposition for the current cubed-sphere tile.
 !>@detail Coupling is done using the mass/temperature grid with no halos.
 subroutine atmosphere_domain ( fv_domain, layout, regional, nested, pelist )
@@ -683,10 +689,10 @@ subroutine set_atmosphere_pelist ()
 end subroutine set_atmosphere_pelist
 
 
-!>@brief The subroutine 'atmosphere_scalar_field_halo' is an API to return halo information 
+!>@brief The subroutine 'atmosphere_scalar_field_halo' is an API to return halo information
 !! of the current MPI_rank for an input scalar field.
 !>@detail Up to three point haloes can be returned by this API which includes special handling for
-!! the cubed-sphere tile corners. Output will be in (i,j,k) while input can be in (i,j,k) or 
+!! the cubed-sphere tile corners. Output will be in (i,j,k) while input can be in (i,j,k) or
 !! horizontally-packed form (ix,k).
 subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p)
 !--------------------------------------------------------------------
@@ -697,7 +703,7 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
   !  ied - horizontal resolution in i-dir with haloes
   !  jed - horizontal resolution in j-dir with haloes
   !  ksize - vertical resolution
-  !  data_p - optional input field in packed format (ix,k) 
+  !  data_p - optional input field in packed format (ix,k)
 !--------------------------------------------------------------------
 !--- interface variables ---
   real*8, dimension(1:isize,1:jsize,ksize), intent(inout) :: data !< output array to return the field with halo (i,j,k)
@@ -724,9 +730,11 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
       if (ic*jc .ne. size(data_p,1)) call mpp_error(FATAL, modname//' - incorrect sizes for incoming &
                                                    &variables data and data_p')
       data = 0.
+#ifdef _OPENMP
 !$OMP parallel do default (none) &
 !$OMP          shared (data, data_p, halo, ic, jc, ksize) &
 !$OMP          private (i, j, k)
+#endif
       do k = 1, ksize
          do j = 1, jc
             do i = 1, ic
@@ -745,7 +753,7 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
         call mpp_error(FATAL, modname//' - unsupported halo size')
      endif
 
-     !--- fill the halo points when at a corner of the cubed-sphere tile 
+     !--- fill the halo points when at a corner of the cubed-sphere tile
      !--- interior domain corners are handled correctly
      if ( (isc==1) .or. (jsc==1) .or. (iec==npx-1) .or. (jec==npy-1) ) then
         do k = 1, ksize
diff --git a/four_to_grid_stochy.F b/four_to_grid_stochy.F
index 358be21..49e8a62 100644
--- a/four_to_grid_stochy.F
+++ b/four_to_grid_stochy.F
@@ -50,18 +50,22 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
 #ifdef MKL
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
+#endif
 #else
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1=(thread-1)*nvar_thread_max + 1
@@ -108,18 +112,22 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
 #ifdef MKL
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
+#endif
 #else
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -201,12 +209,14 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
      nvar_thread_max=(lot+num_threads-1)/num_threads
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -236,12 +246,14 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
            endif
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
diff --git a/spectral_layout.F90 b/spectral_layout.F90
index 291f528..4aad474 100644
--- a/spectral_layout.F90
+++ b/spectral_layout.F90
@@ -86,6 +86,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !
       len_thread_m  = (len+ompthreads-1) / ompthreads
 !
+#ifdef _OPENMP
 !$omp parallel do num_threads(ompthreads) default(none)             &
 !$omp private(i1_t,i2_t,len_thread,it,i,ii,i1,i2)                   &
 !$omp private(j,j1,j2,jq,ix,jy,nx,kxs,kxt,kmami)                    &
@@ -96,6 +97,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !$omp shared(outlon,outlat,wrk,iindx1,rinlon,jindx1,rinlat,ddx,ddy) &
 !$omp shared(rlon,rlat,regin,gauout)                                &
 !$omp shared(ompthreads,len_thread_m,len,iindx2,jindx2,rslmsk)
+#endif
       do it=1,ompthreads            ! start of threaded loop
          i1_t = (it-1)*len_thread_m+1
          i2_t = min(i1_t+len_thread_m-1,len)
@@ -282,7 +284,9 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
             endif
          enddo
       enddo                         ! end of threaded loop
+#ifdef _OPENMP
 !$omp end parallel do
+#endif
 !
       return
 !
diff --git a/sumfln_stochy.f b/sumfln_stochy.f
index f0d063c..9553956 100644
--- a/sumfln_stochy.f
+++ b/sumfln_stochy.f
@@ -107,8 +107,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          lat1 = lat1s(l)
 
          if ( kind_dbl_prec == 8 ) then !------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2,n2)
+#endif
             do thread=1,num_threads   ! start of thread loop ..............
                nvar_1 = (thread-1)*nvar_thread_max + 1
                nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -169,8 +171,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
                endif
             enddo   ! end of thread loop ..................................
          else !------------------------------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2)
+#endif
             do thread=1,num_threads   ! start of thread loop ..............
                nvar_1 = (thread-1)*nvar_thread_max + 1
                nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -186,8 +190,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          do node = 1, nodes - 1
            ilat_list(node+1) = ilat_list(node) + lats_nodes(node)
          end do
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,jj,ilat,lat,ipt_ls,nvar,kn,n2)
+#endif
          do node=1,nodes
            do jj=1,lats_nodes(node)
              ilat = ilat_list(node) + jj
@@ -237,7 +243,9 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
 !
 !
       n2 = nvars + nvars
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads) private(node)
+#endif
       do node=1,nodes
         sendcounts(node) = kpts(node) * n2
         recvcounts(node) = kptr(node) * n2
@@ -249,8 +257,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
      &                   work1dr,recvcounts,sdispls)
       nullify(work1dr)
       nullify(work1ds)
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(j,lat,lmax,nvar,lval,n2,lonl,nv)
+#endif
       do j=1,lats_node
         lat = global_lats(ipt_lats_node-1+j)
         lonl = lons_lat(lat)
@@ -274,8 +284,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       kptr = 0
 !     write(0,*)' kptr=',kptr(1)
 !!
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,l,lval,j,lat,nvar,kn,n2)
+#endif
       do node=1,nodes
         do l=1,max_ls_nodes(node)
           lval = ls_nodes(l,node)+1

From 80700212ec25a9b93de398afb23700caf4e5e02a Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 16:02:09 -0400
Subject: [PATCH 2/4] remove guarding ifdefs from omp directives

---
 atmosphere_stub.F90   | 11 -----------
 four_to_grid_stochy.F | 12 ------------
 spectral_layout.F90   |  4 ----
 sumfln_stochy.f       | 12 ------------
 4 files changed, 39 deletions(-)

diff --git a/atmosphere_stub.F90 b/atmosphere_stub.F90
index d204e7a..c9aa37a 100644
--- a/atmosphere_stub.F90
+++ b/atmosphere_stub.F90
@@ -90,9 +90,6 @@ module atmosphere_stub_mod
 !! including the grid structures, memory, initial state (self-initialization or restart),
 !! and diagnostics.
 subroutine atmosphere_init_stub (Grid_box, area)
-#ifdef _OPENMP
-  use omp_lib
-#endif
   type(grid_box_type), intent(inout) :: Grid_box
   real*8, pointer, dimension(:,:), intent(inout) :: area
 !--- local variables ---
@@ -321,12 +318,10 @@ subroutine del2_cubed(q, cd, gridstruct, domain, npx, npy, km, nmax, bd)
    do n=1,ntimes
       nt = ntimes - n
 
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,q,is,ie,js,je,npx,npy, &
 !$OMP                                  nt,isd,jsd,gridstruct,bd, &
 !$OMP                                  cd) &
 !$OMP                          private(fx, fy)
-#endif
       do k=1,km
 
         if ( gridstruct%sw_corner ) then
@@ -425,11 +420,9 @@ subroutine box_mean(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
-#endif
       do k=1,km
 
         if ( gridstruct%sw_corner ) then
@@ -523,11 +516,9 @@ subroutine box_mean2(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
      nt = ntimes !- n
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
-#endif
      do k=1,km
 
        if ( gridstruct%sw_corner ) then
@@ -730,11 +721,9 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
       if (ic*jc .ne. size(data_p,1)) call mpp_error(FATAL, modname//' - incorrect sizes for incoming &
                                                    &variables data and data_p')
       data = 0.
-#ifdef _OPENMP
 !$OMP parallel do default (none) &
 !$OMP          shared (data, data_p, halo, ic, jc, ksize) &
 !$OMP          private (i, j, k)
-#endif
       do k = 1, ksize
          do j = 1, jc
             do i = 1, ic
diff --git a/four_to_grid_stochy.F b/four_to_grid_stochy.F
index 49e8a62..358be21 100644
--- a/four_to_grid_stochy.F
+++ b/four_to_grid_stochy.F
@@ -50,22 +50,18 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
 #ifdef MKL
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
-#endif
 #else
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1=(thread-1)*nvar_thread_max + 1
@@ -112,22 +108,18 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
 #ifdef MKL
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
-#endif
 #else
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -209,14 +201,12 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
      nvar_thread_max=(lot+num_threads-1)/num_threads
 
     if ( kind_dbl_prec == 8 ) then !------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -246,14 +236,12 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
            endif
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
diff --git a/spectral_layout.F90 b/spectral_layout.F90
index 4aad474..291f528 100644
--- a/spectral_layout.F90
+++ b/spectral_layout.F90
@@ -86,7 +86,6 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !
       len_thread_m  = (len+ompthreads-1) / ompthreads
 !
-#ifdef _OPENMP
 !$omp parallel do num_threads(ompthreads) default(none)             &
 !$omp private(i1_t,i2_t,len_thread,it,i,ii,i1,i2)                   &
 !$omp private(j,j1,j2,jq,ix,jy,nx,kxs,kxt,kmami)                    &
@@ -97,7 +96,6 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !$omp shared(outlon,outlat,wrk,iindx1,rinlon,jindx1,rinlat,ddx,ddy) &
 !$omp shared(rlon,rlat,regin,gauout)                                &
 !$omp shared(ompthreads,len_thread_m,len,iindx2,jindx2,rslmsk)
-#endif
       do it=1,ompthreads            ! start of threaded loop
          i1_t = (it-1)*len_thread_m+1
          i2_t = min(i1_t+len_thread_m-1,len)
@@ -284,9 +282,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
             endif
          enddo
       enddo                         ! end of threaded loop
-#ifdef _OPENMP
 !$omp end parallel do
-#endif
 !
       return
 !
diff --git a/sumfln_stochy.f b/sumfln_stochy.f
index 9553956..f0d063c 100644
--- a/sumfln_stochy.f
+++ b/sumfln_stochy.f
@@ -107,10 +107,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          lat1 = lat1s(l)
 
          if ( kind_dbl_prec == 8 ) then !------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2,n2)
-#endif
             do thread=1,num_threads   ! start of thread loop ..............
               nvar_1 = (thread-1)*nvar_thread_max + 1
               nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -171,10 +169,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
               endif
             enddo   ! end of thread loop ..................................
          else !------------------------------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2)
-#endif
             do thread=1,num_threads   ! start of thread loop ..............
               nvar_1 = (thread-1)*nvar_thread_max + 1
               nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -190,10 +186,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          do node = 1, nodes - 1
            ilat_list(node+1) = ilat_list(node) + lats_nodes(node)
          end do
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,jj,ilat,lat,ipt_ls,nvar,kn,n2)
-#endif
          do node=1,nodes
            do jj=1,lats_nodes(node)
              ilat = ilat_list(node) + jj
@@ -243,9 +237,7 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
 !
 !
       n2 = nvars + nvars
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads) private(node)
-#endif
       do node=1,nodes
         sendcounts(node) = kpts(node) * n2
         recvcounts(node) = kptr(node) * n2
@@ -257,10 +249,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
      &                   work1dr,recvcounts,sdispls)
       nullify(work1dr)
       nullify(work1ds)
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(j,lat,lmax,nvar,lval,n2,lonl,nv)
-#endif
       do j=1,lats_node
         lat = global_lats(ipt_lats_node-1+j)
         lonl = lons_lat(lat)
@@ -284,10 +274,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       kptr = 0
 !     write(0,*)' kptr=',kptr(1)
 !!
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,l,lval,j,lat,nvar,kn,n2)
-#endif
       do node=1,nodes
         do l=1,max_ls_nodes(node)
           lval = ls_nodes(l,node)+1

From ea1b343926ad6084f1422675f0d4a3b856f17e32 Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 23:26:47 -0400
Subject: [PATCH 3/4] cleanup compile defs

---
 CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e00c228..a8079ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,4 @@
 if(32BIT)
-remove_definitions(-DOVERLOAD_R4)
-remove_definitions(-DOVERLOAD_R8)
   message ("Force 64 bits in stochastic_physics")
   if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
   if(REPRO)
@@ -57,6 +55,9 @@ add_library(
 )
 add_dependencies(stochastic_physics fms)
 
+list(APPEND _stoch_phys_defs_private INTERNAL_FILE_NML)
+target_compile_definitions(stochastic_physics PRIVATE "${_stoch_phys_defs_private}")
+
 target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
 target_link_libraries(stochastic_physics PUBLIC fms)

From 0d35aa24e5720be371b549d7a4a6f6f480f3e7a1 Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Sun, 4 Oct 2020 21:25:49 -0500
Subject: [PATCH 4/4] there is no need for a defs_private variable

---
 CMakeLists.txt | 93 +++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------
 1 file changed, 46 insertions(+), 47 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8079ea..ec2f29a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,62 +1,61 @@
 if(32BIT)
   message ("Force 64 bits in stochastic_physics")
   if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
-  if(REPRO)
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  else()
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  endif()
+    if(REPRO)
+      string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    else()
+      string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    endif()
   elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
-  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
   endif()
 endif()
 
-add_library(
-  stochastic_physics
+list(APPEND _stoch_phys_srcs
+  kinddef.F90
+  mpi_wrapper.F90
+  halo_exchange.fv3.F90
+  plumes.f90
 
-  ./kinddef.F90
-  ./mpi_wrapper.F90
-  ./halo_exchange.fv3.F90
-  ./plumes.f90
+  stochy_gg_def.f
+  stochy_resol_def.f
+  stochy_layout_lag.f
+  four_to_grid_stochy.F
+  glats_stochy.f
+  sumfln_stochy.f
+  gozrineo_stochy.f
+  num_parthds_stochy.f
+  get_ls_node_stochy.f
+  get_lats_node_a_stochy.f
+  setlats_a_stochy.f
+  setlats_lag_stochy.f
+  epslon_stochy.f
+  getcon_lag_stochy.f
+  pln2eo_stochy.f
+  dozeuv_stochy.f
+  dezouv_stochy.f
+  mersenne_twister.F
 
-  ./stochy_gg_def.f
-  ./stochy_resol_def.f
-  ./stochy_layout_lag.f
-  ./four_to_grid_stochy.F
-  ./glats_stochy.f
-  ./sumfln_stochy.f
-  ./gozrineo_stochy.f
-  ./num_parthds_stochy.f
-  ./get_ls_node_stochy.f
-  ./get_lats_node_a_stochy.f
-  ./setlats_a_stochy.f
-  ./setlats_lag_stochy.f
-  ./epslon_stochy.f
-  ./getcon_lag_stochy.f
-  ./pln2eo_stochy.f
-  ./dozeuv_stochy.f
-  ./dezouv_stochy.f
-  ./mersenne_twister.F
+  spectral_layout.F90
+  getcon_spectral.F90
+  stochy_namelist_def.F90
+  compns_stochy.F90
+  stochy_internal_state_mod.F90
+  stochastic_physics.F90
+  stochy_patterngenerator.F90
+  stochy_data_mod.F90
+  get_stochy_pattern.F90
+  initialize_spectral_mod.F90
+  cellular_automata_global.F90
+  cellular_automata_sgs.F90
+  update_ca.F90
+  lndp_apply_perts.F90
 
-  ./spectral_layout.F90
-  ./getcon_spectral.F90
-  ./stochy_namelist_def.F90
-  ./compns_stochy.F90
-  ./stochy_internal_state_mod.F90
-  ./stochastic_physics.F90
-  ./stochy_patterngenerator.F90
-  ./stochy_data_mod.F90
-  ./get_stochy_pattern.F90
-  ./initialize_spectral_mod.F90
-  ./cellular_automata_global.F90
-  ./cellular_automata_sgs.F90
-  ./update_ca.F90
-  ./lndp_apply_perts.F90
 )
+
+add_library(stochastic_physics ${_stoch_phys_srcs})
 add_dependencies(stochastic_physics fms)
 
-list(APPEND _stoch_phys_defs_private INTERNAL_FILE_NML)
-target_compile_definitions(stochastic_physics PRIVATE "${_stoch_phys_defs_private}")
+target_compile_definitions(stochastic_physics PRIVATE INTERNAL_FILE_NML)
 
 target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
 target_link_libraries(stochastic_physics PUBLIC fms)
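
Taken together, the four patches land on the conventional CMake pattern for optional OpenMP support: the parent build detects OpenMP once, and this library links the imported OpenMP::OpenMP_Fortran target only when OpenMP_Fortran_FOUND is set. Linking that target propagates the compiler's OpenMP flag to both the compile and link steps; the flag makes the compiler define _OPENMP and honor the !$omp sentinel lines, and when the flag is absent the sentinels compile as ordinary comments. That is why PATCH 2/4 can safely drop the per-directive #ifdef _OPENMP guards that PATCH 1/4 introduced. A minimal sketch of a consuming top-level project follows; the ufs_model project name and the OPENMP cache option are hypothetical illustrations, not part of this series:

    # Hypothetical top-level CMakeLists.txt consuming this library (sketch only).
    cmake_minimum_required(VERSION 3.12)
    project(ufs_model LANGUAGES Fortran)

    option(OPENMP "Build with OpenMP threading" ON)   # assumed user-facing switch
    if(OPENMP)
      # Sets OpenMP_Fortran_FOUND and creates the imported target
      # OpenMP::OpenMP_Fortran that the library's CMakeLists.txt checks for.
      find_package(OpenMP REQUIRED COMPONENTS Fortran)
    endif()

    # The stochastic_physics build shown above then conditionally does:
    #   if(OpenMP_Fortran_FOUND)
    #     target_link_libraries(stochastic_physics PUBLIC OpenMP::OpenMP_Fortran)
    #   endif()
    add_subdirectory(stochastic_physics)

Because the link to OpenMP::OpenMP_Fortran is PUBLIC, the OpenMP compile and link flags are inherited by anything that links stochastic_physics, so downstream targets get consistent threading settings without repeating the detection logic.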