From 032e43cad5b12a145c4ec1b2eeb83eafb181d07b Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 10:01:55 -0400
Subject: [PATCH 1/4] guard omp directives with _OPENMP. Use compiler flag when OpenMP is found

---
 CMakeLists.txt        | 31 +++++++++++++++++++--------------
 atmosphere_stub.F90   | 50 +++++++++++++++++++++++++++++---------------------
 four_to_grid_stochy.F | 12 ++++++++++++
 spectral_layout.F90   |  4 ++++
 sumfln_stochy.f       | 12 ++++++++++++
 5 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d94ec2..e00c228 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,16 +1,16 @@
 if(32BIT)
 remove_definitions(-DOVERLOAD_R4)
 remove_definitions(-DOVERLOAD_R8)
-message ("Force 64 bits in stochastic_physics")
-if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
-  if(REPRO)
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  else()
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  endif()
-elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
-  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
-endif()
+  message ("Force 64 bits in stochastic_physics")
+  if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
+  if(REPRO)
+    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+  else()
+    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+  endif()
+  elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
+  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
+  endif()
 endif()
 
 add_library(
@@ -53,9 +53,12 @@ add_library(
   ./cellular_automata_global.F90
   ./cellular_automata_sgs.F90
   ./update_ca.F90
-  ./lndp_apply_perts.F90 
+  ./lndp_apply_perts.F90
 )
+add_dependencies(stochastic_physics fms)
 
-target_link_libraries(stochastic_physics sp::sp_d)
-target_link_libraries(stochastic_physics fms)
-
+target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
+target_link_libraries(stochastic_physics PUBLIC fms)
+if(OpenMP_Fortran_FOUND)
+  target_link_libraries(stochastic_physics PUBLIC OpenMP::OpenMP_Fortran)
+endif()
diff --git a/atmosphere_stub.F90 b/atmosphere_stub.F90
index 38a7fd7..d204e7a 100644
--- a/atmosphere_stub.F90
+++ b/atmosphere_stub.F90
@@ -87,10 +87,10 @@ module atmosphere_stub_mod
 !>@brief The subroutine 'atmosphere_init' is an API to initialize the FV3 dynamical core,
-!! including the grid structures, memory, initial state (self-initialization or restart), 
-!! and diagnostics. 
+!! including the grid structures, memory, initial state (self-initialization or restart),
+!! and diagnostics.
 subroutine atmosphere_init_stub (Grid_box, area)
-#ifdef OPENMP
+#ifdef _OPENMP
   use omp_lib
 #endif
   type(grid_box_type), intent(inout) :: Grid_box
   real*8, pointer, dimension(:,:), intent(inout) :: area
 !--- local variables ---
@@ -172,11 +172,11 @@ subroutine atmosphere_init_stub (Grid_box, area)
 
                          call timing_off('ATMOS_INIT')
 
-        
+
  end subroutine atmosphere_init_stub
 
 ! subroutine atmosphere_smooth_noise (wnoise,npass,ns_type,renorm_type)
-! 
+!
 !    !--- interface variables ---
 !    real,intent(inout) :: wnoise(isd:ied,jsd:jed,1)
 !    integer, intent(in) :: npass,ns_type,renorm_type
@@ -184,7 +184,7 @@ end subroutine atmosphere_init_stub
 !    integer:: i,j,nloops,nlast
 !    real ::inflation(isc:iec,jsc:jec),inflation2
 ! ! scale factor for restoring inflation
-! ! logic: 
+! ! logic:
 ! ! if box mean:   scalar get basic scaling, vector gets 1/grid dependent scaling 0-0 ; 0 - 1
 ! ! if box mean2:  no scaling
 ! ! if del2     :  scalar gets grid dependent scaling,vector get basic scaling 1 0; 1 1
@@ -202,14 +202,14 @@ end subroutine atmosphere_init_stub
 !          inflation(i,j)=inflation2*Atm(mytile)%gridstruct%dxAV/(0.5*(Atm(mytile)%gridstruct%dx(i,j)+Atm(mytile)%gridstruct%dy(i,j)))
 !       enddo
 !    enddo
-! else 
+! else
 !    if ( renorm_type.EQ.1) then ! box smooth does not need scaling for scalar
 !       do j=jsc,jec
 !          do i=isc,iec
 !             inflation(i,j)=inflation2
 !          enddo
 !       enddo
-!    else 
+!    else
 !    ! box mean needs inversize grid-size scaling for vector
 !       do j=jsc,jec
 !          do i=isc,iec
@@ -221,7 +221,7 @@ end subroutine atmosphere_init_stub
 !    endif
 !    nloops=npass/3
 !    nlast=mod(npass,3)
-!    do j=1,nloops 
+!    do j=1,nloops
 !       if (ns_type.EQ.1) then
 !    !call del2_cubed(wnoise , 0.25*Atm(mytile)%gridstruct%da_min, Atm(mytile)%gridstruct, &
 !          call del2_cubed(wnoise , 0.20*Atm(mytile)%gridstruct%da_min, Atm(mytile)%gridstruct, &
@@ -272,8 +272,8 @@ end subroutine atmosphere_init_stub
 !    call make_c_winds(ua, va, psi,Atm(mytile)%ng,Atm(mytile)%gridstruct,Atm(mytile)%bd,Atm(mytile)%npx,Atm(mytile)%npy)
 !! populate wind perturbations right here
 !    do k=1,km
-!       Atm(mytile)%urandom_c(isc:iec+edge,jsc:jec     ,k)=ua*vwts(k) 
-!       Atm(mytile)%vrandom_c(isc:iec     ,jsc:jec+edge,k)=va*vwts(k) 
+!       Atm(mytile)%urandom_c(isc:iec+edge,jsc:jec     ,k)=ua*vwts(k)
+!       Atm(mytile)%vrandom_c(isc:iec     ,jsc:jec+edge,k)=va*vwts(k)
 !    enddo
 !    !call mpp_update_domains(Atm(mytile)%urandom_c, Atm(mytile)%domain, complete=.true.)
 !    !call mpp_update_domains(Atm(mytile)%vrandom_c, Atm(mytile)%domain, complete=.true.)
@@ -320,11 +320,13 @@ subroutine del2_cubed(q, cd, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes - n
-      
+
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,q,is,ie,js,je,npx,npy, &
 !$OMP                                  nt,isd,jsd,gridstruct,bd, &
 !$OMP                                  cd) &
 !$OMP                          private(fx, fy)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -423,9 +425,11 @@ subroutine box_mean(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -519,9 +523,11 @@ subroutine box_mean2(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
+#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
+#endif
       do k=1,km
 
          if ( gridstruct%sw_corner ) then
@@ -575,7 +581,7 @@ subroutine make_a_winds(ua, va, psi, ng, gridstruct, bd, npx, npy)
 type(fv_grid_type), intent(IN), target :: gridstruct
 ! Local:
 real, dimension(bd%isd:bd%ied,bd%jsd:bd%jed) :: wk
-real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v 
+real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v
 integer i,j
 integer :: is,  ie,  js,  je
 
@@ -614,7 +620,7 @@ subroutine make_c_winds(uc, vc, psi, ng, gridstruct, bd, npx, npy)
 type(fv_grid_type), intent(IN), target :: gridstruct
 ! Local:
 real, dimension(bd%isd:bd%ied,bd%jsd:bd%jed) :: wk
-real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v 
+real, dimension(bd%isc:bd%iec,bd%jsc:bd%jec) :: u,v
 integer i,j
 integer :: is,  ie,  js,  je
 
@@ -637,8 +643,8 @@ subroutine make_c_winds(uc, vc, psi, ng, gridstruct, bd, npx, npy)
 
 end subroutine make_c_winds
 
-!>@brief The subroutine 'atmospehre_resolution' is an API to return the local 
-!! extents of the current MPI-rank or the global extents of the current 
+!>@brief The subroutine 'atmospehre_resolution' is an API to return the local
+!! extents of the current MPI-rank or the global extents of the current
 !! cubed-sphere tile.
 subroutine atmosphere_resolution (i_size, j_size, global)
   integer, intent(out) :: i_size, j_size
@@ -657,7 +663,7 @@ subroutine atmosphere_resolution (i_size, j_size, global)
    end if
 end subroutine atmosphere_resolution
 !>@brief The subroutine 'atmosphere_domain' is an API to return
-!! the "domain2d" variable associated with the coupling grid and the 
+!! the "domain2d" variable associated with the coupling grid and the
 !! decomposition for the current cubed-sphere tile.
 !>@detail Coupling is done using the mass/temperature grid with no halos.
 subroutine atmosphere_domain ( fv_domain, layout, regional, nested, pelist )
@@ -683,10 +689,10 @@ subroutine set_atmosphere_pelist ()
 end subroutine set_atmosphere_pelist
 
 
-!>@brief The subroutine 'atmosphere_scalar_field_halo' is an API to return halo information 
+!>@brief The subroutine 'atmosphere_scalar_field_halo' is an API to return halo information
 !! of the current MPI_rank for an input scalar field.
 !>@detail Up to three point haloes can be returned by this API which includes special handling for
-!! the cubed-sphere tile corners. Output will be in (i,j,k) while input can be in (i,j,k) or 
+!! the cubed-sphere tile corners. Output will be in (i,j,k) while input can be in (i,j,k) or
 !! horizontally-packed form (ix,k).
 subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p)
 !--------------------------------------------------------------------
@@ -697,7 +703,7 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
   !  ied - horizontal resolution in i-dir with haloes
   !  jed - horizontal resolution in j-dir with haloes
   !  ksize - vertical resolution
-  !  data_p - optional input field in packed format (ix,k) 
+  !  data_p - optional input field in packed format (ix,k)
 !--------------------------------------------------------------------
 !--- interface variables ---
   real*8, dimension(1:isize,1:jsize,ksize), intent(inout) :: data !< output array to return the field with halo (i,j,k)
@@ -724,9 +730,11 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
       if (ic*jc .ne. size(data_p,1)) call mpp_error(FATAL, modname//' - incorrect sizes for incoming &
                                                    &variables data and data_p')
       data = 0.
+#ifdef _OPENMP
 !$OMP parallel do default (none) &
 !$OMP          shared (data, data_p, halo, ic, jc, ksize) &
 !$OMP          private (i, j, k)
+#endif
       do k = 1, ksize
          do j = 1, jc
             do i = 1, ic
@@ -745,7 +753,7 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
         call mpp_error(FATAL, modname//' - unsupported halo size')
      endif
 
-     !--- fill the halo points when at a corner of the cubed-sphere tile 
+     !--- fill the halo points when at a corner of the cubed-sphere tile
      !--- interior domain corners are handled correctly
      if ( (isc==1) .or. (jsc==1) .or. (iec==npx-1) .or. (jec==npy-1) ) then
         do k = 1, ksize
diff --git a/four_to_grid_stochy.F b/four_to_grid_stochy.F
index 358be21..49e8a62 100644
--- a/four_to_grid_stochy.F
+++ b/four_to_grid_stochy.F
@@ -50,18 +50,22 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
 #ifdef MKL
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
+#endif
 #else
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1=(thread-1)*nvar_thread_max + 1
@@ -108,18 +112,22 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
 #ifdef MKL
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
+#endif
 #else
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -201,12 +209,14 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
      nvar_thread_max=(lot+num_threads-1)/num_threads
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -236,12 +246,14 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
            endif
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
+#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
diff --git a/spectral_layout.F90 b/spectral_layout.F90
index 291f528..4aad474 100644
--- a/spectral_layout.F90
+++ b/spectral_layout.F90
@@ -86,6 +86,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !
       len_thread_m  = (len+ompthreads-1) / ompthreads
 !
+#ifdef _OPENMP
 !$omp parallel do num_threads(ompthreads) default(none)             &
 !$omp private(i1_t,i2_t,len_thread,it,i,ii,i1,i2)                   &
 !$omp private(j,j1,j2,jq,ix,jy,nx,kxs,kxt,kmami)                    &
@@ -96,6 +97,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !$omp shared(outlon,outlat,wrk,iindx1,rinlon,jindx1,rinlat,ddx,ddy) &
 !$omp shared(rlon,rlat,regin,gauout)                                &
 !$omp shared(ompthreads,len_thread_m,len,iindx2,jindx2,rslmsk)
+#endif
       do it=1,ompthreads            ! start of threaded loop
          i1_t = (it-1)*len_thread_m+1
          i2_t = min(i1_t+len_thread_m-1,len)
@@ -282,7 +284,9 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
             endif
          enddo
       enddo                         ! end of threaded loop
+#ifdef _OPENMP
 !$omp end parallel do
+#endif
 !
       return
 !
diff --git a/sumfln_stochy.f b/sumfln_stochy.f
index f0d063c..9553956 100644
--- a/sumfln_stochy.f
+++ b/sumfln_stochy.f
@@ -107,8 +107,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          lat1 = lat1s(l)
 
          if ( kind_dbl_prec == 8 ) then !------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2,n2)
+#endif
             do thread=1,num_threads   ! start of thread loop ..............
                nvar_1 = (thread-1)*nvar_thread_max + 1
                nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -169,8 +171,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
                endif
             enddo   ! end of thread loop ..................................
          else !------------------------------------------------------------
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2)
+#endif
             do thread=1,num_threads   ! start of thread loop ..............
                nvar_1 = (thread-1)*nvar_thread_max + 1
                nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -186,8 +190,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          do node = 1, nodes - 1
            ilat_list(node+1) = ilat_list(node) + lats_nodes(node)
          end do
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,jj,ilat,lat,ipt_ls,nvar,kn,n2)
+#endif
          do node=1,nodes
            do jj=1,lats_nodes(node)
              ilat = ilat_list(node) + jj
@@ -237,7 +243,9 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
 !
 !
       n2 = nvars + nvars
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads) private(node)
+#endif
       do node=1,nodes
         sendcounts(node) = kpts(node) * n2
         recvcounts(node) = kptr(node) * n2
@@ -249,8 +257,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
      &                   work1dr,recvcounts,sdispls)
       nullify(work1dr)
       nullify(work1ds)
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(j,lat,lmax,nvar,lval,n2,lonl,nv)
+#endif
       do j=1,lats_node
         lat = global_lats(ipt_lats_node-1+j)
         lonl = lons_lat(lat)
@@ -274,8 +284,10 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       kptr = 0
 !     write(0,*)' kptr=',kptr(1)
 !!
+#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,l,lval,j,lat,nvar,kn,n2)
+#endif
       do node=1,nodes
         do l=1,max_ls_nodes(node)
           lval = ls_nodes(l,node)+1

From 80700212ec25a9b93de398afb23700caf4e5e02a Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 16:02:09 -0400
Subject: [PATCH 2/4] remove guarding ifdefs from omp directives

---
 atmosphere_stub.F90   | 11 -----------
 four_to_grid_stochy.F | 12 ------------
 spectral_layout.F90   |  4 ----
 sumfln_stochy.f       | 12 ------------
 4 files changed, 39 deletions(-)

diff --git a/atmosphere_stub.F90 b/atmosphere_stub.F90
index d204e7a..c9aa37a 100644
--- a/atmosphere_stub.F90
+++ b/atmosphere_stub.F90
@@ -90,9 +90,6 @@ module atmosphere_stub_mod
 !! including the grid structures, memory, initial state (self-initialization or restart),
 !! and diagnostics.
 subroutine atmosphere_init_stub (Grid_box, area)
-#ifdef _OPENMP
-  use omp_lib
-#endif
   type(grid_box_type), intent(inout) :: Grid_box
   real*8, pointer, dimension(:,:), intent(inout) :: area
 !--- local variables ---
@@ -321,12 +318,10 @@ subroutine del2_cubed(q, cd, gridstruct, domain, npx, npy, km, nmax, bd)
    do n=1,ntimes
       nt = ntimes - n
 
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,q,is,ie,js,je,npx,npy, &
 !$OMP                                  nt,isd,jsd,gridstruct,bd, &
 !$OMP                                  cd) &
 !$OMP                          private(fx, fy)
-#endif
       do k=1,km
 
         if ( gridstruct%sw_corner ) then
@@ -425,11 +420,9 @@ subroutine box_mean(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
       nt = ntimes !- n
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
-#endif
       do k=1,km
 
         if ( gridstruct%sw_corner ) then
@@ -523,11 +516,9 @@ subroutine box_mean2(q, gridstruct, domain, npx, npy, km, nmax, bd)
 
    do n=1,ntimes
      nt = ntimes !- n
-#ifdef _OPENMP
 !$OMP parallel do default(none) shared(km,is,ie,js,je,npx,npy, &
 !$OMP                                  q,nt,isd,jsd,gridstruct,bd) &
 !$OMP                          private(q2)
-#endif
      do k=1,km
 
        if ( gridstruct%sw_corner ) then
@@ -730,11 +721,9 @@ subroutine atmosphere_scalar_field_halo (data, halo, isize, jsize, ksize, data_p
       if (ic*jc .ne. size(data_p,1)) call mpp_error(FATAL, modname//' - incorrect sizes for incoming &
                                                    &variables data and data_p')
       data = 0.
-#ifdef _OPENMP
 !$OMP parallel do default (none) &
 !$OMP          shared (data, data_p, halo, ic, jc, ksize) &
 !$OMP          private (i, j, k)
-#endif
       do k = 1, ksize
          do j = 1, jc
             do i = 1, ic
diff --git a/four_to_grid_stochy.F b/four_to_grid_stochy.F
index 49e8a62..358be21 100644
--- a/four_to_grid_stochy.F
+++ b/four_to_grid_stochy.F
@@ -50,22 +50,18 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
 
      if ( kind_dbl_prec == 8 ) then !------------------------------------
 #ifdef MKL
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
-#endif
 #else
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1=(thread-1)*nvar_thread_max + 1
@@ -112,22 +108,18 @@ subroutine four_to_grid(syn_gr_a_1,syn_gr_a_2,
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
 #ifdef MKL
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,plan)
-#endif
 #else
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(syn_gr_a_1,syn_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 #endif
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -209,14 +201,12 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
      nvar_thread_max=(lot+num_threads-1)/num_threads
 
     if ( kind_dbl_prec == 8 ) then !------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
@@ -246,14 +236,12 @@ subroutine grid_to_four(anl_gr_a_2,anl_gr_a_1,
            endif
         enddo   ! fin thread loop ......................................
      else !------------------------------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+shared(anl_gr_a_1,anl_gr_a_2,lons_lat)
 !$omp+shared(lon_dim_coef,lon_dim_grid)
 !$omp+shared(lot,num_threads,nvar_thread_max)
 !$omp+shared(ibmsign,scale_ibm,rone)
 !$omp+private(thread,nvar_1,nvar_2,lot_thread,init,aux1crs)
-#endif
 
         do thread=1,num_threads   ! start of thread loop ..............
            nvar_1 = (thread-1)*nvar_thread_max + 1
diff --git a/spectral_layout.F90 b/spectral_layout.F90
index 4aad474..291f528 100644
--- a/spectral_layout.F90
+++ b/spectral_layout.F90
@@ -86,7 +86,6 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !
       len_thread_m  = (len+ompthreads-1) / ompthreads
 !
-#ifdef _OPENMP
 !$omp parallel do num_threads(ompthreads) default(none)             &
 !$omp private(i1_t,i2_t,len_thread,it,i,ii,i1,i2)                   &
 !$omp private(j,j1,j2,jq,ix,jy,nx,kxs,kxt,kmami)                    &
@@ -97,7 +96,6 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
 !$omp shared(outlon,outlat,wrk,iindx1,rinlon,jindx1,rinlat,ddx,ddy) &
 !$omp shared(rlon,rlat,regin,gauout)                                &
 !$omp shared(ompthreads,len_thread_m,len,iindx2,jindx2,rslmsk)
-#endif
       do it=1,ompthreads            ! start of threaded loop
          i1_t = (it-1)*len_thread_m+1
          i2_t = min(i1_t+len_thread_m-1,len)
@@ -284,9 +282,7 @@ subroutine stochy_la2ga(regin,imxin,jmxin,rinlon,rinlat,rlon,rlat, &
             endif
          enddo
       enddo                         ! end of threaded loop
-#ifdef _OPENMP
 !$omp end parallel do
-#endif
 !
       return
 !
diff --git a/sumfln_stochy.f b/sumfln_stochy.f
index 9553956..f0d063c 100644
--- a/sumfln_stochy.f
+++ b/sumfln_stochy.f
@@ -107,10 +107,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          lat1 = lat1s(l)
 
          if ( kind_dbl_prec == 8 ) then !------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2,n2)
-#endif
             do thread=1,num_threads   ! start of thread loop ..............
               nvar_1 = (thread-1)*nvar_thread_max + 1
               nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -171,10 +169,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
               endif
             enddo   ! end of thread loop ..................................
          else !------------------------------------------------------------
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(thread,nvar_1,nvar_2)
-#endif
             do thread=1,num_threads   ! start of thread loop ..............
               nvar_1 = (thread-1)*nvar_thread_max + 1
               nvar_2 = min(nvar_1+nvar_thread_max-1,nvars)
@@ -190,10 +186,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
          do node = 1, nodes - 1
            ilat_list(node+1) = ilat_list(node) + lats_nodes(node)
          end do
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,jj,ilat,lat,ipt_ls,nvar,kn,n2)
-#endif
          do node=1,nodes
            do jj=1,lats_nodes(node)
              ilat = ilat_list(node) + jj
@@ -243,9 +237,7 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
 !
 !
       n2 = nvars + nvars
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads) private(node)
-#endif
       do node=1,nodes
         sendcounts(node) = kpts(node) * n2
         recvcounts(node) = kptr(node) * n2
@@ -257,10 +249,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
      &                   work1dr,recvcounts,sdispls)
       nullify(work1dr)
       nullify(work1ds)
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(j,lat,lmax,nvar,lval,n2,lonl,nv)
-#endif
       do j=1,lats_node
         lat = global_lats(ipt_lats_node-1+j)
         lonl = lons_lat(lat)
@@ -284,10 +274,8 @@ subroutine sumfln_stochy(flnev,flnod,lat1s,plnev,plnod,
       kptr = 0
 !     write(0,*)' kptr=',kptr(1)
 !!
-#ifdef _OPENMP
 !$omp parallel do num_threads(num_threads)
 !$omp+private(node,l,lval,j,lat,nvar,kn,n2)
-#endif
       do node=1,nodes
         do l=1,max_ls_nodes(node)
           lval = ls_nodes(l,node)+1

From ea1b343926ad6084f1422675f0d4a3b856f17e32 Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Thu, 1 Oct 2020 23:26:47 -0400
Subject: [PATCH 3/4] cleanup compile defs

---
 CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e00c228..a8079ea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,4 @@
 if(32BIT)
-remove_definitions(-DOVERLOAD_R4)
-remove_definitions(-DOVERLOAD_R8)
   message ("Force 64 bits in stochastic_physics")
   if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
   if(REPRO)
@@ -57,6 +55,9 @@ add_library(
 )
 add_dependencies(stochastic_physics fms)
 
+list(APPEND _stoch_phys_defs_private INTERNAL_FILE_NML)
+target_compile_definitions(stochastic_physics PRIVATE "${_stoch_phys_defs_private}")
+
 target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
 target_link_libraries(stochastic_physics PUBLIC fms)

From 0d35aa24e5720be371b549d7a4a6f6f480f3e7a1 Mon Sep 17 00:00:00 2001
From: Rahul Mahajan
Date: Sun, 4 Oct 2020 21:25:49 -0500
Subject: [PATCH 4/4] there is no need for a defs_private variable

---
 CMakeLists.txt | 93 +++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------
 1 file changed, 46 insertions(+), 47 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a8079ea..ec2f29a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,62 +1,61 @@
 if(32BIT)
   message ("Force 64 bits in stochastic_physics")
   if(CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
-  if(REPRO)
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  else()
-    string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
-  endif()
+    if(REPRO)
+      string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    else()
+      string (REPLACE "-i4 -real-size 32" "-i4 -real-size 64 -no-prec-div -no-prec-sqrt" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
+    endif()
   elseif(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
-  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
+    set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fdefault-real-8")
   endif()
 endif()
 
-add_library(
-  stochastic_physics
+list(APPEND _stoch_phys_srcs
+  kinddef.F90
+  mpi_wrapper.F90
+  halo_exchange.fv3.F90
+  plumes.f90
 
-  ./kinddef.F90
-  ./mpi_wrapper.F90
-  ./halo_exchange.fv3.F90
-  ./plumes.f90
+  stochy_gg_def.f
+  stochy_resol_def.f
+  stochy_layout_lag.f
+  four_to_grid_stochy.F
+  glats_stochy.f
+  sumfln_stochy.f
+  gozrineo_stochy.f
+  num_parthds_stochy.f
+  get_ls_node_stochy.f
+  get_lats_node_a_stochy.f
+  setlats_a_stochy.f
+  setlats_lag_stochy.f
+  epslon_stochy.f
+  getcon_lag_stochy.f
+  pln2eo_stochy.f
+  dozeuv_stochy.f
+  dezouv_stochy.f
+  mersenne_twister.F
 
-  ./stochy_gg_def.f
-  ./stochy_resol_def.f
-  ./stochy_layout_lag.f
-  ./four_to_grid_stochy.F
-  ./glats_stochy.f
-  ./sumfln_stochy.f
-  ./gozrineo_stochy.f
-  ./num_parthds_stochy.f
-  ./get_ls_node_stochy.f
-  ./get_lats_node_a_stochy.f
-  ./setlats_a_stochy.f
-  ./setlats_lag_stochy.f
-  ./epslon_stochy.f
-  ./getcon_lag_stochy.f
-  ./pln2eo_stochy.f
-  ./dozeuv_stochy.f
-  ./dezouv_stochy.f
-  ./mersenne_twister.F
+  spectral_layout.F90
+  getcon_spectral.F90
+  stochy_namelist_def.F90
+  compns_stochy.F90
+  stochy_internal_state_mod.F90
+  stochastic_physics.F90
+  stochy_patterngenerator.F90
+  stochy_data_mod.F90
+  get_stochy_pattern.F90
+  initialize_spectral_mod.F90
+  cellular_automata_global.F90
+  cellular_automata_sgs.F90
+  update_ca.F90
+  lndp_apply_perts.F90
 
-  ./spectral_layout.F90
-  ./getcon_spectral.F90
-  ./stochy_namelist_def.F90
-  ./compns_stochy.F90
-  ./stochy_internal_state_mod.F90
-  ./stochastic_physics.F90
-  ./stochy_patterngenerator.F90
-  ./stochy_data_mod.F90
-  ./get_stochy_pattern.F90
-  ./initialize_spectral_mod.F90
-  ./cellular_automata_global.F90
-  ./cellular_automata_sgs.F90
-  ./update_ca.F90
-  ./lndp_apply_perts.F90
 )
+
+add_library(stochastic_physics ${_stoch_phys_srcs})
 add_dependencies(stochastic_physics fms)
 
-list(APPEND _stoch_phys_defs_private INTERNAL_FILE_NML)
-target_compile_definitions(stochastic_physics PRIVATE "${_stoch_phys_defs_private}")
+target_compile_definitions(stochastic_physics PRIVATE INTERNAL_FILE_NML)
 
 target_link_libraries(stochastic_physics PUBLIC sp::sp_d)
 target_link_libraries(stochastic_physics PUBLIC fms)
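
Taken together, the four patches land on the conventional CMake pattern for optional OpenMP support: the parent build detects OpenMP once, and this library links the imported OpenMP::OpenMP_Fortran target only when OpenMP_Fortran_FOUND is set. Linking that target propagates the compiler's OpenMP flag to both the compile and link steps; the flag makes the compiler define _OPENMP and honor the !$omp sentinel lines, and when the flag is absent the sentinels compile as ordinary comments. That is why PATCH 2/4 can safely drop the per-directive #ifdef _OPENMP guards that PATCH 1/4 introduced. A minimal sketch of a consuming top-level project follows; the ufs_model project name and the OPENMP cache option are hypothetical illustrations, not part of this series:

    # Hypothetical top-level CMakeLists.txt consuming this library (sketch only).
    cmake_minimum_required(VERSION 3.12)
    project(ufs_model LANGUAGES Fortran)

    option(OPENMP "Build with OpenMP threading" ON)   # assumed user-facing switch
    if(OPENMP)
      # Sets OpenMP_Fortran_FOUND and creates the imported target
      # OpenMP::OpenMP_Fortran that the library's CMakeLists.txt checks for.
      find_package(OpenMP REQUIRED COMPONENTS Fortran)
    endif()

    # The stochastic_physics build shown above then conditionally does:
    #   if(OpenMP_Fortran_FOUND)
    #     target_link_libraries(stochastic_physics PUBLIC OpenMP::OpenMP_Fortran)
    #   endif()
    add_subdirectory(stochastic_physics)

Because the link to OpenMP::OpenMP_Fortran is PUBLIC, the OpenMP compile and link flags are inherited by anything that links stochastic_physics, so downstream targets get consistent threading settings without repeating the detection logic.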