From f42bab72cf2c5185a44fa2e7858e53b769838be4 Mon Sep 17 00:00:00 2001
From: Conrad Chan <8309215+conradtchan@users.noreply.github.com>
Date: Thu, 24 Mar 2022 10:24:06 +1100
Subject: [PATCH] MPI and OpenMP memory allocation optimisations (#262)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Type of PR: modification to existing code

Description:
This PR combines several changes to memory allocation:
- Rename `mpi_stack.F90` to `mpi_memory.F90` to reflect the fact that the structure is not strictly a memory stack
- Move MPI memory allocation from `initial` into the main memory allocation subroutine
- Set the MPI "stacksize" dynamically, based on `npart` and `nprocs`
- Remove `remote_export` from the cell derived types, and adjust the send/recv subroutines to match; this allows the `maxprocs` parameter to be removed
- Remove the `maxprocs` parameter and allocate arrays dynamically using `nprocs`
- Automatically expand `stacksize` as required
- Add a check for whether global node refinement exceeds `ncellsmax` (an issue for large numbers of MPI tasks)
- Allocate a larger array for the global tree (using `ncellsmaxglobal` instead of `ncellsmax`)
- Move the dynamic memory size calculation inside the `allocate_memory` subroutine
- Tidy up some unused imports
- Fix the setdisc test failure caused by uninitialised (but unused) variables

Resolve #260

The crash is caused by large `threadprivate` arrays being allocated for certain combinations of MPI tasks and OpenMP threads. The root cause has not been identified, and is potentially a compiler bug. Two possible solutions:

1. Dynamically allocate a separate `xyzcache` array for each thread (or one large array that is split between tasks). This solution essentially makes the memory allocation a manual/explicit process, since `threadprivate` _already_ performs a "dynamic" allocation at runtime based on `OMP_NUM_THREADS`. After discussion with @danieljprice, this solution was not preferred because:
   - The cache size is fixed per thread (though the total memory allocation still varies with the number of threads)
   - Performance may be affected. However, a test of the polar benchmark showed no change in performance for 1- and 2-node hybrid jobs. Interestingly, the new version produced slightly faster run times, but the difference was not significant.
   - Additional code complexity
2. Reduce the cache size to work around the bug. Performance is not expected to be affected, based on discussion with @danieljprice: _xyzcache does not have to be 50,000 long. Typically the number of “trial” neighbours is more like a few hundred, so something like 10,000 or even 5,000 for xyzcache length is already plenty_

Option 2 has been chosen, with the cache size reduced to 10,000. This works around the issue for 2 nodes, but the problem may still exist at other task+thread counts or on other systems. This can be revisited if a more robust solution is required.

Testing:
- Benchmark suite to assess performance impact
- Test suite for correctness

Did you run the bots?
yes --- build/Makefile | 6 +- src/main/config.F90 | 24 +-- src/main/dens.F90 | 33 ++- src/main/force.F90 | 46 ++--- src/main/initial.F90 | 12 +- src/main/kdtree.F90 | 12 +- src/main/linklist_kdtree.F90 | 12 +- src/main/memory.F90 | 43 +++- src/main/mpi_balance.F90 | 39 +++- src/main/mpi_dens.F90 | 18 +- src/main/mpi_derivs.F90 | 108 +++++----- src/main/mpi_force.F90 | 18 +- src/main/mpi_memory.F90 | 291 +++++++++++++++++++++++++++ src/main/mpi_stack.F90 | 254 ----------------------- src/main/part.F90 | 2 + src/main/readwrite_dumps_fortran.F90 | 8 +- src/main/utils_allocate.f90 | 62 ++++-- src/setup/phantomsetup.F90 | 2 +- src/setup/relax_star.f90 | 2 +- src/tests/phantomtest.f90 | 2 +- src/tests/test_rwdump.F90 | 2 +- src/utils/combinedustdumps.f90 | 2 +- src/utils/phantom_moddump.f90 | 2 +- 23 files changed, 530 insertions(+), 470 deletions(-) create mode 100644 src/main/mpi_memory.F90 delete mode 100644 src/main/mpi_stack.F90 diff --git a/build/Makefile b/build/Makefile index ee29300a2..455c7a6c0 100644 --- a/build/Makefile +++ b/build/Makefile @@ -499,7 +499,7 @@ SOURCES= physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 units.f90 boundary.f90 \ centreofmass.f90 ${SRCPOT} damping.f90 checkconserved.f90 \ partinject.F90 utils_inject.f90 utils_filenames.f90 utils_summary.F90 ${SRCCHEM}\ ${SRCDUST} dust_formation.F90 ptmass_radiation.F90 ptmass_heating.F90 \ - mpi_dens.F90 mpi_force.F90 mpi_stack.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 ${SRCTURB} \ + mpi_dens.F90 mpi_force.F90 mpi_memory.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 ${SRCTURB} \ ${SRCPHOTO} ${SRCINJECT} ${SRCKROME} memory.F90 ${SRCREADWRITE_DUMPS} \ quitdump.f90 ptmass.F90 \ readwrite_infile.F90 dens.F90 force.F90 utils_deriv.f90 deriv.F90 energies.F90 sort_particles.F90 \ @@ -573,7 +573,7 @@ edit: checksetup SRCDUMP= physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 units.f90 boundary.f90 mpi_utils.F90 \ utils_timing.f90 utils_infiles.f90 dtype_kdtree.f90 utils_allocate.f90 part.F90 ${DOMAIN} \ mpi_dens.F90 mpi_force.F90 \ - mpi_balance.F90 mpi_stack.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 \ + mpi_balance.F90 mpi_memory.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 \ utils_dumpfiles.f90 utils_vectors.f90 utils_mathfunc.f90 \ utils_datafiles.f90 utils_filenames.f90 utils_tables.f90 datafiles.f90 gitinfo.f90 \ centreofmass.f90 \ @@ -1015,7 +1015,7 @@ SRCMULT = physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 mpi_utils.F90 utils_timing.f utils_filenames.f90 utils_mathfunc.f90 utils_vectors.f90 utils_omp.F90 utils_datafiles.f90 datafiles.f90 utils_tables.f90 utils_infiles.f90 \ ${SRCEOS} viscosity.f90 options.f90 damping.f90 utils_gravwave.f90 \ utils_dumpfiles.f90 utils_summary.f90 centreofmass.f90 \ - ${SRCCHEM} ${DOMAIN} ${SRCPOT} dust_formation.F90 ptmass_radiation.F90 mpi_balance.F90 mpi_stack.F90 mpi_force.F90 mpi_dens.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 ptmass.F90 ${SRCTURB} \ + ${SRCCHEM} ${DOMAIN} ${SRCPOT} dust_formation.F90 ptmass_radiation.F90 mpi_balance.F90 mpi_memory.F90 mpi_force.F90 mpi_dens.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 ptmass.F90 ${SRCTURB} \ checkconserved.f90 prompting.f90 ${SRCDUST} ${SRCNIMHD} readwrite_infile.f90 ${MULTIRUNFILE} OBJM1 = ${SRCMULT:.f90=.o} OBJMULT = ${OBJM1:.F90=.o} diff --git a/src/main/config.F90 b/src/main/config.F90 index 17099a951..517ff5439 100644 --- a/src/main/config.F90 +++ b/src/main/config.F90 @@ -69,7 +69,8 @@ module dim #endif ! 
maxmimum storage in linklist - integer :: ncellsmax + integer :: ncellsmax + integer(kind=8) :: ncellsmaxglobal !------ ! Dust @@ -144,18 +145,10 @@ module dim radenxpartvecforce + & maxxpartvecGR - ! cell storage - integer, parameter :: maxprocs = 32 -#ifdef STACKSIZE - integer, parameter :: stacksize = STACKSIZE -#else #ifdef MPI - integer, parameter :: stacksize = 200000 logical, parameter :: mpi = .true. #else - integer, parameter :: stacksize = 0 logical, parameter :: mpi = .false. -#endif #endif ! storage for artificial viscosity switch @@ -364,8 +357,9 @@ module dim contains -subroutine update_max_sizes(n) - integer, intent(in) :: n +subroutine update_max_sizes(n,ntot) + integer, intent(in) :: n + integer(kind=8), optional, intent(in) :: ntot maxp = n @@ -378,9 +372,15 @@ subroutine update_max_sizes(n) #endif #ifdef NCELLSMAX - ncellsmax = NCELLSMAX + ncellsmax = NCELLSMAX + ncellsmaxglobal = NCELLSMAX #else ncellsmax = 2*maxp + if (present(ntot)) then + ncellsmaxglobal = 2*ntot + else + ncellsmaxglobal = ncellsmax + endif #endif #ifdef DUST diff --git a/src/main/dens.F90 b/src/main/dens.F90 index 23a898842..8ec58c381 100644 --- a/src/main/dens.F90 +++ b/src/main/dens.F90 @@ -16,7 +16,7 @@ module densityforce ! :Runtime parameters: None ! ! :Dependencies: boundary, dim, io, io_summary, kdtree, kernel, linklist, -! mpidens, mpiderivs, mpistack, mpiutils, options, part, timestep, +! mpidens, mpiderivs, mpimemory, mpiutils, options, part, timestep, ! timing, viscosity ! use dim, only:maxdvdx,maxp,maxrhosum,maxdustlarge @@ -99,7 +99,7 @@ module densityforce !--kernel related parameters !real, parameter :: cnormk = 1./pi, wab0 = 1., gradh0 = -3.*wab0, radkern2 = 4F.0 - integer, parameter :: isizecellcache = 50000 + integer, parameter :: isizecellcache = 10000 integer, parameter :: isizeneighcache = 0 integer, parameter :: maxdensits = 50 @@ -127,10 +127,10 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol use part, only:mhd,rhoh,dhdrho,rhoanddhdrho,ll,get_partinfo,iactive,& hrho,iphase,igas,idust,iamgas,periodic,all_active,dustfrac use mpiutils, only:reduceall_mpi,barrier_mpi,reduce_mpi,reduceall_mpi - use mpistack, only:reserve_stack,swap_stacks,reset_stacks - use mpistack, only:stack_remote => dens_stack_1 - use mpistack, only:stack_waiting => dens_stack_2 - use mpistack, only:stack_redo => dens_stack_3 + use mpimemory, only:reserve_stack,swap_stacks,reset_stacks + use mpimemory, only:stack_remote => dens_stack_1 + use mpimemory, only:stack_waiting => dens_stack_2 + use mpimemory, only:stack_redo => dens_stack_3 use mpiderivs, only:send_cell,recv_cells,check_send_finished,init_cell_exchange,& finish_cell_exchange,recv_while_wait,reset_cell_counters use timestep, only:rhomaxnow @@ -302,7 +302,6 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol cell%owner = id cell%nits = 0 cell%nneigh = 0 - cell%remote_export(1:nprocs) = remote_export call start_cell(cell,iphase,xyzh,vxyzu,fxyzu,fext,Bevol,rad) call get_cell_location(icell,cell%xpos,cell%xsizei,cell%rcuti) @@ -314,7 +313,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol if (do_export) then if (stack_waiting%n > 0) call check_send_finished(stack_remote,irequestsend,irequestrecv,xrecvbuf) call reserve_stack(stack_waiting,cell%waiting_index) ! make a reservation on the stack - call send_cell(cell,0,irequestsend,xsendbuf) ! export the cell: direction 0 for exporting + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! 
send the cell to remote endif !$omp end critical (send_and_recv_remote) endif @@ -336,14 +335,13 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., & cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti, & remote_export=remote_export) - cell%remote_export(1:nprocs) = remote_export if (any(remote_export)) then do_export = .true. !$omp critical (send_and_recv_remote) if (stack_waiting%n > 0) call check_send_finished(stack_remote,irequestsend,irequestrecv,xrecvbuf) call reserve_stack(stack_waiting,cell%waiting_index) - call send_cell(cell,0,irequestsend,xsendbuf) ! direction export (0) + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! send the cell to remote !$omp end critical (send_and_recv_remote) endif nrelink = nrelink + 1 @@ -407,14 +405,14 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad) - cell%remote_export(id+1) = .false. + remote_export = .false. + remote_export(cell%owner+1) = .true. ! use remote_export array to send back to the owner ! communication happened while computing contributions to remote cells !$omp critical (send_and_recv_waiting) call recv_cells(stack_waiting,xrecvbuf,irequestrecv) call check_send_finished(stack_waiting,irequestsend,irequestrecv,xrecvbuf) - ! direction return (1) - call send_cell(cell,1,irequestsend,xsendbuf) + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! send the cell back to owner !$omp end critical (send_and_recv_waiting) enddo over_remote !$omp enddo @@ -441,10 +439,6 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol over_waiting: do i = 1, stack_waiting%n cell = stack_waiting%cells(i) - if (any(cell%remote_export(1:nprocs))) then - call fatal('dens', 'not all results returned from remote processor') - endif - if (calculate_density) then call finish_cell(cell,converged) call compute_hmax(cell,redo_neighbours) @@ -461,12 +455,11 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., & cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti, & remote_export=remote_export) - cell%remote_export(1:nprocs) = remote_export + !$omp critical (send_and_recv_remote) call check_send_finished(stack_remote,irequestsend,irequestrecv,xrecvbuf) call reserve_stack(stack_redo,cell%waiting_index) - ! direction export (0) - call send_cell(cell,0,irequestsend,xsendbuf) + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! send the cell to remote !$omp end critical (send_and_recv_remote) call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad) diff --git a/src/main/force.F90 b/src/main/force.F90 index e3a60866a..3aba954a9 100644 --- a/src/main/force.F90 +++ b/src/main/force.F90 @@ -41,9 +41,9 @@ module forces ! ! :Dependencies: boundary, cooling, dim, dust, eos, eos_shen, fastmath, ! growth, io, io_summary, kdtree, kernel, linklist, metric_tools, -! mpiderivs, mpiforce, mpistack, mpiutils, nicil, options, part, physcon, -! ptmass, ptmass_heating, radiation_utils, timestep, timestep_ind, -! timestep_sts, units, utils_gr, viscosity +! mpiderivs, mpiforce, mpimemory, mpiutils, nicil, omp_cache, options, +! part, physcon, ptmass, ptmass_heating, radiation_utils, timestep, +! 
timestep_ind, timestep_sts, units, utils_gr, viscosity ! use dim, only:maxfsum,maxxpartveciforce,maxp,ndivcurlB,ndivcurlv,& maxdusttypes,maxdustsmall,do_radiation @@ -56,7 +56,7 @@ module forces character(len=80), parameter, public :: & ! module version modid="$Id$" - integer, parameter :: maxcellcache = 50000 + integer, parameter :: maxcellcache = 10000 public :: force, reconstruct_dv ! latter to avoid compiler warning @@ -220,9 +220,9 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,& #endif use mpiderivs, only:send_cell,recv_cells,check_send_finished,init_cell_exchange,& finish_cell_exchange,recv_while_wait,reset_cell_counters - use mpistack, only:reserve_stack,reset_stacks - use mpistack, only:stack_remote => force_stack_1 - use mpistack, only:stack_waiting => force_stack_2 + use mpimemory, only:reserve_stack,reset_stacks + use mpimemory, only:stack_remote => force_stack_1 + use mpimemory, only:stack_waiting => force_stack_2 use io_summary, only:iosumdtr integer, intent(in) :: icall,npart real, intent(in) :: xyzh(:,:) @@ -456,7 +456,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,& getj=.true.,f=cell%fgrav,remote_export=remote_export) cell%owner = id - cell%remote_export(1:nprocs) = remote_export + do_export = any(remote_export) if (mpi) then @@ -465,7 +465,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,& if (do_export) then if (stack_waiting%n > 0) call check_send_finished(stack_remote,irequestsend,irequestrecv,xrecvbuf) call reserve_stack(stack_waiting,cell%waiting_index) - call send_cell(cell,0,irequestsend,xsendbuf) ! export the cell: direction 0 for exporting + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! send to remote endif !$omp end critical (send_and_recv_remote) endif @@ -522,12 +522,12 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,& dustfrac,dustprop,gradh,ibinnow_m1,ibin_wake,stressmax,xyzcache,& rad,radprop,dens,metrics) - cell%remote_export(id+1) = .false. - + remote_export = .false. + remote_export(cell%owner+1) = .true. ! use remote_export array to send back to the owner !$omp critical (send_and_recv_waiting) call recv_cells(stack_waiting,xrecvbuf,irequestrecv) call check_send_finished(stack_waiting,irequestsend,irequestrecv,xrecvbuf) - call send_cell(cell,1,irequestsend,xsendbuf) + call send_cell(cell,remote_export,irequestsend,xsendbuf) ! 
send the cell back to owner !$omp end critical (send_and_recv_waiting) enddo over_remote @@ -553,10 +553,6 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,& over_waiting: do i = 1, stack_waiting%n cell = stack_waiting%cells(i) - if (any(cell%remote_export(1:nprocs))) then - call fatal('force', 'not all results returned from remote processor') - endif - call finish_cell_and_store_results(icall,cell,fxyzu,xyzh,vxyzu,poten,dt,dvdx, & divBsymm,divcurlv,dBevol,ddustevol,deltav,dustgasprop, & dtcourant,dtforce,dtvisc,dtohm,dthall,dtambi,dtdiff,dtmini,dtmaxi, & @@ -2999,24 +2995,6 @@ subroutine finish_cell_and_store_results(icall,cell,fxyzu,xyzh,vxyzu,poten,dt,dv enddo over_parts end subroutine finish_cell_and_store_results -pure subroutine combine_cells(cella, cellb) - type(cellforce), intent(inout) :: cella - type(cellforce), intent(in) :: cellb - - integer :: i - - do i = 1,cella%npcell - cella%fsums(:,i) = cella%fsums(:,i) + cellb%fsums(:,i) - enddo - - cella%ndrag = cella%ndrag + cellb%ndrag - cella%nstokes = cella%nstokes + cellb%nstokes - cella%nsuper = cella%nsuper + cellb%nsuper - - cella%remote_export = (cella%remote_export .and. cellb%remote_export) - -end subroutine combine_cells - !----------------------------------------------------------------------------- !+ ! Apply reconstruction to velocity gradients diff --git a/src/main/initial.F90 b/src/main/initial.F90 index 838558c49..cf334db92 100644 --- a/src/main/initial.F90 +++ b/src/main/initial.F90 @@ -19,7 +19,7 @@ module initial ! deriv, dim, dust, energies, eos, evwrite, extern_gr, externalforces, ! fastmath, fileutils, forcing, growth, inject, io, io_summary, ! krome_interface, linklist, metric_tools, mf_write, mpibalance, -! mpiderivs, mpidomain, mpistack, mpiutils, nicil, nicil_sup, omputils, +! mpiderivs, mpidomain, mpimemory, mpiutils, nicil, nicil_sup, omputils, ! options, part, photoevap, ptmass, radiation_utils, readwrite_dumps, ! readwrite_infile, timestep, timestep_ind, timestep_sts, timing, units, ! writeheader @@ -54,8 +54,6 @@ subroutine initialise() use mpidomain, only:init_domains use cpuinfo, only:print_cpuinfo use checkoptions, only:check_compile_time_settings - use mpiderivs, only:init_tree_comms - use mpistack, only:init_mpi_memory use readwrite_dumps, only:init_readwrite_dumps integer :: ierr ! @@ -100,10 +98,6 @@ subroutine initialise() !--initialise MPI domains ! call init_domains(nprocs) - if (mpi) then - call init_tree_comms() - call init_mpi_memory() - endif call init_readwrite_dumps() @@ -773,11 +767,11 @@ end subroutine startrun subroutine finalise() use dim, only: mpi use mpiderivs, only:finish_tree_comms - use mpistack, only:finish_mpi_memory + use mpimemory, only:deallocate_mpi_memory if (mpi) then call finish_tree_comms() - call finish_mpi_memory() + call deallocate_mpi_memory() endif end subroutine finalise diff --git a/src/main/kdtree.F90 b/src/main/kdtree.F90 index 5708a09e2..1fa4afa5d 100644 --- a/src/main/kdtree.F90 +++ b/src/main/kdtree.F90 @@ -1499,16 +1499,16 @@ subroutine maketreeglobal(nodeglobal,node,nodemap,globallevel,refinelevels,xyzh, use part, only:isdead_or_accreted,iactive,ibelong use timing, only:increment_timer,get_timings,itimer_balance - type(kdnode), intent(out) :: nodeglobal(:) !ncellsmax+1) - type(kdnode), intent(out) :: node(:) !ncellsmax+1) - integer, intent(out) :: nodemap(:) !ncellsmax+1) + type(kdnode), intent(out) :: nodeglobal(:) ! ncellsmax+1 + type(kdnode), intent(out) :: node(:) ! ncellsmax+1 + integer, intent(out) :: nodemap(:) ! 
ncellsmax+1 integer, intent(out) :: globallevel integer, intent(out) :: refinelevels integer, intent(inout) :: np integer, intent(in) :: ndim real, intent(inout) :: xyzh(:,:) - integer, intent(out) :: cellatid(:) !ncellsmax+1) - integer, intent(out) :: ifirstincell(:) !ncellsmax+1) + integer, intent(out) :: cellatid(:) ! ncellsmax+1 + integer, intent(out) :: ifirstincell(:) ! ncellsmax+1) real :: xmini(ndim),xmaxi(ndim) real :: xminl(ndim),xmaxl(ndim) real :: xminr(ndim),xmaxr(ndim) @@ -1650,6 +1650,8 @@ subroutine maketreeglobal(nodeglobal,node,nodemap,globallevel,refinelevels,xyzh, nnodestart = offset nnodeend = 2*nnodestart-1 + if (nnodeend > ncellsmax) call fatal('kdtree', 'global tree refinement has exceeded ncellsmax') + locstart = roffset locend = 2*locstart-1 diff --git a/src/main/linklist_kdtree.F90 b/src/main/linklist_kdtree.F90 index c8789e84e..1e263471b 100644 --- a/src/main/linklist_kdtree.F90 +++ b/src/main/linklist_kdtree.F90 @@ -21,7 +21,7 @@ module linklist ! :Dependencies: allocutils, boundary, dim, dtypekdtree, infile_utils, io, ! kdtree, kernel, mpiutils, part ! - use dim, only:ncellsmax + use dim, only:ncellsmax,ncellsmaxglobal use part, only:ll use dtypekdtree, only:kdnode implicit none @@ -58,11 +58,11 @@ subroutine allocate_linklist use kdtree, only:allocate_kdtree use dim, only:maxp - call allocate_array('cellatid', cellatid, ncellsmax+1) - call allocate_array('ifirstincell', ifirstincell, ncellsmax+1) - call allocate_array('nodeglobal', nodeglobal, ncellsmax+1) - call allocate_array('node', node, ncellsmax+1) - call allocate_array('nodemap', nodemap, ncellsmax+1) + call allocate_array('cellatid', cellatid, ncellsmaxglobal+1 ) + call allocate_array('ifirstincell', ifirstincell, ncellsmax+1 ) + call allocate_array('nodeglobal', nodeglobal, ncellsmaxglobal+1 ) + call allocate_array('node', node, ncellsmax+1 ) + call allocate_array('nodemap', nodemap, ncellsmax+1 ) call allocate_kdtree() !$omp parallel call allocate_array('listneigh',listneigh,maxp) diff --git a/src/main/memory.F90 b/src/main/memory.F90 index 5e6c17e9c..680d9334f 100644 --- a/src/main/memory.F90 +++ b/src/main/memory.F90 @@ -14,7 +14,8 @@ module memory ! ! :Runtime parameters: None ! -! :Dependencies: allocutils, dim, io, linklist, part, photoevap +! :Dependencies: allocutils, dim, io, linklist, mpibalance, mpiderivs, +! mpimemory, part, photoevap ! implicit none @@ -23,19 +24,23 @@ module memory ! !--Allocate all allocatable arrays: mostly part arrays, and tree structures ! -subroutine allocate_memory(n, part_only) - use io, only:iprint,warning,nprocs,id,master - use dim, only:update_max_sizes,maxp +subroutine allocate_memory(ntot, part_only) + use io, only:iprint,warning,nprocs,id,master + use dim, only:update_max_sizes,maxp,mpi use allocutils, only:nbytes_allocated,bytes2human - use part, only:allocate_part - use linklist, only:allocate_linklist,ifirstincell + use part, only:allocate_part + use linklist, only:allocate_linklist,ifirstincell + use mpimemory, only:allocate_mpi_memory + use mpibalance, only:allocate_balance_arrays + use mpiderivs, only:allocate_comms_arrays #ifdef PHOTO - use photoevap, only:allocate_photoevap + use photoevap, only:allocate_photoevap #endif - integer, intent(in) :: n + integer(kind=8), intent(in) :: ntot logical, optional, intent(in) :: part_only + integer :: n logical :: part_only_ character(len=11) :: sizestring @@ -44,6 +49,9 @@ subroutine allocate_memory(n, part_only) else part_only_ = .false. 
endif + + n = int(min(nprocs,4) * ntot / nprocs) + if (nbytes_allocated > 0.0 .and. n <= maxp) then ! ! just silently skip if arrays are already large enough @@ -57,7 +65,6 @@ subroutine allocate_memory(n, part_only) ! skip remaining memory allocation (arrays already big enough) return endif - call update_max_sizes(n) if (nprocs == 1) then write(iprint, *) @@ -72,15 +79,20 @@ subroutine allocate_memory(n, part_only) call warning('memory', 'Attempting to allocate memory, but memory is already allocated. & & Deallocating and then allocating again.') call deallocate_memory(part_only=part_only_) - call update_max_sizes(n) endif + call update_max_sizes(n,ntot) call allocate_part if (.not. part_only_) then call allocate_linklist #ifdef PHOTO call allocate_photoevap #endif + if (mpi) then + call allocate_mpi_memory(npart=n) + call allocate_balance_arrays + call allocate_comms_arrays + endif endif call bytes2human(nbytes_allocated, sizestring) @@ -95,12 +107,15 @@ subroutine allocate_memory(n, part_only) end subroutine allocate_memory subroutine deallocate_memory(part_only) - use dim, only:update_max_sizes + use dim, only:update_max_sizes,mpi use part, only:deallocate_part use linklist, only:deallocate_linklist #ifdef PHOTO use photoevap, only:deallocate_photoevap #endif + use mpimemory, only:deallocate_mpi_memory + use mpibalance, only:deallocate_balance_arrays + use mpiderivs, only:deallocate_comms_arrays use allocutils, only:nbytes_allocated logical, optional, intent(in) :: part_only @@ -120,6 +135,12 @@ subroutine deallocate_memory(part_only) #endif endif + if (mpi) then + call deallocate_mpi_memory + call deallocate_balance_arrays + call deallocate_comms_arrays + endif + nbytes_allocated = 0 call update_max_sizes(0) diff --git a/src/main/mpi_balance.F90 b/src/main/mpi_balance.F90 index 62173e319..afd9013f2 100644 --- a/src/main/mpi_balance.F90 +++ b/src/main/mpi_balance.F90 @@ -14,34 +14,51 @@ module mpibalance ! ! :Runtime parameters: None ! -! :Dependencies: dim, io, mpi, mpiutils, part, timing +! :Dependencies: allocutils, io, mpi, mpiutils, part, timing ! 
#ifdef MPI use mpi - use io, only:id,nprocs - use dim, only:maxprocs + use io, only:id use mpiutils, only:mpierr,status,MPI_DEFAULT_REAL,reduceall_mpi, & comm_balance,comm_balancecount use part, only:ipartbufsize #endif + use io, only:nprocs implicit none + public :: allocate_balance_arrays + public :: deallocate_balance_arrays public :: balancedomains private #ifdef MPI - real, dimension(ipartbufsize) :: xsendbuf,xbuffer - integer, dimension(1) :: irequestrecv,irequestsend - integer(kind=8) :: ntot_start - integer :: nsent(maxprocs),nexpect(maxprocs),nrecv(maxprocs) - integer :: countrequest(maxprocs) - integer :: npartnew, ncomplete + real, dimension(ipartbufsize) :: xsendbuf,xbuffer + integer, dimension(1) :: irequestrecv,irequestsend + integer(kind=8) :: ntot_start + integer :: npartnew, ncomplete #endif + integer,allocatable :: nsent(:),nexpect(:),nrecv(:) + integer,allocatable :: countrequest(:) contains +subroutine allocate_balance_arrays + use allocutils, only:allocate_array + call allocate_array('nsent', nsent, nprocs) + call allocate_array('nexpect', nexpect, nprocs) + call allocate_array('nrecv', nrecv, nprocs) + call allocate_array('countrequest',countrequest,nprocs) +end subroutine allocate_balance_arrays + +subroutine deallocate_balance_arrays + if (allocated(nsent )) deallocate(nsent ) + if (allocated(nexpect )) deallocate(nexpect ) + if (allocated(nrecv )) deallocate(nrecv ) + if (allocated(countrequest)) deallocate(countrequest) +end subroutine deallocate_balance_arrays + !---------------------------------------------------------------- !+ ! routine which moves particles onto their correct processor @@ -231,7 +248,7 @@ end subroutine recv_part !+ !----------------------------------------------------------------------- subroutine send_part(i,newproc,replace) - use io, only:fatal,nprocs + use io, only:fatal use part, only:fill_sendbuf,kill_particle integer, intent(in) :: i,newproc logical, intent(in), optional :: replace @@ -271,7 +288,7 @@ end subroutine send_part !+ !---------------------------------------------------------------- subroutine balance_finish(npart,replace) - use io, only:id,nprocs,fatal,iverbose + use io, only:id,fatal,iverbose use part, only:recount_npartoftype integer, intent(out) :: npart logical, intent(in), optional :: replace diff --git a/src/main/mpi_dens.F90 b/src/main/mpi_dens.F90 index 2e5531b09..11a28effa 100644 --- a/src/main/mpi_dens.F90 +++ b/src/main/mpi_dens.F90 @@ -17,7 +17,7 @@ module mpidens ! :Dependencies: dim, io, mpi, mpiutils ! use io, only:nprocs,fatal,error - use dim, only:minpart,maxrhosum,maxprocs,stacksize,maxxpartvecidens + use dim, only:minpart,maxrhosum,maxxpartvecidens implicit none private @@ -44,9 +44,8 @@ module mpidens integer :: nneightry integer :: nneigh(minpart) ! number of actual neighbours (diagnostic) integer :: waiting_index - logical :: remote_export(maxprocs) ! 
remotes we are waiting for integer(kind=1) :: iphase(minpart) - integer(kind=1) :: pad(8 - mod(4 * (6 + 2 * minpart + maxprocs) + minpart, 8)) + integer(kind=1) :: pad(8 - mod(4 * (6 + 2 * minpart) + minpart, 8)) end type celldens type stackdens @@ -54,8 +53,7 @@ module mpidens type(celldens), pointer :: cells(:) integer :: maxlength = 0 integer :: n = 0 - integer :: mem_start - integer :: mem_end + integer :: number end type stackdens contains @@ -66,7 +64,7 @@ subroutine get_mpitype_of_celldens(dtype) use mpiutils, only:mpierr use io, only:error - integer, parameter :: ndata = 20 + integer, parameter :: ndata = 18 integer, intent(out) :: dtype integer :: nblock, blens(ndata), mpitypes(ndata) @@ -175,12 +173,6 @@ subroutine get_mpitype_of_celldens(dtype) call MPI_GET_ADDRESS(cell%waiting_index,addr,mpierr) disp(nblock) = addr - start - nblock = nblock + 1 - blens(nblock) = size(cell%remote_export) - mpitypes(nblock) = MPI_LOGICAL - call MPI_GET_ADDRESS(cell%remote_export,addr,mpierr) - disp(nblock) = addr - start - nblock = nblock + 1 blens(nblock) = size(cell%iphase) mpitypes(nblock) = MPI_INTEGER1 @@ -188,7 +180,7 @@ subroutine get_mpitype_of_celldens(dtype) disp(nblock) = addr - start nblock = nblock + 1 - blens(nblock) = 8 - mod(4 * (6 + 2 * minpart + maxprocs) + minpart, 8) + blens(nblock) = 8 - mod(4 * (6 + 2 * minpart) + minpart, 8) mpitypes(nblock) = MPI_INTEGER1 call MPI_GET_ADDRESS(cell%pad,addr,mpierr) disp(nblock) = addr - start diff --git a/src/main/mpi_derivs.F90 b/src/main/mpi_derivs.F90 index 6717ca048..5480b727d 100644 --- a/src/main/mpi_derivs.F90 +++ b/src/main/mpi_derivs.F90 @@ -15,14 +15,13 @@ module mpiderivs ! ! :Runtime parameters: None ! -! :Dependencies: dim, dtypekdtree, io, mpi, mpidens, mpiforce, mpistack, -! mpiutils +! :Dependencies: allocutils, dtypekdtree, io, mpi, mpidens, mpiforce, +! mpimemory, mpiutils ! #ifdef MPI use mpi #endif use io, only:id,nprocs - use dim, only:maxprocs use mpiutils, only:mpierr,status,comm_cellexchange,comm_cellcount use dtypekdtree, only:kdnode,ndimtree @@ -56,6 +55,9 @@ module mpiderivs module procedure reduce_group_real, reduce_group_int end interface reduce_group + public :: allocate_comms_arrays + public :: deallocate_comms_arrays + public :: init_cell_exchange public :: send_cell public :: recv_cells @@ -67,7 +69,6 @@ module mpiderivs public :: tree_sync public :: tree_bcast - public :: init_tree_comms public :: finish_tree_comms public :: reset_cell_counters @@ -80,18 +81,39 @@ module mpiderivs integer :: dtype_cellforce integer :: globallevel - integer :: comm_cofm(maxprocs) ! only comms up to globallevel are used - integer :: comm_owner(maxprocs) ! only comms up to globallevel are used +#endif + integer,allocatable :: comm_cofm(:) ! only comms up to globallevel are used + integer,allocatable :: comm_owner(:) ! only comms up to globallevel are used - integer :: nsent(maxprocs) ! counter for number of cells sent to i - integer :: nexpect(maxprocs) ! counter for number of cells expecting from i - integer :: nrecv(maxprocs) ! counter for number of cells received from i + integer,allocatable :: nsent(:) ! counter for number of cells sent to i + integer,allocatable :: nexpect(:) ! counter for number of cells expecting from i + integer,allocatable :: nrecv(:) ! 
counter for number of cells received from i - integer :: countrequest(maxprocs) -#endif + integer,allocatable :: countrequest(:) contains +subroutine allocate_comms_arrays + use allocutils, only:allocate_array + call allocate_array('nsent', nsent, nprocs) + call allocate_array('nexpect', nexpect, nprocs) + call allocate_array('nrecv', nrecv, nprocs) + call allocate_array('countrequest',countrequest,nprocs) + call allocate_array('comm_cofm', comm_cofm, nprocs) + call allocate_array('comm_owner', comm_owner, nprocs) + + call init_tree_comms +end subroutine allocate_comms_arrays + +subroutine deallocate_comms_arrays + if (allocated(nsent )) deallocate(nsent ) + if (allocated(nexpect )) deallocate(nexpect ) + if (allocated(nrecv )) deallocate(nrecv ) + if (allocated(countrequest)) deallocate(countrequest) + if (allocated(comm_cofm )) deallocate(comm_cofm ) + if (allocated(comm_owner )) deallocate(comm_owner ) +end subroutine deallocate_comms_arrays + !---------------------------------------------------------------- !+ ! initialise the receive type for each thread @@ -169,34 +191,30 @@ end subroutine init_cellforce_exchange ! Subroutine to broadcast particle buffer to a bunch of processors !+ !----------------------------------------------------------------------- -subroutine send_celldens(cell,direction,irequestsend,xsendbuf) +subroutine send_celldens(cell,targets,irequestsend,xsendbuf) use mpidens, only:celldens type(celldens), intent(in) :: cell - integer, intent(in) :: direction + logical, intent(in) :: targets(nprocs) integer, intent(inout) :: irequestsend(nprocs) type(celldens), intent(out) :: xsendbuf - logical :: targets(nprocs) integer :: newproc + integer :: tag #ifdef MPI xsendbuf = cell + irequestsend = MPI_REQUEST_NULL - ! export - if (direction == 0) then - targets = cell%remote_export(1:nprocs) - ! return - elseif (direction == 1) then - targets = .false. - targets(cell%owner+1) = .true. + if (targets(cell%owner+1)) then + tag = 2 + else + tag = 1 endif - irequestsend = MPI_REQUEST_NULL - do newproc=0,nprocs-1 if ((newproc /= id) .and. (targets(newproc+1))) then ! do not send to self - call MPI_ISEND(xsendbuf,1,dtype_celldens,newproc,1+direction,comm_cellexchange,irequestsend(newproc+1),mpierr) + call MPI_ISEND(xsendbuf,1,dtype_celldens,newproc,tag,comm_cellexchange,irequestsend(newproc+1),mpierr) nsent(newproc+1) = nsent(newproc+1) + 1 endif enddo @@ -204,34 +222,30 @@ subroutine send_celldens(cell,direction,irequestsend,xsendbuf) end subroutine send_celldens -subroutine send_cellforce(cell,direction,irequestsend,xsendbuf) +subroutine send_cellforce(cell,targets,irequestsend,xsendbuf) use mpiforce, only:cellforce type(cellforce), intent(in) :: cell - integer, intent(in) :: direction + logical, intent(in) :: targets(nprocs) integer, intent(inout) :: irequestsend(nprocs) type(cellforce), intent(out) :: xsendbuf - logical :: targets(nprocs) integer :: newproc + integer :: tag #ifdef MPI xsendbuf = cell + irequestsend = MPI_REQUEST_NULL - ! export - if (direction == 0) then - targets = cell%remote_export(1:nprocs) - ! return - elseif (direction == 1) then - targets = .false. - targets(cell%owner+1) = .true. + if (targets(cell%owner+1)) then + tag = 2 + else + tag = 1 endif - irequestsend = MPI_REQUEST_NULL - do newproc=0,nprocs-1 if ((newproc /= id) .and. (targets(newproc+1))) then ! 
do not send to self - call MPI_ISEND(xsendbuf,1,dtype_cellforce,newproc,1+direction,comm_cellexchange,irequestsend(newproc+1),mpierr) + call MPI_ISEND(xsendbuf,1,dtype_cellforce,newproc,tag,comm_cellexchange,irequestsend(newproc+1),mpierr) nsent(newproc+1) = nsent(newproc+1) + 1 endif enddo @@ -357,9 +371,9 @@ end subroutine recv_while_wait_force !+ !------------------------------------------------ subroutine recv_celldens(target_stack,xbuf,irequestrecv) - use io, only:fatal - use mpistack, only:push_onto_stack - use mpidens, only:stackdens,celldens + use io, only:fatal + use mpimemory, only:push_onto_stack + use mpidens, only:stackdens,celldens type(celldens), intent(inout) :: xbuf(:) ! just need memory address type(stackdens), intent(inout) :: target_stack @@ -380,10 +394,6 @@ subroutine recv_celldens(target_stack,xbuf,irequestrecv) target_stack%cells(iwait)%rhosums(:,k) = target_stack%cells(iwait)%rhosums(:,k) + xbuf(iproc)%rhosums(:,k) target_stack%cells(iwait)%nneigh(k) = target_stack%cells(iwait)%nneigh(k) + xbuf(iproc)%nneigh(k) enddo - do k = 1,nprocs - target_stack%cells(iwait)%remote_export(k) = target_stack%cells(iwait)%remote_export(k) & - .and. xbuf(iproc)%remote_export(k) - enddo target_stack%cells(iwait)%nneightry = target_stack%cells(iwait)%nneightry + xbuf(iproc)%nneightry nrecv(iproc) = nrecv(iproc) + 1 elseif (status(MPI_TAG) == 1) then @@ -397,9 +407,9 @@ subroutine recv_celldens(target_stack,xbuf,irequestrecv) end subroutine recv_celldens subroutine recv_cellforce(target_stack,xbuf,irequestrecv) - use io, only:fatal - use mpistack, only:push_onto_stack - use mpiforce, only:stackforce,cellforce + use io, only:fatal + use mpimemory, only:push_onto_stack + use mpiforce, only:stackforce,cellforce type(cellforce), intent(inout) :: xbuf(:) ! just need memory address type(stackforce), intent(inout) :: target_stack @@ -430,10 +440,6 @@ subroutine recv_cellforce(target_stack,xbuf,irequestrecv) target_stack%cells(iwait)%fgrav(k) = target_stack%cells(iwait)%fgrav(k) + xbuf(iproc)%fgrav(k) enddo #endif - do k =1,nprocs - target_stack%cells(iwait)%remote_export(k) = target_stack%cells(iwait)%remote_export(k) & - .and. xbuf(iproc)%remote_export(k) - enddo target_stack%cells(iwait)%ndrag = target_stack%cells(iwait)%ndrag + xbuf(iproc)%ndrag target_stack%cells(iwait)%nstokes = target_stack%cells(iwait)%nstokes + xbuf(iproc)%nstokes target_stack%cells(iwait)%nsuper = target_stack%cells(iwait)%nsuper + xbuf(iproc)%nsuper diff --git a/src/main/mpi_force.F90 b/src/main/mpi_force.F90 index d16eeaa00..abf6e3fb4 100644 --- a/src/main/mpi_force.F90 +++ b/src/main/mpi_force.F90 @@ -17,7 +17,7 @@ module mpiforce ! :Dependencies: dim, io, mpi, mpiutils ! use io, only:nprocs,fatal - use dim, only:minpart,maxfsum,maxprocs,stacksize,maxxpartveciforce + use dim, only:minpart,maxfsum,maxxpartveciforce implicit none private @@ -44,10 +44,9 @@ module mpiforce integer :: nsuper integer :: owner ! id of the process that owns this integer :: waiting_index - logical :: remote_export(maxprocs) ! 
remotes we are waiting for integer(kind=1) :: iphase(minpart) integer(kind=1) :: ibinneigh(minpart) - integer(kind=1) :: pad(8 - mod(4 * (7 + minpart + maxprocs) + 2*minpart, 8)) !padding to maintain alignment of elements + integer(kind=1) :: pad(8 - mod(4 * (7 + minpart) + 2*minpart, 8)) !padding to maintain alignment of elements end type cellforce type stackforce @@ -55,8 +54,7 @@ module mpiforce type(cellforce), pointer :: cells(:) integer :: maxlength = 0 integer :: n = 0 - integer :: mem_start - integer :: mem_end + integer :: number end type stackforce contains @@ -67,7 +65,7 @@ subroutine get_mpitype_of_cellforce(dtype) use mpiutils, only:mpierr use io, only:error - integer, parameter :: ndata = 20 + integer, parameter :: ndata = 19 integer, intent(out) :: dtype integer :: nblock, blens(ndata), mpitypes(ndata) @@ -176,12 +174,6 @@ subroutine get_mpitype_of_cellforce(dtype) call MPI_GET_ADDRESS(cell%waiting_index,addr,mpierr) disp(nblock) = addr - start - nblock = nblock + 1 - blens(nblock) = size(cell%remote_export) - mpitypes(nblock) = MPI_LOGICAL - call MPI_GET_ADDRESS(cell%remote_export,addr,mpierr) - disp(nblock) = addr - start - nblock = nblock + 1 blens(nblock) = size(cell%iphase) mpitypes(nblock) = MPI_INTEGER1 @@ -195,7 +187,7 @@ subroutine get_mpitype_of_cellforce(dtype) disp(nblock) = addr - start nblock = nblock + 1 - blens(nblock) = 8 - mod(4 * (7 + minpart + maxprocs) + 2*minpart, 8) + blens(nblock) = 8 - mod(4 * (7 + minpart) + 2*minpart, 8) mpitypes(nblock) = MPI_INTEGER1 call MPI_GET_ADDRESS(cell%pad,addr,mpierr) disp(nblock) = addr - start diff --git a/src/main/mpi_memory.F90 b/src/main/mpi_memory.F90 new file mode 100644 index 000000000..d149ae74b --- /dev/null +++ b/src/main/mpi_memory.F90 @@ -0,0 +1,291 @@ +!--------------------------------------------------------------------------! +! The Phantom Smoothed Particle Hydrodynamics code, by Daniel Price et al. ! +! Copyright (c) 2007-2022 The Authors (see AUTHORS) ! +! See LICENCE file for usage and distribution conditions ! +! http://phantomsph.bitbucket.io/ ! +!--------------------------------------------------------------------------! +module mpimemory +! +! None +! +! :References: None +! +! :Owner: Conrad Chan +! +! :Runtime parameters: None +! +! :Dependencies: dim, io, mpidens, mpiforce +! + use io, only:fatal,iprint + use mpidens, only:celldens,stackdens + use mpiforce, only:cellforce,stackforce + + implicit none + + interface allocate_stack + module procedure allocate_stack_dens,allocate_stack_force + end interface allocate_stack + + interface swap_stacks ! force doesn't require a stack swap + module procedure swap_stacks_dens + end interface swap_stacks + + interface push_onto_stack + module procedure push_onto_stack_dens,push_onto_stack_force + end interface push_onto_stack + + interface pop_off_stack + module procedure pop_off_stack_dens,pop_off_stack_force + end interface pop_off_stack + + interface reserve_stack + module procedure reserve_stack_dens,reserve_stack_force + end interface reserve_stack + + public :: allocate_mpi_memory + public :: deallocate_mpi_memory + public :: allocate_stack + public :: swap_stacks + public :: push_onto_stack + public :: pop_off_stack + public :: reserve_stack + public :: reset_stacks + + ! 
stacks to be referenced from density and force routines + type(stackdens), public :: dens_stack_1 + type(stackdens), public :: dens_stack_2 + type(stackdens), public :: dens_stack_3 + type(stackforce), public :: force_stack_1 + type(stackforce), public :: force_stack_2 + + private + + integer :: stacksize + + ! primary chunk of memory requested using alloc + type(celldens), allocatable, target :: dens_cells(:,:) + type(cellforce), allocatable, target :: force_cells(:,:) + + ! temporary memory for increasing stack sizes + type(celldens), allocatable, target :: dens_cells_tmp(:,:) + type(cellforce), allocatable, target :: force_cells_tmp(:,:) + +contains + +subroutine allocate_mpi_memory(npart, stacksize_in, reallocate) + integer, optional, intent(in) :: npart + integer, optional, intent(in) :: stacksize_in + logical, optional, intent(in) :: reallocate + integer :: allocstat + logical :: re_allocate = .false. + + allocstat = 0 + + if (present(stacksize_in)) stacksize = stacksize_in + if (present(npart)) call calculate_stacksize(npart) + if (present(reallocate)) re_allocate = reallocate + + if (.not. allocated(dens_cells)) allocate(dens_cells(stacksize,3), stat=allocstat) + if (allocstat /= 0) call fatal('stack','fortran memory allocation error') + + ! If reallocating an existing stack for expanding MPI memory, + ! give the stack the same address that it previously had. This + ! may not be the same as the initial order because density stacks + ! can be swapped. Force stacks do not get swapped. + if (re_allocate) then + call allocate_stack(dens_stack_1, dens_stack_1%number) + call allocate_stack(dens_stack_2, dens_stack_2%number) + call allocate_stack(dens_stack_3, dens_stack_3%number) + else + call allocate_stack(dens_stack_1, 1) + call allocate_stack(dens_stack_2, 2) + call allocate_stack(dens_stack_3, 3) + endif + + if (.not. allocated(force_cells)) allocate(force_cells(stacksize,2), stat=allocstat) + if (allocstat /= 0) call fatal('stack','fortran memory allocation error') + call allocate_stack(force_stack_1, 1) + call allocate_stack(force_stack_2, 2) + +end subroutine allocate_mpi_memory + +subroutine increase_mpi_memory + use io, only:id + real, parameter :: factor = 1.5 + integer :: stacksize_new + integer :: allocstat + + stacksize_new = int(real(stacksize) * factor) + write(iprint, *) 'MPI stack exceeded on', id, 'increasing size to', stacksize_new + + ! Expand density + allocate(dens_cells_tmp(stacksize,3), stat=allocstat) + if (allocstat /= 0) call fatal('stack','error increasing dens stack size') + dens_cells_tmp(:,:) = dens_cells(:,:) + deallocate(dens_cells) + allocate(dens_cells(stacksize_new,3), stat=allocstat) + dens_cells(1:stacksize,:) = dens_cells_tmp(:,:) + deallocate(dens_cells_tmp) + + ! Do these one at a time to minimise peak memory usage + + ! Expand force + allocate(force_cells_tmp(stacksize,2), stat=allocstat) + if (allocstat /= 0) call fatal('stack','error increasing force stack size') + force_cells_tmp(:,:) = force_cells(:,:) + deallocate(force_cells) + allocate(force_cells(stacksize_new,2), stat=allocstat) + force_cells(1:stacksize,:) = force_cells_tmp(:,:) + deallocate(force_cells_tmp) + + ! Set new stacksize value + ! Reallocate, with memory already containing cells + call allocate_mpi_memory(stacksize_in=stacksize_new, reallocate=.true.) + +end subroutine increase_mpi_memory + +subroutine calculate_stacksize(npart) + use dim, only:mpi,minpart + use io, only:nprocs,id,master + integer, intent(in) :: npart + integer, parameter :: safety = 4 + + ! 
size of the stack needed for communication, + ! should be at least the maximum number of cells that need + ! to be exported to other tasks. + ! + ! if it is not large enough, it will be automatically expanded + + ! number of particles per cell, divided by number of tasks + if (mpi .and. nprocs > 1) then + ! assume that every cell will be exported, with some safety factor + stacksize = (npart / minpart / nprocs) * 4 + + if (id == master) then + write(iprint, *) 'MPI memory stack size = ', stacksize + write(iprint, *) ' (total number of cells that can be exported by a single task)' + endif + else + stacksize = 0 + endif + +end subroutine calculate_stacksize + +subroutine deallocate_mpi_memory + if (allocated(dens_cells )) deallocate(dens_cells ) + if (allocated(force_cells)) deallocate(force_cells) +end subroutine deallocate_mpi_memory + +subroutine allocate_stack_dens(stack, i) + type(stackdens), intent(inout) :: stack + integer, intent(in) :: i + + stack%number = i + stack%cells => dens_cells(1:stacksize,stack%number) + stack%maxlength = stacksize + +end subroutine allocate_stack_dens + +subroutine allocate_stack_force(stack, i) + type(stackforce), intent(inout) :: stack + integer, intent(in) :: i + + stack%number = i + stack%cells => force_cells(1:stacksize,stack%number) + stack%maxlength = stacksize + +end subroutine allocate_stack_force + +subroutine swap_stacks_dens(stack_a, stack_b) + type(stackdens), intent(inout) :: stack_a + type(stackdens), intent(inout) :: stack_b + + integer :: temp_n + integer :: temp_number + + if (stack_a%maxlength /= stack_b%maxlength) call fatal('stack', 'stack swap of unequal size') + + ! counters + temp_n = stack_a%n + stack_a%n = stack_b%n + stack_b%n = temp_n + + ! addresses + temp_number = stack_a%number + stack_a%number = stack_b%number + stack_b%number = temp_number + + ! change pointers + stack_a%cells => dens_cells(1:stacksize,stack_a%number) + stack_b%cells => dens_cells(1:stacksize,stack_b%number) + +end subroutine swap_stacks_dens + +subroutine push_onto_stack_dens(stack,cell) + type(stackdens), intent(inout) :: stack + type(celldens), intent(in) :: cell + + if (stack%n + 1 > stack%maxlength) call increase_mpi_memory + ! after increasing stack size, cells can be added to because it is just a pointer + stack%n = stack%n + 1 + stack%cells(stack%n) = cell +end subroutine push_onto_stack_dens + +subroutine push_onto_stack_force(stack,cell) + type(stackforce), intent(inout) :: stack + type(cellforce), intent(in) :: cell + + if (stack%n + 1 > stack%maxlength) call increase_mpi_memory + ! 
after increasing stack size, cells can be added to because it is just a pointer + stack%n = stack%n + 1 + stack%cells(stack%n) = cell +end subroutine push_onto_stack_force + +subroutine pop_off_stack_dens(stack,cell) + type(stackdens), intent(inout) :: stack + type(celldens), intent(out) :: cell + + if (stack%n <= 0) call fatal('density','attempting to pop empty stack') + cell = stack%cells(stack%n) + stack%n = stack%n - 1 +end subroutine pop_off_stack_dens + +subroutine pop_off_stack_force(stack,cell) + type(stackforce), intent(inout) :: stack + type(cellforce), intent(out) :: cell + + if (stack%n <= 0) call fatal('force','attempting to pop empty stack') + cell = stack%cells(stack%n) + stack%n = stack%n - 1 +end subroutine pop_off_stack_force + +subroutine reserve_stack_dens(stack,i) + type(stackdens), intent(inout) :: stack + integer, intent(out) :: i + + if (stack%n + 1 > stack%maxlength) call increase_mpi_memory + stack%n = stack%n + 1 + i = stack%n + +end subroutine reserve_stack_dens + +subroutine reserve_stack_force(stack,i) + type(stackforce), intent(inout) :: stack + integer, intent(out) :: i + + if (stack%n + 1 > stack%maxlength) call increase_mpi_memory + stack%n = stack%n + 1 + i = stack%n + +end subroutine reserve_stack_force + +subroutine reset_stacks + dens_stack_1%n=0 + dens_stack_2%n=0 + dens_stack_3%n=0 + + force_stack_1%n=0 + force_stack_2%n=0 +end subroutine reset_stacks + +end module mpimemory diff --git a/src/main/mpi_stack.F90 b/src/main/mpi_stack.F90 deleted file mode 100644 index 6856b4eef..000000000 --- a/src/main/mpi_stack.F90 +++ /dev/null @@ -1,254 +0,0 @@ -!--------------------------------------------------------------------------! -! The Phantom Smoothed Particle Hydrodynamics code, by Daniel Price et al. ! -! Copyright (c) 2007-2022 The Authors (see AUTHORS) ! -! See LICENCE file for usage and distribution conditions ! -! http://phantomsph.bitbucket.io/ ! -!--------------------------------------------------------------------------! -module mpistack -! -! None -! -! :References: None -! -! :Owner: Conrad Chan -! -! :Runtime parameters: None -! -! :Dependencies: dim, io, mpidens, mpiforce -! - use dim, only:stacksize - use io, only:fatal - use mpidens, only:celldens,stackdens - use mpiforce, only:cellforce,stackforce - - implicit none - - interface allocate_stack - module procedure allocate_stack_dens,allocate_stack_force - end interface allocate_stack - - interface deallocate_stack - module procedure deallocate_stack_dens,deallocate_stack_force - end interface deallocate_stack - - interface swap_stacks ! force doesn't require a stack swap - module procedure swap_stacks_dens - end interface swap_stacks - - interface push_onto_stack - module procedure push_onto_stack_dens,push_onto_stack_force - end interface push_onto_stack - - interface pop_off_stack - module procedure pop_off_stack_dens,pop_off_stack_force - end interface pop_off_stack - - interface reserve_stack - module procedure reserve_stack_dens,reserve_stack_force - end interface reserve_stack - - public :: init_mpi_memory - public :: finish_mpi_memory - public :: allocate_stack - public :: deallocate_stack - public :: swap_stacks - public :: push_onto_stack - public :: pop_off_stack - public :: reserve_stack - public :: reset_stacks - - ! 
stacks to be referenced from density and force routines - type(stackdens), public :: dens_stack_1 - type(stackdens), public :: dens_stack_2 - type(stackdens), public :: dens_stack_3 - type(stackforce), public :: force_stack_1 - type(stackforce), public :: force_stack_2 - - private - - ! primary chunk of memory requested using alloc - integer, parameter :: n_dens_cells = stacksize*3 - integer, parameter :: n_force_cells = stacksize*2 - type(celldens), allocatable, target :: dens_cells(:) - type(cellforce), allocatable, target :: force_cells(:) - - ! memory allocation counters - integer :: idens - integer :: iforce - -contains - -subroutine init_mpi_memory - integer :: allocstat - - allocate(dens_cells(n_dens_cells), stat = allocstat) - if (allocstat /= 0) call fatal('stack','fortran memory allocation error') - idens = 1 - call allocate_stack(dens_stack_1, idens) - call allocate_stack(dens_stack_2, idens) - call allocate_stack(dens_stack_3, idens) - if (idens - 1 > n_dens_cells) call fatal('stack','phantom memory allocation error') - - allocate(force_cells(n_force_cells), stat = allocstat) - if (allocstat /= 0) call fatal('stack','fortran memory allocation error') - iforce = 1 - call allocate_stack(force_stack_1,iforce) - call allocate_stack(force_stack_2,iforce) - if (iforce - 1 > n_force_cells) call fatal('stack','phantom memory allocation error') -end subroutine init_mpi_memory - -subroutine finish_mpi_memory - ! - !--Only called at the end, so deallocation_stack is not strictly necessary. - ! May be useful in the future. TODO: Allow for unordered deallocation. - ! - - ! call deallocate_stack(dens_stack_3, idens) - ! call deallocate_stack(dens_stack_2, idens) - ! call deallocate_stack(dens_stack_1, idens) - deallocate(dens_cells) - - ! call deallocate_stack(force_stack_2, idens) - ! call deallocate_stack(force_stack_1, idens) - deallocate(force_cells) -end subroutine finish_mpi_memory - -subroutine allocate_stack_dens(stack, i) - type(stackdens), intent(inout) :: stack - integer, intent(inout) :: i - - stack%mem_start = i - stack%mem_end = i + stacksize - 1 - - stack%cells => dens_cells(stack%mem_start:stack%mem_end) - stack%maxlength = stacksize - - i = i + stacksize -end subroutine allocate_stack_dens - -subroutine allocate_stack_force(stack, i) - type(stackforce), intent(inout) :: stack - integer, intent(inout) :: i - - stack%mem_start = i - stack%mem_end = i + stacksize - 1 - - stack%cells => force_cells(stack%mem_start:stack%mem_end) - stack%maxlength = stacksize - - i = i + stacksize -end subroutine allocate_stack_force - -subroutine deallocate_stack_dens(stack, i) - type(stackdens), intent(inout) :: stack - integer, intent(inout) :: i - - if (i - 1 /= stack%mem_end) call fatal('stack','memory deallocation not from top of stack') - i = stack%mem_start - 1 - stack%mem_start = -1 - stack%mem_end = -1 -end subroutine deallocate_stack_dens - -subroutine deallocate_stack_force(stack, i) - type(stackforce), intent(inout) :: stack - integer, intent(inout) :: i - - if (i - 1 /= stack%mem_end) call fatal('stack','memory deallocation not from top of stack') - i = stack%mem_start - 1 - stack%mem_start = -1 - stack%mem_end = -1 -end subroutine deallocate_stack_force - -subroutine swap_stacks_dens(stack_a, stack_b) - type(stackdens), intent(inout) :: stack_a - type(stackdens), intent(inout) :: stack_b - - integer :: temp_n - integer :: temp_start - integer :: temp_end - - if (stack_a%maxlength /= stack_b%maxlength) call fatal('stack', 'stack swap of unequal size') - - ! 
counters - temp_n = stack_a%n - stack_a%n = stack_b%n - stack_b%n = temp_n - - ! addresses - temp_start = stack_a%mem_start - temp_end = stack_a%mem_end - stack_a%mem_start = stack_b%mem_start - stack_a%mem_end = stack_b%mem_end - stack_b%mem_start = temp_start - stack_b%mem_end = temp_end - - ! change pointers - stack_a%cells => dens_cells(stack_a%mem_start:stack_a%mem_end) - stack_b%cells => dens_cells(stack_b%mem_start:stack_b%mem_end) - -end subroutine swap_stacks_dens - -subroutine push_onto_stack_dens(stack,cell) - type(stackdens), intent(inout) :: stack - type(celldens), intent(in) :: cell - - stack%n = stack%n + 1 - if (stack%n > stack%maxlength) call fatal('density','stack overflow') - stack%cells(stack%n) = cell -end subroutine push_onto_stack_dens - -subroutine push_onto_stack_force(stack,cell) - type(stackforce), intent(inout) :: stack - type(cellforce), intent(in) :: cell - - stack%n = stack%n + 1 - if (stack%n > stack%maxlength) call fatal('force','stack overflow') - stack%cells(stack%n) = cell -end subroutine push_onto_stack_force - -subroutine pop_off_stack_dens(stack,cell) - type(stackdens), intent(inout) :: stack - type(celldens), intent(out) :: cell - - if (stack%n <= 0) call fatal('density','attempting to pop empty stack') - cell = stack%cells(stack%n) - stack%n = stack%n - 1 -end subroutine pop_off_stack_dens - -subroutine pop_off_stack_force(stack,cell) - type(stackforce), intent(inout) :: stack - type(cellforce), intent(out) :: cell - - if (stack%n <= 0) call fatal('force','attempting to pop empty stack') - cell = stack%cells(stack%n) - stack%n = stack%n - 1 -end subroutine pop_off_stack_force - -subroutine reserve_stack_dens(stack,i) - type(stackdens), intent(inout) :: stack - integer, intent(out) :: i - - stack%n = stack%n + 1 - i = stack%n - if (stack%n > stack%maxlength) call fatal('density','stack overflow') -end subroutine reserve_stack_dens - -subroutine reserve_stack_force(stack,i) - type(stackforce), intent(inout) :: stack - integer, intent(out) :: i - - stack%n = stack%n + 1 - i = stack%n - if (stack%n > stack%maxlength) call fatal('force','stack overflow') -end subroutine reserve_stack_force - -subroutine reset_stacks - dens_stack_1%n=0 - dens_stack_2%n=0 - dens_stack_3%n=0 - - force_stack_1%n=0 - force_stack_2%n=0 -end subroutine reset_stacks - -end module mpistack diff --git a/src/main/part.F90 b/src/main/part.F90 index f4b356d17..8b2f91ace 100644 --- a/src/main/part.F90 +++ b/src/main/part.F90 @@ -633,6 +633,8 @@ subroutine init_part ibin(:) = 0 ibin_old(:) = 0 ibin_wake(:) = 0 + dt_in(:) = 0. + twas(:) = 0. #endif ideadhead = 0 diff --git a/src/main/readwrite_dumps_fortran.F90 b/src/main/readwrite_dumps_fortran.F90 index 3abac2392..59b870d07 100644 --- a/src/main/readwrite_dumps_fortran.F90 +++ b/src/main/readwrite_dumps_fortran.F90 @@ -772,9 +772,9 @@ subroutine read_dump_fortran(dumpfile,tfile,hfactfile,idisk1,iprint,id,nprocs,ie !--Allocate main arrays ! #ifdef INJECT_PARTICLES - call allocate_memory(maxp_hard) + call allocate_memory(int(maxp_hard,kind=8)) #else - call allocate_memory(int(min(nprocs,4)*nparttot/nprocs)) + call allocate_memory(nparttot) #endif endif ! @@ -964,9 +964,9 @@ subroutine read_smalldump_fortran(dumpfile,tfile,hfactfile,idisk1,iprint,id,npro !--Allocate main arrays ! #ifdef INJECT_PARTICLES - call allocate_memory(maxp_hard) + call allocate_memory(int(maxp_hard,kind=8)) #else - call allocate_memory(int(min(nprocs,3)*nparttot/nprocs)) + call allocate_memory(nparttot) #endif ! 
diff --git a/src/main/utils_allocate.f90 b/src/main/utils_allocate.f90
index f40dc5088..ce2cb2e6e 100644
--- a/src/main/utils_allocate.f90
+++ b/src/main/utils_allocate.f90
@@ -37,12 +37,14 @@ module allocutils
 allocate_array_real4_4d, &
 allocate_array_integer8_1d, &
 allocate_array_integer4_1d, &
+ allocate_array_integer4_1d_long, &
 allocate_array_integer4_2d, &
 allocate_array_integer4_3d, &
 allocate_array_integer1_1d, &
 allocate_array_integer1_2d, &
 allocate_array_integer1_3d, &
 allocate_array_kdnode_1d, &
+ allocate_array_kdnode_1d_long, &
 allocate_array_logical
 end interface allocate_array
@@ -58,7 +60,7 @@ subroutine allocate_array_real8_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'real(8)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'real(8)')
 end subroutine allocate_array_real8_1d
@@ -70,7 +72,7 @@ subroutine allocate_array_real8_2d(name, x, n1, n2)
 allocate(x(n1, n2), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2/), 'real(8)')
+ call print_allocation_stats(name, int((/n1, n2/),kind=8), 'real(8)')
 end subroutine allocate_array_real8_2d
@@ -82,7 +84,7 @@ subroutine allocate_array_real8_3d(name, x, n1, n2, n3)
 allocate(x(n1, n2, n3), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3/), 'real(8)')
+ call print_allocation_stats(name, int((/n1, n2, n3/),kind=8), 'real(8)')
 end subroutine allocate_array_real8_3d
@@ -94,7 +96,7 @@ subroutine allocate_array_real8_4d(name, x, n1, n2, n3, n4)
 allocate(x(n1, n2, n3, n4), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3, n4/), 'real(8)')
+ call print_allocation_stats(name, int((/n1, n2, n3, n4/),kind=8), 'real(8)')
 end subroutine allocate_array_real8_4d
@@ -106,7 +108,7 @@ subroutine allocate_array_real4_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'real(4)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'real(4)')
 end subroutine allocate_array_real4_1d
@@ -118,7 +120,7 @@ subroutine allocate_array_real4_2d(name, x, n1, n2)
 allocate(x(n1, n2), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2/), 'real(4)')
+ call print_allocation_stats(name, int((/n1, n2/),kind=8), 'real(4)')
 end subroutine allocate_array_real4_2d
@@ -130,7 +132,7 @@ subroutine allocate_array_real4_3d(name, x, n1, n2, n3)
 allocate(x(n1, n2, n3), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3/), 'real(4)')
+ call print_allocation_stats(name, int((/n1, n2, n3/),kind=8), 'real(4)')
 end subroutine allocate_array_real4_3d
@@ -142,7 +144,7 @@ subroutine allocate_array_real4_4d(name, x, n1, n2, n3, n4)
 allocate(x(n1, n2, n3, n4), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3, n4/), 'real(4)')
+ call print_allocation_stats(name, int((/n1, n2, n3, n4/),kind=8), 'real(4)')
 end subroutine allocate_array_real4_4d
@@ -154,7 +156,7 @@ subroutine allocate_array_integer8_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'integer(8)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'integer(8)')
 end subroutine allocate_array_integer8_1d
 subroutine allocate_array_integer4_1d(name, x, n1)
@@ -165,10 +167,22 @@ subroutine allocate_array_integer4_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'integer(4)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'integer(4)')
 end subroutine allocate_array_integer4_1d
+
+subroutine allocate_array_integer4_1d_long(name, x, n1)
+ character(len=*), intent(in) :: name
+ integer(kind=4), allocatable, intent(inout) :: x(:)
+ integer(kind=8), intent(in) :: n1
+ integer :: allocstat
+
+ allocate(x(n1), stat = allocstat)
+ call check_allocate(name, allocstat)
+ call print_allocation_stats(name, (/n1/), 'integer(4)')
+
+end subroutine allocate_array_integer4_1d_long
+
 subroutine allocate_array_integer4_2d(name, x, n1, n2)
 character(len=*), intent(in) :: name
 integer(kind=4), allocatable, intent(inout) :: x(:,:)
@@ -177,7 +191,7 @@ subroutine allocate_array_integer4_2d(name, x, n1, n2)
 allocate(x(n1, n2), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2/), 'integer(4)')
+ call print_allocation_stats(name, int((/n1, n2/),kind=8), 'integer(4)')
 end subroutine allocate_array_integer4_2d
@@ -189,7 +203,7 @@ subroutine allocate_array_integer4_3d(name, x, n1, n2, n3)
 allocate(x(n1, n2, n3), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3/), 'integer(4)')
+ call print_allocation_stats(name, int((/n1, n2, n3/),kind=8), 'integer(4)')
 end subroutine allocate_array_integer4_3d
@@ -201,7 +215,7 @@ subroutine allocate_array_integer1_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'integer(1)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'integer(1)')
 end subroutine allocate_array_integer1_1d
@@ -213,7 +227,7 @@ subroutine allocate_array_integer1_2d(name, x, n1, n2)
 allocate(x(n1, n2), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2/), 'integer(1)')
+ call print_allocation_stats(name, int((/n1, n2/),kind=8), 'integer(1)')
 end subroutine allocate_array_integer1_2d
@@ -225,7 +239,7 @@ subroutine allocate_array_integer1_3d(name, x, n1, n2, n3)
 allocate(x(n1, n2, n3), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1, n2, n3/), 'integer(1)')
+ call print_allocation_stats(name, int((/n1, n2, n3/),kind=8), 'integer(1)')
 end subroutine allocate_array_integer1_3d
@@ -237,10 +251,22 @@ subroutine allocate_array_kdnode_1d(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'kdnode')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'kdnode')
 end subroutine allocate_array_kdnode_1d
+
+subroutine allocate_array_kdnode_1d_long(name, x, n1)
+ character(len=*), intent(in) :: name
+ type(kdnode), allocatable, intent(inout) :: x(:)
+ integer(kind=8), intent(in) :: n1
+ integer :: allocstat
+
+ allocate(x(n1), stat = allocstat)
+ call check_allocate(name, allocstat)
+ call print_allocation_stats(name, (/n1/), 'kdnode')
+
+end subroutine allocate_array_kdnode_1d_long
+
 subroutine allocate_array_logical(name, x, n1)
 character(len=*), intent(in) :: name
 logical, allocatable, intent(inout) :: x(:)
@@ -249,7 +275,7 @@ subroutine allocate_array_logical(name, x, n1)
 allocate(x(n1), stat = allocstat)
 call check_allocate(name, allocstat)
- call print_allocation_stats(name, (/n1/), 'integer(4)')
+ call print_allocation_stats(name, int((/n1/),kind=8), 'integer(4)')
 end subroutine allocate_array_logical
@@ -263,7 +289,7 @@ end subroutine check_allocate
 subroutine print_allocation_stats(name, xdim, type)
 character(len=*), intent(in) :: name
- integer, intent(in) :: xdim(:)
+ integer(kind=8), intent(in) :: xdim(:)
 character(len=*), intent(in) :: type
 character(len=10) :: number
 character(len=14) :: dimstring
diff --git a/src/setup/phantomsetup.F90 b/src/setup/phantomsetup.F90
index b15d8c6b2..30380c5dc 100644
--- a/src/setup/phantomsetup.F90
+++ b/src/setup/phantomsetup.F90
@@ -93,7 +93,7 @@ program phantomsetup
 ! also rely on maxp being set to the number of desired particles. Allocate only
 ! part, not kdtree or linklist
 !
- call allocate_memory(maxp_hard, part_only=.true.)
+ call allocate_memory(int(maxp_hard,kind=8), part_only=.true.)
 call set_default_options
 !
diff --git a/src/setup/relax_star.f90 b/src/setup/relax_star.f90
index 6b52464b3..4c6d972a3 100644
--- a/src/setup/relax_star.f90
+++ b/src/setup/relax_star.f90
@@ -118,7 +118,7 @@ subroutine relax_star(nt,rho,pr,r,npart,xyzh)
 ! compute derivatives the first time around (needed if using actual step routine)
 !
 t = 0.
- call allocate_memory(2*npart)
+ call allocate_memory(int(2*npart,kind=8))
 call get_derivs_global()
 call reset_u_and_get_errors(npart,xyzh,vxyzu,nt,mr,rho,utherm,entrop,fix_entrop,rmax,rmserr)
 call compute_energies(t)
diff --git a/src/tests/phantomtest.f90 b/src/tests/phantomtest.f90
index d43cfa16e..fdf57e1d7 100644
--- a/src/tests/phantomtest.f90
+++ b/src/tests/phantomtest.f90
@@ -24,7 +24,7 @@ program phantomtest
 implicit none
 integer :: nargs,i,ntests,npass,nfail
 character(len=120) :: string
- integer, parameter :: maxp_test = 1000000
+ integer(kind=8), parameter :: maxp_test = 1000000
 ntests = 0
 npass = 0
diff --git a/src/tests/test_rwdump.F90 b/src/tests/test_rwdump.F90
index 51f305371..2e5e8b019 100644
--- a/src/tests/test_rwdump.F90
+++ b/src/tests/test_rwdump.F90
@@ -335,7 +335,7 @@ subroutine test_rwdump(ntests,npass)
 enddo over_tests
 call deallocate_memory
- call allocate_memory(maxp_old)
+ call allocate_memory(int(maxp_old,kind=8))
 if (id==master) write(*,"(/,a)") '<-- READ/WRITE TEST COMPLETE'
 end subroutine test_rwdump
diff --git a/src/utils/combinedustdumps.f90 b/src/utils/combinedustdumps.f90
index 0d66a219d..392522677 100644
--- a/src/utils/combinedustdumps.f90
+++ b/src/utils/combinedustdumps.f90
@@ -91,7 +91,7 @@ program combinedustdumps
 !
 ! allocate memory
 !
- call allocate_memory(counter)
+ call allocate_memory(int(counter,kind=8))
 !
 ! read gas particles from first file
 !
diff --git a/src/utils/phantom_moddump.f90 b/src/utils/phantom_moddump.f90
index 8035e7c09..223726376 100644
--- a/src/utils/phantom_moddump.f90
+++ b/src/utils/phantom_moddump.f90
@@ -111,7 +111,7 @@ program phantommoddump
 ! will be reallocated automatically if npart > maxp_hard
 ! but allows user to manually preset array sizes if necessary
 !
- call allocate_memory(maxp_hard)
+ call allocate_memory(int(maxp_hard,kind=8))
 !
 !--read particle setup from dumpfile
 !
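
All of the call sites above now pass the requested size as a 64-bit integer via int(...,kind=8); inside allocutils the generic allocate_array interface resolves to the specific routine whose size argument matches that kind, which is what the new allocate_array_integer4_1d_long and allocate_array_kdnode_1d_long specifics provide. A minimal standalone sketch of this kind-based dispatch follows; module, routine and variable names (alloc_demo, allocate_real_int4, ...) are hypothetical and are not the Phantom interface.

! Illustrative sketch only: generic-interface dispatch on the kind of the
! size argument, the mechanism the *_long allocators above rely on.
module alloc_demo
 implicit none
 interface allocate_array
    module procedure allocate_real_int4, allocate_real_int8
 end interface allocate_array
contains
 subroutine allocate_real_int4(x, n)
    real, allocatable, intent(inout) :: x(:)
    integer(kind=4),   intent(in)    :: n
    if (allocated(x)) deallocate(x)
    allocate(x(n))
 end subroutine allocate_real_int4

 subroutine allocate_real_int8(x, n)
    real, allocatable, intent(inout) :: x(:)
    integer(kind=8),   intent(in)    :: n   ! sizes beyond 2**31-1 are representable
    if (allocated(x)) deallocate(x)
    allocate(x(n))
 end subroutine allocate_real_int8
end module alloc_demo

program demo
 use alloc_demo
 implicit none
 real, allocatable :: a(:)
 call allocate_array(a, 1000)                ! resolves to the integer(kind=4) specific
 call allocate_array(a, int(5000, kind=8))   ! resolves to the integer(kind=8) specific
end program demo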