From 0d599e8b825e0a7fe08f9ea130b85500b5123159 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 2 Apr 2024 14:46:06 +0200 Subject: [PATCH 01/28] try to improve particle exchange (CPU for now) --- doc/Sphinx/implementation.rst | 2 +- src/ParticleBC/BoundaryConditionType.cpp | 30 +- src/Particles/Particles.cpp | 118 ++++- src/Particles/Particles.h | 4 + src/Patch/Patch.cpp | 534 ++++++++++------------- src/Patch/Patch.h | 4 +- src/Patch/SyncVectorPatch.cpp | 60 ++- src/Patch/SyncVectorPatch.h | 7 +- src/Patch/VectorPatch.cpp | 22 +- src/Patch/VectorPatch.h | 4 +- src/Smilei.cpp | 2 +- src/SmileiMPI/AsyncMPIbuffers.cpp | 22 +- src/SmileiMPI/AsyncMPIbuffers.h | 12 +- src/Species/Species.cpp | 37 +- src/Species/SpeciesV.cpp | 48 +- src/Species/SpeciesVAdaptive.cpp | 10 +- src/Tools/Timers.cpp | 2 +- 17 files changed, 478 insertions(+), 440 deletions(-) diff --git a/doc/Sphinx/implementation.rst b/doc/Sphinx/implementation.rst index 46bf953e9..0d35165b2 100644 --- a/doc/Sphinx/implementation.rst +++ b/doc/Sphinx/implementation.rst @@ -547,7 +547,7 @@ file ``Smilei.cpp`` thought calls to different ``vecPatches`` methods. .. code-block:: c++ - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); * **Particle merging**: merging process for particles (still experimental) diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 318b6b289..5a55d74b2 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -28,9 +28,9 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= 0 && position[ ipart ] < limit_inf ) { + cell_keys[ ipart ] = -2 - 2 * direction; } } } @@ -50,9 +50,9 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= limit_sup) { - cell_keys[ ipart ] = -1; + for( int ipart=imin ; ipart= 0 && position[ ipart ] >= limit_sup ) { + cell_keys[ ipart ] = -3 - 2 * direction; } } } @@ -63,10 +63,11 @@ void internal_inf_AM( Species *species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= 0 && distance2ToAxis < limit_inf2 ) { + cell_keys[ ipart ] = -4; } } } @@ -77,10 +78,11 @@ void internal_sup_AM( Species *species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - cell_keys[ ipart ] = -1; + if( cell_keys[ ipart ] >= 0 && distance2ToAxis >= limit_sup2 ) { + cell_keys[ ipart ] = -5; } } } @@ -97,8 +99,8 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction, #pragma omp target is_device_ptr( position, momentum ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart indices, Particles &dest_parts, int dest_id ) +{ + const size_t transfer_size = indices.size(); + const size_t dest_new_size = dest_parts.size() + transfer_size; + + for( unsigned int 
iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + transfer_size, dest_parts.double_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + transfer_size, dest_parts.short_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.short_prop_[iprop] )[dest_id+i] = ( *short_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + transfer_size, dest_parts.uint64_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]]; + } + } +} + // --------------------------------------------------------------------------------------------------------------------- //! Make a new particle at the position of another //! cell keys not affected @@ -529,6 +573,70 @@ void Particles::eraseParticle( unsigned int ipart, unsigned int npart, bool comp } + +// --------------------------------------------------------------------------------------------------------------------- +//! Erase particles indexed by array 'indices' to dest_id in dest_parts +//! The array 'indices' must be sorted in increasing order +//! cell keys not affected +// --------------------------------------------------------------------------------------------------------------------- +void Particles::eraseParticles( vector indices ) +{ + const size_t indices_size = indices.size(); + const size_t initial_size = size(); + + if( indices_size > 0 ) { + + for( auto prop : double_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : short_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : uint64_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? 
initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + } +} + // --------------------------------------------------------------------------------------------------------------------- // Print parameters of particle iPart // --------------------------------------------------------------------------------------------------------------------- @@ -1198,11 +1306,11 @@ void Particles::copyFromDeviceToHost() void Particles::extractParticles( Particles* particles_to_move ) { particles_to_move->clear(); - for ( int ipart=0 ; ipart indices, Particles &dest_parts, int dest_id ); //! Make a new particle at the position of another void makeParticleAt( Particles &source_particles, unsigned int ipart, double w, short q=0., double px=0., double py=0., double pz=0. ); @@ -151,6 +153,8 @@ class Particles void eraseParticle( unsigned int iPart, bool compute_cell_keys = false ); //! Suppress nPart particles from iPart void eraseParticle( unsigned int iPart, unsigned int nPart, bool compute_cell_keys = false ); + //! Suppress indexed particles + void eraseParticles( std::vector indices ); //! Suppress all particles from iPart to the end of particle array void eraseParticleTrail( unsigned int iPart, bool compute_cell_keys = false ); diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index b8ed401d9..0bf353e67 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -517,220 +517,184 @@ void Patch::updateMPIenv( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void Patch::cleanMPIBuffers( int ispec, Params ¶ms ) { - int ndim = params.nDim_field; + size_t ndim = params.nDim_field; + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; - for( int iDim=0 ; iDim < ndim ; iDim++ ) { + for( size_t iDim=0 ; iDim < ndim ; iDim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].clear(); - //vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize(0); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; + buffer.partRecv[iDim][iNeighbor]->clear(); + buffer.partSend[iDim][iNeighbor]->clear(); } } } // cleanMPIBuffers // --------------------------------------------------------------------------------------------------------------------- -// Split particles Id to send in per direction and per patch neighbor dedicated buffers -// Apply periodicity if necessary +// Copy particles to be exchanged to buffers // --------------------------------------------------------------------------------------------------------------------- -void Patch::initExchParticles( int ispec, Params ¶ms ) +void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - int ndim = params.nDim_field; - int idim, check; -// double xmax[3]; - - for( int iDim=0 ; iDim < ndim ; iDim++ ) { - for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - } + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + Particles &part = 
*vecSpecies[ispec]->particles; + + cleanMPIBuffers( ispec, params ); + + vector> copy( 3 ); + copy[0] = { neighbor_[0][0] != MPI_PROC_NULL, neighbor_[0][1] != MPI_PROC_NULL }; + copy[1] = { neighbor_[1][0] != MPI_PROC_NULL, neighbor_[1][1] != MPI_PROC_NULL }; + if( params.nDim_field > 2 ) { + copy[2] = { neighbor_[2][0] != MPI_PROC_NULL, neighbor_[2][1] != MPI_PROC_NULL }; } - - int n_part_send = cuParticles.size(); - - int iPart; - - // Define where particles are going - //Put particles in the send buffer it belongs to. Priority to lower dimensions. - if( params.geometry != "AMcylindrical" ) { - for( int i=0 ; iMPI_buffer_.part_index_send[idim][0].push_back( iPart ); - } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. - check = 1; - } else if( cuParticles.position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( iPart ); - } - check = 1; + if( params.geometry == "AMcylindrical" ) { + copy[0][0] = copy[0][0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" ); + copy[0][1] = copy[0][1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); + } + + // Loop all particles and count the outgoing ones + for( size_t ipart = 0; ipart < part.size(); ipart++ ) { + if( part.cell_keys[ipart] < -1 ) { + if( part.cell_keys[ipart] == -2 ) { + if( copy[0][0] ) { + part.copyParticle( ipart, *buffer.partSend[0][0] ); } - idim++; - } - } - } else { //if (geometry == "AMcylindrical") - double r_min2, r_max2; - r_max2 = max_local_[1] * max_local_[1] ; - r_min2 = min_local_[1] * min_local_[1] ; - for( int i=0 ; iboundary_conditions_[0][0]!="periodic" ) ) { - continue; - } - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][0].push_back( iPart ); - //MESSAGE("Sending particle to the left x= " << cuParticles.position(0,iPart) << " xmin = " << min_local_[0] ); + } else if( part.cell_keys[ipart] == -3 ) { + if( copy[0][1] ) { + part.copyParticle( ipart, *buffer.partSend[0][1] ); } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. 
- } else if( cuParticles.position( 0, iPart ) >= max_local_[0] ) { - if ( (Pcoordinates[0]==params.number_of_patches[0]-1) && ( vecSpecies[ispec]->boundary_conditions_[0][1]!="periodic" ) ) { - continue; + } else if( part.cell_keys[ipart] == -4 ) { + if( copy[1][0] ) { + part.copyParticle( ipart, *buffer.partSend[1][0] ); } - if( neighbor_[0][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][1].push_back( iPart ); - // MESSAGE("Sending particle to the right x= " << cuParticles.position(0,iPart) << " xmax = " << max_local_[0] ); + } else if( part.cell_keys[ipart] == -5 ) { + if( copy[1][1] ) { + part.copyParticle( ipart, *buffer.partSend[1][1] ); } - } else if( cuParticles.distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( iPart ); - //MESSAGE("Sending particle to the south r= " << cuParticles.distance2ToAxis(iPart) << " rmin2 = " << r_min2 ); + } else if( part.cell_keys[ipart] == -6 ) { + if( copy[2][0] ) { + part.copyParticle( ipart, *buffer.partSend[2][0] ); } - } else if( cuParticles.distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( iPart ); - //MESSAGE("Sending particle to the north r= " << cuParticles.distance2ToAxis(iPart) << " rmax2 = " << r_max2 << " rmin2= " << r_min2 ); + } else if( part.cell_keys[ipart] == -7 ) { + if( copy[2][1] ) { + part.copyParticle( ipart, *buffer.partSend[2][1] ); } } - } } -} // initExchParticles(... iDim) +} // copyExchParticlesToBuffers(... iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, start exchange of number of particles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : inhereted from previous SmileiMPI::exchangeParticles() +// Exchange number of particles to exchange to establish or not a communication // --------------------------------------------------------------------------------------------------------------------- void Patch::exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int h0 = ( *vecPatch )( 0 )->hindex; - /********************************************************************************/ - // Exchange number of particles to exchange to establish or not a communication - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborsize(); + + // Send number of particles from neighbor if( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - //If neighbour is MPI ==> I send him the number of particles I'll send later. 
int local_hindex = hindex - vecPatch->refHindex_; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Isend( &( vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] ), 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); + MPI_Isend( &buffer.partSendSize[iDim][iNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &buffer.srequest[iDim][iNeighbor] ); } else { - //Else, I directly set the receive size to the correct value. - ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] = vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor]; + // If the destination is in the same MPI, directly set the number at destination + int destination_hindex = neighbor_[iDim][iNeighbor] - vecPatch->refHindex_; + SpeciesMPIbuffers &destination_buffer = ( *vecPatch )( destination_hindex )->vecSpecies[ispec]->MPI_buffer_; + destination_buffer.partRecvSize[iDim][iOppositeNeighbor] = buffer.partSendSize[iDim][iNeighbor]; } - } // END of Send - - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - //If other neighbour is MPI ==> I receive the number of particles I'll receive later. - int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; + } + + // Receive number of particles from neighbor + if( neighbor_[iDim][iOppositeNeighbor]!=MPI_PROC_NULL ) { + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] ), 1, MPI_INT, MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); + MPI_Irecv( &buffer.partRecvSize[iDim][iOppositeNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); } } - }//end loop on nb_neighbors. - + + } + } // exchNbrOfParticles(... 
iDim) +// --------------------------------------------------------------------------------------------------------------------- +// Wait for end of communications over number of particles +// --------------------------------------------------------------------------------------------------------------------- void Patch::endNbrOfParticles( int ispec, int iDim ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - /********************************************************************************/ - // Wait for end of communications over number of particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - } + int iOppositeNeighbor = ( iNeighbor+1 )%2; + + MPI_Status sstat[2]; + MPI_Status rstat[2]; + if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &( buffer.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); } - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - if( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]!=0 ) { - //If I receive particles over MPI, I initialize my receive buffer with the appropriate size. - vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2].initialize( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2], cuParticles ); - } - } + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &( buffer.rrequest[iDim][iOppositeNeighbor] ), &( rstat[iOppositeNeighbor] ) ); } } - } // END endNbrOfParticles(... iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of number of particles and really send particles +// For direction iDim, prepare particles to be sent // - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) // - smpi : used smpi->periods_ // --------------------------------------------------------------------------------------------------------------------- void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_send; - int h0 = ( *vecPatch )( 0 )->hindex; double x_max = params.cell_length[iDim]*( params.global_size_[iDim] ); - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Enabled periodicity + + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + + // Enabled periodicity + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partSend.size() != 0 ) { if( smpi->periods_[iDim]==1 ) { - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) < 0. 
) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) += x_max; - } else if( ( iNeighbor==1 ) && ( Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) && ( cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) >= x_max ) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) -= x_max; + if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) < 0. ) { + partSend.position( iDim, iPart ) += x_max; + } + } + } + if( iNeighbor == 1 && Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) >= x_max ) { + partSend.position( iDim, iPart ) -= x_max; + } } } } - // Send particles + } + + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) { + // Initialize receive buffer with the appropriate size if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // If MPI comm, first copy particles in the sendbuffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ); - } - } else { - //If not MPI comm, copy particles directly in the receive buffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], ( ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); + if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) { + buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles ); } } - } // END of Send - + // Swap particles to other patch directly if it belongs to the same MPI + else { + int iOppositeNeighbor = ( iNeighbor+1 )%2; + SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_; + swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] ); + } + } + } // END for iNeighbor } // END prepareParticles(... 
iDim) @@ -738,169 +702,135 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iD void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int n_part_send, n_part_recv; - - for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Send particles - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // Then send particles - int local_hindex = hindex - vecPatch->refHindex_; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ) ); - MPI_Isend( &( ( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); - } - } // END of Send - - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - // If MPI comm, receive particles in the recv buffer previously initialized. - vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); - int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); - } - - } // END of Recv - - } // END for iNeighbor - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( int iNeighbor=0; iNeighborsize()<<" n_recv "<size()); + // Send + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) { + int local_hindex = hindex - vecPatch->refHindex_; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partSend ); + MPI_Isend( &partSend.position( 0, 0 ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( buffer.srequest[iDim][iNeighbor] ) ); + } + + // Receive + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + // MESSAGE(" patch "<typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv ); + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + MPI_Irecv( &partRecv.position( 0, 0 ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); + } + + } + } // END exchParticles(... 
iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of particles, temporary store particles if diagonalParticles -// And store recv particles at their definitive place. -// Call Patch::cleanupSentParticles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : used smpi->periods_ +// For direction iDim, wait receive of particles // --------------------------------------------------------------------------------------------------------------------- -void Patch::finalizeExchParticles( int ispec, int iDim ) +void Patch::waitExchParticles( int ispec, int iDim ) { - - int n_part_send, n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor].size(); - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ) ); - } + + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + + if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &buffer.srequest[iDim][iNeighbor], &sstat[iNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ); } - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ) ); - } + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &buffer.rrequest[iDim][iOppositeNeighbor], &rstat[iOppositeNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ); } } } void Patch::cornersParticles( int ispec, Params ¶ms, int iDim ) { - int ndim = params.nDim_field; - int idim, check; - - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + // No need to treat diag particles at last dimension + if( iDim == ndim-1 ) { + return; + } + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - - // Treat diagonalParticles - if( iDim < ndim-1 ) { // No need to treat diag particles at last dimension. 
- if( params.geometry != "AMcylindrical" ) { - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - check = 0; - idim = iDim+1;//We check next dimension - while( check == 0 && idimMPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) < min_local_[idim] ) { - if( neighbor_[idim][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - //... and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][0].push_back( cuParticles.size()-1 ); - } - //Remove it from receive buffer. - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { //if neighbour exists - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - idim++; + + Particles &partRecv = *buffer.partRecv[iDim][iNeighbor]; + + vector> indices_corner_min( ndim-iDim-1 ); + vector> indices_corner_max( ndim-iDim-1 ); + vector indices_all_corners; + + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partRecv.size() != 0 ) { + + // Find corner particles and store their indices + if( params.geometry != "AMcylindrical" ) { + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( partRecv.position( otherDim, iPart ) < min_local_[otherDim] ) { + indices_corner_min[otherDim-iDim-1].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.position( otherDim, iPart ) >= max_local_[otherDim] ) { + indices_corner_max[otherDim-iDim-1].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; } } - } else { //In AM geometry - //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. - double r_min2, r_max2; - r_min2 = min_local_[1]*min_local_[1]; - r_max2 = max_local_[1]*max_local_[1]; - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - //MESSAGE("test particle diag r2 = " << (vecSpecies[ispec]->MPI_buffer_.partRecv[0][(iNeighbor+1)%2]).distance2ToAxis(iPart) << "rmin2 = " << r_min2 << " rmax2 = " << r_max2 ); - if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - //... 
and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( cuParticles.size()-1 ); - //..without forgeting to add it to the list of particles to clean. - } - //Remove it from receive buffer. - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { //if neighbour exists - //MESSAGE("particle diag +R"); - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } + } + + } else { //In AM geometry + + //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. + double r_min2 = min_local_[1]*min_local_[1]; + double r_max2 = max_local_[1]*max_local_[1]; + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + if( partRecv.distance2ToAxis( iPart ) < r_min2 ) { + indices_corner_min[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.distance2ToAxis( iPart ) >= r_max2 ) { + indices_corner_max[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; } } - }//If not last dim for diagonal particles. 
+ + } + + // Copy corner particles to the start or the end of the particles to be sent for the following dimension + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], 0 ); + } + if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() ); + } + } + + // Erase corner particles from the current recv array + if( indices_all_corners.size() > 0 ) { + partRecv.eraseParticles( indices_all_corners ); + } + } //If received something } //loop i Neighbor } @@ -925,22 +855,20 @@ void Patch::importAndSortParticles( int ispec, Params ¶ms ) void Patch::cleanParticlesOverhead( Params ¶ms ) { - int ndim = params.nDim_field; + for( unsigned int ispec=0 ; ispecparticles ); - - for( int idim = 0; idim < ndim; idim++ ) { + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( size_t idim = 0; idim < params.nDim_field; idim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partRecv[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor].clear(); - vector( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ).swap( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ); + buffer.partRecv[idim][iNeighbor]->clear(); + buffer.partRecv[idim][iNeighbor]->shrinkToFit( ); + buffer.partSend[idim][iNeighbor]->clear(); + buffer.partSend[idim][iNeighbor]->shrinkToFit( ); } } - - cuParticles.shrinkToFit( ); + + vecSpecies[ispec]->particles->shrinkToFit( ); } } diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h index 6fc3f7578..ff5a76a5c 100755 --- a/src/Patch/Patch.h +++ b/src/Patch/Patch.h @@ -174,7 +174,7 @@ class Patch //! Clean the MPI buffers for communications void cleanMPIBuffers( int ispec, Params ¶ms ); //! manage Idx of particles per direction, - void initExchParticles( int ispec, Params ¶ms ); + void copyExchParticlesToBuffers( int ispec, Params ¶ms ); //! init comm nbr of particles void exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize comm / nbr of particles, init exch / particles @@ -184,7 +184,7 @@ class Patch //! effective exchange of particles void exchParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize exch / particles - void finalizeExchParticles( int ispec, int iDim ); + void waitExchParticles( int ispec, int iDim ); //! Treat diagonalParticles void cornersParticles( int ispec, Params ¶ms, int iDim ); //! 
inject particles received in main data structure and particles sorting diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp index 09817b201..675529113 100755 --- a/src/Patch/SyncVectorPatch.cpp +++ b/src/Patch/SyncVectorPatch.cpp @@ -24,26 +24,15 @@ template void SyncVectorPatch::exchangeAlongAllDirections,cField template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP,cField>( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); -void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchextractParticles(); - vecPatches( ipatch )->initExchParticles( ispec, params ); - } - - // Init comm in direction 0 -#ifndef _NO_MPI_TM - #pragma omp for schedule(runtime) -#else - #pragma omp single -#endif - for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, 0, &vecPatches ); + vecPatches( ipatch )->copyExchParticlesToBuffers( ispec, params ); } + + // Start exchange along dimension 0 only + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); } // --------------------------------------------------------------------------------------------------------------------- @@ -52,24 +41,17 @@ void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Par //! - the importation of the new particles in the particle property arrays //! - the sorting of particles // --------------------------------------------------------------------------------------------------------------------- -void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, 0, params, smpi ); - - // Per direction + // finish exchange along dimension 0 only + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); + + // Other directions for( unsigned int iDim=1 ; iDimexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); - } - - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); } - + #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchimportAndSortParticles( ispec, params ); @@ -108,8 +90,20 @@ void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int isp } +void SyncVectorPatch::initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +{ + // Exchange numbers of particles in direction 0 only +#ifndef _NO_MPI_TM + #pragma omp for schedule(runtime) +#else + #pragma omp single +#endif + for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); + } +} -void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int 
ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) { #ifndef _NO_MPI_TM #pragma omp for schedule(runtime) @@ -140,7 +134,7 @@ void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int is #pragma omp single #endif for( unsigned int ipatch=0 ; ipatchfinalizeExchParticles( ispec, iDim ); + vecPatches( ipatch )->waitExchParticles( ispec, iDim ); } #pragma omp for schedule(runtime) diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h index 0ce868cae..0322c1283 100755 --- a/src/Patch/SyncVectorPatch.h +++ b/src/Patch/SyncVectorPatch.h @@ -17,9 +17,10 @@ class SyncVectorPatch public : //! Particles synchronization - static void exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); //! Densities synchronization static void sumRhoJ( Params ¶ms, VectorPatch &vecPatches, SmileiMPI *smpi ); diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index 0c2fbb036..22d976ba2 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -322,7 +322,7 @@ void VectorPatch::initialParticleSorting( Params ¶ms ) } // --------------------------------------------------------------------------------------------------------------------- -// For all patches, move particles (restartRhoJ(s), dynamics and exchangeParticles) +// For all patches, move particles (restartRhoJ(s), dynamics and initExchParticles) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::dynamics( Params ¶ms, SmileiMPI *smpi, @@ -402,7 +402,7 @@ void VectorPatch::dynamics( Params ¶ms, for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { Species *spec = species( 0, ispec ); if ( (!params.Laser_Envelope_model) && (spec->isProj( time_dual, simWindow )) ){ - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on Species and on envelope model } // end loop on species //MESSAGE("exchange particles"); @@ -460,7 +460,7 @@ void VectorPatch::projectionForDiags( Params ¶ms, // --------------------------------------------------------------------------------------------------------------------- //! For all patches, exchange particles and sort them. 
// --------------------------------------------------------------------------------------------------------------------- -void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, +void VectorPatch::finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ) { timers.syncPart.restart(); @@ -471,7 +471,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::finalizeAndSortParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::finalizeExchParticlesAndSort( ( *this ), ispec, params, smpi ); // Included sortParticles } } @@ -491,7 +491,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim timers.syncPart.update( params.printNow( itime ) ); -} // END finalizeAndSortParticles +} // END finalizeExchParticlesAndSort //! Perform the particles merging on all patches @@ -3030,7 +3030,7 @@ void VectorPatch::createPatches( Params ¶ms, SmileiMPI *smpi, SimWindow *sim // Set Index of the 1st patch of the vector yet on current MPI rank // Is this really necessary ? It should be done already ... - refHindex_ = ( *this )( 0 )->Hindex(); + setRefHindex(); // Current number of patch int nPatches_now = this->size() ; @@ -4645,7 +4645,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrents( Params ¶ms, timers.syncPart.restart(); for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on species } // end loop on species timers.syncPart.update( params.printNow( itime ) ); @@ -5421,7 +5421,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5437,7 +5437,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5657,7 +5657,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5675,7 +5675,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h index 35be9ee6b..ff1493813 100755 --- a/src/Patch/VectorPatch.h +++ b/src/Patch/VectorPatch.h @@ -138,7 +138,7 @@ public : //! Particle sorting for all patches. This is done at initialization time. void initialParticleSorting( Params ¶ms ); - //! For all patch, move particles (restartRhoJ(s), dynamics and exchangeParticles) + //! For all patch, move particles (restartRhoJ(s), dynamics and initExchParticles) void dynamics( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, @@ -157,7 +157,7 @@ public : Timers &timers, int itime ); //! For all patches, exchange particles and sort them. - void finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, + void finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ); void finalizeSyncAndBCFields( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 15cd7b047..0ab0db1a2 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -629,7 +629,7 @@ int main( int argc, char *argv[] ) #pragma omp parallel shared (time_dual,smpi,params, vecPatches, region, simWindow, checkpoint, itime) { // finalize particle exchanges and sort particles - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); // Particle merging diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index 0f7cebe9d..a5a53dbb0 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -66,6 +66,12 @@ SpeciesMPIbuffers::SpeciesMPIbuffers() SpeciesMPIbuffers::~SpeciesMPIbuffers() { + for( size_t i=0 ; i > partRecv; + std::vector< std::vector > partRecv; //! ndim vectors of 2 received packets of particles (1 per direction) - std::vector< std::vector > partSend; + std::vector< std::vector > partSend; - //! ndim vectors of 2 vectors of index particles to send (1 per direction) - //! - not sent - // - used to sort Species::indexes_of_particles_to_exchange built in Species::dynamics - std::vector< std::vector< std::vector > > part_index_send; //! 
ndim vectors of 2 numbers of particles to send (1 per direction) - std::vector< std::vector< unsigned int > > part_index_send_sz; + std::vector< std::vector< unsigned int > > partSendSize; //! ndim vectors of 2 numbers of particles to receive (1 per direction) - std::vector< std::vector< unsigned int > > part_index_recv_sz; + std::vector< std::vector< unsigned int > > partRecvSize; }; diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 37462566f..31ab4c1a5 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -378,11 +378,8 @@ void Species::initOperators( Params ¶ms, Patch *patch ) partBoundCond = new PartBoundCond( params, this, patch ); for( unsigned int iDim=0 ; iDim < nDim_field ; iDim++ ) { for( unsigned int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - MPI_buffer_.partRecv[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.partSend[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = 0; + MPI_buffer_.partRecv[iDim][iNeighbor]->initialize( 0, ( *particles ) ); + MPI_buffer_.partSend[iDim][iNeighbor]->initialize( 0, ( *particles ) ); } } typePartSend.resize( nDim_field*2, MPI_DATATYPE_NULL ); @@ -1774,10 +1771,10 @@ void Species::sortParticles( Params ¶ms ) // Merge all MPI_buffer_.partRecv in particles_to_move for( int idim = 0; idim < params.nDim_field; idim++ ) { for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) { - int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; - if( ( n_part_recv != 0 ) ) { + int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); + if( n_part_recv != 0 ) { // insert n_part_recv in particles_to_move from 0 - MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, + MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, n_part_recv, *particles_to_move, particles_to_move->size() ); @@ -1809,10 +1806,10 @@ void Species::sortParticles( Params ¶ms ) //Merge all MPI_buffer_.partRecv in particles_to_move // for( int idim = 0; idim < ndim; idim++ ) { // for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - // int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; + // int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); // if( ( n_part_recv!=0 ) ) { // // insert n_part_recv in particles_to_move from 0 - // //MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, n_part_recv, *particles_to_move, 0 ); + // //MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, n_part_recv, *particles_to_move, 0 ); // total_number_part_recv += n_part_recv; // //particles->last_index[particles->last_index.size()-1] += n_part_recv; // //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv); @@ -1825,7 +1822,7 @@ void Species::sortParticles( Params ¶ms ) // Sort to adapt do cell_keys usage std::vector indexes_of_particles_to_exchange; for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) { - if ( particles->cell_keys[ipart] == -1 ) { + if ( particles->cell_keys[ipart] < 0 ) { indexes_of_particles_to_exchange.push_back( ipart ); } } @@ -1900,15 +1897,15 @@ void Species::sortParticles( Params ¶ms ) //Evaluation of the necessary shift of all bins.2 //idim=0 - shift[1] += MPI_buffer_.part_index_recv_sz[0][0];//Particles coming from xmin all go to bin 0 and shift all the other bins. 
- shift[particles->last_index.size()] += MPI_buffer_.part_index_recv_sz[0][1];//Used only to count the total number of particles arrived. + shift[1] += MPI_buffer_.partRecv[0][0]->size();//Particles coming from xmin all go to bin 0 and shift all the other bins. + shift[particles->last_index.size()] += MPI_buffer_.partRecv[0][1]->size();//Used only to count the total number of particles arrived. //idim>0 for( idim = 1; idim < ndim; idim++ ) { for( int iNeighbor=0 ; iNeighborsize(); for( unsigned int j=0; j<( unsigned int )n_part_recv ; j++ ) { //We first evaluate how many particles arrive in each bin. - ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. shift[ii+1]++; // It makes the next bins shift. } } @@ -1943,11 +1940,11 @@ void Species::sortParticles( Params ¶ms ) //Space has been made now to write the arriving particles into the correct bins //idim == 0 is the easy case, when particles arrive either in first or last bin. for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[0][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { ii = iNeighbor*( particles->last_index.size()-1 ); //0 if iNeighbor=0(particles coming from Xmin) and particles->last_index.size()-1 otherwise. - MPI_buffer_.partRecv[0][iNeighbor].overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); + MPI_buffer_.partRecv[0][iNeighbor]->overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); particles->last_index[ii] += n_part_recv ; } } @@ -1955,12 +1952,12 @@ void Species::sortParticles( Params ¶ms ) for( idim = 1; idim < ndim; idim++ ) { //if (idim!=iDim) continue; for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[idim][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { for( unsigned int j=0; j<( unsigned int )n_part_recv; j++ ) { - ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. - MPI_buffer_.partRecv[idim][iNeighbor].overwriteParticle( j, *particles, particles->last_index[ii] ); + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. 
+                    ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes.
+                    MPI_buffer_.partRecv[idim][iNeighbor]->overwriteParticle( j, *particles, particles->last_index[ii] );
                     particles->last_index[ii] ++ ;
                 }
             }
diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp
index 98d5d9dbb..89d12b340 100755
--- a/src/Species/SpeciesV.cpp
+++ b/src/Species/SpeciesV.cpp
@@ -518,7 +518,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec,
                     nrj_lost_per_thd[tid] += mass_ * energy_lost;
                     //     for( iPart=particles->first_index[ipack*packsize_+scell] ; iPart<particles->last_index[ipack*packsize_+scell]; iPart++ ) {
-                    //         if ( particles->cell_keys[iPart] != -1 ) {
+                    //         if ( particles->cell_keys[iPart] >= 0 ) {
                     //             //Compute cell_keys of remaining particles
                     //             for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                     //                 particles->cell_keys[iPart] *= length_[i];
@@ -552,7 +552,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec,
                     //     if( mass_>0 ) {
                     //         for( iPart=particles->first_index[ipack*packsize_+scell] ; iPart<particles->last_index[ipack*packsize_+scell]; iPart++ ) {
-                    //             if ( particles->cell_keys[iPart] != -1 ) {
+                    //             if ( particles->cell_keys[iPart] >= 0 ) {
                     //                 //Compute cell_keys of remaining particles
                     //                 for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                     //                     particles->cell_keys[iPart] *= this->length_[i];
@@ -564,7 +564,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec,
                     //     }
                     //     for( iPart=particles->first_index[ipack*packsize_+scell] ; iPart<particles->last_index[ipack*packsize_+scell]; iPart++ ) {
-                    //         if ( particles->cell_keys[iPart] != -1 ) {
+                    //         if ( particles->cell_keys[iPart] >= 0 ) {
                     //             //Compute cell_keys of remaining particles
                     //             for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                     //                 particles->cell_keys[iPart] *= this->length_[i];
@@ -1053,7 +1053,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec,
             if( mass_>0 ) {
                 for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) {
                     for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPart<( int )particles->last_index[ipack*packsize_+scell]; iPart++ ) {
-                        if ( particles->cell_keys[iPart] != -1 ) {
+                        if ( particles->cell_keys[iPart] >= 0 ) {
                             //Compute cell_keys of remaining particles
                             for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                                 particles->cell_keys[iPart] *= this->length_[i];
@@ -1067,7 +1067,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec,
             } else if( mass_==0 ) {
                 for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) {
                     for( int iPart=particles->first_index[scell] ; ( int )iPart<( int )particles->last_index[scell]; iPart++ ) {
-                        if ( particles->cell_keys[iPart] != -1 ) {
+                        if ( particles->cell_keys[iPart] >= 0 ) {
                             //Compute cell_keys of remaining particles
                             for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                                 particles->cell_keys[iPart] *= length[i];
@@ -1366,27 +1366,27 @@ void SpeciesV::sortParticles( Params &params )
     //Loop over just arrived particles to compute their cell keys and contribution to count
     for( unsigned int idim=0; idim < nDim_field ; idim++ ) {
         for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) {
-            buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.part_index_recv_sz[idim][ineighbor] );
+            buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.partRecv[idim][ineighbor]->size() );
             // #pragma omp simd
-            // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) {
+            // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) {
             //     for( unsigned int ipos=0; ipos < nDim_field ; ipos++ ) {
-            //         double X = ((this)->*(distance[ipos]))(&MPI_buffer_.partRecv[idim][ineighbor], ipos, ip);
+            //         double X = ((this)->*(distance[ipos]))(MPI_buffer_.partRecv[idim][ineighbor], ipos, ip);
            //         int IX = round( X * dx_inv_[ipos] );
            //         buf_cell_keys[idim][ineighbor][ip] = buf_cell_keys[idim][ineighbor][ip] * length_[ipos] + IX;
             //     }
             // }
             // // not vectorizable because random access to count
-            // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) {
+            // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) {
             //     count[buf_cell_keys[idim][ineighbor][ip]] ++;
             // }
             computeParticleCellKeys( params,
-                                     &MPI_buffer_.partRecv[idim][ineighbor],
+                                     MPI_buffer_.partRecv[idim][ineighbor],
                                      &buf_cell_keys[idim][ineighbor][0],
                                      &count[0],
                                      0,
-                                     MPI_buffer_.part_index_recv_sz[idim][ineighbor] );
+                                     MPI_buffer_.partRecv[idim][ineighbor]->size() );
         }
     }
@@ -1403,8 +1403,8 @@ void SpeciesV::sortParticles( Params &params )
     //Now proceed to the cycle sort
-    if( MPI_buffer_.partRecv[0][0].size() == 0 ) {
-        MPI_buffer_.partRecv[0][0].initialize( 0, *particles ); //Is this correct ?
+    if( MPI_buffer_.partRecv[0][0]->size() == 0 ) {
+        MPI_buffer_.partRecv[0][0]->initialize( 0, *particles ); //Is this correct ?
     }
     // Resize the particle vector
@@ -1418,7 +1418,7 @@ void SpeciesV::sortParticles( Params &params )
     //Copy all particles from MPI buffers back to the writable particles via cycle sort pass.
     for( unsigned int idim=0; idim < nDim_field ; idim++ ) {
         for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) {
-            for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) {
+            for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) {
                 cycle.resize( 1 );
                 cell_target = buf_cell_keys[idim][ineighbor][ip];
                 ip_dest = particles->first_index[cell_target];
@@ -1429,7 +1429,7 @@ void SpeciesV::sortParticles( Params &params )
                 cycle[0] = ip_dest;
                 cell_target = particles->cell_keys[ip_dest];
                 //As long as the particle is not erased, we can build up the cycle.
-                while( cell_target != -1 ) {
+                while( cell_target >= 0 ) {
                     ip_dest = particles->first_index[cell_target];
                     while( particles->cell_keys[ip_dest] == cell_target ) {
                         ip_dest++;
@@ -1441,7 +1441,7 @@ void SpeciesV::sortParticles( Params &params )
                 //Last target_cell is -1, the particle must be erased:
                 particles->translateParticles( cycle );
                 //Eventually copy particle from the MPI buffer into the particle vector.
-                MPI_buffer_.partRecv[idim][ineighbor].overwriteParticle( ip, *particles, cycle[0] );
+                MPI_buffer_.partRecv[idim][ineighbor]->overwriteParticle( ip, *particles, cycle[0] );
             }
         }
     }
@@ -1450,14 +1450,14 @@ void SpeciesV::sortParticles( Params &params )
     for( unsigned int ip=( unsigned int )particles->last_index.back(); ip < npart; ip++ ) {
         cell_target = particles->cell_keys[ip];
-        if( cell_target == -1 ) {
+        if( cell_target < 0 ) {
             continue;
         }
         cycle.resize( 0 );
         cycle.push_back( ip );
        //As long as the particle is not erased, we can build up the cycle.
-        while( cell_target != -1 ) {
+        while( cell_target >= 0 ) {
             ip_dest = particles->first_index[cell_target];
@@ -1533,7 +1533,7 @@ void SpeciesV::computeParticleCellKeys( Params & params,
             #pragma omp simd
             for( iPart=istart; iPart < iend ; iPart++ ) {
-                if ( cell_keys[iPart] != -1 ) {
+                if ( cell_keys[iPart] >= 0 ) {
                     //Compute cell_keys particles
                     cell_keys[iPart] = std::round( position_x[iPart] * dx_inv_[0]) - min_loc_l ;
                     cell_keys[iPart] *= length_[1];
@@ -1553,7 +1553,7 @@
             #pragma omp simd
             for( iPart=istart; iPart < iend ; iPart++ ) {
-                if ( cell_keys[iPart] != -1 ) {
+                if ( cell_keys[iPart] >= 0 ) {
                     //Compute cell_keys of remaining particles
                     cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ;
                     cell_keys[iPart] *= length_[1];
@@ -1573,7 +1573,7 @@
             #pragma omp simd
             for( iPart=istart; iPart < iend ; iPart++ ) {
-                if ( cell_keys[iPart] != -1 ) {
+                if ( cell_keys[iPart] >= 0 ) {
                     //Compute cell_keys of remaining particles
                     cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ;
                     cell_keys[iPart] *= length_[1];
@@ -1589,7 +1589,7 @@
             #pragma omp simd
             for( iPart=istart; iPart < iend ; iPart++ ) {
-                if ( cell_keys[iPart] != -1 ) {
+                if ( cell_keys[iPart] >= 0 ) {
                     //Compute cell_keys of remaining particles
                     cell_keys[iPart] = round(position_x[iPart] * dx_inv_[0] )- min_loc_x ;
                 }
@@ -1598,7 +1598,7 @@
     }
     for( iPart=istart; iPart < iend ; iPart++ ) {
-        if ( cell_keys[iPart] != -1 ) {
+        if ( cell_keys[iPart] >= 0 ) {
             count[cell_keys[iPart]] ++;
         }
     }
@@ -2526,7 +2526,7 @@ void SpeciesV::ponderomotiveUpdatePositionAndCurrentsTasks( double time_dual, un
                     smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11);
                     for( int iPart=particles->first_index[scell] ; iPart<particles->last_index[scell]; iPart++ ) {
-                        if ( particles->cell_keys[iPart] != -1 ) {
+                        if ( particles->cell_keys[iPart] >= 0 ) {
                            //First reduction of the count sort algorithm. Lost particles are not included.
                            for( int i = 0 ; i<( int )nDim_field; i++ ) {
                                 particles->cell_keys[iPart] *= length_[i];
diff --git a/src/Species/SpeciesVAdaptive.cpp b/src/Species/SpeciesVAdaptive.cpp
index b24d86711..98813c71e 100755
--- a/src/Species/SpeciesVAdaptive.cpp
+++ b/src/Species/SpeciesVAdaptive.cpp
@@ -275,7 +275,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec,
     //         if( mass_>0 ) {
     //
     //             for( iPart=particles->first_index[scell] ; ( int )iPart<( int )particles->last_index[scell]; iPart++ ) {
-    //                 if ( particles->cell_keys[iPart] != -1 ) {
+    //                 if ( particles->cell_keys[iPart] >= 0 ) {
     //                     //Compute cell_keys of remaining particles
     //                     for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
     //                         particles->cell_keys[iPart] *= this->length_[i];
@@ -289,7 +289,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec,
     //         } else if( mass_==0 ) {
     //
     //             for( iPart=particles->first_index[scell] ; ( int )iPart<( int )particles->last_index[scell]; iPart++ ) {
-    //                 if ( particles->cell_keys[iPart] != -1 ) {
+    //                 if ( particles->cell_keys[iPart] >= 0 ) {
     //                     //Compute cell_keys of remaining particles
     //                     for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
     //                         particles->cell_keys[iPart] *= this->length_[i];
@@ -754,7 +754,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec
             if( mass_>0 ) {
                 for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPart<( int )particles->last_index[ipack*packsize_+scell]; iPart++ ) {
-                    if ( particles->cell_keys[iPart] != -1 ) {
+                    if ( particles->cell_keys[iPart] >= 0 ) {
                         //Compute cell_keys of remaining particles
                         for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                             particles->cell_keys[iPart] *= this->length_[i];
@@ -768,7 +768,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec
             } else if( mass_==0 ) {
                 for( int iPart=particles->first_index[scell] ; ( int )iPart<( int )particles->last_index[scell]; iPart++ ) {
-                    if ( particles->cell_keys[iPart] != -1 ) {
+                    if ( particles->cell_keys[iPart] >= 0 ) {
                         //Compute cell_keys of remaining particles
                         for( unsigned int i = 0 ; i<nDim_field ; i++ ) {
                             particles->cell_keys[iPart] *= length[i];
@@ -1662,7 +1662,7 @@ void SpeciesVAdaptive::scalarPonderomotiveUpdatePositionAndCurrentsTasks( double
                     smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11);
                     for( int iPart=particles->first_index[first_cell_of_bin[ibin]] ; iPart<particles->last_index[last_cell_of_bin[ibin]]; iPart++ ) {
-                        if ( particles->cell_keys[iPart] != -1 ) {
+                        if ( particles->cell_keys[iPart] >= 0 ) {
                            //First reduction of the count sort algorithm. Lost particles are not included.
                            for( int i = 0 ; i<( int )nDim_field; i++ ) {
                                 particles->cell_keys[iPart] *= length_[i];
diff --git a/src/Tools/Timers.cpp b/src/Tools/Timers.cpp
index 0cd6dac0c..d3edda0e4 100755
--- a/src/Tools/Timers.cpp
+++ b/src/Tools/Timers.cpp
@@ -18,7 +18,7 @@ Timers::Timers( SmileiMPI *smpi ) :
     collisions( "Collisions" ), // Call to Collisions methods
    movWindow( "Mov window" ), // Moving Window
     loadBal( "Load balancing" ), // Load balancing
-    syncPart( "Sync Particles" ), // Call exchangeParticles (MPI & Patch sync)
+    syncPart( "Sync Particles" ), // Call initExchParticles (MPI & Patch sync)
     syncField( "Sync Fields" ), // Call sumRhoJ(s), exchangeB (MPI & Patch sync)
     syncDens( "Sync Densities" ), // If necessary the following timers can be reintroduced
     particleMerging( "Part Merging" ), // Particle merging

From 50891ac16ef744f99d6d240606cfec7be65793d1 Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Tue, 2 Apr 2024 17:00:55 +0200
Subject: [PATCH 02/28] repurpose extractParticles

---
 src/Particles/Particles.cpp | 17 ++++++++-----
 src/Particles/Particles.h   |  2 +-
 src/Patch/Patch.cpp         | 51 ++++++++-----------------------
 src/Species/Species.cpp     | 10 --------
 src/Species/Species.h       |  6 -----
 5 files changed, 22 insertions(+), 64 deletions(-)

diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
index aa9b8a02c..d628e24d2 100755
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
@@ -1303,14 +1303,17 @@ void Particles::copyFromDeviceToHost()
     ERROR( "Device only feature, should not have come here!" );
 }

-void Particles::extractParticles( Particles* particles_to_move )
+// Loop all particles and copy the outgoing ones to buffers
+void Particles::extractParticles( const bool copy[], Particles* buffer[] )
 {
-    particles_to_move->clear();
-    // for ( int ipart=0 ; ipart
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
-    std::vector<std::vector<bool>> copy( 3 );
-    copy[0] = { neighbor_[0][0] != MPI_PROC_NULL, neighbor_[0][1] != MPI_PROC_NULL };
-    copy[1] = { neighbor_[1][0] != MPI_PROC_NULL, neighbor_[1][1] != MPI_PROC_NULL };
-    if( params.nDim_field > 2 ) {
-        copy[2] = { neighbor_[2][0] != MPI_PROC_NULL, neighbor_[2][1] != MPI_PROC_NULL };
+    bool copy[params.nDim_field*2];
+    Particles* sendBuffer[params.nDim_field*2];
+    for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) {
+        copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL;
+        copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL;
+        sendBuffer[2*iDim+0] = buffer.partSend[iDim][0];
+        sendBuffer[2*iDim+1] = buffer.partSend[iDim][1];
     }
     if( params.geometry == "AMcylindrical" ) {
-        copy[0][0] = copy[0][0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" );
-        copy[0][1] = copy[0][1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" );
+        copy[0] = copy[0] && ( Pcoordinates[0]!=0 || vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" );
+        copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" );
     }
-    // Loop all particles and count the outgoing ones
-    for( size_t ipart = 0; ipart < part.size(); ipart++ ) {
-        if( part.cell_keys[ipart] < -1 ) {
-            if( part.cell_keys[ipart] == -2 ) {
-                if( copy[0][0] ) {
-                    part.copyParticle( ipart, *buffer.partSend[0][0] );
-                }
-            } else if( part.cell_keys[ipart] == -3 ) {
-                if( copy[0][1] ) {
-                    part.copyParticle( ipart, *buffer.partSend[0][1] );
-                }
-            } else if( part.cell_keys[ipart] == -4 ) {
-                if( copy[1][0] ) {
-                    part.copyParticle( ipart, *buffer.partSend[1][0] );
-                }
-            } else if( part.cell_keys[ipart] == -5 ) {
-                if( copy[1][1] ) {
-                    part.copyParticle( ipart, *buffer.partSend[1][1] );
-                }
-            } else if( part.cell_keys[ipart] == -6 ) {
-                if( copy[2][0] ) {
-                    part.copyParticle( ipart, *buffer.partSend[2][0] );
-                }
-            } else if( part.cell_keys[ipart] == -7 ) {
-                if( copy[2][1] ) {
-                    part.copyParticle( ipart, *buffer.partSend[2][1] );
-                }
-            }
-        }
-    }
-
+    part.extractParticles( copy, sendBuffer );
+
 } // copyExchParticlesToBuffers(... iDim)
@@ -706,7 +679,6 @@ void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, Vecto
     for( int iNeighbor=0; iNeighbor<nbNeighbors_; iNeighbor++ ) {
-        //MESSAGE("n_send "<<partSend->size()<<" n_recv "<<partRecv->size());
         // Send
         Particles &partSend = *buffer.partSend[iDim][iNeighbor];
         if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) {
@@ -720,7 +692,6 @@
         int iOppositeNeighbor = ( iNeighbor+1 )%2;
         Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor];
         if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) {
-            // MESSAGE(" patch "<
             typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv );
             int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ];
             int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 );
diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp
index 31ab4c1a5..0fb38f673 100755
--- a/src/Species/Species.cpp
+++ b/src/Species/Species.cpp
@@ -1744,16 +1744,6 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ )
 }//END computeCharge

-void Species::extractParticles()
-{
-    particles->extractParticles( particles_to_move );
-}
-
-// void Species::injectParticles( Params &params )
-// {
-// }
-
 // ---------------------------------------------------------------------------------------------------------------------
 //! Sort particles
 // ---------------------------------------------------------------------------------------------------------------------
diff --git a/src/Species/Species.h b/src/Species/Species.h
index 56c693d65..b91c9521b 100755
--- a/src/Species/Species.h
+++ b/src/Species/Species.h
@@ -482,12 +482,6 @@ class Species
     //! Method calculating the Particle charge on the grid (projection)
     virtual void computeCharge( ElectroMagn *EMfields, bool old=false );

-    //! Method used to select particles which will change of patches
-    virtual void extractParticles();
-
-    //! Method used to integrate particles which come from another patches
-    // virtual void injectParticles( Params &params );
-
     //! Method used to inject and sort particles
     virtual void sortParticles( Params &param );

From da1b17248d673e6e5201cefc56d31eea55a7cbc9 Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Tue, 2 Apr 2024 18:30:02 +0200
Subject: [PATCH 03/28] CI on particle_exchange

---
 .gitlab-ci.yml              | 8 ++++++++
 src/Particles/Particles.cpp | 2 +-
 src/Particles/Particles.h   | 7 +++----
 src/Patch/Patch.cpp         | 2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6faa6ff17..f50bfd819 100755
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -15,6 +15,7 @@ install:
   stage: install
   only:
     - develop
+    - particle_exchange

   script:
     # Force workdir cleaning in case of retried
@@ -33,6 +34,7 @@ compile_default:
   stage: compile_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -44,6 +46,7 @@ runQuick:
   stage: run_quick
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -55,6 +58,7 @@ run1D:
   stage: run_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -67,6 +71,7 @@ run2D:
   stage: run_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -81,6 +86,7 @@ run3D:
   stage: run_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -96,6 +102,7 @@ runAM:
   stage: run_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
@@ -108,6 +115,7 @@ runCollisions:
   stage: run_default
   only:
     - develop
+    - particle_exchange

   script:
     # Move in test dir
diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
index d628e24d2..62f8b67af 100755
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
@@ -1304,7 +1304,7 @@ void Particles::copyFromDeviceToHost()
 }

 // Loop all particles and copy the outgoing ones to buffers
-void Particles::extractParticles( const bool copy[], Particles* buffer[] )
+void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Particles* buffer[] )
 {
     for( size_t ipart = 0; ipart < size(); ipart++ ) {
         if( cell_keys[ipart] < -1 ) {
diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h
index aa7fbbe9f..a155baf7a 100755
--- a/src/Particles/Particles.h
+++ b/src/Particles/Particles.h
@@ -473,10 +473,9 @@ class Particles
     // Accelerator specific virtual functions

     // -----------------------------------------------------------------------------
-    //! Extract particles from the Particles object and put
-    //! them in the Particles object `particles_to_move`
+    //! Extract particles escaping the box to buffers
     // -----------------------------------------------------------------------------
-    virtual void extractParticles( const bool copy[], Particles* buffer[] );
+    virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] );

     // -----------------------------------------------------------------------------
     //! Erase particles leaving the patch object on device
     // -----------------------------------------------------------------------------
     virtual int eraseLeavingParticles();

     // -----------------------------------------------------------------------------
-    //! Inject particles from particles_to_move object and put
+    //! Inject particles from particles_to_inject object and put
     //! them in the Particles object
     //! \param[in,out] particles_to_inject Particles object containing particles to inject
     virtual int injectParticles( Particles *particles_to_inject );
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
index c7e3ebd78..f0bb6a1fb 100755
--- a/src/Patch/Patch.cpp
+++ b/src/Patch/Patch.cpp
@@ -552,7 +552,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params &params )
         copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" );
     }

-    part.extractParticles( copy, sendBuffer );
+    part.extractParticles( params.nDim_field, copy, sendBuffer );

 } // copyExchParticlesToBuffers(... iDim)

From 593e96c545fa40d8811673639c087f3af3585e2a Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Wed, 3 Apr 2024 00:36:25 +0200
Subject: [PATCH 04/28] Fix in the new copyParticles

---
 src/Particles/Particles.cpp |  7 ++++---
 src/Patch/Patch.cpp         | 11 ++++-------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp
index 62f8b67af..b675ac12f 100755
--- a/src/Particles/Particles.cpp
+++ b/src/Particles/Particles.cpp
@@ -422,12 +422,13 @@ void Particles::copyParticles( vector<size_t> indices, Particles &dest_parts, in
 {
     const size_t transfer_size = indices.size();
     const size_t dest_new_size = dest_parts.size() + transfer_size;
+    const size_t displaced_size = dest_parts.size() - dest_id;

     for( unsigned int iprop=0 ; iprop<double_prop_.size() ; iprop++ ) {
         // Make space in dest array
         dest_parts.double_prop_[iprop]->resize( dest_new_size );
         auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id;
-        move_backward( loc, loc + transfer_size, dest_parts.double_prop_[iprop]->end() );
+        move_backward( loc, loc + displaced_size, dest_parts.double_prop_[iprop]->end() );
         // Copy data
         for( size_t i = 0; i < transfer_size; i++ ) {
             ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]];
@@ -438,7 +439,7 @@ void Particles::copyParticles( vector<size_t> indices, Particles &dest_parts, in
         // Make space in dest array
         dest_parts.short_prop_[iprop]->resize( dest_new_size );
         auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id;
-        move_backward( loc, loc + transfer_size, dest_parts.short_prop_[iprop]->end() );
+        move_backward( loc, loc + displaced_size, dest_parts.short_prop_[iprop]->end() );
@@ -449,7 +450,7 @@ void Particles::copyParticles( vector<size_t> indices, Particles &dest_parts, in
         // Make space in dest array
         dest_parts.uint64_prop_[iprop]->resize( dest_new_size );
         auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id;
-        move_backward( loc, loc + transfer_size, dest_parts.uint64_prop_[iprop]->end() );
+        move_backward( loc, loc + displaced_size, dest_parts.uint64_prop_[iprop]->end() );
         // Copy data
         for( size_t i = 0; i < transfer_size; i++ ) {
             ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]];
         }
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
index f0bb6a1fb..546e0ca08 100755
--- a/src/Patch/Patch.cpp
+++ b/src/Patch/Patch.cpp
@@ -634,8 +634,8 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params &params, int iD
         Particles &partSend = *buffer.partSend[iDim][iNeighbor];
         // Enabled periodicity
-        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partSend.size() != 0 ) {
-            if( smpi->periods_[iDim]==1 ) {
+        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) {
+            if( partSend.size() > 0 && smpi->periods_[iDim]==1 ) {
                 if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) {
                    for( size_t iPart=0; iPart < partSend.size(); iPart++ ) {
                         if( partSend.position( iDim, iPart ) < 0. ) {
@@ -651,17 +651,14 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params &params, int iD
                 }
             }
         }
-        }
-
-        if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) {
+            // Initialize receive buffer with the appropriate size
             if( is_a_MPI_neighbor( iDim, iNeighbor ) ) {
                 if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) {
                     buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles );
                 }
-            }
             // Swap particles to other patch directly if it belongs to the same MPI
-            else {
+            } else {
                 int iOppositeNeighbor = ( iNeighbor+1 )%2;
                 SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_;
                 swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] );

From f5659256bf7b8e0cf6371611c1caa7c6958b9a2b Mon Sep 17 00:00:00 2001
From: Frederic Perez
Date: Wed, 3 Apr 2024 11:11:15 +0200
Subject: [PATCH 05/28] new reference

---
 .../references/tst2d_04_laser_wake.py.txt | Bin 51895 -> 51895 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/validation/references/tst2d_04_laser_wake.py.txt b/validation/references/tst2d_04_laser_wake.py.txt
index 48d9eaecad05f679f5c1af836be621ca3b01a1e9..094e7c366dca0e242cf0f955e47a7c88c3b3a09d 100755
GIT binary patch
delta 23828
[base85-encoded binary delta payload omitted; the 51895-byte reference file is replaced in full]
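
Note on the cell_keys convention these patches rely on: a non-negative entry means the particle stays (the value is its flattened cell index), -1 means it was lost at a boundary, and values below -1 name the send buffer the particle must leave through, with -2/-3 for x min/max, -4/-5 for y and -6/-7 for z, i.e. buffer index -cell_keys-2. The following minimal sketch, with illustrative names rather than Smilei's own, shows that dispatch in isolation:

    // Sketch only -- illustrative, not Smilei source code.
    // cell_keys[i] >= 0 : particle stays; value is its flattened cell index
    // cell_keys[i] == -1: particle lost at a boundary
    // cell_keys[i] <= -2: particle exits through buffer (-cell_keys[i] - 2),
    //                     i.e. -2/-3 = x min/max, -4/-5 = y, -6/-7 = z
    #include <cstddef>
    #include <vector>

    void extract_outgoing( const std::vector<int> &cell_keys,
                           const std::vector<bool> &has_neighbor,        // one flag per (dim, side)
                           std::vector<std::vector<std::size_t>> &send ) // one list per (dim, side)
    {
        for( std::size_t ipart = 0; ipart < cell_keys.size(); ipart++ ) {
            if( cell_keys[ipart] < -1 ) {                    // tagged for exchange
                const int ibuf = -cell_keys[ipart] - 2;      // (dim, side) index, 0 .. 2*ndim-1
                if( has_neighbor[ibuf] ) {
                    send[ibuf].push_back( ipart );           // the real code copies the particle
                }
            }
        }
    }

Encoding the exit direction in the key is what lets copyExchParticlesToBuffers fill all 2*ndim send buffers in a single pass over the particles instead of scanning once per direction.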
zH4C&JY`zr?&V}Ho{uk#~(?D6Sr>o5-3|yE`=LFOFKz8x(_nJll;8xyZyK0;UzFe#4 z(&I|tT#{8=c78SR2(U|^p{@g|`^inmM5_V9xDpAXekG6}%(SF*C=0c}H8>YmK?o1=i!$5dWbnWdK1je4}pc77i;IvXm ze?>wyg^Aa~pE;)u!J}k*8vOx&{pZYg+mFEIrB;o_~{8l{f{VN}3P;{5K2j z+4Yi3`{p2SjE5ADvyit<623M%4e`qF+F1oAz<2qs^1bRI80%(y%01c)7pu?2ZfHAV z>0{ms_WXL#$>*KY_8Neg_QU#Yv7=CT=fZ$9X#$E_a-@uNf1pKB&S3d=J!>_n83Ra#0?h0a!QHE|%5(xP&FX|!wXb!3| zTyq#(CN<~#-&b5PvbrnD;DO$kx}WE;1Yp2ja~TFx7koBOHMV~|8ul!uhNLFtfq}Qo V^fCD|5E*AVu)DwF|7*~a|9=^u2VejI From 5c83d9df544c31132686ef300cf429b019c62de2 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 4 Apr 2024 10:18:33 +0200 Subject: [PATCH 06/28] particle exchange GPU --- src/MovWindow/SimWindow.cpp | 11 +- src/Particles/Particles.h | 2 +- src/Particles/nvidiaParticles.cu | 171 +++++++++++++--------- src/Particles/nvidiaParticles.h | 10 +- src/Patch/Patch.cpp | 5 +- src/Patch/VectorPatch.cpp | 74 +--------- src/SmileiMPI/AsyncMPIbuffers.cpp | 31 ++-- src/SmileiMPI/AsyncMPIbuffers.h | 2 +- src/SmileiMPI/SmileiMPI.cpp | 3 +- src/Species/Species.cpp | 92 ++++++------ src/Species/Species.h | 6 +- src/Species/SpeciesV.cpp | 6 +- src/Species/SpeciesV.h | 2 +- src/Species/SpeciesVAdaptive.cpp | 2 +- src/Species/SpeciesVAdaptiveMixedSort.cpp | 2 +- 15 files changed, 189 insertions(+), 230 deletions(-) diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 08ffada69..6dbb5da57 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -384,14 +384,9 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies #if defined ( SMILEI_ACCELERATOR_MODE ) - if ( params.gpu_computing ) { - // ADD NEW PARTS ON GPU - for( unsigned int ispec=0 ; ispecvecSpecies[ispec]->particles_to_move->clear(); - // mypatch->vecSpecies[ispec]->particles->copyParticles( 0, mypatch->vecSpecies[ispec]->getNbrOfParticles(), - // *mypatch->vecSpecies[ispec]->particles_to_move, 0 ); - mypatch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - mypatch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + if( params.gpu_computing ) { + for( auto spec: mypatch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } } #endif diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index a155baf7a..13941b40a 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -476,7 +476,7 @@ class Particles //! Extract particles escaping the box to buffers // ----------------------------------------------------------------------------- virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); - +virtual void extractParticles( Particles* particles_to_move ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index d7a63f0b3..191e0943f 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -33,14 +33,24 @@ // Cell key manipulation functor definition //////////////////////////////////////////////////////////////////////////////// -//! Structure with specific function count_if_out for thrust::tuple operator -//! Return True if the entry is -1 as in the cell keys vector for instance -struct count_if_out +//! Predicate for cell_keys +//! 
@@ -250,7 +260,7 @@ namespace detail {
     };
 
-    //! This functor assign a cluster key to a_particle.
+    //! This functor checks the cluster key of a_particle.
     //!
     template
     struct OutOfClusterPredicate
@@ -286,7 +296,7 @@ namespace detail {
         __host__ __device__ bool
         operator()( const Tuple& a_particle ) const
         {
-            return thrust::get<0>( a_particle ) /* cluster key */ == -1;
+            return thrust::get<0>( a_particle ) /* cluster key */ < 0;
         }
     };
 
@@ -467,34 +477,34 @@ namespace detail {
         // - compute bins
         // NOTE: This method consumes a lot of memory ! O(N)
 
-        const auto new_particle_to_inject_count  = particle_to_inject.deviceSize();
-        const auto current_local_particles_count = std::distance( first_particle, last_particle );
-        const auto new_particle_count            = new_particle_to_inject_count + current_local_particles_count;
+        const auto initial_count = std::distance( first_particle, last_particle );
+        const auto inject_count  = particle_to_inject.deviceSize();
+        const auto new_count     = initial_count + inject_count;
 
         // NOTE: We really want a non-initializing vector here!
         // It's possible to give a custom allocator to thrust::device_vector.
        // Create one with construct(<>) as a noop and derive from
         // thrust::device_malloc_allocator. For now we do an explicit resize.
-        particle_to_inject.softReserve( new_particle_count );
-        particle_to_inject.resize( new_particle_count ); // We probably invalidated the iterators
+        particle_to_inject.softReserve( new_count );
+        particle_to_inject.resize( new_count ); // We probably invalidated the iterators
 
         // Copy out of cluster/tile/chunk particles
         // partition_copy is way slower than copy_if/remove_copy_if on rocthrust
         // https://github.com/ROCmSoftwarePlatform/rocThrust/issues/247
-        const auto first_particle_to_inject = particle_iterator_provider( particle_to_inject );
+        const auto first_to_inject  = particle_iterator_provider( particle_to_inject );
+        const auto first_to_reorder = first_to_inject + inject_count;
 
         // NOTE: copy_if/remove_copy_if are stable.
-        const auto partitioned_particles_bounds_true  = thrust::copy_if( thrust::device,
+        // First, copy particles that are not in their own cluster anymore
+        const auto first_already_ordered = thrust::copy_if( thrust::device,
                                                             first_particle,
                                                             last_particle,
-                                                            // Dont overwrite the particle_to_inject (at the start of the array)
-                                                            first_particle_to_inject + new_particle_to_inject_count,
+                                                            first_to_reorder,
                                                             OutOfClusterPredicate{ cluster_type } );
-        const auto partitioned_particles_bounds_false = thrust::remove_copy_if( thrust::device,
+        // Then, copy particles that are still in their own cluster
+        const auto end = thrust::remove_copy_if( thrust::device,
                                                  first_particle,
                                                  last_particle,
-                                                 // Do the copy with a destination
-                                                 // starting from partitioned_particles_bounds_true
-                                                 partitioned_particles_bounds_true,
+                                                 first_already_ordered,
                                                  OutOfClusterPredicate{ cluster_type } );
 
         // Compute or recompute the cluster index of the particle_to_inject
         // NOTE:
         // - we can "save" some work here if cluster index is already computed
         // for the new particles to inject (not the one we got with copy_if).
         //
-        doComputeParticleClusterKey( first_particle_to_inject,
-                                     partitioned_particles_bounds_true,
+        doComputeParticleClusterKey( first_to_inject,
+                                     first_already_ordered,
                                      cluster_type );
 
-        const auto first_particle_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject );
-        const auto particle_to_rekey_count         = std::distance( first_particle_to_inject,
-                                                                    partitioned_particles_bounds_true );
+        const auto first_to_inject_no_key  = particle_no_key_iterator_provider( particle_to_inject );
+        const auto particle_to_rekey_count = std::distance( first_to_inject,
+                                                            first_already_ordered );
 
         doSortParticleByKey( particle_to_inject.getPtrCellKeys(),
                              particle_to_inject.getPtrCellKeys() + particle_to_rekey_count,
-                             first_particle_to_inject_no_key );
+                             first_to_inject_no_key );
 
         // This free generates a lot of memory fragmentation.
         // particle_container.free();
         // Same as for particle_to_inject, non-initializing vector is best.
-        particle_container.softReserve( new_particle_count );
-        particle_container.resize( new_particle_count );
+        particle_container.softReserve( new_count );
+        particle_container.resize( new_count );
 
         // Merge by key
         // NOTE: Dont merge in place on GPU. That means we need an other large buffer!
         thrust::merge_by_key( thrust::device,
                               particle_to_inject.getPtrCellKeys(),                           // Input range 1, first key
                               particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 1, last key
                               particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 2, first key
-                              particle_to_inject.getPtrCellKeys() + new_particle_count,      // Input range 2, last key
-                              first_particle_to_inject_no_key,                               // Input range 1, first value
-                              first_particle_to_inject_no_key + particle_to_rekey_count,     // Input range 2, first value
+                              particle_to_inject.getPtrCellKeys() + new_count,               // Input range 2, last key
+                              first_to_inject_no_key,                                        // Input range 1, first value
+                              first_to_inject_no_key + particle_to_rekey_count,              // Input range 2, first value
                               particle_container.getPtrCellKeys(),                           // Output range first key
                               particle_no_key_iterator_provider( particle_container ) );     // Output range first value
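The hunk above reorganizes the import in three stages: a stable two-pass partition (copy_if, then remove_copy_if), a key sort of the out-of-cluster part, and a final merge_by_key. The two-pass partition idiom is worth spelling out, since thrust::partition_copy is much slower on rocThrust (see the issue linked in the comments). A stand-alone sketch on plain integers, with assumed names and `out` pre-sized like `in`:

    #include <thrust/copy.h>
    #include <thrust/remove.h>
    #include <thrust/device_vector.h>
    #include <thrust/execution_policy.h>

    struct isNegative
    {
        __host__ __device__ bool operator()( int x ) const { return x < 0; }
    };

    // Elements matching the predicate land first (copy_if), the others follow
    // (remove_copy_if); both passes are stable, unlike a single partition_copy.
    void stable_partition_copy( const thrust::device_vector<int> &in,
                                thrust::device_vector<int>       &out )
    {
        const auto mid = thrust::copy_if( thrust::device, in.cbegin(), in.cend(),
                                          out.begin(), isNegative() );
        thrust::remove_copy_if( thrust::device, in.cbegin(), in.cend(),
                                mid, isNegative() );
    }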
@@ -1365,38 +1375,61 @@ unsigned int nvidiaParticles::deviceCapacity() const
 }
 
 // -----------------------------------------------------------------------------
-//! Extract particles from the Particles object and put
-//! them in the Particles object `particles_to_move`
+//! Move escaping particles to the buffers
 // -----------------------------------------------------------------------------
-void nvidiaParticles::extractParticles( Particles* particles_to_move )
+void nvidiaParticles::extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] )
+{
+    // Escaping particles have a cell_key equal to -2-direction
+    // where direction goes from 0 to 6 and tells which way the particle escapes.
+    // If the cell_key is -1, the particle must be destroyed so it is not extracted.
+
+    extractParticlesByKey<-2>( copy[0], buffer[0] ); // x_min
+    extractParticlesByKey<-3>( copy[1], buffer[1] ); // x_max
+    if( ndim > 1 ) {
+        extractParticlesByKey<-4>( copy[2], buffer[2] ); // y_min
+        extractParticlesByKey<-5>( copy[3], buffer[3] ); // y_max
+        if( ndim > 2 ) {
+            extractParticlesByKey<-6>( copy[4], buffer[4] ); // z_min
+            extractParticlesByKey<-7>( copy[5], buffer[5] ); // z_max
+        }
+    }
+}
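The key-to-face mapping above is relied on again by later patches, so it is worth summarizing. A hypothetical decoding helper (illustrative only, not part of the patch):

    // cell_key convention:
    //   key >= 0  : particle stays; key is its cell index
    //   key == -1 : particle is to be destroyed
    //   key <= -2 : particle escapes through face -2 - key, faces being
    //               numbered 0:x_min 1:x_max 2:y_min 3:y_max 4:z_min 5:z_max
    inline int escape_direction( int cell_key )
    {
        return -2 - cell_key; // only meaningful when cell_key <= -2
    }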
+
+
+//! Copy particles which have cell_key = key
+template< const int key>
+void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer )
 {
     // TODO(Etienne M): We are doing extra work. We could use something like
-    // std::partition to output the invalidated particles in particles_to_move
+    // std::partition to output the invalidated particles in buffer
     // and keep the good ones. This would help us avoid the std::remove_if in
     // the particle injection and sorting algorithm.
-
-    // Manage the send data structure
-    nvidiaParticles* const cp_parts             = static_cast( particles_to_move );
-    const int          nparts                   = gpu_nparts_;
-    const int          position_dimension_count = nvidia_position_.size();
-
-    const int nparts_to_move = thrust::count_if( thrust::device,
-                                                 nvidia_cell_keys_.cbegin(),
-                                                 nvidia_cell_keys_.cbegin() + nparts,
-                                                 count_if_out() );
-
-    // Resize it, if too small (copy_if do not resize)
-    cp_parts->resize( nparts_to_move );
-
+
+    if( ! copy ) {
+        return;
+    }
+
+    const int nparts = gpu_nparts_;
     // Iterator of the main data structure
     // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator
-    const auto source_iterator_first      = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(),
+    const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(),
                                                                                        nvidia_momentum_[0].begin(),
                                                                                        nvidia_momentum_[1].begin(),
                                                                                        nvidia_momentum_[2].begin(),
                                                                                        nvidia_weight_.begin(),
                                                                                        nvidia_charge_.begin() ) );
-    const auto source_iterator_last       = source_iterator_first + nparts; // std::advance
+    const auto source_iterator_last  = source_iterator_first + nparts; // std::advance
+
+    nvidiaParticles* const cp_parts = static_cast( buffer );
+
+    const int nparts_to_copy = thrust::count_if( thrust::device,
+                                                 nvidia_cell_keys_.cbegin(),
+                                                 nvidia_cell_keys_.cbegin() + nparts,
+                                                 cellKeyEquals() );
+
+    // Resize it, if too small (copy_if do not resize)
+    cp_parts->resize( nparts_to_copy );
+
     const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].begin(),
                                                                                            cp_parts->nvidia_momentum_[0].begin(),
                                                                                            cp_parts->nvidia_momentum_[1].begin(),
                                                                                            cp_parts->nvidia_momentum_[2].begin(),
                                                                                            cp_parts->nvidia_weight_.begin(),
                                                                                            cp_parts->nvidia_charge_.begin() ) );
 
-    // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if keeped, new PartBoundCond::apply(...))
+    // Copy send particles in dedicated data structure
     thrust::copy_if( thrust::device,
                      source_iterator_first,
                      source_iterator_last,
-                     // Copy depending on count_if_out()(nvidia_cell_keys_[i])
                      nvidia_cell_keys_.cbegin(),
                      destination_iterator_first,
-                     count_if_out() );
+                     cellKeyEquals() );
 
-    // Copy the other position values depending on the simulation's grid
-    // dimensions
-    for( int i = 1; i < position_dimension_count; ++i ) {
+    // Copy the other position values depending on the simulation's grid dimensions
+    const int ndim_particles = nvidia_position_.size();
+    for( int i = 1; i < ndim_particles; ++i ) {
         thrust::copy_if( thrust::device,
                          nvidia_position_[i].cbegin(),
                          nvidia_position_[i].cbegin() + nparts,
                          nvidia_cell_keys_.cbegin(),
                          cp_parts->nvidia_position_[i].begin(),
-                         count_if_out() );
+                         cellKeyEquals() );
     }
 
     // Special treatment for chi if radiation emission
     if( has_quantum_parameter ) {
         thrust::copy_if( thrust::device,
                          nvidia_chi_.cbegin(),
                          nvidia_chi_.cbegin() + nparts,
                          nvidia_cell_keys_.cbegin(),
                          cp_parts->nvidia_chi_.begin(),
-                         count_if_out() );
+                         cellKeyEquals() );
     }
 
     if( has_Monte_Carlo_process ) {
         thrust::copy_if( thrust::device,
                          nvidia_tau_.cbegin(),
                          nvidia_tau_.cbegin() + nparts,
                          nvidia_cell_keys_.cbegin(),
                          cp_parts->nvidia_tau_.begin(),
-                         count_if_out() );
+                         cellKeyEquals() );
     }
 
     if( tracked ) {
         thrust::copy_if( thrust::device,
                          nvidia_id_.cbegin(),
                          nvidia_id_.cbegin() + nparts,
                          nvidia_cell_keys_.cbegin(),
                          cp_parts->nvidia_id_.begin(),
-                         count_if_out() );
+                         cellKeyEquals() );
     }
 
-    particles_to_move->copyFromDeviceToHost();
+    buffer->copyFromDeviceToHost();
 }
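The erase path that follows relies on the stencil overloads of thrust::remove_if: each per-particle property array is compacted according to a separate key array. A self-contained sketch of that pattern (assumed names, not part of the patch):

    #include <thrust/remove.h>
    #include <thrust/device_vector.h>
    #include <thrust/execution_policy.h>

    // Remove the entries of `data` whose matching `keys` entry is negative;
    // `keys` acts as a stencil and is left untouched by this call.
    void compact_by_keys( thrust::device_vector<double> &data,
                          const thrust::device_vector<int> &keys )
    {
        const auto new_end = thrust::remove_if( thrust::device,
                                                data.begin(), data.end(),
                                                keys.cbegin(), // stencil
                                                cellKeyNegative() );
        data.resize( new_end - data.begin() );
    }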
 
@@ -1475,7 +1507,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move )
 //                           std::begin( nvidia_position_[i] ),
 //                           std::begin( nvidia_position_[i] ) + nparts,
 //                           std::cbegin( nvidia_cell_keys_ ),
-//                           count_if_out() );
+//                           cellKeyEquals<-1>() );
 //     }
 //
 //}
@@ -1490,7 +1522,7 @@ int nvidiaParticles::eraseLeavingParticles()
     const int nparts_to_remove = thrust::count_if( thrust::device,
                                                    nvidia_cell_keys_.begin(),
                                                    nvidia_cell_keys_.begin() + nparts,
-                                                   count_if_out() );
+                                                   cellKeyNegative() );
 
     if( nparts_to_remove > 0 ) {
 
@@ -1508,7 +1540,7 @@ int nvidiaParticles::eraseLeavingParticles()
                            first_particle,
                            last_particle,
                            nvidia_cell_keys_.cbegin(),
-                           count_if_out() );
+                           cellKeyNegative() );
 
         // Remove the other position values depending on the simulation's grid
         // dimensions
@@ -1517,7 +1549,7 @@ int nvidiaParticles::eraseLeavingParticles()
                                nvidia_position_[i].begin(),
                                nvidia_position_[i].begin() + nparts,
                                nvidia_cell_keys_.cbegin(),
-                               count_if_out() );
+                               cellKeyNegative() );
         }
 
         if( has_quantum_parameter ) {
@@ -1525,7 +1557,7 @@ int nvidiaParticles::eraseLeavingParticles()
                                nvidia_chi_.begin(),
                                nvidia_chi_.begin() + nparts,
                                nvidia_cell_keys_.cbegin(),
-                               count_if_out() );
+                               cellKeyNegative() );
         }
 
         if( has_Monte_Carlo_process ) {
@@ -1533,7 +1565,7 @@ int nvidiaParticles::eraseLeavingParticles()
                                nvidia_tau_.begin(),
                                nvidia_tau_.begin() + nparts,
                                nvidia_cell_keys_.cbegin(),
-                               count_if_out() );
+                               cellKeyNegative() );
         }
 
         if( tracked ) {
@@ -1541,7 +1573,7 @@ int nvidiaParticles::eraseLeavingParticles()
                                nvidia_id_.begin(),
                                nvidia_id_.begin() + nparts,
                                nvidia_cell_keys_.cbegin(),
-                               count_if_out() );
+                               cellKeyNegative() );
         }
 
         // Update current number of particles
@@ -1679,8 +1711,7 @@ void nvidiaParticles::importAndSortParticles( Particles* particles_to_inject )
 int nvidiaParticles::prepareBinIndex()
 {
     if( first_index.size() == 0 ) {
-        // Some Particles object like particles_to_move do not have allocated
-        // bins, we skip theses.
+        // Some Particles objects do not have allocated bins; we skip these.
         return -1;
     }
 
diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h
index 249a9fcf2..64164fad7 100644
--- a/src/Particles/nvidiaParticles.h
+++ b/src/Particles/nvidiaParticles.h
@@ -113,10 +113,12 @@ class nvidiaParticles : public Particles
     };
 
     // -----------------------------------------------------------------------------
-    //! Extract particles from the Particles object and put
-    //! them in the Particles object `particles_to_move`
+    //! Move escaping particles to the buffers
     // -----------------------------------------------------------------------------
-    void extractParticles( Particles* particles_to_move ) override;
+    void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) override;
+
+    template< const int key>
+    void extractParticlesByKey( bool copy, Particles* buffer );
 
     // -----------------------------------------------------------------------------
     //! Erase particles leaving the patch object on device and returns the number of particle removed
     // -----------------------------------------------------------------------------
     int eraseLeavingParticles() override;
 
     // -----------------------------------------------------------------------------
-    //! Inject particles from particles_to_move into *this and return he number of particle added
+    //! Inject particles from particles_to_inject into *this and return the number of particle added
     // -----------------------------------------------------------------------------
     int injectParticles( Particles* particles_to_inject ) override;
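For context, the intended host-side calling sequence for these two methods mirrors the commented-out lines that this patch removes from Species::sortParticles (sketch, using the names of this patch series):

    // After the particle exchange: drop the leavers, then add the arrivals.
    // Both methods return the number of particles affected.
    particles->last_index[0] = particles->eraseLeavingParticles();
    particles->last_index[0] += particles->injectParticles( particles_to_move );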
diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp
index 546e0ca08..d61c1f9e1 100755
--- a/src/Patch/Patch.cpp
+++ b/src/Patch/Patch.cpp
@@ -784,10 +784,10 @@ void Patch::cornersParticles( int ispec, Params &params, int iDim )
     }
 
-    // Copy corner particles to the start or the end of the particles to be sent for the following dimension
+    // Copy corner particles to the end of the particles to be sent for the following dimension
     for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) {
         if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) {
-            partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], 0 );
+            partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], buffer.partSend[otherDim][0]->size() );
         }
         if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) {
             partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() );
         }
@@ -1310,7 +1310,6 @@ void Patch::deleteFieldsOnDevice()
 //         for( unsigned int ispec=0 ; ispec<( *this )( ipatch )->vecSpecies.size() ; ispec++ ) {
 //             Species *spec = species( ipatch, ispec );
 //             spec->particles->initializeDataOnDevice();
-//             spec->particles_to_move->initializeDataOnDevice();
 //
 //             //#pragma acc enter data copyin(spec->nrj_radiation)
 //         }
diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp
index 22d976ba2..9067d049d 100755
--- a/src/Patch/VectorPatch.cpp
+++ b/src/Patch/VectorPatch.cpp
@@ -4671,86 +4671,22 @@ void VectorPatch::allocateDataOnDevice(Params &params,
                                        RadiationTables *radiation_tables,
                                        MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables)
 {
-
+
 #if defined( SMILEI_ACCELERATOR_MODE )
     // TODO(Etienne M): FREE. If we have load balancing or other patch
     // creation/destruction available (which is not the case on GPU ATM),
     // we should be taking care of freeing this GPU memory.
- const int npatches = this->size(); - - // const int sizeofJx = patches_[0]->EMfields->Jx_->size(); - // const int sizeofJy = patches_[0]->EMfields->Jy_->size(); - // const int sizeofJz = patches_[0]->EMfields->Jz_->size(); - // const int sizeofRho = patches_[0]->EMfields->rho_->size(); - - // const int sizeofEx = patches_[0]->EMfields->Ex_->size(); - // const int sizeofEy = patches_[0]->EMfields->Ey_->size(); - // const int sizeofEz = patches_[0]->EMfields->Ez_->size(); - - // const int sizeofBx = patches_[0]->EMfields->Bx_->size(); - // const int sizeofBy = patches_[0]->EMfields->By_->size(); - // const int sizeofBz = patches_[0]->EMfields->Bz_->size(); - - for( int ipatch=0 ; ipatchvecSpecies.size(); ispec++ ) { - Species *spec = species( ipatch, ispec ); - spec->particles->initializeDataOnDevice(); - spec->particles_to_move->initializeDataOnDevice(); - - // Create photon species on the device - if ( spec->radiation_model_ == "mc" && spec->photon_species_) { - spec->radiated_photons_->initializeDataOnDevice(); - } - - // Create pair species on the device - if ( spec->mBW_pair_species_[0] && spec->mBW_pair_species_[1]) { - spec->mBW_pair_particles_[0]->initializeDataOnDevice(); - spec->mBW_pair_particles_[1]->initializeDataOnDevice(); - } - - //#pragma acc enter data copyin(spec->nrj_radiation) + for( auto spec: patch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } // Allocate field data structures on GPU - patches_[ipatch]->allocateFieldsOnDevice(); - - // const double *const Jx = patches_[ipatch]->EMfields->Jx_->data(); - // const double *const Jy = patches_[ipatch]->EMfields->Jy_->data(); - // const double *const Jz = patches_[ipatch]->EMfields->Jz_->data(); - // const double *const Rho = patches_[ipatch]->EMfields->rho_->data(); + patch->allocateFieldsOnDevice(); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jx, sizeofJx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jy, sizeofJy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jz, sizeofJz ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Rho, sizeofRho ); - - // const double *const Ex = patches_[ipatch]->EMfields->Ex_->data(); - // const double *const Ey = patches_[ipatch]->EMfields->Ey_->data(); - // const double *const Ez = patches_[ipatch]->EMfields->Ez_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ex, sizeofEx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ey, sizeofEy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ez, sizeofEz ); - - // const double *const Bmx = patches_[ipatch]->EMfields->Bx_m->data(); - // const double *const Bmy = patches_[ipatch]->EMfields->By_m->data(); - // const double *const Bmz = patches_[ipatch]->EMfields->Bz_m->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmy, sizeofBy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmz, sizeofBz ); - - // const double *const Bx = patches_[ipatch]->EMfields->Bx_->data(); - // const double *const By = patches_[ipatch]->EMfields->By_->data(); - // const double *const Bz = patches_[ipatch]->EMfields->Bz_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( By, sizeofBy ); - // 
smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bz, sizeofBz ); - } // end patch loop // TODO(Etienne M): We should create a function that does the copy of the radiation table. diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index a5a53dbb0..4cb283d17 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -1,5 +1,6 @@ #include "AsyncMPIbuffers.h" +#include "ParticlesFactory.h" #include "Field.h" #include "Patch.h" @@ -75,29 +76,29 @@ SpeciesMPIbuffers::~SpeciesMPIbuffers() } -void SpeciesMPIbuffers::allocate( unsigned int ndims ) +void SpeciesMPIbuffers::allocate( Params ¶ms, Patch *patch ) { - srequest.resize( ndims ); - rrequest.resize( ndims ); + srequest.resize( params.nDim_field ); + rrequest.resize( params.nDim_field ); - partRecv.resize( ndims ); - partSend.resize( ndims ); + partRecv.resize( params.nDim_field ); + partSend.resize( params.nDim_field ); - partSendSize.resize( ndims ); - partRecvSize.resize( ndims ); + partSendSize.resize( params.nDim_field ); + partRecvSize.resize( params.nDim_field ); - for( unsigned int i=0 ; i > partRecv; diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index c35a69fe9..4fe93fd03 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -929,8 +929,7 @@ void SmileiMPI::recv_species( Patch *patch, int from, int &tag, Params ¶ms ) recv( patch->vecSpecies[ispec]->particles, from, tag+2*ispec, recvParts ); MPI_Type_free( &( recvParts ) ); } - patch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - patch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + patch->vecSpecies[ispec]->allocateParticlesOnDevice(); } diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 0fb38f673..bfc1ae036 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -90,7 +90,6 @@ Species::Species( Params ¶ms, Patch *patch ) : { // &particles_sorted[0] particles = ParticlesFactory::create( params, *patch ); - particles_to_move = ParticlesFactory::create( params, *patch ); regular_number_array_.clear(); partBoundCond = NULL; @@ -104,7 +103,7 @@ Species::Species( Params ¶ms, Patch *patch ) : dx_inv_[1] = 1./cell_length[1]; dx_inv_[2] = 1./cell_length[2]; - initCluster( params ); + initCluster( params, patch ); inv_nDim_particles = 1./( ( double )nDim_particle ); length_[0]=0; @@ -123,7 +122,7 @@ Species::Species( Params ¶ms, Patch *patch ) : }//END Species creator -void Species::initCluster( Params ¶ms ) +void Species::initCluster( Params ¶ms, Patch *patch ) { // NOTE: On GPU we dont use first_index, it would contain redundant data but // we are forced to initialize it due to ParticleCreator::create() and the @@ -252,7 +251,7 @@ void Species::initCluster( Params ¶ms ) #endif //Initialize specMPI - MPI_buffer_.allocate( nDim_field ); + MPI_buffer_.allocate( params, patch ); //ener_tot = 0.; nrj_bc_lost = 0.; @@ -386,7 +385,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) typePartRecv.resize( nDim_field*2, MPI_DATATYPE_NULL ); exchangePatch = MPI_DATATYPE_NULL; - particles_to_move->initialize( 0, *particles ); } @@ -396,7 +394,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) Species::~Species() { delete particles; - delete particles_to_move; delete Push; delete Interp; @@ -631,6 +628,34 @@ Species::deleteSpeciesCurrentAndChargeOnDevice( } } + +void Species::allocateParticlesOnDevice() +{ + particles->initializeDataOnDevice(); + for( auto partSends: 
MPI_buffer_.partSend ) { + for( auto partSend: partSends ) { + partSend->initializeDataOnDevice(); + } + } + for( auto partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + partRecv->initializeDataOnDevice(); + } + } + + // Create photon species on the device + if( radiation_model_ == "mc" && photon_species_ ) { + radiated_photons_->initializeDataOnDevice(); + } + + // Create pair species on the device + if( mBW_pair_species_[0] && mBW_pair_species_[1] ) { + mBW_pair_particles_[0]->initializeDataOnDevice(); + mBW_pair_particles_[1]->initializeDataOnDevice(); + } +} + + //! Copy particles from host to device void Species::copyParticlesFromHostToDevice() @@ -1754,33 +1779,22 @@ void Species::sortParticles( Params ¶ms ) // ----------------------------- // GPU version - - // particles_to_move contains, up to here, send particles - // clean it to manage recv particles - particles_to_move->clear(); // Clear on the host - // Merge all MPI_buffer_.partRecv in particles_to_move - for( int idim = 0; idim < params.nDim_field; idim++ ) { - for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) { - int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); - if( n_part_recv != 0 ) { - // insert n_part_recv in particles_to_move from 0 - MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, - n_part_recv, - *particles_to_move, - particles_to_move->size() ); + + // Merge all MPI_buffer_.partRecv in the first one + Particles * first_buffer = MPI_buffer_.partRecv[0][0]; + for( auto &partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + if( partRecv != first_buffer && partRecv->size() > 0 ) { + partRecv->copyParticles( 0, partRecv->size(), *first_buffer, first_buffer->size() ); + partRecv->clear(); } } } - - particles_to_move->copyFromHostToDevice(); - - // // Erase particles that leaves this patch - // particles->last_index[0] = particles->eraseLeavingParticles(); - // - // // Inject newly arrived particles in particles_to_move - // particles->last_index[0] += particles->injectParticles( particles_to_move ); - - particles->importAndSortParticles( particles_to_move ); + + first_buffer->copyFromHostToDevice(); + + particles->importAndSortParticles( first_buffer ); + #else // -------------------------- @@ -1791,24 +1805,6 @@ void Species::sortParticles( Params ¶ms ) int ndim = params.nDim_field; int idim; - // Compute total number of particles received - // int total_number_part_recv = 0; - //Merge all MPI_buffer_.partRecv in particles_to_move - // for( int idim = 0; idim < ndim; idim++ ) { - // for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - // int n_part_recv = MPI_buffer_.partRecv[idim][iNeighbor]->size(); - // if( ( n_part_recv!=0 ) ) { - // // insert n_part_recv in particles_to_move from 0 - // //MPI_buffer_.partRecv[idim][iNeighbor]->copyParticles( 0, n_part_recv, *particles_to_move, 0 ); - // total_number_part_recv += n_part_recv; - // //particles->last_index[particles->last_index.size()-1] += n_part_recv; - // //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv); - // } - // } - // } - //cout << "\t Species id : " << species_number_ << " - nparticles recv : " << blabla << endl; - - // Sort to adapt do cell_keys usage std::vector indexes_of_particles_to_exchange; for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) { diff --git a/src/Species/Species.h b/src/Species/Species.h index b91c9521b..83a2bab9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -147,8 +147,6 @@ class Species //! 
Vector containing all Particles of the considered Species Particles *particles; - //! Data structure through which passes particles which move from one patch to another - Particles *particles_to_move; Particles particles_sorted[2]; //std::vector index_of_particles_to_exchange; @@ -344,7 +342,7 @@ class Species // ----------------------------------------------------------------------------- // 5. Methods - virtual void initCluster( Params & ); + virtual void initCluster( Params &, Patch * ); virtual void resizeCluster( Params & ); @@ -386,6 +384,8 @@ class Species #if defined( SMILEI_ACCELERATOR_MODE ) + void allocateParticlesOnDevice(); + //! Copy particles from host to device void copyParticlesFromHostToDevice(); diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 89d12b340..4a4199b63 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesV::SpeciesV( Params ¶ms, Patch *patch ) : Species( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; @@ -106,7 +106,7 @@ SpeciesV::~SpeciesV() } -void SpeciesV::initCluster( Params ¶ms ) +void SpeciesV::initCluster( Params ¶ms, Patch *patch ) { int ncells = 1; for( unsigned int iDim=0 ; iDim Date: Thu, 4 Apr 2024 11:24:01 +0200 Subject: [PATCH 07/28] forgot to remove function --- src/Particles/Particles.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 13941b40a..a155baf7a 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -476,7 +476,7 @@ class Particles //! Extract particles escaping the box to buffers // ----------------------------------------------------------------------------- virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); -virtual void extractParticles( Particles* particles_to_move ); + // ----------------------------------------------------------------------------- //! 
Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- From 3afa356406ed886340af9afec929152fbf277ac3 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 18 Apr 2024 14:24:02 +0200 Subject: [PATCH 08/28] scatter recvBuffers on CPU instead of GPU --- makefile | 2 +- src/Particles/Particles.cpp | 39 ++++++++++++++- src/Particles/Particles.h | 5 +- src/Particles/nvidiaParticles.cu | 81 +++++++++++++++---------------- src/Particles/nvidiaParticles.h | 11 +++-- src/Patch/Patch.cpp | 3 +- src/Smilei.cpp | 2 +- src/SmileiMPI/AsyncMPIbuffers.cpp | 17 +++++-- src/Species/Species.cpp | 16 +++--- 9 files changed, 110 insertions(+), 66 deletions(-) diff --git a/makefile b/makefile index 3aaff0201..36239640d 100755 --- a/makefile +++ b/makefile @@ -216,7 +216,7 @@ endif ifneq (,$(call parse_config,gpu_amd)) CXXFLAGS += -DSMILEI_ACCELERATOR_MODE GPU_COMPILER ?= $(CC) - GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS) + GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index b675ac12f..688c53085 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1305,8 +1305,37 @@ void Particles::copyFromDeviceToHost() } // Loop all particles and copy the outgoing ones to buffers -void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Particles* buffer[] ) +void Particles::copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ) { + // Leaving particles have a cell_key equal to -2-direction + // where direction goes from 0 to 6 and tells which way the particle escapes. + // If the cell_key is -1, the particle must be destroyed so it is not extracted. + +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + + // GPU + + // Copy leaving particles to buffer[0] on the GPU + copyLeavingParticlesToBuffer( buffer[0] ); + + // Dispatch between the different buffers on the CPU + // (doing this on the GPU is slower; maybe replacing thrust operations with pure cuda would work) + vector indices; + for( size_t ipart = 0; ipart < buffer[0]->size(); ipart++ ) { + int direction = -buffer[0]->cell_keys[ipart] - 2; + if( direction > 0 ) { + if( copy[direction] ) { + buffer[0]->copyParticle( ipart, *buffer[direction] ); + } + indices.push_back( ipart ); + } + } + buffer[0]->eraseParticles( indices ); + +#else + + // CPU + for( size_t ipart = 0; ipart < size(); ipart++ ) { if( cell_keys[ipart] < -1 ) { int direction = -cell_keys[ipart] - 2; @@ -1315,8 +1344,16 @@ void Particles::extractParticles( const size_t /* ndim */, const bool copy[], Pa } } } + +#endif } +void Particles::copyLeavingParticlesToBuffer( Particles* ) +{ + ERROR( "Device only feature, should not have come here!" ); +} + + void Particles::savePositions() { unsigned int ndim = Position.size(), npart = size(); double *p[3], *pold[3]; diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index a155baf7a..86f9f9cac 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -473,9 +473,10 @@ class Particles // Accelerator specific virtual functions // ----------------------------------------------------------------------------- - //! 
Extract particles escaping the box to buffers + //! Extract particles leaving the box to buffers // ----------------------------------------------------------------------------- - virtual void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ); + void copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ); + virtual void copyLeavingParticlesToBuffer( Particles* buffer ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index 191e0943f..efca22ad5 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -54,6 +54,15 @@ struct cellKeyNegative } }; +struct cellKeyBelowMinus1 +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < -1; + } +}; + namespace detail { //////////////////////////////////////////////////////////////////////////////// @@ -1375,49 +1384,33 @@ unsigned int nvidiaParticles::deviceCapacity() const } // ----------------------------------------------------------------------------- -//! Move escaping particles to the buffers +//! Move leaving particles to the buffer // ----------------------------------------------------------------------------- -void nvidiaParticles::extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) +void nvidiaParticles::copyLeavingParticlesToBuffer( Particles* buffer ) { - // Escaping particles have a cell_key equal to -2-direction - // where direction goes from 0 to 6 and tells which way the particle escapes. - // If the cell_key is -1, the particle must be destroyed so it is not extracted. - - extractParticlesByKey<-2>( copy[0], buffer[0] ); // x_min - extractParticlesByKey<-3>( copy[1], buffer[1] ); // x_max - if( ndim > 1 ) { - extractParticlesByKey<-4>( copy[2], buffer[2] ); // y_min - extractParticlesByKey<-5>( copy[3], buffer[3] ); // y_max - if( ndim > 2 ) { - extractParticlesByKey<-6>( copy[4], buffer[4] ); // z_min - extractParticlesByKey<-7>( copy[5], buffer[5] ); // z_max - } - } + copyParticlesByPredicate( buffer, cellKeyBelowMinus1() ); + buffer->copyFromDeviceToHost(); } -//! Copy particles which have cell_key = key -template< const int key> -void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) +//! Copy particles which statisfy some predicate +template +void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pred ) { // TODO(Etienne M): We are doing extra work. We could use something like // std::partition to output the invalidated particles in buffer // and keep the good ones. This would help us avoid the std::remove_if in // the particle injection and sorting algorithm. - if( ! 
copy ) { - return; - } - const int nparts = gpu_nparts_; // Iterator of the main data structure // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); + nvidia_momentum_[0].begin(), + nvidia_momentum_[1].begin(), + nvidia_momentum_[2].begin(), + nvidia_weight_.begin(), + nvidia_charge_.begin() ) ); const auto source_iterator_last = source_iterator_first + nparts; // std::advance nvidiaParticles* const cp_parts = static_cast( buffer ); @@ -1425,7 +1418,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) const int nparts_to_copy = thrust::count_if( thrust::device, nvidia_cell_keys_.cbegin(), nvidia_cell_keys_.cbegin() + nparts, - cellKeyEquals() ); + pred ); // Resize it, if too small (copy_if do not resize) cp_parts->resize( nparts_to_copy ); @@ -1443,7 +1436,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) source_iterator_last, nvidia_cell_keys_.cbegin(), destination_iterator_first, - cellKeyEquals() ); + pred ); // Copy the other position values depending on the simulation's grid dimensions const int ndim_particles = nvidia_position_.size(); @@ -1453,7 +1446,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_position_[i].cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_position_[i].begin(), - cellKeyEquals() ); + pred ); } // Special treatment for chi if radiation emission @@ -1463,7 +1456,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_chi_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_chi_.begin(), - cellKeyEquals() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1472,7 +1465,7 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_tau_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_tau_.begin(), - cellKeyEquals() ); + pred ); } if( tracked ) { @@ -1481,10 +1474,9 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) nvidia_id_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_id_.begin(), - cellKeyEquals() ); + pred ); } - buffer->copyFromDeviceToHost(); } @@ -1516,14 +1508,19 @@ void nvidiaParticles::extractParticlesByKey( bool copy, Particles* buffer ) //! 
Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- int nvidiaParticles::eraseLeavingParticles() +{ + return eraseParticlesByPredicate( cellKeyNegative() ); +} + +template +int nvidiaParticles::eraseParticlesByPredicate( Predicate pred ) { const int position_dimension_count = nvidia_position_.size(); const int nparts = gpu_nparts_; const int nparts_to_remove = thrust::count_if( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.begin() + nparts, - cellKeyNegative() ); - + pred ); if( nparts_to_remove > 0 ) { const auto first_particle = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), @@ -1540,7 +1537,7 @@ int nvidiaParticles::eraseLeavingParticles() first_particle, last_particle, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); // Remove the other position values depending on the simulation's grid // dimensions @@ -1549,7 +1546,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_position_[i].begin(), nvidia_position_[i].begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( has_quantum_parameter ) { @@ -1557,7 +1554,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_chi_.begin(), nvidia_chi_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1565,7 +1562,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_tau_.begin(), nvidia_tau_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } if( tracked ) { @@ -1573,7 +1570,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_id_.begin(), nvidia_id_.begin() + nparts, nvidia_cell_keys_.cbegin(), - cellKeyNegative() ); + pred ); } // Update current number of particles diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 64164fad7..ba689f1e8 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -113,18 +113,21 @@ class nvidiaParticles : public Particles }; // ----------------------------------------------------------------------------- - //! Move escaping particles to the buffers + //! Move leaving particles to the buffers // ----------------------------------------------------------------------------- - void extractParticles( const size_t ndim, const bool copy[], Particles* buffer[] ) override; + void copyLeavingParticlesToBuffer( Particles* buffer ) override; - template< const int key> - void extractParticlesByKey( bool copy, Particles* buffer ); + template + void copyParticlesByPredicate( Particles* buffer, Predicate pred ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed // ----------------------------------------------------------------------------- int eraseLeavingParticles() override; + template + int eraseParticlesByPredicate( Predicate pred ); + // ----------------------------------------------------------------------------- //! 
Inject particles from particles_to_inject into *this and return the number of particle added // ----------------------------------------------------------------------------- diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index d61c1f9e1..585f76f97 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -539,6 +539,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) cleanMPIBuffers( ispec, params ); + // Make a list of buffers bool copy[params.nDim_field*2]; Particles* sendBuffer[params.nDim_field*2]; for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { @@ -552,7 +553,7 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - part.extractParticles( params.nDim_field, copy, sendBuffer ); + part.copyLeavingParticlesToBuffers( copy, sendBuffer ); } // copyExchParticlesToBuffers(... iDim) diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 0ab0db1a2..eae1993d9 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -124,7 +124,7 @@ int main( int argc, char *argv[] ) // oblivious to the program (only one, the one by default). // This could be a missed but very advanced optimization for some // kernels/exchange. - ERROR( "Simlei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); + ERROR( "Smilei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); } else { // ::omp_set_default_device(0); } diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index 4cb283d17..ff8efb17f 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -93,12 +93,21 @@ void SpeciesMPIbuffers::allocate( Params ¶ms, Patch *patch ) partRecvSize[i].resize( 2 ); partSendSize[i].resize( 2 ); + // NOTE: send/recv buffers on xmin / xmax use a different constructor because + // they must be sent on GPU for exchanging particles partRecv[i].resize( 2 ); - partRecv[i][0] = ParticlesFactory::create( params, *patch );; - partRecv[i][1] = ParticlesFactory::create( params, *patch );; partSend[i].resize( 2 ); - partSend[i][0] = ParticlesFactory::create( params, *patch );; - partSend[i][1] = ParticlesFactory::create( params, *patch );; + if( i == 0 ) { + partRecv[i][0] = ParticlesFactory::create( params, *patch ); + partRecv[i][1] = ParticlesFactory::create( params, *patch ); + partSend[i][0] = ParticlesFactory::create( params, *patch ); + partSend[i][1] = ParticlesFactory::create( params, *patch ); + } else { + partRecv[i][0] = new Particles(); + partRecv[i][1] = new Particles(); + partSend[i][0] = new Particles(); + partSend[i][1] = new Particles(); + } } } diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index bfc1ae036..65358f555 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -632,16 +632,12 @@ Species::deleteSpeciesCurrentAndChargeOnDevice( void Species::allocateParticlesOnDevice() { particles->initializeDataOnDevice(); - for( auto partSends: MPI_buffer_.partSend ) { - for( auto partSend: partSends ) { - partSend->initializeDataOnDevice(); - } - } - for( auto partRecvs: MPI_buffer_.partRecv ) { - for( auto partRecv: partRecvs ) { - partRecv->initializeDataOnDevice(); - } - } + + // The first send/recv buffers are also on device + MPI_buffer_.partSend[0][0]->initializeDataOnDevice(); + 
MPI_buffer_.partSend[0][1]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][0]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][1]->initializeDataOnDevice(); // Create photon species on the device if( radiation_model_ == "mc" && photon_species_ ) { From 530529d6bc2b9b683de311df7af8e33332aa5d14 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Thu, 18 Apr 2024 21:39:27 +0200 Subject: [PATCH 09/28] fix analysis --- .../validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py index ee807d65b..da25c961c 100644 --- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py +++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py @@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds): thresholds = {} thresholds["points"] = np.array([0. ,10 ,100,1000]) -thresholds["factor"] = np.array([1e9, 1.,0.5, 0.2]) +thresholds["factor"] = np.array([1e9, 1.,0.6, 0.2]) Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds)) Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds)) From ff0266ec8be29aee51ce53e63c9b7d78d821f261 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 19 Apr 2024 10:20:49 +0200 Subject: [PATCH 10/28] more --- .../validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py index da25c961c..8d5b8ddb1 100644 --- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py +++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py @@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds): thresholds = {} thresholds["points"] = np.array([0. ,10 ,100,1000]) -thresholds["factor"] = np.array([1e9, 1.,0.6, 0.2]) +thresholds["factor"] = np.array([1e9, 1.,0.7, 0.2]) Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds)) Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds)) From 348faa03cf42084839984b61870942c6a9af05d1 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 26 Apr 2024 00:08:51 +0200 Subject: [PATCH 11/28] fix particle exchange --- src/Particles/Particles.cpp | 4 ++-- src/Particles/Particles.h | 4 ++-- src/Particles/nvidiaParticles.cu | 14 ++++++++++---- src/Particles/nvidiaParticles.h | 2 +- src/Patch/Patch.cpp | 4 ++-- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 688c53085..34eaeb161 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1299,13 +1299,13 @@ void Particles::copyFromHostToDevice() { ERROR( "Device only feature, should not have come here!" ); } -void Particles::copyFromDeviceToHost() +void Particles::copyFromDeviceToHost( bool ) { ERROR( "Device only feature, should not have come here!" 
); } // Loop all particles and copy the outgoing ones to buffers -void Particles::copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ) +void Particles::copyLeavingParticlesToBuffers( const vector copy, const vector buffer ) { // Leaving particles have a cell_key equal to -2-direction // where direction goes from 0 to 6 and tells which way the particle escapes. diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 86f9f9cac..c0e5958e3 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -435,7 +435,7 @@ class Particles virtual void initializeDataOnDevice(); virtual void initializeIDsOnDevice(); virtual void copyFromHostToDevice(); - virtual void copyFromDeviceToHost(); + virtual void copyFromDeviceToHost( bool copy_keys = false ); //! Return the pointer toward the Position[idim] vector virtual double* getPtrPosition( int idim ) { @@ -475,7 +475,7 @@ class Particles // ----------------------------------------------------------------------------- //! Extract particles leaving the box to buffers // ----------------------------------------------------------------------------- - void copyLeavingParticlesToBuffers( const bool copy[], Particles* buffer[] ); + void copyLeavingParticlesToBuffers( const std::vector copy, const std::vector buffer ); virtual void copyLeavingParticlesToBuffer( Particles* buffer ); // ----------------------------------------------------------------------------- diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index efca22ad5..af45bfadd 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -1348,7 +1348,7 @@ void nvidiaParticles::copyFromHostToDevice() // ------------------------------------------------------------------------------------------------- //! Copy device to host // ------------------------------------------------------------------------------------------------- -void nvidiaParticles::copyFromDeviceToHost() +void nvidiaParticles::copyFromDeviceToHost( bool copy_keys ) { for (int idim=0;idimcopyFromDeviceToHost(); + buffer->copyFromDeviceToHost( true ); } @@ -1410,7 +1414,8 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre nvidia_momentum_[1].begin(), nvidia_momentum_[2].begin(), nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); + nvidia_charge_.begin(), + nvidia_cell_keys_.begin() ) ); const auto source_iterator_last = source_iterator_first + nparts; // std::advance nvidiaParticles* const cp_parts = static_cast( buffer ); @@ -1428,7 +1433,8 @@ void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pre cp_parts->nvidia_momentum_[1].begin(), cp_parts->nvidia_momentum_[2].begin(), cp_parts->nvidia_weight_.begin(), - cp_parts->nvidia_charge_.begin() ) ); + cp_parts->nvidia_charge_.begin(), + cp_parts->nvidia_cell_keys_.begin() ) ); // Copy send particles in dedicated data structure thrust::copy_if( thrust::device, diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index ba689f1e8..5fa0a933b 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -78,7 +78,7 @@ class nvidiaParticles : public Particles void copyFromHostToDevice() override; //! 
Update the particles from device to host - void copyFromDeviceToHost() override; + void copyFromDeviceToHost( bool copy_keys = false ) override; unsigned int deviceCapacity() const override; diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 585f76f97..8fa4022aa 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -540,8 +540,8 @@ void Patch::copyExchParticlesToBuffers( int ispec, Params &params ) cleanMPIBuffers( ispec, params ); // Make a list of buffers - bool copy[params.nDim_field*2]; - Particles* sendBuffer[params.nDim_field*2]; + vector<bool> copy( params.nDim_field*2, false ); + vector<Particles*> sendBuffer( params.nDim_field*2, nullptr ); for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL; copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL; From 4b2f6487debd5d5290f29ef9a4583981ccd19cf2 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 26 Apr 2024 00:12:42 +0200 Subject: [PATCH 12/28] make happi working with virtualenv --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 3aaff0201..4c9ada85b 100755 --- a/makefile +++ b/makefile @@ -52,7 +52,7 @@ DIRS := $(shell find src -type d) SRCS := $(shell find src/* -name \*.cpp) OBJS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.o)) DEPS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.d)) -SITEDIR = $(shell $(PYTHONEXE) -c 'import site; site._script()' --user-site) +SITEDIR = $(shell d=`$(PYTHONEXE) -m site --user-site` && echo $$d || $(PYTHONEXE) -c "import sysconfig; print(sysconfig.get_path('purelib'))") # Smilei tools TABLES_DIR := tools/tables From 8bcaeb4790678eb695a143d10d9ff3956269d820 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Mon, 29 Apr 2024 06:54:32 +0200 Subject: [PATCH 13/28] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 3322c2857..048d8e1b5 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 181 papers have been published covering a broad range of topics: +As of April 2024, 182 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Yao2024] + + W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J. Fuchs, + `Optimizing laser coupling, matter heating, and particle acceleration from solids using multiplexed ultraintense lasers`, + `Matter and Radiation at Extremes 9, 047202 (2024) `_ + .. [Luo2024] M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T.
Fülöp, From 27dd743d2f95a8e7c8db4acdb4d835809dd9c1b0 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Thu, 2 May 2024 13:45:00 +0200 Subject: [PATCH 14/28] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 048d8e1b5..9e6e17daf 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 182 papers have been published covering a broad range of topics: +As of April 2024, 183 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Pan2024] + + Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. Liu, Y. Liang, Z. Peng, T. Xu, T. Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, + `Electron acceleration and x-ray generation from near-critical-density carbon nanotube foams driven by moderately relativistic lasers`, + `Physics of Plasmas 31, 043108 (2024) `_ + .. [Yao2024] W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J. Fuchs, From 8a6b4a82115099c145c39af4d1daeefa812376b9 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Thu, 9 May 2024 07:42:20 +0200 Subject: [PATCH 15/28] add article --- doc/Sphinx/Overview/material.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 9e6e17daf..d0446f1ce 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 183 papers have been published covering a broad range of topics: +As of May 2024, 184 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,13 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. + +.. [Azamoum2024] + + Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, + `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, + `Light: Science & Applications volume 13, Article number: 109 (2024) `_ + .. [Pan2024] Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. 
Liu, Y. Liang, Z. Peng, T. Xu, T. Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, From 8447b754bc834a65cb1ab44c34117c658f324a11 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Sat, 11 May 2024 12:14:24 +0200 Subject: [PATCH 16/28] add publication --- doc/Sphinx/Overview/material.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index d0446f1ce..61fa240c7 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 184 papers have been published covering a broad range of topics: +As of May 2024, 185 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -55,7 +55,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, - `Light: Science & Applications volume 13, Article number: 109 (2024) `_ + `Light: Science & Applications 13, 109 (2024) `_ .. [Pan2024] @@ -146,7 +146,13 @@ Following is the distribution of these topics in the listed publications up to N A. Seidel, B. Lei, C. Zepter, M. C. Kaluza, A. Sävert, M. Zepf, and D. Seipt, `Polarization and CEP dependence of the transverse phase space in laser driven accelerators`, `Physical Review Research 6, 013056 (2024) `_ - + +.. [Krishnamurthy2023] + + S. Krishnamurthy, S. Chintalwad, A. P. L. Robinson, R. M. G. M. Trines, and B. Ramakrishna, + `Observation of proton modulations in laser–solid interaction`, + `Plasma Physics and Controlled Fusion 65 085020 (2023) `_ + .. [Gao2023b] X. Gao, From 47e30b4b2ba663aabd546e389f1fb9d985b38a9b Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Mon, 13 May 2024 22:25:14 +0200 Subject: [PATCH 17/28] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 61fa240c7..04973edbf 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 185 papers have been published covering a broad range of topics: +As of May 2024, 186 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -51,6 +51,12 @@ Following is the distribution of these topics in the listed publications up to N You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Timmis2024] + + R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. 
Norreys, + `Attosecond and nano‐Coulomb electron bunches via the Zero Vector Potential mechanism`, + `Scientific Reports volume 14, 10805 (2024) `_ + .. [Azamoum2024] Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, From 18f1e1c120143504865619c06e937435f43a53b4 Mon Sep 17 00:00:00 2001 From: Arnaud Beck Date: Tue, 14 May 2024 17:25:22 +0200 Subject: [PATCH 18/28] Typo in deprecated error message --- src/Python/pyprofiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Python/pyprofiles.py b/src/Python/pyprofiles.py index 0e122a1a9..2fff14c1f 100755 --- a/src/Python/pyprofiles.py +++ b/src/Python/pyprofiles.py @@ -702,7 +702,7 @@ def LaserGaussianAM( box_side="xmin", a0=1., omega=1., focus=None, waist=3., print("ERROR: focus should be a list of length 1") exit(1) elif (len(focus)==2): - print("WARNING: deprecated focus in LaserEnvelopeGaussianAM should be a list of length 1") + print("WARNING: deprecated focus in LaserGaussianAM should be a list of length 1") # Polarization and amplitude [dephasing, amplitudeY, amplitudeZ] = transformPolarization(polarization_phi, ellipticity) amplitudeY *= a0 * omega From 5b60a4d771e3ba59e9aba251a5110a0d2366bfb9 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Sun, 19 May 2024 10:49:22 +0200 Subject: [PATCH 19/28] add article, use extended journal names for each article --- doc/Sphinx/Overview/material.rst | 197 ++++++++++++++++--------------- 1 file changed, 101 insertions(+), 96 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 04973edbf..2d33d6aff 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 186 papers have been published covering a broad range of topics: +As of May 2024, 187 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,7 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Ivanov2024] + K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, + `Laser-driven pointed acceleration of electrons with preformed plasma lens`, + `Physical Review Accelerators and Beams 27, 051301 (2024) `_ + .. [Timmis2024] R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. Norreys, @@ -79,7 +84,7 @@ Following is the distribution of these topics in the listed publications up to N M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T. Fülöp, `Control of autoresonant plasma beat-wave wakefield excitation`, - `Phys. Rev. Research 6, 013338 (2024) `_ + `Physical Review Research 6, 013338 (2024) `_ .. [Krafft2024] @@ -247,7 +252,7 @@ Following is the distribution of these topics in the listed publications up to N E. Starodubtseva, I. Tsymbalov, D. Gorlova, K. Ivanov, and A. 
Savel'ev, `Low energy electron injection for direct laser acceleration`, - `Phys. Plasmas 30, 083105 (2023) `_ + `Physics of Plasmas 30, 083105 (2023) `_ .. [Maffini2023] @@ -259,7 +264,7 @@ Following is the distribution of these topics in the listed publications up to N S. Yu. Gus'kov, Ph. Korneev, and M. Murakami, `Laser-driven electrodynamic implosion of fast ions in a thin shell`, - `Matter Radiat. Extremes 8, 056602 (2023) `_ + `Matter and Radiation at Extremes 8, 056602 (2023) `_ .. [RezaeiPandari2023] @@ -271,19 +276,19 @@ Following is the distribution of these topics in the listed publications up to N J. Jonnerby, A. von Boetticher, J. Holloway, L. Corner, A. Picksley, A. J. Ross, R. J. Shalloo , C. Thornton, N. Bourgeois, R. Walczak, and S. M. Hooker, `Measurement of the decay of laser-driven linear plasma wakefields`, - `Phys. Rev. E 108, 055211 (2023) `_ + `Physical Review E 108, 055211 (2023) `_ .. [Drobniak2023] P. Drobniak, E. Baynard, C. Bruni, K. Cassou, C. Guyot, G. Kane, S. Kazamias, V. Kubytskyi, N. Lericheux, B. Lucas, M. Pittman, F. Massimo, A. Beck, A. Specka, P. Nghiem, and D. Minenna, `Random scan optimization of a laser-plasma electron injector based on fast particle-in-cell simulations`, - `Phys. Rev. Accel. Beams 26, 091302 (2023) `_ + `Physical Review Accelerators and Beams 26, 091302 (2023) `_ .. [Bukharskii2023] N. Bukharskii and Ph. Korneev, `Intense widely controlled terahertz radiation from laser-driven wires`, - `Matter Radiat. Extremes 8, 044401 (2023) `_ + `Matter and Radiation at Extremes 8, 044401 (2023) `_ .. [Schmitz2023] @@ -307,7 +312,7 @@ Following is the distribution of these topics in the listed publications up to N X. Gao, `Ionization dynamics of sub-micrometer-sized clusters in intense ultrafast laser pulses`, - `Phys. Plasmas 30, 052102 (2023) `_ + `Physics of Plasmas 30, 052102 (2023) `_ .. [Krafft2023] @@ -325,7 +330,7 @@ Following is the distribution of these topics in the listed publications up to N A. Ghizzo, D. Del Sarto, and H. Betar, `Collisionless Heating Driven by Vlasov Filamentation in a Counterstreaming Beams Configuration`, - `Phys. Rev. Lett. 131, 035101 (2023) `_ + `Physical Review Letters 131, 035101 (2023) `_ .. [Yang2023] @@ -337,31 +342,31 @@ Following is the distribution of these topics in the listed publications up to N W. Yao, A. Fazzini, S.N. Chen, K. Burdonov, J. Béard, M. Borghesi, A. Ciardi, M. Miceli, S. Orlando, X. Ribeyre, E. d'Humières and J. Fuchs, `Investigating particle acceleration dynamics in interpenetrating magnetized collisionless super-critical shocks`, - `J. Plasma Phys. 89, 915890101 (2023) `_ + `Journal of Plasma Physics 89, 915890101 (2023) `_ .. [Pak2023] T. Pak, M. Rezaei-Pandari, S. B. Kim, G. Lee, D. H. Wi, C. I. Hojbota, M. Mirzaie, H. Kim, J. H. Sung, S. K. Lee, C. Kang and K.-Y. Kim, `Multi-millijoule terahertz emission from laser-wakefield-accelerated electrons`, - `Light Sci Appl 12, 37 (2023) `_ + `Light: Science and Applications 12, 37 (2023) `_ .. [Istokskaia2023] V. Istokskaia, M. Tosca, L. Giuffrida, J. Psikal, F. Grepl, V. Kantarelou, S. Stancek, S. Di Siena, A. Hadjikyriacou, A. McIlvenny, Y. Levy, J. Huynh, M. Cimrman, P. Pleskunov, D. Nikitin, A. Choukourov, F. Belloni, A. Picciotto, S. Kar, M. Borghesi, A. Lucianetti, T. Mocek and D. Margarone, `A multi-MeV alpha particle source via proton-boron fusion driven by a 10-GW tabletop laser`, - `Commun Phys 6, 27 (2023) `_ + `Communications Physics 6, 27 (2023) `_ .. [Yoon2023] Y. D. Yoon, D. E. Wendel and G. S. 
Yun, `Equilibrium selection via current sheet relaxation and guide field amplification`, - `Nat Commun 14, 139 (2023) `_ + `Nature Communications 14, 139 (2023) `_ .. [Galbiati2023] M. Galbiati, A. Formenti, M. Grech and M. Passoni, `Numerical investigation of non-linear inverse Compton scattering in double-layer targets`, - `Front. Phys. 11, fphy.2023.1117543 (2023) `_ + `Frontiers in Physics 11, fphy.2023.1117543 (2023) `_ .. [Sakai2023] @@ -373,7 +378,7 @@ Following is the distribution of these topics in the listed publications up to N A. Golovanov, I. Yu. Kostyukov, A. Pukhov and V. Malka, `Energy-Conserving Theory of the Blowout Regime of Plasma Wakefield`, - `Phys. Rev. Lett. 130, 105001 (2023) `_ + `Physical Review Letters 130, 105001 (2023) `_ .. [Miethlinger2023] @@ -385,13 +390,13 @@ Following is the distribution of these topics in the listed publications up to N C. Zepter, A. Seidel, M. Zepf, M. C. Kaluza and A. Sävert, `Role of spatiotemporal couplings in stimulated Raman side scattering`, - `Phys. Rev. Research 5, L012023 (2023) `_ + `Physical Review Research 5, L012023 (2023) `_ .. [Marini2023] S. Marini, M. Grech, P. S. Kleij, M. Raynaud and C. Riconda, `Electron acceleration by laser plasma wedge interaction`, - `Phys. Rev. Research 5, 013115 (2023) `_ + `Physical Review Research 5, 013115 (2023) `_ .. [Blackman2022] @@ -451,7 +456,7 @@ Following is the distribution of these topics in the listed publications up to N D. Margarone, J. Bonvalet, L. Giuffrida, A. Morace, V. Kantarelou, M. Tosca, D. Raffestin, P. Nicolai, A. Picciotto, Y. Abe, Y. Arikawa, S. Fujioka, Y. Kuramitsu, H. Habara and D. Batani, `In-Target Proton–Boron Nuclear Fusion Using a PW-Class Laser`, - `Appl. Sci. 12(3), 1444 (2022) `_ + `Applied Sciences 12(3), 1444 (2022) `_ .. [Kochetkov2022] @@ -463,13 +468,13 @@ Following is the distribution of these topics in the listed publications up to N A. Oudin, A. Debayle, C. Ruyer, D. Benisti, `Cross-beam energy transfer between spatially smoothed laser beams`, - `Phys. Plasmas 29, 112112 (2022) `_ + `Physics of Plasmas 29, 112112 (2022) `_ .. [Chen2022] Q. Chen, D. Maslarova, J. Wang, S. Li, and D. Umstadter, `Injection of electron beams into two laser wakefields and generation of electron rings`, - `Phys. Rev. E 106, 055202 (2022) `_ + `Physical Review E 106, 055202 (2022) `_ .. [Kumar2022b] @@ -481,7 +486,7 @@ Following is the distribution of these topics in the listed publications up to N S. Kumar, D. K. Singh and H. K. Malik, `Comparative study of ultrashort single-pulse and multi-pulse driven laser wakefield acceleration`, - `Laser Phys. Lett. 20, 026001 (2022) `_ + `Laser Physics Letters 20, 026001 (2022) `_ .. [Miloshevsky2022] @@ -505,25 +510,25 @@ Following is the distribution of these topics in the listed publications up to N I. Ouatu, B. T. Spiers, R. Aboushelbaya, Q. Feng, M. W. von der Leyen, R. W. Paddock, R. Timmis, C. Ticos, K. M. Krushelnick and P. A. Norreys, `Ionization states for the multipetawatt laser-QED regime`, - `Phys. Rev. E 106, 015205 (2022) `_ + `Physical Review E 106, 015205 (2022) `_ .. [Beth2022] A. Beth, H. Gunell, C. Simon Wedlund, C. Goetz, H. Nilsson and M. Hamrin, `First investigation of the diamagnetic cavity boundary layer with a 1D3V PIC simulation`, - `A&A 667, A143 (2022) `_ + `Astronomy & Astrophysics 667, A143 (2022) `_ .. [Guo2022] Y. Guo, X. Geng, L. Ji, B. Shen and R.
Li, `Improving the accuracy of hard photon emission by sigmoid sampling of the quantum-electrodynamic table in particle-in-cell Monte Carlo simulations`, - `Phys. Rev. E 105, 025309 (2022) `_ + `Physical Review E 105, 025309 (2022) `_ .. [Pae2022] K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, - `Plasma Phys. Control. Fusion 64, 055013 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ .. [Zhang2022a] @@ -536,43 +541,43 @@ Following is the distribution of these topics in the listed publications up to N Q. Han, X. Geng, B. Shen, Z. Xu and L. Ji, `Ultra-fast polarization of a thin electron layer in the rotational standing-wave field driven by double ultra-intense laser pulses`, - `New J. Phys. 24, 063013 (2022) `_ + `New Journal of Physics 24, 063013 (2022) `_ .. [Gothel2022] I. Göthel, C. Bernert, M. Bussmann, M. Garten, T. Miethlinger, M. Rehwald, K. Zeil, T. Ziegler, T. E. Cowan, U. Schramm and T. Kluge, `Optimized laser ion acceleration at the relativistic critical density surface`, - `Plasma Phys. Control. Fusion 64, 044010 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 044010 (2022) `_ .. [Fazzini2022] A. Fazzini, W. Yao, K. Burdonov, J. Béard, S. N. Chen, A. Ciardi, E. d’Humières, R. Diab, E. D. Filippov, S. Kisyov, V. Lelasseux, M. Miceli, Q. Moreno, S. Orlando, S. Pikuz, X. Ribeyre, M. Starodubtsev, R. Zemskov and J. Fuchs, `Particle energization in colliding subcritical collisionless shocks investigated in the laboratory`, - `A&A 665, A87 (2022) `_ + `Astronomy & Astrophysics 665, A87 (2022) `_ .. [Bykov2022] A. M. Bykov, S. M. Osipov and V. I. Romanskii, `Acceleration of Cosmic Rays to Energies above 1015 eV by Transrelativistic Shocks`, - `J. Exp. Theor. Phys. 134, 487-497 (2022) `_ + `Journal of Experimental and Theoretical Physics 134, 487-497 (2022) `_ .. [Sundstrom2022] A. Sundström, M. Grech, I. Pusztai and C. Riconda, `Stimulated-Raman-scattering amplification of attosecond XUV pulses with pulse-train pumps and application to local in-depth plasma-density measurement`, - `Phys. Rev. E 106, 045208 (2022) `_ + `Physical Review E 106, 045208 (2022) `_ .. [Krafft2022b] C. Krafft and P. Savoini, `Third and Fourth Harmonics of Electromagnetic Emissions by a Weak Beam in a Solar Wind Plasma with Random Density Fluctuations`, - `ApJL 934, L28 (2022) `_ + `The Astrophysical Journal Letters 934, L28 (2022) `_ .. [Krafft2022a] C. Krafft and P. Savoini, `Fundamental Electromagnetic Emissions by a Weak Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 924, L24 (2022) `_ + `The Astrophysical Journal Letters 924, L24 (2022) `_ .. [Kong2022] @@ -584,7 +589,7 @@ Following is the distribution of these topics in the listed publications up to N C. Davidson, Z.-M. Sheng, T. Wilson and P. McKenna, `Theoretical and computational studies of the Weibel instability in several beam–plasma interaction configurations`, - `J. Plasma Phys. 88, 905880206 (2022) `_ + `Journal of Plasma Physics 88, 905880206 (2022) `_ .. [Glek2022] @@ -596,7 +601,7 @@ Following is the distribution of these topics in the listed publications up to N D. Umstadter `Controlled Injection of Electrons for Improved Performance of Laser-Wakefield Acceleration`, - `United States: N. p., (2022) `_ + `United States Department of Energy Technical Report (2022) `_ .. 
[Massimo2022] @@ -615,7 +620,7 @@ Following is the distribution of these topics in the listed publications up to N P. K. Singh, F.-Y. Li, C.-K. Huang, A. Moreau, R. Hollinger, A. Junghans, A. Favalli, C. Calvi, S. Wang, Y. Wang, H. Song, J. J. Rocca, R. E. Reinovsky and S. Palaniyappan, `Vacuum laser acceleration of super-ponderomotive electrons using relativistic transparency injection`, - `Nat Commun 13, 54 (2022) `_ + `Nature Communications 13, 54 (2022) `_ .. [Lobet2022] @@ -646,13 +651,13 @@ Following is the distribution of these topics in the listed publications up to N P. Tomassini, F. Massimo, L. Labate and L. A. Gizzi, `Accurate electron beam phase-space theory for ionization-injection schemes driven by laser pulses`, - `High Pow Laser Sci Eng 10, e15 (2021) `_ + `High Power Laser Science and Engineering 10, e15 (2021) `_ .. [Meinhold2021] T. A. Meinhold and N. Kumar, `Radiation pressure acceleration of protons from structured thin-foil targets`, - `J. Plasma Phys. 87, 905870607 (2021) `_ + `Journal of Plasma Physics 87, 905870607 (2021) `_ .. [Bonvalet2021b] @@ -664,13 +669,13 @@ Following is the distribution of these topics in the listed publications up to N Y. Shi, D. R. Blackman and A. Arefiev, `Electron acceleration using twisted laser wavefronts`, - `Plasma Phys. Control. Fusion 63, 125032 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125032 (2021) `_ .. [Kumar2021] N. Kumar and B. Reville, `Nonthermal Particle Acceleration at Highly Oblique Nonrelativistic Shocks`, - `ApJL 921, L14 (2021) `_ + `The Astrophysical Journal Letters 921, L14 (2021) `_ .. [Ghaith2021] @@ -682,13 +687,13 @@ Following is the distribution of these topics in the listed publications up to N V. Horný and L. Veisz, `Generation of single attosecond relativistic electron bunch from intense laser interaction with a nanosphere`, - `Plasma Phys. Control. Fusion 63, 125025 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125025 (2021) `_ .. [Krafft2021] C. Krafft and P. Savoini, `Second Harmonic Electromagnetic Emissions by an Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 917, L23 (2021) `_ + `The Astrophysical Journal Letters 917, L23 (2021) `_ .. [Khalilzadeh2021c] @@ -712,7 +717,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Shou, D. Wang, P. Wang, J. Liu, Z. Cao, Z. Mei, S. Xu, Z. Pan, D. Kong, G. Qi, Z. Liu, Y. Liang, Z. Peng, Y. Gao, S. Chen, J. Zhao, Y. Zhao, H. Xu, J. Zhao, Y. Wu, X. Yan and W. Ma, `High-efficiency generation of narrowband soft x rays from carbon nanotube foams irradiated by relativistic femtosecond lasers`, - `Opt. Lett. 46, 3969 (2021) `_ + `Optics Letters 46, 3969 (2021) `_ .. [Khalilzadeh2021b] @@ -724,67 +729,67 @@ Following is the distribution of these topics in the listed publications up to N H. Hosseinkhani, M. Pishdast, J. Yazdanpanah and S. A. Ghasemi, `Investigation of the classical and quantum radiation reaction effect on interaction of ultra high power laser with near critical plasma`, - `J. Nuclear Sci. Technol. 42, 27-35 (2021) `_ + `Journal of Nuclear Science, Engineering and Technology 42, 27-35 (2021) `_ .. [MercuriBaron2021] A. Mercuri-Baron, M. Grech, F. Niel, A. Grassi, M. Lobet, A. Di Piazza and C. Riconda, `Impact of the laser spatio-temporal shape on Breit–Wheeler pair production`, - `New J. Phys. 23, 085006 (2021) `_ + `New Journal of Physics 23, 085006 (2021) `_ .. [Peng2021] H. Peng, C. Riconda, S. Weber, C.T. Zhou and S.C. 
Ruan, `Frequency Conversion of Lasers in a Dynamic Plasma Grating`, - `Phys. Rev. Applied 15, 054053 (2021) `_ + `Physical Review Applied 15, 054053 (2021) `_ .. [Shi2021a] Y. Shi, D. Blackman, D. Stutman and A. Arefiev, `Generation of Ultrarelativistic Monoenergetic Electron Bunches via a Synergistic Interaction of Longitudinal Electric and Magnetic Fields of a Twisted Laser`, - `Phys. Rev. Lett. 126, 234801 (2021) `_ + `Physical Review Letters 126, 234801 (2021) `_ .. [Bonvalet2021a] J. Bonvalet, Ph. Nicolaï, D. Raffestin, E. D'humieres, D. Batani, V. Tikhonchuk, V. Kantarelou, L. Giuffrida, M. Tosca, G. Korn, A. Picciotto, A. Morace, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. Habara and D. Margarone, `Energetic α-particle sources produced through proton-boron reactions by high-energy high-intensity laser beams`, - `Phys. Rev. E 103, 053202 (2021) `_ + `Physical Review E 103, 053202 (2021) `_ .. [Shekhanov2021] S. A. Shekhanov and V. T. Tikhonchuk, `SRS-SBS competition and nonlinear laser energy absorption in a high temperature plasma`, - `Plasma Phys. Control. Fusion 63, 115016 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 115016 (2021) `_ .. [Psikal2021] J Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, - `Plasma Phys. Control. Fusion 63, 064002 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ .. [Yoon2021b] Y. D. Yoon, G. S. Yun, D. E. Wendel and J. L. Burch, `Collisionless relaxation of a disequilibrated current sheet and implications for bifurcated structures`, - `Nat Commun 12, 3774 (2021) `_ + `Nature Communications 12, 3774 (2021) `_ .. [Lavorenti2021] F. Lavorenti, P. Henri, F. Califano, S. Aizawa and N. André, `Electron acceleration driven by the lower-hybrid-drift instability. An extended quasilinear model`, - `A&A 652, 202141049 (2021) `_ + `Astronomy & Astrophysics 652, 202141049 (2021) `_ .. [Golovanov2021] A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, - `Plasma Phys. Control. Fusion 63, 085004 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ .. [Jirka2021] M. Jirka, P. Sasorov, S. S. Bulanov, G. Korn, B. Rus and S. V. Bulanov, `Reaching high laser intensity by a radiating electron`, - `Phys. Rev. A 103, 053114 (2021) `_ + `Physical Review A 103, 053114 (2021) `_ .. [Marques2021] @@ -814,7 +819,7 @@ Following is the distribution of these topics in the listed publications up to N G. Cantono, A. Permogorov, J. Ferri, E. Smetanina, A. Dmitriev, A. Persson, T. Fülöp and C.-G. Wahlström, `Laser-driven proton acceleration from ultrathin foils with nanoholes`, - `Sci Rep 11, 5006 (2021) `_ + `Scientific Reports 11, 5006 (2021) `_ .. [Perez2021] @@ -832,13 +837,13 @@ Following is the distribution of these topics in the listed publications up to N A. Sampath, X. Davoine, S. Corde, L. Gremillet, M. Gilljohann, M. Sangal, C. H. Keitel, R. Ariniello, J. Cary, H. Ekerfelt, C. Emma, F. Fiuza, H. Fujii, M. Hogan, C. Joshi, A. Knetsch, O. Kononenko, V. Lee, M. Litos, K. Marsh, Z. Nie, B. O’Shea, J. R. Peterson, P. San Miguel Claveria, D. Storey, Y. Wu, X. Xu, C. Zhang and M. Tamburini, `Extremely Dense Gamma-Ray Pulses in Electron Beam-Multifoil Collisions`, - `Phys. Rev. Lett. 126, 064801 (2021) `_ + `Physical Review Letters 126, 064801 (2021) `_ .. [Marini2021a] S. Marini, P. S. Kleij, F. Pisani, F. Amiranoff, M. Grech, A. Macchi, M. Raynaud and C. 
Riconda, `Ultrashort high energy electron bunches from tunable surface plasma waves driven with laser wavefront rotation`, - `Phys. Rev. E 103, L021201 (2021) `_ + `Physical Review E 103, L021201 (2021) `_ .. [Yao2021] @@ -850,14 +855,14 @@ Following is the distribution of these topics in the listed publications up to N E. G. Gelfer, A. M, Fedotov and S. Weber, `Radiation induced acceleration of ions in a laser irradiated transparent foil`, - `New J. Phys. 23, 095002 (2021) `_ + `New Journal of Physics 23, 095002 (2021) `_ `arXiv:1907.02621 `_ .. [Siminos2021] E. Siminos, I. Thiele and C. Olofsson, `Laser Wakefield Driven Generation of Isolated Carrier-Envelope-Phase Tunable Intense Subcycle Pulses`, - `Phys. Rev. Lett. 126, 044801 (2021) `_ + `Physical Review Letters 126, 044801 (2021) `_ `arXiv:1902.05014 `_ .. [Budriga2020] @@ -870,13 +875,13 @@ Following is the distribution of these topics in the listed publications up to N P. A. P. Nghiem, R. Assmann, A. Beck et al., `Toward a plasma-based accelerator at high beam energy with high beam charge and high beam quality`, - `Phys. Rev. Accel. Beams 23, 031301 (2020) `_ + `Physical Review Accelerators and Beams 23, 031301 (2020) `_ .. [Pisarczyk2020] T. Pisarczyk, M. Kalal, S. Yu. Gus'kov et al., `Hot electron retention in laser plasma created under terawatt subnanosecond irradiation of Cu targets`, - `Plasma Phys. Control. Fusion 62, 115020 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115020 (2020) `_ .. [Pagano2020] @@ -894,25 +899,25 @@ Following is the distribution of these topics in the listed publications up to N H. Peng, C. Riconda, M. Grech, C.-T. Zhou and S. Weber, `Dynamical aspects of plasma gratings driven by a static ponderomotive potential`, - `Plasma Phys. Control. Fusion 62, 115015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115015 (2020) `_ .. [Glek2020] P. B. Glek, A. A. Voronin, V. Ya. Panchenko and A. M. Zheltikov, `Relativistic electron bunches locked to attosecond optical field waveforms: an attosecond light–matter bound state`, - `Laser Phys. Lett. 17 055401 (2020) `_ + `Laser Physics Letters 17 055401 (2020) `_ .. [Margarone2020] D. Margarone, A. Morace, J. Bonvalet et al., `Generation of α-Particle Beams With a Multi-kJ, Peta-Watt Class Laser System`, - `Front. Phys. 8, 343 (2020) `_ + `Frontiers in Physics 8, 343 (2020) `_ .. [Sinha2020] U. Sinha and N. Kumar, `Pair-beam propagation in a magnetized plasma for modeling the polarized radiation emission from gamma-ray bursts in laboratory astrophysics experiments`, - `Phys. Rev. E 101, 063204 (2020) `_ + `Physical Review E 101, 063204 (2020) `_ .. [Mitrofanov2020] @@ -924,81 +929,81 @@ Following is the distribution of these topics in the listed publications up to N B. T. Spiers, M. P. Hill, C. Brown, L. Ceurvorst, N. Ratan, A. F. Savin, P. Allan, E. Floyd, J. Fyrth, L. Hobbs, S. James, J. Luis, M. Ramsay, N. Sircombe, J. Skidmore, R. Aboushelbaya, M. W. Mayr, R. Paddock, R. H. W. Wang and P. A. Norreys, `Whole-beam self-focusing in fusion-relevant plasma`, - `Phil. Trans. R. Soc. A379, 20200159 `_ + `Philosophical Transactions of the Royal Society A379, 20200159 `_ .. [Derouillat2020] J. Derouillat and A. Beck, `Single Domain Multiple Decompositions for Particle-in-Cell simulations`, - `J. Phys.: Conf. Ser. 1596, 012052 (2020) `_ + `Journal of Physics: Conference Series 1596, 012052 (2020) `_ `arXiv:1912.04064 `_ .. [Zemzemi2020] I. Zemzemi, F. Massimo and A. 
Beck, `Azimuthal decomposition study of a realistic laser profile for efficient modeling of Laser WakeField Acceleration`, - `J. Phys.: Conf. Ser. 1596, 012055 (2020) `_ + `Journal of Physics: Conference Series 1596, 012055 (2020) `_ .. [Massimo2020b] F. Massimo, I. Zemzemi, A. Beck, J. Derouillat and A. Specka, `Efficient cylindrical envelope modeling for laser wakefield acceleration`, - `J. Phys.: Conf. Ser. 1596, 012054 (2020) `_ + `Journal of Physics: Conference Series 1596, 012054 (2020) `_ `arXiv:1912.04674 `_ .. [Massimo2020a] F. Massimo, A. Beck, J. Derouillat, I. Zemzemi and A. Specka, `Numerical modeling of laser tunneling ionization in particle-in-cell codes with a laser envelope model`, - `Phys. Rev. E 102, 033204 (2020) `_ + `Physical Review E 102, 033204 (2020) `_ `arXiv:2006.04433 `_ .. [Marcowith2020] A. Marcowith, G. Ferrand, M. Grech, Z. Meliani, I. Plotnikov and R. Walder, `Multi-scale simulations of particle acceleration in astrophysical systems`, - `Living Rev Comput Astrophys 6, 1 (2020) `_ + `Living Reviews in Computational Astrophysics 6, 1 (2020) `_ `arXiv:2002.09411 `_ .. [Dargent2020] J. Dargent, N. Aunai, B. Lavraud, S. Toledo‐Redondo and F. Califano, `Simulation of Plasmaspheric Plume Impact on Dayside Magnetic Reconnection`, - `Geophys. Res. Lett. 47, 2019GL086546 (2020) `_ + `Geophysical Research Letters 47, 2019GL086546 (2020) `_ `arXiv:2002.02243 `_ .. [Sundström2020b] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Collisional effects on the electrostatic shock dynamics in thin-foil targets driven by an ultraintense short pulse laser`, - `Plasma Phys. Control. Fusion 62, 085015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 085015 (2020) `_ .. [Sundström2020a] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Fast collisional electron heating and relaxation in thin foils driven by a circularly polarized ultraintense short-pulse laser`, - `J. Plasma Phys. 86, 755860201 (2020) `_ + `Journal of Plasma Physics 86, 755860201 (2020) `_ `arXiv:1911.09562 `_ .. [Gelfer2020] E. G. Gelfer, A. M. Fedotov, O. Klimo and S. Weber, `Absorption and opacity threshold for a thin foil in a strong circularly polarized laser field`, - `Phys. Rev. E 101, 033204 (2020) `_ + `Physical Review E 101, 033204 (2020) `_ `arXiv:1906.05902 `_ .. [Ferri2020] J. Ferri, I. Thiele, E. Siminos, L. Gremillet, E. Smetanina, A. Dmitriev, G. Cantono, C.-G. Wahlström and T. Fülöp, `Enhancement of laser-driven ion acceleration in non-periodic nanostructured targets`, - `J. Plasma Phys. 86, 905860101 (2020) `_ + `Journal of Plasma Physics 86, 905860101 (2020) `_ `arXiv:1905.11131 `_ .. [Marques2019] J.-R. Marquès, L. Lancia, T. Gangolf, M. Blecher, S. Bolaños, J. Fuchs, O. Willi, F. Amiranoff, R. L. Berger, M. Chiaramello, S. Weber, and C. Riconda, `Joule-Level High-Efficiency Energy Transfer to Subpicosecond Laser Pulses by a Plasma-Based Amplifier`, - `Phys. Rev. X 9, 021008 (2019) `_ + `Physical Review X 9, 021008 (2019) `_ .. [Plotnikov2019] I. Plotnikov and L. Sironi, @@ -1021,39 +1026,39 @@ Following is the distribution of these topics in the listed publications up to N X. S. Geng, L. L. Ji, B. F. Shen et al., `Quantum reflection above the classical radiation-reaction barrier in the quantum electro-dynamics regime`, - `Commun. Phys. 2, 66 (2019) `_ + `Communications Physics 2, 66 (2019) `_ .. [Sinha2019] U. Sinha, C. H. Keitel, and N. Kumar, `Polarized Light from the Transportation of a Matter-Antimatter Beam in a Plasma`, - `Phys. Rev. Lett. 
122, 204801 (2019) `_ + `Physical Review Letters 122, 204801 (2019) `_ .. [Malko2019] S. Malko, X. Vaisseau, F. Perez, D. Batani, A. Curcio, M. Ehret, J. Honrubia, K. Jakubowska, A. Morace, J. J. Santos and L. Volpe, `Enhanced relativistic-electron beam collimation using two consecutive laser pulses`, - `Sci Rep 9, 14061 (2019) `_ + `Scientific Reports 9, 14061 (2019) `_ .. [Peng2019] H. Peng, C. Riconda, M. Grech, J.-Q. Su and S. Weber, `Nonlinear dynamics of laser-generated ion-plasma gratings: A unified description`, - `Phys. Rev. E 100, 061201 (2019) `_ + `Physical Review E 100, 061201 (2019) `_ `arXiv:1911.03440 `_ .. [Fang2019] J. Fang, C.-Y. Lu, J.-W. Yan and H. Yu, `Early acceleration of electrons and protons at the nonrelativistic quasiparallel shocks with different obliquity angles`, - `Res. Astron. Astrophys. 19, 182 (2019) `_ + `Research in Astronomy and Astrophysics 19, 182 (2019) `_ `arXiv:1908.08170 `_ .. [Yoon2019b] Y. Yoon and P. M. Bellan, `Kinetic Verification of the Stochastic Ion Heating Mechanism in Collisionless Magnetic Reconnection`, - `ApJ 887, L29 (2019) `_ + `The Astrophysical Journal Letters 887, L29 (2019) `_ .. [Yoon2019a] @@ -1065,7 +1070,7 @@ Following is the distribution of these topics in the listed publications up to N F. Massimo, A. Beck, J. Derouillat, M. Grech, M. Lobet, F. Pérez, I. Zemzemi and A Specka, `Efficient start-to-end 3D envelope modeling for two-stage laser wakefield acceleration experiments`, - `Plasma Phys. Control. Fusion 61, 124001 (2019) `_ + `Plasma Physics and Controlled Fusion 61, 124001 (2019) `_ `arXiv:1912.04127 `_ .. [Beck2019] @@ -1079,14 +1084,14 @@ Following is the distribution of these topics in the listed publications up to N F. Pérez and M. Grech, `Oblique-incidence, arbitrary-profile wave injection for electromagnetic simulations`, - `Phys. Rev. E 99, 033307 (2019) `_ + `Physical Review E 99, 033307 (2019) `_ `arXiv:1809.04435 `_ .. [Thiele2019] I. Thiele, E. Siminos and T. Fülöp, `Electron Beam Driven Generation of Frequency-Tunable Isolated Relativistic Subcycle Pulses`, - `Phys. Rev. Lett. 122, 104803 (2019) `_ + `Physical Review Letters 122, 104803 (2019) `_ `arXiv:1806.04976 `_ .. [Golovanov2018] @@ -1099,19 +1104,19 @@ Following is the distribution of these topics in the listed publications up to N S. Toledo-Redondo, J. Dargent, N. Aunai, B. Lavraud, M. André, W. Li, B. Giles, P.-A. Lindvist, R. E. Ergun, C. T. Russel and J. L. Burch, `Perpendicular Current Reduction Caused by Cold Ions of Ionospheric Origin in Magnetic Reconnection at the Magnetopause: Particle-in-Cell Simulations and Spacecraft Observations`, - `Geophys. Res. Lett. 45, 10,033 (2018) `_ + `Geophysical Research Letters 45, 10,033 (2018) `_ .. [Gelfer2018] E. Gelfer, N. Elkina and A. Fedotov, `Unexpected impact of radiation friction: enhancing production of longitudinal plasma waves`, - `Sci. Rep. 8, 6478 (2018) `_ + `Scientific Reports 8, 6478 (2018) `_ .. [Niel2018b] F. Niel, C. Riconda, F. Amiranoff, M. Lobet, J. Derouillat, F. Pérez, T. Vinci and M. Grech, `From quantum to classical modeling of radiation reaction: a focus on the radiation spectrum`, - `Plasma Phys. Control. Fusion 60, 094002 (2018) `_ + `Plasma Physics and Controlled Fusion 60, 094002 (2018) `_ `arXiv:1802.02927 `_ .. [Plotnikov2018] @@ -1125,21 +1130,21 @@ Following is the distribution of these topics in the listed publications up to N F. Niel, C. Riconda, F. Amiranoff, R. Duclous and M. 
Grech, `From quantum to classical modeling of radiation reaction: A focus on stochasticity effects`, - `Phys. Rev. E 97, 043209 (2018) `_ + `Physical Review E 97, 043209 (2018) `_ `arXiv:1707.02618 `_ .. [Grassi2017b] A. Grassi, M. Grech, F. Amiranoff, A. Macchi and C. Riconda, `Radiation-pressure-driven ion Weibel instability and collisionless shocks`, - `Phys. Rev. E 96, 033204 (2017) `_ + `Physical Review E 96, 033204 (2017) `_ `arXiv:1705.05402 `_ .. [Fedeli2017] L. Fedeli, A. Formenti, L. Cialfi, A. Sgattoni, G. Cantono and M. Passoni, `Structured targets for advanced laser-driven sources`, - `Plasma Phys. Control. Fusion 60, 014013 (2017) `_ + `Plasma Physics and Controlled Fusion 60, 014013 (2017) `_ .. [Golovanov2017] @@ -1151,19 +1156,19 @@ Following is the distribution of these topics in the listed publications up to N J. Dargent, N. Aunai, B. Lavraud, S. Toledo-Redondo, M. A. Shay, P. A. Cassak and K. Malakit, `Kinetic simulation of asymmetric magnetic reconnection with cold ions`, - `J. Geophys. Res. Space Physics 122, 5290-5306 (2017) `_ + `Journal of Geophysical Research: Space Physics 122, 5290-5306 (2017) `_ .. [Grassi2017a] A. Grassi, M. Grech, F. Amiranoff, F. Pegoraro, A. Macchi and C. Riconda, `Electron Weibel instability in relativistic counterstreaming plasmas with flow-aligned external magnetic fields`, - `Phys. Rev. E 95, 023203 (2017) `_ + `Physical Review E 95, 023203 (2017) `_ .. [Dargent2016] J. Dargent, N. Aunai, G. Belmont, N. Dorville, B. Lavraud and M. Hesse, `Full particle-in-cell simulations of kinetic equilibria and the role of the initial current sheet on steady asymmetric magnetic reconnection`, - `J. Plasma Phys. 82, 905820305 (2016) `_ + `Journal of Plasma Physics 82, 905820305 (2016) `_ .. [Chiaramello2016] @@ -1175,10 +1180,10 @@ Following is the distribution of these topics in the listed publications up to N A. Beck, J.T. Frederiksen and J. Dérouillat, `Load management strategy for Particle-In-Cell simulations in high energy particle acceleration`, - `Nucl. Inst. Meth. in Phys. Res. A 829, 418-421 (2016) `_ + `Nuclear Instruments and Methods in Physics Research A 829, 418-421 (2016) `_ .. [Lancia2016] L. Lancia, A. Giribono, L. Vassura, M. Chiaramello, C. Riconda, S. Weber, A. Castan, A. Chatelain, A. Frank, T. Gangolf, M. N. Quinn, J. Fuchs and J.-R. Marquès, `Signatures of the Self-Similar Regime of Strongly Coupled Stimulated Brillouin Scattering for Efficient Short Laser Pulse Amplification`, - `Phys. Rev. Lett. 116, 075001 (2016) `_ + `Physical Review Letters 116, 075001 (2016) `_ From 097422756966fccdf0630bb0c80e5a01d7c319a2 Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Tue, 21 May 2024 09:33:43 +0200 Subject: [PATCH 20/28] add publication --- doc/Sphinx/Overview/material.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 2d33d6aff..33184146f 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_).
-As of May 2024, 187 papers have been published covering a broad range of topics: +As of May 2024, 188 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Salgado2024] + + F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. Zepf, + `All-optical source size and emittance measurements of laser-accelerated electron beams`, + `Physical Review Accelerators and Beams 27, 052803 (2024) `_ + .. [Ivanov2024] K. A. Ivanov, D. A. Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, From 3074a9fab7ecbd37ce74b6207f544daf0e6c2d9f Mon Sep 17 00:00:00 2001 From: Francesco Massimo Date: Fri, 24 May 2024 14:35:24 +0200 Subject: [PATCH 21/28] add publication --- doc/Sphinx/Overview/material.rst | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 33184146f..66ed26180 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of May 2024, 188 papers have been published covering a broad range of topics: +As of May 2024, 189 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,6 +50,12 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Krafft2024b] + + C. Krafft, P. Savoini, and F. J. Polanco-Rodríguez, + `Mechanisms of Fundamental Electromagnetic Wave Radiation in the Solar Wind`, + `The Astrophysical Journal Letters 967, 2 (2024) `_ + .. [Salgado2024] F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. Zepf, @@ -92,7 +98,7 @@ Following is the distribution of these topics in the listed publications up to N `Control of autoresonant plasma beat-wave wakefield excitation`, `Physical Review Research 6, 013338 (2024) `_ -.. [Krafft2024] +.. [Krafft2024a] C. Krafft and P. Savoini, `Electrostatic Wave Decay in the Randomly Inhomogeneous Solar Wind`, @@ -532,7 +538,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Pae2022] - K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, + K. H. Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ @@ -769,7 +775,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Psikal2021] - J Psikal, + J. 
Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ @@ -787,7 +793,7 @@ Following is the distribution of these topics in the listed publications up to N .. [Golovanov2021] - A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, + A. A. Golovanov, I. Y. Kostyukov, L. Reichwein, J. Thomas and A. Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ From 1045fd21bda84117a21c6118f01dd23d36e0ce73 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 09:38:48 +0200 Subject: [PATCH 22/28] Various small fixes --- doc/Sphinx/Use/namelist.rst | 8 ++++++-- happi/_Diagnostics/TrackParticles.py | 15 ++++++++++----- happi/_Utils.py | 6 +++++- src/SmileiMPI/AsyncMPIbuffers.h | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index f7deebcae..6c5eaf2be 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -1148,6 +1148,9 @@ Each species has to be defined in a ``Species`` block:: :ref:`tracking `. The available fields are ``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"`` and ``"Bz"``. + Note that magnetic field components, as they originate from the interpolator, + are shifted by half a timestep compared to those from the *Fields* diagnostics. + Additionally, the work done by each component of the electric field is available as ``"Wx"``, ``"Wy"`` and ``"Wz"``. Contrary to the other interpolated fields, these quantities are accumulated over time. @@ -2716,7 +2719,8 @@ or several points arranged in a 2-D or 3-D grid. * **In "AMcylindrical" geometry**, probes are defined with 3D Cartesian coordinates and cannot be separated per mode. Use Field diagnostics for cylindrical coordinates and information per mode. - + * **Probes rely on the particle interpolator to compute fields** so that the + magnetic field is shifted by half a timestep compared to that of *Fields* diagnostics. To add one probe diagnostic, include the block ``DiagProbe``:: @@ -3343,7 +3347,7 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Warning:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. +.. Note:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. They are actually the velocities multiplied by the lorentz factor, i.e., :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only inside the ``filter`` function (not for the output of the diagnostic). 
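For example, since ``px``, ``py`` and ``pz`` inside ``filter`` are :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z` in units of :math:`c`, the Lorentz factor of a massive particle follows as :math:`\gamma = \sqrt{1 + p_x^2 + p_y^2 + p_z^2}`, which allows an energy-based selection. A minimal sketch (assuming ``numpy`` is available in the namelist; the cut at :math:`\gamma > 5` is arbitrary)::

    import numpy as np

    def my_energy_filter(particles):
        # px, py, pz hold gamma*v (normalized to c), so gamma = sqrt(1 + px^2 + py^2 + pz^2)
        gamma = np.sqrt(1. + particles.px**2 + particles.py**2 + particles.pz**2)
        return gamma > 5.  # keep only sufficiently relativistic particles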
diff --git a/happi/_Diagnostics/TrackParticles.py b/happi/_Diagnostics/TrackParticles.py index 253bb2958..0825eb0f3 100755 --- a/happi/_Diagnostics/TrackParticles.py +++ b/happi/_Diagnostics/TrackParticles.py @@ -447,8 +447,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for k, name in self._short_properties_from_raw.items(): if k not in group: continue ordered = self._np.empty((nparticles_to_write, ), dtype=group[k].dtype) - if k == "id": ordered.fill(0) - else : ordered.fill(self._np.nan) + if k == "id" : ordered.fill(0) + elif k == "charge": ordered.fill(9999) + else : ordered.fill(self._np.nan) ordered[locs] = group[k][()][selectedIndices] f0[name].write_direct(ordered, dest_sel=self._np.s_[it,:]) @@ -461,8 +462,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for first_o, last_o, npart_o in ChunkedRange(nparticles_to_write, chunksize): for k, name in self._short_properties_from_raw.items(): if k not in group: continue - if k == "id": data[k].fill(0) - else : data[k].fill(self._np.nan) + if k == "id" : data[k].fill(0) + elif k == "charge": data[k].fill(9999) + else : data[k].fill(self._np.nan) # Loop chunks of the input for first_i, last_i, npart_i in ChunkedRange(nparticles, chunksize): # Obtain IDs @@ -538,7 +540,10 @@ def _generateRawData(self, times=None): data[it,:] -= self._XmovedForTime[time] else: data = self._readUnstructuredH5(self._h5items[axis], self.selectedParticles, first_time, last_time) - data[deadParticles] = self._np.nan + if data.dtype == float: + data[deadParticles] = self._np.nan + else: + data[deadParticles] = 9999 self._rawData[axis] = data if self._verbose: print("Process broken lines ...") diff --git a/happi/_Utils.py b/happi/_Utils.py index 9fd35a757..28dd028df 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -398,7 +398,11 @@ def __init__(self, operation, QuantityTranslator, ureg): raise Exception("Quantity "+q+" not understood") # Calculate the total units and its inverse locals().update(self.imports) - units = eval("".join(basic_op)).units + units = eval("".join(basic_op)) + if isinstance(units, (int, float)): + units = ureg.Quantity(1) # dimensionless + else: + units = units.units self.translated_units = units.format_babel(locale="en") # Make the operation string self.translated_operation = "".join(full_op) diff --git a/src/SmileiMPI/AsyncMPIbuffers.h b/src/SmileiMPI/AsyncMPIbuffers.h index 7b3cf1fcc..90ba02fb1 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.h +++ b/src/SmileiMPI/AsyncMPIbuffers.h @@ -17,7 +17,7 @@ class AsyncMPIbuffers AsyncMPIbuffers(); ~AsyncMPIbuffers(); - virtual void allocate( unsigned int nDim_field ); + void allocate( unsigned int nDim_field ); void defineTags( Patch *patch, SmileiMPI *smpi, int tag ) ; From a48d556b6dcffb9042342fa53efec27c9f53f33b Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 12:44:49 +0200 Subject: [PATCH 23/28] fix many warnings --- makefile | 8 +- scripts/compile_tools/machine/adastra | 1 - scripts/compile_tools/machine/ruche_gpu2 | 2 +- src/Checkpoint/Checkpoint.cpp | 2 +- src/Diagnostic/DiagnosticProbes.cpp | 2 +- src/Diagnostic/DiagnosticScalar.cpp | 22 +-- src/Diagnostic/DiagnosticTrack.cpp | 2 +- src/ElectroMagn/ElectroMagn.cpp | 14 +- src/ElectroMagn/ElectroMagn.h | 2 +- src/ElectroMagn/ElectroMagn1D.cpp | 2 +- src/ElectroMagn/ElectroMagn1D.h | 2 +- src/ElectroMagn/ElectroMagn2D.cpp | 32 ++-- src/ElectroMagn/ElectroMagn2D.h | 2 +- src/ElectroMagn/ElectroMagn3D.cpp | 32 ++-- src/ElectroMagn/ElectroMagn3D.h | 2 +- src/ElectroMagn/ElectroMagnAM.cpp | 6 +- 
src/ElectroMagn/ElectroMagnAM.h | 2 +- src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp | 16 +- src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp | 26 +-- .../MA_Solver1D_Friedman.cpp | 2 +- src/ElectroMagnSolver/MA_Solver2D_norm.cpp | 18 +- src/ElectroMagnSolver/MA_Solver3D_norm.cpp | 18 +- src/ElectroMagnSolver/MF_Solver2D_Yee.cpp | 18 +- src/ElectroMagnSolver/MF_Solver3D_Yee.cpp | 18 +- .../PML_Solver2D_Envelope.cpp | 8 +- .../PML_SolverAM_Envelope.cpp | 9 +- ...PML_SolverAM_EnvelopeReducedDispersion.cpp | 7 +- src/Field/Field.cpp | 6 +- src/Field/Field.h | 8 +- src/Field/Field1D.cpp | 2 +- src/Field/Field1D.h | 2 +- src/Field/Field2D.cpp | 26 +-- src/Field/Field2D.h | 2 +- src/Field/Field3D.cpp | 38 ++-- src/Field/Field3D.h | 2 +- src/Field/cField.h | 2 +- src/Field/cField1D.cpp | 2 +- src/Field/cField1D.h | 2 +- src/Field/cField2D.cpp | 2 +- src/Field/cField2D.h | 2 +- src/Field/cField3D.cpp | 2 +- src/Field/cField3D.h | 2 +- src/Interpolator/Interpolator2D2Order.cpp | 10 +- src/Interpolator/Interpolator3D2Order.cpp | 12 +- src/Interpolator/Interpolator3D2Order.h | 2 +- src/MovWindow/SimWindow.cpp | 4 +- .../MultiphotonBreitWheeler.cpp | 26 +-- .../MultiphotonBreitWheeler.h | 2 +- .../MultiphotonBreitWheelerTables.h | 4 +- src/Params/Params.cpp | 34 ++-- src/Params/Params.h | 2 +- src/ParticleBC/BoundaryConditionType.cpp | 24 +-- src/ParticleBC/PartBoundCond.h | 2 +- src/Particles/Particles.cpp | 6 +- src/Particles/ParticlesFactory.cpp | 4 +- src/Patch/Patch.cpp | 4 +- src/Patch/Patch.h | 2 +- src/Patch/SyncVectorPatch.cpp | 60 +++---- src/Patch/SyncVectorPatch.h | 30 ++-- src/Patch/VectorPatch.cpp | 64 ++++--- src/Patch/VectorPatch.h | 2 +- src/Projector/Projector2D2OrderGPU.cpp | 62 ++++--- src/Projector/Projector2D2OrderGPU.h | 30 ++-- src/Projector/Projector2D2OrderGPUKernel.cpp | 2 +- .../Projector2D2OrderGPUKernelCUDAHIP.cu | 4 +- .../Projector2D2OrderGPUKernelCUDAHIP.h | 2 +- src/Projector/Projector3D2OrderGPU.cpp | 169 ++++++++++-------- src/Projector/Projector3D2OrderGPU.cpp.backup | 32 ++-- src/Projector/Projector3D2OrderGPU.h | 30 ++-- src/Projector/Projector3D2OrderGPUKernel.cpp | 2 +- src/Projector/Projector3D2OrderGPUKernelAcc.h | 26 +-- .../Projector3D2OrderGPUKernelCUDAHIP.cu | 2 +- .../Projector3D2OrderGPUKernelCUDAHIP.h | 2 +- .../Projector3D2OrderGPUKernelNaive.h | 6 +- src/Projector/ProjectorAM2OrderV.cpp | 4 - src/Projector/ProjectorFactory.h | 4 +- src/Pusher/PusherBoris.cpp | 2 +- src/Pusher/PusherBorisNR.cpp | 2 +- src/Pusher/PusherHigueraCary.cpp | 2 +- src/Pusher/PusherPhoton.cpp | 2 +- src/Pusher/PusherPonderomotiveBoris.cpp | 2 +- src/Pusher/PusherPonderomotiveBorisBTIS3.cpp | 1 - .../PusherPonderomotivePositionBoris.cpp | 2 +- src/Pusher/PusherVay.cpp | 2 +- src/Radiation/RadiationCorrLandauLifshitz.cpp | 12 +- src/Radiation/RadiationLandauLifshitz.cpp | 12 +- src/Radiation/RadiationMonteCarlo.cpp | 30 ++-- src/Radiation/RadiationMonteCarlo.h | 2 +- src/Radiation/RadiationNiel.cpp | 32 ++-- src/Radiation/RadiationNiel.h | 2 +- src/Radiation/RadiationTables.h | 14 +- src/Radiation/RadiationTools.h | 14 +- src/Radiation/Table.h | 2 +- src/Smilei.cpp | 16 +- src/SmileiMPI/SmileiMPI.cpp | 14 +- src/SmileiMPI/SmileiMPI.h | 6 +- src/Species/Species.cpp | 63 ++++--- src/Species/Species.h | 6 +- src/Tools/Pragma.h | 2 +- src/Tools/gpu.cpp | 26 +-- src/Tools/gpu.h | 2 +- src/Tools/gpuRandom.h | 22 ++- src/Tools/userFunctions.h | 4 +- 103 files changed, 689 insertions(+), 653 deletions(-) diff --git a/makefile b/makefile index d06dfaccc..277a2237d 100755 --- a/makefile +++ 
b/makefile @@ -202,9 +202,9 @@ endif ifneq (,$(call parse_config,gpu_nvidia)) override config += noopenmp # Prevent openmp for nvidia - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC GPU_COMPILER ?= nvcc - GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) @@ -214,9 +214,9 @@ endif # AMD GPUs ifneq (,$(call parse_config,gpu_amd)) - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP GPU_COMPILER ?= $(CC) - GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE -std=c++14 $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP -std=c++14 $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) diff --git a/scripts/compile_tools/machine/adastra b/scripts/compile_tools/machine/adastra index 7aab184ce..14c2a975a 100644 --- a/scripts/compile_tools/machine/adastra +++ b/scripts/compile_tools/machine/adastra @@ -85,7 +85,6 @@ ADASTRA_DEBUG_FLAGS := -g -ggdb $(ADASTRA_DEBUG_SANITIZER_FLAGS) -v # ifneq (,$(call parse_config,gpu_amd)) # When using OMP - ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS := -DSMILEI_ACCELERATOR_GPU_OMP=1 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908:xnack- diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2 index a9406d60d..80cf09198 100644 --- a/scripts/compile_tools/machine/ruche_gpu2 +++ b/scripts/compile_tools/machine/ruche_gpu2 @@ -26,7 +26,7 @@ GPU_COMPILER_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 t CXXFLAGS += -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output -# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE' +# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_ACCELERATOR_GPU_OACC' # CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP # GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ? # LDFLAGS += -mp=gpu diff --git a/src/Checkpoint/Checkpoint.cpp b/src/Checkpoint/Checkpoint.cpp index 13c3d28a5..943840cb9 100755 --- a/src/Checkpoint/Checkpoint.cpp +++ b/src/Checkpoint/Checkpoint.cpp @@ -233,7 +233,7 @@ void Checkpoint::dumpAll( VectorPatch &vecPatches, Region ®ion, unsigned int MESSAGE( " Checkpoint #" << num_dump << " at iteration " << itime << " dumped" ); #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) MESSAGE( " Copying device data in main memory" ); // TODO(Etienne M): This may very well be redundant if we did a diagnostic // during the last iteration. 
Indeed, we copy everything from the device to diff --git a/src/Diagnostic/DiagnosticProbes.cpp b/src/Diagnostic/DiagnosticProbes.cpp index 5e79eecc9..e66c684e7 100755 --- a/src/Diagnostic/DiagnosticProbes.cpp +++ b/src/Diagnostic/DiagnosticProbes.cpp @@ -740,7 +740,7 @@ void DiagnosticProbes::run( SmileiMPI *smpi, VectorPatch &vecPatches, int itime, // Interpolate all usual fields on probe ("fake") particles of current patch unsigned int iPart_MPI = offset_in_MPI[ipatch]; unsigned int maxPart_MPI = offset_in_MPI[ipatch] + npart; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_particle, npart ); diff --git a/src/Diagnostic/DiagnosticScalar.cpp b/src/Diagnostic/DiagnosticScalar.cpp index fe88f47d9..9b8b17409 100755 --- a/src/Diagnostic/DiagnosticScalar.cpp +++ b/src/Diagnostic/DiagnosticScalar.cpp @@ -436,7 +436,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) const unsigned int nPart=vecSpecies[ispec]->getNbrOfParticles(); // number of particles -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) const double *const __restrict__ weight_ptr = vecSpecies[ispec]->particles->getPtrWeight(); const short *const __restrict__ charge_ptr = vecSpecies[ispec]->particles->getPtrCharge(); const double *const __restrict__ momentum_x = vecSpecies[ispec]->particles->getPtrMomentum(0); @@ -447,14 +447,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) if( vecSpecies[ispec]->mass_ > 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -468,7 +468,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: charge) \ is_device_ptr( charge_ptr, weight_ptr) \ reduction(+:charge) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr, charge_ptr) #pragma acc loop gang worker vector reduction(+:charge) #endif @@ -484,7 +484,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -525,14 +525,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) } else if( vecSpecies[ispec]->mass_ == 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -548,7 +548,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -667,7 +667,7 @@ void 
DiagnosticScalar::compute( Patch *patch, int ) // total energy in current field double Uem = 0.; if( ! AM ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) Uem = field->norm2OnDevice( EMfields->istart, EMfields->bufsize ); #else Uem = field->norm2( EMfields->istart, EMfields->bufsize ); @@ -751,7 +751,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) j_max = iFieldStart[1]; k_max = iFieldStart[2]; -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // We use scalar rather than arrays because omp target // sometime fails to pass them to the device const unsigned int ixstart = iFieldStart[0]; @@ -776,7 +776,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: minval, maxval, i_min, i_max, j_min, j_max, k_min, k_max) \ map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) //reduction(min:minval) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field_data) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) #endif diff --git a/src/Diagnostic/DiagnosticTrack.cpp b/src/Diagnostic/DiagnosticTrack.cpp index 16ac325e9..583caab94 100755 --- a/src/Diagnostic/DiagnosticTrack.cpp +++ b/src/Diagnostic/DiagnosticTrack.cpp @@ -188,7 +188,7 @@ void DiagnosticTrack::setIDs( Patch *patch ) for( unsigned int iPart=0; iPartvecSpecies[species_index_]->particles->id( iPart ) = ++latest_Id; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) patch->vecSpecies[species_index_]->particles->initializeIDsOnDevice(); #endif } diff --git a/src/ElectroMagn/ElectroMagn.cpp b/src/ElectroMagn/ElectroMagn.cpp index 2c75bc6a4..02467ecd4 100755 --- a/src/ElectroMagn/ElectroMagn.cpp +++ b/src/ElectroMagn/ElectroMagn.cpp @@ -555,7 +555,7 @@ void ElectroMagn::applyAntenna( unsigned int iAntenna, double intensity ) //! Compute the total density and currents from species density and currents on Device //! This function is valid wathever the geometry // --------------------------------------------------------------------------------------------------------------------- -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void ElectroMagn::computeTotalRhoJOnDevice() { @@ -577,7 +577,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() double *const __restrict__ rhosp = rho_s[ispec] ? 
rho_s[ispec]->data() : nullptr; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( \ Jxp[0:Jx_size], \ Jyp[0:Jy_size], \ @@ -594,7 +594,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; idata(); // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1229,10 +1229,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1241,7 +1241,7 @@ void ElectroMagn2D::centerMagneticFields() } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(By2D[0:sizeofBy],By2D_m[0:sizeofBy]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1249,10 +1249,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -1260,7 +1260,7 @@ void ElectroMagn2D::centerMagneticFields() } } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(Bz2D[0:sizeofBz],Bz2D_m[0:sizeofBz]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1268,10 +1268,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1282,7 +1282,7 @@ void ElectroMagn2D::centerMagneticFields() double *const By2D_oldBTIS3 = By_mBTIS3->data(); double *const Bz2D_oldBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By2D_oldBTIS3[0:sizeofByBTIS3],By2D[0:sizeofBy]) #pragma acc loop gang @@ -1291,17 +1291,17 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { By2D_oldBTIS3[x * ny_p + y] = ( By2D[(x+1) * ny_p + y] + By2D_oldBTIS3[x * ny_p + y] ) * 0.5; } } -#if defined( SMILEI_OPENACC_MODE ) 
+#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBzBTIS3 = Bz_mBTIS3->size(); #pragma acc parallel present(Bz2D_oldBTIS3[0:sizeofBz],Bz2D[0:sizeofBz]) #pragma acc loop gang @@ -1310,10 +1310,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1392,7 +1392,7 @@ void ElectroMagn2D::computeTotalRhoJ() //END computeTotalRhoJ } -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void ElectroMagn2D::computeTotalRhoJOnDevice() // { diff --git a/src/ElectroMagn/ElectroMagn2D.h b/src/ElectroMagn/ElectroMagn2D.h index aecb87ab8..d8cdfb031 100755 --- a/src/ElectroMagn/ElectroMagn2D.h +++ b/src/ElectroMagn/ElectroMagn2D.h @@ -115,7 +115,7 @@ class ElectroMagn2D : public ElectroMagn //! Method used to compute the total charge density and currents by summing over all species void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override; // #endif diff --git a/src/ElectroMagn/ElectroMagn3D.cpp b/src/ElectroMagn/ElectroMagn3D.cpp index c8994d75c..41ba9cc58 100755 --- a/src/ElectroMagn/ElectroMagn3D.cpp +++ b/src/ElectroMagn/ElectroMagn3D.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -1207,7 +1207,7 @@ void ElectroMagn3D::centerMagneticFields() double *const __restrict__ Bz3D_m = Bz_m->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1219,11 +1219,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; idata(); double *const __restrict__ BzmBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By3D[0:sizeofBy],BymBTIS3[0:sizeofByBTIS3]) #pragma acc loop gang @@ -1305,11 +1305,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; isize(); #pragma acc parallel present(Bz3D[0:sizeofBz],BzmBTIS3[0:sizeofBzBTIS3]) #pragma acc loop gang @@ -1332,11 +1332,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; icopyFrom( Br_[imode] ); if (input[2] && copy[2]) Bt_m[imode]->copyFrom( Bt_[imode] ); } - ElectroMagnAM *emAM = static_cast( patch->EMfields ); + // ElectroMagnAM *emAM = static_cast( patch->EMfields ); //emAM->compute_B_m_fromEB(); } @@ -1900,7 +1900,7 @@ void ElectroMagnAM::compute_B_m_fromEB() { const unsigned int nl_p = dimPrim[0]; const unsigned int nl_d = dimDual[0]; - const unsigned int nr_p = dimPrim[1]; + // const unsigned int nr_p = dimPrim[1]; const unsigned int nr_d = dimDual[1]; 
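// The two commented-out declarations just above (`emAM`, `nr_p`) silence
// -Wunused-variable warnings while leaving the original code visible. An
// illustrative alternative, not used by this patch, is the C++17
// [[maybe_unused]] attribute, e.g.
//
//     [[maybe_unused]] const unsigned int nr_p = dimPrim[1];
//
// which keeps the variable alive for configurations that still read it.
// Note, however, that parts of this build compile with -std=c++14 (see the
// HIP kernel flags in the makefile hunk above), where the attribute is not
// available and plain commenting-out remains the portable choice.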
const unsigned int Nmodes = El_.size(); diff --git a/src/ElectroMagn/ElectroMagnAM.h b/src/ElectroMagn/ElectroMagnAM.h index 979581b4c..cd3063113 100755 --- a/src/ElectroMagn/ElectroMagnAM.h +++ b/src/ElectroMagn/ElectroMagnAM.h @@ -157,7 +157,7 @@ class ElectroMagnAM : public ElectroMagn void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override ; // #endif diff --git a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp index 42ce8c381..2d257cbd5 100755 --- a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp @@ -68,9 +68,9 @@ ElectroMagnBC2D_SM::ElectroMagnBC2D_SM( Params ¶ms, Patch *patch, unsigned i ElectroMagnBC2D_SM::~ElectroMagnBC2D_SM() { - for (int i=0 ; inumber_of_points_; const int sizeofE1 = E[1]->number_of_points_; const int sizeofE2 = E[2]->number_of_points_; @@ -182,7 +182,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( db1, b1_size ); if( axis0_ == 0 ) { // for By^(d,p) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -199,7 +199,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * + B_ext1[j]; } } else { // for Bx^(p,d) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -234,7 +234,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // for Bz^(d,d) if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E1[0:sizeofE1],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -247,7 +247,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * } } else { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E0[0:sizeofE0],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp index 3ae113e60..ba4e61b28 100755 --- a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp @@ -186,7 +186,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * const int isBoundary2min = patch->isBoundary( axis2_, 0 ); const int isBoundary2max = patch->isBoundary( axis2_, 1 ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC const int sizeofE0 = E[axis0_]->number_of_points_; const int sizeofE1 = E[axis1_]->number_of_points_; const int sizeofE2 = E[axis2_]->number_of_points_; @@ -217,7 +217,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // B1 if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef 
SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -225,7 +225,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int j=isBoundary1min; j( fields->Ex_ ); Field1D *Ey1D = static_cast( fields->Ey_ ); Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *Bx1D = static_cast( fields->Bx_ ); + // Field1D *Bx1D = static_cast( fields->Bx_ ); Field1D *By1D = static_cast( fields->By_ ); Field1D *Bz1D = static_cast( fields->Bz_ ); Field1D *Jx1D = static_cast( fields->Jx_ ); diff --git a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp index d12e021c1..4cd0d7d7c 100755 --- a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp @@ -37,7 +37,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) // double sumJz = 0; // Electric field Ex^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -52,10 +52,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -64,7 +64,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ey^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ey2D[0:sizeofEy], Jy2D[0:sizeofEy], Bz2D[0:sizeofBz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -72,10 +72,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -84,7 +84,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ez^(p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ez2D[0:sizeofEz], Jz2D[0:sizeofEz], Bx2D[0:sizeofBx], By2D[0:sizeofBy] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -92,10 +92,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { diff --git a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp index 9b2a089cc..7ffea26c0 100755 --- a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp 
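// The solver hunks around this point all follow the same three-way guard
// that this patch renames: SMILEI_ACCELERATOR_GPU_OACC selects OpenACC
// pragmas, SMILEI_ACCELERATOR_GPU_OMP selects OpenMP target offload, and
// CPU builds fall back to `#pragma omp simd` on the inner loop
// (SMILEI_ACCELERATOR_GPU is defined whenever either GPU backend is, per
// the makefile hunks above). A minimal sketch of the pattern on a
// hypothetical 1D field update, assuming `Ex` and `Jx` are already mapped
// on the device:
//
//     #if defined( SMILEI_ACCELERATOR_GPU_OACC )
//         #pragma acc parallel present( Ex[0:n], Jx[0:n] )
//         #pragma acc loop gang worker vector
//     #elif defined( SMILEI_ACCELERATOR_GPU_OMP )
//         #pragma omp target teams distribute parallel for is_device_ptr( Ex, Jx )
//     #else
//         #pragma omp simd
//     #endif
//         for( unsigned int i = 0; i < n; ++i ) {
//             Ex[i] -= dt * Jx[i];
//         }
//
// The real solvers nest this over 2D/3D index ranges, putting
// `acc loop worker` / `acc loop vector` on the inner loops, as in the
// hunks below.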
@@ -35,7 +35,7 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) const unsigned int nz_d = fields->dimDual[2]; // Electric field Ex^(d,p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -50,11 +50,11 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; iBz_->data(); // [x * ny_d + y] : dual in x,y primal in z // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -48,10 +48,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { @@ -59,7 +59,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( By2D[0:sizeofBy], Ez2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -67,10 +67,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -79,7 +79,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Bz2D[0:sizeofBy], Ex2D[0:sizeofEx], Ey2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -87,10 +87,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { diff --git a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp index 5930af3e1..f70159699 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp @@ -34,7 +34,7 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) const double * __restrict__ Ez3D = isEFilterApplied ? 
fields->filter_->Ez_[0]->data() : fields->Ez_->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -49,11 +49,11 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; i dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd @@ -590,8 +590,8 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // ---- // dA/dx = dA/dx + ik0 A std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp index 7e4e740c7..d8c65645a 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp @@ -395,7 +395,6 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -405,7 +404,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; @@ -494,7 +493,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl) ; @@ -635,8 +634,8 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, for( unsigned int 
j=solvermin ; j < solvermax ; j++ ) { // y loop // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd - + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + // + i1*k0*( *G_n_pml )( i, j ) ; // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; std::complex d2G_over_dx2 = d2G_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp index 771f12e37..c2a5c4087 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp @@ -400,7 +400,6 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -410,7 +409,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -490,7 +489,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=solvermin ; i dA_over_dx_fdtd = (1.+delta)*( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *A_n_pml )( i+2, j )-( *A_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = (1.+delta)*( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *A_n_pml )( i-2, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -591,7 +590,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=2 ; i dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( 
i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; diff --git a/src/Field/Field.cpp b/src/Field/Field.cpp index 19c820d1d..0d8427f1e 100644 --- a/src/Field/Field.cpp +++ b/src/Field/Field.cpp @@ -5,14 +5,14 @@ void Field::put_to( double val ) { SMILEI_ASSERT( data_ != nullptr ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const bool is_hostptr_mapped_on_device = smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( data_ ); #endif // NVCC's OpenACC needs that redundant pointer value double* an_other_data_pointer = data_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // Test if data exists on GPU, put_to can be used on CPU and GPU during a simulation #pragma acc parallel present( an_other_data_pointer [0:size()] ) if( is_hostptr_mapped_on_device ) #pragma acc loop gang worker vector @@ -25,7 +25,7 @@ void Field::put_to( double val ) } } -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field array from Host to Device void Field::copyFromHostToDevice() { diff --git a/src/Field/Field.h b/src/Field/Field.h index 669106245..563705ab1 100755 --- a/src/Field/Field.h +++ b/src/Field/Field.h @@ -188,7 +188,7 @@ class Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! Compute the norm2OnDevice of the field virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif @@ -234,7 +234,7 @@ class Field return sum; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) inline double __attribute__((always_inline)) normOnDevice() { @@ -245,7 +245,7 @@ class Field #pragma omp target teams distribute parallel for \ map(tofrom: sum) map(to: number_of_points_) \ reduction(+:sum) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector reduction(+:sum) #endif @@ -279,7 +279,7 @@ class Field virtual void extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; virtual void inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field from Host to Device void copyFromHostToDevice(); diff --git a/src/Field/Field1D.cpp b/src/Field/Field1D.cpp index d0fa18b2f..59f085f81 100755 --- a/src/Field/Field1D.cpp +++ b/src/Field/Field1D.cpp @@ -190,7 +190,7 @@ double Field1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/Field1D.h b/src/Field/Field1D.h index 0ff09cd1e..228cc586f 100755 --- a/src/Field/Field1D.h +++ b/src/Field/Field1D.h @@ -92,7 +92,7 @@ class Field1D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/Field2D.cpp b/src/Field/Field2D.cpp index a089a0d45..94051fed6 100755 --- a/src/Field/Field2D.cpp +++ b/src/Field/Field2D.cpp @@ -71,7 +71,7 @@ Field2D::~Field2D() for (int iside=0 ; iside<(int)(sendFields_.size()) ; iside++ ) { if ( sendFields_[iside] != NULL ) { -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) if ( sendFields_[iside]->isOnDevice() ) { sendFields_[iside]->deleteOnDevice(); @@ -220,7 +220,7 @@ double Field2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { @@ -247,7 +247,7 @@ double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, idxlocalstart[0], idxlocalstart[1], iystart, iyend) \ /* is_device_ptr( data_ )*/ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(2) reduction(+:nrj) #endif @@ -333,7 +333,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field2D(size); recvFields_[iDim*2+iNeighbor] = new Field2D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { sendFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); @@ -341,7 +341,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) #endif } else if ( ghost_size != (int)(sendFields_[iDim*2+iNeighbor]->dims_[iDim]) ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -381,7 +381,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) #pragma omp teams distribute parallel for collapse( 2 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -389,7 +389,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -437,7 +437,7 @@ void Field2D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); @@ -486,7 +486,7 @@ void Field2D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == 
"J" || name.substr(0,1) == "R"); @@ -535,7 +535,7 @@ void Field2D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -81,7 +81,7 @@ Field3D::~Field3D() for( unsigned int iside=0 ; isideisOnDevice() ) { @@ -102,7 +102,9 @@ Field3D::~Field3D() } } if( data_!=NULL ) { +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete (data_[0:number_of_points_]) if (acc_deviceptr(data_) != NULL) +#endif delete [] data_; for( unsigned int i=0; idata_3D[i]; @@ -248,7 +250,7 @@ double Field3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } // Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { double nrj( 0. ); @@ -277,7 +279,7 @@ double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) \ /*is_device_ptr( data_ ) */ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field[0:number_of_points_]) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) reduction(+:nrj) #endif @@ -405,7 +407,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field3D(size); recvFields_[iDim*2+iNeighbor] = new Field3D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { @@ -427,7 +429,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -463,7 +465,7 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( is_the_right_field ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -471,11 +473,11 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -514,7 +516,7 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -522,11 +524,11 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc 
loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -566,7 +568,7 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( to \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "J") || (name.substr(0,1) == "R")); @@ -575,11 +577,11 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -618,7 +620,7 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -627,11 +629,11 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { diff --git a/src/Field/Field3D.h b/src/Field/Field3D.h index cc9524790..9f9ce4c9a 100755 --- a/src/Field/Field3D.h +++ b/src/Field/Field3D.h @@ -100,7 +100,7 @@ class Field3D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField.h b/src/Field/cField.h index c37aa9514..d76de6ed7 100755 --- a/src/Field/cField.h +++ b/src/Field/cField.h @@ -63,7 +63,7 @@ class cField : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override = 0; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif diff --git a/src/Field/cField1D.cpp b/src/Field/cField1D.cpp index 77b0c2685..6a79da95a 100755 --- a/src/Field/cField1D.cpp +++ b/src/Field/cField1D.cpp @@ -191,7 +191,7 @@ double cField1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! 
Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField1D.h b/src/Field/cField1D.h index 43f2030e3..27b15bfc1 100755 --- a/src/Field/cField1D.h +++ b/src/Field/cField1D.h @@ -94,7 +94,7 @@ class cField1D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField2D.cpp b/src/Field/cField2D.cpp index e1ca5560a..57ff6ea81 100755 --- a/src/Field/cField2D.cpp +++ b/src/Field/cField2D.cpp @@ -219,7 +219,7 @@ double cField2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField2D.h b/src/Field/cField2D.h index d447d4f2e..26ee995c9 100755 --- a/src/Field/cField2D.h +++ b/src/Field/cField2D.h @@ -84,7 +84,7 @@ class cField2D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField3D.cpp b/src/Field/cField3D.cpp index 84510f401..f4249e134 100755 --- a/src/Field/cField3D.cpp +++ b/src/Field/cField3D.cpp @@ -218,7 +218,7 @@ double cField3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField3D.h b/src/Field/cField3D.h index a81f293fc..0db1f6835 100755 --- a/src/Field/cField3D.h +++ b/src/Field/cField3D.h @@ -84,7 +84,7 @@ class cField3D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Interpolator/Interpolator2D2Order.cpp b/src/Interpolator/Interpolator2D2Order.cpp index 0254294f5..795ab996d 100755 --- a/src/Interpolator/Interpolator2D2Order.cpp +++ b/src/Interpolator/Interpolator2D2Order.cpp @@ -180,7 +180,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By2D = static_cast( EMfields->By_m )->data(); const double *const __restrict__ Bz2D = static_cast( EMfields->Bz_m )->data(); -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -207,7 +207,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -260,7 +260,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else{ // with B-TIS3 interpolation @@ -276,7 +276,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -337,7 +337,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end with B-TIS interpolation diff --git a/src/Interpolator/Interpolator3D2Order.cpp b/src/Interpolator/Interpolator3D2Order.cpp index 9e594f20b..f40239836 100755 --- a/src/Interpolator/Interpolator3D2Order.cpp +++ b/src/Interpolator/Interpolator3D2Order.cpp @@ -185,8 +185,6 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); - unsigned int buffer_size = smpi->dynamics_Epart[ithread].size(); - const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); const double *const __restrict__ position_y = particles.getPtrPosition( 1 ); const double *const __restrict__ position_z = particles.getPtrPosition( 2 ); @@ -198,7 +196,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part const double *const __restrict__ By3D = EMfields->By_m->data_; const double *const __restrict__ Bz3D = EMfields->Bz_m->data_; -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const 
int sizeofEz = EMfields->Ez_->size(); @@ -224,7 +222,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -282,7 +280,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[1*nparts+ipart] = delta_p[1]; delta[2*nparts+ipart] = delta_p[2]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else { // with B-TIS3 interpolation @@ -302,7 +300,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -368,7 +366,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[ipart+0*nparts] = delta_p[0]; delta[ipart+1*nparts] = delta_p[1]; delta[ipart+2*nparts] = delta_p[2]; - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end ipart loop diff --git a/src/Interpolator/Interpolator3D2Order.h b/src/Interpolator/Interpolator3D2Order.h index 52f0335a0..1fa07438d 100755 --- a/src/Interpolator/Interpolator3D2Order.h +++ b/src/Interpolator/Interpolator3D2Order.h @@ -59,7 +59,7 @@ class Interpolator3D2Order : public Interpolator3D int idx, int idy, int idz, - int nx, + int /*nx*/, int ny, int nz ) { diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 6dbb5da57..4ee9781c7 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -383,7 +383,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) if( params.gpu_computing ) { for( auto spec: mypatch->vecSpecies ) { spec->allocateParticlesOnDevice(); @@ -398,7 +398,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end test patch_particle_created[ithread][j] -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) // if ( params.gpu_computing ) { // Initializes only field data structures, particle data structure are initialized separately mypatch->allocateAndCopyFieldsOnDevice(); diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 6f7b9e0df..8136f36ff 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -10,7 +10,7 @@ #include "MultiphotonBreitWheeler.h" #include "Species.h" -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -248,7 +248,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double *const __restrict__ pair1_chi = 
new_pair[1]->has_quantum_parameter ? new_pair[1]->getPtrChi() : nullptr; double *const __restrict__ pair1_tau = new_pair[1]->has_Monte_Carlo_process ? new_pair[1]->getPtrTau() : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Parameters for random generator unsigned long long seed; unsigned long long seq; @@ -325,7 +325,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } @@ -349,7 +349,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else @@ -406,7 +406,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double pair_chi[2]; // Draw random number in [0,1[ -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC const double random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -431,7 +431,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, SMILEI_UNUSED( ibin ); // Creation of new electrons in the temporary array new_pair[0] new_pair[0]->createParticles( mBW_pair_creation_sampling_[0] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size int nparticles = new_pair[0]->size(); @@ -442,7 +442,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new particles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) { @@ -466,7 +466,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() ) { pair0_position_old_x[ipair]=position_x[ipart] ; @@ -494,7 +494,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, // Create particle for the second pair species new_pair[1]->createParticles( mBW_pair_creation_sampling_[1] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size nparticles = new_pair[1]->size(); @@ -505,7 +505,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new particles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) { @@ -530,7 +530,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() ) { pair1_position_old_x[ipair]=position_x[ipart] ; @@ -629,7 +629,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } } // end ipart loop -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } #endif } @@ -795,7 +795,7 @@ void MultiphotonBreitWheeler::removeDecayedPhotonsWithoutBinCompression( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef 
SMILEI_ACCELERATOR_GPU_OACC particles.overwriteParticle( last_photon_index, ipart ); #else #endif diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h index 6e14a37f3..71315d79a 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h @@ -115,7 +115,7 @@ class MultiphotonBreitWheeler //! \param bmin Pointer toward the first particle index of the bin in the Particles object //! \param bmax Pointer toward the last particle index of the bin in the Particles object //! \param ithread Thread index -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc routine seq //#endif void removeDecayedPhotonsWithoutBinCompression( diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h index 4f7f1ce72..9bef108b6 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h @@ -54,7 +54,7 @@ class MultiphotonBreitWheelerTables //! the multiphoton Breit-Wheeler pair creation //! \param photon_chi photon quantum parameter //! \param[out] pair_chi quantum parameters of the pair -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif void computePairQuantumParameter( const double photon_chi, @@ -71,7 +71,7 @@ class MultiphotonBreitWheelerTables //! \param photon_chi photon quantum parameter //! \param gamma photon normalized energy // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeBreitWheelerPairProductionRate( diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index 803cdf9e5..b1fafcb09 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -837,7 +837,7 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : PyTools::extract( "gpu_computing", gpu_computing, "Main" ); if( gpu_computing ) { -#if( defined( SMILEI_OPENACC_MODE ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if( defined( SMILEI_ACCELERATOR_GPU_OACC ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) // If compiled for GPU and asking for GPU MESSAGE( 1, "Smilei will run on GPU devices" ); #else @@ -1055,21 +1055,21 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : // Extract the list of profiles and verify their content PyObject *p = PyTools::extract_py( "_profiles", "Laser", i_laser ); vector<PyObject *> profiles; - vector<int> profiles_n = {1, 2}; if( ! 
PyTools::py2pyvector( p, profiles ) ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile must be a list of 2 profiles", LINK_NAMELIST + std::string("#lasers") ); } Py_DECREF( p ); - if( profiles.size()!=2 ) { + if( profiles.size() != 2 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") ); } - if( profiles[1] == Py_None ) { - profiles .pop_back(); - profiles_n.pop_back(); - } - if( profiles[0] == Py_None ) { - profiles .erase( profiles .begin() ); - profiles_n.erase( profiles_n.begin() ); + vector<int> profiles_n; + for( unsigned int i = 0; i < 2; i++ ) { + if( profiles[i] == Py_None ) { + Py_DECREF( profiles[i] ); + profiles.erase( profiles.begin() ); + } else { + profiles_n.push_back( i ); + } } if( profiles.size() == 0 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") ); @@ -1124,7 +1124,11 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); } } - + + for( auto p: profiles ) { + Py_DECREF( p ); + } + n_laser_offset ++; } } @@ -1227,7 +1231,7 @@ void Params::compute() // Set cluster_width_ if not set by the user if( cluster_width_ == -1 ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) cluster_width_ = patch_size_[0]; // On GPU, don't do the CPU automatic cluster_width computation, only one // bin is expected. @@ -1276,7 +1280,7 @@ void Params::compute() // Verify that cluster_width_ divides patch_size_[0] or patch_size_[n] in GPU mode -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const int kClusterWidth = getGPUClusterWidth(); if( kClusterWidth < 0 ) { @@ -1886,7 +1890,7 @@ string Params::speciesField( string field_name ) return ""; } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) bool Params::isGPUParticleBinningAvailable() const { @@ -1903,7 +1907,7 @@ bool Params::isGPUParticleBinningAvailable() const #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) int Params::getGPUClusterWidth() const { diff --git a/src/Params/Params.h b/src/Params/Params.h index e2b0603e6..32bf63a37 100755 --- a/src/Params/Params.h +++ b/src/Params/Params.h @@ -386,7 +386,7 @@ class Params //! bool isGPUParticleBinningAvailable() const; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Given dimension_id in [0, 3), return for dimension_id == : //! 
1: the 1D value (not implemented) diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 5a55d74b2..304656eca 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -18,7 +18,7 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -40,7 +40,7 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -92,7 +92,7 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -112,7 +112,7 @@ void reflect_particle_sup( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -189,9 +189,9 @@ void remove_particle_inf( Species* species, int imin, int imax, int direction, double limit_inf, - double dt, - std::vector<double>& invgf, - Random* rand, + double /*dt*/, + std::vector<double>& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -210,7 +210,7 @@ void remove_particle_inf( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector reduction(+ : change_in_energy) #else @@ -235,9 +235,9 @@ void remove_particle_sup( Species* species, int imin, int imax, int direction, double limit_sup, - double dt, - std::vector<double>& invgf, - Random* rand, + double /*dt*/, + std::vector<double>& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -256,7 +256,7 @@ void remove_particle_sup( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector 
reduction(+ : change_in_energy) #else diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h index 47ab7e235..7afd6ca9c 100755 --- a/src/ParticleBC/PartBoundCond.h +++ b/src/ParticleBC/PartBoundCond.h @@ -44,7 +44,7 @@ class PartBoundCond } else { int *const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr( cell_keys ) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 34eaeb161..30c685155 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -1311,7 +1311,7 @@ void Particles::copyLeavingParticlesToBuffers( const vector copy, const ve // where direction goes from 0 to 6 and tells which way the particle escapes. // If the cell_key is -1, the particle must be destroyed so it is not extracted. -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // GPU @@ -1398,13 +1398,13 @@ int Particles::eraseLeavingParticles() return 0; } -int Particles::injectParticles( Particles *particles_to_inject ) +int Particles::injectParticles( Particles */*particles_to_inject*/ ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); return 0; } -void Particles::importAndSortParticles( Particles *particles_to_inject ) +void Particles::importAndSortParticles( Particles */*particles_to_inject*/ ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); } diff --git a/src/Particles/ParticlesFactory.cpp b/src/Particles/ParticlesFactory.cpp index 00f51bbb0..34e9a3a83 100755 --- a/src/Particles/ParticlesFactory.cpp +++ b/src/Particles/ParticlesFactory.cpp @@ -7,7 +7,7 @@ // ----------------------------------------------------------------------------- #include "ParticlesFactory.h" -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void* CreateGPUParticles( const void* parameters, const void* a_parent_patch ); #endif @@ -22,7 +22,7 @@ Particles* ParticlesFactory::create( const Params& parameters, // We export a C interface to avoid potential ABI problems // that could occur when using two different compilers (e.g., one to // compile cuda/hip and another one for the host code). 
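Aside on the comment above: the following is a minimal, self-contained sketch of the extern "C" factory idiom that CreateGPUParticles relies on, with placeholder names (Parameters, ParticlesBase, CreateDeviceParticles are illustrative, not Smilei symbols). Only a C-linkage symbol and opaque void* pointers cross the boundary between the host-compiled and device-compiled translation units, so the two compilers never need to agree on a C++ ABI.

// Minimal sketch of the extern "C" factory idiom (illustrative names only).
#include <cstdio>

struct Parameters { int device_id; };                          // stand-in for Params
struct ParticlesBase { virtual ~ParticlesBase() = default; };  // stand-in for Particles

// Would live in the device-compiled translation unit (CUDA/HIP compiler):
extern "C" void *CreateDeviceParticles( const void *parameters )
{
    const Parameters *params = static_cast<const Parameters *>( parameters );
    std::printf( "creating particles for device %d\n", params->device_id );
    return new ParticlesBase();  // a device-capable subclass in practice
}

// Host-compiled side: only the C symbol and a void* cross the boundary.
int main()
{
    Parameters params{ 0 };
    ParticlesBase *p = static_cast<ParticlesBase *>( CreateDeviceParticles( &params ) );
    delete p;
    return 0;
}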
-#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) particles = static_cast<Particles *>( CreateGPUParticles( &parameters, &a_parent_patch ) ); #else SMILEI_UNUSED( a_parent_patch ); diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index 8fa4022aa..ca76c6ece 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -445,7 +445,7 @@ void Patch::setLocationAndAllocateFields( Params &params, DomainDecomposition *d Patch::~Patch() { -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU deleteFieldsOnDevice(); #endif @@ -1153,7 +1153,7 @@ void Patch::computePoynting() { } } -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- // Allocate data on device diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h index ff5a76a5c..8d06d21c2 100755 --- a/src/Patch/Patch.h +++ b/src/Patch/Patch.h @@ -194,7 +194,7 @@ class Patch //! delete Particles included in the index of particles to exchange. Assumes indexes are sorted. void cleanupSentParticles( int ispec, std::vector<int> *indexes_of_particles_to_exchange ); -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU //! Allocate and copy all the field grids on device void allocateAndCopyFieldsOnDevice(); diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp index 675529113..7f2cd183e 100755 --- a/src/Patch/SyncVectorPatch.cpp +++ b/src/Patch/SyncVectorPatch.cpp @@ -2,7 +2,7 @@ #include "SyncVectorPatch.h" #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif #include "Params.h" @@ -269,7 +269,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIx[ifield ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+nPatchMPIx ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+2*nPatchMPIx]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->sendFields_[iNeighbor]->data_; // int sizeofJx = field->sendFields_[iNeighbor]->size(); @@ -291,7 +291,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 0, local const int nFieldLocalx = vecPatches.densitiesLocalx.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer that needs to be handled on the host. 
const bool is_memory_on_device = vecPatches.densitiesLocalx.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalx[0]->data() ); @@ -324,9 +324,9 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt2 = &( vecPatches.densitiesLocalx[ifield]->data_[0] ); //Sum 2 ==> 1 - const int last = gsp[0] * ny_ * nz_; + const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalx[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -358,7 +358,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIx[ifield+2*nPatchMPIx], 0 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -402,7 +402,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIy[ifield ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+nPatchMPIy ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+2*nPatchMPIy]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = field->sendFields_[iNeighbor+2]->data_; // int sizeofJx = field->sendFields_[iNeighbor+2]->size(); @@ -424,7 +424,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 1, const int nFieldLocaly = vecPatches.densitiesLocaly.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocaly.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocaly[0]->data() ); #endif @@ -457,11 +457,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size[1]*nz_] ); pt2 = &( vecPatches.densitiesLocaly[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocaly[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -496,7 +496,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIy[ifield+2*nPatchMPIy], 1 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = 
field->recvFields_[(iNeighbor+1)%2+2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -538,7 +538,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIz[ifield ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+nPatchMPIz ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+2*nPatchMPIz]->extract_fields_sum( 2, iNeighbor, oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->sendFields_[iNeighbor+4]->data_; // int sizeofJx = field->sendFields_[iNeighbor+4]->size(); @@ -560,7 +560,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 2 local const int nFieldLocalz = vecPatches.densitiesLocalz.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocalz.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalz[0]->data() ); #endif @@ -594,11 +594,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size[2]] ); pt2 = &( vecPatches.densitiesLocalz[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalz[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) @@ -630,7 +630,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIz[ifield+2*nPatchMPIz], 2 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2+4]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2+4]->size(); @@ -797,7 +797,7 @@ void SyncVectorPatch::exchangeE( Params &, VectorPatch &vecPatches, int imode, S SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listEt_[imode], vecPatches ); } -void SyncVectorPatch::exchangeBmBTIS3( Params &params, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) { SyncVectorPatch::exchangeAlongAllDirections<complex<double>,cField>( vecPatches.listBr_mBTIS3[imode], vecPatches, smpi ); SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listBr_mBTIS3[imode], vecPatches ); @@ -881,7 +881,7 @@ void SyncVectorPatch::exchangeEnvEx( Params &params, VectorPatch &vecPatches, Sm } } -void SyncVectorPatch::exchangeBmBTIS3( Params &params, VectorPatch &vecPatches, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, SmileiMPI *smpi ) { // exchange BmBTIS3 in Cartesian geometries // exchange ByBTIS3 @@ -1487,7 +1487,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongX( std::vector &fields, 
vecPatches.B_MPIx[ifield ]->extract_fields_exch( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->create_sub_fields ( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->extract_fields_exch( 0, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; double* By = field->sendFields_[iNeighbor]->data_; int sizeofBy = field->sendFields_[iNeighbor]->size(); @@ -1580,7 +1580,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongX( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B_MPIx[ifield+nMPIx], 0 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; double* By = field->recvFields_[(iNeighbor+1)%2]->data_; int sizeofBy = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -1623,7 +1623,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, vecPatches.B1_MPIy[ifield ]->extract_fields_exch( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->create_sub_fields ( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->extract_fields_exch( 1, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->sendFields_[iNeighbor+2]->data_; int sizeofBx = field->sendFields_[iNeighbor+2]->size(); @@ -1671,7 +1671,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[1][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size*nz_] ); pt2 = &( vecPatches.B1_localy[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = vecPatches.B1_localy[ifield]->size(); #pragma acc parallel present(pt1[0-size*nz_:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1711,7 +1711,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongY( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B1_MPIy[ifield+nMPIy], 1 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+2]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -1754,7 +1754,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, vecPatches.B2_MPIz[ifield ]->extract_fields_exch( 2, iNeighbor, oversize ); vecPatches.B2_MPIz[ifield+nMPIz]->create_sub_fields ( 2, iNeighbor, oversize ); vecPatches.B2_MPIz[ifield+nMPIz]->extract_fields_exch( 2, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->sendFields_[iNeighbor+4]->data_; int sizeofBx = field->sendFields_[iNeighbor+4]->size(); @@ -1799,7 +1799,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[2][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size] ); pt2 = &( vecPatches.B2_localz[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = 
vecPatches.B2_localz[ifield]->size(); #pragma acc parallel present(pt1[0-size:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1839,7 +1839,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongZ( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B2_MPIz[ifield+nMPIz], 2 ); // By for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+4]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+4]->size(); diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h index 0322c1283..07435cd49 100755 --- a/src/Patch/SyncVectorPatch.h +++ b/src/Patch/SyncVectorPatch.h @@ -73,7 +73,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 0, iNeighbor, 2*oversize[0]+1+fields[ifield]->isDual_[0] ); fields[ifield]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double * pointer = fields[ifield]->sendFields_[iNeighbor]->data_; // int size = fields[ifield]->size(); // #endif @@ -87,7 +87,7 @@ public : // iDim = 0, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer that needs to be handled on the host. const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); @@ -123,7 +123,7 @@ public : const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -177,7 +177,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 1, iNeighbor, 2*oversize[1]+1+fields[ifield]->isDual_[1] ); fields[ifield]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2]->size(); // //#pragma acc update device( Jx[0:sizeofJx], Jy[0:sizeofJy], Jz[0:sizeofJz] ) @@ -192,7 +192,7 @@ public : // iDim = 1, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -220,11 +220,11 @@ public : pt1 = &( *field1 )( size[1]*nz_ ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -282,7 +282,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 2, iNeighbor, 
2*oversize[2]+1+fields[ifield]->isDual_[2] ); fields[ifield]->extract_fields_sum( 2, iNeighbor, oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->size(); // #endif @@ -293,7 +293,7 @@ public : // iDim = 2 local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -321,11 +321,11 @@ public : pt1 = &( *field1 )( size[2] ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index 65d68f28c..42f4dd3d8 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -301,7 +301,7 @@ void VectorPatch::reconfiguration( Params ¶ms, Timers &timers, int itime ) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::initialParticleSorting( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC) // Initially I wanted to control the GPU particle sorting/bin initialization // here. In the end it was put in initializeDataOnDevice which is more // meaningful. 
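A note on the macro scheme these hunks converge on: SMILEI_ACCELERATOR_GPU (formerly SMILEI_ACCELERATOR_MODE) guards anything that applies to any GPU build, while SMILEI_ACCELERATOR_GPU_OACC (formerly SMILEI_OPENACC_MODE) and SMILEI_ACCELERATOR_GPU_OMP select the OpenACC and OpenMP-target backends respectively. The toy function below, not taken from Smilei, sketches how the renamed guards are meant to layer, with a plain CPU loop as the fallback when neither backend macro is set.

// Toy sketch of the guard layering after the rename (not Smilei source):
//   SMILEI_ACCELERATOR_GPU      - umbrella, any GPU build (was SMILEI_ACCELERATOR_MODE)
//   SMILEI_ACCELERATOR_GPU_OACC - OpenACC backend         (was SMILEI_OPENACC_MODE)
//   SMILEI_ACCELERATOR_GPU_OMP  - OpenMP-target backend   (unchanged)
void scale( double *data, int n, double a )
{
#if defined( SMILEI_ACCELERATOR_GPU_OACC )
    #pragma acc parallel loop deviceptr( data )
#elif defined( SMILEI_ACCELERATOR_GPU_OMP )
    #pragma omp target is_device_ptr( data )
    #pragma omp teams distribute parallel for
#endif
    for( int i = 0; i < n; ++i ) {
        data[i] *= a;  // plain CPU loop when no accelerator macro is defined
    }
}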
@@ -853,7 +853,7 @@ void VectorPatch::sumDensities( Params &params, double time_dual, Timers &timers #pragma omp for schedule(static) for( unsigned int ipatch=0 ; ipatch<this->size() ; ipatch++ ) { // Per species in global, Attention if output -> Sync / per species fields -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At itime == 0, data is still located on the Host if (itime == 0) { ( *this )( ipatch )->EMfields->computeTotalRhoJ(); @@ -1269,7 +1269,7 @@ void VectorPatch::closeAllDiags( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int itime, Timers &timers, SimWindow *simWindow ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool data_on_cpu_updated = false; #endif @@ -1277,7 +1277,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int timers.diags.restart(); // Determine which data is required from the device -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool need_particles = false; bool need_fields = false; @@ -1346,7 +1346,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < globalDiags.size() ; idiag++ ) { diag_timers_[idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE) +// #if defined( SMILEI_ACCELERATOR_GPU) // if( globalDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1462,7 +1462,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < localDiags.size() ; idiag++ ) { diag_timers_[globalDiags.size()+idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // if( localDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1496,7 +1496,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int ipatch=0 ; ipatch<size() ; ipatch++ ) { ( *this )( ipatch )->EMfields->restartRhoJs(); -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) // Delete species current and rho grids from device for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { ( *this )( ipatch )->vecSpecies[ispec]->Species::deleteSpeciesCurrentAndChargeOnDevice(ispec, ( *this )( ipatch )->EMfields); @@ -4402,7 +4402,7 @@ void VectorPatch::moveWindow( // Bring all particles and field grids to the Host (except species grids) // This part can be optimized by copying only the patch to be destructed -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyParticlesFromDeviceToHost(); copyFieldsFromDeviceToHost(); @@ -4412,10 +4412,11 @@ void VectorPatch::moveWindow( simWindow->shift( (*this), smpi, params, itime, time_dual, region ); - if (itime == simWindow->getAdditionalShiftsIteration() ) { + if( itime == (int) simWindow->getAdditionalShiftsIteration() ) { int adjust = simWindow->isMoving(time_dual)?0:1; - for (unsigned int n=0;n < simWindow->getNumberOfAdditionalShifts()-adjust; n++) + for( unsigned int n=0; n < simWindow->getNumberOfAdditionalShifts()-adjust; n++ ) { simWindow->shift( (*this), smpi, params, itime, time_dual, region ); + } } // Copy all Fields and 
Particles to the device @@ -4423,7 +4424,7 @@ void VectorPatch::moveWindow( // let's try initialising like we do at the start: -/*#if defined( SMILEI_ACCELERATOR_MODE ) +/*#if defined( SMILEI_ACCELERATOR_GPU ) // Allocate particle and field arrays // Also copy particle array content on device vecPatches.allocateDataOnDevice( params, &smpi, @@ -4434,7 +4435,7 @@ void VectorPatch::moveWindow( #endif*/ // does not do anything? - /*#if defined( SMILEI_ACCELERATOR_MODE) + /*#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyFieldsFromHostToDevice(); copyParticlesFromHostToDevice(); @@ -4609,13 +4610,12 @@ void VectorPatch::initNewEnvelope( Params & ) } // END initNewEnvelope +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::allocateDataOnDevice(Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { - -#if defined( SMILEI_ACCELERATOR_MODE ) // TODO(Etienne M): FREE. If we have load balancing or other patch // creation/destruction available (which is not the case on GPU ATM), // we should be taking care of freeing this GPU memory. @@ -4681,17 +4681,24 @@ void VectorPatch::allocateDataOnDevice(Params ¶ms, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( min_particle_chi_table, min_particle_chi_size ); smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( xi_table, xi_table_size ); } +} #else +void VectorPatch::allocateDataOnDevice(Params &, + SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif + //! Clean data allocated on device +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) const int npatches = this->size(); @@ -4801,12 +4808,17 @@ void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceFree( xi_table, xi_table_size ); } +} #else +void VectorPatch::cleanDataOnDevice( Params &, SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Field Synchronization from the GPU (Device) to the CPU //! This function updates the data on the host from the data located on the device @@ -4846,9 +4858,7 @@ void VectorPatch::copyFieldsFromHostToDevice() } } -#endif -#if defined( SMILEI_ACCELERATOR_MODE) //! Sync all fields from device to host void VectorPatch::copyFieldsFromDeviceToHost() @@ -4861,10 +4871,6 @@ VectorPatch::copyFieldsFromDeviceToHost() } } -#endif - - -#if defined( SMILEI_ACCELERATOR_MODE) //! Copy all species particles from Host to devices void VectorPatch::copyParticlesFromHostToDevice() @@ -4876,9 +4882,6 @@ void VectorPatch::copyParticlesFromHostToDevice() } } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! 
copy all patch Particles from device to Host void @@ -4891,9 +4894,7 @@ VectorPatch::copyParticlesFromDeviceToHost() for( int ipatch = 0; ipatch < npatches; ipatch++ ) { for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { species( ipatch, ispec )->particles->copyFromDeviceToHost(); -#if defined ( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_MODE ) species( ipatch, ispec )->particles->setHostBinIndex(); -#endif // std::cerr // << "ipatch: " << ipatch // << " ispec: " << ispec @@ -4906,9 +4907,6 @@ VectorPatch::copyParticlesFromDeviceToHost() } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! Sync all fields from device to host void VectorPatch::copySpeciesFieldsFromDeviceToHost() @@ -4988,7 +4986,7 @@ void VectorPatch::dynamicsWithoutTasks( Params ¶ms, if( spec->isProj( time_dual, simWindow ) || diag_flag ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if (diag_flag) { spec->Species::prepareSpeciesCurrentAndChargeOnDevice( ispec, diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h index be5a37d21..051d78276 100755 --- a/src/Patch/VectorPatch.h +++ b/src/Patch/VectorPatch.h @@ -510,7 +510,7 @@ public : RadiationTables * radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables ); -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) //! Field Synchronization from the GPU (Device) to the host (CPU) diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp index cfe20eb7d..c669cc209 100755 --- a/src/Projector/Projector2D2OrderGPU.cpp +++ b/src/Projector/Projector2D2OrderGPU.cpp @@ -26,7 +26,7 @@ Projector2D2OrderGPU::Projector2D2OrderGPU( Params ¶meters, Patch *a_patch ) dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. 
x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); @@ -41,7 +41,7 @@ Projector2D2OrderGPU::~Projector2D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) //SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU ) //SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void currentDepositionKernel2DOnDevice( double *__restrict__ Jx, @@ -109,6 +109,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -132,7 +133,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) currentDepositionKernel2DOnDevice( Jx, Jy, Jz, @@ -159,15 +159,22 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int ,const double *__restrict__ , + const int *__restrict__ , const double *__restrict__ , double , double , double , + double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif /// Like currents(), project the particle current on the grid (Jx_/Jy_/Jz_) /// but also compute global current densities rho used for diagFields timestep /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currentsAndDensity( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -193,7 +200,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) currentAndDensityDepositionKernelOnDevice( Jx, Jy, Jz, @@ -222,10 +228,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currentsAndDensity( double *__restrict__ , double *__restrict__ , double *__restrict__ , double *__restrict__ , + int , int , int , int , Particles &, unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -233,7 +245,7 @@ void Projector2D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { // Warning : this function is used for frozen species only. It is assumed that position = position_old !!! @@ -306,12 +318,12 @@ void Projector2D2OrderGPU::basic( double *rhoj, } } -void Projector2D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector2D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector2D2OrderGPU::ionizationCurrents(): Not implemented !" 
); } @@ -325,8 +337,8 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref */) { std::vector &iold = smpi->dynamics_iold[ithread]; std::vector &delta = smpi->dynamics_deltaold[ithread]; @@ -425,20 +437,20 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, } } -void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector2D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector2D2OrderGPU::susceptibility(): Not implemented !" ); } -//#if defined( SMILEI_ACCELERATOR_MODE ) +//#if defined( SMILEI_ACCELERATOR_GPU ) ////! Project global current densities (EMfields->Jx_/Jy_/Jz_) ////! //extern "C" void diff --git a/src/Projector/Projector2D2OrderGPU.h b/src/Projector/Projector2D2OrderGPU.h index 9a799f9b5..ecdd4959d 100755 --- a/src/Projector/Projector2D2OrderGPU.h +++ b/src/Projector/Projector2D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector2D2OrderGPU : public Projector2D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// diff --git a/src/Projector/Projector2D2OrderGPUKernel.cpp b/src/Projector/Projector2D2OrderGPUKernel.cpp index 8f38f52fe..e2ec56495 100644 --- a/src/Projector/Projector2D2OrderGPUKernel.cpp +++ b/src/Projector/Projector2D2OrderGPUKernel.cpp @@ -1,4 +1,4 @@ -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include "Projector2D2OrderGPUKernelCUDAHIP.h" #include diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu index 666a409f4..55082b793 100644 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu @@ -81,7 +81,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ @@ -264,7 +264,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // 
#pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h index d607a4ab4..a21f757db 100644 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector2D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector3D2OrderGPU.cpp b/src/Projector/Projector3D2OrderGPU.cpp index 39342b204..62ec54141 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp +++ b/src/Projector/Projector3D2OrderGPU.cpp @@ -30,13 +30,13 @@ Projector3D2OrderGPU::Projector3D2OrderGPU( Params ¶meters, Patch *a_patch ) dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); y_dimension_bin_count_ = parameters.getGPUBinCount( 2 ); z_dimension_bin_count_ = parameters.getGPUBinCount( 3 ); -//#elif defined( SMILEI_OPENACC_MODE ) +//#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // x_dimension_bin_count_ = 1; // y_dimension_bin_count_ = 1; // z_dimension_bin_count_ = 1; @@ -50,7 +50,7 @@ Projector3D2OrderGPU::~Projector3D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) extern "C" void currentDeposition3DOnDevice( double *__restrict__ Jx, double *__restrict__ Jy, @@ -122,6 +122,8 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void + +#if defined( SMILEI_ACCELERATOR_GPU ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -150,72 +152,77 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) currentDeposition3DOnDevice( Jx, - Jy, - Jz, - Jx_size, - Jy_size, - Jz_size, - particles.getPtrPosition( 0 ), - particles.getPtrPosition( 1 ), - particles.getPtrPosition( 2 ), - particles.getPtrCharge(), - particles.getPtrWeight(), - particles.last_index.data(), - x_dimension_bin_count, - y_dimension_bin_count, - z_dimension_bin_count, - invgf_, - iold_, - deltaold_, - particles.deviceSize(), - inv_cell_volume, - dx_inv, - dy_inv, - dz_inv, - dx_ov_dt, - dy_ov_dt, - dz_ov_dt, - i_domain_begin, - j_domain_begin, - k_domain_begin, - nprimy, nprimz, - not_spectral ); + Jy, + Jz, + Jx_size, + Jy_size, + Jz_size, + particles.getPtrPosition( 0 ), + particles.getPtrPosition( 1 ), + particles.getPtrPosition( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count, + y_dimension_bin_count, + z_dimension_bin_count, + invgf_, + iold_, + deltaold_, + particles.deviceSize(), + inv_cell_volume, + dx_inv, + dy_inv, + dz_inv, + dx_ov_dt, + dy_ov_dt, + dz_ov_dt, + i_domain_begin, + j_domain_begin, + k_domain_begin, + nprimy, nprimz, + not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int , unsigned int , const double *__restrict__ , + const int 
*__restrict__ , const double *__restrict__ , double , double , double , double , + double , double , double , int , int , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif //! Project density /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU ) density( - double *__restrict__ rho, - int rho_size, - Particles &particles, - unsigned int x_dimension_bin_count, - unsigned int y_dimension_bin_count, - unsigned int z_dimension_bin_count, - const double *__restrict__ invgf_, - const int *__restrict__ iold_, - const double *__restrict__ deltaold_, - double inv_cell_volume, - double dx_inv, - double dy_inv, - double dz_inv, - double dx_ov_dt, - double dy_ov_dt, - double dz_ov_dt, - int i_domain_begin, - int j_domain_begin, - int k_domain_begin, - int nprimy, - int nprimz, - double, - int not_spectral ) + double *__restrict__ rho, + int rho_size, + Particles &particles, + unsigned int x_dimension_bin_count, + unsigned int y_dimension_bin_count, + unsigned int z_dimension_bin_count, + const double *__restrict__ invgf_, + const int *__restrict__ iold_, + const double *__restrict__ deltaold_, + double inv_cell_volume, + double dx_inv, + double dy_inv, + double dz_inv, + double dx_ov_dt, + double dy_ov_dt, + double dz_ov_dt, + int i_domain_begin, + int j_domain_begin, + int k_domain_begin, + int nprimy, + int nprimz, + double, + int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) densityDeposition3DOnDevice( rho, rho_size, @@ -244,10 +251,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy k_domain_begin, nprimy, nprimz, not_spectral ); + } #else + density( double *__restrict__ , int , Particles &, unsigned int , unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , double , double , + int, int, int, int, int, double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -255,7 +268,7 @@ void Projector3D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { @@ -347,12 +360,12 @@ void Projector3D2OrderGPU::basic( double *rhoj, } } -void Projector3D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector3D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector3D2OrderGPU::ionizationCurrents(): Not implemented !" ); } @@ -366,8 +379,8 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref*/ ) { if( is_spectral ) { @@ -463,15 +476,15 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, //std::cerr << sum << " " << sum2 << " " << sum_Jxs << " " << sum_Jx << std::endl; } -void Projector3D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector3D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector3D2OrderGPU::susceptibility(): Not implemented !" 
); } diff --git a/src/Projector/Projector3D2OrderGPU.cpp.backup b/src/Projector/Projector3D2OrderGPU.cpp.backup index 39ce7a4a5..761e6ae31 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp.backup +++ b/src/Projector/Projector3D2OrderGPU.cpp.backup @@ -2,7 +2,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #include #endif @@ -136,7 +136,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -262,7 +262,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -287,7 +287,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -310,7 +310,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -326,7 +326,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jx [ jdx ] += val; @@ -339,7 +339,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -365,7 +365,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -388,7 +388,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -404,7 +404,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jy [ jdx ] += val; 
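// A minimal sketch (not part of the patch) of the guarded scatter that the
// hunks above and below converge on after the rename: exactly one of
// SMILEI_ACCELERATOR_GPU_OMP or SMILEI_ACCELERATOR_GPU_OACC is defined, and
// each current deposition is protected by the matching atomic pragma
// (the helper name here is hypothetical).
static inline void atomicDeposit( double *__restrict__ J, int jdx, double val )
{
#if defined( SMILEI_ACCELERATOR_GPU_OMP )
    #pragma omp atomic update
#elif defined( SMILEI_ACCELERATOR_GPU_OACC )
    #pragma acc atomic
#endif
    J[ jdx ] += val;
}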
@@ -417,7 +417,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -443,7 +443,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -466,7 +466,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -482,7 +482,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jz[ jdx ] += val; @@ -498,7 +498,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ @@ -523,7 +523,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( int jdx = idx + k; #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif rho[ jdx ] += charge_weight * Sx1[ipart_pack+i*packsize]*Sy1[ipart_pack+j*packsize]*Sz1[ipart_pack+k*packsize]; diff --git a/src/Projector/Projector3D2OrderGPU.h b/src/Projector/Projector3D2OrderGPU.h index 2fac2402e..c76bf48a1 100755 --- a/src/Projector/Projector3D2OrderGPU.h +++ b/src/Projector/Projector3D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector3D2OrderGPU : public Projector3D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// diff --git a/src/Projector/Projector3D2OrderGPUKernel.cpp b/src/Projector/Projector3D2OrderGPUKernel.cpp index f77a4fda3..5d9f88b5d 100644 --- a/src/Projector/Projector3D2OrderGPUKernel.cpp +++ b/src/Projector/Projector3D2OrderGPUKernel.cpp @@ -5,7 +5,7 @@ // issues (!). 
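// Hedged sketch of the dispatch that the "switch" below implements (the
// wrapper and the CUDA/HIP namespace name are assumptions, not the patch's
// code): this translation unit forwards either to the HIP/CUDA kernel or to
// the portable OpenMP/OpenACC one, all compiled only when
// SMILEI_ACCELERATOR_GPU is defined.
//
//     #if defined( __HIP__ ) || defined( __CUDACC__ )
//         cudahip::currentDepositionKernel3D( /* ... */ );
//     #else
//         acc::currentDepositionKernel3D( /* ... */ );
//     #endif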
-#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Simple switch to jump between the reference (omp) implementation and the //! hip one. diff --git a/src/Projector/Projector3D2OrderGPUKernelAcc.h b/src/Projector/Projector3D2OrderGPUKernelAcc.h index 9cf3b224d..43bff1cce 100644 --- a/src/Projector/Projector3D2OrderGPUKernelAcc.h +++ b/src/Projector/Projector3D2OrderGPUKernelAcc.h @@ -1,6 +1,6 @@ //! Optimized Acc projection (from Julien Derouillat) -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -110,7 +110,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -236,7 +236,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -261,7 +261,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -284,7 +284,7 @@ namespace acc { const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -309,7 +309,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -335,7 +335,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -358,7 +358,7 @@ namespace acc { const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -383,7 +383,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -409,7 +409,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( 
SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -432,7 +432,7 @@ namespace acc { const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -536,7 +536,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx1 [0:kTmpArraySize], \ @@ -630,7 +630,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu index 195a02667..dd8d1e61d 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu @@ -1,6 +1,6 @@ //! HIP CUDA implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //#include "Projector3D2OrderGPUKernelCUDAHIP.h" diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h index 94368f4dd..1b78b1252 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector3D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector3D2OrderGPUKernelNaive.h b/src/Projector/Projector3D2OrderGPUKernelNaive.h index b6cfac080..a261af40b 100644 --- a/src/Projector/Projector3D2OrderGPUKernelNaive.h +++ b/src/Projector/Projector3D2OrderGPUKernelNaive.h @@ -1,6 +1,6 @@ //! 
Naive ACC/OMP implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -66,7 +66,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Jx[0:Jx_size], \ @@ -344,7 +344,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ rho[0:rho_size] \ diff --git a/src/Projector/ProjectorAM2OrderV.cpp b/src/Projector/ProjectorAM2OrderV.cpp index b222aa4ee..890d37332 100755 --- a/src/Projector/ProjectorAM2OrderV.cpp +++ b/src/Projector/ProjectorAM2OrderV.cpp @@ -673,10 +673,6 @@ void ProjectorAM2OrderV::susceptibility( ElectroMagn *EMfields, Particles &parti double charge_weight[8] __attribute__( ( aligned( 64 ) ) ); // double r_bar[8] __attribute__( ( aligned( 64 ) ) ); - //double *invR_local = &(invR_[jpom2]); - // double *invRd_local = &(invRd_[jpom2]); - - double *invR_local = &(invR_[jpom2]); // Pointer for GPU and vectorization on ARM processors double * __restrict__ position_x = particles.getPtrPosition(0); double * __restrict__ position_y = particles.getPtrPosition(1); diff --git a/src/Projector/ProjectorFactory.h b/src/Projector/ProjectorFactory.h index db8c39e1f..278739301 100755 --- a/src/Projector/ProjectorFactory.h +++ b/src/Projector/ProjectorFactory.h @@ -42,7 +42,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "2Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector2D2OrderGPU( params, patch ); #else Proj = new Projector2D2Order( params, patch ); @@ -64,7 +64,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "3Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector3D2OrderGPU( params, patch ); #else Proj = new Projector3D2Order( params, patch ); diff --git a/src/Pusher/PusherBoris.cpp b/src/Pusher/PusherBoris.cpp index 536def7a9..8f70a6cc3 100755 --- a/src/Pusher/PusherBoris.cpp +++ b/src/Pusher/PusherBoris.cpp @@ -57,7 +57,7 @@ void PusherBoris::operator()( Particles &particles, SmileiMPI *smpi, int istart, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherBorisNR.cpp b/src/Pusher/PusherBorisNR.cpp index 84f072e1f..df4a3277b 100755 --- a/src/Pusher/PusherBorisNR.cpp +++ b/src/Pusher/PusherBorisNR.cpp @@ -57,7 +57,7 @@ void PusherBorisNR::operator()( Particles &particles, SmileiMPI *smpi, int istar 
position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index 2ab234ae1..c85189fff 100755 --- a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -68,7 +68,7 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPhoton.cpp b/src/Pusher/PusherPhoton.cpp index a94a521e3..5feb7823d 100755 --- a/src/Pusher/PusherPhoton.cpp +++ b/src/Pusher/PusherPhoton.cpp @@ -53,7 +53,7 @@ void PusherPhoton::operator()( Particles &particles, SmileiMPI *smpi, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPonderomotiveBoris.cpp b/src/Pusher/PusherPonderomotiveBoris.cpp index 41afa42e6..9d151dabb 100755 --- a/src/Pusher/PusherPonderomotiveBoris.cpp +++ b/src/Pusher/PusherPonderomotiveBoris.cpp @@ -55,7 +55,7 @@ void PusherPonderomotiveBoris::operator()( Particles &particles, SmileiMPI *smpi const double *const __restrict__ GradPhiz = &( ( *GradPhipart )[2*nparts] ); //double *inv_gamma_ponderomotive = &( ( *dynamics_inv_gamma_ponderomotive )[0*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp index 379f41763..a32f359cb 100644 --- a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp +++ b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp @@ -31,7 +31,6 @@ void PusherPonderomotiveBorisBTIS3::operator()( Particles &particles, SmileiMPI double charge_over_mass_dts2, charge_sq_over_mass_sq_dts4; double umx, umy, umz, upx, upy, upz; double alpha; - double TxTy, TyTz, TzTx; double pxsm, pysm, pzsm; //double one_ov_gamma_ponderomotive; diff --git a/src/Pusher/PusherPonderomotivePositionBoris.cpp b/src/Pusher/PusherPonderomotivePositionBoris.cpp index 16a4e6c69..9b9bea639 100755 --- a/src/Pusher/PusherPonderomotivePositionBoris.cpp +++ b/src/Pusher/PusherPonderomotivePositionBoris.cpp @@ -52,7 +52,7 @@ void PusherPonderomotivePositionBoris::operator()( Particles &particles, SmileiM const double *const __restrict__ GradPhi_my = &( ( *GradPhi_mpart )[1*nparts] ); const double *const __restrict__ GradPhi_mz = &( ( *GradPhi_mpart )[2*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherVay.cpp b/src/Pusher/PusherVay.cpp index c1ba76693..83debaae4 100755 --- a/src/Pusher/PusherVay.cpp +++ b/src/Pusher/PusherVay.cpp @@ -67,7 +67,7 @@ void PusherVay::operator()( Particles &particles, SmileiMPI *smpi, int istart, i position_y /* [istart:particle_number] */, \ position_z 
/* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Radiation/RadiationCorrLandauLifshitz.cpp b/src/Radiation/RadiationCorrLandauLifshitz.cpp index 16c7b01fe..ebb0e54dd 100755 --- a/src/Radiation/RadiationCorrLandauLifshitz.cpp +++ b/src/Radiation/RadiationCorrLandauLifshitz.cpp @@ -96,7 +96,7 @@ void RadiationCorrLandauLifshitz::operator()( // cumulative Radiated energy from istart to iend double radiated_energy_loc = 0; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Local vector to store the radiated energy double * rad_norm_energy = new double [iend-istart]; // double * rad_norm_energy = (double*) aligned_alloc(64, (iend-istart)*sizeof(double)); @@ -112,7 +112,7 @@ void RadiationCorrLandauLifshitz::operator()( // Computation // NVIDIA GPUs - #if defined (SMILEI_OPENACC_MODE) + #if defined (SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int np = iend-istart; #pragma acc parallel \ @@ -185,7 +185,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Computation of the thread radiated energy -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Exact energy loss due to the radiation rad_norm_energy[ipart-istart] = gamma - std::sqrt( 1.0 @@ -210,7 +210,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Update of the quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd for( int ipart=istart ; ipart #include -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -103,7 +103,7 @@ void RadiationMonteCarlo::operator()( // Temporary double parameter double temp; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC unsigned long long seed; // Parameters for CUDA generator unsigned long long seq; unsigned long long offset; @@ -152,7 +152,7 @@ void RadiationMonteCarlo::operator()( // Number of photons int nphotons; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int nphotons_start; #endif @@ -160,7 +160,7 @@ void RadiationMonteCarlo::operator()( const double photon_buffer_size_per_particle = radiation_photon_sampling_ * max_photon_emissions_; if (photons) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We reserve a large number of potential photons on device since we can't reallocate nphotons_start = photons->deviceSize(); //static_cast(photons)->deviceReserve( nphotons + (iend - istart) * photon_buffer_size_per_particle ); @@ -199,13 +199,13 @@ void RadiationMonteCarlo::operator()( double *const __restrict__ photon_tau = photons ? (photons->has_Monte_Carlo_process ? photons->getPtrTau() : nullptr) : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Cell keys as a mask int *const __restrict__ photon_cell_keys = photons ? 
photons->getPtrCellKeys() : nullptr; #endif // Table properties ---------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Size of tables // int size_of_Table_integfochi = RadiationTables.integfochi_.size_particle_chi_; // int size_of_Table_min_photon_chi = RadiationTables.xi_.size_particle_chi_; @@ -221,7 +221,7 @@ void RadiationMonteCarlo::operator()( // _______________________________________________________________ // Computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Management of the data on GPU though this data region int np = iend-istart; @@ -342,7 +342,7 @@ void RadiationMonteCarlo::operator()( // New final optical depth to reach for emision while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else seed_curand_1 = (int) (ipart+1)*(initial_seed_1+1); //Seed for linear generator @@ -385,7 +385,7 @@ void RadiationMonteCarlo::operator()( // Draw random number in [0,1[ - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -433,7 +433,7 @@ void RadiationMonteCarlo::operator()( && ( i_photon_emission < max_photon_emissions_)) { // CPU implementation (non-threaded implementation) -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Creation of new photons in the temporary array photons photons->createParticles( radiation_photon_sampling_ ); @@ -611,14 +611,14 @@ void RadiationMonteCarlo::operator()( } // end while } // end for -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif //if (photons) std::cerr << photons->deviceSize() << std::endl; // Remove extra space to save memory -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (photons) { photons->shrinkToFit( true ); } @@ -631,7 +631,7 @@ void RadiationMonteCarlo::operator()( // ____________________________________________________ // Update of the quantum parameter chi -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; @@ -660,11 +660,11 @@ void RadiationMonteCarlo::operator()( } - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc data #endif diff --git a/src/Radiation/RadiationMonteCarlo.h b/src/Radiation/RadiationMonteCarlo.h index 34b8c31db..4e84f169d 100755 --- a/src/Radiation/RadiationMonteCarlo.h +++ b/src/Radiation/RadiationMonteCarlo.h @@ -16,7 +16,7 @@ #include "Radiation.h" #include "userFunctions.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include // This is wrong. Dont include nvidiaParticles, it may cause problem! // See particle factory. 
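The RadiationMonteCarlo and RadiationNiel hunks around here repeatedly switch, under the renamed SMILEI_ACCELERATOR_GPU_OACC guard, between the shared host generator and a per-particle cuRAND draw seeded linearly as (ipart+1)*(initial_seed+1). A minimal sketch of that split, with the function shape and generator plumbing assumed rather than taken from Smilei:

#include <cmath>
#ifdef SMILEI_ACCELERATOR_GPU_OACC
#include <curand_kernel.h>
#endif

// Sample a new optical depth tau = -log(1-U) for particle ipart.
template <typename HostRng>
double newOpticalDepth( int ipart, unsigned long long initial_seed, HostRng &&host_uniform )
{
#ifndef SMILEI_ACCELERATOR_GPU_OACC
    (void) ipart; (void) initial_seed;
    return -std::log( 1. - host_uniform() );   // shared host-side generator
#else
    // Device path: one cuRAND state per particle, linear seeding recipe
    ::curandState_t state;
    ::curand_init( ( ipart + 1 ) * ( initial_seed + 1 ), 0, 0, &state );
    return -std::log( 1. - ::curand_uniform( &state ) );
#endif
}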
diff --git a/src/Radiation/RadiationNiel.cpp b/src/Radiation/RadiationNiel.cpp index 6e61f3759..dff292df4 100755 --- a/src/Radiation/RadiationNiel.cpp +++ b/src/Radiation/RadiationNiel.cpp @@ -127,7 +127,7 @@ void RadiationNiel::operator()( double radiated_energy_loc = 0; // Parameters for linear alleatory number generator - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC // Initialize initial seed for linear generator double initial_seed = rand_->uniform(); @@ -144,7 +144,7 @@ void RadiationNiel::operator()( //double t0 = MPI_Wtime(); // 1) Vectorized computation of gamma and the particle quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else @@ -190,12 +190,12 @@ void RadiationNiel::operator()( Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } //finish cycle #endif //double t1 = MPI_Wtime(); - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC if( particle_chi[ipart] > minimum_chi_continuous ) { seed_curand = (int) (ipart+1)*(initial_seed+1); //Seed for linear generator @@ -297,7 +297,7 @@ void RadiationNiel::operator()( if( niel_computation_method == 0 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC for( ipart=istart ; ipart minimum_chi_continuous ) { @@ -310,7 +310,7 @@ void RadiationNiel::operator()( diffusion[ipart-istart] = std::sqrt( factor_classical_radiated_power*gamma[ipart-ipart_ref]*temp )*random_numbers[ipart-istart]; - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC } } #endif @@ -318,7 +318,7 @@ void RadiationNiel::operator()( // Using the fit at order 5 (vectorized) else if( niel_computation_method == 1 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd private(temp) for( ipart=istart ; ipart #endif diff --git a/src/Radiation/RadiationTables.h b/src/Radiation/RadiationTables.h index bc5003966..77bcac8e2 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -58,7 +58,7 @@ class RadiationTables //! param[in] particle_chi particle quantum parameter //! param[in] particle_gamma particle Lorentz factor //! param[in] integfochi_table table of the discretized integrated f/chi function for Photon production yield computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computePhotonProductionYield( const double particle_chi, @@ -77,7 +77,7 @@ class RadiationTables //! \param[in] xi //! \param[in] table_min_photon_chi //! \param[in] table_xi -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeRandomPhotonChiWithInterpolation( double particle_chi, @@ -95,7 +95,7 @@ class RadiationTables //! from the computed table niel_.table //! \param particle_chi particle quantum parameter -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double getHNielFromTable( double particle_chi, double * tableNiel); @@ -116,7 +116,7 @@ class RadiationTables //! \param particle_chi particle quantum parameter //! \param dt time step //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getRidgersCorrectedRadiatedEnergy( const double particle_chi, @@ -138,7 +138,7 @@ class RadiationTables //! 
Get of the classical continuous radiated energy during dt //! \param particle_chi particle quantum parameter //! \param dt time step -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getClassicalRadiatedEnergy( double particle_chi, double dt ) @@ -148,7 +148,7 @@ class RadiationTables //! Return the minimum_chi_discontinuous_ value //! Under this value, no discontinuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiDiscontinuous() @@ -158,7 +158,7 @@ class RadiationTables //! Return the minimum_chi_continuous_ value //! Under this value, no continuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiContinuous() diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 33cb5f501..1746c894e 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -32,7 +32,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder10(double particle_chi) @@ -62,7 +62,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder5(double particle_chi) @@ -86,7 +86,7 @@ class RadiationTools { //! Ridgers et al., ArXiv 1708.04511 (2017) //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitRidgers(double particle_chi) @@ -104,7 +104,7 @@ class RadiationTools { //! approximation formulae //! \param particle_chi particle quantum parameter //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) @@ -117,7 +117,7 @@ class RadiationTools { //! Return f1(nu) = Int_nu^\infty K_{5/3}(y) dy //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) @@ -155,7 +155,7 @@ class RadiationTools { //! Return f2(nu) = BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) @@ -194,7 +194,7 @@ class RadiationTools { //! 
= Int_nu^\infty K_{5/3}(y) dy + cst * BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeBesselPartsRadiatedPower(double nu, double cst) diff --git a/src/Radiation/Table.h b/src/Radiation/Table.h index 8b74aeeaa..a028d4df3 100644 --- a/src/Radiation/Table.h +++ b/src/Radiation/Table.h @@ -45,7 +45,7 @@ class Table void compute_parameters(); //! get value using linear interpolation at position x -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double get(double x); diff --git a/src/Smilei.cpp b/src/Smilei.cpp index eae1993d9..81ba6c258 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -20,7 +20,7 @@ #include #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -44,7 +44,7 @@ using namespace std; // MAIN CODE // --------------------------------------------------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #ifdef _OPENACC void initialization_openacc() { @@ -80,7 +80,7 @@ int main( int argc, char *argv[] ) // ------------------------- // Create the OpenACC environment -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC initialization_openacc(); #endif @@ -248,7 +248,7 @@ int main( int argc, char *argv[] ) checkpoint.restartAll( vecPatches, region, &smpi, params ); -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -271,7 +271,7 @@ int main( int argc, char *argv[] ) PatchesFactory::createVector( vecPatches, params, &smpi, openPMD, &radiation_tables_, 0 ); -#if !(defined( SMILEI_ACCELERATOR_MODE )) +#if !(defined( SMILEI_ACCELERATOR_GPU )) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -407,7 +407,7 @@ int main( int argc, char *argv[] ) } } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) TITLE( "GPU allocation and copy of the fields and particles" ); // Allocate particle and field arrays // Also copy particle array content on device @@ -685,7 +685,7 @@ int main( int argc, char *argv[] ) } //End omp parallel region if( params.has_load_balancing && params.load_balancing_time_selection->theTimeIsNow( itime ) ) { -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // ERROR( "Load balancing not tested on GPU !" 
); // #endif count_dlb++; @@ -777,7 +777,7 @@ int main( int argc, char *argv[] ) region.clean(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) vecPatches.cleanDataOnDevice( params, &smpi, &radiation_tables_, &multiphoton_Breit_Wheeler_tables_ ); #endif diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index 4fe93fd03..88e03c864 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -763,7 +763,7 @@ void SmileiMPI::isend_species( Patch *patch, int to, int &irequest, int tag, Par irequest ++; } -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // For the particles for( unsigned int ispec=0; ispec &requests, int tag, bool send_xmax_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // isendOnDevice( EM->Ex_, to, tag+irequest, requests[irequest] ); // irequest++; @@ -1745,7 +1745,7 @@ int SmileiMPI::recv_PML(ElectroMagn *EM, Tpml embc, int bcId, int from, int tag void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // recvOnDevice( EM->Ex_, from, tag ); // tag++; @@ -2121,7 +2121,7 @@ void SmileiMPI::isend( Field *field, int to, int tag, MPI_Request &request ) } // End isend ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) //! Sends the whole Field Device to Device (assuming MPI enables it) void SmileiMPI::isendOnDevice( Field *field, int to, int tag, MPI_Request &request ) { @@ -2194,7 +2194,7 @@ void SmileiMPI::recv( Field *field, int from, int tag ) } // End recv ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void SmileiMPI::recvOnDevice( Field *field, int from, int tag ) { @@ -2524,7 +2524,7 @@ void SmileiMPI::eraseBufferParticleTrail( const int ndim, const int istart, cons } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) template static inline void diff --git a/src/SmileiMPI/SmileiMPI.h b/src/SmileiMPI/SmileiMPI.h index 13cacc416..2785921de 100755 --- a/src/SmileiMPI/SmileiMPI.h +++ b/src/SmileiMPI/SmileiMPI.h @@ -103,7 +103,7 @@ class SmileiMPI //! Sends the whole Field void isend( Field *field, int to, int tag, MPI_Request &request ); //! Sends the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void isendOnDevice( Field *field, int to, int tag, MPI_Request &request ); #endif @@ -114,7 +114,7 @@ class SmileiMPI //! Receives the whole Field void recv( Field *field, int from, int tag); //! Receives the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void recvOnDevice( Field *field, int from, int tag); #endif @@ -248,7 +248,7 @@ class SmileiMPI //! Erase Particles from istart ot the end in the buffers of thread ithread void eraseBufferParticleTrail( const int ndim, const int istart, const int ithread, bool isAM = false ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Map CPU buffers onto the GPU to at least accommodate particle_count //! particles. This method tries to reduce the number of //! 
allocation/deallocation which produces a lot of fragmentation on some diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 65358f555..089e25f27 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -500,7 +500,7 @@ Species::~Species() } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Prepare the species Current and Rho grids on Device void Species::prepareSpeciesCurrentAndChargeOnDevice( @@ -540,7 +540,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Jx_s[0:Jx_size], \ Jy_s[0:Jy_size], \ Jz_s[0:Jz_size], \ @@ -551,7 +551,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; icopyFromHostToDevice(); } -#endif // end if SMILEI_ACCELERATOR_MODE +#endif // end if SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- //! Method calculating the Particle dynamics (interpolation, pusher, projection and more) @@ -700,7 +700,7 @@ void Species::dynamics( double time_dual, if( time_dual>time_frozen_ || Ionize) { // moving particle // Prepare temporary buffers for this iteration -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_field, particles->numberOfParticles() ); @@ -713,7 +713,7 @@ void Species::dynamics( double time_dual, patch->startFineTimer(mBW_timer_id_); -#if defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OACC) static_cast(mBW_pair_particles_[0])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(0) ); static_cast(mBW_pair_particles_[0])->resetCellKeys(); static_cast(mBW_pair_particles_[1])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(1) ); @@ -726,7 +726,7 @@ void Species::dynamics( double time_dual, patch->stopFineTimer(mBW_timer_id_); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // Make sure some bin preconditions are respected SMILEI_ASSERT( particles->first_index.size() == 1 ); SMILEI_ASSERT( particles->last_index.size() >= 1 ); @@ -832,7 +832,7 @@ void Species::dynamics( double time_dual, // Compression of the bins if necessary if( Multiphoton_Breit_Wheeler_process ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC removeTaggedParticles(smpi, &particles->first_index[0], &particles->last_index[0], @@ -1690,14 +1690,14 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Radiation losses if( Radiate && photon_species_ ) { // If creation of macro-photon, we add them to photon_species -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(radiated_photons_)->eraseLeavingParticles(); #endif photon_species_->importParticles( params, patch, *radiated_photons_, localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(radiated_photons_)->deviceClear(); #endif @@ -1709,7 +1709,7 @@ void 
Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Addition of the electron-positron particles for( int k=0; k<2; k++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(mBW_pair_particles_[k])->eraseLeavingParticles(); @@ -1717,7 +1717,7 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * mBW_pair_species_[k]->importParticles( params, patch, *mBW_pair_particles_[k], localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(mBW_pair_particles_[k])->deviceClear(); #endif @@ -1771,7 +1771,7 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ ) void Species::sortParticles( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // ----------------------------- // GPU version @@ -2096,7 +2096,7 @@ void Species::countSortParticles( Params ¶ms ) // Move all particles from another species to this one void Species::importParticles( Params ¶ms, Patch *patch, Particles &source_particles, vector &localDiags, double time_dual, Ionization *I ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // --------------------------------------------------- // GPU version // Warning: the GPU version does not handle bin and sorting @@ -2207,7 +2207,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nparts = smpi->dynamics_Epart[ithread].size()/3; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ weight = particles->getPtrWeight(); @@ -2246,7 +2246,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Ex[0:nparts],Ey[0:nparts],Ez[0:nparts], \ Bx[0:nparts], By[0:nparts], Bz[0:nparts], \ @@ -2291,7 +2291,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { if (copy_particle_number>0) { -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle(copy_first_index, particles->last_index[ibin], copy_particle_number, compute_cell_keys ); #else for (auto ipart = 0 ; ipart < copy_particle_number ; ipart ++) { @@ -2346,7 +2346,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { for( unsigned int ipart = 0 ; ipart < copy_particle_number ; ipart ++ ) { thetaold[copy_first_index + ipart] = thetaold[particles->last_index[ibin] + ipart]; @@ -2384,7 +2384,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif @@ -2418,7 +2418,7 @@ void Species::removeTaggedParticlesPerBin( // Weight shortcut double *const __restrict__ weight = particles->getPtrWeight(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ position_x = particles->getPtrPosition( 0 ); double *const __restrict__ position_y = nDim_particle > 1 ? 
particles->getPtrPosition( 1 ) : nullptr; double *const __restrict__ position_z = nDim_particle > 2 ? particles->getPtrPosition( 2 ) : nullptr; @@ -2436,7 +2436,7 @@ void Species::removeTaggedParticlesPerBin( // Total number of bins / cells const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Epart[0:nparts*3],\ Bpart[0:nparts*3], \ @@ -2478,7 +2478,7 @@ void Species::removeTaggedParticlesPerBin( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle( last_photon_index, ipart, compute_cell_keys ); #else weight[ipart] = weight[last_photon_index]; @@ -2512,7 +2512,7 @@ void Species::removeTaggedParticlesPerBin( } gamma[ipart] = gamma[0*nparts+last_photon_index]; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { thetaold[0*nparts+ipart] = thetaold[0*nparts+last_photon_index]; } @@ -2539,13 +2539,14 @@ void Species::removeTaggedParticlesPerBin( } // if last_index[ibin] > first_index[ibin] } // end loop over the bins -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif } //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void Species::removeTaggedParticles( SmileiMPI *smpi, int *const first_index, @@ -2554,8 +2555,6 @@ void Species::removeTaggedParticles( bool compute_cell_keys) { -#ifdef SMILEI_OPENACC_MODE - unsigned int new_n_parts = 0; unsigned int nb_deleted = 0; @@ -2623,7 +2622,7 @@ void Species::removeTaggedParticles( // that will not be erased // Backward loop over the tagged particles to fill holes in the photon particle array (at the bin level only) -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc loop seq //#endif for( int ipart=last_moving_index-1 ; ipart>=*first_index; ipart-- ) { @@ -2700,9 +2699,9 @@ void Species::removeTaggedParticles( } } // if nparts > 0 +} #endif -} // ------------------------------------------------ // Set position when using restart & moving window diff --git a/src/Species/Species.h b/src/Species/Species.h index 83a2bab9d..d4af3bf9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -6,7 +6,7 @@ // #include "PyTools.h" #include "Particles.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include "nvidiaParticles.h" #endif #include "Params.h" @@ -382,7 +382,7 @@ class Species return particles->capacity(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void allocateParticlesOnDevice(); @@ -566,12 +566,14 @@ class Species //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void removeTaggedParticles( SmileiMPI *smpi, int *const first_index, int *const last_index, int ithread, bool compute_cell_keys = false); +#endif //! 
Moving window boundary conditions managment void disableXmax(); diff --git a/src/Tools/Pragma.h b/src/Tools/Pragma.h index b1a81cdae..0fb5e1e9d 100644 --- a/src/Tools/Pragma.h +++ b/src/Tools/Pragma.h @@ -31,7 +31,7 @@ #if defined ( SMILEI_ACCELERATOR_GPU_OMP ) #define ATOMIC(mode) \ _Pragma( TOSTRING(omp atomic mode)) -#elif defined ( SMILEI_OPENACC_MODE ) +#elif defined ( SMILEI_ACCELERATOR_GPU_OACC ) #define ATOMIC(mode) \ _Pragma( TOSTRING(acc atomic mode)) #endif diff --git a/src/Tools/gpu.cpp b/src/Tools/gpu.cpp index 7ce000e03..497786096 100644 --- a/src/Tools/gpu.cpp +++ b/src/Tools/gpu.cpp @@ -1,6 +1,6 @@ #include "gpu.h" -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_ACCELERATOR_GPU_OACC ) #error "You can not enable both OpenACC and OpenMP GPU support" #endif @@ -29,7 +29,7 @@ #else #error "Asking for OpenMP support without enabling compiler support for OpenMP" #endif -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #if defined( _OPENACC ) #include #else @@ -46,11 +46,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( alloc \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data create( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -61,11 +62,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( to \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data copyin( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -75,11 +77,12 @@ namespace smilei { const unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update to( byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update device( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -89,11 +92,12 @@ namespace smilei { unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update from( byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update host( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -104,11 +108,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( from \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data copyout( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -119,11 +124,12 @@ namespace smilei { #if 
defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( delete \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data delete( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -154,7 +160,7 @@ namespace smilei { SMILEI_ASSERT( a_device_pointer != nullptr ); return const_cast( a_device_pointer ); -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) //return const_cast( ::acc_deviceptr( a_host_pointer ) ); return ::acc_deviceptr( const_cast(a_host_pointer) ) ; #else @@ -171,7 +177,7 @@ namespace smilei { a_count * an_object_size, 0, 0, device_num, device_num ) != 0 ) { ERROR( "omp_target_memcpy failed" ); } -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // It seems that the interface of ::acc_memcpy_device does not accept ptr to array of const type ! // https://www.openacc.org/sites/default/files/inline-files/OpenACC.2.7.pdf // void acc_memcpy_device( d_void* dest, d_void* src, size_t bytes ); diff --git a/src/Tools/gpu.h b/src/Tools/gpu.h index 28a8c98da..bc6552986 100644 --- a/src/Tools/gpu.h +++ b/src/Tools/gpu.h @@ -19,7 +19,7 @@ namespace smilei { #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "omp declare target" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END _Pragma( "omp end declare target" ) #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "omp atomic update" ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "acc routine seq" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "acc atomic" ) diff --git a/src/Tools/gpuRandom.h b/src/Tools/gpuRandom.h index 916a7b8f8..bdb9aca59 100644 --- a/src/Tools/gpuRandom.h +++ b/src/Tools/gpuRandom.h @@ -1,7 +1,7 @@ #ifndef GPU_RANDOM #define GPU_RANDOM -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // #include #include "curand_kernel.h" #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -29,7 +29,7 @@ namespace smilei { { protected: using State = -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) ::curandState_t; #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) // TODO @@ -42,7 +42,7 @@ namespace smilei { public: Random() -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) : a_state_{ 0xDEADBEEFU } #else @@ -53,26 +53,36 @@ namespace smilei { } // Initialization +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) void init( unsigned long long seed, unsigned long long seq, unsigned long long offset ) { -#if defined( SMILEI_OPENACC_MODE ) // Cuda generator initialization ::curand_init( seed, seq, offset, &a_state_ ); + } #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + void init( unsigned long long seed, + unsigned long long , + unsigned long long ) + { // Hip generator initialization // ::hiprand_init( seed, seq, offset, &state ); a_state_ = State{ static_cast( seed ) }; + } #else + void init( unsigned long long seed, + unsigned long long , + unsigned long long ) + { a_state_ = State{ static_cast( seed ) }; -#endif } +#endif // Initialization double uniform() { -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) return ::curand_uniform( &a_state_ ); #elif defined( 
SMILEI_ACCELERATOR_GPU_OMP ) // TODO diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h index 63753fb20..d9525723d 100755 --- a/src/Tools/userFunctions.h +++ b/src/Tools/userFunctions.h @@ -1,5 +1,5 @@ -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -36,7 +36,7 @@ class userFunctions //! \param array array in which to find the value //! \param elem element to be found //! \param nb_elem number of elements -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif template From da51604bac5a2590f4e04df582ff9d38d223c08a Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Mon, 27 May 2024 14:59:52 +0200 Subject: [PATCH 24/28] update ci --- .gitlab-ci.yml | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f50bfd819..cf3208df7 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,8 +15,7 @@ install: stage: install only: - develop - - particle_exchange - + script: # Force workdir cleaning in case of retried - echo "CI_PIPELINE_ID = " $CI_PIPELINE_ID @@ -34,8 +33,7 @@ compile_default: stage: compile_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -46,8 +44,7 @@ runQuick: stage: run_quick only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -58,8 +55,7 @@ run1D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -71,8 +67,7 @@ run2D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -86,8 +81,7 @@ run3D: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -102,8 +96,7 @@ runAM: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -115,8 +108,7 @@ runCollisions: stage: run_default only: - develop - - particle_exchange - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation From 2d0474ae533225023478e40ef0a757537d665b7c Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Tue, 28 May 2024 10:49:09 +0200 Subject: [PATCH 25/28] test CI From 83ee20d1a895da93ce092af8627bb009f65f63ab Mon Sep 17 00:00:00 2001 From: Arnaud Beck Date: Tue, 28 May 2024 11:43:27 +0200 Subject: [PATCH 26/28] retest CI --- .gitlab-ci.yml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cf3208df7..e2efed6dc 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,8 +8,8 @@ stages: - compile_debug - compile_no_mpi_threadmultiple - compile_no_openmp - - compile_omptasks - - run_omptasks +# - compile_omptasks +# - run_omptasks install: stage: install @@ -164,21 +164,21 @@ compile_no_openmp: - make clean - python validation/validation.py -k noopenmp -c -v -compile_omptasks: - stage: compile_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei - - make clean - - python validation/validation.py -k omptasks -c -v - -run_omptasks: - stage: run_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation - - python 
validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v +#compile_omptasks: +# stage: compile_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei +# - make clean +# - python validation/validation.py -k omptasks -c -v +# +#run_omptasks: +# stage: run_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation +# - python validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v From b9754d7101c874e68359bb636916cb08f24520e9 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 31 May 2024 18:08:35 +0200 Subject: [PATCH 27/28] support matplotlib 3.9 --- doc/Sphinx/Use/namelist.rst | 32 +++++++++++++++++++------------- happi/_Utils.py | 5 ++++- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index 6c5eaf2be..a07f19005 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -3347,19 +3347,20 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Note:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. - They are actually the velocities multiplied by the lorentz factor, i.e., - :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only - inside the ``filter`` function (not for the output of the diagnostic). - -.. Note:: The ``id`` attribute contains the :doc:`particles identification number`. - This number is set to 0 at the beginning of the simulation. **Only after particles have - passed the filter**, they acquire a positive ``id``. - -.. Note:: For advanced filtration, Smilei provides the quantity ``Main.iteration``, - accessible within the ``filter`` function. Its value is always equal to the current - iteration number of the PIC loop. The current time of the simulation is thus - ``Main.iteration * Main.timestep``. +.. Note:: + + * In the ``filter`` function only, the ``px``, ``py`` and ``pz`` quantities + are not exactly the momenta. + They are actually the velocities multiplied by the lorentz factor, i.e., + :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. + This is *not* true for the output of the diagnostic. + * The ``id`` attribute contains the :doc:`particles identification number`. + This number is set to 0 at the beginning of the simulation. **Only after particles have + passed the filter**, they acquire a positive ``id``. + * For advanced filtration, Smilei provides the quantity ``Main.iteration``, + accessible within the ``filter`` function. Its value is always equal to the current + iteration number of the PIC loop. The current time of the simulation is thus + ``Main.iteration * Main.timestep``. .. py:data:: attributes @@ -3372,6 +3373,11 @@ for instance:: (``"chi"``, only for species with radiation losses) or the fields interpolated at their positions (``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"``, ``"Bz"``). +.. Note:: Here, interpolated fields are normally computed after the Maxwell solver. + They may thus differ by half a timestep from those computed at the middle of the + timestep to push particles. When exact values are needed, use the option + :py:data:`keep_interpolated_fields`. + ---- .. 
rst-class:: experimental diff --git a/happi/_Utils.py b/happi/_Utils.py index 28dd028df..070046786 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -42,7 +42,10 @@ def updateMatplotLibColormaps(): if "smilei" in matplotlib.pyplot.colormaps(): return def register(name, d): cmap = matplotlib.colors.LinearSegmentedColormap(name, d, N=256, gamma=1.0) - matplotlib.pyplot.register_cmap(cmap=cmap) + try: + matplotlib.pyplot.register_cmap(cmap=cmap) + except Exception as e: + matplotlib.colormaps.register(cmap) register(u"smilei", { 'red' :((0., 0., 0.), (0.0625 , 0.091, 0.091), (0.09375, 0.118, 0.118), (0.125 , 0.127, 0.127), (0.1875 , 0.135, 0.135), (0.21875, 0.125, 0.125), (0.28125, 0.034, 0.034), (0.3125 , 0.010, 0.010), (0.34375, 0.009, 0.009), (0.4375 , 0.049, 0.049), (0.46875, 0.057, 0.057), (0.5 , 0.058, 0.058), (0.59375, 0.031, 0.031), (0.625 , 0.028, 0.028), (0.65625, 0.047, 0.047), (0.71875, 0.143, 0.143), (0.78125, 0.294, 0.294), (0.84375, 0.519, 0.519), (0.90625, 0.664, 0.664), (0.9375 , 0.760, 0.760), (0.96875, 0.880, 0.880), (1., 1., 1. )), 'green':((0., 0., 0.), (0.21875, 0.228, 0.228), (0.78125, 0.827, 0.827), (0.8125 , 0.852, 0.852), (0.84375, 0.869, 0.869), (0.9375 , 0.937, 0.937), (0.96875, 0.967, 0.967), (1. , 1. , 1. )), From d5eadb44ad81b974c52b9d61a9473903c57f33a8 Mon Sep 17 00:00:00 2001 From: Frederic Perez Date: Fri, 31 May 2024 18:32:03 +0200 Subject: [PATCH 28/28] Fix recent commit for laser offset --- src/Params/Params.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index b1fafcb09..69973d104 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -1063,19 +1063,18 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") ); } vector profiles_n; + vector profiles_kept; for( unsigned int i = 0; i < 2; i++ ) { - if( profiles[i] == Py_None ) { - Py_DECREF( profiles[i] ); - profiles.erase( profiles.begin() ); - } else { - profiles_n.push_back( i ); + if( profiles[i] != Py_None ) { + profiles_kept.push_back( profiles[i] ); + profiles_n.push_back( i + 1 ); } } - if( profiles.size() == 0 ) { + if( profiles_kept.size() == 0 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") ); } - for( unsigned int i=0; i namelistsFiles ) : // Make the propagation happen and write out the file if( ! smpi->test_mode ) { - propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); + propagateX( profiles_kept, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); } }