diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6faa6ff17..e2efed6dc 100755 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,14 +8,14 @@ stages: - compile_debug - compile_no_mpi_threadmultiple - compile_no_openmp - - compile_omptasks - - run_omptasks +# - compile_omptasks +# - run_omptasks install: stage: install only: - develop - + script: # Force workdir cleaning in case of retried - echo "CI_PIPELINE_ID = " $CI_PIPELINE_ID @@ -33,7 +33,7 @@ compile_default: stage: compile_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -44,7 +44,7 @@ runQuick: stage: run_quick only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -55,7 +55,7 @@ run1D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -67,7 +67,7 @@ run2D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -81,7 +81,7 @@ run3D: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -96,7 +96,7 @@ runAM: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -108,7 +108,7 @@ runCollisions: stage: run_default only: - develop - + script: # Move in test dir - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation @@ -164,21 +164,21 @@ compile_no_openmp: - make clean - python validation/validation.py -k noopenmp -c -v -compile_omptasks: - stage: compile_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei - - make clean - - python validation/validation.py -k omptasks -c -v - -run_omptasks: - stage: run_omptasks - only: - - develop - - script: - - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation - - python validation.py -k omptasks -b 
"tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v +#compile_omptasks: +# stage: compile_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei +# - make clean +# - python validation/validation.py -k omptasks -c -v +# +#run_omptasks: +# stage: run_omptasks +# only: +# - develop +# +# script: +# - cd /sps3/gitlab-runner/$CI_PIPELINE_ID/smilei/validation +# - python validation.py -k omptasks -b "tst2d_tasks_01_radiation_pressure_acc.py" -m 4 -o 4 -n 1 -v diff --git a/doc/Sphinx/Overview/material.rst b/doc/Sphinx/Overview/material.rst index 3322c2857..66ed26180 100644 --- a/doc/Sphinx/Overview/material.rst +++ b/doc/Sphinx/Overview/material.rst @@ -30,7 +30,7 @@ Papers involving Smilei ^^^^^^^^^^^^^^^^^^^^^^^^ Only papers published in peer-reviewed journals are listed (for the complete list of citing papers see `Google Scholar `_). -As of April 2024, 181 papers have been published covering a broad range of topics: +As of May 2024, 189 papers have been published covering a broad range of topics: * laser-plasma interaction (LPI) / inertial fusion (FCI) * ultra-high intensity (UHI) applications @@ -50,13 +50,55 @@ Following is the distribution of these topics in the listed publications up to N Use the python script doc/doi2publications.py to generate entries from a DOI number, and paste them here You can count the number of papers in the list with the vim command :%s/.. \[//gn. +.. [Krafft2024b] + + C. Krafft, P. Savoini, and F. J. Polanco-Rodríguez, + `Mechanisms of Fundamental Electromagnetic Wave Radiation in the Solar Wind`, + `The Astrophysical Journal Letters 967, 2 (2024) `_ + +.. [Salgado2024] + + F. C. Salgado, A. Kozan, D. Seipt, D. Hollatz, P. Hilz, M. Kaluza, A. Sävert, A. Seidel, D. Ullmann, Y. Zhao, and M. Zepf, + `All-optical source size and emittance measurements of laser-accelerated electron beams`, + `Physical Review Accelerators and Beams 27, 052803 (2024) `_ + +.. [Ivanov2024] + + K. A. Ivanov, D. A. 
Gorlova, I. N. Tsymbalov, I. P. Tsygvintsev, S. A. Shulyapov, R. V. Volkov, and A. B. Savel’ev, + `Laser-driven pointed acceleration of electrons with preformed plasma lens`, + `Physical Review Accelerators and Beams 27, 051301 (2024) `_ + +.. [Timmis2024] + + R. J. L. Timmis, R. W. Paddock, I. Ouatu, J. Lee, S. Howard, E. Atonga, R. T. Ruskov, H. Martin, R. H. W. Wang, R. Aboushelbaya, M. W. von der Leyen, E. Gumbrell and P. A. Norreys, + `Attosecond and nano‐Coulomb electron bunches via the Zero Vector Potential mechanism`, + `Scientific Reports 14, 10805 (2024) `_ + +.. [Azamoum2024] + + Y. Azamoum, G. A. Becker, S. Keppler, G. Duchateau, S. Skupin, M. Grech, F. Catoire, S. Hell, I. Tamer, M. Hornung, M. Hellwing, A. Kessler, F. Schorcht, and M. C. Kaluza, + `Optical probing of ultrafast laser-induced solid-to-overdense-plasma transitions`, + `Light: Science & Applications 13, 109 (2024) `_ + +.. [Pan2024] + + Z. Pan, J. Liu, P. Wang, Z. Mei, Z. Cao, D. Kong, S. Xu, Z. Liu, Y. Liang, Z. Peng, T. Xu, T. Song, X. Chen, Q. Wu, Y. Zhang, Q. Han, H. Chen, J. Zhao, Y. Gao, S. Chen, Y. Zhao, X. Yan, Y. Shou, W. Ma, + `Electron acceleration and x-ray generation from near-critical-density carbon nanotube foams driven by moderately relativistic lasers`, + `Physics of Plasmas 31, 043108 (2024) `_ + +.. [Yao2024] + + W. Yao, M. Nakatsutsumi, S. Buffechoux, P. Antici, M. Borghesi, A. Ciardi, S. N. Chen, E. d’Humières, L. Gremillet, R. Heathcote, V. Horný, P. McKenna, M. N. Quinn, L. Romagnani, R. Royle, G. Sarri, Y. Sentoku, H.-P. Schlenvoigt, T. Toncian, O. Tresca, L. Vassura, O. Willi, J. Fuchs, + `Optimizing laser coupling, matter heating, and particle acceleration from solids using multiplexed ultraintense lasers`, + `Matter and Radiation at Extremes 9, 047202 (2024) `_ + .. [Luo2024] M. Luo, C. Riconda, I. Pusztai, A. Grassi, J. S. Wurtele, and T. Fülöp, `Control of autoresonant plasma beat-wave wakefield excitation`, - `Phys. Rev. 
Research 6, 013338 (2024) `_ + `Physical Review Research 6, 013338 (2024) `_ -.. [Krafft2024] +.. [Krafft2024a] C. Krafft and P. Savoini, `Electrostatic Wave Decay in the Randomly Inhomogeneous Solar Wind`, @@ -127,7 +169,13 @@ Following is the distribution of these topics in the listed publications up to N A. Seidel, B. Lei, C. Zepter, M. C. Kaluza, A. Sävert, M. Zepf, and D. Seipt, `Polarization and CEP dependence of the transverse phase space in laser driven accelerators`, `Physical Review Research 6, 013056 (2024) `_ - + +.. [Krishnamurthy2023] + + S. Krishnamurthy, S. Chintalwad, A. P. L. Robinson, R. M. G. M. Trines, and B. Ramakrishna, + `Observation of proton modulations in laser–solid interaction`, + `Plasma Physics and Controlled Fusion 65 085020 (2023) `_ + .. [Gao2023b] X. Gao, @@ -216,7 +264,7 @@ Following is the distribution of these topics in the listed publications up to N E. Starodubtseva, I. Tsymbalov, D. Gorlova, K. Ivanov, and A. Savel'ev, `Low energy electron injection for direct laser acceleration`, - `Phys. Plasmas 30, 083105 (2023) `_ + `Physics of Plasmas 30, 083105 (2023) `_ .. [Maffini2023] @@ -228,7 +276,7 @@ Following is the distribution of these topics in the listed publications up to N S. Yu. Gus'kov, Ph. Korneev, and M. Murakami, `Laser-driven electrodynamic implosion of fast ions in a thin shell`, - `Matter Radiat. Extremes 8, 056602 (2023) `_ + `Matter and Radiation at Extremes 8, 056602 (2023) `_ .. [RezaeiPandari2023] @@ -240,19 +288,19 @@ Following is the distribution of these topics in the listed publications up to N J. Jonnerby, A. von Boetticher, J. Holloway, L. Corner, A. Picksley, A. J. Ross, R. J. Shalloo , C. Thornton, N. Bourgeois, R. Walczak, and S. M. Hooker, `Measurement of the decay of laser-driven linear plasma wakefields`, - `Phys. Rev. E 108, 055211 (2023) `_ + `Physical Review E 108, 055211 (2023) `_ .. [Drobniak2023] P. Drobniak, E. Baynard, C. Bruni, K. Cassou, C. Guyot, G. Kane, S. Kazamias, V. Kubytskyi, N. 
Lericheux, B. Lucas, M. Pittman, F. Massimo, A. Beck, A. Specka, P. Nghiem, and D. Minenna, `Random scan optimization of a laser-plasma electron injector based on fast particle-in-cell simulations`, - `Phys. Rev. Accel. Beams 26, 091302 (2023) `_ + `Physical Review Accelerators and Beams 26, 091302 (2023) `_ .. [Bukharskii2023] N. Bukharskii and Ph. Korneev, `Intense widely controlled terahertz radiation from laser-driven wires`, - `Matter Radiat. Extremes 8, 044401 (2023) `_ + `Matter and Radiation at Extremes 8, 044401 (2023) `_ .. [Schmitz2023] @@ -276,7 +324,7 @@ Following is the distribution of these topics in the listed publications up to N X. Gao, `Ionization dynamics of sub-micrometer-sized clusters in intense ultrafast laser pulses`, - `Phys. Plasmas 30, 052102 (2023) `_ + `Physics of Plasmas 30, 052102 (2023) `_ .. [Krafft2023] @@ -294,7 +342,7 @@ Following is the distribution of these topics in the listed publications up to N A. Ghizzo, D. Del Sarto, and H. Betar, `Collisionless Heating Driven by Vlasov Filamentation in a Counterstreaming Beams Configuration`, - `Phys. Rev. Lett. 131, 035101 (2023) `_ + `Physical Review Letters 131, 035101 (2023) `_ .. [Yang2023] @@ -306,31 +354,31 @@ Following is the distribution of these topics in the listed publications up to N W. Yao, A. Fazzini, S.N. Chen, K. Burdonov, J. Béard, M. Borghesi, A. Ciardi, M. Miceli, S. Orlando, X. Ribeyre, E. d'Humières and J. Fuchs, `Investigating particle acceleration dynamics in interpenetrating magnetized collisionless super-critical shocks`, - `J. Plasma Phys. 89, 915890101 (2023) `_ + `Journal of Plasma Physics 89, 915890101 (2023) `_ .. [Pak2023] T. Pak, M. Rezaei-Pandari, S. B. Kim, G. Lee, D. H. Wi, C. I. Hojbota, M. Mirzaie, H. Kim, J. H. Sung, S. K. Lee, C. Kang and K.-Y. Kim, `Multi-millijoule terahertz emission from laser-wakefield-accelerated electrons`, - `Light Sci Appl 12, 37 (2023) `_ + `Light: Science & Applications 12, 37 (2023) `_ .. [Istokskaia2023] V. 
Istokskaia, M. Tosca, L. Giuffrida, J. Psikal, F. Grepl, V. Kantarelou, S. Stancek, S. Di Siena, A. Hadjikyriacou, A. McIlvenny, Y. Levy, J. Huynh, M. Cimrman, P. Pleskunov, D. Nikitin, A. Choukourov, F. Belloni, A. Picciotto, S. Kar, M. Borghesi, A. Lucianetti, T. Mocek and D. Margarone, `A multi-MeV alpha particle source via proton-boron fusion driven by a 10-GW tabletop laser`, - `Commun Phys 6, 27 (2023) `_ + `Communications Physics 6, 27 (2023) `_ .. [Yoon2023] Y. D. Yoon, D. E. Wendel and G. S. Yun, `Equilibrium selection via current sheet relaxation and guide field amplification`, - `Nat Commun 14, 139 (2023) `_ + `Nature Communications 14, 139 (2023) `_ .. [Galbiati2023] M. Galbiati, A. Formenti, M. Grech and M. Passoni, `Numerical investigation of non-linear inverse Compton scattering in double-layer targets`, - `Front. Phys. 11, fphy.2023.1117543 (2023) `_ + `Frontiers in Physics 11, fphy.2023.1117543 (2023) `_ .. [Sakai2023] @@ -342,7 +390,7 @@ Following is the distribution of these topics in the listed publications up to N A. Golovanov, I. Yu. Kostyukov, A. Pukhov and V. Malka, `Energy-Conserving Theory of the Blowout Regime of Plasma Wakefield`, - `Phys. Rev. Lett. 130, 105001 (2023) `_ + `Physical Review Letters 130, 105001 (2023) `_ .. [Miethlinger2023] @@ -354,13 +402,13 @@ Following is the distribution of these topics in the listed publications up to N C. Zepter, A. Seidel, M. Zepf, M. C. Kaluza and A. Sävert, `Role of spatiotemporal couplings in stimulated Raman side scattering`, - `Phys. Rev. Research 5, L012023 (2023) `_ + `Physical Review Research 5, L012023 (2023) `_ .. [Marini2023] S. Marini, M. Grech, P. S. Kleij, M. Raynaud and C. Riconda, `Electron acceleration by laser plasma wedge interaction`, - `Phys. Rev. Research 5, 013115 (2023) `_ + `Physical Review Research 5, 013115 (2023) `_ .. [Blackman2022] @@ -420,7 +468,7 @@ Following is the distribution of these topics in the listed publications up to N D. Margarone, J. Bonvalet, L. 
Giuffrida, A. Morace, V. Kantarelou, M. Tosca, D. Raffestin, P. Nicolai, A. Picciotto, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. Habara and D. Batani, `In-Target Proton–Boron Nuclear Fusion Using a PW-Class Laser`, - `Appl. Sci. 12(3), 1444 (2022) `_ + `Applied Sciences 12(3), 1444 (2022) `_ .. [Kochetkov2022] @@ -432,13 +480,13 @@ Following is the distribution of these topics in the listed publications up to N A. Oudin, A. Debayle, C. Ruyer, D. Benisti, `Cross-beam energy transfer between spatially smoothed laser beams`, - `Phys. Plasmas 29, 112112 (2022) `_ + `Physics of Plasmas 29, 112112 (2022) `_ .. [Chen2022] Q. Chen, D. Maslarova, J. Wang, S. Li, and D. Umstadter, `Injection of electron beams into two laser wakefields and generation of electron rings`, - `Phys. Rev. E 106, 055202 (2022) `_ + `Physical Review E 106, 055202 (2022) `_ .. [Kumar2022b] @@ -450,7 +498,7 @@ Following is the distribution of these topics in the listed publications up to N S. Kumar, D. K. Singh and H. K. Malik, `Comparative study of ultrashort single-pulse and multi-pulse driven laser wakefield acceleration`, - `Laser Phys. Lett. 20, 026001 (2022) `_ + `Laser Physics Letters 20, 026001 (2022) `_ .. [Miloshevsky2022] @@ -474,25 +522,25 @@ Following is the distribution of these topics in the listed publications up to N I. Ouatu, B. T. Spiers, R. Aboushelbaya, Q. Feng, M. W. von der Leyen, R. W. Paddock, R. Timmis, C. Ticos, K. M. Krushelnick and P. A. Norreys, `Ionization states for the multipetawatt laser-QED regime`, - `Phys. Rev. E 106, 015205 (2022) `_ + `Physical Review E 106, 015205 (2022) `_ .. [Beth2022] A. Beth, H. Gunell, C. Simon Wedlund, C. Goetz, H. Nilsson and M. Hamrin, `First investigation of the diamagnetic cavity boundary layer with a 1D3V PIC simulation`, - `A&A 667, A143 (2022) `_ + `Astronomy & Astrophysics 667, A143 (2022) `_ .. [Guo2022] Y. Guo, X. Geng, L. Ji, B. Shen and R. 
Li, `Improving the accuracy of hard photon emission by sigmoid sampling of the quantum-electrodynamic table in particle-in-cell Monte Carlo simulations`, - `Phys. Rev. E 105, 025309 (2022) `_ + `Physical Review E 105, 025309 (2022) `_ .. [Pae2022] - K. . Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, + K. H. Pae, C. M. Kim, V. B. Pathak, C.-M. Ryu and C. H. Nam, `Direct laser acceleration of electrons from a plasma mirror by an intense few-cycle Laguerre–Gaussian laser and its dependence on the carrier-envelope phase`, - `Plasma Phys. Control. Fusion 64, 055013 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 055013 (2022) `_ .. [Zhang2022a] @@ -505,43 +553,43 @@ Following is the distribution of these topics in the listed publications up to N Q. Han, X. Geng, B. Shen, Z. Xu and L. Ji, `Ultra-fast polarization of a thin electron layer in the rotational standing-wave field driven by double ultra-intense laser pulses`, - `New J. Phys. 24, 063013 (2022) `_ + `New Journal of Physics 24, 063013 (2022) `_ .. [Gothel2022] I. Göthel, C. Bernert, M. Bussmann, M. Garten, T. Miethlinger, M. Rehwald, K. Zeil, T. Ziegler, T. E. Cowan, U. Schramm and T. Kluge, `Optimized laser ion acceleration at the relativistic critical density surface`, - `Plasma Phys. Control. Fusion 64, 044010 (2022) `_ + `Plasma Physics and Controlled Fusion 64, 044010 (2022) `_ .. [Fazzini2022] A. Fazzini, W. Yao, K. Burdonov, J. Béard, S. N. Chen, A. Ciardi, E. d’Humières, R. Diab, E. D. Filippov, S. Kisyov, V. Lelasseux, M. Miceli, Q. Moreno, S. Orlando, S. Pikuz, X. Ribeyre, M. Starodubtsev, R. Zemskov and J. Fuchs, `Particle energization in colliding subcritical collisionless shocks investigated in the laboratory`, - `A&A 665, A87 (2022) `_ + `Astronomy & Astrophysics 665, A87 (2022) `_ .. [Bykov2022] A. M. Bykov, S. M. Osipov and V. I. Romanskii, `Acceleration of Cosmic Rays to Energies above 1015 eV by Transrelativistic Shocks`, - `J. Exp. Theor. Phys. 
134, 487-497 (2022) `_ + `Journal of Experimental and Theoretical Physics 134, 487-497 (2022) `_ .. [Sundstrom2022] A. Sundström, M. Grech, I. Pusztai and C. Riconda, `Stimulated-Raman-scattering amplification of attosecond XUV pulses with pulse-train pumps and application to local in-depth plasma-density measurement`, - `Phys. Rev. E 106, 045208 (2022) `_ + `Physical Review E 106, 045208 (2022) `_ .. [Krafft2022b] C. Krafft and P. Savoini, `Third and Fourth Harmonics of Electromagnetic Emissions by a Weak Beam in a Solar Wind Plasma with Random Density Fluctuations`, - `ApJL 934, L28 (2022) `_ + `The Astrophysical Journal Letters 934, L28 (2022) `_ .. [Krafft2022a] C. Krafft and P. Savoini, `Fundamental Electromagnetic Emissions by a Weak Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 924, L24 (2022) `_ + `The Astrophysical Journal Letters 924, L24 (2022) `_ .. [Kong2022] @@ -553,7 +601,7 @@ Following is the distribution of these topics in the listed publications up to N C. Davidson, Z.-M. Sheng, T. Wilson and P. McKenna, `Theoretical and computational studies of the Weibel instability in several beam–plasma interaction configurations`, - `J. Plasma Phys. 88, 905880206 (2022) `_ + `Journal of Plasma Physics 88, 905880206 (2022) `_ .. [Glek2022] @@ -565,7 +613,7 @@ Following is the distribution of these topics in the listed publications up to N D. Umstadter `Controlled Injection of Electrons for Improved Performance of Laser-Wakefield Acceleration`, - `United States: N. p., (2022) `_ + `United States Department of Energy Technical Report (2022) `_ .. [Massimo2022] @@ -584,7 +632,7 @@ Following is the distribution of these topics in the listed publications up to N P. K. Singh, F.-Y. Li, C.-K. Huang, A. Moreau, R. Hollinger, A. Junghans, A. Favalli, C. Calvi, S. Wang, Y. Wang, H. Song, J. J. Rocca, R. E. Reinovsky and S. 
Palaniyappan, `Vacuum laser acceleration of super-ponderomotive electrons using relativistic transparency injection`, - `Nat Commun 13, 54 (2022) `_ + `Nature Communications 13, 54 (2022) `_ .. [Lobet2022] @@ -615,13 +663,13 @@ Following is the distribution of these topics in the listed publications up to N P. Tomassini, F. Massimo, L. Labate and L. A. Gizzi, `Accurate electron beam phase-space theory for ionization-injection schemes driven by laser pulses`, - `High Pow Laser Sci Eng 10, e15 (2021) `_ + `High Power Laser Science and Engineering 10, e15 (2021) `_ .. [Meinhold2021] T. A. Meinhold and N. Kumar, `Radiation pressure acceleration of protons from structured thin-foil targets`, - `J. Plasma Phys. 87, 905870607 (2021) `_ + `Journal of Plasma Physics 87, 905870607 (2021) `_ .. [Bonvalet2021b] @@ -633,13 +681,13 @@ Following is the distribution of these topics in the listed publications up to N Y. Shi, D. R. Blackman and A. Arefiev, `Electron acceleration using twisted laser wavefronts`, - `Plasma Phys. Control. Fusion 63, 125032 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125032 (2021) `_ .. [Kumar2021] N. Kumar and B. Reville, `Nonthermal Particle Acceleration at Highly Oblique Nonrelativistic Shocks`, - `ApJL 921, L14 (2021) `_ + `The Astrophysical Journal Letters 921, L14 (2021) `_ .. [Ghaith2021] @@ -651,13 +699,13 @@ Following is the distribution of these topics in the listed publications up to N V. Horný and L. Veisz, `Generation of single attosecond relativistic electron bunch from intense laser interaction with a nanosphere`, - `Plasma Phys. Control. Fusion 63, 125025 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 125025 (2021) `_ .. [Krafft2021] C. Krafft and P. Savoini, `Second Harmonic Electromagnetic Emissions by an Electron Beam in Solar Wind Plasmas with Density Fluctuations`, - `ApJL 917, L23 (2021) `_ + `The Astrophysical Journal Letters 917, L23 (2021) `_ .. 
[Khalilzadeh2021c] @@ -681,7 +729,7 @@ Following is the distribution of these topics in the listed publications up to N Y. Shou, D. Wang, P. Wang, J. Liu, Z. Cao, Z. Mei, S. Xu, Z. Pan, D. Kong, G. Qi, Z. Liu, Y. Liang, Z. Peng, Y. Gao, S. Chen, J. Zhao, Y. Zhao, H. Xu, J. Zhao, Y. Wu, X. Yan and W. Ma, `High-efficiency generation of narrowband soft x rays from carbon nanotube foams irradiated by relativistic femtosecond lasers`, - `Opt. Lett. 46, 3969 (2021) `_ + `Optics Letters 46, 3969 (2021) `_ .. [Khalilzadeh2021b] @@ -693,67 +741,67 @@ Following is the distribution of these topics in the listed publications up to N H. Hosseinkhani, M. Pishdast, J. Yazdanpanah and S. A. Ghasemi, `Investigation of the classical and quantum radiation reaction effect on interaction of ultra high power laser with near critical plasma`, - `J. Nuclear Sci. Technol. 42, 27-35 (2021) `_ + `Journal of Nuclear Science, Engineering and Technology 42, 27-35 (2021) `_ .. [MercuriBaron2021] A. Mercuri-Baron, M. Grech, F. Niel, A. Grassi, M. Lobet, A. Di Piazza and C. Riconda, `Impact of the laser spatio-temporal shape on Breit–Wheeler pair production`, - `New J. Phys. 23, 085006 (2021) `_ + `New Journal of Physics 23, 085006 (2021) `_ .. [Peng2021] H. Peng, C. Riconda, S. Weber, C.T. Zhou and S.C. Ruan, `Frequency Conversion of Lasers in a Dynamic Plasma Grating`, - `Phys. Rev. Applied 15, 054053 (2021) `_ + `Physical Review Applied 15, 054053 (2021) `_ .. [Shi2021a] Y. Shi, D. Blackman, D. Stutman and A. Arefiev, `Generation of Ultrarelativistic Monoenergetic Electron Bunches via a Synergistic Interaction of Longitudinal Electric and Magnetic Fields of a Twisted Laser`, - `Phys. Rev. Lett. 126, 234801 (2021) `_ + `Physical Review Letters 126, 234801 (2021) `_ .. [Bonvalet2021a] J. Bonvalet, Ph. Nicolaï, D. Raffestin, E. D'humieres, D. Batani, V. Tikhonchuk, V. Kantarelou, L. Giuffrida, M. Tosca, G. Korn, A. Picciotto, A. Morace, Y. Abe, Y. Arikawa, S. Fujioka, Y. Fukuda, Y. Kuramitsu, H. 
Habara and D. Margarone, `Energetic α-particle sources produced through proton-boron reactions by high-energy high-intensity laser beams`, - `Phys. Rev. E 103, 053202 (2021) `_ + `Physical Review E 103, 053202 (2021) `_ .. [Shekhanov2021] S. A. Shekhanov and V. T. Tikhonchuk, `SRS-SBS competition and nonlinear laser energy absorption in a high temperature plasma`, - `Plasma Phys. Control. Fusion 63, 115016 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 115016 (2021) `_ .. [Psikal2021] - J Psikal, + J. Psikal, `Laser-driven ion acceleration from near-critical Gaussian plasma density profile`, - `Plasma Phys. Control. Fusion 63, 064002 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 064002 (2021) `_ .. [Yoon2021b] Y. D. Yoon, G. S. Yun, D. E. Wendel and J. L. Burch, `Collisionless relaxation of a disequilibrated current sheet and implications for bifurcated structures`, - `Nat Commun 12, 3774 (2021) `_ + `Nature Communications 12, 3774 (2021) `_ .. [Lavorenti2021] F. Lavorenti, P. Henri, F. Califano, S. Aizawa and N. André, `Electron acceleration driven by the lower-hybrid-drift instability. An extended quasilinear model`, - `A&A 652, 202141049 (2021) `_ + `Astronomy & Astrophysics 652, 202141049 (2021) `_ .. [Golovanov2021] - A A Golovanov, I Yu Kostyukov, L Reichwein, J Thomas and A Pukhov, + A. A. Golovanov, I. Yu. Kostyukov, L. Reichwein, J. Thomas and A. Pukhov, `Excitation of strongly nonlinear plasma wakefield by electron bunches`, - `Plasma Phys. Control. Fusion 63, 085004 (2021) `_ + `Plasma Physics and Controlled Fusion 63, 085004 (2021) `_ .. [Jirka2021] M. Jirka, P. Sasorov, S. S. Bulanov, G. Korn, B. Rus and S. V. Bulanov, `Reaching high laser intensity by a radiating electron`, - `Phys. Rev. A 103, 053114 (2021) `_ + `Physical Review A 103, 053114 (2021) `_ .. [Marques2021] @@ -783,7 +831,7 @@ Following is the distribution of these topics in the listed publications up to N G. Cantono, A. Permogorov, J. Ferri, E. Smetanina, A. Dmitriev, A. 
Persson, T. Fülöp and C.-G. Wahlström, `Laser-driven proton acceleration from ultrathin foils with nanoholes`, - `Sci Rep 11, 5006 (2021) `_ + `Scientific Reports 11, 5006 (2021) `_ .. [Perez2021] @@ -801,13 +849,13 @@ Following is the distribution of these topics in the listed publications up to N A. Sampath, X. Davoine, S. Corde, L. Gremillet, M. Gilljohann, M. Sangal, C. H. Keitel, R. Ariniello, J. Cary, H. Ekerfelt, C. Emma, F. Fiuza, H. Fujii, M. Hogan, C. Joshi, A. Knetsch, O. Kononenko, V. Lee, M. Litos, K. Marsh, Z. Nie, B. O’Shea, J. R. Peterson, P. San Miguel Claveria, D. Storey, Y. Wu, X. Xu, C. Zhang and M. Tamburini, `Extremely Dense Gamma-Ray Pulses in Electron Beam-Multifoil Collisions`, - `Phys. Rev. Lett. 126, 064801 (2021) `_ + `Physical Review Letters 126, 064801 (2021) `_ .. [Marini2021a] S. Marini, P. S. Kleij, F. Pisani, F. Amiranoff, M. Grech, A. Macchi, M. Raynaud and C. Riconda, `Ultrashort high energy electron bunches from tunable surface plasma waves driven with laser wavefront rotation`, - `Phys. Rev. E 103, L021201 (2021) `_ + `Physical Review E 103, L021201 (2021) `_ .. [Yao2021] @@ -819,14 +867,14 @@ Following is the distribution of these topics in the listed publications up to N E. G. Gelfer, A. M, Fedotov and S. Weber, `Radiation induced acceleration of ions in a laser irradiated transparent foil`, - `New J. Phys. 23, 095002 (2021) `_ + `New Journal of Physics 23, 095002 (2021) `_ `arXiv:1907.02621 `_ .. [Siminos2021] E. Siminos, I. Thiele and C. Olofsson, `Laser Wakefield Driven Generation of Isolated Carrier-Envelope-Phase Tunable Intense Subcycle Pulses`, - `Phys. Rev. Lett. 126, 044801 (2021) `_ + `Physical Review Letters 126, 044801 (2021) `_ `arXiv:1902.05014 `_ .. [Budriga2020] @@ -839,13 +887,13 @@ Following is the distribution of these topics in the listed publications up to N P. A. P. Nghiem, R. Assmann, A. 
Beck et al., `Toward a plasma-based accelerator at high beam energy with high beam charge and high beam quality`, - `Phys. Rev. Accel. Beams 23, 031301 (2020) `_ + `Physical Review Accelerators and Beams 23, 031301 (2020) `_ .. [Pisarczyk2020] T. Pisarczyk, M. Kalal, S. Yu. Gus'kov et al., `Hot electron retention in laser plasma created under terawatt subnanosecond irradiation of Cu targets`, - `Plasma Phys. Control. Fusion 62, 115020 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115020 (2020) `_ .. [Pagano2020] @@ -863,25 +911,25 @@ Following is the distribution of these topics in the listed publications up to N H. Peng, C. Riconda, M. Grech, C.-T. Zhou and S. Weber, `Dynamical aspects of plasma gratings driven by a static ponderomotive potential`, - `Plasma Phys. Control. Fusion 62, 115015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 115015 (2020) `_ .. [Glek2020] P. B. Glek, A. A. Voronin, V. Ya. Panchenko and A. M. Zheltikov, `Relativistic electron bunches locked to attosecond optical field waveforms: an attosecond light–matter bound state`, - `Laser Phys. Lett. 17 055401 (2020) `_ + `Laser Physics Letters 17 055401 (2020) `_ .. [Margarone2020] D. Margarone, A. Morace, J. Bonvalet et al., `Generation of α-Particle Beams With a Multi-kJ, Peta-Watt Class Laser System`, - `Front. Phys. 8, 343 (2020) `_ + `Frontiers in Physics 8, 343 (2020) `_ .. [Sinha2020] U. Sinha and N. Kumar, `Pair-beam propagation in a magnetized plasma for modeling the polarized radiation emission from gamma-ray bursts in laboratory astrophysics experiments`, - `Phys. Rev. E 101, 063204 (2020) `_ + `Physical Review E 101, 063204 (2020) `_ .. [Mitrofanov2020] @@ -893,81 +941,81 @@ Following is the distribution of these topics in the listed publications up to N B. T. Spiers, M. P. Hill, C. Brown, L. Ceurvorst, N. Ratan, A. F. Savin, P. Allan, E. Floyd, J. Fyrth, L. Hobbs, S. James, J. Luis, M. Ramsay, N. Sircombe, J. Skidmore, R. Aboushelbaya, M. W. Mayr, R. Paddock, R. H. W. 
Wang and P. A. Norreys, `Whole-beam self-focusing in fusion-relevant plasma`, - `Phil. Trans. R. Soc. A379, 20200159 `_ + `Philosophical Transactions of the Royal Society A379, 20200159 `_ .. [Derouillat2020] J. Derouillat and A. Beck, `Single Domain Multiple Decompositions for Particle-in-Cell simulations`, - `J. Phys.: Conf. Ser. 1596, 012052 (2020) `_ + `Journal of Physics: Conference Series 1596, 012052 (2020) `_ `arXiv:1912.04064 `_ .. [Zemzemi2020] I. Zemzemi, F. Massimo and A. Beck, `Azimuthal decomposition study of a realistic laser profile for efficient modeling of Laser WakeField Acceleration`, - `J. Phys.: Conf. Ser. 1596, 012055 (2020) `_ + `Journal of Physics: Conference Series 1596, 012055 (2020) `_ .. [Massimo2020b] F. Massimo, I. Zemzemi, A. Beck, J. Derouillat and A. Specka, `Efficient cylindrical envelope modeling for laser wakefield acceleration`, - `J. Phys.: Conf. Ser. 1596, 012054 (2020) `_ + `Journal of Physics: Conference Series 1596, 012054 (2020) `_ `arXiv:1912.04674 `_ .. [Massimo2020a] F. Massimo, A. Beck, J. Derouillat, I. Zemzemi and A. Specka, `Numerical modeling of laser tunneling ionization in particle-in-cell codes with a laser envelope model`, - `Phys. Rev. E 102, 033204 (2020) `_ + `Physical Review E 102, 033204 (2020) `_ `arXiv:2006.04433 `_ .. [Marcowith2020] A. Marcowith, G. Ferrand, M. Grech, Z. Meliani, I. Plotnikov and R. Walder, `Multi-scale simulations of particle acceleration in astrophysical systems`, - `Living Rev Comput Astrophys 6, 1 (2020) `_ + `Living Reviews in Computational Astrophysics 6, 1 (2020) `_ `arXiv:2002.09411 `_ .. [Dargent2020] J. Dargent, N. Aunai, B. Lavraud, S. Toledo‐Redondo and F. Califano, `Simulation of Plasmaspheric Plume Impact on Dayside Magnetic Reconnection`, - `Geophys. Res. Lett. 47, 2019GL086546 (2020) `_ + `Geophysical Research Letters 47, 2019GL086546 (2020) `_ `arXiv:2002.02243 `_ .. [Sundström2020b] A. Sundström, L. Gremillet, E. Siminos and I. 
Pusztai, `Collisional effects on the electrostatic shock dynamics in thin-foil targets driven by an ultraintense short pulse laser`, - `Plasma Phys. Control. Fusion 62, 085015 (2020) `_ + `Plasma Physics and Controlled Fusion 62, 085015 (2020) `_ .. [Sundström2020a] A. Sundström, L. Gremillet, E. Siminos and I. Pusztai, `Fast collisional electron heating and relaxation in thin foils driven by a circularly polarized ultraintense short-pulse laser`, - `J. Plasma Phys. 86, 755860201 (2020) `_ + `Journal of Plasma Physics 86, 755860201 (2020) `_ `arXiv:1911.09562 `_ .. [Gelfer2020] E. G. Gelfer, A. M. Fedotov, O. Klimo and S. Weber, `Absorption and opacity threshold for a thin foil in a strong circularly polarized laser field`, - `Phys. Rev. E 101, 033204 (2020) `_ + `Physical Review E 101, 033204 (2020) `_ `arXiv:1906.05902 `_ .. [Ferri2020] J. Ferri, I. Thiele, E. Siminos, L. Gremillet, E. Smetanina, A. Dmitriev, G. Cantono, C.-G. Wahlström and T. Fülöp, `Enhancement of laser-driven ion acceleration in non-periodic nanostructured targets`, - `J. Plasma Phys. 86, 905860101 (2020) `_ + `Journal of Plasma Physics 86, 905860101 (2020) `_ `arXiv:1905.11131 `_ .. [Marques2019] J.-R. Marquès, L. Lancia, T. Gangolf, M. Blecher, S. Bolaños, J. Fuchs, O. Willi, F. Amiranoff, R. L. Berger, M. Chiaramello, S. Weber, and C. Riconda, `Joule-Level High-Efficiency Energy Transfer to Subpicosecond Laser Pulses by a Plasma-Based Amplifier`, - `Phys. Rev. X 9, 021008 (2019) `_ + `Physical Review X 9, 021008 (2019) `_ .. [Plotnikov2019] I. Plotnikov and L. Sironi, @@ -990,39 +1038,39 @@ Following is the distribution of these topics in the listed publications up to N X. S. Geng, L. L. Ji, B. F. Shen et al., `Quantum reflection above the classical radiation-reaction barrier in the quantum electro-dynamics regime`, - `Commun. Phys. 2, 66 (2019) `_ + `Communications Physics 2, 66 (2019) `_ .. [Sinha2019] U. Sinha, C. H. Keitel, and N. 
Kumar, `Polarized Light from the Transportation of a Matter-Antimatter Beam in a Plasma`, - `Phys. Rev. Lett. 122, 204801 (2019) `_ + `Physical Review Letters 122, 204801 (2019) `_ .. [Malko2019] S. Malko, X. Vaisseau, F. Perez, D. Batani, A. Curcio, M. Ehret, J. Honrubia, K. Jakubowska, A. Morace, J. J. Santos and L. Volpe, `Enhanced relativistic-electron beam collimation using two consecutive laser pulses`, - `Sci Rep 9, 14061 (2019) `_ + `Scientific Reports 9, 14061 (2019) `_ .. [Peng2019] H. Peng, C. Riconda, M. Grech, J.-Q. Su and S. Weber, `Nonlinear dynamics of laser-generated ion-plasma gratings: A unified description`, - `Phys. Rev. E 100, 061201 (2019) `_ + `Physical Review E 100, 061201 (2019) `_ `arXiv:1911.03440 `_ .. [Fang2019] J. Fang, C.-Y. Lu, J.-W. Yan and H. Yu, `Early acceleration of electrons and protons at the nonrelativistic quasiparallel shocks with different obliquity angles`, - `Res. Astron. Astrophys. 19, 182 (2019) `_ + `Research in Astronomy and Astrophysics 19, 182 (2019) `_ `arXiv:1908.08170 `_ .. [Yoon2019b] Y. Yoon and P. M. Bellan, `Kinetic Verification of the Stochastic Ion Heating Mechanism in Collisionless Magnetic Reconnection`, - `ApJ 887, L29 (2019) `_ + `The Astrophysical Journal Letters 887, L29 (2019) `_ .. [Yoon2019a] @@ -1034,7 +1082,7 @@ Following is the distribution of these topics in the listed publications up to N F. Massimo, A. Beck, J. Derouillat, M. Grech, M. Lobet, F. Pérez, I. Zemzemi and A Specka, `Efficient start-to-end 3D envelope modeling for two-stage laser wakefield acceleration experiments`, - `Plasma Phys. Control. Fusion 61, 124001 (2019) `_ + `Plasma Physics and Controlled Fusion 61, 124001 (2019) `_ `arXiv:1912.04127 `_ .. [Beck2019] @@ -1048,14 +1096,14 @@ Following is the distribution of these topics in the listed publications up to N F. Pérez and M. Grech, `Oblique-incidence, arbitrary-profile wave injection for electromagnetic simulations`, - `Phys. Rev. 
E 99, 033307 (2019) `_ + `Physical Review E 99, 033307 (2019) `_ `arXiv:1809.04435 `_ .. [Thiele2019] I. Thiele, E. Siminos and T. Fülöp, `Electron Beam Driven Generation of Frequency-Tunable Isolated Relativistic Subcycle Pulses`, - `Phys. Rev. Lett. 122, 104803 (2019) `_ + `Physical Review Letters 122, 104803 (2019) `_ `arXiv:1806.04976 `_ .. [Golovanov2018] @@ -1068,19 +1116,19 @@ Following is the distribution of these topics in the listed publications up to N S. Toledo-Redondo, J. Dargent, N. Aunai, B. Lavraud, M. André, W. Li, B. Giles, P.-A. Lindvist, R. E. Ergun, C. T. Russel and J. L. Burch, `Perpendicular Current Reduction Caused by Cold Ions of Ionospheric Origin in Magnetic Reconnection at the Magnetopause: Particle-in-Cell Simulations and Spacecraft Observations`, - `Geophys. Res. Lett. 45, 10,033 (2018) `_ + `Geophysical Research Letters 45, 10,033 (2018) `_ .. [Gelfer2018] E. Gelfer, N. Elkina and A. Fedotov, `Unexpected impact of radiation friction: enhancing production of longitudinal plasma waves`, - `Sci. Rep. 8, 6478 (2018) `_ + `Scientific Reports 8, 6478 (2018) `_ .. [Niel2018b] F. Niel, C. Riconda, F. Amiranoff, M. Lobet, J. Derouillat, F. Pérez, T. Vinci and M. Grech, `From quantum to classical modeling of radiation reaction: a focus on the radiation spectrum`, - `Plasma Phys. Control. Fusion 60, 094002 (2018) `_ + `Plasma Physics and Controlled Fusion 60, 094002 (2018) `_ `arXiv:1802.02927 `_ .. [Plotnikov2018] @@ -1094,21 +1142,21 @@ Following is the distribution of these topics in the listed publications up to N F. Niel, C. Riconda, F. Amiranoff, R. Duclous and M. Grech, `From quantum to classical modeling of radiation reaction: A focus on stochasticity effects`, - `Phys. Rev. E 97, 043209 (2018) `_ + `Physical Review E 97, 043209 (2018) `_ `arXiv:1707.02618 `_ .. [Grassi2017b] A. Grassi, M. Grech, F. Amiranoff, A. Macchi and C. Riconda, `Radiation-pressure-driven ion Weibel instability and collisionless shocks`, - `Phys. Rev. 
E 96, 033204 (2017) `_ + `Physical Review E 96, 033204 (2017) `_ `arXiv:1705.05402 `_ .. [Fedeli2017] L. Fedeli, A. Formenti, L. Cialfi, A. Sgattoni, G. Cantono and M. Passoni, `Structured targets for advanced laser-driven sources`, - `Plasma Phys. Control. Fusion 60, 014013 (2017) `_ + `Plasma Physics and Controlled Fusion 60, 014013 (2017) `_ .. [Golovanov2017] @@ -1120,19 +1168,19 @@ Following is the distribution of these topics in the listed publications up to N J. Dargent, N. Aunai, B. Lavraud, S. Toledo-Redondo, M. A. Shay, P. A. Cassak and K. Malakit, `Kinetic simulation of asymmetric magnetic reconnection with cold ions`, - `J. Geophys. Res. Space Physics 122, 5290-5306 (2017) `_ + `Journal of Geophysical Research: Space Physics 122, 5290-5306 (2017) `_ .. [Grassi2017a] A. Grassi, M. Grech, F. Amiranoff, F. Pegoraro, A. Macchi and C. Riconda, `Electron Weibel instability in relativistic counterstreaming plasmas with flow-aligned external magnetic fields`, - `Phys. Rev. E 95, 023203 (2017) `_ + `Physical Review E 95, 023203 (2017) `_ .. [Dargent2016] J. Dargent, N. Aunai, G. Belmont, N. Dorville, B. Lavraud and M. Hesse, `Full particle-in-cell simulations of kinetic equilibria and the role of the initial current sheet on steady asymmetric magnetic reconnection`, - `J. Plasma Phys. 82, 905820305 (2016) `_ + `Journal of Plasma Physics 82, 905820305 (2016) `_ .. [Chiaramello2016] @@ -1144,10 +1192,10 @@ Following is the distribution of these topics in the listed publications up to N A. Beck, J.T. Frederiksen and J. Dérouillat, `Load management strategy for Particle-In-Cell simulations in high energy particle acceleration`, - `Nucl. Inst. Meth. in Phys. Res. A 829, 418-421 (2016) `_ + `Nuclear Instruments and Methods in Physics Research A 829, 418-421 (2016) `_ .. [Lancia2016] L. Lancia, A. Giribono, L. Vassura, M. Chiaramello, C. Riconda, S. Weber, A. Castan, A. Chatelain, A. Frank, T. Gangolf, M. N. Quinn, J. Fuchs and J.-R.
Marquès, `Signatures of the Self-Similar Regime of Strongly Coupled Stimulated Brillouin Scattering for Efficient Short Laser Pulse Amplification`, - `Phys. Rev. Lett. 116, 075001 (2016) `_ + `Physical Review Letters 116, 075001 (2016) `_ diff --git a/doc/Sphinx/Use/namelist.rst b/doc/Sphinx/Use/namelist.rst index f7deebcae..a07f19005 100755 --- a/doc/Sphinx/Use/namelist.rst +++ b/doc/Sphinx/Use/namelist.rst @@ -1148,6 +1148,9 @@ Each species has to be defined in a ``Species`` block:: :ref:`tracking `. The available fields are ``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"`` and ``"Bz"``. + Note that magnetic field components, as they originate from the interpolator, + are shifted by half a timestep compared to those from the *Fields* diagnostics. + Additionally, the work done by each component of the electric field is available as ``"Wx"``, ``"Wy"`` and ``"Wz"``. Contrary to the other interpolated fields, these quantities are accumulated over time. @@ -2716,7 +2719,8 @@ or several points arranged in a 2-D or 3-D grid. * **In "AMcylindrical" geometry**, probes are defined with 3D Cartesian coordinates and cannot be separated per mode. Use Field diagnostics for cylindrical coordinates and information per mode. - + * **Probes rely on the particle interpolator to compute fields** so that the + magnetic field is shifted by half a timestep compared to that of *Fields* diagnostics. To add one probe diagnostic, include the block ``DiagProbe``:: @@ -3343,19 +3347,20 @@ for instance:: def my_filter(particles): return (particles.px>-1.)*(particles.px<1.) + (particles.pz>3.) -.. Warning:: The ``px``, ``py`` and ``pz`` quantities are not exactly the momenta. - They are actually the velocities multiplied by the lorentz factor, i.e., - :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. This is true only - inside the ``filter`` function (not for the output of the diagnostic). - -.. Note:: The ``id`` attribute contains the :doc:`particles identification number`. 
- This number is set to 0 at the beginning of the simulation. **Only after particles have - passed the filter**, they acquire a positive ``id``. - -.. Note:: For advanced filtration, Smilei provides the quantity ``Main.iteration``, - accessible within the ``filter`` function. Its value is always equal to the current - iteration number of the PIC loop. The current time of the simulation is thus - ``Main.iteration * Main.timestep``. +.. Note:: + + * In the ``filter`` function only, the ``px``, ``py`` and ``pz`` quantities + are not exactly the momenta. + They are actually the velocities multiplied by the Lorentz factor, i.e., + :math:`\gamma v_x`, :math:`\gamma v_y` and :math:`\gamma v_z`. + This is *not* true for the output of the diagnostic. + * The ``id`` attribute contains the :doc:`particles identification number`. + This number is set to 0 at the beginning of the simulation. **Only after particles have + passed the filter**, they acquire a positive ``id``. + * For advanced filtration, Smilei provides the quantity ``Main.iteration``, + accessible within the ``filter`` function. Its value is always equal to the current + iteration number of the PIC loop. The current time of the simulation is thus + ``Main.iteration * Main.timestep``. .. py:data:: attributes @@ -3368,6 +3373,11 @@ for instance:: (``"chi"``, only for species with radiation losses) or the fields interpolated at their positions (``"Ex"``, ``"Ey"``, ``"Ez"``, ``"Bx"``, ``"By"``, ``"Bz"``). +.. Note:: Here, interpolated fields are normally computed after the Maxwell solver. + They may thus differ by half a timestep from those computed at the middle of the + timestep to push particles. When exact values are needed, use the option + :py:data:`keep_interpolated_fields`. + ---- ..
rst-class:: experimental diff --git a/doc/Sphinx/implementation.rst b/doc/Sphinx/implementation.rst index c524cb560..aab91c2c9 100644 --- a/doc/Sphinx/implementation.rst +++ b/doc/Sphinx/implementation.rst @@ -547,7 +547,7 @@ file ``Smilei.cpp`` thought calls to different ``vecPatches`` methods. .. code-block:: c++ - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); * **Particle merging**: merging process for particles (still experimental) diff --git a/happi/_Diagnostics/TrackParticles.py b/happi/_Diagnostics/TrackParticles.py index 253bb2958..0825eb0f3 100755 --- a/happi/_Diagnostics/TrackParticles.py +++ b/happi/_Diagnostics/TrackParticles.py @@ -447,8 +447,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for k, name in self._short_properties_from_raw.items(): if k not in group: continue ordered = self._np.empty((nparticles_to_write, ), dtype=group[k].dtype) - if k == "id": ordered.fill(0) - else : ordered.fill(self._np.nan) + if k == "id" : ordered.fill(0) + elif k == "charge": ordered.fill(9999) + else : ordered.fill(self._np.nan) ordered[locs] = group[k][()][selectedIndices] f0[name].write_direct(ordered, dest_sel=self._np.s_[it,:]) @@ -461,8 +462,9 @@ def _orderFiles( self, fileOrdered, chunksize, sort ): for first_o, last_o, npart_o in ChunkedRange(nparticles_to_write, chunksize): for k, name in self._short_properties_from_raw.items(): if k not in group: continue - if k == "id": data[k].fill(0) - else : data[k].fill(self._np.nan) + if k == "id" : data[k].fill(0) + elif k == "charge": data[k].fill(9999) + else : data[k].fill(self._np.nan) # Loop chunks of the input for first_i, last_i, npart_i in ChunkedRange(nparticles, chunksize): # Obtain IDs @@ -538,7 +540,10 @@ def _generateRawData(self, times=None): data[it,:] -= self._XmovedForTime[time] else: data = self._readUnstructuredH5(self._h5items[axis], self.selectedParticles, first_time, 
last_time) - data[deadParticles] = self._np.nan + if data.dtype == float: + data[deadParticles] = self._np.nan + else: + data[deadParticles] = 9999 self._rawData[axis] = data if self._verbose: print("Process broken lines ...") diff --git a/happi/_Utils.py b/happi/_Utils.py index 9fd35a757..070046786 100755 --- a/happi/_Utils.py +++ b/happi/_Utils.py @@ -42,7 +42,10 @@ def updateMatplotLibColormaps(): if "smilei" in matplotlib.pyplot.colormaps(): return def register(name, d): cmap = matplotlib.colors.LinearSegmentedColormap(name, d, N=256, gamma=1.0) - matplotlib.pyplot.register_cmap(cmap=cmap) + try: + matplotlib.pyplot.register_cmap(cmap=cmap) + except Exception as e: + matplotlib.colormaps.register(cmap) register(u"smilei", { 'red' :((0., 0., 0.), (0.0625 , 0.091, 0.091), (0.09375, 0.118, 0.118), (0.125 , 0.127, 0.127), (0.1875 , 0.135, 0.135), (0.21875, 0.125, 0.125), (0.28125, 0.034, 0.034), (0.3125 , 0.010, 0.010), (0.34375, 0.009, 0.009), (0.4375 , 0.049, 0.049), (0.46875, 0.057, 0.057), (0.5 , 0.058, 0.058), (0.59375, 0.031, 0.031), (0.625 , 0.028, 0.028), (0.65625, 0.047, 0.047), (0.71875, 0.143, 0.143), (0.78125, 0.294, 0.294), (0.84375, 0.519, 0.519), (0.90625, 0.664, 0.664), (0.9375 , 0.760, 0.760), (0.96875, 0.880, 0.880), (1., 1., 1. )), 'green':((0., 0., 0.), (0.21875, 0.228, 0.228), (0.78125, 0.827, 0.827), (0.8125 , 0.852, 0.852), (0.84375, 0.869, 0.869), (0.9375 , 0.937, 0.937), (0.96875, 0.967, 0.967), (1. , 1. , 1. 
)), @@ -398,7 +401,11 @@ def __init__(self, operation, QuantityTranslator, ureg): raise Exception("Quantity "+q+" not understood") # Calculate the total units and its inverse locals().update(self.imports) - units = eval("".join(basic_op)).units + units = eval("".join(basic_op)) + if isinstance(units, (int, float)): + units = ureg.Quantity(1) # dimensionless + else: + units = units.units self.translated_units = units.format_babel(locale="en") # Make the operation string self.translated_operation = "".join(full_op) diff --git a/makefile b/makefile index 3aaff0201..277a2237d 100755 --- a/makefile +++ b/makefile @@ -52,7 +52,7 @@ DIRS := $(shell find src -type d) SRCS := $(shell find src/* -name \*.cpp) OBJS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.o)) DEPS := $(addprefix $(BUILD_DIR)/, $(SRCS:.cpp=.d)) -SITEDIR = $(shell $(PYTHONEXE) -c 'import site; site._script()' --user-site) +SITEDIR = $(shell d=`$(PYTHONEXE) -m site --user-site` && echo $$d || $(PYTHONEXE) -c "import sysconfig; print(sysconfig.get_path('purelib'))") # Smilei tools TABLES_DIR := tools/tables @@ -202,9 +202,9 @@ endif ifneq (,$(call parse_config,gpu_nvidia)) override config += noopenmp # Prevent openmp for nvidia - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC GPU_COMPILER ?= nvcc - GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_MODE -DSMILEI_OPENACC_MODE $(DIRS:%=-I%) + GPU_COMPILER_FLAGS += -x cu -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OACC $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) @@ -214,9 +214,9 @@ endif # AMD GPUs ifneq (,$(call parse_config,gpu_amd)) - CXXFLAGS += -DSMILEI_ACCELERATOR_MODE + CXXFLAGS += -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP GPU_COMPILER ?= $(CC) - GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_MODE 
-std=c++14 $(DIRS:%=-I%) #$(PY_FLAGS) + GPU_COMPILER_FLAGS += -x hip -DSMILEI_ACCELERATOR_GPU -DSMILEI_ACCELERATOR_GPU_OMP -std=c++14 $(DIRS:%=-I%) GPU_COMPILER_FLAGS += -I$(BUILD_DIR)/src/Python $(PY_CXXFLAGS) GPU_KERNEL_SRCS := $(shell find src/* -name \*.cu) GPU_KERNEL_OBJS := $(addprefix $(BUILD_DIR)/, $(GPU_KERNEL_SRCS:.cu=.o)) diff --git a/scripts/compile_tools/machine/adastra b/scripts/compile_tools/machine/adastra index 7aab184ce..14c2a975a 100644 --- a/scripts/compile_tools/machine/adastra +++ b/scripts/compile_tools/machine/adastra @@ -85,7 +85,6 @@ ADASTRA_DEBUG_FLAGS := -g -ggdb $(ADASTRA_DEBUG_SANITIZER_FLAGS) -v # ifneq (,$(call parse_config,gpu_amd)) # When using OMP - ADASTRA_ACCELERATOR_GPU_OMP_DEFINE_FLAGS := -DSMILEI_ACCELERATOR_GPU_OMP=1 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908 # ADASTRA_ACCELERATOR_GPU_TARGET := gfx908:xnack- diff --git a/scripts/compile_tools/machine/ruche_gpu2 b/scripts/compile_tools/machine/ruche_gpu2 index a9406d60d..80cf09198 100644 --- a/scripts/compile_tools/machine/ruche_gpu2 +++ b/scripts/compile_tools/machine/ruche_gpu2 @@ -26,7 +26,7 @@ GPU_COMPILER_FLAGS += -arch=sm_80 #sm_89 # first compile completely with sm_80 t CXXFLAGS += -Minfo=accel # what is offloaded/copied # CXXFLAGS += -Minfo=all # very verbose output -# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_OPENACC_MODE' +# To turn on the OpenMP support, uncomment these 3 lines and comment the line just above defining 'SMILEI_ACCELERATOR_GPU_OACC' # CXXFLAGS += -mp=gpu -DSMILEI_ACCELERATOR_GPU_OMP # GPU_COMPILER_FLAGS += -DSMILEI_ACCELERATOR_GPU_OMP # Can't we pass the -mp=gpu to nvcc when compiling a .cu file ? 
# LDFLAGS += -mp=gpu diff --git a/src/Checkpoint/Checkpoint.cpp b/src/Checkpoint/Checkpoint.cpp index 13c3d28a5..943840cb9 100755 --- a/src/Checkpoint/Checkpoint.cpp +++ b/src/Checkpoint/Checkpoint.cpp @@ -233,7 +233,7 @@ void Checkpoint::dumpAll( VectorPatch &vecPatches, Region ®ion, unsigned int MESSAGE( " Checkpoint #" << num_dump << " at iteration " << itime << " dumped" ); #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) MESSAGE( " Copying device data in main memory" ); // TODO(Etienne M): This may very well be redundant if we did a diagnostic // during the last iteration. Indeed, we copy everything from the device to diff --git a/src/Diagnostic/DiagnosticProbes.cpp b/src/Diagnostic/DiagnosticProbes.cpp index 5e79eecc9..e66c684e7 100755 --- a/src/Diagnostic/DiagnosticProbes.cpp +++ b/src/Diagnostic/DiagnosticProbes.cpp @@ -740,7 +740,7 @@ void DiagnosticProbes::run( SmileiMPI *smpi, VectorPatch &vecPatches, int itime, // Interpolate all usual fields on probe ("fake") particles of current patch unsigned int iPart_MPI = offset_in_MPI[ipatch]; unsigned int maxPart_MPI = offset_in_MPI[ipatch] + npart; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_particle, npart ); diff --git a/src/Diagnostic/DiagnosticScalar.cpp b/src/Diagnostic/DiagnosticScalar.cpp index fe88f47d9..9b8b17409 100755 --- a/src/Diagnostic/DiagnosticScalar.cpp +++ b/src/Diagnostic/DiagnosticScalar.cpp @@ -436,7 +436,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) const unsigned int nPart=vecSpecies[ispec]->getNbrOfParticles(); // number of particles -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) const double *const __restrict__ weight_ptr = vecSpecies[ispec]->particles->getPtrWeight(); const short *const __restrict__ charge_ptr = 
vecSpecies[ispec]->particles->getPtrCharge(); const double *const __restrict__ momentum_x = vecSpecies[ispec]->particles->getPtrMomentum(0); @@ -447,14 +447,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) if( vecSpecies[ispec]->mass_ > 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -468,7 +468,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: charge) \ is_device_ptr( charge_ptr, weight_ptr) \ reduction(+:charge) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr, charge_ptr) #pragma acc loop gang worker vector reduction(+:charge) #endif @@ -484,7 +484,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -525,14 +525,14 @@ void DiagnosticScalar::compute( Patch *patch, int ) } else if( vecSpecies[ispec]->mass_ == 0 ) { // GPU mode -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target teams distribute parallel for \ map(tofrom: density) \ is_device_ptr(weight_ptr) \ reduction(+:density) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(weight_ptr) #pragma acc loop gang worker vector reduction(+:density) #endif @@ -548,7 +548,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) 
momentum_y /* [istart:particle_number] */, \ momentum_z /* [istart:particle_number] */) \ reduction(+:ener_tot) -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc parallel deviceptr(weight_ptr, \ momentum_x, \ momentum_y, \ @@ -667,7 +667,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) // total energy in current field double Uem = 0.; if( ! AM ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) Uem = field->norm2OnDevice( EMfields->istart, EMfields->bufsize ); #else Uem = field->norm2( EMfields->istart, EMfields->bufsize ); @@ -751,7 +751,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) j_max = iFieldStart[1]; k_max = iFieldStart[2]; -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // We use scalar rather than arrays because omp target // sometime fails to pass them to the device const unsigned int ixstart = iFieldStart[0]; @@ -776,7 +776,7 @@ void DiagnosticScalar::compute( Patch *patch, int ) map(tofrom: minval, maxval, i_min, i_max, j_min, j_max, k_min, k_max) \ map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) //reduction(min:minval) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field_data) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) #endif diff --git a/src/Diagnostic/DiagnosticTrack.cpp b/src/Diagnostic/DiagnosticTrack.cpp index 16ac325e9..583caab94 100755 --- a/src/Diagnostic/DiagnosticTrack.cpp +++ b/src/Diagnostic/DiagnosticTrack.cpp @@ -188,7 +188,7 @@ void DiagnosticTrack::setIDs( Patch *patch ) for( unsigned int iPart=0; iPartvecSpecies[species_index_]->particles->id( iPart ) = ++latest_Id; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) patch->vecSpecies[species_index_]->particles->initializeIDsOnDevice(); #endif } diff --git a/src/ElectroMagn/ElectroMagn.cpp b/src/ElectroMagn/ElectroMagn.cpp index 
2c75bc6a4..02467ecd4 100755 --- a/src/ElectroMagn/ElectroMagn.cpp +++ b/src/ElectroMagn/ElectroMagn.cpp @@ -555,7 +555,7 @@ void ElectroMagn::applyAntenna( unsigned int iAntenna, double intensity ) //! Compute the total density and currents from species density and currents on Device //! This function is valid wathever the geometry // --------------------------------------------------------------------------------------------------------------------- -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) void ElectroMagn::computeTotalRhoJOnDevice() { @@ -577,7 +577,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() double *const __restrict__ rhosp = rho_s[ispec] ? rho_s[ispec]->data() : nullptr; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( \ Jxp[0:Jx_size], \ Jyp[0:Jy_size], \ @@ -594,7 +594,7 @@ void ElectroMagn::computeTotalRhoJOnDevice() #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; idata(); // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1229,10 +1229,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1241,7 +1241,7 @@ void ElectroMagn2D::centerMagneticFields() } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( 
SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(By2D[0:sizeofBy],By2D_m[0:sizeofBy]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1249,10 +1249,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -1260,7 +1260,7 @@ void ElectroMagn2D::centerMagneticFields() } } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(Bz2D[0:sizeofBz],Bz2D_m[0:sizeofBz]) #pragma acc loop gang worker #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -1268,10 +1268,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p + 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1282,7 +1282,7 @@ void ElectroMagn2D::centerMagneticFields() double *const By2D_oldBTIS3 = By_mBTIS3->data(); double *const Bz2D_oldBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By2D_oldBTIS3[0:sizeofByBTIS3],By2D[0:sizeofBy]) #pragma acc loop gang @@ -1291,17 +1291,17 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif 
-#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { By2D_oldBTIS3[x * ny_p + y] = ( By2D[(x+1) * ny_p + y] + By2D_oldBTIS3[x * ny_p + y] ) * 0.5; } } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBzBTIS3 = Bz_mBTIS3->size(); #pragma acc parallel present(Bz2D_oldBTIS3[0:sizeofBz],Bz2D[0:sizeofBz]) #pragma acc loop gang @@ -1310,10 +1310,10 @@ void ElectroMagn2D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < ( nx_p - 1 ); ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -1392,7 +1392,7 @@ void ElectroMagn2D::computeTotalRhoJ() //END computeTotalRhoJ } -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void ElectroMagn2D::computeTotalRhoJOnDevice() // { diff --git a/src/ElectroMagn/ElectroMagn2D.h b/src/ElectroMagn/ElectroMagn2D.h index aecb87ab8..d8cdfb031 100755 --- a/src/ElectroMagn/ElectroMagn2D.h +++ b/src/ElectroMagn/ElectroMagn2D.h @@ -115,7 +115,7 @@ class ElectroMagn2D : public ElectroMagn //! Method used to compute the total charge density and currents by summing over all species void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! 
Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override; // #endif diff --git a/src/ElectroMagn/ElectroMagn3D.cpp b/src/ElectroMagn/ElectroMagn3D.cpp index c8994d75c..41ba9cc58 100755 --- a/src/ElectroMagn/ElectroMagn3D.cpp +++ b/src/ElectroMagn/ElectroMagn3D.cpp @@ -4,7 +4,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -1207,7 +1207,7 @@ void ElectroMagn3D::centerMagneticFields() double *const __restrict__ Bz3D_m = Bz_m->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofBx = Bx_->size(); const int sizeofBy = By_->size(); const int sizeofBz = Bz_->size(); @@ -1219,11 +1219,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; idata(); double *const __restrict__ BzmBTIS3 = Bz_mBTIS3->data(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofByBTIS3 = By_mBTIS3->size(); #pragma acc parallel present(By3D[0:sizeofBy],BymBTIS3[0:sizeofByBTIS3]) #pragma acc loop gang @@ -1305,11 +1305,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; isize(); #pragma acc parallel present(Bz3D[0:sizeofBz],BzmBTIS3[0:sizeofBzBTIS3]) #pragma acc loop gang @@ -1332,11 +1332,11 @@ void ElectroMagn3D::centerMagneticFields() #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; icopyFrom( Br_[imode] ); if (input[2] && copy[2]) Bt_m[imode]->copyFrom( Bt_[imode] ); } - ElectroMagnAM *emAM = static_cast( patch->EMfields ); + // ElectroMagnAM *emAM = static_cast( patch->EMfields ); //emAM->compute_B_m_fromEB(); } @@ -1900,7 +1900,7 @@ void ElectroMagnAM::compute_B_m_fromEB() { const unsigned int nl_p = dimPrim[0]; const unsigned 
int nl_d = dimDual[0]; - const unsigned int nr_p = dimPrim[1]; + // const unsigned int nr_p = dimPrim[1]; const unsigned int nr_d = dimDual[1]; const unsigned int Nmodes = El_.size(); diff --git a/src/ElectroMagn/ElectroMagnAM.h b/src/ElectroMagn/ElectroMagnAM.h index 979581b4c..cd3063113 100755 --- a/src/ElectroMagn/ElectroMagnAM.h +++ b/src/ElectroMagn/ElectroMagnAM.h @@ -157,7 +157,7 @@ class ElectroMagnAM : public ElectroMagn void computeTotalRhoJ() override; -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // //! Method used to compute the total charge density and currents by summing over all species on Device // void computeTotalRhoJOnDevice() override ; // #endif diff --git a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp index 42ce8c381..2d257cbd5 100755 --- a/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC2D_SM.cpp @@ -68,9 +68,9 @@ ElectroMagnBC2D_SM::ElectroMagnBC2D_SM( Params ¶ms, Patch *patch, unsigned i ElectroMagnBC2D_SM::~ElectroMagnBC2D_SM() { - for (int i=0 ; inumber_of_points_; const int sizeofE1 = E[1]->number_of_points_; const int sizeofE2 = E[2]->number_of_points_; @@ -182,7 +182,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( db1, b1_size ); if( axis0_ == 0 ) { // for By^(d,p) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -199,7 +199,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * + B_ext1[j]; } } else { // for Bx^(p,d) -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel 
present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -234,7 +234,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // for Bz^(d,d) if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E1[0:sizeofE1],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -247,7 +247,7 @@ void ElectroMagnBC2D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * } } else { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E0[0:sizeofE0],B2[0:sizeofB2],B_ext2[0:B_ext_size2],db2[0:b2_size]) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp index 3ae113e60..ba4e61b28 100755 --- a/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp +++ b/src/ElectroMagnBC/ElectroMagnBC3D_SM.cpp @@ -186,7 +186,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * const int isBoundary2min = patch->isBoundary( axis2_, 0 ); const int isBoundary2max = patch->isBoundary( axis2_, 1 ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC const int sizeofE0 = E[axis0_]->number_of_points_; const int sizeofE1 = E[axis1_]->number_of_points_; const int sizeofE2 = E[axis2_]->number_of_points_; @@ -217,7 +217,7 @@ void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * // B1 if( axis0_ == 0 ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel present(E2[0:sizeofE2],B0[0:sizeofB0],B1[0:sizeofB1],B_ext1[0:B_ext_size1],B_ext0[0:B_ext_size0],db1[0:b1_size]) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -225,7 +225,7 @@ 
void ElectroMagnBC3D_SM::apply( ElectroMagn *EMfields, double time_dual, Patch * #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int j=isBoundary1min; j( fields->Ex_ ); Field1D *Ey1D = static_cast( fields->Ey_ ); Field1D *Ez1D = static_cast( fields->Ez_ ); - Field1D *Bx1D = static_cast( fields->Bx_ ); + // Field1D *Bx1D = static_cast( fields->Bx_ ); Field1D *By1D = static_cast( fields->By_ ); Field1D *Bz1D = static_cast( fields->Bz_ ); Field1D *Jx1D = static_cast( fields->Jx_ ); diff --git a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp index d12e021c1..4cd0d7d7c 100755 --- a/src/ElectroMagnSolver/MA_Solver2D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver2D_norm.cpp @@ -37,7 +37,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) // double sumJz = 0; // Electric field Ex^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -52,10 +52,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -64,7 +64,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ey^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ey2D[0:sizeofEy], Jy2D[0:sizeofEy], Bz2D[0:sizeofBz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -72,10 +72,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 
) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_d; ++y ) { @@ -84,7 +84,7 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) } // Electric field Ez^(p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Ez2D[0:sizeofEz], Jz2D[0:sizeofEz], Bx2D[0:sizeofBx], By2D[0:sizeofBy] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -92,10 +92,10 @@ void MA_Solver2D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_p; ++x ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif for( unsigned int y = 0; y < ny_p; ++y ) { diff --git a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp index 9b2a089cc..7ffea26c0 100755 --- a/src/ElectroMagnSolver/MA_Solver3D_norm.cpp +++ b/src/ElectroMagnSolver/MA_Solver3D_norm.cpp @@ -35,7 +35,7 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) const unsigned int nz_d = fields->dimDual[2]; // Electric field Ex^(d,p,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -50,11 +50,11 @@ void MA_Solver3D_norm::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; iBz_->data(); // [x * ny_d + y] : dual in x,y primal in z // Magnetic field Bx^(p,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( 
SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -48,10 +48,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 0; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { @@ -59,7 +59,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } } // Magnetic field By^(d,p) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( By2D[0:sizeofBy], Ez2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -67,10 +67,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 0; y < ny_p; ++y ) { @@ -79,7 +79,7 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) } // Magnetic field Bz^(d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Bz2D[0:sizeofBy], Ex2D[0:sizeofEx], Ey2D[0:sizeofEz] ) #pragma acc loop gang #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -87,10 +87,10 @@ void MF_Solver2D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 2 ) #endif for( unsigned int x = 1; x < nx_d - 1; ++x ) { -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) #pragma omp simd #endif 
-#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int y = 1; y < ny_d - 1; ++y ) { diff --git a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp index 5930af3e1..f70159699 100755 --- a/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp +++ b/src/ElectroMagnSolver/MF_Solver3D_Yee.cpp @@ -34,7 +34,7 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) const double * __restrict__ Ez3D = isEFilterApplied ? fields->filter_->Ez_[0]->data() : fields->Ez_->data(); // Magnetic field Bx^(p,d,d) -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) const int sizeofEx = fields->Ex_->number_of_points_; const int sizeofEy = fields->Ey_->number_of_points_; const int sizeofEz = fields->Ez_->number_of_points_; @@ -49,11 +49,11 @@ void MF_Solver3D_Yee::operator()( ElectroMagn *fields ) #pragma omp teams distribute parallel for collapse( 3 ) #endif for( unsigned int i=0 ; i dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex d2A_over_dx2 = d2A_over_dx2_fdtd @@ -590,8 +590,8 @@ void PML_Solver2D_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // ---- // dA/dx = dA/dx + ik0 A std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dx) ; - std::complex dA_over_dx = dA_over_dx_fdtd - + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + // + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dx*dx) ; std::complex 
d2A_over_dx2 = d2A_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp index 7e4e740c7..d8c65645a 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_Envelope.cpp @@ -395,7 +395,6 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -405,7 +404,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; @@ -494,7 +493,7 @@ void PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dA_over_dx_fdtd = ( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = ( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl) ; @@ -635,8 +634,8 @@ void 
PML_SolverAM_Envelope::compute_A_from_G( LaserEnvelope *envelope, int iDim, for( unsigned int j=solvermin ; j < solvermax ; j++ ) { // y loop // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = ( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd - + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + // + i1*k0*( *G_n_pml )( i, j ) ; // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = ( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl) ; std::complex d2G_over_dx2 = d2G_over_dx2_fdtd diff --git a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp index 771f12e37..c2a5c4087 100644 --- a/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp +++ b/src/ElectroMagnSolver/PML_SolverAM_EnvelopeReducedDispersion.cpp @@ -400,7 +400,6 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en double k0 = 1.; // laser wavenumber std::complex source_term_x ; std::complex source_term_y ; - double mpml_ratio = 0.00; if (iDim == 0) { for( unsigned int k=0 ; k<1 ; k++ ) { @@ -410,7 +409,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en // dA/dx = dA/dx + ik0 A // r dA/dx = r dA/dx + ik0 rA <=> dG/dx = dG/dx + ik0 G std::complex dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = 
(1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -490,7 +489,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=solvermin ; i dA_over_dx_fdtd = (1.+delta)*( ( *A_n_pml )( i+1, j )-( *A_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *A_n_pml )( i+2, j )-( *A_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; + // std::complex dA_over_dx = dA_over_dx_fdtd + i1*k0*( *A_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2A_over_dx2_fdtd = (1.+delta)*( ( *A_n_pml )( i-1, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *A_n_pml )( i-2, j )-2.*( *A_n_pml )( i, j )+( *A_n_pml )( i+2, j ) )/(4.*dl*dl) ; @@ -591,7 +590,7 @@ void PML_SolverAM_EnvelopeReducedDispersion::compute_A_from_G( LaserEnvelope *en for( unsigned int i=2 ; i dG_over_dx_fdtd = (1.+delta)*( ( *G_n_pml )( i+1, j )-( *G_n_pml )( i-1, j ) )/(2.*dl) - delta*( ( *G_n_pml )( i+2, j )-( *G_n_pml )( i-2, j ) )/(4.*dl) ; - std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; + // std::complex dG_over_dx = dG_over_dx_fdtd + i1*k0*( *G_n_pml )( i, j ) ; // d2A/dx^2 = d2A/dx^2 + 2ik0 dA/dx - k0^2 A // r d2A/dx^2 = r d2A/dx^2 + r 2ik0 dA/dx - r k0^2 A <=> d2G/dx^2 = d2G/dx^2 + 2ik0 dG/dx - k0^2 G std::complex d2G_over_dx2_fdtd = (1.+delta)*( ( *G_n_pml )( i-1, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+1, j ) )/(dl*dl)-delta*( ( *G_n_pml )( i-2, j )-2.*( *G_n_pml )( i, j )+( *G_n_pml )( i+2, j ) )/(4.*dl*dl) ; diff --git a/src/Field/Field.cpp b/src/Field/Field.cpp index 19c820d1d..0d8427f1e 100644 --- a/src/Field/Field.cpp +++ b/src/Field/Field.cpp @@ -5,14 +5,14 @@ void Field::put_to( double val ) { SMILEI_ASSERT( 
data_ != nullptr ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const bool is_hostptr_mapped_on_device = smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( data_ ); #endif // NVCC's OpenACC needs that redundant pointer value double* an_other_data_pointer = data_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // Test if data exists on GPU, put_to can be used on CPU and GPU during a simulation #pragma acc parallel present( an_other_data_pointer [0:size()] ) if( is_hostptr_mapped_on_device ) #pragma acc loop gang worker vector @@ -25,7 +25,7 @@ void Field::put_to( double val ) } } -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field array from Host to Device void Field::copyFromHostToDevice() { diff --git a/src/Field/Field.h b/src/Field/Field.h index 669106245..563705ab1 100755 --- a/src/Field/Field.h +++ b/src/Field/Field.h @@ -188,7 +188,7 @@ class Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! 
Compute the norm2OnDevice of the field virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif @@ -234,7 +234,7 @@ class Field return sum; } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) inline double __attribute__((always_inline)) normOnDevice() { @@ -245,7 +245,7 @@ class Field #pragma omp target teams distribute parallel for \ map(tofrom: sum) map(to: number_of_points_) \ reduction(+:sum) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector reduction(+:sum) #endif @@ -279,7 +279,7 @@ class Field virtual void extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; virtual void inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) = 0; -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) //! copy the field from Host to Device void copyFromHostToDevice(); diff --git a/src/Field/Field1D.cpp b/src/Field/Field1D.cpp index d0fa18b2f..59f085f81 100755 --- a/src/Field/Field1D.cpp +++ b/src/Field/Field1D.cpp @@ -190,7 +190,7 @@ double Field1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/Field1D.h b/src/Field/Field1D.h index 0ff09cd1e..228cc586f 100755 --- a/src/Field/Field1D.h +++ b/src/Field/Field1D.h @@ -92,7 +92,7 @@ class Field1D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/Field2D.cpp b/src/Field/Field2D.cpp index a089a0d45..94051fed6 100755 --- a/src/Field/Field2D.cpp +++ b/src/Field/Field2D.cpp @@ -71,7 +71,7 @@ Field2D::~Field2D() for (int iside=0 ; iside<(int)(sendFields_.size()) ; iside++ ) { if ( sendFields_[iside] != NULL ) { -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) if ( sendFields_[iside]->isOnDevice() ) { sendFields_[iside]->deleteOnDevice(); @@ -220,7 +220,7 @@ double Field2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { @@ -247,7 +247,7 @@ double Field2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, idxlocalstart[0], idxlocalstart[1], iystart, iyend) \ /* is_device_ptr( data_ )*/ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(2) reduction(+:nrj) #endif @@ -333,7 +333,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field2D(size); recvFields_[iDim*2+iNeighbor] = new Field2D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { sendFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); recvFields_[iDim * 2 + iNeighbor]->allocateAndCopyFromHostToDevice(); @@ -341,7 +341,7 @@ void Field2D::create_sub_fields( int iDim, int iNeighbor, int ghost_size ) #endif } else if ( 
ghost_size != (int)(sendFields_[iDim*2+iNeighbor]->dims_[iDim]) ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -381,7 +381,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( should_manipulate_gpu_memory ) #pragma omp teams distribute parallel for collapse( 2 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -389,7 +389,7 @@ void Field2D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -437,7 +437,7 @@ void Field2D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); const int fSize = number_of_points_; bool fieldName( ((name.substr(0,1) == "J") || (name.substr(0,1) == "R") ) && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( sub )); @@ -486,7 +486,7 @@ void Field2D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; isize(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -535,7 +535,7 @@ void Field2D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -81,7 +81,7 @@ Field3D::~Field3D() for( unsigned int iside=0 ; isideisOnDevice() ) { @@ -102,7 +102,9 @@ Field3D::~Field3D() } } if( data_!=NULL ) { +#if 
defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete (data_[0:number_of_points_]) if (acc_deviceptr(data_) != NULL) +#endif delete [] data_; for( unsigned int i=0; idata_3D[i]; @@ -248,7 +250,7 @@ double Field3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } // Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { double nrj( 0. ); @@ -277,7 +279,7 @@ double Field3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3 map(to: ny, nz, ixstart, ixend, iystart, iyend, izstart, izend) \ /*is_device_ptr( data_ ) */ \ reduction(+:nrj) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present(field[0:number_of_points_]) //deviceptr( data_ ) #pragma acc loop gang worker vector collapse(3) reduction(+:nrj) #endif @@ -405,7 +407,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) sendFields_[iDim*2+iNeighbor] = new Field3D(size); recvFields_[iDim*2+iNeighbor] = new Field3D(size); -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if( ( name[0] == 'B' ) || ( name[0] == 'J' || name[0] == 'R' ) ) { @@ -427,7 +429,7 @@ void Field3D::create_sub_fields ( int iDim, int iNeighbor, int ghost_size ) } else if( ghost_size != (int) sendFields_[iDim*2+iNeighbor]->dims_[iDim] ) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) ERROR( "To Do GPU : envelope" ); #endif delete sendFields_[iDim*2+iNeighbor]; @@ -463,7 +465,7 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma omp target if( is_the_right_field ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const 
int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "B") ); @@ -471,11 +473,11 @@ void Field3D::extract_fields_exch( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -514,7 +516,7 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); const int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "B" ); @@ -522,11 +524,11 @@ void Field3D::inject_fields_exch ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -566,7 +568,7 @@ void Field3D::extract_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( to \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) const int subSize = sendFields_[iDim*2+iNeighbor]->size(); const int fSize = number_of_points_; bool fieldName( (name.substr(0,1) == "J") || (name.substr(0,1) == "R")); @@ -575,11 +577,11 @@ void Field3D::extract_fields_sum ( int iDim, int 
iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { @@ -618,7 +620,7 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) map( tofrom \ : field [0:fSize] ) #pragma omp teams distribute parallel for collapse( 3 ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) int subSize = recvFields_[iDim*2+(iNeighbor+1)%2]->size(); int fSize = number_of_points_; bool fieldName( name.substr(0,1) == "J" || name.substr(0,1) == "R"); @@ -627,11 +629,11 @@ void Field3D::inject_fields_sum ( int iDim, int iNeighbor, int ghost_size ) #pragma acc loop gang #endif for( unsigned int i=0; i<(unsigned int)NX; i++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop worker #endif for( unsigned int j=0; j<(unsigned int)NY; j++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( unsigned int k=0; k<(unsigned int)NZ; k++ ) { diff --git a/src/Field/Field3D.h b/src/Field/Field3D.h index cc9524790..9f9ce4c9a 100755 --- a/src/Field/Field3D.h +++ b/src/Field/Field3D.h @@ -100,7 +100,7 @@ class Field3D : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField.h b/src/Field/cField.h index c37aa9514..d76de6ed7 100755 --- a/src/Field/cField.h +++ b/src/Field/cField.h @@ -63,7 +63,7 @@ class cField : public Field virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override = 0; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) = 0; #endif diff --git a/src/Field/cField1D.cpp b/src/Field/cField1D.cpp index 77b0c2685..6a79da95a 100755 --- a/src/Field/cField1D.cpp +++ b/src/Field/cField1D.cpp @@ -191,7 +191,7 @@ double cField1D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField1D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField1D.h b/src/Field/cField1D.h index 43f2030e3..27b15bfc1 100755 --- a/src/Field/cField1D.h +++ b/src/Field/cField1D.h @@ -94,7 +94,7 @@ class cField1D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField2D.cpp b/src/Field/cField2D.cpp index e1ca5560a..57ff6ea81 100755 --- a/src/Field/cField2D.cpp +++ b/src/Field/cField2D.cpp @@ -219,7 +219,7 @@ double cField2D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! 
Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField2D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField2D.h b/src/Field/cField2D.h index d447d4f2e..26ee995c9 100755 --- a/src/Field/cField2D.h +++ b/src/Field/cField2D.h @@ -84,7 +84,7 @@ class cField2D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Field/cField3D.cpp b/src/Field/cField3D.cpp index 84510f401..f4249e134 100755 --- a/src/Field/cField3D.cpp +++ b/src/Field/cField3D.cpp @@ -218,7 +218,7 @@ double cField3D::norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) } //! Perform the norm2 on Device -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) double cField3D::norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) { ERROR("Not implemented"); diff --git a/src/Field/cField3D.h b/src/Field/cField3D.h index a81f293fc..0db1f6835 100755 --- a/src/Field/cField3D.h +++ b/src/Field/cField3D.h @@ -84,7 +84,7 @@ class cField3D : public cField virtual double norm2( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override; //! 
Compute the norm2OnDevice of the field -#if defined(SMILEI_ACCELERATOR_MODE) +#if defined(SMILEI_ACCELERATOR_GPU) virtual double norm2OnDevice( unsigned int istart[3][2], unsigned int bufsize[3][2] ) override final; #endif diff --git a/src/Interpolator/Interpolator2D2Order.cpp b/src/Interpolator/Interpolator2D2Order.cpp index 0254294f5..795ab996d 100755 --- a/src/Interpolator/Interpolator2D2Order.cpp +++ b/src/Interpolator/Interpolator2D2Order.cpp @@ -180,7 +180,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, const double *const __restrict__ By2D = static_cast( EMfields->By_m )->data(); const double *const __restrict__ Bz2D = static_cast( EMfields->Bz_m )->data(); -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -207,7 +207,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -260,7 +260,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else{ // with B-TIS3 interpolation @@ -276,7 +276,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, position_x /* [first_index:npart_range_size] */, \ position_y /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update 
device(this) size_t interpolation_range_size = ( last_index + 1 * nparts ) - first_index; @@ -337,7 +337,7 @@ void Interpolator2D2Order::fieldsWrapper( ElectroMagn *EMfields, delta[1*nparts+ipart] = delta_p[1]; } // end ipart loop - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end with B-TIS interpolation diff --git a/src/Interpolator/Interpolator3D2Order.cpp b/src/Interpolator/Interpolator3D2Order.cpp index 9e594f20b..f40239836 100755 --- a/src/Interpolator/Interpolator3D2Order.cpp +++ b/src/Interpolator/Interpolator3D2Order.cpp @@ -185,8 +185,6 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part int *const __restrict__ iold = smpi->dynamics_iold[ithread].data(); double *const __restrict__ delta = smpi->dynamics_deltaold[ithread].data(); - unsigned int buffer_size = smpi->dynamics_Epart[ithread].size(); - const double *const __restrict__ position_x = particles.getPtrPosition( 0 ); const double *const __restrict__ position_y = particles.getPtrPosition( 1 ); const double *const __restrict__ position_z = particles.getPtrPosition( 2 ); @@ -198,7 +196,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part const double *const __restrict__ By3D = EMfields->By_m->data_; const double *const __restrict__ Bz3D = EMfields->Bz_m->data_; -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) const int sizeofEx = EMfields->Ex_->size(); const int sizeofEy = EMfields->Ey_->size(); const int sizeofEz = EMfields->Ez_->size(); @@ -224,7 +222,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t 
interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -282,7 +280,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[1*nparts+ipart] = delta_p[1]; delta[2*nparts+ipart] = delta_p[2]; } - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } else { // with B-TIS3 interpolation @@ -302,7 +300,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part position_y /* [first_index:npart_range_size] */, \ position_z /* [first_index:npart_range_size] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc enter data create(this) #pragma acc update device(this) size_t interpolation_range_size = ( last_index + 2 * nparts ) - first_index; @@ -368,7 +366,7 @@ void Interpolator3D2Order::fieldsWrapper( ElectroMagn *EMfields, Particles &part delta[ipart+0*nparts] = delta_p[0]; delta[ipart+1*nparts] = delta_p[1]; delta[ipart+2*nparts] = delta_p[2]; - #if defined(SMILEI_OPENACC_MODE) + #if defined(SMILEI_ACCELERATOR_GPU_OACC) #pragma acc exit data delete(this) #endif } // end ipart loop diff --git a/src/Interpolator/Interpolator3D2Order.h b/src/Interpolator/Interpolator3D2Order.h index 52f0335a0..1fa07438d 100755 --- a/src/Interpolator/Interpolator3D2Order.h +++ b/src/Interpolator/Interpolator3D2Order.h @@ -59,7 +59,7 @@ class Interpolator3D2Order : public Interpolator3D int idx, int idy, int idz, - int nx, + int /*nx*/, int ny, int nz ) { diff --git a/src/MovWindow/SimWindow.cpp b/src/MovWindow/SimWindow.cpp index 08ffada69..4ee9781c7 100755 --- a/src/MovWindow/SimWindow.cpp +++ b/src/MovWindow/SimWindow.cpp @@ -383,15 +383,10 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end loop nSpecies -#if defined ( SMILEI_ACCELERATOR_MODE ) - if ( params.gpu_computing ) { - // ADD NEW PARTS ON GPU - for( unsigned int 
ispec=0 ; ispecvecSpecies[ispec]->particles_to_move->clear(); - // mypatch->vecSpecies[ispec]->particles->copyParticles( 0, mypatch->vecSpecies[ispec]->getNbrOfParticles(), - // *mypatch->vecSpecies[ispec]->particles_to_move, 0 ); - mypatch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - mypatch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); +#if defined ( SMILEI_ACCELERATOR_GPU ) + if( params.gpu_computing ) { + for( auto spec: mypatch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } } #endif @@ -403,7 +398,7 @@ void SimWindow::shift( VectorPatch &vecPatches, SmileiMPI *smpi, Params ¶ms, } // end test patch_particle_created[ithread][j] -#if defined ( SMILEI_ACCELERATOR_MODE ) +#if defined ( SMILEI_ACCELERATOR_GPU ) // if ( params.gpu_computing ) { // Initializes only field data structures, particle data structure are initialized separately mypatch->allocateAndCopyFieldsOnDevice(); diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp index 6f7b9e0df..8136f36ff 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.cpp @@ -10,7 +10,7 @@ #include "MultiphotonBreitWheeler.h" #include "Species.h" -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -248,7 +248,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double *const __restrict__ pair1_chi = new_pair[1]->has_quantum_parameter ? new_pair[1]->getPtrChi() : nullptr; double *const __restrict__ pair1_tau = new_pair[1]->has_Monte_Carlo_process ? 
new_pair[1]->getPtrTau() : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Parameters for random generator unsigned long long seed; unsigned long long seq; @@ -325,7 +325,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } @@ -349,7 +349,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else @@ -406,7 +406,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, double pair_chi[2]; // Draw random number in [0,1[ -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC const double random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -431,7 +431,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, SMILEI_UNUSED( ibin ); // Creation of new electrons in the temporary array new_pair[0] new_pair[0]->createParticles( mBW_pair_creation_sampling_[0] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size int nparticles = new_pair[0]->size(); @@ -442,7 +442,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new paticles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( int ipair=i_pair_start; ipair < i_pair_start+mBW_pair_creation_sampling_[0]; ipair++ ) { @@ -466,7 +466,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() 
) { pair0_position_old_x[ipair]=position_x[ipart] ; @@ -494,7 +494,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, // Create particle for the second pair species new_pair[1]->createParticles( mBW_pair_creation_sampling_[1] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Final size nparticles = new_pair[1]->size(); @@ -505,7 +505,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, #endif // For all new paticles -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #endif for( auto ipair=i_pair_start; ipair < i_pair_start + mBW_pair_creation_sampling_[1]; ipair++ ) { @@ -530,7 +530,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } // + new_pair[k].momentum(i,ipair)*remaining_dt*inv_gamma; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Old positions if( particles.keepOldPositions() ) { pair1_position_old_x[ipair]=position_x[ipart] ; @@ -629,7 +629,7 @@ void MultiphotonBreitWheeler::operator()( Particles &particles, } } // end ipart loop -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } #endif } @@ -795,7 +795,7 @@ void MultiphotonBreitWheeler::removeDecayedPhotonsWithoutBinCompression( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles.overwriteParticle( last_photon_index, ipart ); #else #endif diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h index 6e14a37f3..71315d79a 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheeler.h @@ -115,7 +115,7 @@ class MultiphotonBreitWheeler //! \param bmin Pointer toward the first particle index of the bin in the Particles object //! 
\param bmax Pointer toward the last particle index of the bin in the Particles object //! \param ithread Thread index -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc routine seq //#endif void removeDecayedPhotonsWithoutBinCompression( diff --git a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h index 4f7f1ce72..9bef108b6 100755 --- a/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h +++ b/src/MultiphotonBreitWheeler/MultiphotonBreitWheelerTables.h @@ -54,7 +54,7 @@ class MultiphotonBreitWheelerTables //! the multiphoton Breit-Wheeler pair creation //! \param photon_chi photon quantum parameter //! \param[out] pair_chi quantum parameters of the pair -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif void computePairQuantumParameter( const double photon_chi, @@ -71,7 +71,7 @@ class MultiphotonBreitWheelerTables //! \param photon_chi photon quantum parameter //! 
\param gamma photon normalized energy // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeBreitWheelerPairProductionRate( diff --git a/src/Params/Params.cpp b/src/Params/Params.cpp index 803cdf9e5..69973d104 100755 --- a/src/Params/Params.cpp +++ b/src/Params/Params.cpp @@ -837,7 +837,7 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : PyTools::extract( "gpu_computing", gpu_computing, "Main" ); if( gpu_computing ) { -#if( defined( SMILEI_OPENACC_MODE ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if( defined( SMILEI_ACCELERATOR_GPU_OACC ) && defined( _OPENACC ) ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) // If compiled for GPU and asking for GPU MESSAGE( 1, "Smilei will run on GPU devices" ); #else @@ -1055,27 +1055,26 @@ Params::Params( SmileiMPI *smpi, std::vector namelistsFiles ) : // Extract the list of profiles and verify their content PyObject *p = PyTools::extract_py( "_profiles", "Laser", i_laser ); vector profiles; - vector profiles_n = {1, 2}; if( ! 
PyTools::py2pyvector( p, profiles ) ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile must be a list of 2 profiles", LINK_NAMELIST + std::string("#lasers") ); } Py_DECREF( p ); - if( profiles.size()!=2 ) { + if( profiles.size() != 2 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile needs 2 profiles.", LINK_NAMELIST + std::string("#lasers") ); } - if( profiles[1] == Py_None ) { - profiles .pop_back(); - profiles_n.pop_back(); - } - if( profiles[0] == Py_None ) { - profiles .erase( profiles .begin() ); - profiles_n.erase( profiles_n.begin() ); + vector profiles_n; + vector profiles_kept; + for( unsigned int i = 0; i < 2; i++ ) { + if( profiles[i] != Py_None ) { + profiles_kept.push_back( profiles[i] ); + profiles_n.push_back( i + 1 ); + } } - if( profiles.size() == 0 ) { + if( profiles_kept.size() == 0 ) { ERROR_NAMELIST( "For LaserOffset #" << n_laser_offset << ": space_time_profile cannot be [None, None]", LINK_NAMELIST + std::string("#lasers") ); } - for( unsigned int i=0; i namelistsFiles ) : // Make the propagation happen and write out the file if( ! smpi->test_mode ) { - propagateX( profiles, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); + propagateX( profiles_kept, profiles_n, offset, file, keep_n_strongest_modes, angle_z ); } } - + + for( auto p: profiles ) { + Py_DECREF( p ); + } + n_laser_offset ++; } } @@ -1227,7 +1230,7 @@ void Params::compute() // Set cluster_width_ if not set by the user if( cluster_width_ == -1 ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) cluster_width_ = patch_size_[0]; // On GPU, dont do the CPU automatic cluster_width computation, only one // bin is expected. 
@@ -1276,7 +1279,7 @@ void Params::compute() // Verify that cluster_width_ divides patch_size_[0] or patch_size_[n] in GPU mode -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) const int kClusterWidth = getGPUClusterWidth(); if( kClusterWidth < 0 ) { @@ -1886,7 +1889,7 @@ string Params::speciesField( string field_name ) return ""; } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) bool Params::isGPUParticleBinningAvailable() const { @@ -1903,7 +1906,7 @@ bool Params::isGPUParticleBinningAvailable() const #endif -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) int Params::getGPUClusterWidth() const { diff --git a/src/Params/Params.h b/src/Params/Params.h index e2b0603e6..32bf63a37 100755 --- a/src/Params/Params.h +++ b/src/Params/Params.h @@ -386,7 +386,7 @@ class Params //! bool isGPUParticleBinningAvailable() const; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Given dimension_id in [0, 3), return for dimension_id == : //! 
1: the 1D value (not implemented) diff --git a/src/ParticleBC/BoundaryConditionType.cpp b/src/ParticleBC/BoundaryConditionType.cpp index 318b6b289..304656eca 100755 --- a/src/ParticleBC/BoundaryConditionType.cpp +++ b/src/ParticleBC/BoundaryConditionType.cpp @@ -18,7 +18,7 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -28,9 +28,9 @@ void internal_inf( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= 0 && position[ ipart ] < limit_inf ) { + cell_keys[ ipart ] = -2 - 2 * direction; } } } @@ -40,7 +40,7 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l energy_change = 0.; // no energy loss during exchange const double* const position = species->particles->getPtrPosition( direction ); int* const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,cell_keys) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -50,9 +50,9 @@ void internal_sup( Species *species, int imin, int imax, int direction, double l cell_keys /* [imin:imax - imin] */ ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipart= limit_sup) { - cell_keys[ ipart ] = -1; + for( int ipart=imin ; ipart= 0 && position[ ipart ] >= limit_sup ) { + cell_keys[ ipart ] = -3 - 2 * direction; } } } @@ -63,10 +63,11 @@ void internal_inf_AM( Species 
*species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= 0 && distance2ToAxis < limit_inf2 ) { + cell_keys[ ipart ] = -4; } } } @@ -77,10 +78,11 @@ void internal_sup_AM( Species *species, int imin, int imax, int /*direction*/, d double* position_y = species->particles->getPtrPosition(1); double* position_z = species->particles->getPtrPosition(2); int* cell_keys = species->particles->getPtrCellKeys(); - for (int ipart=imin ; ipart= limit_sup*limit_sup ) { - cell_keys[ ipart ] = -1; + if( cell_keys[ ipart ] >= 0 && distance2ToAxis >= limit_sup2 ) { + cell_keys[ ipart ] = -5; } } } @@ -90,15 +92,15 @@ void reflect_particle_inf( Species *species, int imin, int imax, int direction, energy_change = 0.; // no energy loss during reflection double* position = species->particles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target is_device_ptr( position, momentum ) #pragma omp teams distribute parallel for #endif - for (int ipart=imin ; ipartparticles->getPtrPosition(direction); double* momentum = species->particles->getPtrMomentum(direction); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel deviceptr(position,momentum) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -187,9 +189,9 @@ void remove_particle_inf( Species* species, int imin, int imax, int direction, double limit_inf, - double dt, - std::vector& invgf, - Random* rand, + double /*dt*/, + std::vector& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -208,7 +210,7 @@ void 
remove_particle_inf( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector reduction(+ : change_in_energy) #else @@ -233,9 +235,9 @@ void remove_particle_sup( Species* species, int imin, int imax, int direction, double limit_sup, - double dt, - std::vector& invgf, - Random* rand, + double /*dt*/, + std::vector& /*invgf*/, + Random* /*rand*/, double& energy_change ) { @@ -254,7 +256,7 @@ void remove_particle_sup( Species* species, : change_in_energy ) #pragma omp teams distribute parallel for reduction( + \ : change_in_energy ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr(position,momentum_x,momentum_y,momentum_z,weight,charge,cell_keys) #pragma acc loop gang worker vector reduction(+ : change_in_energy) #else diff --git a/src/ParticleBC/PartBoundCond.h b/src/ParticleBC/PartBoundCond.h index 47ab7e235..7afd6ca9c 100755 --- a/src/ParticleBC/PartBoundCond.h +++ b/src/ParticleBC/PartBoundCond.h @@ -44,7 +44,7 @@ class PartBoundCond } else { int *const cell_keys = species->particles->getPtrCellKeys(); -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel deviceptr( cell_keys ) #pragma acc loop gang worker vector #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) diff --git a/src/Particles/Particles.cpp b/src/Particles/Particles.cpp index 308ee4319..30c685155 100755 --- a/src/Particles/Particles.cpp +++ b/src/Particles/Particles.cpp @@ -413,6 +413,51 @@ void Particles::copyParticles( unsigned int iPart, unsigned int nPart, Particles } } +// --------------------------------------------------------------------------------------------------------------------- +//! 
Copy particles indexed by array 'indices' to dest_id in dest_parts +//! The array 'indices' must be sorted in increasing order +//! cell keys not affected +// --------------------------------------------------------------------------------------------------------------------- +void Particles::copyParticles( vector indices, Particles &dest_parts, int dest_id ) +{ + const size_t transfer_size = indices.size(); + const size_t dest_new_size = dest_parts.size() + transfer_size; + const size_t displaced_size = dest_parts.size() - dest_id; + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.double_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.double_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.double_prop_[iprop] )[dest_id+i] = ( *double_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.short_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.short_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.short_prop_[iprop] )[dest_id+i] = ( *short_prop_[iprop] )[indices[i]]; + } + } + + for( unsigned int iprop=0 ; ipropresize( dest_new_size ); + auto loc = dest_parts.uint64_prop_[iprop]->begin() + dest_id; + move_backward( loc, loc + displaced_size, dest_parts.uint64_prop_[iprop]->end() ); + // Copy data + for( size_t i = 0; i < transfer_size; i++ ) { + ( *dest_parts.uint64_prop_[iprop] )[dest_id+i] = ( *uint64_prop_[iprop] )[indices[i]]; + } + } +} + // --------------------------------------------------------------------------------------------------------------------- //! Make a new particle at the position of another //! 
cell keys not affected @@ -529,6 +574,70 @@ void Particles::eraseParticle( unsigned int ipart, unsigned int npart, bool comp } + +// --------------------------------------------------------------------------------------------------------------------- +//! Erase particles indexed by array 'indices' to dest_id in dest_parts +//! The array 'indices' must be sorted in increasing order +//! cell keys not affected +// --------------------------------------------------------------------------------------------------------------------- +void Particles::eraseParticles( vector indices ) +{ + const size_t indices_size = indices.size(); + const size_t initial_size = size(); + + if( indices_size > 0 ) { + + for( auto prop : double_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : short_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + for( auto prop : uint64_prop_ ) { + // Relocate data to fill erased space + size_t j = 1, stop = ( 1 == indices_size ) ? initial_size : indices[1], to = indices[0]; + for( size_t from = indices[0]+1; from < initial_size; from++ ) { + if( from < stop ) { + ( *prop )[to] = ( *prop )[from]; + to++; + } else { + j++; + stop = ( j == indices_size ) ? 
initial_size : indices[j]; + } + } + // Resize + prop->resize( initial_size - indices_size ); + } + + } +} + // --------------------------------------------------------------------------------------------------------------------- // Print parameters of particle iPart // --------------------------------------------------------------------------------------------------------------------- @@ -1190,21 +1299,61 @@ void Particles::copyFromHostToDevice() { ERROR( "Device only feature, should not have come here!" ); } -void Particles::copyFromDeviceToHost() +void Particles::copyFromDeviceToHost( bool ) { ERROR( "Device only feature, should not have come here!" ); } -void Particles::extractParticles( Particles* particles_to_move ) +// Loop all particles and copy the outgoing ones to buffers +void Particles::copyLeavingParticlesToBuffers( const vector copy, const vector buffer ) { - particles_to_move->clear(); - for ( int ipart=0 ; ipart indices; + for( size_t ipart = 0; ipart < buffer[0]->size(); ipart++ ) { + int direction = -buffer[0]->cell_keys[ipart] - 2; + if( direction > 0 ) { + if( copy[direction] ) { + buffer[0]->copyParticle( ipart, *buffer[direction] ); + } + indices.push_back( ipart ); + } + } + buffer[0]->eraseParticles( indices ); + +#else + + // CPU + + for( size_t ipart = 0; ipart < size(); ipart++ ) { + if( cell_keys[ipart] < -1 ) { + int direction = -cell_keys[ipart] - 2; + if( copy[direction] ) { + copyParticle( ipart, *buffer[direction] ); + } } } + +#endif } +void Particles::copyLeavingParticlesToBuffer( Particles* ) +{ + ERROR( "Device only feature, should not have come here!" 
); +} + + void Particles::savePositions() { unsigned int ndim = Position.size(), npart = size(); double *p[3], *pold[3]; @@ -1249,13 +1398,13 @@ int Particles::eraseLeavingParticles() return 0; } -int Particles::injectParticles( Particles *particles_to_inject ) +int Particles::injectParticles( Particles */*particles_to_inject*/ ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); return 0; } -void Particles::importAndSortParticles( Particles *particles_to_inject ) +void Particles::importAndSortParticles( Particles */*particles_to_inject*/ ) { ERROR( "Device only feature, should not have come here! On CPU it's done in sortParticles." ); } diff --git a/src/Particles/Particles.h b/src/Particles/Particles.h index 1f67ab1cc..c0e5958e3 100755 --- a/src/Particles/Particles.h +++ b/src/Particles/Particles.h @@ -143,6 +143,8 @@ class Particles //! Insert nPart particles starting at ipart to dest_id in dest_parts void copyParticles( unsigned int iPart, unsigned int nPart, Particles &dest_parts, int dest_id ); + //! Transfer particles indexed by array indices to dest_id in dest_parts + void copyParticles( std::vector indices, Particles &dest_parts, int dest_id ); //! Make a new particle at the position of another void makeParticleAt( Particles &source_particles, unsigned int ipart, double w, short q=0., double px=0., double py=0., double pz=0. ); @@ -151,6 +153,8 @@ class Particles void eraseParticle( unsigned int iPart, bool compute_cell_keys = false ); //! Suppress nPart particles from iPart void eraseParticle( unsigned int iPart, unsigned int nPart, bool compute_cell_keys = false ); + //! Suppress indexed particles + void eraseParticles( std::vector indices ); //! 
Suppress all particles from iPart to the end of particle array void eraseParticleTrail( unsigned int iPart, bool compute_cell_keys = false ); @@ -431,7 +435,7 @@ class Particles virtual void initializeDataOnDevice(); virtual void initializeIDsOnDevice(); virtual void copyFromHostToDevice(); - virtual void copyFromDeviceToHost(); + virtual void copyFromDeviceToHost( bool copy_keys = false ); //! Return the pointer toward the Position[idim] vector virtual double* getPtrPosition( int idim ) { @@ -469,10 +473,10 @@ class Particles // Accelerator specific virtual functions // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Extract particles leaving the box to buffers // ----------------------------------------------------------------------------- - virtual void extractParticles( Particles *particles_to_move ); + void copyLeavingParticlesToBuffers( const std::vector copy, const std::vector buffer ); + virtual void copyLeavingParticlesToBuffer( Particles* buffer ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device @@ -480,7 +484,7 @@ class Particles virtual int eraseLeavingParticles(); // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move object and put + //! Inject particles from particles_to_inject object and put //! them in the Particles object //! 
\param[in,out] particles_to_inject Particles object containing particles to inject virtual int injectParticles( Particles *particles_to_inject ); diff --git a/src/Particles/ParticlesFactory.cpp b/src/Particles/ParticlesFactory.cpp index 00f51bbb0..34e9a3a83 100755 --- a/src/Particles/ParticlesFactory.cpp +++ b/src/Particles/ParticlesFactory.cpp @@ -7,7 +7,7 @@ // ----------------------------------------------------------------------------- #include "ParticlesFactory.h" -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void* CreateGPUParticles( const void* parameters, const void* a_parent_patch ); #endif @@ -22,7 +22,7 @@ Particles* ParticlesFactory::create( const Params& parameters, // We export a C interface to avoid potential ABI problems // that could occur when using two different compilers (e.g., one to // compile cuda/hip and another one for the host code). -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) particles = static_cast( CreateGPUParticles( ¶meters, &a_parent_patch ) ); #else SMILEI_UNUSED( a_parent_patch ); diff --git a/src/Particles/nvidiaParticles.cu b/src/Particles/nvidiaParticles.cu index d7a63f0b3..af45bfadd 100644 --- a/src/Particles/nvidiaParticles.cu +++ b/src/Particles/nvidiaParticles.cu @@ -33,14 +33,33 @@ // Cell key manipulation functor definition //////////////////////////////////////////////////////////////////////////////// -//! Structure with specific function count_if_out for thrust::tuple operator -//! Return True if the entry is -1 as in the cell keys vector for instance -struct count_if_out +//! Predicate for cell_keys +//! 
Return True if the entry is equal to `code` +template +struct cellKeyEquals { constexpr __host__ __device__ bool operator()( const int& x ) const { - return x == -1; + return x == code; + } +}; + +struct cellKeyNegative +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < 0; + } +}; + +struct cellKeyBelowMinus1 +{ + constexpr __host__ __device__ bool + operator()( const int& x ) const + { + return x < -1; } }; @@ -250,7 +269,7 @@ namespace detail { }; - //! This functor assign a cluster key to a_particle. + //! This functor checks the cluster key of a_particle. //! template struct OutOfClusterPredicate @@ -286,7 +305,7 @@ namespace detail { __host__ __device__ bool operator()( const Tuple& a_particle ) const { - return thrust::get<0>( a_particle ) /* cluster key */ == -1; + return thrust::get<0>( a_particle ) /* cluster key */ < 0; } }; @@ -467,34 +486,34 @@ namespace detail { // - compute bins // NOTE: This method consumes a lot of memory ! O(N) - const auto new_particle_to_inject_count = particle_to_inject.deviceSize(); - const auto current_local_particles_count = std::distance( first_particle, last_particle ); - const auto new_particle_count = new_particle_to_inject_count + current_local_particles_count; + const auto initial_count = std::distance( first_particle, last_particle ); + const auto inject_count = particle_to_inject.deviceSize(); + const auto new_count = initial_count + inject_count; // NOTE: We really want a non-initializing vector here! // It's possible to give a custom allocator to thrust::device_vector. // Create one with construct(<>) as a noop and derive from // thrust::device_malloc_allocator. For now we do an explicit resize. 
- particle_to_inject.softReserve( new_particle_count ); - particle_to_inject.resize( new_particle_count ); // We probably invalidated the iterators + particle_to_inject.softReserve( new_count ); + particle_to_inject.resize( new_count ); // We probably invalidated the iterators // Copy out of cluster/tile/chunk particles // partition_copy is way slower than copy_if/remove_copy_if on rocthrust // https://github.com/ROCmSoftwarePlatform/rocThrust/issues/247 - const auto first_particle_to_inject = particle_iterator_provider( particle_to_inject ); + const auto first_to_inject = particle_iterator_provider( particle_to_inject ); + const auto first_to_reorder = first_to_inject + inject_count; // NOTE: copy_if/remove_copy_if are stable. - const auto partitioned_particles_bounds_true = thrust::copy_if( thrust::device, + // First, copy particles that are not in their own cluster anymore + const auto first_already_ordered = thrust::copy_if( thrust::device, first_particle, last_particle, - // Dont overwrite the particle_to_inject (at the start of the array) - first_particle_to_inject + new_particle_to_inject_count, + first_to_reorder, OutOfClusterPredicate{ cluster_type } ); - const auto partitioned_particles_bounds_false = thrust::remove_copy_if( thrust::device, + // Then, copy particles that are still in their own cluster + const auto end = thrust::remove_copy_if( thrust::device, first_particle, last_particle, - // Do the copy with a destination - // starting from partitioned_particles_bounds_true - partitioned_particles_bounds_true, + first_already_ordered, OutOfClusterPredicate{ cluster_type } ); // Compute or recompute the cluster index of the particle_to_inject @@ -502,23 +521,23 @@ namespace detail { // - we can "save" some work here if cluster index is already computed // for the new particles to inject (not the one we got with copy_if). 
// - doComputeParticleClusterKey( first_particle_to_inject, - partitioned_particles_bounds_true, + doComputeParticleClusterKey( first_to_inject, + first_already_ordered, cluster_type ); - const auto first_particle_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); - const auto particle_to_rekey_count = std::distance( first_particle_to_inject, - partitioned_particles_bounds_true ); + const auto first_to_inject_no_key = particle_no_key_iterator_provider( particle_to_inject ); + const auto particle_to_rekey_count = std::distance( first_to_inject, + first_already_ordered ); doSortParticleByKey( particle_to_inject.getPtrCellKeys(), particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, - first_particle_to_inject_no_key ); + first_to_inject_no_key ); // This free generates a lot of memory fragmentation. // particle_container.free(); // Same as for particle_to_inject, non-initializing vector is best. - particle_container.softReserve( new_particle_count ); - particle_container.resize( new_particle_count ); + particle_container.softReserve( new_count ); + particle_container.resize( new_count ); // Merge by key // NOTE: Dont merge in place on GPU. That means we need an other large buffer! 
@@ -527,9 +546,9 @@ namespace detail { particle_to_inject.getPtrCellKeys(), // Input range 1, first key particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 1, last key particle_to_inject.getPtrCellKeys() + particle_to_rekey_count, // Input range 2, first key - particle_to_inject.getPtrCellKeys() + new_particle_count, // Input range 2, last key - first_particle_to_inject_no_key, // Input range 1, first value - first_particle_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value + particle_to_inject.getPtrCellKeys() + new_count, // Input range 2, last key + first_to_inject_no_key, // Input range 1, first value + first_to_inject_no_key + particle_to_rekey_count, // Input range 2, first value particle_container.getPtrCellKeys(), // Output range first key particle_no_key_iterator_provider( particle_container ) ); // Output range first value @@ -1329,7 +1348,7 @@ void nvidiaParticles::copyFromHostToDevice() // ------------------------------------------------------------------------------------------------- //! Copy device to host // ------------------------------------------------------------------------------------------------- -void nvidiaParticles::copyFromDeviceToHost() +void nvidiaParticles::copyFromDeviceToHost( bool copy_keys ) { for (int idim=0;idimcopyFromDeviceToHost( true ); +} + + +//! Copy particles which statisfy some predicate +template +void nvidiaParticles::copyParticlesByPredicate( Particles* buffer, Predicate pred ) { // TODO(Etienne M): We are doing extra work. We could use something like - // std::partition to output the invalidated particles in particles_to_move + // std::partition to output the invalidated particles in buffer // and keep the good ones. This would help us avoid the std::remove_if in // the particle injection and sorting algorithm. 
- - // Manage the send data structure - nvidiaParticles* const cp_parts = static_cast( particles_to_move ); - const int nparts = gpu_nparts_; - const int position_dimension_count = nvidia_position_.size(); - - const int nparts_to_move = thrust::count_if( thrust::device, + + const int nparts = gpu_nparts_; + // Iterator of the main data structure + // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator + const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), + nvidia_momentum_[0].begin(), + nvidia_momentum_[1].begin(), + nvidia_momentum_[2].begin(), + nvidia_weight_.begin(), + nvidia_charge_.begin(), + nvidia_cell_keys_.begin() ) ); + const auto source_iterator_last = source_iterator_first + nparts; // std::advance + + nvidiaParticles* const cp_parts = static_cast( buffer ); + + const int nparts_to_copy = thrust::count_if( thrust::device, nvidia_cell_keys_.cbegin(), nvidia_cell_keys_.cbegin() + nparts, - count_if_out() ); + pred ); // Resize it, if too small (copy_if do not resize) - cp_parts->resize( nparts_to_move ); + cp_parts->resize( nparts_to_copy ); - // Iterator of the main data structure - // NOTE: https://nvidia.github.io/thrust/api/classes/classthrust_1_1zip__iterator.html#class-thrustzip_iterator - const auto source_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), - nvidia_momentum_[0].begin(), - nvidia_momentum_[1].begin(), - nvidia_momentum_[2].begin(), - nvidia_weight_.begin(), - nvidia_charge_.begin() ) ); - const auto source_iterator_last = source_iterator_first + nparts; // std::advance const auto destination_iterator_first = thrust::make_zip_iterator( thrust::make_tuple( cp_parts->nvidia_position_[0].begin(), cp_parts->nvidia_momentum_[0].begin(), cp_parts->nvidia_momentum_[1].begin(), cp_parts->nvidia_momentum_[2].begin(), cp_parts->nvidia_weight_.begin(), - cp_parts->nvidia_charge_.begin() ) 
); + cp_parts->nvidia_charge_.begin(), + cp_parts->nvidia_cell_keys_.begin() ) ); - // Copy send particles in dedicated data structure if nvidia_cell_keys_=0 (currently = 1 if keeped, new PartBoundCond::apply(...)) + // Copy send particles in dedicated data structure thrust::copy_if( thrust::device, source_iterator_first, source_iterator_last, - // Copy depending on count_if_out()(nvidia_cell_keys_[i]) nvidia_cell_keys_.cbegin(), destination_iterator_first, - count_if_out() ); + pred ); - // Copy the other position values depending on the simulation's grid - // dimensions - for( int i = 1; i < position_dimension_count; ++i ) { + // Copy the other position values depending on the simulation's grid dimensions + const int ndim_particles = nvidia_position_.size(); + for( int i = 1; i < ndim_particles; ++i ) { thrust::copy_if( thrust::device, nvidia_position_[i].cbegin(), nvidia_position_[i].cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_position_[i].begin(), - count_if_out() ); + pred ); } // Special treatment for chi if radiation emission @@ -1431,7 +1462,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) nvidia_chi_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_chi_.begin(), - count_if_out() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1440,7 +1471,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) nvidia_tau_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_tau_.begin(), - count_if_out() ); + pred ); } if( tracked ) { @@ -1449,10 +1480,9 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) nvidia_id_.cbegin() + nparts, nvidia_cell_keys_.cbegin(), cp_parts->nvidia_id_.begin(), - count_if_out() ); + pred ); } - particles_to_move->copyFromDeviceToHost(); } @@ -1475,7 +1505,7 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) // std::begin( nvidia_position_[i] ), // std::begin( nvidia_position_[i] ) + nparts, // 
std::cbegin( nvidia_cell_keys_ ), -// count_if_out() ); +// cellKeyEquals<-1>() ); // } // //} @@ -1484,14 +1514,19 @@ void nvidiaParticles::extractParticles( Particles* particles_to_move ) //! Erase particles leaving the patch object on device // ----------------------------------------------------------------------------- int nvidiaParticles::eraseLeavingParticles() +{ + return eraseParticlesByPredicate( cellKeyNegative() ); +} + +template +int nvidiaParticles::eraseParticlesByPredicate( Predicate pred ) { const int position_dimension_count = nvidia_position_.size(); const int nparts = gpu_nparts_; const int nparts_to_remove = thrust::count_if( thrust::device, nvidia_cell_keys_.begin(), nvidia_cell_keys_.begin() + nparts, - count_if_out() ); - + pred ); if( nparts_to_remove > 0 ) { const auto first_particle = thrust::make_zip_iterator( thrust::make_tuple( nvidia_position_[0].begin(), @@ -1508,7 +1543,7 @@ int nvidiaParticles::eraseLeavingParticles() first_particle, last_particle, nvidia_cell_keys_.cbegin(), - count_if_out() ); + pred ); // Remove the other position values depending on the simulation's grid // dimensions @@ -1517,7 +1552,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_position_[i].begin(), nvidia_position_[i].begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + pred ); } if( has_quantum_parameter ) { @@ -1525,7 +1560,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_chi_.begin(), nvidia_chi_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + pred ); } if( has_Monte_Carlo_process ) { @@ -1533,7 +1568,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_tau_.begin(), nvidia_tau_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + pred ); } if( tracked ) { @@ -1541,7 +1576,7 @@ int nvidiaParticles::eraseLeavingParticles() nvidia_id_.begin(), nvidia_id_.begin() + nparts, nvidia_cell_keys_.cbegin(), - count_if_out() ); + pred ); } // Update current number of particles @@ -1679,8 
+1714,7 @@ void nvidiaParticles::importAndSortParticles( Particles* particles_to_inject ) int nvidiaParticles::prepareBinIndex() { if( first_index.size() == 0 ) { - // Some Particles object like particles_to_move do not have allocated - // bins, we skip theses. + // Some Particles object do not have allocated bins, we skip theses. return -1; } diff --git a/src/Particles/nvidiaParticles.h b/src/Particles/nvidiaParticles.h index 249a9fcf2..5fa0a933b 100644 --- a/src/Particles/nvidiaParticles.h +++ b/src/Particles/nvidiaParticles.h @@ -78,7 +78,7 @@ class nvidiaParticles : public Particles void copyFromHostToDevice() override; //! Update the particles from device to host - void copyFromDeviceToHost() override; + void copyFromDeviceToHost( bool copy_keys = false ) override; unsigned int deviceCapacity() const override; @@ -113,18 +113,23 @@ class nvidiaParticles : public Particles }; // ----------------------------------------------------------------------------- - //! Extract particles from the Particles object and put - //! them in the Particles object `particles_to_move` + //! Move leaving particles to the buffers // ----------------------------------------------------------------------------- - void extractParticles( Particles* particles_to_move ) override; + void copyLeavingParticlesToBuffer( Particles* buffer ) override; + + template + void copyParticlesByPredicate( Particles* buffer, Predicate pred ); // ----------------------------------------------------------------------------- //! Erase particles leaving the patch object on device and returns the number of particle removed // ----------------------------------------------------------------------------- int eraseLeavingParticles() override; + template + int eraseParticlesByPredicate( Predicate pred ); + // ----------------------------------------------------------------------------- - //! Inject particles from particles_to_move into *this and return he number of particle added + //! 
Inject particles from particles_to_inject into *this and return the number of particle added // ----------------------------------------------------------------------------- int injectParticles( Particles* particles_to_inject ) override; diff --git a/src/Patch/Patch.cpp b/src/Patch/Patch.cpp index b8ed401d9..ca76c6ece 100755 --- a/src/Patch/Patch.cpp +++ b/src/Patch/Patch.cpp @@ -445,7 +445,7 @@ void Patch::setLocationAndAllocateFields( Params ¶ms, DomainDecomposition *d Patch::~Patch() { -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU deleteFieldsOnDevice(); #endif @@ -517,220 +517,155 @@ void Patch::updateMPIenv( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void Patch::cleanMPIBuffers( int ispec, Params ¶ms ) { - int ndim = params.nDim_field; + size_t ndim = params.nDim_field; + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; - for( int iDim=0 ; iDim < ndim ; iDim++ ) { + for( size_t iDim=0 ; iDim < ndim ; iDim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].clear(); - //vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize(0); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; + buffer.partRecv[iDim][iNeighbor]->clear(); + buffer.partSend[iDim][iNeighbor]->clear(); } } } // cleanMPIBuffers // --------------------------------------------------------------------------------------------------------------------- -// Split particles Id to send in per direction and per patch neighbor dedicated buffers -// Apply periodicity if necessary +// Copy particles to be exchanged to buffers // --------------------------------------------------------------------------------------------------------------------- -void 
Patch::initExchParticles( int ispec, Params ¶ms ) +void Patch::copyExchParticlesToBuffers( int ispec, Params ¶ms ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - int ndim = params.nDim_field; - int idim, check; -// double xmax[3]; - - for( int iDim=0 ; iDim < ndim ; iDim++ ) { - for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor].clear();//resize(0,ndim); - vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - } + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + Particles &part = *vecSpecies[ispec]->particles; + + cleanMPIBuffers( ispec, params ); + + // Make a list of buffers + vector copy( params.nDim_field*2, false ); + vector sendBuffer( params.nDim_field*2, nullptr ); + for( size_t iDim = 0; iDim < params.nDim_field; iDim++ ) { + copy[2*iDim+0] = neighbor_[iDim][0] != MPI_PROC_NULL; + copy[2*iDim+1] = neighbor_[iDim][1] != MPI_PROC_NULL; + sendBuffer[2*iDim+0] = buffer.partSend[iDim][0]; + sendBuffer[2*iDim+1] = buffer.partSend[iDim][1]; } - - int n_part_send = cuParticles.size(); - - int iPart; - - // Define where particles are going - //Put particles in the send buffer it belongs to. Priority to lower dimensions. - if( params.geometry != "AMcylindrical" ) { - for( int i=0 ; iMPI_buffer_.part_index_send[idim][0].push_back( iPart ); - } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. 
- check = 1; - } else if( cuParticles.position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( iPart ); - } - check = 1; - } - idim++; - } - } - } else { //if (geometry == "AMcylindrical") - double r_min2, r_max2; - r_max2 = max_local_[1] * max_local_[1] ; - r_min2 = min_local_[1] * min_local_[1] ; - for( int i=0 ; iboundary_conditions_[0][0]!="periodic" ) ) { - continue; - } - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][0].push_back( iPart ); - //MESSAGE("Sending particle to the left x= " << cuParticles.position(0,iPart) << " xmin = " << min_local_[0] ); - } - //If particle is outside of the global domain (has no neighbor), it will not be put in a send buffer and will simply be deleted. - } else if( cuParticles.position( 0, iPart ) >= max_local_[0] ) { - if ( (Pcoordinates[0]==params.number_of_patches[0]-1) && ( vecSpecies[ispec]->boundary_conditions_[0][1]!="periodic" ) ) { - continue; - } - if( neighbor_[0][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[0][1].push_back( iPart ); - // MESSAGE("Sending particle to the right x= " << cuParticles.position(0,iPart) << " xmax = " << max_local_[0] ); - } - } else if( cuParticles.distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( iPart ); - //MESSAGE("Sending particle to the south r= " << cuParticles.distance2ToAxis(iPart) << " rmin2 = " << r_min2 ); - } - } else if( cuParticles.distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( iPart ); - //MESSAGE("Sending particle to the north r= " << cuParticles.distance2ToAxis(iPart) << " rmax2 = " << r_max2 << " rmin2= " << r_min2 ); - } - } - - } + if( params.geometry == "AMcylindrical" ) { + copy[0] = copy[0] && ( Pcoordinates[0]!=0 || 
vecSpecies[ispec]->boundary_conditions_[0][0]=="periodic" ); + copy[1] = copy[1] && ( Pcoordinates[0]!=params.number_of_patches[0]-1 || vecSpecies[ispec]->boundary_conditions_[0][1]=="periodic" ); } - -} // initExchParticles(... iDim) + + part.copyLeavingParticlesToBuffers( copy, sendBuffer ); + +} // copyExchParticlesToBuffers(... iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, start exchange of number of particles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : inhereted from previous SmileiMPI::exchangeParticles() +// Exchange number of particles to exchange to establish or not a communication // --------------------------------------------------------------------------------------------------------------------- void Patch::exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int h0 = ( *vecPatch )( 0 )->hindex; - /********************************************************************************/ - // Exchange number of particles to exchange to establish or not a communication - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborsize(); + + // Send number of particles from neighbor if( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) { - vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = ( vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - //If neighbour is MPI ==> I send him the number of particles I'll send later. 
int local_hindex = hindex - vecPatch->refHindex_; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Isend( &( vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor] ), 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); + MPI_Isend( &buffer.partSendSize[iDim][iNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &buffer.srequest[iDim][iNeighbor] ); } else { - //Else, I directly set the receive size to the correct value. - ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] = vecSpecies[ispec]->MPI_buffer_.part_index_send_sz[iDim][iNeighbor]; + // If the destination is in the same MPI, directly set the number at destination + int destination_hindex = neighbor_[iDim][iNeighbor] - vecPatch->refHindex_; + SpeciesMPIbuffers &destination_buffer = ( *vecPatch )( destination_hindex )->vecSpecies[ispec]->MPI_buffer_; + destination_buffer.partRecvSize[iDim][iOppositeNeighbor] = buffer.partSendSize[iDim][iNeighbor]; } - } // END of Send - - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - //If other neighbour is MPI ==> I receive the number of particles I'll receive later. 
- int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; + } + + // Receive number of particles from neighbor + if( neighbor_[iDim][iOppositeNeighbor]!=MPI_PROC_NULL ) { + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2] ), 1, MPI_INT, MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); + MPI_Irecv( &buffer.partRecvSize[iDim][iOppositeNeighbor], 1, MPI_INT, MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); } } - }//end loop on nb_neighbors. - + + } + } // exchNbrOfParticles(... iDim) +// --------------------------------------------------------------------------------------------------------------------- +// Wait for end of communications over number of particles +// --------------------------------------------------------------------------------------------------------------------- void Patch::endNbrOfParticles( int ispec, int iDim ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - /********************************************************************************/ - // Wait for end of communications over number of particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - } + int iOppositeNeighbor = ( iNeighbor+1 )%2; + + MPI_Status sstat[2]; + MPI_Status rstat[2]; + if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &( buffer.srequest[iDim][iNeighbor] ), &( 
sstat[iNeighbor] ) ); } - if( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - if( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]!=0 ) { - //If I receive particles over MPI, I initialize my receive buffer with the appropriate size. - vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2].initialize( vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2], cuParticles ); - } - } + if( is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &( buffer.rrequest[iDim][iOppositeNeighbor] ), &( rstat[iOppositeNeighbor] ) ); } } - } // END endNbrOfParticles(... iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of number of particles and really send particles +// For direction iDim, prepare particles to be sent // - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) // - smpi : used smpi->periods_ // --------------------------------------------------------------------------------------------------------------------- void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ) { - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_send; - int h0 = ( *vecPatch )( 0 )->hindex; double x_max = params.cell_length[iDim]*( params.global_size_[iDim] ); - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Enabled periodicity - if( smpi->periods_[iDim]==1 ) { - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) < 0. 
) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) += x_max; - } else if( ( iNeighbor==1 ) && ( Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) && ( cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) >= x_max ) ) { - cuParticles.position( iDim, vecSpecies[ispec]->MPI_buffer_.part_index_send[iDim][iNeighbor][iPart] ) -= x_max; + + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + + // Enabled periodicity + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL ) { + if( partSend.size() > 0 && smpi->periods_[iDim]==1 ) { + if( iNeighbor == 0 && Pcoordinates[iDim] == 0 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) < 0. ) { + partSend.position( iDim, iPart ) += x_max; + } + } + } + if( iNeighbor == 1 && Pcoordinates[iDim] == params.number_of_patches[iDim]-1 ) { + for( size_t iPart=0; iPart < partSend.size(); iPart++ ) { + if( partSend.position( iDim, iPart ) >= x_max ) { + partSend.position( iDim, iPart ) -= x_max; + } } } } - // Send particles + + // Initialize receive buffer with the appropriate size if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // If MPI comm, first copy particles in the sendbuffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ); + if( buffer.partRecvSize[iDim][iNeighbor]!=0 ) { + buffer.partRecv[iDim][iNeighbor]->initialize( buffer.partRecvSize[iDim][iNeighbor], *vecSpecies[ispec]->particles ); } + // Swap particles to other patch directly if it belongs to the same MPI } else { - //If not MPI comm, copy particles directly in the receive buffer - for( int iPart=0 ; iPartMPI_buffer_.part_index_send[iDim][iNeighbor][iPart], ( ( *vecPatch )( neighbor_[iDim][iNeighbor]- h0 )->vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); - } + int iOppositeNeighbor = ( iNeighbor+1 
)%2; + SpeciesMPIbuffers &neighbor_buffer = ( *vecPatch )( neighbor_[iDim][iNeighbor]- vecPatch->refHindex_ )->vecSpecies[ispec]->MPI_buffer_; + swap( buffer.partSend[iDim][iNeighbor], neighbor_buffer.partRecv[iDim][iOppositeNeighbor] ); } - } // END of Send - + } + } // END for iNeighbor } // END prepareParticles(... iDim) @@ -738,169 +673,133 @@ void Patch::prepareParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iD void Patch::exchParticles( SmileiMPI *smpi, int ispec, Params &, int iDim, VectorPatch *vecPatch ) { - int n_part_send, n_part_recv; - - for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor] ).size(); - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - // Send particles - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - // Then send particles - int local_hindex = hindex - vecPatch->refHindex_; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ) ); - MPI_Isend( &( ( vecSpecies[ispec]->MPI_buffer_.partSend[iDim][iNeighbor] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ) ); - } - } // END of Send - - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - // If MPI comm, receive particles in the recv buffer previously initialized. 
- vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ) ); - int local_hindex = neighbor_[iDim][( iNeighbor+1 )%2] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][( iNeighbor+1 )%2] ]; - int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); - MPI_Irecv( &( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( 0, 0 ) ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][( iNeighbor+1 )%2], tag, MPI_COMM_WORLD, &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ) ); - } - - } // END of Recv - - } // END for iNeighbor - + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( int iNeighbor=0; iNeighborrefHindex_; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partSend ); + MPI_Isend( &partSend.position( 0, 0 ), 1, vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iNeighbor], tag, MPI_COMM_WORLD, &( buffer.srequest[iDim][iNeighbor] ) ); + } + + // Receive + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] = smpi->createMPIparticles( &partRecv ); + int local_hindex = neighbor_[iDim][iOppositeNeighbor] - smpi->patch_refHindexes[ MPI_neighbor_[iDim][iOppositeNeighbor] ]; + int tag = buildtag( local_hindex, iDim+1, iNeighbor+3 ); + MPI_Irecv( &partRecv.position( 0, 0 ), 1, vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor], MPI_neighbor_[iDim][iOppositeNeighbor], tag, MPI_COMM_WORLD, &buffer.rrequest[iDim][iOppositeNeighbor] ); + } + + } + } // END exchParticles(... 
iDim) // --------------------------------------------------------------------------------------------------------------------- -// For direction iDim, finalize receive of particles, temporary store particles if diagonalParticles -// And store recv particles at their definitive place. -// Call Patch::cleanupSentParticles -// - vecPatch : used for intra-MPI process comm (direct copy using Particels::copyParticles) -// - smpi : used smpi->periods_ +// For direction iDim, wait receive of particles // --------------------------------------------------------------------------------------------------------------------- -void Patch::finalizeExchParticles( int ispec, int iDim ) +void Patch::waitExchParticles( int ispec, int iDim ) { - - int n_part_send, n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_send[iDim][iNeighbor].size(); - n_part_recv = vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][iNeighbor]!=MPI_PROC_NULL ) && ( n_part_send!=0 ) ) { - if( is_a_MPI_neighbor( iDim, iNeighbor ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.srequest[iDim][iNeighbor] ), &( sstat[iNeighbor] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ) ); - } + + int iOppositeNeighbor = ( iNeighbor+1 )%2; + Particles &partSend = *buffer.partSend[iDim][iNeighbor]; + Particles &partRecv = *buffer.partRecv[iDim][iOppositeNeighbor]; + + if( partSend.size() != 0 && is_a_MPI_neighbor( iDim, iNeighbor ) ) { + MPI_Wait( &buffer.srequest[iDim][iNeighbor], &sstat[iNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartSend[( iDim*2 )+iNeighbor] ); } - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( 
n_part_recv!=0 ) ) { - if( is_a_MPI_neighbor( iDim, ( iNeighbor+1 )%2 ) ) { - MPI_Wait( &( vecSpecies[ispec]->MPI_buffer_.rrequest[iDim][( iNeighbor+1 )%2] ), &( rstat[( iNeighbor+1 )%2] ) ); - MPI_Type_free( &( vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ) ); - } + if( partRecv.size() != 0 && is_a_MPI_neighbor( iDim, iOppositeNeighbor ) ) { + MPI_Wait( &buffer.rrequest[iDim][iOppositeNeighbor], &rstat[iOppositeNeighbor] ); + MPI_Type_free( &vecSpecies[ispec]->typePartRecv[( iDim*2 )+iNeighbor] ); } } } void Patch::cornersParticles( int ispec, Params ¶ms, int iDim ) { - int ndim = params.nDim_field; - int idim, check; - - Particles &cuParticles = ( *vecSpecies[ispec]->particles_to_move ); - - int n_part_recv; - - /********************************************************************************/ - // Wait for end of communications over Particles - /********************************************************************************/ + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + // No need to treat diag particles at last dimension + if( iDim == ndim-1 ) { + return; + } + for( int iNeighbor=0 ; iNeighborMPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]; - - if( ( neighbor_[iDim][( iNeighbor+1 )%2]!=MPI_PROC_NULL ) && ( n_part_recv!=0 ) ) { - - // Treat diagonalParticles - if( iDim < ndim-1 ) { // No need to treat diag particles at last dimension. - if( params.geometry != "AMcylindrical" ) { - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - check = 0; - idim = iDim+1;//We check next dimension - while( check == 0 && idimMPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) < min_local_[idim] ) { - if( neighbor_[idim][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... 
- //vecSpecies[ispec]->addSpaceForOneParticle(); - //... and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][0].push_back( cuParticles.size()-1 ); - } - //Remove it from receive buffer. - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).position( idim, iPart ) >= max_local_[idim] ) { - if( neighbor_[idim][1]!=MPI_PROC_NULL ) { //if neighbour exists - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[iDim][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[iDim][( iNeighbor+1 )%2]--; - check = 1; - } - idim++; + + Particles &partRecv = *buffer.partRecv[iDim][iNeighbor]; + + vector> indices_corner_min( ndim-iDim-1 ); + vector> indices_corner_max( ndim-iDim-1 ); + vector indices_all_corners; + + if( neighbor_[iDim][iNeighbor] != MPI_PROC_NULL && partRecv.size() != 0 ) { + + // Find corner particles and store their indices + if( params.geometry != "AMcylindrical" ) { + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( partRecv.position( otherDim, iPart ) < min_local_[otherDim] ) { + indices_corner_min[otherDim-iDim-1].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.position( otherDim, iPart ) >= max_local_[otherDim] ) { + indices_corner_max[otherDim-iDim-1].push_back( iPart ); + 
indices_all_corners.push_back( iPart ); + break; } } - } else { //In AM geometry - //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. - double r_min2, r_max2; - r_min2 = min_local_[1]*min_local_[1]; - r_max2 = max_local_[1]*max_local_[1]; - for( int iPart=n_part_recv-1 ; iPart>=0; iPart-- ) { - //MESSAGE("test particle diag r2 = " << (vecSpecies[ispec]->MPI_buffer_.partRecv[0][(iNeighbor+1)%2]).distance2ToAxis(iPart) << "rmin2 = " << r_min2 << " rmax2 = " << r_max2 ); - if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) < r_min2 ) { - if( neighbor_[1][0]!=MPI_PROC_NULL ) { //if neighbour exists - //... copy it at the back of the local particle vector ... - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... - //vecSpecies[ispec]->addSpaceForOneParticle(); - //... and add its index to the particles to be sent later... - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][0].push_back( cuParticles.size()-1 ); - //..without forgeting to add it to the list of particles to clean. - } - //Remove it from receive buffer. - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } - //Other side of idim - else if( ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).distance2ToAxis( iPart ) >= r_max2 ) { - if( neighbor_[1][1]!=MPI_PROC_NULL ) { //if neighbour exists - //MESSAGE("particle diag +R"); - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).copyParticle( iPart, cuParticles ); - //...adjust particles->last_index or cell_keys ... 
- //vecSpecies[ispec]->addSpaceForOneParticle(); - vecSpecies[ispec]->MPI_buffer_.part_index_send[1][1].push_back( cuParticles.size()-1 ); - } - ( vecSpecies[ispec]->MPI_buffer_.partRecv[0][( iNeighbor+1 )%2] ).eraseParticle( iPart ); - vecSpecies[ispec]->MPI_buffer_.part_index_recv_sz[0][( iNeighbor+1 )%2]--; - } + } + + } else { //In AM geometry + + //In this case, iDim = 0 and idim = iDim + 1 = 1. We only have to check potential comms along R. + double r_min2 = min_local_[1]*min_local_[1]; + double r_max2 = max_local_[1]*max_local_[1]; + + for( size_t iPart = 0; iPart < partRecv.size(); iPart++ ) { + if( partRecv.distance2ToAxis( iPart ) < r_min2 ) { + indices_corner_min[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; + } else if( partRecv.distance2ToAxis( iPart ) >= r_max2 ) { + indices_corner_max[0].push_back( iPart ); + indices_all_corners.push_back( iPart ); + break; } } - }//If not last dim for diagonal particles. + + } + + // Copy corner particles to the end of the particles to be sent for the following dimension + for( size_t otherDim = iDim+1; otherDim < (size_t) ndim; otherDim++ ) { + if( indices_corner_min[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][0] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_min[otherDim-iDim-1], *buffer.partSend[otherDim][0], buffer.partSend[otherDim][0]->size() ); + } + if( indices_corner_max[otherDim-iDim-1].size() > 0 && neighbor_[otherDim][1] != MPI_PROC_NULL ) { + partRecv.copyParticles( indices_corner_max[otherDim-iDim-1], *buffer.partSend[otherDim][1], buffer.partSend[otherDim][1]->size() ); + } + } + + // Erase corner particles from the current recv array + if( indices_all_corners.size() > 0 ) { + partRecv.eraseParticles( indices_all_corners ); + } + } //If received something } //loop i Neighbor } @@ -925,22 +824,20 @@ void Patch::importAndSortParticles( int ispec, Params ¶ms ) void Patch::cleanParticlesOverhead( Params ¶ms ) { - int ndim = params.nDim_field; + for( 
unsigned int ispec=0 ; ispecparticles ); - - for( int idim = 0; idim < ndim; idim++ ) { + SpeciesMPIbuffers &buffer = vecSpecies[ispec]->MPI_buffer_; + + for( size_t idim = 0; idim < params.nDim_field; idim++ ) { for( int iNeighbor=0 ; iNeighborMPI_buffer_.partRecv[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partRecv[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].clear(); - vecSpecies[ispec]->MPI_buffer_.partSend[idim][iNeighbor].shrinkToFit( ); - vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor].clear(); - vector( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ).swap( vecSpecies[ispec]->MPI_buffer_.part_index_send[idim][iNeighbor] ); + buffer.partRecv[idim][iNeighbor]->clear(); + buffer.partRecv[idim][iNeighbor]->shrinkToFit( ); + buffer.partSend[idim][iNeighbor]->clear(); + buffer.partSend[idim][iNeighbor]->shrinkToFit( ); } } - - cuParticles.shrinkToFit( ); + + vecSpecies[ispec]->particles->shrinkToFit( ); } } @@ -1256,7 +1153,7 @@ void Patch::computePoynting() { } } -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- // Allocate data on device @@ -1414,7 +1311,6 @@ void Patch::deleteFieldsOnDevice() // for( unsigned int ispec=0 ; ispec<( *this )( ipatch )->vecSpecies.size() ; ispec++ ) { // Species *spec = species( ipatch, ispec ); // spec->particles->initializeDataOnDevice(); -// spec->particles_to_move->initializeDataOnDevice(); // //#pragma acc enter data copyin(spec->nrj_radiation) // } diff --git a/src/Patch/Patch.h b/src/Patch/Patch.h index 6fc3f7578..8d06d21c2 100755 --- a/src/Patch/Patch.h +++ b/src/Patch/Patch.h @@ -174,7 +174,7 @@ class Patch //! Clean the MPI buffers for communications void cleanMPIBuffers( int ispec, Params ¶ms ); //! 
manage Idx of particles per direction, - void initExchParticles( int ispec, Params ¶ms ); + void copyExchParticlesToBuffers( int ispec, Params ¶ms ); //! init comm nbr of particles void exchNbrOfParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize comm / nbr of particles, init exch / particles @@ -184,7 +184,7 @@ class Patch //! effective exchange of particles void exchParticles( SmileiMPI *smpi, int ispec, Params ¶ms, int iDim, VectorPatch *vecPatch ); //! finalize exch / particles - void finalizeExchParticles( int ispec, int iDim ); + void waitExchParticles( int ispec, int iDim ); //! Treat diagonalParticles void cornersParticles( int ispec, Params ¶ms, int iDim ); //! inject particles received in main data structure and particles sorting @@ -194,7 +194,7 @@ class Patch //! delete Particles included in the index of particles to exchange. Assumes indexes are sorted. void cleanupSentParticles( int ispec, std::vector *indexes_of_particles_to_exchange ); -#ifdef SMILEI_ACCELERATOR_MODE +#ifdef SMILEI_ACCELERATOR_GPU //! 
Allocate and copy all the field grids on device void allocateAndCopyFieldsOnDevice(); diff --git a/src/Patch/SyncVectorPatch.cpp b/src/Patch/SyncVectorPatch.cpp index 09817b201..7f2cd183e 100755 --- a/src/Patch/SyncVectorPatch.cpp +++ b/src/Patch/SyncVectorPatch.cpp @@ -2,7 +2,7 @@ #include "SyncVectorPatch.h" #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif #include "Params.h" @@ -24,26 +24,15 @@ template void SyncVectorPatch::exchangeAlongAllDirections,cField template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); template void SyncVectorPatch::exchangeAlongAllDirectionsNoOMP,cField>( std::vector fields, VectorPatch &vecPatches, SmileiMPI *smpi ); -void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchextractParticles(); - vecPatches( ipatch )->initExchParticles( ispec, params ); - } - - // Init comm in direction 0 -#ifndef _NO_MPI_TM - #pragma omp for schedule(runtime) -#else - #pragma omp single -#endif - for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, 0, &vecPatches ); + vecPatches( ipatch )->copyExchParticlesToBuffers( ispec, params ); } + + // Start exchange along dimension 0 only + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); } // --------------------------------------------------------------------------------------------------------------------- @@ -52,24 +41,17 @@ void SyncVectorPatch::exchangeParticles( VectorPatch &vecPatches, int ispec, Par //! - the importation of the new particles in the particle property arrays //! 
- the sorting of particles // --------------------------------------------------------------------------------------------------------------------- -void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ) { - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, 0, params, smpi ); - - // Per direction + // finish exchange along dimension 0 only + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, 0, params, smpi ); + + // Other directions for( unsigned int iDim=1 ; iDimexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); - } - - SyncVectorPatch::finalizeExchangeParticles( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::initExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); + SyncVectorPatch::finalizeExchParticlesAlongDimension( vecPatches, ispec, iDim, params, smpi ); } - + #pragma omp for schedule(runtime) for( unsigned int ipatch=0 ; ipatchimportAndSortParticles( ispec, params ); @@ -108,8 +90,20 @@ void SyncVectorPatch::finalizeAndSortParticles( VectorPatch &vecPatches, int isp } +void SyncVectorPatch::initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +{ + // Exchange numbers of particles in direction 0 only +#ifndef _NO_MPI_TM + #pragma omp for schedule(runtime) +#else + #pragma omp single +#endif + for( unsigned int ipatch=0 ; ipatchexchNbrOfParticles( smpi, ispec, params, iDim, &vecPatches ); + } +} -void SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) +void SyncVectorPatch::finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ) { #ifndef _NO_MPI_TM #pragma omp for schedule(runtime) @@ -140,7 +134,7 @@ void 
SyncVectorPatch::finalizeExchangeParticles( VectorPatch &vecPatches, int is #pragma omp single #endif for( unsigned int ipatch=0 ; ipatchfinalizeExchParticles( ispec, iDim ); + vecPatches( ipatch )->waitExchParticles( ispec, iDim ); } #pragma omp for schedule(runtime) @@ -275,7 +269,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIx[ifield ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+nPatchMPIx ]->extract_fields_sum( 0, iNeighbor, oversize[0] ); vecPatches.densitiesMPIx[ifield+2*nPatchMPIx]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->sendFields_[iNeighbor]->data_; // int sizeofJx = field->sendFields_[iNeighbor]->size(); @@ -297,7 +291,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 0, local const int nFieldLocalx = vecPatches.densitiesLocalx.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer than needs to be handled on the host. 
const bool is_memory_on_device = vecPatches.densitiesLocalx.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalx[0]->data() ); @@ -330,9 +324,9 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt2 = &( vecPatches.densitiesLocalx[ifield]->data_[0] ); //Sum 2 ==> 1 - const int last = gsp[0] * ny_ * nz_; + const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalx[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -364,7 +358,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIx[ifield+2*nPatchMPIx], 0 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIx[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -408,7 +402,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIy[ifield ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+nPatchMPIy ]->extract_fields_sum( 1, iNeighbor, oversize[1] ); vecPatches.densitiesMPIy[ifield+2*nPatchMPIy]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = field->sendFields_[iNeighbor+2]->data_; // int sizeofJx = field->sendFields_[iNeighbor+2]->size(); @@ -430,7 +424,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 1, const int nFieldLocaly = 
vecPatches.densitiesLocaly.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocaly.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocaly[0]->data() ); #endif @@ -463,11 +457,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size[1]*nz_] ); pt2 = &( vecPatches.densitiesLocaly[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocaly[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -502,7 +496,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIy[ifield+2*nPatchMPIy], 1 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIy[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2+2]->data_; // int sizeofJx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -544,7 +538,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches.densitiesMPIz[ifield ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+nPatchMPIz ]->extract_fields_sum( 2, iNeighbor, oversize[2] ); vecPatches.densitiesMPIz[ifield+2*nPatchMPIz]->extract_fields_sum( 2, iNeighbor, 
oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->sendFields_[iNeighbor+4]->data_; // int sizeofJx = field->sendFields_[iNeighbor+4]->size(); @@ -566,7 +560,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc // iDim = 2 local const int nFieldLocalz = vecPatches.densitiesLocalz.size() / 3; -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = vecPatches.densitiesLocalz.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( vecPatches.densitiesLocalz[0]->data() ); #endif @@ -600,11 +594,11 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size[2]] ); pt2 = &( vecPatches.densitiesLocalz[ifield]->data_[0] ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = vecPatches.densitiesLocalz[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) @@ -636,7 +630,7 @@ void SyncVectorPatch::sumAllComponents( std::vector &fields, VectorPatc vecPatches( ipatch )->finalizeSumField( vecPatches.densitiesMPIz[ifield+2*nPatchMPIz], 2 ); // Jz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // Field* field = vecPatches.densitiesMPIz[ifield ]; // double* Jx = field->recvFields_[(iNeighbor+1)%2+4]->data_; // int sizeofJx = 
field->recvFields_[(iNeighbor+1)%2+4]->size(); @@ -803,7 +797,7 @@ void SyncVectorPatch::exchangeE( Params &, VectorPatch &vecPatches, int imode, S SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listEt_[imode], vecPatches ); } -void SyncVectorPatch::exchangeBmBTIS3( Params ¶ms, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, int imode, SmileiMPI *smpi ) { SyncVectorPatch::exchangeAlongAllDirections,cField>( vecPatches.listBr_mBTIS3[imode], vecPatches, smpi ); SyncVectorPatch::finalizeExchangeAlongAllDirections( vecPatches.listBr_mBTIS3[imode], vecPatches ); @@ -887,7 +881,7 @@ void SyncVectorPatch::exchangeEnvEx( Params ¶ms, VectorPatch &vecPatches, Sm } } -void SyncVectorPatch::exchangeBmBTIS3( Params ¶ms, VectorPatch &vecPatches, SmileiMPI *smpi ) +void SyncVectorPatch::exchangeBmBTIS3( Params &/*params*/, VectorPatch &vecPatches, SmileiMPI *smpi ) { // exchange BmBTIS3 in Cartesian geometries // exchange ByBTIS3 @@ -1493,7 +1487,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongX( std::vector &fields, vecPatches.B_MPIx[ifield ]->extract_fields_exch( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->create_sub_fields ( 0, iNeighbor, oversize ); vecPatches.B_MPIx[ifield+nMPIx]->extract_fields_exch( 0, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; double* By = field->sendFields_[iNeighbor]->data_; int sizeofBy = field->sendFields_[iNeighbor]->size(); @@ -1586,7 +1580,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongX( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B_MPIx[ifield+nMPIx], 0 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B_MPIx[ifield ]; 
double* By = field->recvFields_[(iNeighbor+1)%2]->data_; int sizeofBy = field->recvFields_[(iNeighbor+1)%2]->size(); @@ -1629,7 +1623,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, vecPatches.B1_MPIy[ifield ]->extract_fields_exch( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->create_sub_fields ( 1, iNeighbor, oversize ); vecPatches.B1_MPIy[ifield+nMPIy]->extract_fields_exch( 1, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->sendFields_[iNeighbor+2]->data_; int sizeofBx = field->sendFields_[iNeighbor+2]->size(); @@ -1677,7 +1671,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongY( std::vector &fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[1][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[1][0]-h0+icomp*nPatches]->data_[size*nz_] ); pt2 = &( vecPatches.B1_localy[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = vecPatches.B1_localy[ifield]->size(); #pragma acc parallel present(pt1[0-size*nz_:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1717,7 +1711,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongY( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B1_MPIy[ifield+nMPIy], 1 ); // Bz for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B1_MPIy[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+2]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+2]->size(); @@ -1760,7 +1754,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, vecPatches.B2_MPIz[ifield ]->extract_fields_exch( 2, iNeighbor, oversize ); vecPatches.B2_MPIz[ifield+nMPIz]->create_sub_fields ( 2, iNeighbor, oversize ); 
vecPatches.B2_MPIz[ifield+nMPIz]->extract_fields_exch( 2, iNeighbor, oversize ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->sendFields_[iNeighbor+4]->data_; int sizeofBx = field->sendFields_[iNeighbor+4]->size(); @@ -1805,7 +1799,7 @@ void SyncVectorPatch::exchangeAllComponentsAlongZ( std::vector fields, if( vecPatches( ipatch )->MPI_me_ == vecPatches( ipatch )->MPI_neighbor_[2][0] ) { pt1 = &( fields[vecPatches( ipatch )->neighbor_[2][0]-h0+icomp*nPatches]->data_[size] ); pt2 = &( vecPatches.B2_localz[ifield]->data_[0] ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int ptsize = vecPatches.B2_localz[ifield]->size(); #pragma acc parallel present(pt1[0-size:ptsize],pt2[0:ptsize]) #pragma acc loop gang worker vector @@ -1845,7 +1839,7 @@ void SyncVectorPatch::finalizeExchangeAllComponentsAlongZ( VectorPatch &vecPatch vecPatches( ipatch )->finalizeExchange( vecPatches.B2_MPIz[ifield+nMPIz], 2 ); // By for (int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++) { if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, ( iNeighbor+1 )%2 ) ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC Field* field = vecPatches.B2_MPIz[ifield ]; double* Bx = field->recvFields_[(iNeighbor+1)%2+4]->data_; int sizeofBx = field->recvFields_[(iNeighbor+1)%2+4]->size(); diff --git a/src/Patch/SyncVectorPatch.h b/src/Patch/SyncVectorPatch.h index 0ce868cae..07435cd49 100755 --- a/src/Patch/SyncVectorPatch.h +++ b/src/Patch/SyncVectorPatch.h @@ -17,9 +17,10 @@ class SyncVectorPatch public : //! 
Particles synchronization - static void exchangeParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeAndSortParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); - static void finalizeExchangeParticles( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticles( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAndSort( VectorPatch &vecPatches, int ispec, Params ¶ms, SmileiMPI *smpi ); + static void initExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); + static void finalizeExchParticlesAlongDimension( VectorPatch &vecPatches, int ispec, int iDim, Params ¶ms, SmileiMPI *smpi ); //! Densities synchronization static void sumRhoJ( Params ¶ms, VectorPatch &vecPatches, SmileiMPI *smpi ); @@ -72,7 +73,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 0, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 0, iNeighbor, 2*oversize[0]+1+fields[ifield]->isDual_[0] ); fields[ifield]->extract_fields_sum( 0, iNeighbor, oversize[0] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double * pointer = fields[ifield]->sendFields_[iNeighbor]->data_; // int size = fields[ifield]->size(); // #endif @@ -86,7 +87,7 @@ public : // iDim = 0, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At initialization, we may get a CPU buffer than needs to be handled on the host. 
const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); @@ -122,7 +123,7 @@ public : const unsigned int last = gsp[0] * ny_ * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int nspace0 = size[0]; #pragma acc parallel if ( is_memory_on_device) present(pt1[0-nspace0*ny_*nz_:ptsize],pt2[0:ptsize]) @@ -176,7 +177,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 1, iNeighbor ) ) { fields[ifield]->create_sub_fields ( 1, iNeighbor, 2*oversize[1]+1+fields[ifield]->isDual_[1] ); fields[ifield]->extract_fields_sum( 1, iNeighbor, oversize[1] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2]->size(); // //#pragma acc update device( Jx[0:sizeofJx], Jy[0:sizeofJy], Jz[0:sizeofJz] ) @@ -191,7 +192,7 @@ public : // iDim = 1, local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -219,11 +220,11 @@ public : pt1 = &( *field1 )( size[1]*nz_ ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = ny_ * nz_; - const int inner_last = gsp[1] * nz_; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = ny_ * nz_; + const unsigned int inner_last = gsp[1] * nz_; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[1]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla*nz_:ptsize],pt2[0:ptsize]) @@ -281,7 +282,7 @@ public : if ( vecPatches( ipatch )->is_a_MPI_neighbor( 2, iNeighbor ) ) { 
fields[ifield]->create_sub_fields ( 2, iNeighbor, 2*oversize[2]+1+fields[ifield]->isDual_[2] ); fields[ifield]->extract_fields_sum( 2, iNeighbor, oversize[2] ); -// #ifdef SMILEI_OPENACC_MODE +// #ifdef SMILEI_ACCELERATOR_GPU_OACC // double* pointer = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->data_; // int size = fields[ifield]->recvFields_[(iNeighbor+1)%2+2]->size(); // #endif @@ -292,7 +293,7 @@ public : // iDim = 2 local -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) const bool is_memory_on_device = fields.size() > 0 && smilei::tools::gpu::HostDeviceMemoryManagement::IsHostPointerMappedOnDevice( fields[0]->data() ); #endif @@ -320,11 +321,11 @@ public : pt1 = &( *field1 )( size[2] ); pt2 = &( *field2 )( 0 ); - const int outer_last = nx_ * ny_ * nz_; - const int outer_stride = nz_; - const int inner_last = gsp[2]; + const unsigned int outer_last = nx_ * ny_ * nz_; + const unsigned int outer_stride = nz_; + const unsigned int inner_last = gsp[2]; -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) int ptsize = fields[ifield]->size(); int blabla = size[2]; #pragma acc parallel if (is_memory_on_device) present(pt1[0-blabla:ptsize],pt2[0:ptsize]) diff --git a/src/Patch/VectorPatch.cpp b/src/Patch/VectorPatch.cpp index 8b239b905..42f4dd3d8 100755 --- a/src/Patch/VectorPatch.cpp +++ b/src/Patch/VectorPatch.cpp @@ -301,7 +301,7 @@ void VectorPatch::reconfiguration( Params ¶ms, Timers &timers, int itime ) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::initialParticleSorting( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC) // Initially I wanted to control the GPU particle sorting/bin initialization // here. In the end it was put in initializeDataOnDevice which is more // meaningful. 
@@ -322,7 +322,7 @@ void VectorPatch::initialParticleSorting( Params ¶ms ) } // --------------------------------------------------------------------------------------------------------------------- -// For all patches, move particles (restartRhoJ(s), dynamics and exchangeParticles) +// For all patches, move particles (restartRhoJ(s), dynamics and initExchParticles) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::dynamics( Params ¶ms, SmileiMPI *smpi, @@ -402,7 +402,7 @@ void VectorPatch::dynamics( Params ¶ms, for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { Species *spec = species( 0, ispec ); if ( (!params.Laser_Envelope_model) && (spec->isProj( time_dual, simWindow )) ){ - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on Species and on envelope model } // end loop on species //MESSAGE("exchange particles"); @@ -460,7 +460,7 @@ void VectorPatch::projectionForDiags( Params ¶ms, // --------------------------------------------------------------------------------------------------------------------- //! For all patches, exchange particles and sort them. 
// --------------------------------------------------------------------------------------------------------------------- -void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, +void VectorPatch::finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ) { timers.syncPart.restart(); @@ -471,7 +471,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::finalizeAndSortParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::finalizeExchParticlesAndSort( ( *this ), ispec, params, smpi ); // Included sortParticles } } @@ -491,7 +491,7 @@ void VectorPatch::finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, Sim timers.syncPart.update( params.printNow( itime ) ); -} // END finalizeAndSortParticles +} // END finalizeExchParticlesAndSort //! 
Perform the particles merging on all patches @@ -853,7 +853,7 @@ void VectorPatch::sumDensities( Params ¶ms, double time_dual, Timers &timers #pragma omp for schedule(static) for( unsigned int ipatch=0 ; ipatchsize() ; ipatch++ ) { // Per species in global, Attention if output -> Sync / per species fields -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // At itime == 0, data is still located on the Host if (itime == 0) { ( *this )( ipatch )->EMfields->computeTotalRhoJ(); @@ -1269,7 +1269,7 @@ void VectorPatch::closeAllDiags( SmileiMPI *smpi ) // --------------------------------------------------------------------------------------------------------------------- void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int itime, Timers &timers, SimWindow *simWindow ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool data_on_cpu_updated = false; #endif @@ -1277,7 +1277,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int timers.diags.restart(); // Determine which data is required from the device -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) bool need_particles = false; bool need_fields = false; @@ -1346,7 +1346,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < globalDiags.size() ; idiag++ ) { diag_timers_[idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE) +// #if defined( SMILEI_ACCELERATOR_GPU) // if( globalDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1462,7 +1462,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int idiag = 0 ; idiag < localDiags.size() ; idiag++ ) { diag_timers_[globalDiags.size()+idiag]->restart(); -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // if( 
localDiags[idiag]->timeSelection->theTimeIsNow( itime ) && // !data_on_cpu_updated && // ( itime > 0 ) ) { @@ -1496,7 +1496,7 @@ void VectorPatch::runAllDiags( Params &/*params*/, SmileiMPI *smpi, unsigned int for( unsigned int ipatch=0 ; ipatchEMfields->restartRhoJs(); -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) // Delete species current and rho grids from device for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { ( *this )( ipatch )->vecSpecies[ispec]->Species::deleteSpeciesCurrentAndChargeOnDevice(ispec, ( *this )( ipatch )->EMfields); @@ -2973,7 +2973,7 @@ void VectorPatch::createPatches( Params ¶ms, SmileiMPI *smpi, SimWindow *sim // Set Index of the 1st patch of the vector yet on current MPI rank // Is this really necessary ? It should be done already ... - refHindex_ = ( *this )( 0 )->Hindex(); + setRefHindex(); // Current number of patch int nPatches_now = this->size() ; @@ -4402,7 +4402,7 @@ void VectorPatch::moveWindow( // Bring all particles and field grids to the Host (except species grids) // This part can be optimized by copying only the patch to be destructed -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyParticlesFromDeviceToHost(); copyFieldsFromDeviceToHost(); @@ -4412,10 +4412,11 @@ void VectorPatch::moveWindow( simWindow->shift( (*this), smpi, params, itime, time_dual, region ); - if (itime == simWindow->getAdditionalShiftsIteration() ) { + if( itime == (int) simWindow->getAdditionalShiftsIteration() ) { int adjust = simWindow->isMoving(time_dual)?0:1; - for (unsigned int n=0;n < simWindow->getNumberOfAdditionalShifts()-adjust; n++) + for( unsigned int n=0; n < simWindow->getNumberOfAdditionalShifts()-adjust; n++ ) { simWindow->shift( (*this), smpi, params, itime, time_dual, region ); + } } // Copy all Fields and Particles to the device @@ -4423,7 
+4424,7 @@ void VectorPatch::moveWindow( // let's try initialising like we do at the start: -/*#if defined( SMILEI_ACCELERATOR_MODE ) +/*#if defined( SMILEI_ACCELERATOR_GPU ) // Allocate particle and field arrays // Also copy particle array content on device vecPatches.allocateDataOnDevice( params, &smpi, @@ -4434,7 +4435,7 @@ void VectorPatch::moveWindow( #endif*/ // does not do anything? - /*#if defined( SMILEI_ACCELERATOR_MODE) + /*#if defined( SMILEI_ACCELERATOR_GPU) if( simWindow->isMoving( time_dual ) || itime == simWindow->getAdditionalShiftsIteration() ) { copyFieldsFromHostToDevice(); copyParticlesFromHostToDevice(); @@ -4588,7 +4589,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrents( Params ¶ms, timers.syncPart.restart(); for( unsigned int ispec=0 ; ispec<( *this )( 0 )->vecSpecies.size(); ispec++ ) { if( ( *this )( 0 )->vecSpecies[ispec]->isProj( time_dual, simWindow ) ) { - SyncVectorPatch::exchangeParticles( ( *this ), ispec, params, smpi ); // Included sortParticles + SyncVectorPatch::initExchParticles( ( *this ), ispec, params, smpi ); // Included sortParticles } // end condition on species } // end loop on species timers.syncPart.update( params.printNow( itime ) ); @@ -4609,91 +4610,26 @@ void VectorPatch::initNewEnvelope( Params & ) } // END initNewEnvelope +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::allocateDataOnDevice(Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { - -#if defined( SMILEI_ACCELERATOR_MODE ) // TODO(Etienne M): FREE. If we have load balancing or other patch // creation/destruction available (which is not the case on GPU ATM), // we should be taking care of freeing this GPU memory. 
- const int npatches = this->size(); - - // const int sizeofJx = patches_[0]->EMfields->Jx_->size(); - // const int sizeofJy = patches_[0]->EMfields->Jy_->size(); - // const int sizeofJz = patches_[0]->EMfields->Jz_->size(); - // const int sizeofRho = patches_[0]->EMfields->rho_->size(); - - // const int sizeofEx = patches_[0]->EMfields->Ex_->size(); - // const int sizeofEy = patches_[0]->EMfields->Ey_->size(); - // const int sizeofEz = patches_[0]->EMfields->Ez_->size(); - - // const int sizeofBx = patches_[0]->EMfields->Bx_->size(); - // const int sizeofBy = patches_[0]->EMfields->By_->size(); - // const int sizeofBz = patches_[0]->EMfields->Bz_->size(); - - for( int ipatch=0 ; ipatchvecSpecies.size(); ispec++ ) { - Species *spec = species( ipatch, ispec ); - spec->particles->initializeDataOnDevice(); - spec->particles_to_move->initializeDataOnDevice(); - - // Create photon species on the device - if ( spec->radiation_model_ == "mc" && spec->photon_species_) { - spec->radiated_photons_->initializeDataOnDevice(); - } - - // Create pair species on the device - if ( spec->mBW_pair_species_[0] && spec->mBW_pair_species_[1]) { - spec->mBW_pair_particles_[0]->initializeDataOnDevice(); - spec->mBW_pair_particles_[1]->initializeDataOnDevice(); - } - - //#pragma acc enter data copyin(spec->nrj_radiation) + for( auto spec: patch->vecSpecies ) { + spec->allocateParticlesOnDevice(); } // Allocate field data structures on GPU - patches_[ipatch]->allocateFieldsOnDevice(); - - // const double *const Jx = patches_[ipatch]->EMfields->Jx_->data(); - // const double *const Jy = patches_[ipatch]->EMfields->Jy_->data(); - // const double *const Jz = patches_[ipatch]->EMfields->Jz_->data(); - // const double *const Rho = patches_[ipatch]->EMfields->rho_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jx, sizeofJx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jy, sizeofJy ); - // 
smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Jz, sizeofJz ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Rho, sizeofRho ); - - // const double *const Ex = patches_[ipatch]->EMfields->Ex_->data(); - // const double *const Ey = patches_[ipatch]->EMfields->Ey_->data(); - // const double *const Ez = patches_[ipatch]->EMfields->Ez_->data(); + patch->allocateFieldsOnDevice(); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ex, sizeofEx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ey, sizeofEy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Ez, sizeofEz ); - - // const double *const Bmx = patches_[ipatch]->EMfields->Bx_m->data(); - // const double *const Bmy = patches_[ipatch]->EMfields->By_m->data(); - // const double *const Bmz = patches_[ipatch]->EMfields->Bz_m->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmy, sizeofBy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocate( Bmz, sizeofBz ); - - // const double *const Bx = patches_[ipatch]->EMfields->Bx_->data(); - // const double *const By = patches_[ipatch]->EMfields->By_->data(); - // const double *const Bz = patches_[ipatch]->EMfields->Bz_->data(); - - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bx, sizeofBx ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( By, sizeofBy ); - // smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( Bz, sizeofBz ); - } // end patch loop // TODO(Etienne M): We should create a function that does the copy of the radiation table. 
@@ -4745,17 +4681,24 @@ void VectorPatch::allocateDataOnDevice(Params ¶ms, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( min_particle_chi_table, min_particle_chi_size ); smilei::tools::gpu::HostDeviceMemoryManagement::DeviceAllocateAndCopyHostToDevice( xi_table, xi_table_size ); } +} #else +void VectorPatch::allocateDataOnDevice(Params &, + SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif + //! Clean data allocated on device +#if defined( SMILEI_ACCELERATOR_GPU ) void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, RadiationTables *radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables) { -#if defined( SMILEI_OPENACC_MODE ) || defined( SMILEI_ACCELERATOR_GPU_OMP ) const int npatches = this->size(); @@ -4865,12 +4808,17 @@ void VectorPatch::cleanDataOnDevice( Params ¶ms, SmileiMPI *smpi, smilei::tools::gpu::HostDeviceMemoryManagement::DeviceFree( xi_table, xi_table_size ); } +} #else +void VectorPatch::cleanDataOnDevice( Params &, SmileiMPI *, + RadiationTables *, + MultiphotonBreitWheelerTables *) +{ ERROR( "GPU related code should not be reached in CPU mode!" ); -#endif } +#endif -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Field Synchronization from the GPU (Device) to the CPU //! This function updates the data on the host from the data located on the device @@ -4910,9 +4858,7 @@ void VectorPatch::copyFieldsFromHostToDevice() } } -#endif -#if defined( SMILEI_ACCELERATOR_MODE) //! Sync all fields from device to host void VectorPatch::copyFieldsFromDeviceToHost() @@ -4925,10 +4871,6 @@ VectorPatch::copyFieldsFromDeviceToHost() } } -#endif - - -#if defined( SMILEI_ACCELERATOR_MODE) //! 
Copy all species particles from Host to devices void VectorPatch::copyParticlesFromHostToDevice() @@ -4940,9 +4882,6 @@ void VectorPatch::copyParticlesFromHostToDevice() } } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! copy all patch Particles from device to Host void @@ -4955,9 +4894,7 @@ VectorPatch::copyParticlesFromDeviceToHost() for( int ipatch = 0; ipatch < npatches; ipatch++ ) { for( unsigned int ispec = 0; ispec < ( *this )( ipatch )->vecSpecies.size(); ispec++ ) { species( ipatch, ispec )->particles->copyFromDeviceToHost(); -#if defined ( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_MODE ) species( ipatch, ispec )->particles->setHostBinIndex(); -#endif // std::cerr // << "ipatch: " << ipatch // << " ispec: " << ispec @@ -4970,9 +4907,6 @@ VectorPatch::copyParticlesFromDeviceToHost() } } -#endif - -#if defined( SMILEI_ACCELERATOR_MODE) //! Sync all fields from device to host void VectorPatch::copySpeciesFieldsFromDeviceToHost() @@ -5052,7 +4986,7 @@ void VectorPatch::dynamicsWithoutTasks( Params ¶ms, if( spec->isProj( time_dual, simWindow ) || diag_flag ) { -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) if (diag_flag) { spec->Species::prepareSpeciesCurrentAndChargeOnDevice( ispec, @@ -5364,7 +5298,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5380,7 +5314,7 @@ void VectorPatch::dynamicsWithTasks( Params ¶ms, Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5600,7 +5534,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } @@ -5618,7 +5552,7 @@ void VectorPatch::ponderomotiveUpdatePositionAndCurrentsWithTasks( Params ¶m Species *spec_task = species( ipatch, ispec ); for( unsigned int scell = 0 ; scell < spec_task->Ncells ; scell++ ) { for( unsigned int iPart=spec_task->particles->first_index[scell] ; ( int )iPartparticles->last_index[scell]; iPart++ ) { - if ( spec_task->particles->cell_keys[iPart] != -1 ) { + if ( spec_task->particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. spec_task->count[spec_task->particles->cell_keys[iPart]] ++; } diff --git a/src/Patch/VectorPatch.h b/src/Patch/VectorPatch.h index 01ec195c2..051d78276 100755 --- a/src/Patch/VectorPatch.h +++ b/src/Patch/VectorPatch.h @@ -138,7 +138,7 @@ public : //! 
Particle sorting for all patches. This is done at initialization time. void initialParticleSorting( Params ¶ms ); - //! For all patch, move particles (restartRhoJ(s), dynamics and exchangeParticles) + //! For all patch, move particles (restartRhoJ(s), dynamics and initExchParticles) void dynamics( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, @@ -157,7 +157,7 @@ public : Timers &timers, int itime ); //! For all patches, exchange particles and sort them. - void finalizeAndSortParticles( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, + void finalizeExchParticlesAndSort( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, double time_dual, Timers &timers, int itime ); void finalizeSyncAndBCFields( Params ¶ms, SmileiMPI *smpi, SimWindow *simWindow, @@ -510,7 +510,7 @@ public : RadiationTables * radiation_tables, MultiphotonBreitWheelerTables *multiphoton_Breit_Wheeler_tables ); -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) //! Field Synchronization from the GPU (Device) to the host (CPU) diff --git a/src/Projector/Projector2D2OrderGPU.cpp b/src/Projector/Projector2D2OrderGPU.cpp index cfe20eb7d..c669cc209 100755 --- a/src/Projector/Projector2D2OrderGPU.cpp +++ b/src/Projector/Projector2D2OrderGPU.cpp @@ -26,7 +26,7 @@ Projector2D2OrderGPU::Projector2D2OrderGPU( Params ¶meters, Patch *a_patch ) dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. 
x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); @@ -41,7 +41,7 @@ Projector2D2OrderGPU::~Projector2D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) //SMILEI_ACCELERATOR_GPU_OMP ) +#if defined( SMILEI_ACCELERATOR_GPU ) //SMILEI_ACCELERATOR_GPU_OMP ) extern "C" void currentDepositionKernel2DOnDevice( double *__restrict__ Jx, @@ -109,6 +109,7 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -132,7 +133,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) currentDepositionKernel2DOnDevice( Jx, Jy, Jz, @@ -159,15 +159,22 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int ,const double *__restrict__ , + const int *__restrict__ , const double *__restrict__ , double , double , double , + double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif /// Like currents(), project the particle current on the grid (Jx_/Jy_/Jz_) /// but also compute global current densities rho used for diagFields timestep /// /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU )//SMILEI_ACCELERATOR_GPU_OMP ) currentsAndDensity( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -193,7 +200,6 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE )//SMILEI_ACCELERATOR_GPU_OMP ) 
currentAndDensityDepositionKernelOnDevice( Jx, Jy, Jz, @@ -222,10 +228,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy j_domain_begin, nprimy, not_spectral ); + } #else + currentsAndDensity( double *__restrict__ , double *__restrict__ , double *__restrict__ , double *__restrict__ , + int , int , int , int , Particles &, unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -233,7 +245,7 @@ void Projector2D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { // Warning : this function is used for frozen species only. It is assumed that position = position_old !!! @@ -306,12 +318,12 @@ void Projector2D2OrderGPU::basic( double *rhoj, } } -void Projector2D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector2D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector2D2OrderGPU::ionizationCurrents(): Not implemented !" 
); } @@ -325,8 +337,8 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref */) { std::vector &iold = smpi->dynamics_iold[ithread]; std::vector &delta = smpi->dynamics_deltaold[ithread]; @@ -425,20 +437,20 @@ void Projector2D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, } } -void Projector2D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector2D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector2D2OrderGPU::susceptibility(): Not implemented !" ); } -//#if defined( SMILEI_ACCELERATOR_MODE ) +//#if defined( SMILEI_ACCELERATOR_GPU ) ////! Project global current densities (EMfields->Jx_/Jy_/Jz_) ////! 
//extern "C" void diff --git a/src/Projector/Projector2D2OrderGPU.h b/src/Projector/Projector2D2OrderGPU.h index 9a799f9b5..ecdd4959d 100755 --- a/src/Projector/Projector2D2OrderGPU.h +++ b/src/Projector/Projector2D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector2D2OrderGPU : public Projector2D int ipart_ref = 0 ) override; //!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// diff --git a/src/Projector/Projector2D2OrderGPUKernel.cpp b/src/Projector/Projector2D2OrderGPUKernel.cpp index 8f38f52fe..e2ec56495 100644 --- a/src/Projector/Projector2D2OrderGPUKernel.cpp +++ b/src/Projector/Projector2D2OrderGPUKernel.cpp @@ -1,4 +1,4 @@ -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include "Projector2D2OrderGPUKernelCUDAHIP.h" #include diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu index 666a409f4..55082b793 100644 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.cu @@ -81,7 +81,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( 
SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ @@ -264,7 +264,7 @@ // device_particle_charge /* [0:particle_count] */, \ // device_particle_weight /* [0:particle_count] */ ) // #pragma omp teams thread_limit( 64 ) distribute parallel for -// #elif defined( SMILEI_OPENACC_MODE ) +// #elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // #pragma acc parallel \ // deviceptr( device_particle_position_x, \ // device_particle_position_y, \ diff --git a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h index d607a4ab4..a21f757db 100644 --- a/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector2D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector2D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector3D2OrderGPU.cpp b/src/Projector/Projector3D2OrderGPU.cpp index 39342b204..62ec54141 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp +++ b/src/Projector/Projector3D2OrderGPU.cpp @@ -30,13 +30,13 @@ Projector3D2OrderGPU::Projector3D2OrderGPU( Params ¶meters, Patch *a_patch ) dts2 = dt / 2.0; dts4 = dts2 / 2.0; -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined ( SMILEI_ACCELERATOR_GPU_OACC ) // When sorting is disabled, these values are invalid (-1) and the HIP // implementation can't be used. 
x_dimension_bin_count_ = parameters.getGPUBinCount( 1 ); y_dimension_bin_count_ = parameters.getGPUBinCount( 2 ); z_dimension_bin_count_ = parameters.getGPUBinCount( 3 ); -//#elif defined( SMILEI_OPENACC_MODE ) +//#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // x_dimension_bin_count_ = 1; // y_dimension_bin_count_ = 1; // z_dimension_bin_count_ = 1; @@ -50,7 +50,7 @@ Projector3D2OrderGPU::~Projector3D2OrderGPU() // EMPTY } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) extern "C" void currentDeposition3DOnDevice( double *__restrict__ Jx, double *__restrict__ Jy, @@ -122,6 +122,8 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy /// Project global current densities (EMfields->Jx_/Jy_/Jz_) /// /* inline */ void + +#if defined( SMILEI_ACCELERATOR_GPU ) currents( double *__restrict__ Jx, double *__restrict__ Jy, double *__restrict__ Jz, @@ -150,72 +152,77 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy double, int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) currentDeposition3DOnDevice( Jx, - Jy, - Jz, - Jx_size, - Jy_size, - Jz_size, - particles.getPtrPosition( 0 ), - particles.getPtrPosition( 1 ), - particles.getPtrPosition( 2 ), - particles.getPtrCharge(), - particles.getPtrWeight(), - particles.last_index.data(), - x_dimension_bin_count, - y_dimension_bin_count, - z_dimension_bin_count, - invgf_, - iold_, - deltaold_, - particles.deviceSize(), - inv_cell_volume, - dx_inv, - dy_inv, - dz_inv, - dx_ov_dt, - dy_ov_dt, - dz_ov_dt, - i_domain_begin, - j_domain_begin, - k_domain_begin, - nprimy, nprimz, - not_spectral ); + Jy, + Jz, + Jx_size, + Jy_size, + Jz_size, + particles.getPtrPosition( 0 ), + particles.getPtrPosition( 1 ), + particles.getPtrPosition( 2 ), + particles.getPtrCharge(), + particles.getPtrWeight(), + particles.last_index.data(), + x_dimension_bin_count, + y_dimension_bin_count, + z_dimension_bin_count, + invgf_, + iold_, + deltaold_, + 
particles.deviceSize(), + inv_cell_volume, + dx_inv, + dy_inv, + dz_inv, + dx_ov_dt, + dy_ov_dt, + dz_ov_dt, + i_domain_begin, + j_domain_begin, + k_domain_begin, + nprimy, nprimz, + not_spectral ); + } #else + currents( double *__restrict__ , double *__restrict__ , double *__restrict__ , int, int, int, + Particles &, unsigned int , unsigned int , unsigned int , const double *__restrict__ , + const int *__restrict__ , const double *__restrict__ , double , double , double , double , + double , double , double , int , int , int , int , int , double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif //! Project density /* inline */ void +#if defined( SMILEI_ACCELERATOR_GPU ) density( - double *__restrict__ rho, - int rho_size, - Particles &particles, - unsigned int x_dimension_bin_count, - unsigned int y_dimension_bin_count, - unsigned int z_dimension_bin_count, - const double *__restrict__ invgf_, - const int *__restrict__ iold_, - const double *__restrict__ deltaold_, - double inv_cell_volume, - double dx_inv, - double dy_inv, - double dz_inv, - double dx_ov_dt, - double dy_ov_dt, - double dz_ov_dt, - int i_domain_begin, - int j_domain_begin, - int k_domain_begin, - int nprimy, - int nprimz, - double, - int not_spectral ) + double *__restrict__ rho, + int rho_size, + Particles &particles, + unsigned int x_dimension_bin_count, + unsigned int y_dimension_bin_count, + unsigned int z_dimension_bin_count, + const double *__restrict__ invgf_, + const int *__restrict__ iold_, + const double *__restrict__ deltaold_, + double inv_cell_volume, + double dx_inv, + double dy_inv, + double dz_inv, + double dx_ov_dt, + double dy_ov_dt, + double dz_ov_dt, + int i_domain_begin, + int j_domain_begin, + int k_domain_begin, + int nprimy, + int nprimz, + double, + int not_spectral ) { -#if defined( SMILEI_ACCELERATOR_MODE ) densityDeposition3DOnDevice( rho, rho_size, @@ -244,10 +251,16 @@ namespace { // Unnamed namespace == static == internal linkage == no exported sy k_domain_begin, 
nprimy, nprimz, not_spectral ); + } #else + density( double *__restrict__ , int , Particles &, unsigned int , unsigned int , unsigned int , + const double *__restrict__ , const int *__restrict__ , const double *__restrict__ , + double , double , double , double , double , double , double , + int, int, int, int, int, double, int ) + { SMILEI_ASSERT( false ); -#endif } +#endif } // namespace @@ -255,7 +268,7 @@ void Projector3D2OrderGPU::basic( double *rhoj, Particles &particles, unsigned int ipart, unsigned int type, - int bin_shift ) + int /*bin_shift*/ ) { @@ -347,12 +360,12 @@ void Projector3D2OrderGPU::basic( double *rhoj, } } -void Projector3D2OrderGPU::ionizationCurrents( Field *Jx, - Field *Jy, - Field *Jz, - Particles &particles, - int ipart, - LocalFields Jion ) +void Projector3D2OrderGPU::ionizationCurrents( Field */*Jx*/, + Field */*Jy*/, + Field */*Jz*/, + Particles &/*particles*/, + int /*ipart*/, + LocalFields /*Jion */) { ERROR( "Projector3D2OrderGPU::ionizationCurrents(): Not implemented !" ); } @@ -366,8 +379,8 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, bool diag_flag, bool is_spectral, int ispec, - int icell, - int ipart_ref ) + int /*icell*/, + int /*ipart_ref*/ ) { if( is_spectral ) { @@ -463,15 +476,15 @@ void Projector3D2OrderGPU::currentsAndDensityWrapper( ElectroMagn *EMfields, //std::cerr << sum << " " << sum2 << " " << sum_Jxs << " " << sum_Jx << std::endl; } -void Projector3D2OrderGPU::susceptibility( ElectroMagn *EMfields, - Particles &particles, - double species_mass, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - int icell, - int ipart_ref ) +void Projector3D2OrderGPU::susceptibility( ElectroMagn */*EMfields*/, + Particles &/*particles*/, + double /*species_mass*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + int /*icell*/, + int /*ipart_ref */) { ERROR( "Projector3D2OrderGPU::susceptibility(): Not implemented !" 
); } diff --git a/src/Projector/Projector3D2OrderGPU.cpp.backup b/src/Projector/Projector3D2OrderGPU.cpp.backup index 39ce7a4a5..761e6ae31 100755 --- a/src/Projector/Projector3D2OrderGPU.cpp.backup +++ b/src/Projector/Projector3D2OrderGPU.cpp.backup @@ -2,7 +2,7 @@ #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #include #endif @@ -136,7 +136,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -262,7 +262,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -287,7 +287,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -310,7 +310,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -326,7 +326,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if 
defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jx [ jdx ] += val; @@ -339,7 +339,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -365,7 +365,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -388,7 +388,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -404,7 +404,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jy [ jdx ] += val; @@ -417,7 +417,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -443,7 +443,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] 
*/, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -466,7 +466,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -482,7 +482,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif Jz[ jdx ] += val; @@ -498,7 +498,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ @@ -523,7 +523,7 @@ Projector3D2OrderGPU::currentsAndDensityGPU( int jdx = idx + k; #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp atomic update -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc atomic #endif rho[ jdx ] += charge_weight * Sx1[ipart_pack+i*packsize]*Sy1[ipart_pack+j*packsize]*Sz1[ipart_pack+k*packsize]; diff --git a/src/Projector/Projector3D2OrderGPU.h b/src/Projector/Projector3D2OrderGPU.h index 2fac2402e..c76bf48a1 100755 --- a/src/Projector/Projector3D2OrderGPU.h +++ b/src/Projector/Projector3D2OrderGPU.h @@ -46,21 +46,21 @@ class Projector3D2OrderGPU : public Projector3D int ipart_ref = 0 ) override; 
//!Wrapper for task-based implementation of Smilei - void currentsAndDensityWrapperOnBuffers( double *b_Jx, - double *b_Jy, - double *b_Jz, - double *b_rho, - int bin_width, - Particles &particles, - SmileiMPI *smpi, - int istart, - int iend, - int ithread, - bool diag_flag, - bool is_spectral, - int ispec, - int icell = 0, - int ipart_ref = 0 ) override {}; + void currentsAndDensityWrapperOnBuffers( double * /*b_Jx*/, + double * /*b_Jy*/, + double * /*b_Jz*/, + double * /*b_rho*/, + int /*bin_width*/, + Particles &/*particles*/, + SmileiMPI */*smpi*/, + int /*istart*/, + int /*iend*/, + int /*ithread*/, + bool /*diag_flag*/, + bool /*is_spectral*/, + int /*ispec*/, + int /*icell*/ = 0, + int /*ipart_ref*/ = 0 ) override {}; /// Project susceptibility, used as source term in envelope equation /// diff --git a/src/Projector/Projector3D2OrderGPUKernel.cpp b/src/Projector/Projector3D2OrderGPUKernel.cpp index f77a4fda3..5d9f88b5d 100644 --- a/src/Projector/Projector3D2OrderGPUKernel.cpp +++ b/src/Projector/Projector3D2OrderGPUKernel.cpp @@ -5,7 +5,7 @@ // issues (!). -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Simple switch to jump between the reference (omp) implementation and the //! hip one. diff --git a/src/Projector/Projector3D2OrderGPUKernelAcc.h b/src/Projector/Projector3D2OrderGPUKernelAcc.h index 9cf3b224d..43bff1cce 100644 --- a/src/Projector/Projector3D2OrderGPUKernelAcc.h +++ b/src/Projector/Projector3D2OrderGPUKernelAcc.h @@ -1,6 +1,6 @@ //! 
Optimized Acc projection (from Julien Derouillat) -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -110,7 +110,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx0 [0:kTmpArraySize], \ @@ -236,7 +236,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSx [0:kTmpArraySize], sumX [0:kTmpArraySize] ) // #pragma acc parallel deviceptr( DSx, sumX ) @@ -261,7 +261,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jx [0:Jx_size], \ Sy0 [0:kTmpArraySize], \ @@ -284,7 +284,7 @@ namespace acc { const double crx_p = dx_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex0 = iold[ipart+0*packsize]*yz_size0+iold[ipart+1*packsize]*z_size0+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -309,7 +309,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSy [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -335,7 +335,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* 
[istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jy [0:Jy_size], \ Sx0 [0:kTmpArraySize], \ @@ -358,7 +358,7 @@ namespace acc { const double cry_p = dy_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex1 = iold[ipart+0*packsize]*yz_size1+iold[ipart+1*packsize]*z_size1+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=0 ; k<5 ; k++ ) { @@ -383,7 +383,7 @@ namespace acc { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( DSz [0:kTmpArraySize], \ sumX [0:kTmpArraySize] ) @@ -409,7 +409,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ Jz [0:Jz_size], \ Sx0 [0:kTmpArraySize], \ @@ -432,7 +432,7 @@ namespace acc { const double crz_p = dz_ov_dt_inv_cell_volume * static_cast( charge[ipart] ) * weight[ipart]; const int linindex2 = iold[ipart+0*packsize]*yz_size2+iold[ipart+1*packsize]*z_size2+iold[ipart+2*packsize]; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc loop vector #endif for( int k=1 ; k<5 ; k++ ) { @@ -536,7 +536,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Sx1 
[0:kTmpArraySize], \ @@ -630,7 +630,7 @@ namespace acc { charge /* [istart_pack:current_pack_size] */, \ weight /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ rho [0:rho_size], \ Sx1 [0:kTmpArraySize], \ diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu index 195a02667..dd8d1e61d 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.cu @@ -1,6 +1,6 @@ //! HIP CUDA implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //#include "Projector3D2OrderGPUKernelCUDAHIP.h" diff --git a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h index 94368f4dd..1b78b1252 100644 --- a/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h +++ b/src/Projector/Projector3D2OrderGPUKernelCUDAHIP.h @@ -4,7 +4,7 @@ #define Projector3D2OrderGPUKernelCUDAHIP_H -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #if defined( __HIP__ ) #include diff --git a/src/Projector/Projector3D2OrderGPUKernelNaive.h b/src/Projector/Projector3D2OrderGPUKernelNaive.h index b6cfac080..a261af40b 100644 --- a/src/Projector/Projector3D2OrderGPUKernelNaive.h +++ b/src/Projector/Projector3D2OrderGPUKernelNaive.h @@ -1,6 +1,6 @@ //! 
Naive ACC/OMP implementation -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) #include #include "Tools.h" @@ -66,7 +66,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ Jx[0:Jx_size], \ @@ -344,7 +344,7 @@ namespace acc { position_y /* [istart_pack:current_pack_size] */, \ position_z /* [istart_pack:current_pack_size] */ ) #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( iold [0:3 * nparts], \ deltaold [0:3 * nparts], \ rho[0:rho_size] \ diff --git a/src/Projector/ProjectorAM2OrderV.cpp b/src/Projector/ProjectorAM2OrderV.cpp index b222aa4ee..890d37332 100755 --- a/src/Projector/ProjectorAM2OrderV.cpp +++ b/src/Projector/ProjectorAM2OrderV.cpp @@ -673,10 +673,6 @@ void ProjectorAM2OrderV::susceptibility( ElectroMagn *EMfields, Particles &parti double charge_weight[8] __attribute__( ( aligned( 64 ) ) ); // double r_bar[8] __attribute__( ( aligned( 64 ) ) ); - //double *invR_local = &(invR_[jpom2]); - // double *invRd_local = &(invRd_[jpom2]); - - double *invR_local = &(invR_[jpom2]); // Pointer for GPU and vectorization on ARM processors double * __restrict__ position_x = particles.getPtrPosition(0); double * __restrict__ position_y = particles.getPtrPosition(1); diff --git a/src/Projector/ProjectorFactory.h b/src/Projector/ProjectorFactory.h index db8c39e1f..278739301 100755 --- a/src/Projector/ProjectorFactory.h +++ b/src/Projector/ProjectorFactory.h @@ -42,7 +42,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "2Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( 
SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector2D2OrderGPU( params, patch ); #else Proj = new Projector2D2Order( params, patch ); @@ -64,7 +64,7 @@ class ProjectorFactory // --------------- else if( ( params.geometry == "3Dcartesian" ) && ( params.interpolation_order == ( unsigned int )2 ) ) { if( !vectorization ) { - #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) + #if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) Proj = new Projector3D2OrderGPU( params, patch ); #else Proj = new Projector3D2Order( params, patch ); diff --git a/src/Pusher/PusherBoris.cpp b/src/Pusher/PusherBoris.cpp index 536def7a9..8f70a6cc3 100755 --- a/src/Pusher/PusherBoris.cpp +++ b/src/Pusher/PusherBoris.cpp @@ -57,7 +57,7 @@ void PusherBoris::operator()( Particles &particles, SmileiMPI *smpi, int istart, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherBorisNR.cpp b/src/Pusher/PusherBorisNR.cpp index 84f072e1f..df4a3277b 100755 --- a/src/Pusher/PusherBorisNR.cpp +++ b/src/Pusher/PusherBorisNR.cpp @@ -57,7 +57,7 @@ void PusherBorisNR::operator()( Particles &particles, SmileiMPI *smpi, int istar position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherHigueraCary.cpp b/src/Pusher/PusherHigueraCary.cpp index 2ab234ae1..c85189fff 100755 --- 
a/src/Pusher/PusherHigueraCary.cpp +++ b/src/Pusher/PusherHigueraCary.cpp @@ -68,7 +68,7 @@ void PusherHigueraCary::operator()( Particles &particles, SmileiMPI *smpi, int i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPhoton.cpp b/src/Pusher/PusherPhoton.cpp index a94a521e3..5feb7823d 100755 --- a/src/Pusher/PusherPhoton.cpp +++ b/src/Pusher/PusherPhoton.cpp @@ -53,7 +53,7 @@ void PusherPhoton::operator()( Particles &particles, SmileiMPI *smpi, position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int particle_number = iend - istart; diff --git a/src/Pusher/PusherPonderomotiveBoris.cpp b/src/Pusher/PusherPonderomotiveBoris.cpp index 41afa42e6..9d151dabb 100755 --- a/src/Pusher/PusherPonderomotiveBoris.cpp +++ b/src/Pusher/PusherPonderomotiveBoris.cpp @@ -55,7 +55,7 @@ void PusherPonderomotiveBoris::operator()( Particles &particles, SmileiMPI *smpi const double *const __restrict__ GradPhiz = &( ( *GradPhipart )[2*nparts] ); //double *inv_gamma_ponderomotive = &( ( *dynamics_inv_gamma_ponderomotive )[0*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp index 379f41763..a32f359cb 100644 --- a/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp +++ b/src/Pusher/PusherPonderomotiveBorisBTIS3.cpp @@ -31,7 +31,6 @@ void PusherPonderomotiveBorisBTIS3::operator()( Particles &particles, SmileiMPI double 
charge_over_mass_dts2, charge_sq_over_mass_sq_dts4; double umx, umy, umz, upx, upy, upz; double alpha; - double TxTy, TyTz, TzTx; double pxsm, pysm, pzsm; //double one_ov_gamma_ponderomotive; diff --git a/src/Pusher/PusherPonderomotivePositionBoris.cpp b/src/Pusher/PusherPonderomotivePositionBoris.cpp index 16a4e6c69..9b9bea639 100755 --- a/src/Pusher/PusherPonderomotivePositionBoris.cpp +++ b/src/Pusher/PusherPonderomotivePositionBoris.cpp @@ -52,7 +52,7 @@ void PusherPonderomotivePositionBoris::operator()( Particles &particles, SmileiM const double *const __restrict__ GradPhi_my = &( ( *GradPhi_mpart )[1*nparts] ); const double *const __restrict__ GradPhi_mz = &( ( *GradPhi_mpart )[2*nparts] ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; diff --git a/src/Pusher/PusherVay.cpp b/src/Pusher/PusherVay.cpp index c1ba76693..83debaae4 100755 --- a/src/Pusher/PusherVay.cpp +++ b/src/Pusher/PusherVay.cpp @@ -67,7 +67,7 @@ void PusherVay::operator()( Particles &particles, SmileiMPI *smpi, int istart, i position_y /* [istart:particle_number] */, \ position_z /* [istart:particle_number] */ ) #pragma omp teams distribute parallel for -#elif defined(SMILEI_OPENACC_MODE) +#elif defined(SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_buffer_offset; const int particle_number = iend - istart; diff --git a/src/Python/pyprofiles.py b/src/Python/pyprofiles.py index 0e122a1a9..2fff14c1f 100755 --- a/src/Python/pyprofiles.py +++ b/src/Python/pyprofiles.py @@ -702,7 +702,7 @@ def LaserGaussianAM( box_side="xmin", a0=1., omega=1., focus=None, waist=3., print("ERROR: focus should be a list of length 1") exit(1) elif (len(focus)==2): - print("WARNING: deprecated focus in LaserEnvelopeGaussianAM should be a list of length 1") + print("WARNING: deprecated focus in LaserGaussianAM should be a list of length 1") # Polarization and amplitude [dephasing, amplitudeY, amplitudeZ] = 
transformPolarization(polarization_phi, ellipticity) amplitudeY *= a0 * omega diff --git a/src/Radiation/RadiationCorrLandauLifshitz.cpp b/src/Radiation/RadiationCorrLandauLifshitz.cpp index 16c7b01fe..ebb0e54dd 100755 --- a/src/Radiation/RadiationCorrLandauLifshitz.cpp +++ b/src/Radiation/RadiationCorrLandauLifshitz.cpp @@ -96,7 +96,7 @@ void RadiationCorrLandauLifshitz::operator()( // cumulative Radiated energy from istart to iend double radiated_energy_loc = 0; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Local vector to store the radiated energy double * rad_norm_energy = new double [iend-istart]; // double * rad_norm_energy = (double*) aligned_alloc(64, (iend-istart)*sizeof(double)); @@ -112,7 +112,7 @@ void RadiationCorrLandauLifshitz::operator()( // Computation // NVIDIA GPUs - #if defined (SMILEI_OPENACC_MODE) + #if defined (SMILEI_ACCELERATOR_GPU_OACC) const int istart_offset = istart - ipart_ref; const int np = iend-istart; #pragma acc parallel \ @@ -185,7 +185,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Computation of the thread radiated energy -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Exact energy loss due to the radiation rad_norm_energy[ipart-istart] = gamma - std::sqrt( 1.0 @@ -210,7 +210,7 @@ void RadiationCorrLandauLifshitz::operator()( // _______________________________________________________________ // Update of the quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd for( int ipart=istart ; ipart #include -#if defined(SMILEI_OPENACC_MODE) +#if defined(SMILEI_ACCELERATOR_GPU_OACC) #define __HIP_PLATFORM_NVCC__ #define __HIP_PLATFORM_NVIDIA__ #include "gpuRandom.h" @@ -103,7 +103,7 @@ void RadiationMonteCarlo::operator()( // Temporary double parameter double temp; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC unsigned long long seed; // Parameters for CUDA 
generator unsigned long long seq; unsigned long long offset; @@ -152,7 +152,7 @@ void RadiationMonteCarlo::operator()( // Number of photons int nphotons; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC int nphotons_start; #endif @@ -160,7 +160,7 @@ void RadiationMonteCarlo::operator()( const double photon_buffer_size_per_particle = radiation_photon_sampling_ * max_photon_emissions_; if (photons) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We reserve a large number of potential photons on device since we can't reallocate nphotons_start = photons->deviceSize(); //static_cast(photons)->deviceReserve( nphotons + (iend - istart) * photon_buffer_size_per_particle ); @@ -199,13 +199,13 @@ void RadiationMonteCarlo::operator()( double *const __restrict__ photon_tau = photons ? (photons->has_Monte_Carlo_process ? photons->getPtrTau() : nullptr) : nullptr; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Cell keys as a mask int *const __restrict__ photon_cell_keys = photons ? 
photons->getPtrCellKeys() : nullptr; #endif // Table properties ---------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Size of tables // int size_of_Table_integfochi = RadiationTables.integfochi_.size_particle_chi_; // int size_of_Table_min_photon_chi = RadiationTables.xi_.size_particle_chi_; @@ -221,7 +221,7 @@ void RadiationMonteCarlo::operator()( // _______________________________________________________________ // Computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // Management of the data on GPU though this data region int np = iend-istart; @@ -342,7 +342,7 @@ void RadiationMonteCarlo::operator()( // New final optical depth to reach for emision while( tau[ipart] <= epsilon_tau_ ) { //tau[ipart] = -log( 1.-Rand::uniform() ); - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC tau[ipart] = -std::log( 1.-rand_->uniform() ); #else seed_curand_1 = (int) (ipart+1)*(initial_seed_1+1); //Seed for linear generator @@ -385,7 +385,7 @@ void RadiationMonteCarlo::operator()( // Draw random number in [0,1[ - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC random_number = rand_->uniform(); #else seed_curand_2 = (int) (ipart + 1)*(initial_seed_2 + 1); //Seed for linear generator @@ -433,7 +433,7 @@ void RadiationMonteCarlo::operator()( && ( i_photon_emission < max_photon_emissions_)) { // CPU implementation (non-threaded implementation) -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC // Creation of new photons in the temporary array photons photons->createParticles( radiation_photon_sampling_ ); @@ -611,14 +611,14 @@ void RadiationMonteCarlo::operator()( } // end while } // end for -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif //if (photons) std::cerr << photons->deviceSize() << std::endl; // Remove extra space to save memory -#ifndef SMILEI_OPENACC_MODE +#ifndef 
SMILEI_ACCELERATOR_GPU_OACC if (photons) { photons->shrinkToFit( true ); } @@ -631,7 +631,7 @@ void RadiationMonteCarlo::operator()( // ____________________________________________________ // Update of the quantum parameter chi -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else int np = iend-istart; @@ -660,11 +660,11 @@ void RadiationMonteCarlo::operator()( } - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc parallel #endif -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end acc data #endif diff --git a/src/Radiation/RadiationMonteCarlo.h b/src/Radiation/RadiationMonteCarlo.h index 34b8c31db..4e84f169d 100755 --- a/src/Radiation/RadiationMonteCarlo.h +++ b/src/Radiation/RadiationMonteCarlo.h @@ -16,7 +16,7 @@ #include "Radiation.h" #include "userFunctions.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include // This is wrong. Dont include nvidiaParticles, it may cause problem! // See particle factory. 
diff --git a/src/Radiation/RadiationNiel.cpp b/src/Radiation/RadiationNiel.cpp index 6e61f3759..dff292df4 100755 --- a/src/Radiation/RadiationNiel.cpp +++ b/src/Radiation/RadiationNiel.cpp @@ -127,7 +127,7 @@ void RadiationNiel::operator()( double radiated_energy_loc = 0; // Parameters for linear alleatory number generator - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC // Initialize initial seed for linear generator double initial_seed = rand_->uniform(); @@ -144,7 +144,7 @@ void RadiationNiel::operator()( //double t0 = MPI_Wtime(); // 1) Vectorized computation of gamma and the particle quantum parameter -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd #else @@ -190,12 +190,12 @@ void RadiationNiel::operator()( Ex[ipart-ipart_ref], Ey[ipart-ipart_ref], Ez[ipart-ipart_ref], Bx[ipart-ipart_ref], By[ipart-ipart_ref], Bz[ipart-ipart_ref] ); -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC } //finish cycle #endif //double t1 = MPI_Wtime(); - #ifdef SMILEI_OPENACC_MODE + #ifdef SMILEI_ACCELERATOR_GPU_OACC if( particle_chi[ipart] > minimum_chi_continuous ) { seed_curand = (int) (ipart+1)*(initial_seed+1); //Seed for linear generator @@ -297,7 +297,7 @@ void RadiationNiel::operator()( if( niel_computation_method == 0 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC for( ipart=istart ; ipart minimum_chi_continuous ) { @@ -310,7 +310,7 @@ void RadiationNiel::operator()( diffusion[ipart-istart] = std::sqrt( factor_classical_radiated_power*gamma[ipart-ipart_ref]*temp )*random_numbers[ipart-istart]; - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC } } #endif @@ -318,7 +318,7 @@ void RadiationNiel::operator()( // Using the fit at order 5 (vectorized) else if( niel_computation_method == 1 ) { - #ifndef SMILEI_OPENACC_MODE + #ifndef SMILEI_ACCELERATOR_GPU_OACC #pragma omp simd private(temp) for( ipart=istart ; ipart #endif diff --git a/src/Radiation/RadiationTables.h 
b/src/Radiation/RadiationTables.h index bc5003966..77bcac8e2 100755 --- a/src/Radiation/RadiationTables.h +++ b/src/Radiation/RadiationTables.h @@ -58,7 +58,7 @@ class RadiationTables //! param[in] particle_chi particle quantum parameter //! param[in] particle_gamma particle Lorentz factor //! param[in] integfochi_table table of the discretized integrated f/chi function for Photon production yield computation -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computePhotonProductionYield( const double particle_chi, @@ -77,7 +77,7 @@ class RadiationTables //! \param[in] xi //! \param[in] table_min_photon_chi //! \param[in] table_xi -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double computeRandomPhotonChiWithInterpolation( double particle_chi, @@ -95,7 +95,7 @@ class RadiationTables //! from the computed table niel_.table //! \param particle_chi particle quantum parameter -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double getHNielFromTable( double particle_chi, double * tableNiel); @@ -116,7 +116,7 @@ class RadiationTables //! \param particle_chi particle quantum parameter //! \param dt time step //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getRidgersCorrectedRadiatedEnergy( const double particle_chi, @@ -138,7 +138,7 @@ class RadiationTables //! Get of the classical continuous radiated energy during dt //! \param particle_chi particle quantum parameter //! \param dt time step -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getClassicalRadiatedEnergy( double particle_chi, double dt ) @@ -148,7 +148,7 @@ class RadiationTables //! Return the minimum_chi_discontinuous_ value //! 
Under this value, no discontinuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiDiscontinuous() @@ -158,7 +158,7 @@ class RadiationTables //! Return the minimum_chi_continuous_ value //! Under this value, no continuous radiation reaction -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif inline double __attribute__((always_inline)) getMinimumChiContinuous() diff --git a/src/Radiation/RadiationTools.h b/src/Radiation/RadiationTools.h index 33cb5f501..1746c894e 100644 --- a/src/Radiation/RadiationTools.h +++ b/src/Radiation/RadiationTools.h @@ -32,7 +32,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder10(double particle_chi) @@ -62,7 +62,7 @@ class RadiationTools { //! Valid between particle_chi in 1E-3 and 1E1 //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitOrder5(double particle_chi) @@ -86,7 +86,7 @@ class RadiationTools { //! Ridgers et al., ArXiv 1708.04511 (2017) //! \param particle_chi particle quantum parameter // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) getHNielFitRidgers(double particle_chi) @@ -104,7 +104,7 @@ class RadiationTools { //! 
approximation formulae //! \param particle_chi particle quantum parameter //#pragma omp declare simd -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeGRidgers(double particle_chi) @@ -117,7 +117,7 @@ class RadiationTools { //! Return f1(nu) = Int_nu^\infty K_{5/3}(y) dy //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF1Nu(double nu) @@ -155,7 +155,7 @@ class RadiationTools { //! Return f2(nu) = BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeF2Nu(double nu) @@ -194,7 +194,7 @@ class RadiationTools { //! = Int_nu^\infty K_{5/3}(y) dy + cst * BesselK_{2/3}(nu) //! used in computed synchrotron power spectrum // ----------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif static inline double __attribute__((always_inline)) computeBesselPartsRadiatedPower(double nu, double cst) diff --git a/src/Radiation/Table.h b/src/Radiation/Table.h index 8b74aeeaa..a028d4df3 100644 --- a/src/Radiation/Table.h +++ b/src/Radiation/Table.h @@ -45,7 +45,7 @@ class Table void compute_parameters(); //! 
get value using linear interpolation at position x -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif double get(double x); diff --git a/src/Smilei.cpp b/src/Smilei.cpp index 15cd7b047..81ba6c258 100755 --- a/src/Smilei.cpp +++ b/src/Smilei.cpp @@ -20,7 +20,7 @@ #include #include #include -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -44,7 +44,7 @@ using namespace std; // MAIN CODE // --------------------------------------------------------------------------------------------------------------------- -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #ifdef _OPENACC void initialization_openacc() { @@ -80,7 +80,7 @@ int main( int argc, char *argv[] ) // ------------------------- // Create the OpenACC environment -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC initialization_openacc(); #endif @@ -124,7 +124,7 @@ int main( int argc, char *argv[] ) // oblivious to the program (only one, the one by default). // This could be a missed but very advanced optimization for some // kernels/exchange. - ERROR( "Simlei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." ); + ERROR( "Smilei needs only one accelerator (GPU). Look for HIP_VISIBLE_DEVICES or 'gpu-bind=closest' in your SLURM script or use a custom binding script." 
); } else { // ::omp_set_default_device(0); } @@ -248,7 +248,7 @@ int main( int argc, char *argv[] ) checkpoint.restartAll( vecPatches, region, &smpi, params ); -#if !defined( SMILEI_ACCELERATOR_MODE ) +#if !defined( SMILEI_ACCELERATOR_GPU ) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -271,7 +271,7 @@ int main( int argc, char *argv[] ) PatchesFactory::createVector( vecPatches, params, &smpi, openPMD, &radiation_tables_, 0 ); -#if !(defined( SMILEI_ACCELERATOR_MODE )) +#if !(defined( SMILEI_ACCELERATOR_GPU )) // CPU only, its too early to sort on GPU vecPatches.initialParticleSorting( params ); #endif @@ -407,7 +407,7 @@ int main( int argc, char *argv[] ) } } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) TITLE( "GPU allocation and copy of the fields and particles" ); // Allocate particle and field arrays // Also copy particle array content on device @@ -629,7 +629,7 @@ int main( int argc, char *argv[] ) #pragma omp parallel shared (time_dual,smpi,params, vecPatches, region, simWindow, checkpoint, itime) { // finalize particle exchanges and sort particles - vecPatches.finalizeAndSortParticles( params, &smpi, simWindow, + vecPatches.finalizeExchParticlesAndSort( params, &smpi, simWindow, time_dual, timers, itime ); // Particle merging @@ -685,7 +685,7 @@ int main( int argc, char *argv[] ) } //End omp parallel region if( params.has_load_balancing && params.load_balancing_time_selection->theTimeIsNow( itime ) ) { -// #if defined( SMILEI_ACCELERATOR_MODE ) +// #if defined( SMILEI_ACCELERATOR_GPU ) // ERROR( "Load balancing not tested on GPU !" 
); // #endif count_dlb++; @@ -777,7 +777,7 @@ int main( int argc, char *argv[] ) region.clean(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) vecPatches.cleanDataOnDevice( params, &smpi, &radiation_tables_, &multiphoton_Breit_Wheeler_tables_ ); #endif diff --git a/src/SmileiMPI/AsyncMPIbuffers.cpp b/src/SmileiMPI/AsyncMPIbuffers.cpp index 0f7cebe9d..ff8efb17f 100755 --- a/src/SmileiMPI/AsyncMPIbuffers.cpp +++ b/src/SmileiMPI/AsyncMPIbuffers.cpp @@ -1,5 +1,6 @@ #include "AsyncMPIbuffers.h" +#include "ParticlesFactory.h" #include "Field.h" #include "Patch.h" @@ -66,30 +67,47 @@ SpeciesMPIbuffers::SpeciesMPIbuffers() SpeciesMPIbuffers::~SpeciesMPIbuffers() { + for( size_t i=0 ; i > partRecv; + std::vector< std::vector > partRecv; //! ndim vectors of 2 received packets of particles (1 per direction) - std::vector< std::vector > partSend; + std::vector< std::vector > partSend; - //! ndim vectors of 2 vectors of index particles to send (1 per direction) - //! - not sent - // - used to sort Species::indexes_of_particles_to_exchange built in Species::dynamics - std::vector< std::vector< std::vector > > part_index_send; //! ndim vectors of 2 numbers of particles to send (1 per direction) - std::vector< std::vector< unsigned int > > part_index_send_sz; + std::vector< std::vector< unsigned int > > partSendSize; //! 
ndim vectors of 2 numbers of particles to receive (1 per direction) - std::vector< std::vector< unsigned int > > part_index_recv_sz; + std::vector< std::vector< unsigned int > > partRecvSize; }; diff --git a/src/SmileiMPI/SmileiMPI.cpp b/src/SmileiMPI/SmileiMPI.cpp index c35a69fe9..88e03c864 100755 --- a/src/SmileiMPI/SmileiMPI.cpp +++ b/src/SmileiMPI/SmileiMPI.cpp @@ -763,7 +763,7 @@ void SmileiMPI::isend_species( Patch *patch, int to, int &irequest, int tag, Par irequest ++; } -#if defined( SMILEI_ACCELERATOR_MODE) +#if defined( SMILEI_ACCELERATOR_GPU) // For the particles for( unsigned int ispec=0; ispecvecSpecies[ispec]->particles, from, tag+2*ispec, recvParts ); MPI_Type_free( &( recvParts ) ); } - patch->vecSpecies[ispec]->particles->initializeDataOnDevice(); - patch->vecSpecies[ispec]->particles_to_move->initializeDataOnDevice(); + patch->vecSpecies[ispec]->allocateParticlesOnDevice(); } @@ -1210,7 +1209,7 @@ void SmileiMPI::send_PML(ElectroMagn *EM, Tpml embc, int bcId, int to, int &ire void SmileiMPI::isend( ElectroMagn *EM, int to, int &irequest, vector &requests, int tag, bool send_xmax_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // isendOnDevice( EM->Ex_, to, tag+irequest, requests[irequest] ); // irequest++; @@ -1746,7 +1745,7 @@ int SmileiMPI::recv_PML(ElectroMagn *EM, Tpml embc, int bcId, int from, int tag void SmileiMPI::recv( ElectroMagn *EM, int from, int &tag, bool recv_xmin_bc ) { -// #if defined (SMILEI_ACCELERATOR_MODE) +// #if defined (SMILEI_ACCELERATOR_GPU) // recvOnDevice( EM->Ex_, from, tag ); // tag++; @@ -2122,7 +2121,7 @@ void SmileiMPI::isend( Field *field, int to, int tag, MPI_Request &request ) } // End isend ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) //! 
Sends the whole Field Device to Device (assuming MPI enables it) void SmileiMPI::isendOnDevice( Field *field, int to, int tag, MPI_Request &request ) { @@ -2195,7 +2194,7 @@ void SmileiMPI::recv( Field *field, int from, int tag ) } // End recv ( Field ) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void SmileiMPI::recvOnDevice( Field *field, int from, int tag ) { @@ -2525,7 +2524,7 @@ void SmileiMPI::eraseBufferParticleTrail( const int ndim, const int istart, cons } -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) template static inline void diff --git a/src/SmileiMPI/SmileiMPI.h b/src/SmileiMPI/SmileiMPI.h index 13cacc416..2785921de 100755 --- a/src/SmileiMPI/SmileiMPI.h +++ b/src/SmileiMPI/SmileiMPI.h @@ -103,7 +103,7 @@ class SmileiMPI //! Sends the whole Field void isend( Field *field, int to, int tag, MPI_Request &request ); //! Sends the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void isendOnDevice( Field *field, int to, int tag, MPI_Request &request ); #endif @@ -114,7 +114,7 @@ class SmileiMPI //! Receives the whole Field void recv( Field *field, int from, int tag); //! Receives the whole Field Device to Device (assuming MPI enables it) -#if defined (SMILEI_ACCELERATOR_MODE) +#if defined (SMILEI_ACCELERATOR_GPU) void recvOnDevice( Field *field, int from, int tag); #endif @@ -248,7 +248,7 @@ class SmileiMPI //! Erase Particles from istart ot the end in the buffers of thread ithread void eraseBufferParticleTrail( const int ndim, const int istart, const int ithread, bool isAM = false ); -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) //! Map CPU buffers onto the GPU to at least accommodate particle_count //! particles. 
This method tries to reduce the number of //! allocation/deallocation which produces a lot of fragmentation on some diff --git a/src/Species/Species.cpp b/src/Species/Species.cpp index 37462566f..089e25f27 100755 --- a/src/Species/Species.cpp +++ b/src/Species/Species.cpp @@ -90,7 +90,6 @@ Species::Species( Params ¶ms, Patch *patch ) : { // &particles_sorted[0] particles = ParticlesFactory::create( params, *patch ); - particles_to_move = ParticlesFactory::create( params, *patch ); regular_number_array_.clear(); partBoundCond = NULL; @@ -104,7 +103,7 @@ Species::Species( Params ¶ms, Patch *patch ) : dx_inv_[1] = 1./cell_length[1]; dx_inv_[2] = 1./cell_length[2]; - initCluster( params ); + initCluster( params, patch ); inv_nDim_particles = 1./( ( double )nDim_particle ); length_[0]=0; @@ -123,7 +122,7 @@ Species::Species( Params ¶ms, Patch *patch ) : }//END Species creator -void Species::initCluster( Params ¶ms ) +void Species::initCluster( Params ¶ms, Patch *patch ) { // NOTE: On GPU we dont use first_index, it would contain redundant data but // we are forced to initialize it due to ParticleCreator::create() and the @@ -252,7 +251,7 @@ void Species::initCluster( Params ¶ms ) #endif //Initialize specMPI - MPI_buffer_.allocate( nDim_field ); + MPI_buffer_.allocate( params, patch ); //ener_tot = 0.; nrj_bc_lost = 0.; @@ -378,18 +377,14 @@ void Species::initOperators( Params ¶ms, Patch *patch ) partBoundCond = new PartBoundCond( params, this, patch ); for( unsigned int iDim=0 ; iDim < nDim_field ; iDim++ ) { for( unsigned int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - MPI_buffer_.partRecv[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.partSend[iDim][iNeighbor].initialize( 0, ( *particles ) ); - MPI_buffer_.part_index_send[iDim][iNeighbor].resize( 0 ); - MPI_buffer_.part_index_recv_sz[iDim][iNeighbor] = 0; - MPI_buffer_.part_index_send_sz[iDim][iNeighbor] = 0; + MPI_buffer_.partRecv[iDim][iNeighbor]->initialize( 0, ( *particles ) ); + 
MPI_buffer_.partSend[iDim][iNeighbor]->initialize( 0, ( *particles ) ); } } typePartSend.resize( nDim_field*2, MPI_DATATYPE_NULL ); typePartRecv.resize( nDim_field*2, MPI_DATATYPE_NULL ); exchangePatch = MPI_DATATYPE_NULL; - particles_to_move->initialize( 0, *particles ); } @@ -399,7 +394,6 @@ void Species::initOperators( Params ¶ms, Patch *patch ) Species::~Species() { delete particles; - delete particles_to_move; delete Push; delete Interp; @@ -506,7 +500,7 @@ Species::~Species() } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) //! Prepare the species Current and Rho grids on Device void Species::prepareSpeciesCurrentAndChargeOnDevice( @@ -546,7 +540,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( } -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc parallel present( Jx_s[0:Jx_size], \ Jy_s[0:Jy_size], \ Jz_s[0:Jz_size], \ @@ -557,7 +551,7 @@ Species::prepareSpeciesCurrentAndChargeOnDevice( #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target #pragma omp teams distribute parallel for -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc loop gang worker vector #endif for( unsigned int i=0 ; iinitializeDataOnDevice(); + + // The first send/recv buffers are also on device + MPI_buffer_.partSend[0][0]->initializeDataOnDevice(); + MPI_buffer_.partSend[0][1]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][0]->initializeDataOnDevice(); + MPI_buffer_.partRecv[0][1]->initializeDataOnDevice(); + + // Create photon species on the device + if( radiation_model_ == "mc" && photon_species_ ) { + radiated_photons_->initializeDataOnDevice(); + } + + // Create pair species on the device + if( mBW_pair_species_[0] && mBW_pair_species_[1] ) { + mBW_pair_particles_[0]->initializeDataOnDevice(); + mBW_pair_particles_[1]->initializeDataOnDevice(); + } +} + + //! 
Copy particles from host to device void Species::copyParticlesFromHostToDevice() @@ -641,7 +659,7 @@ Species::copyParticlesFromHostToDevice() particles->copyFromHostToDevice(); } -#endif // end if SMILEI_ACCELERATOR_MODE +#endif // end if SMILEI_ACCELERATOR_GPU // --------------------------------------------------------------------------------------------------------------------- //! Method calculating the Particle dynamics (interpolation, pusher, projection and more) @@ -682,7 +700,7 @@ void Species::dynamics( double time_dual, if( time_dual>time_frozen_ || Ionize) { // moving particle // Prepare temporary buffers for this iteration -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) smpi->resizeDeviceBuffers( ithread, nDim_field, particles->numberOfParticles() ); @@ -695,7 +713,7 @@ void Species::dynamics( double time_dual, patch->startFineTimer(mBW_timer_id_); -#if defined( SMILEI_OPENACC_MODE) +#if defined( SMILEI_ACCELERATOR_GPU_OACC) static_cast(mBW_pair_particles_[0])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(0) ); static_cast(mBW_pair_particles_[0])->resetCellKeys(); static_cast(mBW_pair_particles_[1])->deviceResize( particles->deviceSize() * Multiphoton_Breit_Wheeler_process->getPairCreationSampling(1) ); @@ -708,7 +726,7 @@ void Species::dynamics( double time_dual, patch->stopFineTimer(mBW_timer_id_); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) // Make sure some bin preconditions are respected SMILEI_ASSERT( particles->first_index.size() == 1 ); SMILEI_ASSERT( particles->last_index.size() >= 1 ); @@ -814,7 +832,7 @@ void Species::dynamics( double time_dual, // Compression of the bins if necessary if( Multiphoton_Breit_Wheeler_process ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC removeTaggedParticles(smpi, &particles->first_index[0], &particles->last_index[0], @@ -1672,14 +1690,14 @@ void 
Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Radiation losses if( Radiate && photon_species_ ) { // If creation of macro-photon, we add them to photon_species -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(radiated_photons_)->eraseLeavingParticles(); #endif photon_species_->importParticles( params, patch, *radiated_photons_, localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(radiated_photons_)->deviceClear(); #endif @@ -1691,7 +1709,7 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * // Addition of the electron-positron particles for( int k=0; k<2; k++ ) { -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We first erase empty slots in the buffer of photons // radiation_photons_->cell_keys is used as a mask static_cast(mBW_pair_particles_[k])->eraseLeavingParticles(); @@ -1699,7 +1717,7 @@ void Species::dynamicsImportParticles( double time_dual, Params ¶ms, Patch * mBW_pair_species_[k]->importParticles( params, patch, *mBW_pair_particles_[k], localDiags, time_dual ); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC // We explicitely clear the device Particles static_cast(mBW_pair_particles_[k])->deviceClear(); #endif @@ -1747,53 +1765,32 @@ void Species::computeCharge( ElectroMagn *EMfields, bool old /*=false*/ ) }//END computeCharge -void Species::extractParticles() -{ - particles->extractParticles( particles_to_move ); -} - -// void Species::injectParticles( Params ¶ms ) -// { -// } - - // --------------------------------------------------------------------------------------------------------------------- //! 
Sort particles // --------------------------------------------------------------------------------------------------------------------- void Species::sortParticles( Params ¶ms ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // ----------------------------- // GPU version - - // particles_to_move contains, up to here, send particles - // clean it to manage recv particles - particles_to_move->clear(); // Clear on the host - // Merge all MPI_buffer_.partRecv in particles_to_move - for( int idim = 0; idim < params.nDim_field; idim++ ) { - for( int iNeighbor = 0; iNeighbor < 2; iNeighbor++ ) { - int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; - if( ( n_part_recv != 0 ) ) { - // insert n_part_recv in particles_to_move from 0 - MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, - n_part_recv, - *particles_to_move, - particles_to_move->size() ); + + // Merge all MPI_buffer_.partRecv in the first one + Particles * first_buffer = MPI_buffer_.partRecv[0][0]; + for( auto &partRecvs: MPI_buffer_.partRecv ) { + for( auto partRecv: partRecvs ) { + if( partRecv != first_buffer && partRecv->size() > 0 ) { + partRecv->copyParticles( 0, partRecv->size(), *first_buffer, first_buffer->size() ); + partRecv->clear(); } } } - - particles_to_move->copyFromHostToDevice(); - - // // Erase particles that leaves this patch - // particles->last_index[0] = particles->eraseLeavingParticles(); - // - // // Inject newly arrived particles in particles_to_move - // particles->last_index[0] += particles->injectParticles( particles_to_move ); - - particles->importAndSortParticles( particles_to_move ); + + first_buffer->copyFromHostToDevice(); + + particles->importAndSortParticles( first_buffer ); + #else // -------------------------- @@ -1804,28 +1801,10 @@ void Species::sortParticles( Params ¶ms ) int ndim = params.nDim_field; int idim; - // Compute total number 
of particles received - // int total_number_part_recv = 0; - //Merge all MPI_buffer_.partRecv in particles_to_move - // for( int idim = 0; idim < ndim; idim++ ) { - // for( int iNeighbor=0 ; iNeighbor<2 ; iNeighbor++ ) { - // int n_part_recv = MPI_buffer_.part_index_recv_sz[idim][iNeighbor]; - // if( ( n_part_recv!=0 ) ) { - // // insert n_part_recv in particles_to_move from 0 - // //MPI_buffer_.partRecv[idim][iNeighbor].copyParticles( 0, n_part_recv, *particles_to_move, 0 ); - // total_number_part_recv += n_part_recv; - // //particles->last_index[particles->last_index.size()-1] += n_part_recv; - // //particles->cell_keys.resize(particles->cell_keys.size()+n_part_recv); - // } - // } - // } - //cout << "\t Species id : " << species_number_ << " - nparticles recv : " << blabla << endl; - - // Sort to adapt do cell_keys usage std::vector indexes_of_particles_to_exchange; for ( int ipart=0 ; ipart< (int)(getNbrOfParticles()) ; ipart++ ) { - if ( particles->cell_keys[ipart] == -1 ) { + if ( particles->cell_keys[ipart] < 0 ) { indexes_of_particles_to_exchange.push_back( ipart ); } } @@ -1900,15 +1879,15 @@ void Species::sortParticles( Params ¶ms ) //Evaluation of the necessary shift of all bins.2 //idim=0 - shift[1] += MPI_buffer_.part_index_recv_sz[0][0];//Particles coming from xmin all go to bin 0 and shift all the other bins. - shift[particles->last_index.size()] += MPI_buffer_.part_index_recv_sz[0][1];//Used only to count the total number of particles arrived. + shift[1] += MPI_buffer_.partRecv[0][0]->size();//Particles coming from xmin all go to bin 0 and shift all the other bins. + shift[particles->last_index.size()] += MPI_buffer_.partRecv[0][1]->size();//Used only to count the total number of particles arrived. //idim>0 for( idim = 1; idim < ndim; idim++ ) { for( int iNeighbor=0 ; iNeighborsize(); for( unsigned int j=0; j<( unsigned int )n_part_recv ; j++ ) { //We first evaluate how many particles arrive in each bin. 
- ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. shift[ii+1]++; // It makes the next bins shift. } } @@ -1943,11 +1922,11 @@ void Species::sortParticles( Params ¶ms ) //Space has been made now to write the arriving particles into the correct bins //idim == 0 is the easy case, when particles arrive either in first or last bin. for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[0][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { ii = iNeighbor*( particles->last_index.size()-1 ); //0 if iNeighbor=0(particles coming from Xmin) and particles->last_index.size()-1 otherwise. - MPI_buffer_.partRecv[0][iNeighbor].overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); + MPI_buffer_.partRecv[0][iNeighbor]->overwriteParticle( 0, *particles, particles->last_index[ii], n_part_recv ); particles->last_index[ii] += n_part_recv ; } } @@ -1955,12 +1934,12 @@ void Species::sortParticles( Params ¶ms ) for( idim = 1; idim < ndim; idim++ ) { //if (idim!=iDim) continue; for( int iNeighbor=0 ; iNeighborsize(); //if ( (neighbor_[idim][iNeighbor]!=MPI_PROC_NULL) && (n_part_recv!=0) ) { if( ( n_part_recv!=0 ) ) { for( unsigned int j=0; j<( unsigned int )n_part_recv; j++ ) { - ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor].position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. - MPI_buffer_.partRecv[idim][iNeighbor].overwriteParticle( j, *particles, particles->last_index[ii] ); + ii = int( ( MPI_buffer_.partRecv[idim][iNeighbor]->position( 0, j )-min_loc )/dbin ); //bin in which the particle goes. 
+ MPI_buffer_.partRecv[idim][iNeighbor]->overwriteParticle( j, *particles, particles->last_index[ii] ); particles->last_index[ii] ++ ; } } @@ -2117,7 +2096,7 @@ void Species::countSortParticles( Params ¶ms ) // Move all particles from another species to this one void Species::importParticles( Params ¶ms, Patch *patch, Particles &source_particles, vector &localDiags, double time_dual, Ionization *I ) { -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) || defined( SMILEI_ACCELERATOR_GPU_OACC ) // --------------------------------------------------- // GPU version // Warning: the GPU version does not handle bin and sorting @@ -2228,7 +2207,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nparts = smpi->dynamics_Epart[ithread].size()/3; -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ weight = particles->getPtrWeight(); @@ -2267,7 +2246,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Ex[0:nparts],Ey[0:nparts],Ez[0:nparts], \ Bx[0:nparts], By[0:nparts], Bz[0:nparts], \ @@ -2312,7 +2291,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { if (copy_particle_number>0) { -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle(copy_first_index, particles->last_index[ibin], copy_particle_number, compute_cell_keys ); #else for (auto ipart = 0 ; ipart < copy_particle_number ; ipart ++) { @@ -2367,7 +2346,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { for( unsigned int ipart = 0 ; ipart < copy_particle_number ; ipart ++ ) { thetaold[copy_first_index + ipart] = 
thetaold[particles->last_index[ibin] + ipart]; @@ -2405,7 +2384,7 @@ void Species::compress(SmileiMPI *smpi, int ithread, bool compute_cell_keys) { } } -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif @@ -2439,7 +2418,7 @@ void Species::removeTaggedParticlesPerBin( // Weight shortcut double *const __restrict__ weight = particles->getPtrWeight(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC double *const __restrict__ position_x = particles->getPtrPosition( 0 ); double *const __restrict__ position_y = nDim_particle > 1 ? particles->getPtrPosition( 1 ) : nullptr; double *const __restrict__ position_z = nDim_particle > 2 ? particles->getPtrPosition( 2 ) : nullptr; @@ -2457,7 +2436,7 @@ void Species::removeTaggedParticlesPerBin( // Total number of bins / cells const int nbin = particles->numberOfBins(); -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc parallel \ present(Epart[0:nparts*3],\ Bpart[0:nparts*3], \ @@ -2499,7 +2478,7 @@ void Species::removeTaggedParticlesPerBin( if( ipart < last_photon_index ) { // The last existing photon comes to the position of // the deleted photon -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC particles->overwriteParticle( last_photon_index, ipart, compute_cell_keys ); #else weight[ipart] = weight[last_photon_index]; @@ -2533,7 +2512,7 @@ void Species::removeTaggedParticlesPerBin( } gamma[ipart] = gamma[0*nparts+last_photon_index]; -#ifndef SMILEI_OPENACC_MODE +#ifndef SMILEI_ACCELERATOR_GPU_OACC if (thetaold) { thetaold[0*nparts+ipart] = thetaold[0*nparts+last_photon_index]; } @@ -2560,13 +2539,14 @@ void Species::removeTaggedParticlesPerBin( } // if last_index[ibin] > first_index[ibin] } // end loop over the bins -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC } // end parallel region #endif } //! This method removes particles with a negative weight //! 
when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void Species::removeTaggedParticles( SmileiMPI *smpi, int *const first_index, @@ -2575,8 +2555,6 @@ void Species::removeTaggedParticles( bool compute_cell_keys) { -#ifdef SMILEI_OPENACC_MODE - unsigned int new_n_parts = 0; unsigned int nb_deleted = 0; @@ -2644,7 +2622,7 @@ void Species::removeTaggedParticles( // that will not be erased // Backward loop over the tagged particles to fill holes in the photon particle array (at the bin level only) -//#ifdef SMILEI_OPENACC_MODE +//#ifdef SMILEI_ACCELERATOR_GPU_OACC // #pragma acc loop seq //#endif for( int ipart=last_moving_index-1 ; ipart>=*first_index; ipart-- ) { @@ -2721,9 +2699,9 @@ void Species::removeTaggedParticles( } } // if nparts > 0 +} #endif -} // ------------------------------------------------ // Set position when using restart & moving window diff --git a/src/Species/Species.h b/src/Species/Species.h index 56c693d65..d4af3bf9d 100755 --- a/src/Species/Species.h +++ b/src/Species/Species.h @@ -6,7 +6,7 @@ // #include "PyTools.h" #include "Particles.h" -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include "nvidiaParticles.h" #endif #include "Params.h" @@ -147,8 +147,6 @@ class Species //! Vector containing all Particles of the considered Species Particles *particles; - //! Data structure through which passes particles which move from one patch to another - Particles *particles_to_move; Particles particles_sorted[2]; //std::vector index_of_particles_to_exchange; @@ -344,7 +342,7 @@ class Species // ----------------------------------------------------------------------------- // 5. Methods - virtual void initCluster( Params & ); + virtual void initCluster( Params &, Patch * ); virtual void resizeCluster( Params & ); @@ -384,7 +382,9 @@ class Species return particles->capacity(); } -#if defined( SMILEI_ACCELERATOR_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU ) + + void allocateParticlesOnDevice(); //! 
Copy particles from host to device void @@ -482,12 +482,6 @@ class Species //! Method calculating the Particle charge on the grid (projection) virtual void computeCharge( ElectroMagn *EMfields, bool old=false ); - //! Method used to select particles which will change of patches - virtual void extractParticles(); - - //! Method used to integrate particles which come from another patches - // virtual void injectParticles( Params ¶ms ); - //! Method used to inject and sort particles virtual void sortParticles( Params ¶m ); @@ -572,12 +566,14 @@ class Species //! This method removes particles with a negative weight //! when a single bin is used +#ifdef SMILEI_ACCELERATOR_GPU_OACC void removeTaggedParticles( SmileiMPI *smpi, int *const first_index, int *const last_index, int ithread, bool compute_cell_keys = false); +#endif //! Moving window boundary conditions managment void disableXmax(); diff --git a/src/Species/SpeciesV.cpp b/src/Species/SpeciesV.cpp index 98d5d9dbb..4a4199b63 100755 --- a/src/Species/SpeciesV.cpp +++ b/src/Species/SpeciesV.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesV::SpeciesV( Params ¶ms, Patch *patch ) : Species( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; @@ -106,7 +106,7 @@ SpeciesV::~SpeciesV() } -void SpeciesV::initCluster( Params ¶ms ) +void SpeciesV::initCluster( Params ¶ms, Patch *patch ) { int ncells = 1; for( unsigned int iDim=0 ; iDimfirst_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= length_[i]; @@ -552,7 +552,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != 
-1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -564,7 +564,7 @@ void SpeciesV::dynamics( double time_dual, unsigned int ispec, // } // for( iPart=particles->first_index[ipack*packsize_+scell] ; iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1053,7 +1053,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, if( mass_>0 ) { for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -1067,7 +1067,7 @@ void SpeciesV::dynamicsTasks( double time_dual, unsigned int ispec, } else if( mass_==0 ) { for( int scell = first_cell_of_bin[ibin] ; scell <= last_cell_of_bin[ibin] ; scell++ ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1366,27 +1366,27 @@ void SpeciesV::sortParticles( Params ¶ms ) //Loop over just arrived particles to compute their cell keys and contribution to count for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - buf_cell_keys[idim][ineighbor].resize( MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + buf_cell_keys[idim][ineighbor].resize( 
MPI_buffer_.partRecv[idim][ineighbor]->size() ); // #pragma omp simd - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // for( unsigned int ipos=0; ipos < nDim_field ; ipos++ ) { - // double X = ((this)->*(distance[ipos]))(&MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); + // double X = ((this)->*(distance[ipos]))(MPI_buffer_.partRecv[idim][ineighbor], ipos, ip); // int IX = round( X * dx_inv_[ipos] ); // buf_cell_keys[idim][ineighbor][ip] = buf_cell_keys[idim][ineighbor][ip] * length_[ipos] + IX; // } // } // // not vectorizable because random access to count - // for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + // for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { // count[buf_cell_keys[idim][ineighbor][ip]] ++; // } computeParticleCellKeys( params, - &MPI_buffer_.partRecv[idim][ineighbor], + MPI_buffer_.partRecv[idim][ineighbor], &buf_cell_keys[idim][ineighbor][0], &count[0], 0, - MPI_buffer_.part_index_recv_sz[idim][ineighbor] ); + MPI_buffer_.partRecv[idim][ineighbor]->size() ); } } @@ -1403,8 +1403,8 @@ void SpeciesV::sortParticles( Params ¶ms ) //Now proceed to the cycle sort - if( MPI_buffer_.partRecv[0][0].size() == 0 ) { - MPI_buffer_.partRecv[0][0].initialize( 0, *particles ); //Is this correct ? + if( MPI_buffer_.partRecv[0][0]->size() == 0 ) { + MPI_buffer_.partRecv[0][0]->initialize( 0, *particles ); //Is this correct ? } // Resize the particle vector @@ -1418,7 +1418,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Copy all particles from MPI buffers back to the writable particles via cycle sort pass. 
for( unsigned int idim=0; idim < nDim_field ; idim++ ) { for( unsigned int ineighbor=0 ; ineighbor < 2 ; ineighbor++ ) { - for( unsigned int ip=0; ip < MPI_buffer_.part_index_recv_sz[idim][ineighbor]; ip++ ) { + for( unsigned int ip=0; ip < MPI_buffer_.partRecv[idim][ineighbor]->size(); ip++ ) { cycle.resize( 1 ); cell_target = buf_cell_keys[idim][ineighbor][ip]; ip_dest = particles->first_index[cell_target]; @@ -1429,7 +1429,7 @@ void SpeciesV::sortParticles( Params ¶ms ) cycle[0] = ip_dest; cell_target = particles->cell_keys[ip_dest]; //As long as the particle is not erased, we can build up the cycle. - while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; while( particles->cell_keys[ip_dest] == cell_target ) { ip_dest++; @@ -1441,7 +1441,7 @@ void SpeciesV::sortParticles( Params ¶ms ) //Last target_cell is -1, the particle must be erased: particles->translateParticles( cycle ); //Eventually copy particle from the MPI buffer into the particle vector. - MPI_buffer_.partRecv[idim][ineighbor].overwriteParticle( ip, *particles, cycle[0] ); + MPI_buffer_.partRecv[idim][ineighbor]->overwriteParticle( ip, *particles, cycle[0] ); } } } @@ -1450,14 +1450,14 @@ void SpeciesV::sortParticles( Params ¶ms ) for( unsigned int ip=( unsigned int )particles->last_index.back(); ip < npart; ip++ ) { cell_target = particles->cell_keys[ip]; - if( cell_target == -1 ) { + if( cell_target < 0 ) { continue; } cycle.resize( 0 ); cycle.push_back( ip ); //As long as the particle is not erased, we can build up the cycle. 
- while( cell_target != -1 ) { + while( cell_target >= 0 ) { ip_dest = particles->first_index[cell_target]; @@ -1533,7 +1533,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys particles cell_keys[iPart] = std::round( position_x[iPart] * dx_inv_[0]) - min_loc_l ; cell_keys[iPart] *= length_[1]; @@ -1553,7 +1553,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1573,7 +1573,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = std::round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; cell_keys[iPart] *= length_[1]; @@ -1589,7 +1589,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, #pragma omp simd for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles cell_keys[iPart] = round(position_x[iPart] * dx_inv_[0] )- min_loc_x ; } @@ -1598,7 +1598,7 @@ void SpeciesV::computeParticleCellKeys( Params & params, } for( iPart=istart; iPart < iend ; iPart++ ) { - if ( cell_keys[iPart] != -1 ) { + if ( cell_keys[iPart] >= 0 ) { count[cell_keys[iPart]] ++; } } @@ -2526,7 +2526,7 @@ void SpeciesV::ponderomotiveUpdatePositionAndCurrentsTasks( double time_dual, un smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[scell] ; iPartlast_index[scell]; iPart++ ) { - 
if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. for( int i = 0 ; i<( int )nDim_field; i++ ) { particles->cell_keys[iPart] *= length_[i]; diff --git a/src/Species/SpeciesV.h b/src/Species/SpeciesV.h index 39dc45089..7f5fe587c 100755 --- a/src/Species/SpeciesV.h +++ b/src/Species/SpeciesV.h @@ -26,7 +26,7 @@ class SpeciesV : public Species //! Species destructor virtual ~SpeciesV(); - void initCluster( Params ¶ms ) override; + void initCluster( Params ¶ms, Patch *patch ) override; //! Method calculating the Particle dynamics (interpolation, pusher, projection) void dynamics( double time, unsigned int ispec, diff --git a/src/Species/SpeciesVAdaptive.cpp b/src/Species/SpeciesVAdaptive.cpp index b24d86711..273362561 100755 --- a/src/Species/SpeciesVAdaptive.cpp +++ b/src/Species/SpeciesVAdaptive.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesVAdaptive::SpeciesVAdaptive( Params ¶ms, Patch *patch ) : SpeciesV( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; }//END SpeciesVAdaptive creator @@ -275,7 +275,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // if( mass_>0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -289,7 +289,7 @@ void SpeciesVAdaptive::scalarDynamics( double time_dual, unsigned int ispec, // } else if( mass_==0 ) { // // for( iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - // if ( particles->cell_keys[iPart] != -1 ) { + // if ( particles->cell_keys[iPart] >= 0 ) { // //Compute cell_keys of remaining particles // for( unsigned int i = 0 ; icell_keys[iPart] *= 
this->length_[i]; @@ -754,7 +754,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec if( mass_>0 ) { for( int iPart=particles->first_index[ipack*packsize_+scell] ; ( int )iPartlast_index[ipack*packsize_+scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= this->length_[i]; @@ -768,7 +768,7 @@ void SpeciesVAdaptive::scalarDynamicsTasks( double time_dual, unsigned int ispec } else if( mass_==0 ) { for( int iPart=particles->first_index[scell] ; ( int )iPartlast_index[scell]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //Compute cell_keys of remaining particles for( unsigned int i = 0 ; icell_keys[iPart] *= length[i]; @@ -1662,7 +1662,7 @@ void SpeciesVAdaptive::scalarPonderomotiveUpdatePositionAndCurrentsTasks( double smpi->traceEventIfDiagTracing(diag_PartEventTracing, Tools::getOMPThreadNum(),0,11); for( int iPart=particles->first_index[first_cell_of_bin[ibin]] ; iPartlast_index[last_cell_of_bin[ibin]]; iPart++ ) { - if ( particles->cell_keys[iPart] != -1 ) { + if ( particles->cell_keys[iPart] >= 0 ) { //First reduction of the count sort algorithm. Lost particles are not included. 
for( int i = 0 ; i<( int )nDim_field; i++ ) { particles->cell_keys[iPart] *= length_[i]; diff --git a/src/Species/SpeciesVAdaptiveMixedSort.cpp b/src/Species/SpeciesVAdaptiveMixedSort.cpp index cc809d8c3..1889f0cd8 100755 --- a/src/Species/SpeciesVAdaptiveMixedSort.cpp +++ b/src/Species/SpeciesVAdaptiveMixedSort.cpp @@ -46,7 +46,7 @@ using namespace std; SpeciesVAdaptiveMixedSort::SpeciesVAdaptiveMixedSort( Params ¶ms, Patch *patch ) : SpeciesV( params, patch ) { - initCluster( params ); + initCluster( params, patch ); npack_ = 0 ; packsize_ = 0; diff --git a/src/Tools/Pragma.h b/src/Tools/Pragma.h index b1a81cdae..0fb5e1e9d 100644 --- a/src/Tools/Pragma.h +++ b/src/Tools/Pragma.h @@ -31,7 +31,7 @@ #if defined ( SMILEI_ACCELERATOR_GPU_OMP ) #define ATOMIC(mode) \ _Pragma( TOSTRING(omp atomic mode)) -#elif defined ( SMILEI_OPENACC_MODE ) +#elif defined ( SMILEI_ACCELERATOR_GPU_OACC ) #define ATOMIC(mode) \ _Pragma( TOSTRING(acc atomic mode)) #endif diff --git a/src/Tools/Timers.cpp b/src/Tools/Timers.cpp index 0cd6dac0c..d3edda0e4 100755 --- a/src/Tools/Timers.cpp +++ b/src/Tools/Timers.cpp @@ -18,7 +18,7 @@ Timers::Timers( SmileiMPI *smpi ) : collisions( "Collisions" ), // Call to Collisions methods movWindow( "Mov window" ), // Moving Window loadBal( "Load balancing" ), // Load balancing - syncPart( "Sync Particles" ), // Call exchangeParticles (MPI & Patch sync) + syncPart( "Sync Particles" ), // Call initExchParticles (MPI & Patch sync) syncField( "Sync Fields" ), // Call sumRhoJ(s), exchangeB (MPI & Patch sync) syncDens( "Sync Densities" ), // If necessary the following timers can be reintroduced particleMerging( "Part Merging" ), // Particle merging diff --git a/src/Tools/gpu.cpp b/src/Tools/gpu.cpp index 7ce000e03..497786096 100644 --- a/src/Tools/gpu.cpp +++ b/src/Tools/gpu.cpp @@ -1,6 +1,6 @@ #include "gpu.h" -#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OMP ) && defined( 
SMILEI_ACCELERATOR_GPU_OACC ) #error "You can not enable both OpenACC and OpenMP GPU support" #endif @@ -29,7 +29,7 @@ #else #error "Asking for OpenMP support without enabling compiler support for OpenMP" #endif -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #if defined( _OPENACC ) #include #else @@ -46,11 +46,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( alloc \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data create( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -61,11 +62,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target enter data map( to \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc enter data copyin( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -75,11 +77,12 @@ namespace smilei { const unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update to( byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update device( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -89,11 +92,12 @@ namespace smilei { unsigned char* byte_array = static_cast( a_host_pointer ); #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target update from( byte_array [0:a_count * an_object_size] ) -#elif defined( 
SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc update host( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -104,11 +108,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( from \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data copyout( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -119,11 +124,12 @@ namespace smilei { #if defined( SMILEI_ACCELERATOR_GPU_OMP ) #pragma omp target exit data map( delete \ : byte_array [0:a_count * an_object_size] ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #pragma acc exit data delete( byte_array [0:a_count * an_object_size] ) #else SMILEI_UNUSED( a_host_pointer ); SMILEI_UNUSED( a_count ); + SMILEI_UNUSED( an_object_size ); SMILEI_UNUSED( byte_array ); #endif } @@ -154,7 +160,7 @@ namespace smilei { SMILEI_ASSERT( a_device_pointer != nullptr ); return const_cast( a_device_pointer ); -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) //return const_cast( ::acc_deviceptr( a_host_pointer ) ); return ::acc_deviceptr( const_cast(a_host_pointer) ) ; #else @@ -171,7 +177,7 @@ namespace smilei { a_count * an_object_size, 0, 0, device_num, device_num ) != 0 ) { ERROR( "omp_target_memcpy failed" ); } -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) // It seems that the interface of ::acc_memcpy_device does not accept ptr to array of const type ! 
// https://www.openacc.org/sites/default/files/inline-files/OpenACC.2.7.pdf // void acc_memcpy_device( d_void* dest, d_void* src, size_t bytes ); diff --git a/src/Tools/gpu.h b/src/Tools/gpu.h index 28a8c98da..bc6552986 100644 --- a/src/Tools/gpu.h +++ b/src/Tools/gpu.h @@ -19,7 +19,7 @@ namespace smilei { #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "omp declare target" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END _Pragma( "omp end declare target" ) #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "omp atomic update" ) -#elif defined( SMILEI_OPENACC_MODE ) +#elif defined( SMILEI_ACCELERATOR_GPU_OACC ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE _Pragma( "acc routine seq" ) #define SMILEI_ACCELERATOR_DECLARE_ROUTINE_END #define SMILEI_ACCELERATOR_ATOMIC _Pragma( "acc atomic" ) diff --git a/src/Tools/gpuRandom.h b/src/Tools/gpuRandom.h index 916a7b8f8..bdb9aca59 100644 --- a/src/Tools/gpuRandom.h +++ b/src/Tools/gpuRandom.h @@ -1,7 +1,7 @@ #ifndef GPU_RANDOM #define GPU_RANDOM -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) // #include #include "curand_kernel.h" #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) @@ -29,7 +29,7 @@ namespace smilei { { protected: using State = -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) ::curandState_t; #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) // TODO @@ -42,7 +42,7 @@ namespace smilei { public: Random() -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) : a_state_{ 0xDEADBEEFU } #else @@ -53,26 +53,36 @@ namespace smilei { } // Initialization +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) void init( unsigned long long seed, unsigned long long seq, unsigned long long offset ) { -#if defined( SMILEI_OPENACC_MODE ) // Cuda generator initialization ::curand_init( seed, seq, offset, &a_state_ ); + } #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) + void init( unsigned long long seed, + unsigned long long , 
+ unsigned long long ) + { // Hip generator initialization // ::hiprand_init( seed, seq, offset, &state ); a_state_ = State{ static_cast( seed ) }; + } #else + void init( unsigned long long seed, + unsigned long long , + unsigned long long ) + { a_state_ = State{ static_cast( seed ) }; -#endif } +#endif // Initialization double uniform() { -#if defined( SMILEI_OPENACC_MODE ) +#if defined( SMILEI_ACCELERATOR_GPU_OACC ) return ::curand_uniform( &a_state_ ); #elif defined( SMILEI_ACCELERATOR_GPU_OMP ) // TODO diff --git a/src/Tools/userFunctions.h b/src/Tools/userFunctions.h index 63753fb20..d9525723d 100755 --- a/src/Tools/userFunctions.h +++ b/src/Tools/userFunctions.h @@ -1,5 +1,5 @@ -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #include #endif @@ -36,7 +36,7 @@ class userFunctions //! \param array array in which to find the value //! \param elem element to be found //! \param nb_elem number of elements -#ifdef SMILEI_OPENACC_MODE +#ifdef SMILEI_ACCELERATOR_GPU_OACC #pragma acc routine seq #endif template diff --git a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py index ee807d65b..8d5b8ddb1 100644 --- a/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py +++ b/validation/analyses/validate_tst2d_v_o2_qed_cascade_vranic_cartesian.py @@ -247,7 +247,7 @@ def adaptive_error(values, statistics, thresholds): thresholds = {} thresholds["points"] = np.array([0. 
,10 ,100,1000]) -thresholds["factor"] = np.array([1e9, 1.,0.5, 0.2]) +thresholds["factor"] = np.array([1e9, 1.,0.7, 0.2]) Validate("Average gamma for the electrons vs time", average_gamma["electron"], adaptive_error(average_gamma["electron"], Nelectron, thresholds)) Validate("Average gamma for the positrons vs time", average_gamma["positron"], adaptive_error(average_gamma["positron"], Npositron, thresholds)) diff --git a/validation/references/tst2d_04_laser_wake.py.txt b/validation/references/tst2d_04_laser_wake.py.txt index 48d9eaeca..094e7c366 100755 Binary files a/validation/references/tst2d_04_laser_wake.py.txt and b/validation/references/tst2d_04_laser_wake.py.txt differ