diff --git a/Makefile.in b/Makefile.in index 1ee21c5..65932a0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -8,10 +8,6 @@ RESULT = nbody6++@RESULT@ INSTALLDIR = @prefix@ -EXTRATOOLS = nb6++dumpb2a libinitial.so libnb6out3.so nb6++snapshot -ifeq ("x@EXTRARESULT@", "x $(EXTRATOOLS)") -EXTRAOBJS = installtools -endif VPATH=./build @@ -19,29 +15,16 @@ $(RESULT) @EXTRARESULT@: $(MAKE) -C ./build clean: - rm -f ./build/*.o ./build/*.so ./build/$(RESULT) $(EXTRATARGET) + rm -f ./build/*.o ./build/*.so ./build/$(RESULT) install: $(RESULT) $(EXTRAOBJS) @[ -d $(INSTALLDIR)/bin ] || mkdir $(INSTALLDIR)/bin @[ -d $(INSTALLDIR)/share ] || mkdir $(INSTALLDIR)/share @[ -d $(INSTALLDIR)/share/doc ] || mkdir $(INSTALLDIR)/share/doc cp ./build/$(RESULT) $(INSTALLDIR)/bin - cp ./doc/nbody6++_manual.pdf $(INSTALLDIR)/share/doc/ - - -installtools: @EXTRARESULT@ - @[ -d $(INSTALLDIR)/bin ] || mkdir $(INSTALLDIR)/bin - @[ -d $(INSTALLDIR)/include ] || mkdir $(INSTALLDIR)/include - @[ -d $(INSTALLDIR)/lib ] || mkdir $(INSTALLDIR)/lib - cp ./build/nb6++dumpb2a $(INSTALLDIR)/bin - cp ./build/nb6++snapshot $(INSTALLDIR)/bin - cp ./include/initial.h $(INSTALLDIR)/include - cp ./build/libinitial.so $(INSTALLDIR)/lib - cp ./include/nb6out3.h $(INSTALLDIR)/include - cp ./build/libnb6out3.so $(INSTALLDIR)/lib + cp ./doc/nbody6++_manual.* $(INSTALLDIR)/share/doc/ + ln -sf $(INSTALLDIR)/bin/$(RESULT) $(INSTALLDIR)/bin/nbody6++ uninstall: rm -f $(INSTALLDIR)/bin/$(RESULT) - rm -f $(INSTALLDIR)/lib/libinitial.so $(INSTALLDIR)/lib/libnb6out3.so - rm -f $(INSTALLDIR)/share/doc/nbody6++_manual.pdf - rm -f $(INSTALLDIR)/bin/nb6++dumpb2a $(INSTALLDIR)/bin/nb6++snapshot + rm -f $(INSTALLDIR)/share/doc/nbody6++_manual.* diff --git a/README.md b/README.md index 393e51c..8c26e03 100644 --- a/README.md +++ b/README.md @@ -39,78 +39,112 @@ slightly different ways. git clone git@github.com:nbody6ppgpu/Nbody6PPGPU-beijing ``` 1. This downloads the `stable` branch. 
The `stable` branch include major versions, and the `dev` branch include the most recent updates and bugfix. Changes in `dev` branch are merged to `stable` regularly. -2. If you want the most recent version (may contain bugs), use `git clone -b dev git@github.com:nbody6ppgpu/Nbody6PPGPU-beijing`, or run `git switch dev` after you `clone` without `-b dev` param. +2. If you want the most recent version, use +``` bash +git clone -b dev git@github.com:nbody6ppgpu/Nbody6PPGPU-beijing +``` +or run `git switch dev` after you `clone` without `-b dev` param. ## Configure for compile ```bash -./configure --with-par=b1m --enable-simd=sse --enable-mcmodel=large +./configure [options] ``` -1. If you run NBODY6++GPU on your personal computer or workstation rather than computer clusters, MPI can be disabled by append `--disable-mpi` to the command above. -2. In the following cases, you may need to append `--disable-gpu` +0. TL;DR: to quickly start on your personal computer, you may use `./configure --enable-mcmodel=large --with-par=b1m --disable-gpu --disable-mpi`, and jump to the next section [Compile the code](#Compile-the-code) +1. We recommend using `--enable-mcmodel=large` to allow the program to use more resources. +2. `--with-par=b1m` allows simulations of up to 1 million particles. If your computer has very little memory (<4GB) and your star cluster has a small particle number, you may use a smaller value (check `./configure --help` for the possible values of `--with-par`) +3. If you run NBODY6++GPU on your personal computer or workstation rather than computer clusters, MPI can be disabled by appending `--disable-mpi` to the command above. +4. In the following cases, you may need to append `--disable-gpu` - The computer has no NVIDIA GPU - The computer has NVIDIA GPU but did not install CUDA compiler (Test: type `nvcc --version` in your terminal. If you see information about NVIDIA compiler, then it is installed. 
If you see errors like "nvcc: command not found" then it is not installed) -- Your simulation has relatively small particle number (< 50000). The code is for up to one million bodies with many initial binaries. In the case of small particle number, GPU can hardly boost the simulation and can sometimes slow it down. - -The configure script written by Long Wang has a multitude of further options, check with `./configure --help` or feel free to ask any question in [our discussion](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/discussions). +- Your simulation has relatively small particle number (<50000). The code is for up to one million bodies with many initial binaries. In the case of small particle number, GPU can hardly boost the simulation and can sometimes slow it down. +5. You may set `--prefix=[install path]` to specify the location to install the executable. +6. HDF5 is an efficient storage scheme, which is useful during large-scale or long-time simulations to boost the simulation and save disk spaces. Once enabled, the basic particle data (mass, position, velocity) and stellar evolution data will be stored in `.h5part` files, which may need extra tools to read. HDF5 is recommended but not necessary. You need to install additional libraries to use HDF5. For example, in Debian based Linux `sudo apt-get install libhdf5-openmpi-dev libhdf5-dev`. After that, append `--enable-hdf5` in configure command. +7. The configure script written by Long Wang has a multitude of further options, check with `./configure --help` or feel free to ask any question in [our discussion](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/discussions). -## Additional installation options +## Compile the code -HDF5 is an efficient storage scheme, which is useful during long-time simulations to boost the simulation and save disk spaces. Nevertheless, it is recommended but not necessary. To use HDF5, make sure it is installed in your computer. 
For example, in Debian based Linux, ```bash -apt-get install libhdf5-openmpi-dev -apt-get install libhdf5-dev +make clean +make -j ``` -after that, append `--enable-hdf5` in configure command -## Compile the code +After `make` you can find the executable in `build/`, named `nbody6++.[configure-options]`, where the suffix depends on your configure option (MPI, GPU, HDF5, SIMD, etc), for example `nbody6++.avx.mpi.gpu` + +If you have specified `--prefix=[install path]` during configure, you may want ```bash -make clean ; make -j +make install ``` - -After make you find the executable and object files in `build/`, named `nbody6++.sse.mpi.gpu`. The suffix may change with different compilation options. For example, if you have `--disable-mpi --disable-gpu` during configure, the executable may be named `nbody6++.sse` +and add the installation path to your `$PATH` environment variable. # Ready for your simulation -1. Copy the executable to the simulation directory you want +1. (If you have done `make install` you can skip this step) Copy the executable to the simulation directory you want -```bash -cp `ls build/nbody6++*` [your_simulation_path] -``` + ```bash + cp `ls build/nbody6++*` [your_simulation_dir] + ``` -2. Prepare an initial condition file. For a test run, you can find example initial conditions in `examples/input_files`. Copy `N100k.inp` and `dat.10` to your simulation path +2. Prepare an initial condition file. For a test run, you can find example initial conditions in `examples/input_files`. -```bash -cp examples/input_files/N100k.inp [your_simulation_path] -cp examples/input_files/dat.10 [your_simulation_path] -``` + ```bash + cp examples/input_files/N10k_noDat10.inp [your_simulation_dir] + ``` -> 💡 Starting from the stable version [May2023](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/releases/tag/May2023), NBODY6++GPU changes to a fundamentally new and more flexible method of reading input data (control data, not particle data). 
It uses Fortran NAMELIST input, which has a key=value format. All input data can be given in any order. If you are using a old-format input file, you can use the bash script which transform the old input file into the new one ([examples/input_files/@input-transform](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/blob/stable/examples/input_files/%40input-transform)) to transform it to the new NAMELIST format. See usage inside the script. + This input file lets NBODY6++GPU generate a star cluster with 10000 stars with a Plummer model, and simulate it for only 2 Myr. You can also find `N100k.inp` and its pre-generated initial particle data `dat.10` in `examples/input_files` for a 100,000-star, 1 Gyr simulation. -3. Finally, run it + > 💡 Starting from the stable version [May2023](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/releases/tag/May2023), NBODY6++GPU changes to a fundamentally new and more flexible method of reading input data (control data, not particle data). It uses Fortran NAMELIST input, which has a key=value format. All input data can be given in any order. If you are using an old-format input file, you can use the bash script which transforms the old input file into the new one ([examples/input_files/@input-transform](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/blob/stable/examples/input_files/%40input-transform)) to transform it to the new NAMELIST format. See usage inside the script. -```bash -cd [your_simulation_path] -nbody6++.sse < N100k.inp -``` -Don't forget to replace `nbody6++.sse` with the name of your executable +3. CPU and memory -# Data analysis -Some Jupyter notebooks for simple data analysis are provided in [examples/](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/tree/stable/examples). You can check [the readme file there](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/tree/stable/examples) to get started. + In simulations with a large particle number, a segmentation fault may happen. 
To avoid this, we recommend setting a large `OMP_STACKSIZE` and removing the stack size limit. + ```bash + export OMP_STACKSIZE=4096M + ulimit -s unlimited + ``` + + By default, the program uses all CPU threads (which is usually 2 × number of CPU cores). For better performance, `OMP_NUM_THREADS` should not be too large, and cannot go beyond 32. In case you want to use fewer threads, especially when your computer has more than 32 cores (per node), you need to restrict `OMP_NUM_THREADS` + ```bash + export OMP_NUM_THREADS=[N_threads] + ``` + + After running them, you may want to add these 3 commands to your shell startup file, such as `~/.bashrc`. + +4. Finally, run it + + ```bash + cd [your_simulation_dir] + ``` + + If you have done `make install` and added the installation path to `$PATH`, run + ```bash + nbody6++ < N10k_noDat10.inp + ``` + + otherwise, if you have copied the executable to the simulation directory, run + ```bash + ./[your executable filename] < N10k_noDat10.inp + ``` # Documentation -For any further questions, read the documentations at +To understand the diagnostic information and columns of each output file, please read the documentation at https://www.overleaf.com/read/hcmxcyffjkzq -or ask any question in [our discussion](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/discussions) + +You are also welcome to ask any questions in [our discussion](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/discussions) + +# Data analysis +Some Jupyter notebooks for simple data analysis are provided in [examples/](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/tree/stable/examples). You can check [the readme file there](https://github.com/nbody6ppgpu/Nbody6PPGPU-beijing/tree/stable/examples) to get started. # Tips + - Before a simulation, it is always recommended to set `ulimit -s unlimited` to avoid segmentation faults. + - The environment variable OMP_NUM_THREADS has to be set to the desired value of OpenMP threads per MPI process. 
(Maybe your system has it predefined). I also recommend to set OMP_STACKSIZE=4096M the shell where you run the code. - It is inefficient (and even more error prone) for particle numbers below about 50k-100k particles (depending on hardware). For smaller N you are advised to disable GPU, or use Nbody6 and Nbody6GPU for single node/process. - - It is recommended to provide a dat.10 file in N-body input format (see manual). Such file can be produced by other programs, like mcluster. + - It is recommended to provide a dat.10 file in N-body input format (see manual). Such file can be produced by other programs, like McLuster. # Seleted References: - https://ui.adsabs.harvard.edu/abs/1999PASP..111.1333A/abstract (Aarseth: NBODY1 to NBODY6) diff --git a/configure b/configure index 11322ec..a141cce 100755 --- a/configure +++ b/configure @@ -702,7 +702,6 @@ enable_gpu enable_simd enable_hdf5 enable_tt -enable_tools enable_openmp with_cuda with_cuda_sdk @@ -1352,8 +1351,6 @@ Optional Features: extension imported from NBODY6TT. (Ref: Renaud, F., et al., 2011, MNRAS, 418, 759; Renaud, F. & Gieles, M., 2015, MNRAS, 448, 3416) - --enable-tools Compile extral tools for data analysis, see - User_manual.pdf for detail --disable-openmp do not use OpenMP Optional Packages: @@ -2508,14 +2505,6 @@ else fi -# Check whether --enable-tools was given. -if test "${enable_tools+set}" = set; then : - enableval=$enable_tools; enable_tools=yes -else - enable_tools=no -fi - - # Checks for programs. 
# Fortran part @@ -6205,15 +6194,6 @@ fi fi -# Extral tools -if test "x$enable_tools" != xno; then : - EXTRARESULT=' $(EXTRATOOLS)' -else - EXTRARESULT='' -fi - - - @@ -6272,8 +6252,6 @@ $as_echo "$as_me: CUDA compiler: $NVCC" >&6;} $as_echo "$as_me: --Targets:" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Program name: nbody6++$RESULT" >&5 $as_echo "$as_me: Program name: nbody6++$RESULT" >&6;} -{ $as_echo "$as_me:${as_lineno-$LINENO}: Extra tools: $enable_tools" >&5 -$as_echo "$as_me: Extra tools: $enable_tools" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Install path: $prefix" >&5 $as_echo "$as_me: Install path: $prefix" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: --Parameters:" >&5 @@ -6288,16 +6266,6 @@ $as_echo "$as_me: LMAX: $LMAX" >&6;} $as_echo "$as_me: MMAX: $MMAX" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: -----------------------------------------------------------" >&5 $as_echo "$as_me: -----------------------------------------------------------" >&6;} -if test "x$enable_tools" != xno; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: Please add $prefix/lib into your runtime load library path:" >&5 -$as_echo "$as_me: Please add $prefix/lib into your runtime load library path:" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:$prefix/lib" >&5 -$as_echo "$as_me: LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:$prefix/lib" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: in your pre-configure script of shell (such as ~/.bashrc if you use bash)" >&5 -$as_echo "$as_me: in your pre-configure script of shell (such as ~/.bashrc if you use bash)" >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: -----------------------------------------------------------" >&5 -$as_echo "$as_me: -----------------------------------------------------------" >&6;} -fi { $as_echo "$as_me:${as_lineno-$LINENO}: Reference paper: Wang, L., et al., 2015, MNRAS, 450, 4070 (http://arxiv.org/abs/1504.03687)" >&5 $as_echo "$as_me: Reference paper: Wang, L., et 
al., 2015, MNRAS, 450, 4070 (http://arxiv.org/abs/1504.03687)" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Email: Rainer Spurzem " >&5 diff --git a/examples/input_files/N10k_noDat10.inp b/examples/input_files/N10k_noDat10.inp new file mode 100644 index 0000000..7ae38ae --- /dev/null +++ b/examples/input_files/N10k_noDat10.inp @@ -0,0 +1,37 @@ +&INNBODY6 +KSTART=1,TCOMP=1.0E8,TCRTP0=2,isernb=40,iserreg=40,iserks=0 / + +&ININPUT +N=1000,NFIX=1,NCRIT=10,NRAND=43532,NNBOPT=80,NRUN=1,NCOMM=10, +ETAI=0.01,ETAR=0.01,RS0=0.15,DTADJ=1.0,DELTAT=1.0,TCRIT=1000.0,QE=1.0,RBAR=1.223,ZMBAR=0.7, +KZ(1:10)= 1 1 1 0 1 0 3 2 3 2 +KZ(11:20)=0 1 0 2 2 0 0 0 3 6 +KZ(21:30)=1 1 2 0 2 2 3 2 0 2 +KZ(31:40)=1 0 2 2 1 0 1 1 2 1 +KZ(41:50)=0 0 0 0 0 4 3 0 3 0 , +DTMIN=2.5E-6,RMIN=8.E-5,ETAU=0.1,ECLOSE=1.0,GMIN=1.0E-06,GMAX=0.01,SMAX=1.0, +Level='C' / + +&INSSE / + +&INBSE / + +&INCOLL / + +&INDATA +ALPHAS=2.35,BODY1=150.0,BODYN=0.08,NBIN0=0,NHI0=0,ZMET=0.001,EPOCH0=0,DTPLOT=1.0 / + +&INSETUP SEMI=,ECC=,APO=,N2=,SCALE=,ZM1=,ZM2,ZMH,RCUT= / + +&INSCALE +Q=0.5,VXROT=0.0,VZROT=0.0,RTIDE=0.0 / + +&INXTRNL0 +GMG=1.78E11,RG0=13.3,DISK=,A=,B=,VCIRC=,RCIRC=,GMB=,AR=,GAM=,RG=,,,VG=,,,MP=,AP2=,MPDOT=,TDELAY= / + +&INBINPOP +SEMI0=0.0005,ECC0=-1.0,RATIO=1.0,RANGE=5.0,NSKIP=5,IDORM=0 / + +&INHIPOP +SEMI0=,ECC0=,RATIO=,RANGE= / +