diff --git a/source/module_base/mathzone_add1.cpp b/source/module_base/mathzone_add1.cpp index 0ec48e29b7..64958f39d5 100644 --- a/source/module_base/mathzone_add1.cpp +++ b/source/module_base/mathzone_add1.cpp @@ -5,6 +5,7 @@ #include "constants.h" #include "global_function.h" #include "math_sphbes.h" +#include "module_base/memory.h" #if defined __FFTW2 #include "fftw.h" @@ -279,6 +280,7 @@ void Mathzone_Add1::Uni_Deriv_Phi fftw_complex *fft_phik = new fftw_complex[FFT_NR]; fftw_complex *fft_ndphik = new fftw_complex[FFT_NR]; fftw_complex *fft_ndphir = new fftw_complex[FFT_NR]; + ModuleBase::Memory::record("Mathzone_Add1::Uni_Deriv_Phi",sizeof(fftw_complex) * FFT_NR * 4); fftw_plan p1; fftw_plan p2; diff --git a/source/module_base/module_container/ATen/core/tensor.cpp b/source/module_base/module_container/ATen/core/tensor.cpp index 8cae92fcac..b3ad969871 100644 --- a/source/module_base/module_container/ATen/core/tensor.cpp +++ b/source/module_base/module_container/ATen/core/tensor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include "module_base/memory.h" #if defined(__CUDA) || defined(__ROCM) #include #endif // __CUDA || __ROCM @@ -216,6 +217,21 @@ void Tensor::resize(const TensorShape& new_shape) { shape_ = new_shape; } +// Resize tensor object with the given tensor_shape ( Using for ModuleBase::Memory::record ) +void Tensor::resize(const TensorShape& new_shape, const std::string& record_str) { + if (shape_ == new_shape) { + return; + } + REQUIRES_OK(buffer_->OwnsMemory() || this->NumElements() == 0, + "Cannot resize a tensor that mapped from a given data buffer") + if (buffer_ && buffer_->GetAllocatedBytes() < new_shape.NumElements() * SizeOfType(data_type_)) { + buffer_->unref(); + this->buffer_ = new TensorBuffer(GetAllocator(device_), new_shape.NumElements() * SizeOfType(data_type_)); + ModuleBase::Memory::record(record_str,new_shape.NumElements() * SizeOfType(data_type_)); + } + shape_ = new_shape; +} + Tensor& Tensor::operator=(const Tensor& other) { if (this == &other) { return *this; diff --git a/source/module_base/module_container/ATen/core/tensor.h b/source/module_base/module_container/ATen/core/tensor.h index 3faf820dbd..580aca8ec7 100644 --- a/source/module_base/module_container/ATen/core/tensor.h +++ b/source/module_base/module_container/ATen/core/tensor.h @@ -350,6 +350,8 @@ class Tensor { */ void resize(const TensorShape& new_shape); + void resize(const TensorShape& new_shape, const std::string& record_str); + /** * @brief Get the Allocator object according to the given device type. * diff --git a/source/module_base/module_container/ATen/core/tensor_buffer.h b/source/module_base/module_container/ATen/core/tensor_buffer.h index 52b393901d..47de3ba8ee 100644 --- a/source/module_base/module_container/ATen/core/tensor_buffer.h +++ b/source/module_base/module_container/ATen/core/tensor_buffer.h @@ -4,6 +4,7 @@ #include #include #include +#include "module_base/memory.h" namespace container { diff --git a/source/module_base/module_mixing/broyden_mixing.cpp b/source/module_base/module_mixing/broyden_mixing.cpp index ba42ff4029..e567a117ed 100644 --- a/source/module_base/module_mixing/broyden_mixing.cpp +++ b/source/module_base/module_mixing/broyden_mixing.cpp @@ -45,6 +45,7 @@ void Broyden_Mixing::tem_push_data(Mixing_Data& mdata, // container::Tensor data = data_in + mixing_beta * F; std::vector data(length); + ModuleBase::Memory::record("Broyden_Mixing::F_tmp&data",sizeof(FPTYPE)*length*2); mix(data.data(), data_in, F_tmp.data()); mdata.push(data.data()); @@ -70,6 +71,7 @@ void Broyden_Mixing::tem_push_data(Mixing_Data& mdata, if (dF != nullptr) free(dF); dF = malloc(sizeof(FPTYPE) * length * mixing_ndim); + ModuleBase::Memory::record("Broyden_Mixing::F&DF",sizeof(FPTYPE)*length*(mixing_ndim+1)); FP_dF = static_cast(dF); #ifdef _OPENMP #pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE)) diff --git a/source/module_base/module_mixing/broyden_mixing.h b/source/module_base/module_mixing/broyden_mixing.h index d3fbbd82f3..4189b55384 100644 --- a/source/module_base/module_mixing/broyden_mixing.h +++ b/source/module_base/module_mixing/broyden_mixing.h @@ -2,6 +2,7 @@ #define BROYDEN_MIXING_H_ #include "mixing.h" #include "module_base/matrix.h" +#include "module_base/memory.h" namespace Base_Mixing { diff --git a/source/module_basis/module_nao/two_center_table.cpp b/source/module_basis/module_nao/two_center_table.cpp index 969e118699..72ef4dac60 100644 --- a/source/module_basis/module_nao/two_center_table.cpp +++ b/source/module_basis/module_nao/two_center_table.cpp @@ -9,6 +9,7 @@ #include "module_base/constants.h" #include "module_base/math_integral.h" #include "module_base/cubic_spline.h" +#include "module_base/memory.h" void TwoCenterTable::build(const RadialCollection& bra, const RadialCollection& ket, @@ -44,8 +45,8 @@ void TwoCenterTable::build(const RadialCollection& bra, ntab_ = 0; two_center_loop(bra, ket, &TwoCenterTable::_indexing); - table_.resize({ntab_, nr_}); - dtable_.resize({ntab_, nr_}); + table_.resize({ntab_, nr_},"TwoCenterTable::table_"); + dtable_.resize({ntab_, nr_},"TwoCenterTable::dtable_"); two_center_loop(bra, ket, &TwoCenterTable::_tabulate); } diff --git a/source/module_cell/module_symmetry/symmetry.cpp b/source/module_cell/module_symmetry/symmetry.cpp index 7aac4c92fb..9315797409 100644 --- a/source/module_cell/module_symmetry/symmetry.cpp +++ b/source/module_cell/module_symmetry/symmetry.cpp @@ -1,3 +1,4 @@ +#include #include #include #include "symmetry.h" @@ -5,6 +6,7 @@ #include "module_base/mathzone.h" #include "module_base/constants.h" #include "module_base/timer.h" +#include "module_base/memory.h" namespace ModuleSymmetry { @@ -300,6 +302,7 @@ int Symmetry::standard_lat( ModuleBase::Vector3 &c, double *cel_const) const { + // ModuleBase::TITLE("Symmetry", "standard_lat"); static bool first = true; // there are only 14 types of Bravais lattice. int type = 15; @@ -921,6 +924,7 @@ void Symmetry::checksym(ModuleBase::Matrix3 &s, ModuleBase::Vector3 >r // is a valid symmetry operation on a supercell //---------------------------------------------- // the start atom index. + ModuleBase::TITLE("Symmetry", "checksym"); bool no_diff = 0; ModuleBase::Vector3 trans(2.0, 2.0, 2.0); s_flag = 0; @@ -1094,6 +1098,7 @@ void Symmetry::checksym(ModuleBase::Matrix3 &s, ModuleBase::Vector3 >r void Symmetry::pricell(double* pos, const Atom* atoms) { + ModuleBase::TITLE("Symmetry", "pricell"); bool no_diff = 0; s_flag = 0; ptrans.clear(); @@ -1209,6 +1214,7 @@ void Symmetry::pricell(double* pos, const Atom* atoms) //sort ptrans: double* ptrans_array = new double[ntrans*3]; + ModuleBase::Memory::record("Symmetry::ptrans_array",sizeof(double)*ntrans*3); for(int i=0;i *rhogtot, int* ixyz2ipw, const int &nx, const int &ny, const int &nz, const int &fftnx, const int &fftny, const int &fftnz) { + ModuleBase::TITLE("Symmetry", "rhog_symmetry"); // if (GlobalV::test_symmetry)ModuleBase::TITLE("Symmetry","rho_symmetry"); ModuleBase::timer::tick("Symmetry","rhog_symmetry"); // ---------------------------------------------------------------------- @@ -1472,6 +1480,7 @@ void Symmetry::rhog_symmetry(std::complex *rhogtot, int(*isymflag)[48] = new int[fftnx*fftny*fftnz][48];//which rotration operation the grid corresponds to int(*table_xyz)[48] = new int[fftnx * fftny * fftnz][48];// group information int* count_xyz = new int[fftnx * fftny * fftnz];// how many symmetry operations has been covered + ModuleBase::Memory::record("Symmetry::rhog_symmetry",sizeof(int) *fftnx*fftny*fftnz*98); for (int i = 0; i < fftnx * fftny * fftnz; i++) { symflag[i] = -1; @@ -1766,6 +1775,7 @@ void Symmetry::symmetrize_vec3_nat(double* v)const // pengfei 2016-12-20 void Symmetry::symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)const //zhengdy added 2017 { + ModuleBase::TITLE("Symmetry", "symmetrize_mat3"); ModuleBase::matrix A = lat.latvec.to_matrix(); ModuleBase::matrix AT = lat.latvec.Transpose().to_matrix(); ModuleBase::matrix invA = lat.GT.to_matrix(); @@ -1781,6 +1791,7 @@ void Symmetry::symmetrize_mat3(ModuleBase::matrix& sigma, const Lattice& lat)con void Symmetry::gmatrix_convert_int(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b) const { + ModuleBase::TITLE("Symmetry", "gmatrix_convert_int"); auto round = [](double x){return (x>0.0)?floor(x+0.5):ceil(x-0.5);}; ModuleBase::Matrix3 ai = a.Inverse(); ModuleBase::Matrix3 bi = b.Inverse(); @@ -1802,6 +1813,7 @@ void Symmetry::gmatrix_convert_int(const ModuleBase::Matrix3* sa, ModuleBase::Ma void Symmetry::gmatrix_convert(const ModuleBase::Matrix3* sa, ModuleBase::Matrix3* sb, const int n, const ModuleBase::Matrix3 &a, const ModuleBase::Matrix3 &b)const { + ModuleBase::TITLE("Symmetry", "gmatrix_convert"); ModuleBase::Matrix3 ai = a.Inverse(); ModuleBase::Matrix3 bi = b.Inverse(); for (int i=0;i* va, ModuleBase: } void Symmetry::gmatrix_invmap(const ModuleBase::Matrix3* s, const int n, int* invmap) { + ModuleBase::TITLE("Symmetry", "gmatrix_invmap"); ModuleBase::Matrix3 eig(1, 0, 0, 0, 1, 0, 0, 0, 1); ModuleBase::Matrix3 tmp; for (int i=0;i &a1, ModuleBase::Vector3 &a2, ModuleBase::Vector3 &a3) const { + ModuleBase::TITLE("Symmetry", "get_shortest_latvec"); double len1=a1.norm(); double len2=a2.norm(); double len3=a3.norm(); @@ -1891,6 +1905,7 @@ void Symmetry::get_optlat(ModuleBase::Vector3 &v1, ModuleBase::Vector3 &w2, ModuleBase::Vector3 &w3, int& real_brav, double* cel_const, double* tmp_const) const { + ModuleBase::TITLE("Symmetry", "get_optlat"); ModuleBase::Vector3 r1, r2, r3; double cos1 = 1; double cos2 = 1; @@ -2152,6 +2167,7 @@ bool Symmetry::magmom_same_check(const Atom* atoms)const bool Symmetry::is_all_movable(const Atom* atoms, const Statistics& st)const { + ModuleBase::TITLE("Symmetry", "is_all_movable"); bool all_mbl = true; for (int iat = 0;iat < st.nat;++iat) { diff --git a/source/module_elecstate/module_charge/charge_mixing.cpp b/source/module_elecstate/module_charge/charge_mixing.cpp index e327904d0e..e8e526fa95 100755 --- a/source/module_elecstate/module_charge/charge_mixing.cpp +++ b/source/module_elecstate/module_charge/charge_mixing.cpp @@ -8,6 +8,7 @@ #include "module_base/parallel_reduce.h" #include "module_base/timer.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_base/memory.h" Charge_Mixing::Charge_Mixing() { @@ -123,12 +124,14 @@ void Charge_Mixing::init_mixing() this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->npw * 2, sizeof(std::complex)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(std::complex)*this->mixing->data_ndim*this->rhopw->npw * 2); } else { this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->npw * GlobalV::NSPIN, sizeof(std::complex)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(std::complex)*this->mixing->data_ndim*this->rhopw->npw * GlobalV::NSPIN); } } else @@ -136,10 +139,12 @@ void Charge_Mixing::init_mixing() if (GlobalV::NSPIN == 4 && GlobalV::MIXING_ANGLE > 0 ) { this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * 2, sizeof(double)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(double)*this->mixing->data_ndim*this->rhopw->nrxx * 2); } else { this->mixing->init_mixing_data(this->rho_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(double)*this->mixing->data_ndim*this->rhopw->nrxx * GlobalV::NSPIN); } } @@ -151,10 +156,12 @@ void Charge_Mixing::init_mixing() this->mixing->init_mixing_data(this->tau_mdata, this->rhopw->npw * GlobalV::NSPIN, sizeof(std::complex)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(std::complex)*this->mixing->data_ndim*this->rhopw->npw * GlobalV::NSPIN); } else { this->mixing->init_mixing_data(this->tau_mdata, this->rhopw->nrxx * GlobalV::NSPIN, sizeof(double)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_data",sizeof(double)*this->mixing->data_ndim*this->rhopw->nrxx * GlobalV::NSPIN); } } @@ -185,6 +192,7 @@ void Charge_Mixing::allocate_mixing_dmr(int nnr) else if (GlobalV::SCF_THR_TYPE == 2) { this->mixing->init_mixing_data(this->dmr_mdata, nnr * dmr_nspin, sizeof(double)); + ModuleBase::Memory::record("Charge_Mixing::init_mixing_dat_dmr",sizeof(double)*this->mixing->data_ndim*nnr*dmr_nspin); } this->dmr_mdata.reset(); @@ -218,6 +226,7 @@ double Charge_Mixing::get_drho(Charge* chr, const double nelec) ModuleBase::GlobalFunc::NOTE("Calculate the charge difference between rho(G) and rho_save(G)"); std::vector> drhog(GlobalV::NSPIN * this->rhopw->npw); + ModuleBase::Memory::record("Charge_Mixing::drhog",sizeof(std::complex)*GlobalV::NSPIN * this->rhopw->npw); #ifdef _OPENMP #pragma omp parallel for collapse(2) schedule(static, 512) #endif @@ -343,6 +352,7 @@ void Charge_Mixing::mix_rho_recip(Charge* chr) // allocate rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] rhog_mag = new std::complex[npw * GlobalV::NSPIN]; rhog_mag_save = new std::complex[npw * GlobalV::NSPIN]; + ModuleBase::Memory::record("Charge_Mixing::rhog_mag*",sizeof(std::complex)*npw * GlobalV::NSPIN*2); ModuleBase::GlobalFunc::ZEROS(rhog_mag, npw * GlobalV::NSPIN); ModuleBase::GlobalFunc::ZEROS(rhog_mag_save, npw * GlobalV::NSPIN); // get rhog_mag[is*ngmc] and rhog_mag_save[is*ngmc] @@ -447,6 +457,7 @@ void Charge_Mixing::mix_rho_recip(Charge* chr) const int nrxx = this->rhopw->nrxx; double* rho_magabs = new double[nrxx]; double* rho_magabs_save = new double[nrxx]; + ModuleBase::Memory::record("Charge_Mixing::rho_magabs*",sizeof(double)*nrxx*2); ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx); ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx); // calculate rho_magabs and rho_magabs_save @@ -461,6 +472,7 @@ void Charge_Mixing::mix_rho_recip(Charge* chr) const int npw = this->rhopw->npw; std::complex* rhog_magabs = new std::complex[npw * 2]; std::complex* rhog_magabs_save = new std::complex[npw * 2]; + ModuleBase::Memory::record("Charge_Mixing::rhog_magabs_2*",sizeof(std::complex)*npw*4); ModuleBase::GlobalFunc::ZEROS(rhog_magabs, npw * 2); ModuleBase::GlobalFunc::ZEROS(rhog_magabs_save, npw * 2); // calculate rhog_magabs and rhog_magabs_save @@ -632,6 +644,7 @@ void Charge_Mixing::mix_rho_real(Charge* chr) // allocate rho_mag[is*nnrx] and rho_mag_save[is*nnrx] rho_mag = new double[nrxx * GlobalV::NSPIN]; rho_mag_save = new double[nrxx * GlobalV::NSPIN]; + ModuleBase::Memory::record("Charge_Mixing::rho_mag_2*",sizeof(double)*nrxx * GlobalV::NSPIN*2); ModuleBase::GlobalFunc::ZEROS(rho_mag, nrxx * GlobalV::NSPIN); ModuleBase::GlobalFunc::ZEROS(rho_mag_save, nrxx * GlobalV::NSPIN); // get rho_mag[is*nnrx] and rho_mag_save[is*nnrx] @@ -726,6 +739,7 @@ void Charge_Mixing::mix_rho_real(Charge* chr) // allocate memory for rho_magabs and rho_magabs_save double* rho_magabs = new double[nrxx * 2]; double* rho_magabs_save = new double[nrxx * 2]; + ModuleBase::Memory::record("Charge_Mixing::rho_magabs_real*",sizeof(double)*nrxx*4); ModuleBase::GlobalFunc::ZEROS(rho_magabs, nrxx * 2); ModuleBase::GlobalFunc::ZEROS(rho_magabs_save, nrxx * 2); // calculate rho_magabs and rho_magabs_save @@ -822,6 +836,7 @@ void Charge_Mixing::mix_dmr(elecstate::DensityMatrix* DM) // allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] dmr_mag = new double[nnr * GlobalV::NSPIN]; dmr_mag_save = new double[nnr * GlobalV::NSPIN]; + ModuleBase::Memory::record("Charge_Mixing::dmr_mag*",sizeof(double)*nnr*GlobalV::NSPIN*2); ModuleBase::GlobalFunc::ZEROS(dmr_mag, nnr * GlobalV::NSPIN); ModuleBase::GlobalFunc::ZEROS(dmr_mag_save, nnr * GlobalV::NSPIN); double* dmr_up; @@ -921,6 +936,7 @@ void Charge_Mixing::mix_dmr(elecstate::DensityMatrix, doubl // allocate dmr_mag[is*nnrx] and dmr_mag_save[is*nnrx] dmr_mag = new double[nnr * GlobalV::NSPIN]; dmr_mag_save = new double[nnr * GlobalV::NSPIN]; + ModuleBase::Memory::record("Charge_Mixing::dmr_mag_2*",sizeof(double)*nnr*GlobalV::NSPIN*2); ModuleBase::GlobalFunc::ZEROS(dmr_mag, nnr * GlobalV::NSPIN); ModuleBase::GlobalFunc::ZEROS(dmr_mag_save, nnr * GlobalV::NSPIN); double* dmr_up; @@ -1015,6 +1031,7 @@ void Charge_Mixing::mix_rho(Charge* chr) // the charge before mixing. const int nrxx = chr->rhopw->nrxx; std::vector rho123(GlobalV::NSPIN * nrxx); + ModuleBase::Memory::record("Charge_Mixing::rho123",sizeof(double)*GlobalV::NSPIN * nrxx); for (int is = 0; is < GlobalV::NSPIN; ++is) { if (is == 0 || is == 3 || !GlobalV::DOMAG_Z) @@ -1262,6 +1279,7 @@ double Charge_Mixing::inner_product_recip_rho(std::complex* rho1, std::c std::complex** rhog1 = new std::complex*[GlobalV::NSPIN]; std::complex** rhog2 = new std::complex*[GlobalV::NSPIN]; + ModuleBase::Memory::record("Charge_Mixing::rhog1&2",sizeof(std::complex)*GlobalV::NSPIN*2); for (int is = 0; is < GlobalV::NSPIN; is++) { rhog1[is] = rho1 + is * this->rhopw->npw; @@ -1623,10 +1641,13 @@ void Charge_Mixing::divide_data(std::complex* data_d, const int ndims = this->rhopw->npw; const int ndimhf = ndimd - ndims; data_s = new std::complex[GlobalV::NSPIN * ndims]; + ModuleBase::Memory::record("Charge_Mixing::data_s",sizeof(std::complex)*GlobalV::NSPIN * ndims); data_hf = nullptr; if (ndimhf > 0) { data_hf = new std::complex[GlobalV::NSPIN * ndimhf]; + + ModuleBase::Memory::record("Charge_Mixing::data_hf",sizeof(std::complex)*GlobalV::NSPIN * ndimhf); } for (int is = 0; is < GlobalV::NSPIN; ++is) { diff --git a/source/module_elecstate/module_charge/symmetry_rhog.cpp b/source/module_elecstate/module_charge/symmetry_rhog.cpp index daf89c5212..2e341f90b1 100644 --- a/source/module_elecstate/module_charge/symmetry_rhog.cpp +++ b/source/module_elecstate/module_charge/symmetry_rhog.cpp @@ -24,6 +24,7 @@ void Symmetry_rho::psymmg(std::complex* rhog_part, const ModulePW::PW_Ba ModuleBase::GlobalFunc::ZEROS(rhogtot, rho_basis->npwtot); ig2isztot = new int[rho_basis->npwtot]; ModuleBase::GlobalFunc::ZEROS(rhogtot, rho_basis->npwtot); + ModuleBase::Memory::record("Symmetry_rho::psymmg",sizeof(std::complex)*rho_basis->npwtot + sizeof(int)*(rho_basis->npwtot+rho_basis->fftnxy)); } // find max_npw int max_npw=0; diff --git a/source/module_elecstate/potentials/H_Hartree_pw.cpp b/source/module_elecstate/potentials/H_Hartree_pw.cpp index 055c3f6ade..37d0be783b 100644 --- a/source/module_elecstate/potentials/H_Hartree_pw.cpp +++ b/source/module_elecstate/potentials/H_Hartree_pw.cpp @@ -3,6 +3,7 @@ #include "module_base/constants.h" #include "module_base/timer.h" #include "module_base/parallel_reduce.h" +#include "module_base/memory.h" namespace elecstate { @@ -22,6 +23,7 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, // Hartree potential VH(r) from n(r) std::vector> Porter(rho_basis->nmaxgr); + ModuleBase::Memory::record("H_Hartree_pw::Porter",sizeof(std::complex)*rho_basis->nmaxgr); const int nspin0 = (nspin == 2) ? 2 : 1; for (int is = 0; is < nspin0; is++) { @@ -71,6 +73,7 @@ ModuleBase::matrix H_Hartree_pw::v_hartree(const UnitCell &cell, // Add hartree potential to the xc potential //========================================== ModuleBase::matrix v(nspin, rho_basis->nrxx); + ModuleBase::Memory::record("H_Hartree_pw::v_hartree",sizeof(double)*nspin*rho_basis->nrxx + sizeof(std::complex)*rho_basis->npw); if (nspin == 4) { #ifdef _OPENMP @@ -114,6 +117,7 @@ void PotHartree::cal_v_eff(const Charge* chg, const UnitCell* ucell, ModuleBase: rho_tmp[is][ir] = chg->rho[is][ir] + chg->nhat[is][ir]; } } + ModuleBase::Memory::record("PotHartree::rho_tmp",sizeof(double)*chg->nspin*rho_basis_->nrxx); v_eff += H_Hartree_pw::v_hartree(*ucell, const_cast(this->rho_basis_), v_eff.nr, rho_tmp); for(int is = 0; is < chg->nspin; is++) diff --git a/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp b/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp index 4d373ef950..dc3c0733d0 100644 --- a/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp +++ b/source/module_hamilt_general/module_xc/xc_functional_gradcorr.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include "module_base/memory.h" // from gradcorr.f90 void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, @@ -88,6 +90,10 @@ void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, gdr1 = new ModuleBase::Vector3[rhopw->nrxx]; if(!is_stress) h1 = new ModuleBase::Vector3[rhopw->nrxx]; + // ModuleBase::Memory::record + size_t record_num = sizeof(ModuleBase::Vector3)*rhopw->nrxx + sizeof(std::complex)*rhopw->npw + sizeof(double)*rhopw->nrxx; + if(!is_stress) record_num += sizeof(ModuleBase::Vector3)*rhopw->nrxx; + ModuleBase::Memory::record("XC_Functional::gradcorr", record_num); XC_Functional::grad_rho( rhogsum1 , gdr1, rhopw, ucell->tpiba); @@ -114,7 +120,11 @@ void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, gdr2 = new ModuleBase::Vector3[rhopw->nrxx]; if(!is_stress) h2 = new ModuleBase::Vector3[rhopw->nrxx]; - + // ModuleBase::Memory::record + record_num = sizeof(std::complex)*rhopw->npw + sizeof(double)*rhopw->nrxx + sizeof(ModuleBase::Vector3)*rhopw->nrxx; + if(!is_stress) record_num += sizeof(ModuleBase::Vector3)*rhopw->nrxx; + ModuleBase::Memory::record("XC_Functional::gradcorr_2", record_num); + XC_Functional::grad_rho( rhogsum2 , gdr2, rhopw, ucell->tpiba); } @@ -180,6 +190,10 @@ void XC_Functional::gradcorr(double &etxc, double &vtxc, ModuleBase::matrix &v, gdr2 = new ModuleBase::Vector3[rhopw->nrxx]; h2 = new ModuleBase::Vector3[rhopw->nrxx]; + // ModuleBase::Memory::record + record_num = sizeof(std::complex)*rhopw->npw + sizeof(double)*rhopw->nrxx*2 + sizeof(ModuleBase::Vector3)*2*rhopw->nrxx; + if(!is_stress) record_num = record_num + sizeof(double)*GlobalV::NSPIN*rhopw->nrxx + sizeof(double)*nspin0*rhopw->nrxx; + ModuleBase::Memory::record("XC_Functional::gradcorr_4",record_num); XC_Functional::grad_rho( rhogsum1 , gdr1, rhopw, ucell->tpiba); XC_Functional::grad_rho( rhogsum2 , gdr2, rhopw, ucell->tpiba); @@ -638,6 +652,7 @@ void XC_Functional::grad_rho(const std::complex* rhog, { std::complex *gdrtmp = new std::complex[rho_basis->nmaxgr]; + ModuleBase::Memory::record("XC_Functional::gdrtmp",sizeof(std::complex)*rho_basis->nmaxgr); // the formula is : rho(r)^prime = \int iG * rho(G)e^{iGr} dG for(int i = 0 ; i < 3 ; ++i) { @@ -668,6 +683,7 @@ void XC_Functional::grad_dot(const ModuleBase::Vector3 *h, double *dh, M { std::complex *aux = new std::complex[rho_basis->nmaxgr]; std::complex *gaux = new std::complex[rho_basis->npw]; + ModuleBase::Memory::record("XC_Functional::aux&gaux",sizeof(std::complex)*(rho_basis->npw + rho_basis->nmaxgr)); for(int i = 0 ; i < 3 ; ++i) { diff --git a/source/module_hamilt_general/module_xc/xc_functional_vxc.cpp b/source/module_hamilt_general/module_xc/xc_functional_vxc.cpp index be64d92962..87eb66e679 100644 --- a/source/module_hamilt_general/module_xc/xc_functional_vxc.cpp +++ b/source/module_hamilt_general/module_xc/xc_functional_vxc.cpp @@ -8,6 +8,7 @@ #include "module_base/parallel_reduce.h" #include "module_base/timer.h" + // [etxc, vtxc, v] = XC_Functional::v_xc(...) std::tuple XC_Functional::v_xc( const int &nrxx, // number of real-space grid diff --git a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp index 59359dc674..93ced9a2c4 100644 --- a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp @@ -2,6 +2,7 @@ #include "module_base/timer.h" #include "module_base/ylm.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_base/memory.h" void Gint::gint_kernel_force( const int na_grid, @@ -92,6 +93,10 @@ void Gint::gint_kernel_force( dpsir_ylm_yy.ptr_2D, dpsir_ylm_yz.ptr_2D, dpsir_ylm_zz.ptr_2D, svl_dphi); } + size_t record_data = sizeof(double)*this->bxyz*(LD_pool+1)*6; + if(isstress) record_data += sizeof(double)*this->bxyz*(LD_pool+1)*6; + ModuleBase::Memory::record("Gint::gint_kernel_force",record_data); + //release memories delete[] block_iw; delete[] block_index; @@ -308,6 +313,10 @@ void Gint::gint_kernel_force_meta( svl_dphi); } + size_t record_data = sizeof(double)*this->bxyz*(LD_pool+1)*18; + if(isstress) record_data += sizeof(double)*this->bxyz*(LD_pool+1)*6; + ModuleBase::Memory::record("Gint::gint_kernel_force_meta",record_data); + //release memories delete[] block_iw; delete[] block_index; diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp index b63b108411..75ec35ca8e 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp @@ -4,6 +4,7 @@ #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/ylm.h" #include "module_base/timer.h" +#include "module_base/memory.h" void Gint_Gamma::cal_env(const double* wfc, double* rho) { @@ -35,6 +36,7 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho) //evaluate psi on grids Gint_Tools::Array_Pool psir_ylm(this->bxyz, LD_pool); + ModuleBase::Memory::record("Gint_Gamma::cal_env",sizeof(double)*this->bxyz*(LD_pool+1)); Gint_Tools::cal_psir_ylm(*this->gridt, this->bxyz, size, grid_index, delta_r, diff --git a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp index 33c22526bf..c2070510f7 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp @@ -4,6 +4,7 @@ #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/ylm.h" #include "module_base/timer.h" +#include "module_base/memory.h" void Gint_k::cal_env_k(int ik, const std::complex* psi_k, @@ -45,6 +46,7 @@ void Gint_k::cal_env_k(int ik, //evaluate psi on grids Gint_Tools::Array_Pool psir_ylm(this->bxyz, LD_pool); + ModuleBase::Memory::record("Gint_k:cal_env_k",sizeof(double)*this->bxyz*(LD_pool+1)); Gint_Tools::cal_psir_ylm(*this->gridt, this->bxyz, size, grid_index, delta_r, block_index, block_size, diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp index 4f9a58b64f..e079287cb7 100644 --- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp @@ -8,6 +8,8 @@ #include "module_base/blas_connector.h" #include "module_base/timer.h" #include "gint_tools.h" +#include "module_base/memory.h" + void Gint::gint_kernel_rho( const int na_grid, @@ -33,6 +35,7 @@ void Gint::gint_kernel_rho( for(int is=0; is psir_DM(this->bxyz, LD_pool); + ModuleBase::Memory::record("Gint_Tools::gint_kernel_rho",sizeof(double)*this->bxyz*(LD_pool+1)*2); ModuleBase::GlobalFunc::ZEROS(psir_DM.ptr_1D, this->bxyz*LD_pool); if(GlobalV::GAMMA_ONLY_LOCAL) { diff --git a/source/module_hamilt_lcao/module_gint/gint_tau.cpp b/source/module_hamilt_lcao/module_gint/gint_tau.cpp index f6f82f8a1c..216791a12f 100644 --- a/source/module_hamilt_lcao/module_gint/gint_tau.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_tau.cpp @@ -8,6 +8,7 @@ #include "module_base/blas_connector.h" #include "module_base/timer.h" #include "gint_tools.h" +#include "module_base/memory.h" void Gint::gint_kernel_tau( const int na_grid, @@ -43,6 +44,7 @@ void Gint::gint_kernel_tau( Gint_Tools::Array_Pool dpsix_DM(this->bxyz, LD_pool); Gint_Tools::Array_Pool dpsiy_DM(this->bxyz, LD_pool); Gint_Tools::Array_Pool dpsiz_DM(this->bxyz, LD_pool); + ModuleBase::Memory::record("Gint::gint_kernel_tau",sizeof(double)*this->bxyz*(LD_pool+1)*7); ModuleBase::GlobalFunc::ZEROS(dpsix_DM.ptr_1D, this->bxyz*LD_pool); ModuleBase::GlobalFunc::ZEROS(dpsiy_DM.ptr_1D, this->bxyz*LD_pool); ModuleBase::GlobalFunc::ZEROS(dpsiz_DM.ptr_1D, this->bxyz*LD_pool); diff --git a/source/module_hamilt_lcao/module_gint/gint_tools.cpp b/source/module_hamilt_lcao/module_gint/gint_tools.cpp index a8756ef402..1a8cad5ed5 100644 --- a/source/module_hamilt_lcao/module_gint/gint_tools.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_tools.cpp @@ -9,6 +9,7 @@ #include "module_base/ylm.h" #include "module_basis/module_ao/ORB_read.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" +#include "module_base/memory.h" namespace Gint_Tools { diff --git a/source/module_hamilt_lcao/module_gint/gint_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_vl.cpp index cf443ed75e..5e83683cc2 100644 --- a/source/module_hamilt_lcao/module_gint/gint_vl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_vl.cpp @@ -7,6 +7,7 @@ #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_base/blas_connector.h" #include "module_base/timer.h" +#include "module_base/memory.h" //#include #ifdef _OPENMP @@ -42,7 +43,7 @@ void Gint::gint_kernel_vlocal( //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( this->bxyz, na_grid, LD_pool, block_index, cal_flag, vldr3, psir_ylm.ptr_2D); - + ModuleBase::Memory::record("Gint::gint_kernel_vlocal",sizeof(double)*this->bxyz*(LD_pool+1)*2); //integrate (psi_mu*v(r)*dv) * psi_nu on grid //and accumulates to the corresponding element in Hamiltonian if(GlobalV::GAMMA_ONLY_LOCAL) @@ -99,7 +100,7 @@ void Gint::gint_kernel_dvlocal( //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( this->bxyz, na_grid, LD_pool, block_index, cal_flag, vldr3, psir_ylm.ptr_2D); - + ModuleBase::Memory::record("Gint::gint_kernel_dvlocal",sizeof(double)*this->bxyz*(LD_pool+1)*5); //integrate (psi_mu*v(r)*dv) * psi_nu on grid //and accumulates to the corresponding element in Hamiltonian this->cal_meshball_vlocal_k( @@ -167,6 +168,7 @@ void Gint::gint_kernel_vlocal_meta( this->bxyz, na_grid, LD_pool, block_index, cal_flag, vkdr3, dpsir_ylm_y.ptr_2D); const Gint_Tools::Array_Pool dpsiz_vlbr3 = Gint_Tools::get_psir_vlbr3( this->bxyz, na_grid, LD_pool, block_index, cal_flag, vkdr3, dpsir_ylm_z.ptr_2D); + ModuleBase::Memory::record("Gint::gint_kernel_vlocal_meta",sizeof(double)*this->bxyz*(LD_pool+1)*8); if(GlobalV::GAMMA_ONLY_LOCAL) { diff --git a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp index 37fa28291c..aafe0bdcb0 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp @@ -443,6 +443,7 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, const int& ik, std::complex resmem_var_op()(ctx, vkb1, nhm * npw, "VNL::vkb1"); ModuleBase::Vector3* _gk = new ModuleBase::Vector3[npw]; + ModuleBase::Memory::record("pseudopot_cell_vnl::_gk&atom",sizeof(ModuleBase::Vector3)*npw+sizeof(int)*GlobalC::ucell.ntype*3); #ifdef _OPENMP #pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE)) #endif @@ -1512,6 +1513,7 @@ void pseudopot_cell_vnl::newq(const ModuleBase::matrix& veff, const ModulePW::PW ModuleBase::YlmReal::Ylm_Real(lmaxq * lmaxq, npw, rho_basis->gcar, ylmk0); double* qnorm = new double[npw]; + ModuleBase::Memory::record("pseudopot_cell_vnl::ylmk0&qnorm",sizeof(double)*lmaxq*lmaxq*npw + sizeof(double)*npw); for (int ig = 0; ig < npw; ig++) { qnorm[ig] = rho_basis->gcar[ig].norm() * cell.tpiba; diff --git a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp index 7c2c15321f..a49d8cee5c 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/forces.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/forces.cpp @@ -15,6 +15,7 @@ #include "module_hamilt_general/module_surchem/surchem.h" #include "module_hamilt_general/module_vdw/vdw.h" #include "module_psi/kernels/device.h" + #ifdef _OPENMP #include #endif diff --git a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp index 7fda615edc..0d8daa1e79 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/stress_func_nl.cpp @@ -56,6 +56,7 @@ void Stress_Func::stress_nl(ModuleBase::matrix& sigma, std::complex *dbecp = nullptr, *becp = nullptr, *dbecp_noevc = nullptr, *vkb = nullptr, *pvkb0 = nullptr, *vkb1 = nullptr, *pvkb2 = nullptr; std::complex *_vkb0[3] = {nullptr, nullptr, nullptr}; + ModuleBase::Memory::record("Stree_func::ComplexMatrix",sizeof(double)*nkb*npwx*4); resmem_complex_op()(this->ctx, becp, GlobalV::NBANDS * nkb, "Stress::becp"); resmem_complex_op()(this->ctx, dbecp, GlobalV::NBANDS * nkb, "Stress::dbecp"); resmem_complex_op()(this->ctx, dbecp_noevc, nkb * npwx, "Stress::dbecp_noevc"); diff --git a/source/module_hamilt_pw/hamilt_pwdft/wavefunc.cpp b/source/module_hamilt_pw/hamilt_pwdft/wavefunc.cpp index c10bb386e4..f9845dc451 100644 --- a/source/module_hamilt_pw/hamilt_pwdft/wavefunc.cpp +++ b/source/module_hamilt_pw/hamilt_pwdft/wavefunc.cpp @@ -168,6 +168,7 @@ void diago_PAO_in_pw_k2(const int &ik, if (p_wf->init_wfc == "file") { ModuleBase::ComplexMatrix wfcatom(nbands, nbasis); + ModuleBase::Memory::record("wavefunc::wfcatom",nbands * nbasis * sizeof(std::complex)); std::stringstream filename; filename << GlobalV::global_readin_dir << "WAVEFUNC" << ik + 1 << ".dat"; bool result = ModuleIO::read_wfc_pw(filename.str(), wfc_basis, ik, p_wf->nkstot, wfcatom); @@ -175,6 +176,7 @@ void diago_PAO_in_pw_k2(const int &ik, if (result) { std::vector> s_wfcatom(nbands * nbasis); + ModuleBase::Memory::record("wavefunc::s_wfcatom",nbands * nbasis * sizeof(std::complex)); castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, s_wfcatom.data(), wfcatom.c, nbands * nbasis); if (GlobalV::KS_SOLVER == "cg") @@ -255,6 +257,7 @@ void diago_PAO_in_pw_k2(const int &ik, else if(p_wf->init_wfc.substr(0,6)=="atomic") { ModuleBase::ComplexMatrix wfcatom(starting_nw, nbasis);//added by zhengdy-soc + ModuleBase::Memory::record("wavefunc::wfcatom",starting_nw * nbasis * sizeof(std::complex)); if(GlobalV::test_wf)ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "starting_nw", starting_nw); p_wf->atomic_wfc(ik, current_nbasis, GlobalC::ucell.lmax_ppwf, wfc_basis, wfcatom, GlobalC::ppcell.tab_at, GlobalV::NQX, GlobalV::DQ); @@ -271,6 +274,7 @@ void diago_PAO_in_pw_k2(const int &ik, // (7) Diago with cg method. std::vector> s_wfcatom(starting_nw * nbasis); + ModuleBase::Memory::record("wavefunc::s_wfcatom",starting_nw * nbasis * sizeof(std::complex)); castmem_z2c_h2h_op()(cpu_ctx, cpu_ctx, s_wfcatom.data(), wfcatom.c, starting_nw * nbasis); //if(GlobalV::DIAGO_TYPE == "cg") xiaohui modify 2013-09-02 if(GlobalV::KS_SOLVER=="cg") //xiaohui add 2013-09-02 @@ -323,6 +327,7 @@ void diago_PAO_in_pw_k2(const int &ik, if (p_wf->init_wfc == "file") { ModuleBase::ComplexMatrix wfcatom(nbands, nbasis); + ModuleBase::Memory::record("wavefunc::wfcatom",nbands * nbasis * sizeof(std::complex)); std::stringstream filename; filename << GlobalV::global_readin_dir << "WAVEFUNC" << ik + 1 << ".dat"; bool result = ModuleIO::read_wfc_pw(filename.str(), wfc_basis, ik, p_wf->nkstot, wfcatom); @@ -406,6 +411,7 @@ void diago_PAO_in_pw_k2(const int &ik, else if (p_wf->init_wfc.substr(0, 6) == "atomic") { ModuleBase::ComplexMatrix wfcatom(starting_nw, nbasis); // added by zhengdy-soc + ModuleBase::Memory::record("wavefunc::wfcatom",starting_nw * nbasis * sizeof(std::complex)); if (GlobalV::test_wf) ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "starting_nw", starting_nw);