From 04655b65b9ee71d6d5b62c5761d1aaafd9496080 Mon Sep 17 00:00:00 2001
From: Ryo KOBAYASHI <ryo.kbys@gmail.com>
Date: Sun, 5 May 2024 18:39:53 +0900
Subject: [PATCH] fix: a fatal performace issue in PMD_ASE_Calculator.

---
 nappy/interface/ase/pmdrun.py | 87 ++++++++++++++++++++++++-----------
 nappy/io.py                   | 12 +++--
 nappy/pmd/__init__.py         | 12 ++++-
 nappy/pmd/pmd_wrapper.F90     |  8 ++--
 pmd/force_Morse.F90           |  2 +-
 pmd/mod_pairlist.F90          |  9 ++--
 pmd/pmd_core.F90              | 17 +++----
 7 files changed, 99 insertions(+), 48 deletions(-)

diff --git a/nappy/interface/ase/pmdrun.py b/nappy/interface/ase/pmdrun.py
index 2939765..a518708 100644
--- a/nappy/interface/ase/pmdrun.py
+++ b/nappy/interface/ase/pmdrun.py
@@ -1,8 +1,9 @@
 """
 ASE interface to pmd.
-"""
-from __future__ import print_function
 
+Changes:
+  <2024.05.04 RK>  Changed to call pmd via f2py and dynamic library instead of via subprocess.
+"""
 import os
 import subprocess
 import numpy as np
@@ -12,15 +13,30 @@
 from nappy.interface.ase.pmdio import get_fmvs
 from nappy.napsys import NAPSystem
 
+try:
+    import nappy.pmd.pmd_wrapper as pw
+except:
+    pass
+
 __author__  = "Ryo KOBAYASHI"
-__version__ = "160605"
+__version__ = "240504"
 __LICENSE__ = "MIT"
 
 CALC_END_MARK = "Job finished "
 some_changes = ['positions', 'numbers', 'cell',]
 
+def str2char(string,nlen):
+    slen = len(string)
+    if slen > nlen:
+        raise ValueError('slen > nlen !')
+    c = np.empty(nlen,dtype='c')
+    c[:] = ' '
+    c[0:slen] = [ string[i] for i in range(slen) ]
+    c[slen:] = [ ' ' for i in range(nlen-slen) ]
+    return c
 
-class PMD(FileIOCalculator):
+
+class PMD_ASE_Calculator(Calculator):
     """
     Class for PMD calculation for ASE.
 
@@ -76,7 +92,7 @@ class PMD(FileIOCalculator):
         'boundary': 'ppp',
     }
 
-    def __init__(self, restart=None, ignore_bad_restart_file=False,
+    def __init__(self, restart=None,
                  label='pmd', atoms=None, 
                  command='pmd > out.pmd',
                  dimension=(True,True,True),
@@ -104,10 +120,11 @@ def __init__(self, restart=None, ignore_bad_restart_file=False,
 
         """
 
-        Calculator.__init__(self, restart, ignore_bad_restart_file,
-                            label, atoms, **kwargs)
+        Calculator.__init__(self, restart=restart,
+                            label=label, atoms=atoms,
+                            directory='.', **kwargs)
 
-        if label not in ['pmd']:
+        if label != 'pmd':
             raise RuntimeError('label must be pmd.')
 
         if self.parameters['force_type'] is None:
@@ -120,7 +137,6 @@ def __init__(self, restart=None, ignore_bad_restart_file=False,
         else:
             self.command = command +' > out.'+self.label
 
-        self.specorder= specorder
         self.dimension = dimension
 
     def set(self, **kwargs):
@@ -131,19 +147,36 @@ def set(self, **kwargs):
     def calculate(self, atoms=None, properties=['energy'],
                   system_changes=some_changes):
         Calculator.calculate(self, atoms, properties, system_changes)
-        self.write_input(self.atoms, properties, system_changes)
-
-        olddir = os.getcwd()
-        try:
-            os.chdir(self.directory)
-            errorcode = subprocess.call(self.command, shell=True)
-        finally:
-            os.chdir(olddir)
-
-        if errorcode:
-            raise RuntimeError('%s returned an error: %d' %
-                               (self.name, errorcode))
-        self.read_results()
+        # # Write in.pmd to be read from pmd executable
+        # self.write_input(self.atoms, properties, system_changes)
+
+        # olddir = os.getcwd()
+        # try:
+        #     os.chdir(self.directory)
+        #     errorcode = subprocess.call(self.command, shell=True)
+        # finally:
+        #     os.chdir(olddir)
+
+        # if errorcode:
+        #     raise RuntimeError('%s returned an error: %d' %
+        #                        (self.name, errorcode))
+        # self.read_results()
+        
+        #...The code below is for calling pmd via f2py and dynamic library
+        if atoms is not None:
+            self.nsys = nappy.io.from_ase(atoms, get_forces=False)
+
+        #...Call pmd via f2py and dynamic library
+        pmd = nappy.pmd.PMD(self.nsys)
+        pmd.set_params(force_type = self.parameters['force_type'],
+                       cutoff_radius = self.parameters['cutoff_radius'],)
+        pmd.run(nstp=0, iprint=0)
+
+        self.results['energy'] = pmd.get_potential_energy()
+        self.results['forces'] = pmd.get_forces().T
+        self.results['stresses'] = pmd.get_stress()
+        self.results['stress'] = pmd.get_stress().sum() /3
+        return None
 
     def relax(self, atoms=None, properties=['energy'],
               system_changes=some_changes,
@@ -293,9 +326,9 @@ def get_input_txt(params,fmvs):
            'temperature_relax_time','flag_temp_dist','',
            'factor_direction','',
            'stress_control','pressure_target','stress_target',
-           'stress_relax_time','flag_compute_stress','',
+           'stress_relax_time','',
            'overlay','overlay_type','',
-           'zload_type','final_strain','',
+           'final_strain','',
            'boundary']
 
     int_keys=['num_nodes_x','num_nodes_y','num_nodes_z','num_omp_threads',
@@ -310,7 +343,7 @@ def get_input_txt(params,fmvs):
                 'converge_eps','final_strain']
     str_keys=['io_format','force_type','temperature_control',
               'stress_control','flag_temp_dist',
-              'flag_compute_stress','zload_type','boundary',
+              'boundary',
               'overlay_type']
 
     for key in order:
@@ -332,7 +365,9 @@ def get_input_txt(params,fmvs):
         elif key == 'temperature_target':
             vals = params[key]
             for i,v in enumerate(vals):
-                txt += '{0:25s} {1:2d} {2:6.1f}\n'.format(key,i+1,v)
+                #...Format of temperature target has been changed...
+                # txt += '{0:25s} {1:2d} {2:6.1f}\n'.format(key,i+1,v)
+                txt += f'{key:25s} {v:6.1f}\n'
         elif key == 'factor_direction':
             # vals = params[key]
             vals= fmvs
diff --git a/nappy/io.py b/nappy/io.py
index 2c7181e..4435224 100644
--- a/nappy/io.py
+++ b/nappy/io.py
@@ -1413,7 +1413,7 @@ def get_PDB_txt(nsys,**kwargs):
 
     return txt
 
-def from_ase(atoms,specorder=None):
+def from_ase(atoms, specorder=None, get_forces=True):
     """
     Convert ASE Atoms object to NAPSystem object.
     """
@@ -1445,10 +1445,12 @@ def from_ase(atoms,specorder=None):
         vels = np.zeros((natm,3))
     else:
         vels = np.array(vels)
-    try:
-        frcs = atoms.get_forces()
-    except:
-        frcs = np.zeros((natm,3))
+    frcs = np.zeros((natm,3))
+    if get_forces:
+        try:
+            frcs = atoms.get_forces()
+        except:
+            pass
 
     #...Create arrays to be installed into nsys.atoms
     sids = [ nsys.specorder.index(si)+1 for si in symbols ]
diff --git a/nappy/pmd/__init__.py b/nappy/pmd/__init__.py
index f470e66..37997b6 100644
--- a/nappy/pmd/__init__.py
+++ b/nappy/pmd/__init__.py
@@ -74,7 +74,10 @@ def run(self, nstp=0, dt=1.0, ifdmp=0, dmp=0.99, conveps=1e-5, convnum=3,
         ispcs = self.nsys.atoms.sid.values
         #...Run pmd by calling fortran-compiled library
         #print('calling pw.run')
-        res = pw.run(rtot.T,vtot.T,naux,hmat,ispcs,initialize)
+        try:
+            res = pw.run(rtot.T,vtot.T,naux,hmat,ispcs,initialize)
+        except:
+            raise
         #print('out from pw.run')
         self.result = {}
         self.result['rtot'] = res[0]
@@ -259,6 +262,13 @@ def get_stress_tensor(self):
             return None
         return self.result['stnsr']
 
+    def get_stress(self):
+        if not hasattr(self,'result'):
+            return None
+        stnsr = self.result['stnsr'] 
+        return np.array([stnsr[0,0], stnsr[1,1], stnsr[2,2],
+                         stnsr[1,2], stnsr[0,2], stnsr[0,1]])
+
     def get_forces(self):
         if not hasattr(self,'result'):
             return None
diff --git a/nappy/pmd/pmd_wrapper.F90 b/nappy/pmd/pmd_wrapper.F90
index e8bdd38..191ea45 100644
--- a/nappy/pmd/pmd_wrapper.F90
+++ b/nappy/pmd/pmd_wrapper.F90
@@ -134,6 +134,11 @@ subroutine set_pmdvars(nsp0,ns,ls,cspcs,nf,lf,cfrcs,rc0,rbuf0, &
     force_list(i) = trim(c128)
   end do
 
+  rc1nn = 3d0
+  rbuf = rbuf0
+!.....RC should be set before calling init_force
+  rc = rc0
+
 !.....It is required to call init_force and read some in.params.XXX to define aux array
   call init_force(.true.)
 !.....Before allocating auxiliary array, set naux (num of auxiliary data)
@@ -154,9 +159,6 @@ subroutine set_pmdvars(nsp0,ns,ls,cspcs,nf,lf,cfrcs,rc0,rbuf0, &
     endif
   enddo
   dt = dt0
-  rbuf = rbuf0
-  rc1nn = 3d0
-  rc = rc0
   nx = 1
   ny = 1
   nz = 1
diff --git a/pmd/force_Morse.F90 b/pmd/force_Morse.F90
index 3a1d20b..069c007 100644
--- a/pmd/force_Morse.F90
+++ b/pmd/force_Morse.F90
@@ -1,6 +1,6 @@
 module Morse
 !-----------------------------------------------------------------------
-!                     Last modified: <2023-12-18 16:13:18 KOBAYASHI Ryo>
+!                     Last modified: <2024-05-04 22:43:08 KOBAYASHI Ryo>
 !-----------------------------------------------------------------------
 !  Parallel implementation of Morse pontential.
 !    - For BVS, see Adams & Rao, Phys. Status Solidi A 208, No.8 (2011)
diff --git a/pmd/mod_pairlist.F90 b/pmd/mod_pairlist.F90
index c587591..d3d0412 100644
--- a/pmd/mod_pairlist.F90
+++ b/pmd/mod_pairlist.F90
@@ -171,7 +171,7 @@ subroutine mk_lspr_para(l1st)
 
     call mk_lscl_para()
 
-    call set_nnmax()
+    call set_nnmax(l1st)
 
     if( .not.allocated(lspr) ) then
       allocate(lspr(0:nnmax,namax))
@@ -954,7 +954,7 @@ subroutine sort_lspr(namax,natm,ra,nnmax,h,lspr)
     return
   end subroutine sort_lspr
 !=======================================================================
-  subroutine set_nnmax()
+  subroutine set_nnmax(l1st)
 !
 !  Estimate and set nnmax from lscl. Thus this must be called after 
 !  lscl and lshd are computed.
@@ -965,13 +965,14 @@ subroutine set_nnmax()
     use pmdvars,only: vol,myid_md,mpi_md_world,nnmax,nxyz,rc,rbuf, &
          iprint,ratio_nnmax_update,ntot
     include "mpif.h"
+    logical,intent(in):: l1st
     
     integer:: ierr,ic,i,inc,nmaxl,nmax,nnmax_estimate,nnmax_prev, &
          ncell,ncelltot
     integer:: ix,iy,iz
     real(8):: volc,rho
     real(8),parameter:: pi = 3.14159265358979d0
-    logical,save:: l1st = .true. 
+!!$    logical,save:: l1st = .true. 
 
     nmaxl = 0
     ncell = 0  ! num. of cells that contain at least one atom
@@ -1026,7 +1027,7 @@ subroutine set_nnmax()
         call flush(6)
       endif
     endif
-    l1st = .false.
+!!$    l1st = .false.
     return
   end subroutine set_nnmax
 !=======================================================================
diff --git a/pmd/pmd_core.F90 b/pmd/pmd_core.F90
index 81c0df8..5443da4 100644
--- a/pmd/pmd_core.F90
+++ b/pmd/pmd_core.F90
@@ -1,5 +1,5 @@
 !-----------------------------------------------------------------------
-!                     Last-modified: <2024-04-10 16:48:46 KOBAYASHI Ryo>
+!                     Last-modified: <2024-05-04 22:00:54 KOBAYASHI Ryo>
 !-----------------------------------------------------------------------
 ! Core subroutines/functions needed for pmd.
 !-----------------------------------------------------------------------
@@ -164,7 +164,7 @@ subroutine pmd_core(hunit,hmat,ntot0,tagtot,rtot,vtot,atot,stot &
   endif
 !.....perform space decomposition after reading atomic configuration
   tmp = mpi_wtime()
-  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
+  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot,.true.)
   call accum_time('space_decomp',mpi_wtime()-tmp)
 !.....Some conversions
   do i=1,natm
@@ -1092,7 +1092,7 @@ subroutine oneshot(hunit,hmat,ntot0,tagtot,rtot,vtot,atot,stot, &
     write(6,'(a,"[ ",3f12.3," ]")') '   b = ',h(1:3,2,0)
     write(6,'(a,"[ ",3f12.3," ]")') '   c = ',h(1:3,3,0)
   endif
-  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
+  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot,.true.)
 
 !.....Some conversions
   nsp= 0
@@ -1205,7 +1205,7 @@ subroutine oneshot4fitpot(hunit,hmat,ntot0,tagtot,rtot,vtot,atot,stot, &
     write(6,'(a,"[ ",3f12.3," ]")') '   c = ',h(1:3,3,0)
   endif
   call boxmat(h,hi,ht,g,gi,gt,vol,sgm)
-  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
+  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot,.true.)
 
 !.....Some conversions
   nsp= 0
@@ -1379,7 +1379,7 @@ subroutine min_core(hunit,hmat,ntot0,tagtot,rtot,vtot,atot,stot &
 
 !.....perform space decomposition after reading atomic configuration
   tmp = mpi_wtime()
-  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
+  call space_decomp(ntot0,tagtot,rtot,vtot,auxtot,.true.)
   call accum_time('space_decomp',mpi_wtime()-tmp)
 !.....Some conversions
   do i=1,natm
@@ -2839,7 +2839,7 @@ subroutine vfire(num_fire,alp0_fire,alp_fire,falp_fire,dtmax_fire &
 !     &     ,ierr)
 end subroutine vfire
 !=======================================================================
-subroutine space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
+subroutine space_decomp(ntot0,tagtot,rtot,vtot,auxtot,l1st)
 !
 !  Decompose the system and scatter atoms to every process.
 !
@@ -2851,6 +2851,7 @@ subroutine space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
   real(8),intent(inout):: rtot(3,ntot0),tagtot(ntot0)
   real(8),intent(in):: vtot(3,ntot0)
   real(8),intent(in):: auxtot(naux,ntot0)
+  logical,intent(in):: l1st
 !!$  real(8),intent(in):: hunit,h(3,3,0:1)
 !!$  real(8),intent(inout):: rtot(3,ntot0),tagtot(ntot0)
 !!$  integer,intent(in):: ntot0,naux
@@ -2862,7 +2863,7 @@ subroutine space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
   integer:: myxt,myyt,myzt,nmin
   real(8):: sxogt,syogt,szogt
   real(8):: t0
-  logical,save:: l1st = .true.
+!!$  logical,save:: l1st = .true.
 
   t0 = mpi_wtime()
 
@@ -2914,7 +2915,7 @@ subroutine space_decomp(ntot0,tagtot,rtot,vtot,auxtot)
       call estimate_nbmax(nalmax,h,nx,ny,nz,vol,rc,rbuf,nbmax,boundary)
       namax = namax +nbmax
       if( iprint.ne.0 .and. l1st ) then
-        l1st = .false.
+!!$        l1st = .false.
         print *,''
         print '(a)', ' space_decomp:'
         print '(a,2f6.3)','   rcut, rbuf = ',rc,rbuf