From 0307f92266f7b10476c95dd83ca7aa6e4510437e Mon Sep 17 00:00:00 2001 From: zhanghao Date: Wed, 24 Apr 2024 17:10:44 +0800 Subject: [PATCH 1/8] update abacus parse md --- dptb/data/interfaces/abacus.py | 305 ++++++++++++++++++++++++++++++++- 1 file changed, 297 insertions(+), 8 deletions(-) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index 74b2bc1d..aa129a65 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -92,14 +92,24 @@ def recursive_parse(input_path, if os.path.exists(os.path.join(folder, data_name, "hscsr.tgz")): os.system("cd "+os.path.join(folder, data_name) + " && tar -zxvf hscsr.tgz && mv OUT.ABACUS/* ./") try: - _abacus_parse(folder, - os.path.join(preprocess_dir, f"{prefix}.{index}"), - data_name, - get_Ham=parse_Hamiltonian, - get_DM=parse_DM, - get_overlap=parse_overlap, - get_eigenvalues=parse_eigenvalues) + if os.path.exists(os.path.join(folder, data_name, "running_get_S.log")) or \ + os.path.exists(os.path.join(folder, data_name, "running_scf.log")): + _abacus_parse(folder, + os.path.join(preprocess_dir, f"{prefix}.{index}"), + data_name, + get_Ham=parse_Hamiltonian, + get_DM=parse_DM, + get_overlap=parse_overlap, + get_eigenvalues=parse_eigenvalues) #h5file_names.append(os.path.join(file, "AtomicData.h5")) + elif os.path.exists(os.path.join(folder, data_name, "running_md.log")): + _abacus_parse_md(folder, + os.path.join(preprocess_dir, f"{prefix}.{index}"), + data_name, + get_Ham=parse_Hamiltonian, + get_DM=parse_DM, + get_overlap=parse_overlap, + get_eigenvalues=parse_eigenvalues) pbar.update(1) except Exception as e: print(f"Error in {folder}/{data_name}: {e}") @@ -212,9 +222,10 @@ def find_target_line(f, target): lattice = np.zeros((3, 3)) for index_lat in range(3): lattice[index_lat, :] = np.array(f.readline().split()) + lattice = lattice * lattice_constant if coords_type == "cartesian": frac_coords = frac_coords @ np.matrix(lattice).I # get frac_coords anyway - lattice = lattice * lattice_constant + if get_Ham is False and get_overlap is True: spinful = False @@ -394,3 +405,281 @@ def parse_matrix(matrix_path, factor, spinful=False): # # else: # # f["kpoint"] = False # # f["eigenvalue"] = False + +def _abacus_parse_md(input_path, + output_path, + data_name, + get_Ham=False, + get_DM=False, + get_overlap=False, + get_eigenvalues=False): + + input_path = os.path.abspath(input_path) + output_path = os.path.abspath(output_path) + os.makedirs(output_path, exist_ok=True) + + def find_target_line(f, target): + line = f.readline() + while line: + if target in line: + return line + line = f.readline() + return None + log_file_name = "running_md.log" + + with open(os.path.join(input_path, data_name, log_file_name), 'r') as f_chk: + lines = f_chk.readlines() + if not lines or " Total Time :" not in lines[-1]: + raise ValueError(f"Job is not normal ending!") + + with open(os.path.join(input_path, data_name, log_file_name), 'r') as f: + f.readline() + line = f.readline() + # assert "WELCOME TO ABACUS" in line + assert find_target_line(f, "READING UNITCELL INFORMATION") is not None, 'Cannot find "READING UNITCELL INFORMATION" in log file' + num_atom_type = int(f.readline().split()[-1]) + + assert find_target_line(f, "lattice constant (Bohr)") is not None + lattice_constant = float(f.readline().split()[-1]) # unit is Angstrom, didn't read (Bohr) here. + + site_norbits_dict = {} + orbital_types_dict = {} + for index_type in range(num_atom_type): + tmp = find_target_line(f, "READING ATOM TYPE") + assert tmp is not None, 'Cannot find "ATOM TYPE" in log file' + assert tmp.split()[-1] == str(index_type + 1) + if tmp is None: + raise Exception(f"Cannot find ATOM {index_type} in {log_file_name}") + + line = f.readline() + assert "atom label =" in line + atom_label = line.split()[-1] + atom_label = ''.join(re.findall(r'[A-Za-z]', atom_label)) + assert atom_label in ase.data.atomic_numbers, "Atom label should be in periodic table" + atom_type = ase.data.atomic_numbers[atom_label] + + current_site_norbits = 0 + current_orbital_types = [] + while True: + line = f.readline() + if "number of zeta" in line: # parse the basis + tmp = line.split() + L = int(tmp[0][2:-1]) + num_L = int(tmp[-1]) + current_site_norbits += (2 * L + 1) * num_L + current_orbital_types.extend([L] * num_L) + else: + break + site_norbits_dict[atom_type] = current_site_norbits + orbital_types_dict[atom_type] = current_orbital_types + + #print(orbital_types_dict) + + line = find_target_line(f, "TOTAL ATOM NUMBER") + assert line is not None, 'Cannot find "TOTAL ATOM NUMBER" in log file' + nsites = int(line.split()[-1]) + nframes = 0 + with open(os.path.join(input_path, data_name, "MD_dump"), 'r') as f_dump: + line = 0 + frac_coords_list = [] + lattice_constant_list = [] + lattice_list = [] + line = find_target_line(f_dump, "MDSTEP:") + while line is not None: + step = line[9:] + nframes += 1 + lattice_constant = float(find_target_line(f_dump, "LATTICE_CONSTANT").split()[1]) + # get lattice vector + assert find_target_line(f_dump, "LATTICE_VECTORS") is not None + lattice = np.zeros((3, 3)) + for index_lat in range(3): + lattice[index_lat, :] = np.array(f_dump.readline().split()) + lattice = lattice * lattice_constant + lattice_list.append(lattice) + lattice_constant_list.append(lattice_constant) + + line = find_target_line(f_dump, "INDEX LABEL POSITION (Angstrom) FORCE (eV/Angstrom) VELOCITY (Angstrom/fs)") + # get coordinates + frac_coords = np.zeros((nsites, 3)) + site_norbits = np.zeros(nsites, dtype=int) + element = np.zeros(nsites, dtype=int) + for index_site in range(nsites): + line = f_dump.readline() + tmp = line.split() + atom_label = ''.join(re.findall(r'[A-Za-z]', tmp[1])) + assert atom_label in ase.data.atomic_numbers, "Atom label should be in periodic table" + element[index_site] = ase.data.atomic_numbers[atom_label] + site_norbits[index_site] = site_norbits_dict[element[index_site]] + frac_coords[index_site, :] = np.array(tmp[2:5]) + norbits = int(np.sum(site_norbits)) + frac_coords_list.append(frac_coords @ np.matrix(lattice).I) + site_norbits_cumsum = np.cumsum(site_norbits) + + + line = find_target_line(f_dump, "MDSTEP:") + + frac_coords_list = np.concatenate(frac_coords_list, axis=0) + lattice_list = np.concatenate(lattice_list, axis=0) + + if get_Ham is False and get_overlap is True: + spinful = False + else: + line = find_target_line(f, "nspin") + if line is None: + line = find_target_line(f, "NSPIN") + assert line is not None, 'Cannot find "NSPIN" in log file' + if "NSPIN == 1" or "npin = 1" in line: + spinful = False + elif "NSPIN == 4" or "nspin = 4" in line: + spinful = True + else: + raise ValueError(f'{line} is not supported') + + + np.savetxt(os.path.join(output_path, "cell.dat"), lattice_list) + np.savetxt(os.path.join(output_path, "rcell.dat"), np.linalg.inv(lattice.reshape(-1, 3, 3)).reshape(-1, 3) * 2 * np.pi) + cart_coords_list = frac_coords_list @ lattice + np.savetxt(os.path.join(output_path, "positions.dat").format(output_path), cart_coords_list) + np.savetxt(os.path.join(output_path, "atomic_numbers.dat"), element, fmt='%d') + #info = {'nsites' : nsites, 'isorthogonal': False, 'isspinful': spinful, 'norbits': norbits} + #with open('{}/info.json'.format(output_path), 'w') as info_f: + # json.dump(info, info_f) + with open(os.path.join(output_path, "basis.dat"), 'w') as f: + for atomic_number in element: + counter = Counter(orbital_types_dict[atomic_number]) + num_orbs = [counter[i] for i in range(6)] # s, p, d, f, g, h + for index_l, l in enumerate(num_orbs): + if l == 0: # no this orbit + continue + if index_l == 0: + f.write(f"{l}{orbitalId[index_l]}") + else: + f.write(f"{l}{orbitalId[index_l]}") + f.write('\n') + atomic_basis = {} + for atomic_number, orbitals in orbital_types_dict.items(): + atomic_basis[ase.data.chemical_symbols[atomic_number]] = orbitals + + U_orbital = OrbAbacus2DeepTB() + def parse_matrix(matrix_path, factor, spinful=False): + matrix_dict = dict() + with open(matrix_path, 'r') as f: + line = f.readline() # read "Matrix Dimension of ..." + if not "Matrix Dimension of" in line: + line = f.readline() # ABACUS >= 3.0 + assert "Matrix Dimension of" in line + f.readline() # read "Matrix number of ..." + norbits = int(line.split()[-1]) + for line in f: + line1 = line.split() + if len(line1) == 0: + break + num_element = int(line1[3]) + if num_element != 0: + R_cur = np.array(line1[:3]).astype(int) + line2 = f.readline().split() + line3 = f.readline().split() + line4 = f.readline().split() + if not spinful: + hamiltonian_cur = csr_matrix((np.array(line2).astype(float), np.array(line3).astype(int), + np.array(line4).astype(int)), shape=(norbits, norbits)).toarray() + else: + line2 = np.char.replace(line2, '(', '') + line2 = np.char.replace(line2, ')', 'j') + line2 = np.char.replace(line2, ',', '+') + line2 = np.char.replace(line2, '+-', '-') + hamiltonian_cur = csr_matrix((np.array(line2).astype(np.complex128), np.array(line3).astype(int), + np.array(line4).astype(int)), shape=(norbits, norbits)).toarray() + for index_site_i in range(nsites): + for index_site_j in range(nsites): + key_str = f"{index_site_i + 1}_{index_site_j + 1}_{R_cur[0]}_{R_cur[1]}_{R_cur[2]}" + mat = hamiltonian_cur[(site_norbits_cumsum[index_site_i] + - site_norbits[index_site_i]) * (1 + spinful): + site_norbits_cumsum[index_site_i] * (1 + spinful), + (site_norbits_cumsum[index_site_j] - site_norbits[index_site_j]) * (1 + spinful): + site_norbits_cumsum[index_site_j] * (1 + spinful)] + if abs(mat).max() < 1e-10: + continue + if not spinful: + mat = U_orbital.transform(mat, orbital_types_dict[element[index_site_i]], + orbital_types_dict[element[index_site_j]]) + else: + mat = mat.reshape((site_norbits[index_site_i], 2, site_norbits[index_site_j], 2)) + mat = mat.transpose((1, 0, 3, 2)).reshape((2 * site_norbits[index_site_i], + 2 * site_norbits[index_site_j])) + mat = U_orbital.transform(mat, orbital_types_dict[element[index_site_i]] * 2, + orbital_types_dict[element[index_site_j]] * 2) + matrix_dict[key_str] = mat * factor + return matrix_dict, norbits + + if get_Ham: + with h5py.File(os.path.join(output_path, "hamiltonians.h5"), 'w') as fid: + for i in range(nframes): + hamiltonian_dict, tmp = parse_matrix( + os.path.join(input_path, data_name, "matrix/"+str(i)+"_data-HR-sparse_SPIN0.csr"), 13.605698, # Ryd2eV + spinful=spinful) + assert tmp == norbits * (1 + spinful) + # creating a default group here adapting to the format used in DefaultDataset. + # by the way DefaultDataset loading h5 file, the index should be "1" here. + default_group = fid.create_group(str(i+1)) + for key_str, value in hamiltonian_dict.items(): + default_group[key_str] = value + + if get_overlap: + with h5py.File(os.path.join(output_path, "overlaps.h5"), 'w') as fid: + for i in range(nframes): + overlap_dict, tmp = parse_matrix(os.path.join(input_path, data_name, "matrix/"+str(i)+"_data-SR-sparse_SPIN0.csr"), 1, + spinful=spinful) + assert tmp == norbits * (1 + spinful) + if spinful: + overlap_dict_spinless = {} + for k, v in overlap_dict.items(): + overlap_dict_spinless[k] = v[:v.shape[0] // 2, :v.shape[1] // 2].real + overlap_dict_spinless, overlap_dict = overlap_dict, overlap_dict_spinless + + default_group = fid.create_group(str(i+1)) + for key_str, value in overlap_dict.items(): + default_group[key_str] = value + + if get_DM: + with h5py.File(os.path.join(output_path, "DM.h5"), 'w') as fid: + for i in range(nframes): + DM_dict, tmp = parse_matrix( + os.path.join(input_path, data_name, "matrix/"+str(i)+"_data-DMR-sparse_SPIN0.csr"), 1, spinful=spinful) + assert tmp == norbits * (1 + spinful) + # if spinful: + # DM_dict_spinless = {} + # for k, v in overlap_dict.items(): + # overlap_dict_spinless[k] = v[:v.shape[0] // 2, :v.shape[1] // 2].real + # overlap_dict_spinless, overlap_dict = overlap_dict, overlap_dict_spinless + default_group = fid.create_group(str(i+1)) + for key_str, value in DM_dict.items(): + default_group[key_str] = value + + if get_eigenvalues: + raise ValueError("Currently not support MD eigenvalues parsing.") + kpts = [] + with open(os.path.join(input_path, data_name, "kpoints"), "r") as f: + nkstot = f.readline().strip().split()[-1] + f.readline() + for _ in range(int(nkstot)): + line = f.readline() + kpt = [] + line = line.strip().split() + kpt.extend([float(line[1]), float(line[2]), float(line[3])]) + kpts.append(kpt) + kpts = np.array(kpts) + + with open(os.path.join(input_path, data_name, "BANDS_1.dat"), "r") as file: + band_lines = file.readlines() + band = [] + for line in band_lines: + values = line.strip().split() + eigs = [float(value) for value in values[1:]] + band.append(eigs) + band = np.array(band) + + assert len(band) == len(kpts) + np.save(os.path.join(output_path, "kpoints.npy"), kpts) + np.save(os.path.join(output_path, "eigenvalues.npy"), band) + From 86087dd59f8ec83293721af097053c3d8032a002 Mon Sep 17 00:00:00 2001 From: zhanghao Date: Thu, 25 Apr 2024 10:49:22 +0800 Subject: [PATCH 2/8] fix(default_dataset): The natom and nframe default setting --- dptb/data/dataset/_default_dataset.py | 28 +++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dptb/data/dataset/_default_dataset.py b/dptb/data/dataset/_default_dataset.py index 52ca442f..c6d50ae4 100644 --- a/dptb/data/dataset/_default_dataset.py +++ b/dptb/data/dataset/_default_dataset.py @@ -65,22 +65,11 @@ def __init__(self, else: raise ValueError("Wrong cell dimensions.") - # load atomic numbers - atomic_numbers = np.loadtxt(os.path.join(root, "atomic_numbers.dat")) - natoms = self.info["natoms"] - if natoms < 0: - natoms = atomic_numbers.shape[-1] - if atomic_numbers.shape[0] == self.info["natoms"]: - # same atomic_numbers, copy it to all frames. - atomic_numbers = np.expand_dims(atomic_numbers, axis=0) - self.data["atomic_numbers"] = np.broadcast_to(atomic_numbers, (self.info["nframes"], natoms)) - elif atomic_numbers.shape[0] == natoms * self.info["nframes"]: - self.data["atomic_numbers"] = atomic_numbers.reshape(self.info["nframes"],natoms) - else: - raise ValueError("Wrong atomic_number dimensions.") - # load positions, stored as cartesion no matter what provided. pos = np.loadtxt(os.path.join(root, "positions.dat")) + natoms = self.info["natoms"] + if natoms < 0: + natoms = int(pos.shape[0] / self.info["nframes"]) assert pos.shape[0] == self.info["nframes"] * natoms pos = pos.reshape(self.info["nframes"], natoms, 3) # ase use cartesian by default. @@ -91,6 +80,17 @@ def __init__(self, else: raise NameError("Position type must be cart / frac.") + # load atomic numbers + atomic_numbers = np.loadtxt(os.path.join(root, "atomic_numbers.dat")) + if atomic_numbers.shape[0] == natoms: + # same atomic_numbers, copy it to all frames. + atomic_numbers = np.expand_dims(atomic_numbers, axis=0) + self.data["atomic_numbers"] = np.broadcast_to(atomic_numbers, (self.info["nframes"], natoms)) + elif atomic_numbers.shape[0] == natoms * self.info["nframes"]: + self.data["atomic_numbers"] = atomic_numbers.reshape(self.info["nframes"],natoms) + else: + raise ValueError("Wrong atomic_number dimensions.") + # load optional data files if get_eigenvalues == True: if os.path.exists(os.path.join(self.root, "eigenvalues.npy")): From 2c8fbe79cd18eb62456ba9d789b71c58e4b806ae Mon Sep 17 00:00:00 2001 From: zhanghao Date: Sat, 27 Apr 2024 08:36:42 +0800 Subject: [PATCH 3/8] =?UTF-8?q?shift=20the=20'pos=E2=80=98=20position=20in?= =?UTF-8?q?=20test=20default=20dataset?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dptb/tests/test_default_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dptb/tests/test_default_dataset.py b/dptb/tests/test_default_dataset.py index 202edf7a..e8b42e82 100644 --- a/dptb/tests/test_default_dataset.py +++ b/dptb/tests/test_default_dataset.py @@ -59,7 +59,7 @@ def test_raw_data(self): assert self.dataset.raw_data[0].AtomicData_options == {'r_max': 5.0, 'er_max': 5.0, 'oer_max': 2.5, 'pbc': True} assert self.dataset.raw_data[0].info == self.info_files['kpath_spk.0'] assert "bandinfo" in self.dataset.raw_data[0].info - assert list(self.dataset.raw_data[0].data.keys()) == (['cell', 'atomic_numbers', 'pos', 'kpoints', 'eigenvalues']) + assert list(self.dataset.raw_data[0].data.keys()) == (['cell', 'pos', 'atomic_numbers', 'kpoints', 'eigenvalues']) assert (np.abs(self.dataset.raw_data[0].data['cell'] - self.strase[0].cell) < 1e-6).all() assert (np.abs(self.dataset.raw_data[0].data['atomic_numbers'] - np.array([[14., 14.]])) < 1e-6).all() assert (np.abs(self.dataset.raw_data[0].data['pos'] - self.strase[0].positions) < 1e-6).all() From af509570cf129454104f3eb771f35af49aa5d90e Mon Sep 17 00:00:00 2001 From: zhanghao Date: Mon, 29 Apr 2024 16:08:52 +0800 Subject: [PATCH 4/8] fix lattice constant transform --- dptb/data/interfaces/abacus.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index aa129a65..25fb7def 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -222,9 +222,9 @@ def find_target_line(f, target): lattice = np.zeros((3, 3)) for index_lat in range(3): lattice[index_lat, :] = np.array(f.readline().split()) - lattice = lattice * lattice_constant if coords_type == "cartesian": frac_coords = frac_coords @ np.matrix(lattice).I # get frac_coords anyway + lattice = lattice * lattice_constant if get_Ham is False and get_overlap is True: @@ -439,8 +439,9 @@ def find_target_line(f, target): assert find_target_line(f, "READING UNITCELL INFORMATION") is not None, 'Cannot find "READING UNITCELL INFORMATION" in log file' num_atom_type = int(f.readline().split()[-1]) - assert find_target_line(f, "lattice constant (Bohr)") is not None - lattice_constant = float(f.readline().split()[-1]) # unit is Angstrom, didn't read (Bohr) here. + line = find_target_line(f, "lattice constant (Angstrom)") + assert line is not None + lattice_constant = float(line.split()[-1]) # unit is Angstrom, didn't read (Bohr) here. site_norbits_dict = {} orbital_types_dict = {} From 1ed4a5aacba6bb1b46b789c2316dacc9b10a60b4 Mon Sep 17 00:00:00 2001 From: zhanghao Date: Mon, 29 Apr 2024 16:16:43 +0800 Subject: [PATCH 5/8] update parse_abacus_md --- dptb/data/interfaces/abacus.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index 25fb7def..b994c65e 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -482,7 +482,7 @@ def find_target_line(f, target): nframes = 0 with open(os.path.join(input_path, data_name, "MD_dump"), 'r') as f_dump: line = 0 - frac_coords_list = [] + coords_list = [] lattice_constant_list = [] lattice_list = [] line = find_target_line(f_dump, "MDSTEP:") @@ -513,13 +513,13 @@ def find_target_line(f, target): site_norbits[index_site] = site_norbits_dict[element[index_site]] frac_coords[index_site, :] = np.array(tmp[2:5]) norbits = int(np.sum(site_norbits)) - frac_coords_list.append(frac_coords @ np.matrix(lattice).I) + coords_list.append(frac_coords) site_norbits_cumsum = np.cumsum(site_norbits) line = find_target_line(f_dump, "MDSTEP:") - frac_coords_list = np.concatenate(frac_coords_list, axis=0) + coords_list = np.concatenate(coords_list, axis=0) lattice_list = np.concatenate(lattice_list, axis=0) if get_Ham is False and get_overlap is True: @@ -539,8 +539,7 @@ def find_target_line(f, target): np.savetxt(os.path.join(output_path, "cell.dat"), lattice_list) np.savetxt(os.path.join(output_path, "rcell.dat"), np.linalg.inv(lattice.reshape(-1, 3, 3)).reshape(-1, 3) * 2 * np.pi) - cart_coords_list = frac_coords_list @ lattice - np.savetxt(os.path.join(output_path, "positions.dat").format(output_path), cart_coords_list) + np.savetxt(os.path.join(output_path, "positions.dat").format(output_path), coords_list) np.savetxt(os.path.join(output_path, "atomic_numbers.dat"), element, fmt='%d') #info = {'nsites' : nsites, 'isorthogonal': False, 'isspinful': spinful, 'norbits': norbits} #with open('{}/info.json'.format(output_path), 'w') as info_f: From d2706e575c0791ac0427dea473d66d08c9aeb583 Mon Sep 17 00:00:00 2001 From: qqgu Date: Mon, 29 Apr 2024 17:11:08 +0800 Subject: [PATCH 6/8] update abacus.py --- dptb/data/interfaces/abacus.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index b994c65e..a18cde90 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -92,8 +92,10 @@ def recursive_parse(input_path, if os.path.exists(os.path.join(folder, data_name, "hscsr.tgz")): os.system("cd "+os.path.join(folder, data_name) + " && tar -zxvf hscsr.tgz && mv OUT.ABACUS/* ./") try: + tasktype = "" if os.path.exists(os.path.join(folder, data_name, "running_get_S.log")) or \ os.path.exists(os.path.join(folder, data_name, "running_scf.log")): + tasktype = tasktype + "single_point" _abacus_parse(folder, os.path.join(preprocess_dir, f"{prefix}.{index}"), data_name, @@ -102,7 +104,8 @@ def recursive_parse(input_path, get_overlap=parse_overlap, get_eigenvalues=parse_eigenvalues) #h5file_names.append(os.path.join(file, "AtomicData.h5")) - elif os.path.exists(os.path.join(folder, data_name, "running_md.log")): + if os.path.exists(os.path.join(folder, data_name, "running_md.log")): + tasktype = tasktype + "molecular_dynamics" _abacus_parse_md(folder, os.path.join(preprocess_dir, f"{prefix}.{index}"), data_name, @@ -110,6 +113,11 @@ def recursive_parse(input_path, get_DM=parse_DM, get_overlap=parse_overlap, get_eigenvalues=parse_eigenvalues) + if tasktype == "": + raise ValueError(f"Cannot find any log file in {folder}") + elif not tasktype in ["single_point", "molecular_dynamics"]: + raise ValueError(f"Unknown task type in {folder}") + pbar.update(1) except Exception as e: print(f"Error in {folder}/{data_name}: {e}") From 6cafead3d3e2b0822ecff82ba78f5c6ba5f0bda3 Mon Sep 17 00:00:00 2001 From: qqgu Date: Mon, 29 Apr 2024 17:13:19 +0800 Subject: [PATCH 7/8] update abacus.py --- dptb/data/interfaces/abacus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index a18cde90..5b9e69e2 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -232,6 +232,8 @@ def find_target_line(f, target): lattice[index_lat, :] = np.array(f.readline().split()) if coords_type == "cartesian": frac_coords = frac_coords @ np.matrix(lattice).I # get frac_coords anyway + # this is because the cartesian coordinates in abacus log files are in unit of a_0. Thus the lattice constant is not needed. + # now we need to convert the lattice constant to Angstrom. lattice = lattice * lattice_constant From a76b0e3746e7c2095d2677f0ccfd1e69686316c3 Mon Sep 17 00:00:00 2001 From: zhanghao Date: Mon, 29 Apr 2024 17:17:54 +0800 Subject: [PATCH 8/8] update parse abacus scf lattice constant --- dptb/data/interfaces/abacus.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dptb/data/interfaces/abacus.py b/dptb/data/interfaces/abacus.py index b994c65e..998e1898 100644 --- a/dptb/data/interfaces/abacus.py +++ b/dptb/data/interfaces/abacus.py @@ -153,8 +153,9 @@ def find_target_line(f, target): assert find_target_line(f, "READING UNITCELL INFORMATION") is not None, 'Cannot find "READING UNITCELL INFORMATION" in log file' num_atom_type = int(f.readline().split()[-1]) - assert find_target_line(f, "lattice constant (Bohr)") is not None - lattice_constant = float(f.readline().split()[-1]) # unit is Angstrom, didn't read (Bohr) here. + line = find_target_line(f, "lattice constant (Angstrom)") + assert line is not None + lattice_constant = float(line.split()[-1]) # unit is Angstrom, didn't read (Bohr) here. site_norbits_dict = {} orbital_types_dict = {}