Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ABACUSInMemoryDataset in data module #11

Merged
merged 12 commits into from
Dec 1, 2023
2 changes: 2 additions & 0 deletions dptb/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ASEDataset,
HDF5Dataset,
ABACUSDataset,
ABACUSInMemoryDataset,
)
from .dataloader import DataLoader, Collater, PartialSampler
from .build import dataset_from_config
Expand All @@ -33,6 +34,7 @@
ASEDataset,
HDF5Dataset,
ABACUSDataset,
ABACUSInMemoryDataset,
DataLoader,
Collater,
PartialSampler,
Expand Down
3 changes: 2 additions & 1 deletion dptb/data/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from ._npz_dataset import NpzDataset
from ._hdf5_dataset import HDF5Dataset
from ._abacus_dataset import ABACUSDataset
from ._abacus_dataset_mem import ABACUSInMemoryDataset

# ``__all__`` must list the public names as strings. Listing the class
# objects themselves makes ``from dptb.data.dataset import *`` raise a
# TypeError, because the import machinery requires ``str`` items.
__all__ = [
    "ABACUSInMemoryDataset",
    "ABACUSDataset",
    "ASEDataset",
    "AtomicDataset",
    "AtomicInMemoryDataset",
    "NpzDataset",
    "HDF5Dataset",
]
109 changes: 109 additions & 0 deletions dptb/data/dataset/_abacus_dataset_mem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from typing import Dict, Any, List, Callable, Union, Optional
import os

import numpy as np
import h5py

import torch

from .. import (
AtomicData,
AtomicDataDict,
)
from ..transforms import TypeMapper, OrbitalMapper
from ._base_datasets import AtomicInMemoryDataset
from dptb.nn.hamiltonian import E3Hamiltonian
from dptb.data.interfaces.ham_to_feature import ham_block_to_feature
from dptb.data.interfaces.abacus import recursive_parse

# Letter used for each integer orbital index (0 -> s, 1 -> p, 2 -> d, 3 -> f);
# used by the HDF5 reader below to build basis labels such as "1s" or "2p".
orbitalLId = {0:"s", 1:"p", 2:"d", 3:"f"}

def _abacus_h5_reader(h5file_path, AtomicData_options):
    """Load one preprocessed ABACUS HDF5 file into an ``AtomicData`` object.

    The file must contain the ``pos``, ``cell`` and ``atomic_numbers``
    datasets. If a ``hamiltonian_blocks`` entry is present, an
    ``OrbitalMapper`` is built from the ``basis`` group, the blocks (and
    ``overlap_blocks`` when available) are written onto the data object by
    ``ham_block_to_feature``, and the result is passed through
    ``E3Hamiltonian`` with gradients disabled. If both ``eigenvalue`` and
    ``kpoint`` are present they are attached as tensors of the default dtype.

    Args:
        h5file_path: Path of the HDF5 file to read.
        AtomicData_options: Extra keyword arguments forwarded to
            ``AtomicData.from_points``.

    Returns:
        The ``AtomicData`` object built from the file.
    """
    # Use a context manager so the HDF5 handle is released even when building
    # the data object fails part-way; everything that reads from ``data``
    # therefore has to happen inside this block.
    with h5py.File(h5file_path, "r") as data:
        atomic_data = AtomicData.from_points(
            pos=data["pos"][:],
            cell=data["cell"][:],
            atomic_numbers=data["atomic_numbers"][:],
            **AtomicData_options,
        )

        # Membership test rather than ``data["hamiltonian_blocks"]``: the key is
        # optional (it is fetched with ``.get`` below), so a file without it
        # must be skipped instead of raising KeyError.
        if "hamiltonian_blocks" in data:
            basis = {}
            for key, value in data["basis"].items():
                # Labels are "<running index><orbital letter>", e.g. "1s", "2p".
                basis[key] = [(f"{i+1}" + orbitalLId[l]) for i, l in enumerate(value)]
            idp = OrbitalMapper(basis)
            e3 = E3Hamiltonian(idp=idp, decompose=True)
            ham_block_to_feature(
                atomic_data,
                idp,
                data.get("hamiltonian_blocks", False),
                data.get("overlap_blocks", False),
            )
            with torch.no_grad():
                atomic_data = e3(atomic_data.to_dict())
            atomic_data = AtomicData.from_dict(atomic_data)

        if "eigenvalue" in data and "kpoint" in data:
            atomic_data[AtomicDataDict.KPOINT_KEY] = torch.as_tensor(
                data["kpoint"][:], dtype=torch.get_default_dtype()
            )
            atomic_data[AtomicDataDict.ENERGY_EIGENVALUE_KEY] = torch.as_tensor(
                data["eigenvalue"][:], dtype=torch.get_default_dtype()
            )
    return atomic_data


class ABACUSInMemoryDataset(AtomicInMemoryDataset):
    """In-memory dataset built from ABACUS calculation results.

    Two construction modes are supported:

    * ``file_name`` given: the value is used directly as the list of
      preprocessed HDF5 files to load.
    * ``file_name`` omitted: the ABACUS output below
      ``abacus_args["input_dir"]`` is parsed with ``recursive_parse`` and the
      HDF5 files it returns are loaded.

    Args:
        root: Dataset root directory; also hosts the ``preprocess`` folder
            when ``abacus_args["preprocess_dir"]`` is not given.
        abacus_args: Keyword arguments forwarded to ``recursive_parse``.
            ``input_dir`` is mandatory when ``file_name`` is omitted.
        file_name: Preprocessed HDF5 file(s) to load instead of parsing.
        url: Forwarded to the base class.
        AtomicData_options: Extra keyword arguments for
            ``AtomicData.from_points`` (see ``get_data``).
        include_frames: Forwarded to the base class.
        type_mapper: Forwarded to the base class.
        key_mapping: Stored on the instance as ``self.key_mapping``.
    """

    def __init__(
        self,
        root: str,
        abacus_args: Dict[str, Union[str,bool]] = {
            "input_dir": None,
            "preprocess_dir": None,
            "only_overlap": False,
            "get_Ham": False,
            "add_overlap": False,
            "get_eigenvalues": False,
        },
        file_name: Optional[str] = None,
        url: Optional[str] = None,
        AtomicData_options: Dict[str, Any] = {},
        include_frames: Optional[List[int]] = None,
        type_mapper: TypeMapper = None,
        key_mapping: Dict[str, str] = {
            "pos": AtomicDataDict.POSITIONS_KEY,
            "energy": AtomicDataDict.TOTAL_ENERGY_KEY,
            "atomic_numbers": AtomicDataDict.ATOMIC_NUMBERS_KEY,
            "kpoints": AtomicDataDict.KPOINT_KEY,
            "eigenvalues": AtomicDataDict.ENERGY_EIGENVALUE_KEY,
        },
    ):
        # Work on a private copy: "preprocess_dir" may be filled in below, and
        # neither the shared default dict nor the caller's dict may be
        # modified as a side effect of constructing a dataset.
        self.abacus_args = dict(abacus_args or {})
        # Set in both construction modes so that every instance exposes the
        # same attributes (``raw_dir`` reads ``self.abacus_args``).
        self.key_mapping = key_mapping

        if file_name is not None:
            self.file_name = file_name
        else:
            assert self.abacus_args.get("input_dir") is not None, "ABACUS calculation results MUST be provided."
            if self.abacus_args.get("preprocess_dir") is None:
                print("Creating new preprocess dictionary...")
                preprocess_dir = os.path.join(root, "preprocess")
                # ``exist_ok`` keeps a second run over the same root from
                # crashing; missing parent directories are created as well.
                os.makedirs(preprocess_dir, exist_ok=True)
                self.abacus_args["preprocess_dir"] = preprocess_dir

            print("Begin parsing ABACUS output...")
            h5_filenames = recursive_parse(**self.abacus_args)
            self.file_name = h5_filenames
            print("Finished parsing ABACUS output.")

        super().__init__(
            file_name=self.file_name,
            url=url,
            root=root,
            AtomicData_options=AtomicData_options,
            include_frames=include_frames,
            type_mapper=type_mapper,
        )

    def get_data(self):
        """Read every HDF5 file in ``self.file_name`` into a list of data objects."""
        file_names = self.file_name
        # A single path would otherwise be iterated character by character.
        if isinstance(file_names, (str, os.PathLike)):
            file_names = [file_names]
        return [
            _abacus_h5_reader(h5_file, self.AtomicData_options)
            for h5_file in file_names
        ]

    @property
    def raw_file_names(self):
        """Name of the HDF5 file written for each parsed structure."""
        return "AtomicData.h5"

    @property
    def raw_dir(self):
        """ABACUS input directory, or ``None`` when no ``input_dir`` was given."""
        return self.abacus_args.get("input_dir")
15 changes: 9 additions & 6 deletions dptb/data/interfaces/abacus.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,27 @@ def transform(self, mat, l_lefts, l_rights):
block_rights = block_diag(*[self.get_U(l_right) for l_right in l_rights])
return block_lefts @ mat @ block_rights.T

def recursive_parse(input_dir, preprocess_dir, data_name="OUT.ABACUS", only_overlap=False, get_Ham=False, add_overlap=False, get_eigenvalues=False):
    """Parse every ABACUS calculation found directly below ``input_dir``.

    Each immediate sub-directory of ``input_dir`` that contains an entry named
    ``data_name`` is handed to ``_abacus_parse``, with
    ``preprocess_dir/<sub-directory>`` as its output path. If the data folder
    holds an ``hscsr.tgz`` archive it is unpacked first and the contents of the
    extracted ``OUT.ABACUS`` folder are moved into the data folder.

    Args:
        input_dir: Directory whose sub-directories hold the calculations.
        preprocess_dir: Directory receiving the parsed output; created if
            missing.
        data_name: Name of the ABACUS output folder inside each sub-directory.
        only_overlap: Forwarded to ``_abacus_parse`` as ``only_S``.
        get_Ham: Forwarded to ``_abacus_parse``.
        add_overlap: Forwarded to ``_abacus_parse``.
        get_eigenvalues: Forwarded to ``_abacus_parse``.

    Returns:
        List of ``.../AtomicData.h5`` paths, one per sub-directory that was
        parsed without raising. Failures are printed and skipped.
    """
    # Local import: only needed for unpacking archives, and keeps this
    # function self-contained with respect to the module's import block.
    import subprocess

    input_dir = os.path.abspath(input_dir)
    preprocess_dir = os.path.abspath(preprocess_dir)
    os.makedirs(preprocess_dir, exist_ok=True)

    h5file_names = []
    # ``os.listdir`` returns entries in arbitrary order; sorting makes the
    # order of the returned files reproducible between runs and machines.
    for entry in sorted(os.listdir(input_dir)):
        sample_dir = os.path.join(input_dir, entry)
        if not os.path.isdir(sample_dir) or data_name not in os.listdir(sample_dir):
            continue

        data_dir = os.path.join(sample_dir, data_name)
        if os.path.exists(os.path.join(data_dir, "hscsr.tgz")):
            # The working directory is passed via ``cwd`` instead of being
            # spliced into the command, so paths containing spaces or shell
            # metacharacters are handled correctly. The shell is still needed
            # for the ``*`` glob; as before, the exit status is not checked.
            subprocess.run(
                "tar -zxvf hscsr.tgz && mv OUT.ABACUS/* ./",
                shell=True,
                cwd=data_dir,
                check=False,
            )

        try:
            _abacus_parse(
                sample_dir,
                os.path.join(preprocess_dir, entry),
                data_name,
                only_S=only_overlap,
                get_Ham=get_Ham,
                add_overlap=add_overlap,
                get_eigenvalues=get_eigenvalues,
            )
        except Exception as e:
            # Best-effort batch parsing: report which calculation failed and
            # carry on with the remaining ones.
            print(f"Error in {data_dir}: {e}")
            continue
        h5file_names.append(os.path.join(preprocess_dir, entry, "AtomicData.h5"))
    return h5file_names

def abacus_parse(input_path,
def _abacus_parse(input_path,
output_path,
data_name,
only_S=False,
Expand Down Expand Up @@ -328,4 +331,4 @@ def parse_matrix(matrix_path, factor, spinful=False):
f["eigenvalue"] = band
# else:
# f["kpoint"] = False
# f["eigenvalue"] = False
# f["eigenvalue"] = False