Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Bandoverlaps parser #3689

Merged
90 changes: 58 additions & 32 deletions pymatgen/io/lobster/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ def __init__(self, are_coops: bool = False, are_cobis: bool = False, filename: s
# the labeling had to be changed: there are more than one COHP for each atom combination
# this is done to make the labeling consistent with ICOHPLIST.lobster
bond_num = 0
bond_data = {}
label = ""
for bond in range(num_bonds):
bond_data = self._get_bond_data(contents[3 + bond])

Expand Down Expand Up @@ -305,6 +307,7 @@ def __init__(
# include case when there is only one ICOHP!!!
self.orbitalwise = len(data) > 2 and "_" in data[1].split()[1]

data_orbitals: list[str] = []
if self.orbitalwise:
data_without_orbitals = []
data_orbitals = []
Expand Down Expand Up @@ -333,6 +336,14 @@ def __init__(
list_num = []
list_icohp = []

# initialize static variables
label = ""
atom1 = ""
atom2 = ""
length = None
num = None
translation = []

for bond in range(num_bonds):
line = data_without_orbitals[bond].split()
icohp = {}
Expand Down Expand Up @@ -446,7 +457,7 @@ def __init__(self, filename: str | None = "NcICOBILIST.lobster"): # LOBSTER < 4

# LOBSTER list files have an extra trailing blank line
# and we don't need the header.
with zopen(filename, mode="rt") as file:
with zopen(filename, mode="rt") as file: # type:ignore
data = file.read().split("\n")[1:-1]
if len(data) == 0:
raise OSError("NcICOBILIST file contains no data.")
Expand Down Expand Up @@ -664,12 +675,12 @@ def energies(self) -> np.ndarray:
return self._energies

@property
def tdensities(self) -> np.ndarray:
def tdensities(self) -> dict[Spin, np.ndarray]:
"""total densities as a np.ndarray"""
return self._tdensities

@property
def itdensities(self) -> np.ndarray:
def itdensities(self) -> dict[Spin, np.ndarray]:
"""integrated total densities as a np.ndarray"""
return self._itdensities

Expand Down Expand Up @@ -1047,6 +1058,9 @@ def _get_elements_basistype_basisfunctions(data):
def _get_timing(data):
# will give back wall, user and sys time
begin = False
user_time = []
wall_time = []
sys_time = []
# end=False
# time=[]

Expand Down Expand Up @@ -1266,6 +1280,7 @@ def __init__(
]

ikpoint = -1
linenumber = 0
for line in contents[1:-1]:
if line.split()[0] == "#":
KPOINT = np.array(
Expand Down Expand Up @@ -1318,7 +1333,7 @@ def get_bandstructure(self):
kpoints=self.kpoints_array,
eigenvals=self.eigenvals,
lattice=self.lattice,
efermi=self.efermi,
efermi=self.efermi, # type: ignore
labels_dict=self.label_dict,
structure=self.structure,
projections=self.p_eigenvals,
Expand All @@ -1345,10 +1360,12 @@ def __init__(
"""
Args:
filename: filename of the "bandOverlaps.lobster" file.
band_overlaps_dict: A dictionary containing the band overlap data of the form: {spin: {"kpoint as string":
{"max_deviation":float that describes the max deviation, "matrix": 2D array of the size number of bands
times number of bands including the overlap matrices with}}}.
max_deviation (list[float]): A list of floats describing the maximal deviation for each problematic kpoint.
band_overlaps_dict: A dictionary containing the band overlap data of the form: {spin: {
"k_points" : list of k-point array,
"max_deviations": list of max deviations associated with each k-point,
"matrices": list of the overlap matrices associated with each k-point
}}.
max_deviation (list[float]): A list of floats describing the maximal deviation for each problematic k-point.
"""
self._filename = filename
self.band_overlaps_dict = {} if band_overlaps_dict is None else band_overlaps_dict
Expand All @@ -1371,6 +1388,9 @@ def _read(self, contents: list, spin_numbers: list):
contents: list of strings
spin_numbers: list of spin numbers depending on `Lobster` version.
"""
spin: Spin = Spin.up
kpoint_array: list = []
overlaps: list = []
# This has to be done like this because there can be different numbers of problematic k-points per spin
for line in contents:
if f"Overlap Matrix (abs) of the orthonormalized projected bands for spin {spin_numbers[0]}" in line:
Expand All @@ -1382,24 +1402,31 @@ def _read(self, contents: list, spin_numbers: list):
kpoint_array = []
for kpointel in kpoint:
if kpointel not in ["at", "k-point", ""]:
kpoint_array.append(str(kpointel))
kpoint_array += [float(kpointel)]

elif "maxDeviation" in line:
if spin not in self.band_overlaps_dict:
self.band_overlaps_dict[spin] = {}
if " ".join(kpoint_array) not in self.band_overlaps_dict[spin]:
self.band_overlaps_dict[spin][" ".join(kpoint_array)] = {}
if "k_points" not in self.band_overlaps_dict[spin]:
self.band_overlaps_dict[spin]["k_points"] = []
if "max_deviations" not in self.band_overlaps_dict[spin]:
self.band_overlaps_dict[spin]["max_deviations"] = []
if "matrices" not in self.band_overlaps_dict[spin]:
self.band_overlaps_dict[spin]["matrices"] = []
maxdev = line.split(" ")[2]
self.band_overlaps_dict[spin][" ".join(kpoint_array)]["maxDeviation"] = float(maxdev)
self.max_deviation.append(float(maxdev))
self.band_overlaps_dict[spin][" ".join(kpoint_array)]["matrix"] = []
self.band_overlaps_dict[spin]["max_deviations"] += [float(maxdev)]
self.band_overlaps_dict[spin]["k_points"] += [kpoint_array]
self.max_deviation += [float(maxdev)]
overlaps = []

else:
overlaps = []
rows = []
for el in line.split(" "):
if el != "":
overlaps.append(float(el))
self.band_overlaps_dict[spin][" ".join(kpoint_array)]["matrix"].append(overlaps)
rows += [float(el)]
overlaps += [rows]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

overlaps is possibly unbound now from renaming overlaps = [] to rows above.

we don't check for this in CI yet due to way too many legacy violations but really should. related discussion in #3646

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @janosh, I did not get what you meant here. Can you please elaborate a bit more? I need the overlaps variable initialized in the previous if clause to properly store matrices for each k-point.

Is there anything I need to do ? I could not think of any other way to do this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the problem is, if the code enters the else case overlaps won't be declared, so Python will throw a NameError

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, such case won't happen as per the current file format, it will always pass through the elif clause .

But I Will check for a few more examples that I have and see If I encounter any errors

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @janosh , I checked for about 20 different example files which I have, does not result in NameError due to file format , logic seems to work without breaking it 😃

But if you have any better idea to get same outcome, I am happy to implement it. Just at this point I seem to have blanked out 😅 and cannot think of any alternatives

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could pip install pyright and run it on this file. it'll show you the error i mean. you just need to ensure overlaps is declared by all code paths

Copy link
Contributor Author

@naik-aakash naik-aakash Mar 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @janosh , I have now addressed the errors which I could using pyright. If you think these are okay it could be merged.

if len(overlaps) == len(rows):
self.band_overlaps_dict[spin]["matrices"] += [np.matrix(overlaps)]

def has_good_quality_maxDeviation(self, limit_maxDeviation: float = 0.1) -> bool:
"""
Expand Down Expand Up @@ -1433,38 +1460,35 @@ def has_good_quality_check_occupied_bands(
Returns:
Boolean that will give you information about the quality of the projection
"""
for matrix in self.band_overlaps_dict[Spin.up].values():
for iband1, band1 in enumerate(matrix["matrix"]):
for matrix in self.band_overlaps_dict[Spin.up]["matrices"]:
for iband1, band1 in enumerate(matrix):
for iband2, band2 in enumerate(band1):
if iband1 < number_occ_bands_spin_up and iband2 < number_occ_bands_spin_up:
if iband1 == iband2:
if abs(band2 - 1.0) > limit_deviation:
if abs(band2 - 1.0).all() > limit_deviation:
return False
elif band2 > limit_deviation:
elif band2.all() > limit_deviation:
return False

if spin_polarized:
for matrix in self.band_overlaps_dict[Spin.down].values():
for iband1, band1 in enumerate(matrix["matrix"]):
for matrix in self.band_overlaps_dict[Spin.down]["matrices"]:
for iband1, band1 in enumerate(matrix):
for iband2, band2 in enumerate(band1):
if number_occ_bands_spin_down is not None:
if iband1 < number_occ_bands_spin_down and iband2 < number_occ_bands_spin_down:
if iband1 == iband2:
if abs(band2 - 1.0) > limit_deviation:
if abs(band2 - 1.0).all() > limit_deviation:
return False
elif band2 > limit_deviation:
elif band2.all() > limit_deviation:
return False
else:
ValueError("number_occ_bands_spin_down has to be specified")
return True

@property
def bandoverlapsdict(self):
warnings.warn(
"`bandoverlapsdict` attribute is deprecated. Use `band_overlaps_dict` instead.",
DeprecationWarning,
stacklevel=2,
)
msg = "`bandoverlapsdict` attribute is deprecated. Use `band_overlaps_dict` instead."
warnings.warn(msg, DeprecationWarning, stacklevel=2)
return self.band_overlaps_dict


Expand Down Expand Up @@ -1492,18 +1516,18 @@ def __init__(self, filename: str = "GROSSPOP.lobster", list_dict_grosspop: list[
# opens file
self._filename = filename
self.list_dict_grosspop = [] if list_dict_grosspop is None else list_dict_grosspop

if not self.list_dict_grosspop:
with zopen(filename, mode="rt") as file:
contents = file.read().split("\n")
# transfers content of file to list of dict
small_dict: dict[str, Any] = {}
for line in contents[3:]:
cleanline = [i for i in line.split(" ") if i != ""]
if len(cleanline) == 5:
small_dict = {}
small_dict["element"] = cleanline[1]
small_dict["Mulliken GP"] = {}
small_dict["Loewdin GP"] = {}
small_dict["element"] = cleanline[1]
small_dict["Mulliken GP"][cleanline[2]] = float(cleanline[3])
small_dict["Loewdin GP"][cleanline[2]] = float(cleanline[4])
elif len(cleanline) > 0:
Expand Down Expand Up @@ -2028,6 +2052,8 @@ def _parse_matrix(file_data, pattern, e_fermi):
end_inxs_imag.append(len(file_data))

# extract matrix data and store diagonal elements
matrix_real = []
matrix_imag = []
for start_inx_real, end_inx_real, start_inx_imag, end_inx_imag in zip(
start_inxs_real, end_inxs_real, start_inxs_imag, end_inxs_imag
):
Expand Down
Loading