Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read INCAR SYSTEM as is, check_params use proc_val #4136

Merged
merged 14 commits into from
Nov 12, 2024
Merged
47 changes: 36 additions & 11 deletions src/pymatgen/io/vasp/incar_parameters.json
Original file line number Diff line number Diff line change
Expand Up @@ -191,24 +191,35 @@
"GGA": {
"type": "str",
"values": [
"91",
"PE",
"AM",
"HL",
"CA",
"MK",
"RE",
"VW",
"B3",
"PZ",
"SL",
"CA_C",
"PZ_C",
"VW",
"HL",
"WI",
"LIBXC",
"LI",
"91",
"PE",
"PBE_X",
"PBE_C",
"RE",
"RP",
"B5",
"BF",
"CO",
"PS",
"AM",
"B3",
"B5",
"OR",
"BO",
"MK",
"ML",
"CX",
"BF",
"RP",
"CO",
"RE",
"03",
"05",
"10",
Expand Down Expand Up @@ -970,6 +981,17 @@
"ML_FF_WTSIF": {
"type": "float"
},
"ML_MODE": {
"type": "str",
"values": [
"train",
"select",
"refit",
"refitbayesian",
"run",
"none"
]
},
"NBANDS": {
"type": "int"
},
Expand Down Expand Up @@ -1444,5 +1466,8 @@
},
"MAGDIPOLOUT": {
"type": "bool"
},
"ZAB_VDW": {
"type": "float"
}
}
13 changes: 9 additions & 4 deletions src/pymatgen/io/vasp/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ class Incar(UserDict, MSONable):

- Keys are stored in uppercase to allow case-insensitive access (set, get, del, update, setdefault).
- String values are capitalized by default, except for keys specified
in the `lower_str_keys` of the `proc_val` method.
in the `lower_str_keys/as_is_str_keys` of the `proc_val` method.
"""

def __init__(self, params: dict[str, Any] | None = None) -> None:
Expand Down Expand Up @@ -971,6 +971,8 @@ def proc_val(key: str, val: str) -> list | bool | float | int | str:
"IVDW",
)
lower_str_keys = ("ML_MODE",)
# String keywords to read "as is" (no case transformation, only stripped)
as_is_str_keys = ("SYSTEM",)

def smart_int_or_float(num_str: str) -> float:
"""Determine whether a string represents an integer or a float."""
Expand Down Expand Up @@ -1006,6 +1008,9 @@ def smart_int_or_float(num_str: str) -> float:
if key in lower_str_keys:
return val.strip().lower()

if key in as_is_str_keys:
return val.strip()

except ValueError:
pass

Expand Down Expand Up @@ -1087,9 +1092,9 @@ def check_params(self) -> None:
# Only check value when it's not None,
# meaning there is recording for corresponding value
if allowed_values is not None:
# Note: param_type could be a Union type, e.g. "str | bool"
if "str" in param_type:
allowed_values = [item.capitalize() if isinstance(item, str) else item for item in allowed_values]
allowed_values = [
self.proc_val(tag, item) if isinstance(item, str) else item for item in allowed_values
]

if val not in allowed_values:
warnings.warn(
Expand Down
8 changes: 4 additions & 4 deletions src/pymatgen/io/vasp/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ def _parse(
elif tag == "dielectricfunction":
label = elem.attrib.get("comment", None)
if label is None:
if self.incar.get("ALGO", "Normal").upper() == "BSE":
if self.incar.get("ALGO", "Normal") == "Bse":
Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After #4122 we should be confident that string values are now capitalized (except for those in lower_str_keys and as_is_str_keys), these case conversion may not be needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If they are capitalized, why comparing with "Bse" (non-capitalized)

Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I may not understand your question? The value Bse is capitalized? The key ALGO is in uppercase.

print("BSE".capitalize())  # Bse

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sory, I got completely confused. Mostly because of apparent inconsistency between the code and VASP wiki. If you look at https://www.vasp.at/wiki/index.php/ALGO, you will see that BSE and many other values are all capitals:

ALGO = Normal | VeryFast | Fast | Conjugate | All | Damped | Subrot | Eigenval | Exact | None | Nothing | CHI | G0W0 | GW0 | GW | scGW0 | scGW | G0W0R | GW0R | GWR | scGW0R | scGWR | ACFDT | RPA | ACFDTR | RPAR | BSE | TDHF

So, the code reads awkward now.

Copy link
Contributor Author

@DanielYang59 DanielYang59 Oct 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got completely confused. Mostly because of apparent inconsistency between the code and VASP wiki

Yes I find it confusing too #4122 (comment), and @esoteric-ephemera said it's a convention to pymatgen #4122 (comment)

This is likely like this since the start:
image

As VASP itself is largely case insensitive, we just need to internally standardize for easy handling (no need to cast to upper/capitalize when comparing values and so on). I personally prefer to cast both value and tag to uppercase for the following reason:

  • VASP Wiki large prefer upper case for keys, and get rid of the headache to memorize "key is uppercase", but "case is capitalized" and so on
  • Most values (just like keys) are first-letter abbreviations of some sort, capitalizing them doesn't make much sense, though there're values that is indeed proper words like "Fast/Conjugate/All/..."

But this would be badly breaking (every downstream package would find the INCAR key in a different case) so I guess it's not worth the price. So we'd better come up with something that is not breaking. In any case I would leave this for a future PR.


I'm not sure it's doable or not, a good alternative is to subclass a "case insensitive string" as an inplace replacement of built in str to avoid the headache altogether (but I currently don't have a good way to overwrite the membership check), I might have another try later.

class CaseInsensitiveStr(str):
    def __new__(cls, value):
        return super().__new__(cls, value.casefold())

    def __eq__(self, other):
        if isinstance(other, str):
            return super().__eq__(other.casefold())
        return NotImplemented

    def __hash__(self):
        return hash(self.casefold())

str_a = CaseInsensitiveStr("HELLO")

print(str_a)  # hello

print(str_a == "hello")  # True
print(str_a == "HELLO")  # True

print(str_a in {"hello", "world"})  # True
print(str_a in {"HELLO", "world"})  # False, should be True
print(str_a in {"world"})  # False

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I find it confusing too #4122 (comment), and @esoteric-ephemera said it's a convention to pymatgen #4122 (comment)

I see, and I agree that abstracting things out into case-sensitive/insensitive class is the most robust way to go.

As for the convention, it annoyingly has exceptions:

class Incar(UserDict, MSONable):
    """
    A case-insensitive dictionary to read/write INCAR files with additional helper functions.

    - Keys are stored in uppercase to allow case-insensitive access (set, get, del, update, setdefault).
    - String values are capitalized by default, except for keys specified
        in the `lower_str_keys` of the `proc_val` method.
    """

As you can see, capitalization is documented, except "keys specified in lower_str_keys". Currently it is a single key - ML_MODE.

However, do note that Incar.check_params does not expect such exceptions:

            if allowed_values is not None:
                # Note: param_type could be a Union type, e.g. "str | bool"
                if "str" in param_type:
                    allowed_values = [item.capitalize() if isinstance(item, str) else item for item in allowed_values]

(aside: incar_parameters.json does not even list ML_MODE)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you can see, capitalization is documented, except "keys specified in lower_str_keys". Currently it is a single key - ML_MODE.

Yep I updated the docstring to reflect this key/value case issue, but I'm not sure about the casing of ML_MODE either (I thought VASP is largely case insensitive ) #4122 (comment)

I never use that tag so cannot comment, it would be great if you (or someone else) could confirm this behaviour.

However, do note that Incar.check_params does not expect such exceptions:

Thanks I would update it (one exception makes everything hard to handle)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I never use that tag so cannot comment, it would be great if you (or someone else) could confirm this behaviour.

I do not use this tag either, but clearly its value is case-sensitive - #3625
I looked up https://www.vasp.at/wiki/index.php/ML_MODE and it says

Warning: If any option other than the above is chosen or there is a spelling error (be careful to write everything in upper case or lower case letters) the code will exit with an error.

I conclude that case-insensitivity is not universal.

Ironically, simply upcasing (rathen than capitalizing) everything would not require making an exception for ML_MODE.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not use this tag either, but clearly its value is case-sensitive - #3625

Yes I did see this PR and the VASP comment as well.

I conclude that case-insensitivity is not universal.
Ironically, simply upcasing (rathen than capitalizing) everything would not require making an exception for ML_MODE.

I believe ML_MODE is the only exception for now. upcase everything (except for SYSTEM) would make everything easy, but I guess ML_MODE was not there when this part of code was written (12 years ago), and it's hard to change it now without breakage

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

t's hard to change it now without breakage

100% agree.

label = "freq_dependent"
elif "density" not in self.dielectric_data:
label = "density"
Expand Down Expand Up @@ -634,7 +634,7 @@ def converged_electronic(self) -> bool:
while set(final_elec_steps[idx]) == to_check:
idx += 1
return idx + 1 != self.parameters["NELM"]
if self.incar.get("ALGO", "").upper() == "EXACT" and self.incar.get("NELM") == 1:
if self.incar.get("ALGO", "") == "Exact" and self.incar.get("NELM") == 1:
return True
return len(final_elec_steps) < self.parameters["NELM"]

Expand Down Expand Up @@ -793,10 +793,10 @@ def run_type(self) -> str:
"--",
"None",
}:
incar_tag = self.incar.get("METAGGA", "").strip().upper()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this would be covered by

return val.strip().capitalize()

incar_tag = self.incar.get("METAGGA", "").upper()
run_type = METAGGA_TYPES.get(incar_tag, incar_tag)
elif self.parameters.get("GGA"):
incar_tag = self.parameters.get("GGA", "").strip().upper()
incar_tag = self.parameters.get("GGA", "").upper()
run_type = GGA_TYPES.get(incar_tag, incar_tag)
elif self.potcar_symbols[0].split()[0] == "PAW":
run_type = "LDA"
Expand Down
28 changes: 21 additions & 7 deletions tests/io/vasp/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,14 @@ def test_key_case_insensitive(self):
incar.setdefault("NPAR", 4)
assert incar["npar"] == 4

# Test pop
assert incar.pop(test_tag.lower()) == 520
assert test_tag.upper() not in incar
assert test_tag.lower() not in incar

# Test pop with default value
assert incar.pop("missing_key", 42) == 42

def test_copy(self):
incar2 = self.incar.copy()
assert isinstance(incar2, Incar), f"Expected Incar, got {type(incar2).__name__}"
Expand Down Expand Up @@ -678,9 +686,11 @@ def test_diff(self):
"ISMEAR": {"INCAR1": 0, "INCAR2": -5},
"NPAR": {"INCAR1": 8, "INCAR2": 1},
"SYSTEM": {
"INCAR1": "Id=[0] dblock_code=[97763-icsd] formula=[li mn (p o4)] sg_name=[p n m a]",
"INCAR2": "Id=[91090] dblock_code=[20070929235612linio-59.53134651-vasp] formula=[li3 ni3 o6] "
"sg_name=[r-3m]",
"INCAR1": "id=[0] dblock_code=[97763-ICSD] formula=[Li Mn (P O4)] sg_name=[P n m a]",
"INCAR2": (
"id=[91090] dblock_code=[20070929235612LiNiO-59.53134651-VASP] "
"formula=[Li3 Ni3 O6] sg_name=[R-3m]"
),
},
"ALGO": {"INCAR1": "Damped", "INCAR2": "Fast"},
"LHFCALC": {"INCAR1": True, "INCAR2": None},
Expand Down Expand Up @@ -715,14 +725,15 @@ def test_from_file_and_from_dict(self):
float/int, and might yield values in wrong type.
"""
# Init from dict
incar_dict = {"ENCUT": 500, "GGA": "PS", "NELM": 60.0}
incar_dict = {"ENCUT": 500, "GGA": "PS", "NELM": 60.0, "SYSTEM": "This should NOT BE capitalized"}
incar_from_dict = Incar(incar_dict)

# Init from file (from string)
incar_str = """\
ENCUT = 500
GGA = PS
NELM = 60.0
SYSTEM = This should NOT BE capitalized
"""

with ScratchDir("."):
Expand All @@ -739,6 +750,7 @@ def test_from_file_and_from_dict(self):
assert incar_from_dict == incar_from_file
for key in incar_from_dict:
assert type(incar_from_dict[key]) is type(incar_from_file[key])
assert incar_from_file["SYSTEM"] == "This should NOT BE capitalized"

def test_write(self):
tmp_file = f"{self.tmp_path}/INCAR.testing"
Expand Down Expand Up @@ -774,7 +786,7 @@ def test_get_str(self):
NUPDOWN = 0
PREC = Accurate
SIGMA = 0.05
SYSTEM = Id=[0] dblock_code=[97763-icsd] formula=[li mn (p o4)] sg_name=[p n m a]
SYSTEM = id=[0] dblock_code=[97763-ICSD] formula=[Li Mn (P O4)] sg_name=[P n m a]
TIME = 0.4"""
assert incar_str == expected

Expand Down Expand Up @@ -876,13 +888,15 @@ def test_check_params(self):
"AMIN": 0.01,
"ICHARG": 1,
"MAGMOM": [1, 2, 4, 5],
"ML_MODE": "RUN", # lower case string
"SYSTEM": "Hello world", # as is string
"ENCUT": 500, # make sure float key is casted
"GGA": "PS", # test string case insensitivity
"LREAL": True, # special case: Union type
"NBAND": 250, # typo in tag
"METAGGA": "SCAM", # typo in value
"EDIFF": 5 + 1j, # value should be a float
"ISIF": 9, # value out of range
"EDIFF": 5 + 1j, # value should be float
"ISIF": 9, # value not unknown
"LASPH": 5, # value should be bool
"PHON_TLIST": "is_a_str", # value should be a list
}
Expand Down