Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added methods to compute and compare DOS fingerprints #2772

Merged
merged 35 commits into from
Dec 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
177a7a9
added dos_fingerprint and similarity index methods
naik-aakash Dec 5, 2022
e9669d1
Added test cases and reformatted and cleaned code
naik-aakash Dec 5, 2022
6a52d42
added binwidth ,renamed states to densities in fp obj,updated tests
naik-aakash Dec 6, 2022
04b4ec7
Merge branch 'materialsproject:master' into dos_fingerprinting
naik-aakash Dec 6, 2022
876eefe
remove redundant comment
naik-aakash Dec 6, 2022
b7b2296
Merge branch 'dos_fingerprinting' of github.com:naik-aakash/pymatgen …
naik-aakash Dec 6, 2022
e5e1c10
Merge branch 'materialsproject:master' into dos_fingerprinting
naik-aakash Dec 13, 2022
dcd37ff
added source link
naik-aakash Dec 13, 2022
4f5752c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 13, 2022
1bc772f
fix liniting error
naik-aakash Dec 13, 2022
2a4c76f
Merge branch 'dos_fingerprinting' of github.com:naik-aakash/pymatgen …
naik-aakash Dec 13, 2022
0ee33dd
fix line length linting error
naik-aakash Dec 13, 2022
d5468ef
fix line lenghts
naik-aakash Dec 13, 2022
6d68324
changed get_dos_fp_similarity and fp_to_dict methods to static
naik-aakash Dec 13, 2022
cf25f28
fixed typo in doc string
naik-aakash Dec 13, 2022
eb8cfa4
fixed doc string suggestion
naik-aakash Dec 14, 2022
ff7c271
Updated doc strings based on suggestion, added type annotation to the…
naik-aakash Dec 15, 2022
1882916
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 15, 2022
3c476b7
Merge branch 'materialsproject:master' into dos_fingerprinting
naik-aakash Dec 15, 2022
7b1fcee
mypy fixes
naik-aakash Dec 15, 2022
b064ac9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 15, 2022
a1a44e8
fix flake8
naik-aakash Dec 15, 2022
433e5c1
mypy error fix
naik-aakash Dec 15, 2022
cb9484f
mypy error fix
naik-aakash Dec 15, 2022
f27e5fc
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 15, 2022
746c12a
Delete Test.py
naik-aakash Dec 15, 2022
a1f21c2
simplified dict updating, added missing type annotations
naik-aakash Dec 19, 2022
a681b70
mypy error fixes
naik-aakash Dec 19, 2022
6445f42
NamedTuple return type fixed
naik-aakash Dec 19, 2022
d0cfa20
Merge branch 'materialsproject:master' into dos_fingerprinting
naik-aakash Dec 19, 2022
4b1417e
Merge branch 'materialsproject:master' into dos_fingerprinting
naik-aakash Dec 20, 2022
c589945
small clean up
janosh Dec 24, 2022
1c2dbc4
document get_dos_fp() and get_dos_fp_similarity() raise conditions in…
janosh Dec 24, 2022
a8216c7
add types for fp1,fp2 and update doc str
janosh Dec 24, 2022
8309b1b
update exception tests
janosh Dec 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 162 additions & 1 deletion pymatgen/electronic_structure/dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

import functools
import warnings
from typing import Mapping
from collections import namedtuple
from typing import Mapping, NamedTuple

import numpy as np
from monty.json import MSONable
Expand Down Expand Up @@ -1165,6 +1166,166 @@ def get_upper_band_edge(
upper_band_edge = energies[np.argmax(densities)]
return upper_band_edge

def get_dos_fp(
self,
type: str = "summed_pdos",
binning: bool = True,
min_e: float | None = None,
max_e: float | None = None,
n_bins: int = 256,
normalize: bool = True,
) -> NamedTuple:
"""
Generates the DOS fingerprint based on work of
F. Knoop, T. A. r Purcell, M. Scheffler, C. Carbogno, J. Open Source Softw. 2020, 5, 2671.
Source - https://gitlab.com/vibes-developers/vibes/-/tree/master/vibes/materials_fp
Copyright (c) 2020 Florian Knoop, Thomas A.R.Purcell, Matthias Scheffler, Christian Carbogno


Args:
type (str): Specify fingerprint type needed can accept '{s/p/d/f/}summed_{pdos/tdos}'
(default is summed_pdos)
min_e (float): The minimum mode energy to include in the fingerprint (default is None)
max_e (float): The maximum mode energy to include in the fingerprint (default is None)
n_bins (int): Number of bins to be used in the fingerprint (default is 256)
normalize (bool): If true, normalizes the area under fp to equal to 1 (default is True)

Raises:
ValueError: If type is not one of the accepted values {s/p/d/f/}summed_{pdos/tdos}.

Returns:
Fingerprint(namedtuple) : The electronic density of states fingerprint
of format (energies, densities, type, n_bins)
"""
fingerprint = namedtuple("fingerprint", "energies densities type n_bins bin_width")
energies = self.energies - self.efermi

if max_e is None:
max_e = np.max(energies)

if min_e is None:
min_e = np.min(energies)

pdos_obj = self.get_spd_dos()

pdos = {}
for key in pdos_obj:
dens = pdos_obj[key].get_densities()

pdos[key.name] = dens

pdos["summed_pdos"] = np.sum(list(pdos.values()), axis=0)
pdos["tdos"] = self.get_densities()

try:
densities = pdos[type]
if len(energies) < n_bins:
inds = np.where((energies >= min_e) & (energies <= max_e))
return fingerprint(energies[inds], densities[inds], type, len(energies), np.diff(energies)[0])

if binning:
ener_bounds = np.linspace(min_e, max_e, n_bins + 1)
ener = ener_bounds[:-1] + (ener_bounds[1] - ener_bounds[0]) / 2.0
bin_width = np.diff(ener)[0]
else:
ener_bounds = np.array(energies)
ener = np.append(energies, [energies[-1] + np.abs(energies[-1]) / 10])
n_bins = len(energies)
bin_width = np.diff(energies)[0]

dos_rebin = np.zeros(ener.shape)

for ii, e1, e2 in zip(range(len(ener)), ener_bounds[0:-1], ener_bounds[1:]):
inds = np.where((energies >= e1) & (energies < e2))
dos_rebin[ii] = np.sum(densities[inds])
if normalize: # scale DOS bins to make area under histogram equal 1
area = np.sum(dos_rebin * bin_width)
dos_rebin_sc = dos_rebin / area
else:
dos_rebin_sc = dos_rebin

return fingerprint(np.array([ener]), dos_rebin_sc, type, n_bins, bin_width)

except KeyError:
raise ValueError(
"Please recheck type requested, either the orbital projections unavailable in input DOS or "
"there's a typo in type."
)

@staticmethod
def fp_to_dict(fp: NamedTuple) -> dict:
"""Converts a fingerprint into a dictionary

Args:
fp: The DOS fingerprint to be converted into a dictionary

Returns:
dict: A dict of the fingerprint Keys=type, Values=np.ndarray(energies, densities)
"""
fp_dict = {}
fp_dict[fp[2]] = np.array([fp[0], fp[1]], dtype="object").T

return fp_dict

@staticmethod
def get_dos_fp_similarity(
fp1: NamedTuple,
fp2: NamedTuple,
col: int = 1,
pt: int | str = "All",
normalize: bool = False,
tanimoto: bool = False,
) -> float:
"""Calculates the similarity index (dot product) of two fingerprints

Args:
fp1 (NamedTuple): The 1st dos fingerprint object
fp2 (NamedTuple): The 2nd dos fingerprint object
col (int): The item in the fingerprints (0:energies,1: densities) to take the dot product of (default is 1)
pt (int or str) : The index of the point that the dot product is to be taken (default is All)
normalize (bool): If True normalize the scalar product to 1 (default is False)
tanimoto (bool): If True will compute Tanimoto index (default is False)

Raises:
ValueError: If both tanimoto and normalize are set to True.

Returns:
Similarity index (float): The value of dot product
"""
if not isinstance(fp1, dict):
fp1_dict = CompleteDos.fp_to_dict(fp1)
else:
fp1_dict = fp1

if not isinstance(fp2, dict):
fp2_dict = CompleteDos.fp_to_dict(fp2)
else:
fp2_dict = fp2

if pt == "All":
vec1 = np.array([pt[col] for pt in fp1_dict.values()]).flatten()
vec2 = np.array([pt[col] for pt in fp2_dict.values()]).flatten()
else:
vec1 = fp1_dict[fp1[2][pt]][col]
vec2 = fp2_dict[fp2[2][pt]][col]

if not normalize and tanimoto:
rescale = np.linalg.norm(vec1) ** 2 + np.linalg.norm(vec2) ** 2 - np.dot(vec1, vec2)
return np.dot(vec1, vec2) / rescale

elif not tanimoto and normalize:
rescale = np.linalg.norm(vec1) * np.linalg.norm(vec2)
return np.dot(vec1, vec2) / rescale

elif not tanimoto and not normalize:
rescale = 1.0
return np.dot(vec1, vec2) / rescale

else:
raise ValueError(
"Cannot compute similarity index. Please set either normalize=True or tanimoto=True or both to False."
)

@classmethod
def from_dict(cls, d) -> CompleteDos:
"""
Expand Down
48 changes: 48 additions & 0 deletions pymatgen/electronic_structure/tests/test_dos.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,54 @@ def test_kurtosis(self):
kurtosis = dos.get_band_kurtosis()
self.assertAlmostEqual(kurtosis, 7.764506941340621)

def test_get_dos_fp(self):
# normalize=True
dos_fp = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=True)
bin_width = np.diff(dos_fp.energies)[0][0]
self.assertLessEqual(max(dos_fp.energies[0]), 0)
self.assertGreaterEqual(min(dos_fp.energies[0]), -10)
self.assertEqual(len(dos_fp.energies[0]), 56)
self.assertEqual(dos_fp.type, "s")
self.assertAlmostEqual(sum(dos_fp.densities * bin_width), 1, delta=0.001)
# normalize=False
dos_fp2 = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=False)
bin_width2 = np.diff(dos_fp2.energies)[0][0]
self.assertAlmostEqual(sum(dos_fp2.densities * bin_width2), 7.279303571428509, delta=0.001)
self.assertAlmostEqual(dos_fp2.bin_width, bin_width2, delta=0.001)
# binning=False
dos_fp = self.dos.get_dos_fp(type="s", min_e=None, max_e=None, n_bins=56, normalize=True, binning=False)
self.assertEqual(dos_fp.n_bins, len(self.dos.energies))

def test_get_dos_fp_similarity(self):
dos_fp = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=True)
dos_fp2 = self.dos.get_dos_fp(type="tdos", min_e=-10, max_e=0, n_bins=56, normalize=True)
similarity_index = self.dos.get_dos_fp_similarity(dos_fp, dos_fp2, col=1, tanimoto=True)
self.assertAlmostEqual(similarity_index, 0.3342481451042263, delta=0.0001)

dos_fp = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=True)
dos_fp2 = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=True)
similarity_index = self.dos.get_dos_fp_similarity(dos_fp, dos_fp2, col=1, tanimoto=True)
self.assertEqual(similarity_index, 1)

def test_dos_fp_exceptions(self):
dos_fp = self.dos.get_dos_fp(type="s", min_e=-10, max_e=0, n_bins=56, normalize=True)
dos_fp2 = self.dos.get_dos_fp(type="tdos", min_e=-10, max_e=0, n_bins=56, normalize=True)
# test exceptions
with self.assertRaises(ValueError) as err:

self.dos.get_dos_fp_similarity(dos_fp, dos_fp2, col=1, tanimoto=True, normalize=True)
self.assertEqual(
err.exception.__str__(),
"Cannot compute similarity index. Please set either normalize=True or tanimoto=True or both to False.",
)
with self.assertRaises(ValueError) as err:

self.dos.get_dos_fp(type="k", min_e=-10, max_e=0, n_bins=56, normalize=True)
self.assertEqual(
err.exception.__str__(),
"Please recheck type requested, either the orbital projections unavailable in input DOS or there's a typo in type.",
)


class DOSTest(PymatgenTest):
def setUp(self):
Expand Down