From b5bf8b3f80043b024ea58622fb4ac2d4f2de6bc6 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sat, 27 May 2023 14:41:41 +0100 Subject: [PATCH 1/6] Add requirements folder to CI test trigger --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 808c7f66d..b878389e0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,12 +9,14 @@ on: - main paths: - matminer/** + - requirements/** pull_request: branches: - main paths: - matminer/** + - requirements/** workflow_dispatch: inputs: From a8c7fca0f1ff97c6079558db6f1e5609e6e734b1 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sat, 27 May 2023 21:40:45 +0100 Subject: [PATCH 2/6] Relint with new black --- matminer/data_retrieval/retrieve_AFLOW.py | 2 -- matminer/data_retrieval/tests/test_retrieve_MPDS.py | 1 - matminer/datasets/tests/test_datasets.py | 2 -- matminer/datasets/utils.py | 1 - matminer/featurizers/conversions.py | 1 - matminer/featurizers/dos.py | 1 - matminer/featurizers/function.py | 1 - matminer/featurizers/site/fingerprint.py | 1 - matminer/featurizers/site/rdf.py | 1 - matminer/featurizers/structure/bonding.py | 2 -- matminer/featurizers/structure/composite.py | 1 - matminer/featurizers/structure/sites.py | 1 - matminer/featurizers/structure/tests/test_bonding.py | 2 -- matminer/featurizers/structure/tests/test_misc.py | 1 - matminer/featurizers/tests/test_base.py | 1 - matminer/featurizers/tests/test_dos.py | 1 - matminer/utils/tests/test_io.py | 2 -- 17 files changed, 22 deletions(-) diff --git a/matminer/data_retrieval/retrieve_AFLOW.py b/matminer/data_retrieval/retrieve_AFLOW.py index f1a2a99f8..09438503e 100644 --- a/matminer/data_retrieval/retrieve_AFLOW.py +++ b/matminer/data_retrieval/retrieve_AFLOW.py @@ -218,13 +218,11 @@ def _add_filters(self, pymongo_query): """ for str_property, value in pymongo_query.items(): - # converts str representation of property to aflow.Keyword keyword = getattr(K, str_property) if isinstance(value, dict): # handles special operators for inner_key, inner_value in value.items(): - if inner_key == "$in": self.filter( reduce( diff --git a/matminer/data_retrieval/tests/test_retrieve_MPDS.py b/matminer/data_retrieval/tests/test_retrieve_MPDS.py index f99046e31..f3302e99c 100755 --- a/matminer/data_retrieval/tests/test_retrieve_MPDS.py +++ b/matminer/data_retrieval/tests/test_retrieve_MPDS.py @@ -36,7 +36,6 @@ def setUp(self): @unittest.skipIf(on_ci.upper() == "TRUE", "Bad Datasource-GHActions pipeline") @unittest.skipIf("MPDS_KEY" not in os.environ, "MPDS_KEY env var not set") def test_valid_answer(self): - client = MPDSDataRetrieval() answer = client.get_data(self.test_request, fields={}) diff --git a/matminer/datasets/tests/test_datasets.py b/matminer/datasets/tests/test_datasets.py index 5b450f0ab..5c75dfa0c 100644 --- a/matminer/datasets/tests/test_datasets.py +++ b/matminer/datasets/tests/test_datasets.py @@ -21,7 +21,6 @@ def universal_dataset_check( bool_headers=None, test_func=None, ): - # "Hard" integrity checks that take a long time. # These tests only run if the MATMINER_DATASET_FULL_TEST # environment variable is set to True @@ -577,7 +576,6 @@ def test_expt_gap_kingsbury(self): self.universal_dataset_check("expt_gap_kingsbury", object_headers, numeric_headers) def test_expt_formation_enthalpy_kingsbury(self): - object_headers = ["formula", "likely_mpid", "phaseinfo", "reference"] numeric_headers = ["expt_form_e", "uncertainty"] diff --git a/matminer/datasets/utils.py b/matminer/datasets/utils.py index e39c6aa37..45f85e2d8 100644 --- a/matminer/datasets/utils.py +++ b/matminer/datasets/utils.py @@ -78,7 +78,6 @@ def _validate_dataset(data_path, url=None, file_hash=None, download_if_missing=T do_download = False # If the file doesn't exist, download it if not os.path.exists(data_path): - # Ensure proper arguments for download if not download_if_missing: raise OSError("Data not found and download_if_missing set to False") diff --git a/matminer/featurizers/conversions.py b/matminer/featurizers/conversions.py index e0fb2594f..f61135722 100644 --- a/matminer/featurizers/conversions.py +++ b/matminer/featurizers/conversions.py @@ -643,7 +643,6 @@ class PymatgenFunctionApplicator(ConversionFeaturizer): """ def __init__(self, func, func_args=None, func_kwargs=None, target_col_id=None, overwrite_data=False): - if not callable(func): raise TypeError(f"Function {func} is not callable!") diff --git a/matminer/featurizers/dos.py b/matminer/featurizers/dos.py index cc40e1baf..33c2f6e4b 100644 --- a/matminer/featurizers/dos.py +++ b/matminer/featurizers/dos.py @@ -597,7 +597,6 @@ def get_site_dos_scores(dos, idx, decay_length, sampling_resolution, gaussian_sm orbital_scores = {} proj = dos.get_site_spd_dos(site) for orb in proj: - # smear dos for spin up and down smear_dos = proj[orb].get_smeared_densities(gaussian_smear) dos_up = smear_dos[Spin.up] diff --git a/matminer/featurizers/function.py b/matminer/featurizers/function.py index e331afd67..b2218ce57 100644 --- a/matminer/featurizers/function.py +++ b/matminer/featurizers/function.py @@ -84,7 +84,6 @@ def __init__( combo_function=None, latexify_labels=False, ): - self.expressions = expressions or default_exps self.multi_feature_depth = multi_feature_depth self.combo_function = combo_function or np.prod diff --git a/matminer/featurizers/site/fingerprint.py b/matminer/featurizers/site/fingerprint.py index 78e0743d5..d1b4ab661 100644 --- a/matminer/featurizers/site/fingerprint.py +++ b/matminer/featurizers/site/fingerprint.py @@ -167,7 +167,6 @@ def __init__( dist_exp=2, zero_ops=True, ): - cn_target_motif_op = load_cn_target_motif_op() cn_motif_op_params = load_cn_motif_op_params() diff --git a/matminer/featurizers/site/rdf.py b/matminer/featurizers/site/rdf.py index 77cf4de3c..f7ab51a1c 100644 --- a/matminer/featurizers/site/rdf.py +++ b/matminer/featurizers/site/rdf.py @@ -97,7 +97,6 @@ def g4(etas, zetas, gammas, neigh_dist, neigh_coords, cutoff): # Loop over each neighbor j for j, neigh_j in enumerate(neigh_coords): - # Compute the distance of each neighbor (k) to r r_ij = neigh_dist[j] d_jk = neigh_coords[(j + 1) :] - neigh_coords[j] diff --git a/matminer/featurizers/structure/bonding.py b/matminer/featurizers/structure/bonding.py index 3c4499ecb..51949b94f 100644 --- a/matminer/featurizers/structure/bonding.py +++ b/matminer/featurizers/structure/bonding.py @@ -407,7 +407,6 @@ def _approximate_bonds(self, local_bonds): nearest = [] d_min = None for abss in abonds_species.keys(): - # The distance between bonds is euclidean. To get a good # measure of the coordinate between mendeleev numbers for # each specie, we use the minimum difference. ie, for @@ -715,7 +714,6 @@ class GlobalInstabilityIndex(BaseFeaturizer): """ def __init__(self, r_cut=4.0, disordered_pymatgen=False): - bv = IUCrBondValenceData() self.bv_values = bv.params self.r_cut = r_cut diff --git a/matminer/featurizers/structure/composite.py b/matminer/featurizers/structure/composite.py index 9787f32cb..c7e066486 100644 --- a/matminer/featurizers/structure/composite.py +++ b/matminer/featurizers/structure/composite.py @@ -51,7 +51,6 @@ def __init__( use_ddf=True, use_nn=True, ): - self.use_cell = use_cell self.use_chem = use_chem self.use_chg = use_chg diff --git a/matminer/featurizers/structure/sites.py b/matminer/featurizers/structure/sites.py index 4b8100678..d6b5dfcc3 100644 --- a/matminer/featurizers/structure/sites.py +++ b/matminer/featurizers/structure/sites.py @@ -330,7 +330,6 @@ def featurize(self, s): return np.hstack(output) def compute_pssf(self, s, e): - # This code is extremely similar to super().featurize(). The key # difference is that only one specific element is analyzed. diff --git a/matminer/featurizers/structure/tests/test_bonding.py b/matminer/featurizers/structure/tests/test_bonding.py index 488ca5c4e..329ea5dcc 100644 --- a/matminer/featurizers/structure/tests/test_bonding.py +++ b/matminer/featurizers/structure/tests/test_bonding.py @@ -19,7 +19,6 @@ class BondingStructureTest(StructureFeaturesTest): def test_bondfractions(self): - # Test individual structures with featurize bf_md = BondFractions.from_preset("MinimumDistanceNN") bf_md.no_oxi = True @@ -62,7 +61,6 @@ def test_bondfractions(self): self.assertArrayEqual(df["Ni - Ni bond frac."].to_numpy(), [0.0, 0.5]) def test_bob(self): - # Test a single fit and featurization scm = SineCoulombMatrix(flatten=False) bob = BagofBonds(coulomb_matrix=scm, token=" - ") diff --git a/matminer/featurizers/structure/tests/test_misc.py b/matminer/featurizers/structure/tests/test_misc.py index 5ec686ba2..003332be1 100644 --- a/matminer/featurizers/structure/tests/test_misc.py +++ b/matminer/featurizers/structure/tests/test_misc.py @@ -37,7 +37,6 @@ def test_composition_features(self): self.assertEqual(comp.implementors(), f.implementors()) def test_xrd_powderPattern(self): - # default settings test xpp = XRDPowderPattern() pattern = xpp.featurize(self.diamond) diff --git a/matminer/featurizers/tests/test_base.py b/matminer/featurizers/tests/test_base.py index 7975eabab..1b1643e5c 100644 --- a/matminer/featurizers/tests/test_base.py +++ b/matminer/featurizers/tests/test_base.py @@ -449,7 +449,6 @@ def test_ignore_errors(self): # multiindex or not, and interaction over entries/featurizers for mi, re, n, iter_entries in product([True, False], [True, False], [1, 2], [True, False]): - mf = MultipleFeaturizer([self.multi, self.single], iterate_over_entries=iter_entries) # Make some test data that will cause errors data = pd.DataFrame({"x": ["a", 2, 3]}) diff --git a/matminer/featurizers/tests/test_dos.py b/matminer/featurizers/tests/test_dos.py index 069a2ce80..cf43b76d8 100644 --- a/matminer/featurizers/tests/test_dos.py +++ b/matminer/featurizers/tests/test_dos.py @@ -28,7 +28,6 @@ def setUp(self): self.nb3sn_df = pd.DataFrame({"dos": [nb3sn_dos]}) def test_SiteDOS(self): - dos = self.df["dos"][0] # ensure that both sites give same scores (expected behavior for si) diff --git a/matminer/utils/tests/test_io.py b/matminer/utils/tests/test_io.py index ad87e1fe4..7bb186722 100644 --- a/matminer/utils/tests/test_io.py +++ b/matminer/utils/tests/test_io.py @@ -52,7 +52,6 @@ def setUp(self): self.df = pd.DataFrame(data={"structure": [self.diamond]}) def test_store_dataframe_as_json(self): - # check write produces correct file temp_file = os.path.join(self.temp_folder, "test_dataframe.json") test_file = os.path.join(test_dir, "dataframe.json") @@ -121,7 +120,6 @@ def test_store_dataframe_as_json(self): self.assertDictsAlmostEqual(temp_data, test_data) def test_load_dataframe_from_json(self): - df = load_dataframe_from_json(os.path.join(test_dir, "dataframe.json")) self.assertTrue(self.diamond == df["structure"][0], "Dataframe contents do not match") From 8c03eb96d48b381954ec56ca66c12afc26a02899 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sun, 28 May 2023 17:11:58 +0100 Subject: [PATCH 3/6] Upgrade dscribe invocation to v2 --- matminer/featurizers/site/external.py | 8 ++++---- requirements/macos-latest_py3.10_extras.txt | 2 +- requirements/macos-latest_py3.8_extras.txt | 2 +- requirements/macos-latest_py3.9_extras.txt | 2 +- requirements/ubuntu-latest_py3.10_extras.txt | 2 +- requirements/ubuntu-latest_py3.8_extras.txt | 2 +- requirements/ubuntu-latest_py3.9_extras.txt | 2 +- setup.py | 16 ++++++++++++++-- 8 files changed, 24 insertions(+), 12 deletions(-) diff --git a/matminer/featurizers/site/external.py b/matminer/featurizers/site/external.py index 51c4477c2..d05e4f048 100644 --- a/matminer/featurizers/site/external.py +++ b/matminer/featurizers/site/external.py @@ -148,9 +148,9 @@ def fit(self, X, y=None): self.atomic_numbers = elements self.soap = SOAP_dscribe( species=self.atomic_numbers, - rcut=self.rcut, - nmax=self.nmax, - lmax=self.lmax, + r_cut=self.rcut, + n_max=self.nmax, + l_max=self.lmax, sigma=self.sigma, rbf=self.rbf, periodic=self.periodic, @@ -165,7 +165,7 @@ def fit(self, X, y=None): def featurize(self, struct, idx): self._check_fitted() s_ase = self.adaptor.get_atoms(struct) - return self.soap.create(s_ase, positions=[idx], n_jobs=self.n_jobs).tolist()[0] + return self.soap.create(s_ase, centers=[idx], n_jobs=self.n_jobs).tolist()[0] def feature_labels(self): self._check_fitted() diff --git a/requirements/macos-latest_py3.10_extras.txt b/requirements/macos-latest_py3.10_extras.txt index ea823338e..2f7034091 100644 --- a/requirements/macos-latest_py3.10_extras.txt +++ b/requirements/macos-latest_py3.10_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/requirements/macos-latest_py3.8_extras.txt b/requirements/macos-latest_py3.8_extras.txt index d9479abef..0d67dcee0 100644 --- a/requirements/macos-latest_py3.8_extras.txt +++ b/requirements/macos-latest_py3.8_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/requirements/macos-latest_py3.9_extras.txt b/requirements/macos-latest_py3.9_extras.txt index 7656d2449..30fef94f2 100644 --- a/requirements/macos-latest_py3.9_extras.txt +++ b/requirements/macos-latest_py3.9_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/requirements/ubuntu-latest_py3.10_extras.txt b/requirements/ubuntu-latest_py3.10_extras.txt index b5bc6b1dc..872d17b99 100644 --- a/requirements/ubuntu-latest_py3.10_extras.txt +++ b/requirements/ubuntu-latest_py3.10_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/requirements/ubuntu-latest_py3.8_extras.txt b/requirements/ubuntu-latest_py3.8_extras.txt index 87aac8fc2..97de2de58 100644 --- a/requirements/ubuntu-latest_py3.8_extras.txt +++ b/requirements/ubuntu-latest_py3.8_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/requirements/ubuntu-latest_py3.9_extras.txt b/requirements/ubuntu-latest_py3.9_extras.txt index ccbbf09b3..ca2db529a 100644 --- a/requirements/ubuntu-latest_py3.9_extras.txt +++ b/requirements/ubuntu-latest_py3.9_extras.txt @@ -58,7 +58,7 @@ docopt==0.6.2 # via coveralls docutils==0.19 # via sphinx -dscribe==1.2.2 +dscribe==2.0.0 # via matminer (setup.py) emmet-core==0.38.9 # via mp-api diff --git a/setup.py b/setup.py index 0931f9dcc..09b886210 100644 --- a/setup.py +++ b/setup.py @@ -14,14 +14,26 @@ def local_version(version): extras_require = { "mpds": ["ujson", "jmespath", "httplib2", "ase", "jsonschema"], - "dscribe": ["dscribe"], + "dscribe": ["dscribe~=2.0"], "mdfforge": ["mdf-forge"], "aflow": ["aflow"], "citrine": ["citrination-client"], - "dev": ["pytest", "pytest-cov", "pytest-timeout", "coverage", "coveralls", "flake8", "black", "pylint", "sphinx",], + "dev": [ + "pytest", + "pytest-cov", + "pytest-timeout", + "coverage", + "coveralls", + "flake8", + "black", + "pylint", + "sphinx", + ], } tests_require = [r for v in extras_require.values() for r in v] +extras_require["tests"] = tests_require + if __name__ == "__main__": setup( name="matminer", From c611f69e35da4f59381efb1a642eb068c823de37 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sun, 28 May 2023 16:31:35 +0100 Subject: [PATCH 4/6] Add upper pin for pandas --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 09b886210..042eaa549 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,7 @@ def local_version(version): install_requires=[ "numpy>=1.20.1", "requests", - "pandas", + "pandas~=1.5", "tqdm", "pymongo", "future", From feb3a4c87e33087e0a0331da0d9b7a757d092a3e Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sun, 28 May 2023 16:40:49 +0100 Subject: [PATCH 5/6] Replace deprecated numpy type aliases --- matminer/featurizers/function.py | 2 +- matminer/featurizers/structure/matrix.py | 2 +- matminer/featurizers/structure/rdf.py | 2 +- matminer/featurizers/tests/test_function.py | 2 +- matminer/featurizers/utils/grdf.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/matminer/featurizers/function.py b/matminer/featurizers/function.py index b2218ce57..403b15f73 100644 --- a/matminer/featurizers/function.py +++ b/matminer/featurizers/function.py @@ -63,7 +63,7 @@ class FunctionFeaturizer(BaseFeaturizer): include pairwise combined features postprocess (function or type): type to cast functional outputs to, if, for example, you want to include the possibility of - complex numbers in your outputs, use postprocess=np.complex, + complex numbers in your outputs, use postprocess=np.complex128, defaults to float combo_function (function): function to combine multi-features, defaults to np.prod (i.e. cumulative product of expressions), diff --git a/matminer/featurizers/structure/matrix.py b/matminer/featurizers/structure/matrix.py index e35b577aa..70abd5e66 100644 --- a/matminer/featurizers/structure/matrix.py +++ b/matminer/featurizers/structure/matrix.py @@ -335,7 +335,7 @@ def get_ohv(self, sp, period_tag): ohd[l][curr_shell[2]] = 1 nume += curr_shell[2] shell_num += 1 - my_ohv = np.zeros(self.size, np.int) + my_ohv = np.zeros(self.size, np.int32) k = 0 for j in range(4): for i in range(2 * (2 * j + 1)): diff --git a/matminer/featurizers/structure/rdf.py b/matminer/featurizers/structure/rdf.py index 4f997b5d2..63e4d7014 100644 --- a/matminer/featurizers/structure/rdf.py +++ b/matminer/featurizers/structure/rdf.py @@ -340,7 +340,7 @@ def featurize(self, s): # Add oxidation states. struct = ValenceIonicRadiusEvaluator(struct).structure - distribution = np.zeros(self.nbins, dtype=np.float) + distribution = np.zeros(self.nbins, dtype=np.float64) for site in struct.sites: this_charge = float(site.specie.oxi_state) diff --git a/matminer/featurizers/tests/test_function.py b/matminer/featurizers/tests/test_function.py index dfba02e41..9b3e94bee 100644 --- a/matminer/featurizers/tests/test_function.py +++ b/matminer/featurizers/tests/test_function.py @@ -50,7 +50,7 @@ def test_featurize(self): # Test complex functionality expressions = ["sqrt(x)"] - ff = FunctionFeaturizer(expressions=expressions, postprocess=np.complex) + ff = FunctionFeaturizer(expressions=expressions, postprocess=np.complex128) new_df = ff.fit_featurize_dataframe(self.test_df, "a", inplace=False) self.assertEqual(new_df["sqrt(a)"][0], 1j) diff --git a/matminer/featurizers/utils/grdf.py b/matminer/featurizers/utils/grdf.py index 04f9e5aba..5e03ed537 100644 --- a/matminer/featurizers/utils/grdf.py +++ b/matminer/featurizers/utils/grdf.py @@ -88,7 +88,7 @@ def __call__(self, r_ij): return np.logical_and( np.greater_equal(r_ij, self.start), np.less(r_ij, self.start + self.width), - dtype=np.float, + dtype=np.float64, ) def volume(self, cutoff): From 259013307c6c0a27d65c51b8962cbf12cd0f98f8 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Sun, 28 May 2023 17:32:17 +0100 Subject: [PATCH 6/6] Add compatibility `np.array()` call for numpy 1.24 --- matminer/featurizers/site/chemical.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/matminer/featurizers/site/chemical.py b/matminer/featurizers/site/chemical.py index a815ff58e..8715bbfed 100644 --- a/matminer/featurizers/site/chemical.py +++ b/matminer/featurizers/site/chemical.py @@ -99,6 +99,11 @@ def fit(self, X, y=None): Returns: self """ + if isinstance(X, (list, tuple)): + # Required for numpy 1.24 due to changes in the way numpy casts + # object arrays. + X = np.array(X, dtype=object) + structs = np.atleast_2d(X)[:, 0] if not all([isinstance(struct, Structure) for struct in structs]): raise TypeError("This fit requires an array-like input of Pymatgen " "Structures and sites!")