diff --git a/matbench/analysis/analysis.py b/matbench/analysis/core.py similarity index 100% rename from matbench/analysis/analysis.py rename to matbench/analysis/core.py diff --git a/matbench/automl/tests/test_tpot.py b/matbench/automl/tests/test_tpot.py index b4bec7e9..d1e8e1e2 100644 --- a/matbench/automl/tests/test_tpot.py +++ b/matbench/automl/tests/test_tpot.py @@ -6,11 +6,11 @@ from matbench.automl.tpot_utils import TpotAutoml from matbench.automl.tpot_configs.classifier import classifier_config_dict_mb from matbench.automl.tpot_configs.regressor import regressor_config_dict_mb -from matbench.analysis.analysis import Analysis +from matbench.analysis.core import Analysis from matbench.data.load import load_double_perovskites_gap, \ load_glass_binary -from matbench.featurization.core import Featurize -from matbench.preprocessing.core import Preprocess +from matbench.featurization.core import Featurization +from matbench.preprocessing.core import Preprocessing from matminer.featurizers.composition import ElementProperty, TMetalFraction, \ Stoichiometry from sklearn.model_selection import train_test_split @@ -28,12 +28,12 @@ def test_tpot_regression(self, limit=500): target = 'gap gllbsc' # load and featurize: df_init = load_double_perovskites_gap(return_lumo=False)[:limit] - featzer = Featurize(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2']) + featzer = Featurization(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2']) df_feats = featzer.featurize_formula(df_init, featurizers=[ ElementProperty.from_preset(preset_name='matminer'), TMetalFraction()]) # preprocessing of the data - prep = Preprocess() + prep = Preprocessing() df = prep.handle_na(df_feats, max_na_frac=0.1) feats0 = set(df.columns) df = prep.prune_correlated_features(df, target, R_max=0.95) @@ -95,12 +95,12 @@ def test_tpot_classification(self, limit=500): target = 'gfa' # load and featurize: df_init = load_glass_binary()[:limit] - featzer = Featurize() + featzer = Featurization() df_feats = featzer.featurize_formula(df_init, featurizers=[ ElementProperty.from_preset(preset_name='matminer'), Stoichiometry()]) # preprocessing of the data - prep = Preprocess() + prep = Preprocessing() df = prep.handle_na(df_feats, max_na_frac=0.1) feats0 = set(df.columns) df = prep.prune_correlated_features(df, target, R_max=0.95) diff --git a/matbench/featurization/core.py b/matbench/featurization/core.py index 758fac4b..c751b2e2 100644 --- a/matbench/featurization/core.py +++ b/matbench/featurization/core.py @@ -10,7 +10,7 @@ StructureFeaturizers, BSFeaturizers, DOSFeaturizers -class Featurize(object): +class Featurization(object): """ Takes in a dataframe and generate features from preset columns such as "formula", "structure", "bandstructure", "dos", etc. One may use diff --git a/matbench/featurization/tests/test_featurize.py b/matbench/featurization/tests/test_featurize.py index f1e97bc2..6adb44cd 100644 --- a/matbench/featurization/tests/test_featurize.py +++ b/matbench/featurization/tests/test_featurize.py @@ -13,7 +13,7 @@ from matbench.data.load import load_double_perovskites_gap, \ load_castelli_perovskites -from matbench.featurization.core import Featurize +from matbench.featurization.core import Featurization from matbench.featurization.sets import AllFeaturizers from matbench.data.load import load_phonon_dielectric_mp @@ -25,7 +25,7 @@ class TestFeaturize(unittest.TestCase): def test_featurize_formula(self, limit=5): df_init = load_double_perovskites_gap(return_lumo=False)[:limit] ignore_cols = ['a_1', 'a_2', 'b_1', 'b_2'] - featurizer = Featurize(ignore_cols=ignore_cols, + featurizer = Featurization(ignore_cols=ignore_cols, ignore_errors=False, exclude=['CohesiveEnergy'], multiindex=False) @@ -83,7 +83,7 @@ def test_featurize_formula(self, limit=5): def test_featurize_structure(self, limit=5): df_init = load_castelli_perovskites()[:limit] - featurizer = Featurize(ignore_errors=False, multiindex=False) + featurizer = Featurization(ignore_errors=False, multiindex=False) df = featurizer.featurize_structure(df_init, inplace=False, featurizers="all") @@ -187,7 +187,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1): else: df_init = pd.read_pickle(os.path.join(test_dir, df_bsdos_pickled)) df_init = df_init.dropna(axis=0) - featurizer = Featurize(ignore_errors=False, multiindex=False) + featurizer = Featurization(ignore_errors=False, multiindex=False) df = featurizer.featurize_dos(df_init, inplace=False) # sanity checks @@ -222,7 +222,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1): def test_auto_featurize(self, limit=5): df_init = load_phonon_dielectric_mp()[:limit] print(df_init.structure) - featurizer = Featurize(ignore_errors=False, multiindex=True) + featurizer = Featurization(ignore_errors=False, multiindex=True) df = featurizer.auto_featurize(df_init, input_cols=('formula', 'structure')) diff --git a/matbench/preprocessing/core.py b/matbench/preprocessing/core.py index 93731322..52121227 100644 --- a/matbench/preprocessing/core.py +++ b/matbench/preprocessing/core.py @@ -1,4 +1,3 @@ -import logging import numpy as np import pandas as pd from matbench.utils.utils import setup_custom_logger @@ -8,7 +7,7 @@ from skrebate import ReliefF -class Preprocess(object): +class Preprocessing(object): """ Clean and prepare the data for visualization and training. diff --git a/matbench/preprocessing/tests/test_preprocess.py b/matbench/preprocessing/tests/test_preprocess.py index b0fac42e..8dbf6835 100644 --- a/matbench/preprocessing/tests/test_preprocess.py +++ b/matbench/preprocessing/tests/test_preprocess.py @@ -2,9 +2,9 @@ from matminer.datasets.dataframe_loader import load_elastic_tensor from matminer.featurizers.structure import GlobalSymmetryFeatures -from matbench.featurization.core import Featurize +from matbench.featurization.core import Featurization -from matbench.preprocessing.core import Preprocess +from matbench.preprocessing.core import Preprocessing class TestPreprocess(unittest.TestCase): @@ -19,10 +19,10 @@ def test_preprocess_basic(self): """ df = load_elastic_tensor()[:5][['K_VRH', 'structure']] df['K_VRH'] = df['K_VRH'].astype(str) - f = Featurize() + f = Featurization() df = f.featurize_structure(df, featurizers=[GlobalSymmetryFeatures()]) - p = Preprocess() + p = Preprocessing() df = p.preprocess(df, 'K_VRH') self.assertAlmostEqual(df['K_VRH'].iloc[0], 194.26888435900003) self.assertEqual(df["crystal_system_tetragonal"].iloc[0], 1) \ No newline at end of file