From 13b71f3a3157fad91706def0cf7ac08c09083a77 Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 2 Oct 2018 13:38:33 -0700 Subject: [PATCH 1/4] Preprocess -> Preprocessing (noun same as other packages) --- matbench/preprocessing/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matbench/preprocessing/core.py b/matbench/preprocessing/core.py index 93731322..52121227 100644 --- a/matbench/preprocessing/core.py +++ b/matbench/preprocessing/core.py @@ -1,4 +1,3 @@ -import logging import numpy as np import pandas as pd from matbench.utils.utils import setup_custom_logger @@ -8,7 +7,7 @@ from skrebate import ReliefF -class Preprocess(object): +class Preprocessing(object): """ Clean and prepare the data for visualization and training. From 7a9f3e805965d947143be15595aa302837225836 Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 2 Oct 2018 13:39:31 -0700 Subject: [PATCH 2/4] analysis.py -> core.py --- matbench/analysis/{analysis.py => core.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename matbench/analysis/{analysis.py => core.py} (100%) diff --git a/matbench/analysis/analysis.py b/matbench/analysis/core.py similarity index 100% rename from matbench/analysis/analysis.py rename to matbench/analysis/core.py From 16d865d20b4531a355caa121870a373b8a1dd03f Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 2 Oct 2018 13:45:05 -0700 Subject: [PATCH 3/4] Featurize -> Featurization --- matbench/featurization/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matbench/featurization/core.py b/matbench/featurization/core.py index 758fac4b..c751b2e2 100644 --- a/matbench/featurization/core.py +++ b/matbench/featurization/core.py @@ -10,7 +10,7 @@ StructureFeaturizers, BSFeaturizers, DOSFeaturizers -class Featurize(object): +class Featurization(object): """ Takes in a dataframe and generate features from preset columns such as "formula", "structure", "bandstructure", "dos", etc. One may use From 19a36d13e20cc13a1c8fa2f11d77a93fe5b7eeff Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 2 Oct 2018 13:45:37 -0700 Subject: [PATCH 4/4] update names in tests --- matbench/automl/tests/test_tpot.py | 14 +++++++------- matbench/featurization/tests/test_featurize.py | 10 +++++----- matbench/preprocessing/tests/test_preprocess.py | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/matbench/automl/tests/test_tpot.py b/matbench/automl/tests/test_tpot.py index b4bec7e9..d1e8e1e2 100644 --- a/matbench/automl/tests/test_tpot.py +++ b/matbench/automl/tests/test_tpot.py @@ -6,11 +6,11 @@ from matbench.automl.tpot_utils import TpotAutoml from matbench.automl.tpot_configs.classifier import classifier_config_dict_mb from matbench.automl.tpot_configs.regressor import regressor_config_dict_mb -from matbench.analysis.analysis import Analysis +from matbench.analysis.core import Analysis from matbench.data.load import load_double_perovskites_gap, \ load_glass_binary -from matbench.featurization.core import Featurize -from matbench.preprocessing.core import Preprocess +from matbench.featurization.core import Featurization +from matbench.preprocessing.core import Preprocessing from matminer.featurizers.composition import ElementProperty, TMetalFraction, \ Stoichiometry from sklearn.model_selection import train_test_split @@ -28,12 +28,12 @@ def test_tpot_regression(self, limit=500): target = 'gap gllbsc' # load and featurize: df_init = load_double_perovskites_gap(return_lumo=False)[:limit] - featzer = Featurize(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2']) + featzer = Featurization(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2']) df_feats = featzer.featurize_formula(df_init, featurizers=[ ElementProperty.from_preset(preset_name='matminer'), TMetalFraction()]) # preprocessing of the data - prep = Preprocess() + prep = Preprocessing() df = prep.handle_na(df_feats, max_na_frac=0.1) feats0 = set(df.columns) df = prep.prune_correlated_features(df, target, R_max=0.95) @@ -95,12 +95,12 @@ def test_tpot_classification(self, limit=500): target = 'gfa' # load and featurize: df_init = load_glass_binary()[:limit] - featzer = Featurize() + featzer = Featurization() df_feats = featzer.featurize_formula(df_init, featurizers=[ ElementProperty.from_preset(preset_name='matminer'), Stoichiometry()]) # preprocessing of the data - prep = Preprocess() + prep = Preprocessing() df = prep.handle_na(df_feats, max_na_frac=0.1) feats0 = set(df.columns) df = prep.prune_correlated_features(df, target, R_max=0.95) diff --git a/matbench/featurization/tests/test_featurize.py b/matbench/featurization/tests/test_featurize.py index f1e97bc2..6adb44cd 100644 --- a/matbench/featurization/tests/test_featurize.py +++ b/matbench/featurization/tests/test_featurize.py @@ -13,7 +13,7 @@ from matbench.data.load import load_double_perovskites_gap, \ load_castelli_perovskites -from matbench.featurization.core import Featurize +from matbench.featurization.core import Featurization from matbench.featurization.sets import AllFeaturizers from matbench.data.load import load_phonon_dielectric_mp @@ -25,7 +25,7 @@ class TestFeaturize(unittest.TestCase): def test_featurize_formula(self, limit=5): df_init = load_double_perovskites_gap(return_lumo=False)[:limit] ignore_cols = ['a_1', 'a_2', 'b_1', 'b_2'] - featurizer = Featurize(ignore_cols=ignore_cols, + featurizer = Featurization(ignore_cols=ignore_cols, ignore_errors=False, exclude=['CohesiveEnergy'], multiindex=False) @@ -83,7 +83,7 @@ def test_featurize_formula(self, limit=5): def test_featurize_structure(self, limit=5): df_init = load_castelli_perovskites()[:limit] - featurizer = Featurize(ignore_errors=False, multiindex=False) + featurizer = Featurization(ignore_errors=False, multiindex=False) df = featurizer.featurize_structure(df_init, inplace=False, featurizers="all") @@ -187,7 +187,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1): else: df_init = pd.read_pickle(os.path.join(test_dir, df_bsdos_pickled)) df_init = df_init.dropna(axis=0) - featurizer = Featurize(ignore_errors=False, multiindex=False) + featurizer = Featurization(ignore_errors=False, multiindex=False) df = featurizer.featurize_dos(df_init, inplace=False) # sanity checks @@ -222,7 +222,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1): def test_auto_featurize(self, limit=5): df_init = load_phonon_dielectric_mp()[:limit] print(df_init.structure) - featurizer = Featurize(ignore_errors=False, multiindex=True) + featurizer = Featurization(ignore_errors=False, multiindex=True) df = featurizer.auto_featurize(df_init, input_cols=('formula', 'structure')) diff --git a/matbench/preprocessing/tests/test_preprocess.py b/matbench/preprocessing/tests/test_preprocess.py index b0fac42e..8dbf6835 100644 --- a/matbench/preprocessing/tests/test_preprocess.py +++ b/matbench/preprocessing/tests/test_preprocess.py @@ -2,9 +2,9 @@ from matminer.datasets.dataframe_loader import load_elastic_tensor from matminer.featurizers.structure import GlobalSymmetryFeatures -from matbench.featurization.core import Featurize +from matbench.featurization.core import Featurization -from matbench.preprocessing.core import Preprocess +from matbench.preprocessing.core import Preprocessing class TestPreprocess(unittest.TestCase): @@ -19,10 +19,10 @@ def test_preprocess_basic(self): """ df = load_elastic_tensor()[:5][['K_VRH', 'structure']] df['K_VRH'] = df['K_VRH'].astype(str) - f = Featurize() + f = Featurization() df = f.featurize_structure(df, featurizers=[GlobalSymmetryFeatures()]) - p = Preprocess() + p = Preprocessing() df = p.preprocess(df, 'K_VRH') self.assertAlmostEqual(df['K_VRH'].iloc[0], 194.26888435900003) self.assertEqual(df["crystal_system_tetragonal"].iloc[0], 1) \ No newline at end of file