Skip to content

Commit

Permalink
Merge pull request #84 from albalu/master
Browse files (browse the repository at this point in the history)
consistent naming
  • Loading branch information
albalu authored Oct 2, 2018
2 parents 6ba68fa + 19a36d1 commit 6b29ce9
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 19 deletions.
File renamed without changes.
14 changes: 7 additions & 7 deletions matbench/automl/tests/test_tpot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from matbench.automl.tpot_utils import TpotAutoml
from matbench.automl.tpot_configs.classifier import classifier_config_dict_mb
from matbench.automl.tpot_configs.regressor import regressor_config_dict_mb
from matbench.analysis.analysis import Analysis
from matbench.analysis.core import Analysis
from matbench.data.load import load_double_perovskites_gap, \
load_glass_binary
from matbench.featurization.core import Featurize
from matbench.preprocessing.core import Preprocess
from matbench.featurization.core import Featurization
from matbench.preprocessing.core import Preprocessing
from matminer.featurizers.composition import ElementProperty, TMetalFraction, \
Stoichiometry
from sklearn.model_selection import train_test_split
Expand All @@ -28,12 +28,12 @@ def test_tpot_regression(self, limit=500):
target = 'gap gllbsc'
# load and featurize:
df_init = load_double_perovskites_gap(return_lumo=False)[:limit]
featzer = Featurize(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2'])
featzer = Featurization(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2'])
df_feats = featzer.featurize_formula(df_init, featurizers=[
ElementProperty.from_preset(preset_name='matminer'),
TMetalFraction()])
# preprocessing of the data
prep = Preprocess()
prep = Preprocessing()
df = prep.handle_na(df_feats, max_na_frac=0.1)
feats0 = set(df.columns)
df = prep.prune_correlated_features(df, target, R_max=0.95)
Expand Down Expand Up @@ -95,12 +95,12 @@ def test_tpot_classification(self, limit=500):
target = 'gfa'
# load and featurize:
df_init = load_glass_binary()[:limit]
featzer = Featurize()
featzer = Featurization()
df_feats = featzer.featurize_formula(df_init, featurizers=[
ElementProperty.from_preset(preset_name='matminer'),
Stoichiometry()])
# preprocessing of the data
prep = Preprocess()
prep = Preprocessing()
df = prep.handle_na(df_feats, max_na_frac=0.1)
feats0 = set(df.columns)
df = prep.prune_correlated_features(df, target, R_max=0.95)
Expand Down
2 changes: 1 addition & 1 deletion matbench/featurization/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
StructureFeaturizers, BSFeaturizers, DOSFeaturizers


class Featurize(object):
class Featurization(object):
"""
Takes in a dataframe and generates features from preset columns such as
"formula", "structure", "bandstructure", "dos", etc. One may use
Expand Down
10 changes: 5 additions & 5 deletions matbench/featurization/tests/test_featurize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from matbench.data.load import load_double_perovskites_gap, \
load_castelli_perovskites
from matbench.featurization.core import Featurize
from matbench.featurization.core import Featurization
from matbench.featurization.sets import AllFeaturizers
from matbench.data.load import load_phonon_dielectric_mp

Expand All @@ -25,7 +25,7 @@ class TestFeaturize(unittest.TestCase):
def test_featurize_formula(self, limit=5):
df_init = load_double_perovskites_gap(return_lumo=False)[:limit]
ignore_cols = ['a_1', 'a_2', 'b_1', 'b_2']
featurizer = Featurize(ignore_cols=ignore_cols,
featurizer = Featurization(ignore_cols=ignore_cols,
ignore_errors=False,
exclude=['CohesiveEnergy'],
multiindex=False)
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_featurize_formula(self, limit=5):

def test_featurize_structure(self, limit=5):
df_init = load_castelli_perovskites()[:limit]
featurizer = Featurize(ignore_errors=False, multiindex=False)
featurizer = Featurization(ignore_errors=False, multiindex=False)
df = featurizer.featurize_structure(df_init, inplace=False,
featurizers="all")

Expand Down Expand Up @@ -187,7 +187,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1):
else:
df_init = pd.read_pickle(os.path.join(test_dir, df_bsdos_pickled))
df_init = df_init.dropna(axis=0)
featurizer = Featurize(ignore_errors=False, multiindex=False)
featurizer = Featurization(ignore_errors=False, multiindex=False)
df = featurizer.featurize_dos(df_init, inplace=False)

# sanity checks
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1):
def test_auto_featurize(self, limit=5):
df_init = load_phonon_dielectric_mp()[:limit]
print(df_init.structure)
featurizer = Featurize(ignore_errors=False, multiindex=True)
featurizer = Featurization(ignore_errors=False, multiindex=True)
df = featurizer.auto_featurize(df_init,
input_cols=('formula', 'structure'))

Expand Down
3 changes: 1 addition & 2 deletions matbench/preprocessing/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
import numpy as np
import pandas as pd
from matbench.utils.utils import setup_custom_logger
Expand All @@ -8,7 +7,7 @@
from skrebate import ReliefF


class Preprocess(object):
class Preprocessing(object):
"""
Clean and prepare the data for visualization and training.
Expand Down
8 changes: 4 additions & 4 deletions matbench/preprocessing/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from matminer.datasets.dataframe_loader import load_elastic_tensor
from matminer.featurizers.structure import GlobalSymmetryFeatures
from matbench.featurization.core import Featurize
from matbench.featurization.core import Featurization

from matbench.preprocessing.core import Preprocess
from matbench.preprocessing.core import Preprocessing


class TestPreprocess(unittest.TestCase):
Expand All @@ -19,10 +19,10 @@ def test_preprocess_basic(self):
"""
df = load_elastic_tensor()[:5][['K_VRH', 'structure']]
df['K_VRH'] = df['K_VRH'].astype(str)
f = Featurize()
f = Featurization()
df = f.featurize_structure(df, featurizers=[GlobalSymmetryFeatures()])

p = Preprocess()
p = Preprocessing()
df = p.preprocess(df, 'K_VRH')
self.assertAlmostEqual(df['K_VRH'].iloc[0], 194.26888435900003)
self.assertEqual(df["crystal_system_tetragonal"].iloc[0], 1)

0 comments on commit 6b29ce9

Please sign in to comment.