Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

consistent naming #84

Merged
merged 4 commits into from
Oct 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
14 changes: 7 additions & 7 deletions matbench/automl/tests/test_tpot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from matbench.automl.tpot_utils import TpotAutoml
from matbench.automl.tpot_configs.classifier import classifier_config_dict_mb
from matbench.automl.tpot_configs.regressor import regressor_config_dict_mb
from matbench.analysis.analysis import Analysis
from matbench.analysis.core import Analysis
from matbench.data.load import load_double_perovskites_gap, \
load_glass_binary
from matbench.featurization.core import Featurize
from matbench.preprocessing.core import Preprocess
from matbench.featurization.core import Featurization
from matbench.preprocessing.core import Preprocessing
from matminer.featurizers.composition import ElementProperty, TMetalFraction, \
Stoichiometry
from sklearn.model_selection import train_test_split
Expand All @@ -28,12 +28,12 @@ def test_tpot_regression(self, limit=500):
target = 'gap gllbsc'
# load and featurize:
df_init = load_double_perovskites_gap(return_lumo=False)[:limit]
featzer = Featurize(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2'])
featzer = Featurization(ignore_cols=['a_1', 'b_1', 'a_2', 'b_2'])
df_feats = featzer.featurize_formula(df_init, featurizers=[
ElementProperty.from_preset(preset_name='matminer'),
TMetalFraction()])
# preprocessing of the data
prep = Preprocess()
prep = Preprocessing()
df = prep.handle_na(df_feats, max_na_frac=0.1)
feats0 = set(df.columns)
df = prep.prune_correlated_features(df, target, R_max=0.95)
Expand Down Expand Up @@ -95,12 +95,12 @@ def test_tpot_classification(self, limit=500):
target = 'gfa'
# load and featurize:
df_init = load_glass_binary()[:limit]
featzer = Featurize()
featzer = Featurization()
df_feats = featzer.featurize_formula(df_init, featurizers=[
ElementProperty.from_preset(preset_name='matminer'),
Stoichiometry()])
# preprocessing of the data
prep = Preprocess()
prep = Preprocessing()
df = prep.handle_na(df_feats, max_na_frac=0.1)
feats0 = set(df.columns)
df = prep.prune_correlated_features(df, target, R_max=0.95)
Expand Down
2 changes: 1 addition & 1 deletion matbench/featurization/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
StructureFeaturizers, BSFeaturizers, DOSFeaturizers


class Featurize(object):
class Featurization(object):
"""
Takes in a dataframe and generates features from preset columns such as
"formula", "structure", "bandstructure", "dos", etc. One may use
Expand Down
10 changes: 5 additions & 5 deletions matbench/featurization/tests/test_featurize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from matbench.data.load import load_double_perovskites_gap, \
load_castelli_perovskites
from matbench.featurization.core import Featurize
from matbench.featurization.core import Featurization
from matbench.featurization.sets import AllFeaturizers
from matbench.data.load import load_phonon_dielectric_mp

Expand All @@ -25,7 +25,7 @@ class TestFeaturize(unittest.TestCase):
def test_featurize_formula(self, limit=5):
df_init = load_double_perovskites_gap(return_lumo=False)[:limit]
ignore_cols = ['a_1', 'a_2', 'b_1', 'b_2']
featurizer = Featurize(ignore_cols=ignore_cols,
featurizer = Featurization(ignore_cols=ignore_cols,
ignore_errors=False,
exclude=['CohesiveEnergy'],
multiindex=False)
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_featurize_formula(self, limit=5):

def test_featurize_structure(self, limit=5):
df_init = load_castelli_perovskites()[:limit]
featurizer = Featurize(ignore_errors=False, multiindex=False)
featurizer = Featurization(ignore_errors=False, multiindex=False)
df = featurizer.featurize_structure(df_init, inplace=False,
featurizers="all")

Expand Down Expand Up @@ -187,7 +187,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1):
else:
df_init = pd.read_pickle(os.path.join(test_dir, df_bsdos_pickled))
df_init = df_init.dropna(axis=0)
featurizer = Featurize(ignore_errors=False, multiindex=False)
featurizer = Featurization(ignore_errors=False, multiindex=False)
df = featurizer.featurize_dos(df_init, inplace=False)

# sanity checks
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_featurize_bsdos(self, refresh_df_init=False, limit=1):
def test_auto_featurize(self, limit=5):
df_init = load_phonon_dielectric_mp()[:limit]
print(df_init.structure)
featurizer = Featurize(ignore_errors=False, multiindex=True)
featurizer = Featurization(ignore_errors=False, multiindex=True)
df = featurizer.auto_featurize(df_init,
input_cols=('formula', 'structure'))

Expand Down
3 changes: 1 addition & 2 deletions matbench/preprocessing/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
import numpy as np
import pandas as pd
from matbench.utils.utils import setup_custom_logger
Expand All @@ -8,7 +7,7 @@
from skrebate import ReliefF


class Preprocess(object):
class Preprocessing(object):
"""
Clean and prepare the data for visualization and training.

Expand Down
8 changes: 4 additions & 4 deletions matbench/preprocessing/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from matminer.datasets.dataframe_loader import load_elastic_tensor
from matminer.featurizers.structure import GlobalSymmetryFeatures
from matbench.featurization.core import Featurize
from matbench.featurization.core import Featurization

from matbench.preprocessing.core import Preprocess
from matbench.preprocessing.core import Preprocessing


class TestPreprocess(unittest.TestCase):
Expand All @@ -19,10 +19,10 @@ def test_preprocess_basic(self):
"""
df = load_elastic_tensor()[:5][['K_VRH', 'structure']]
df['K_VRH'] = df['K_VRH'].astype(str)
f = Featurize()
f = Featurization()
df = f.featurize_structure(df, featurizers=[GlobalSymmetryFeatures()])

p = Preprocess()
p = Preprocessing()
df = p.preprocess(df, 'K_VRH')
self.assertAlmostEqual(df['K_VRH'].iloc[0], 194.26888435900003)
self.assertEqual(df["crystal_system_tetragonal"].iloc[0], 1)