Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make name of new column with predictions appended to dataframe configurable #267

Merged
merged 2 commits into from
May 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions automatminer/automl/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@
"""

import abc
from typing import List
import logging
from typing import List

import numpy as np
import pandas as pd

from automatminer.base import DFTransformer
from automatminer.utils.log import AMM_LOG_PREDICT_STR, log_progress
from automatminer.utils.pkg import AutomatminerError, check_fitted
Expand Down Expand Up @@ -111,13 +110,15 @@ def deserialize(self) -> None:

@check_fitted
@log_progress(logger, AMM_LOG_PREDICT_STR)
def predict(self, df: pd.DataFrame, target: str) -> pd.DataFrame:
def predict(
self, df: pd.DataFrame, target: str, output_col=None
) -> pd.DataFrame:
"""
Predict the target property of materials given a df of features. This
base method is widely applicable across different AutoML backends.

The predictions are appended to the dataframe in a column called:
"{target} predicted"
The predictions are appended to the dataframe in a column named according
to output_col. Default value is "{target} predicted"

Args:
df (pandas.DataFrame): Contains all features needed for ML (i.e.,
Expand Down Expand Up @@ -148,7 +149,7 @@ def predict(self, df: pd.DataFrame, target: str) -> pd.DataFrame:
else:
X = df[self.features].values # rectify feature order
y_pred = self.best_pipeline.predict(X)
df[target + " predicted"] = y_pred
df[output_col or (target + " predicted")] = y_pred

log_msg = "Prediction finished successfully."
try:
Expand Down
13 changes: 6 additions & 7 deletions automatminer/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,20 @@
from typing import Dict

import pandas as pd

from automatminer import __name__ as amm_name
from automatminer.base import DFTransformer
from automatminer.presets import get_preset_config
from automatminer.utils.log import initialize_logger
from automatminer.utils.ml import regression_or_classification
from automatminer.utils.pkg import (
check_fitted,
set_fitted,
return_attrs_recursively,
AutomatminerError,
VersionError,
check_fitted,
get_version,
return_attrs_recursively,
save_dict_to_file,
set_fitted,
)
from automatminer.utils.log import initialize_logger

logger = initialize_logger(logger_name=amm_name)

Expand Down Expand Up @@ -192,7 +191,7 @@ def transform(self, df, **transform_kwargs):
return self.predict(df, **transform_kwargs)

@check_fitted
def predict(self, df, ignore=None):
def predict(self, df, ignore=None, output_col=None):
"""
Predict a target property of a set of materials.

Expand Down Expand Up @@ -240,7 +239,7 @@ def predict(self, df, ignore=None):
df = self.autofeaturizer.transform(df, self.target)
df = self.cleaner.transform(df, self.target)
df = self.reducer.transform(df, self.target)
predictions = self.learner.predict(df, self.target)
predictions = self.learner.predict(df, self.target, output_col=output_col)
logger.info("MatPipe prediction completed.")
merged_df = predictions.join(ignore_df, how="left")
return merged_df
Expand Down
23 changes: 12 additions & 11 deletions automatminer/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def make_matpipe_test(config_preset, skip=None):
skippables = [
"transferability",
"user_features",
"ignore",
"predict_kwargs",
"benchmarking",
"persistence",
"digests",
Expand Down Expand Up @@ -135,16 +135,11 @@ def test_user_features(self):
test = df_test[self.target + " predicted"]
self.assertTrue(r2_score(true, test) > 0.75)

@unittest.skipIf("ignore" in skip, reason)
def test_ignore(self):
df = self.df
# pd.set_option('display.max_rows', 500)
# pd.set_option('display.max_columns', 500)
# pd.set_option('display.width', 1000)
# print(df)

df_train = df.iloc[:200]
df_test = df.iloc[201:250]
@unittest.skipIf("predict_kwargs" in skip, reason)
def test_predict_kwargs(self):
# Test mat_pipe.predict()'s ignore and output_col kwargs.
df_train = self.df.iloc[:200]
df_test = self.df.iloc[201:250]
ef = "ExtraFeature"
df_test[ef] = [i + 100 for i in range(df_test.shape[0])]
self.pipe.fit(df_train, self.target)
Expand All @@ -166,6 +161,12 @@ def test_ignore(self):
self.assertFalse(ef in predicted_some.columns)
self.assertTrue("composition" in predicted_some.columns)

output_col_name = self.target + "_pred"
predicted_custom_col = self.pipe.predict(
df_test, output_col=output_col_name
)
self.assertTrue(output_col_name in predicted_custom_col)

@unittest.skipIf("benchmarking" in skip, reason)
def test_benchmarking_no_cache(self):
pipe = self.pipe
Expand Down