From 683ae64ade15bfaa62f3927f45ca81c2d7815d98 Mon Sep 17 00:00:00 2001 From: janosh Date: Mon, 25 Nov 2019 12:47:45 +0000 Subject: [PATCH 1/2] add kwarg output_col to matpipe.predict() and learner.predict() --- automatminer/automl/base.py | 13 +++++++------ automatminer/pipeline.py | 13 ++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/automatminer/automl/base.py b/automatminer/automl/base.py index 56b9ed4d..c4705460 100644 --- a/automatminer/automl/base.py +++ b/automatminer/automl/base.py @@ -3,12 +3,11 @@ """ import abc -from typing import List import logging +from typing import List import numpy as np import pandas as pd - from automatminer.base import DFTransformer from automatminer.utils.log import AMM_LOG_PREDICT_STR, log_progress from automatminer.utils.pkg import AutomatminerError, check_fitted @@ -111,13 +110,15 @@ def deserialize(self) -> None: @check_fitted @log_progress(logger, AMM_LOG_PREDICT_STR) - def predict(self, df: pd.DataFrame, target: str) -> pd.DataFrame: + def predict( + self, df: pd.DataFrame, target: str, output_col=None + ) -> pd.DataFrame: """ Predict the target property of materials given a df of features. This base method is widely applicanble across different AutoML backends. - The predictions are appended to the dataframe in a column called: - "{target} predicted" + The predictions are appended to the dataframe in a column named according + to output_col. Default value is "{target_name} predicted" Args: df (pandas.DataFrame): Contains all features needed for ML (i.e., @@ -148,7 +149,7 @@ def predict(self, df: pd.DataFrame, target: str) -> pd.DataFrame: else: X = df[self.features].values # rectify feature order y_pred = self.best_pipeline.predict(X) - df[target + " predicted"] = y_pred + df[output_col or (target + " predicted")] = y_pred log_msg = "Prediction finished successfully." try: diff --git a/automatminer/pipeline.py b/automatminer/pipeline.py index c8ee66cc..5133a02d 100644 --- a/automatminer/pipeline.py +++ b/automatminer/pipeline.py @@ -6,21 +6,20 @@ from typing import Dict import pandas as pd - from automatminer import __name__ as amm_name from automatminer.base import DFTransformer from automatminer.presets import get_preset_config +from automatminer.utils.log import initialize_logger from automatminer.utils.ml import regression_or_classification from automatminer.utils.pkg import ( - check_fitted, - set_fitted, - return_attrs_recursively, AutomatminerError, VersionError, + check_fitted, get_version, + return_attrs_recursively, save_dict_to_file, + set_fitted, ) -from automatminer.utils.log import initialize_logger logger = initialize_logger(logger_name=amm_name) @@ -192,7 +191,7 @@ def transform(self, df, **transform_kwargs): return self.predict(df, **transform_kwargs) @check_fitted - def predict(self, df, ignore=None): + def predict(self, df, ignore=None, output_col=None): """ Predict a target property of a set of materials. @@ -240,7 +239,7 @@ def predict(self, df, ignore=None): df = self.autofeaturizer.transform(df, self.target) df = self.cleaner.transform(df, self.target) df = self.reducer.transform(df, self.target) - predictions = self.learner.predict(df, self.target) + predictions = self.learner.predict(df, self.target, output_col=output_col) logger.info("MatPipe prediction completed.") merged_df = predictions.join(ignore_df, how="left") return merged_df From 2c7956d57ef1acf2c00ed7f76886d4ec641337c3 Mon Sep 17 00:00:00 2001 From: janosh Date: Mon, 25 Nov 2019 12:48:40 +0000 Subject: [PATCH 2/2] refactor test_ignore to test_predict_kwargs --- automatminer/tests/test_pipeline.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/automatminer/tests/test_pipeline.py b/automatminer/tests/test_pipeline.py index c4fd0f11..2adb0c15 100644 --- a/automatminer/tests/test_pipeline.py +++ b/automatminer/tests/test_pipeline.py @@ -66,7 +66,7 @@ def make_matpipe_test(config_preset, skip=None): skippables = [ "transferability", "user_features", - "ignore", + "predict_kwargs", "benchmarking", "persistence", "digests", @@ -135,16 +135,11 @@ def test_user_features(self): test = df_test[self.target + " predicted"] self.assertTrue(r2_score(true, test) > 0.75) - @unittest.skipIf("ignore" in skip, reason) - def test_ignore(self): - df = self.df - # pd.set_option('display.max_rows', 500) - # pd.set_option('display.max_columns', 500) - # pd.set_option('display.width', 1000) - # print(df) - - df_train = df.iloc[:200] - df_test = df.iloc[201:250] + @unittest.skipIf("predict_kwargs" in skip, reason) + def test_predict_kwargs(self): + # Test mat_pipe.predict()'s ignore and output_col kwargs. + df_train = self.df.iloc[:200] + df_test = self.df.iloc[201:250] ef = "ExtraFeature" df_test[ef] = [i + 100 for i in range(df_test.shape[0])] self.pipe.fit(df_train, self.target) @@ -166,6 +161,12 @@ def test_ignore(self): self.assertFalse(ef in predicted_some.columns) self.assertTrue("composition" in predicted_some.columns) + output_col_name = self.target + "_pred" + predicted_custom_col = self.pipe.predict( + df_test, output_col=output_col_name + ) + self.assertTrue(output_col_name in predicted_custom_col) + @unittest.skipIf("benchmarking" in skip, reason) def test_benchmarking_no_cache(self): pipe = self.pipe