[Logging] Reduce numpy 1.20 deprecation warnings from scipy (open-mml…
gradientsky authored Feb 6, 2021
1 parent 3f19d9f commit 7a9b583
Showing 2 changed files with 56 additions and 38 deletions.
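Note: the fix wraps each public entry point in warnings.catch_warnings(), so the filter is scoped to that call rather than mutating process-wide warning state. A minimal sketch of the pattern, with a hypothetical noisy_dependency() standing in for the scipy calls that trigger the numpy 1.20 alias warnings:

    import warnings

    def noisy_dependency():
        # Stand-in for a scipy call that hits a deprecated numpy alias.
        warnings.warn('`np.int` is a deprecated alias for the builtin `int`',
                      DeprecationWarning)

    def fit():
        with warnings.catch_warnings():
            # The filter lives only inside this block; the caller's warning
            # configuration is restored when the block exits.
            warnings.filterwarnings('ignore', message='`np.*` is a deprecated alias for the builtin `.*`')
            noisy_dependency()  # suppressed

    fit()                # emits nothing
    noisy_dependency()   # still warns outside the block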
53 changes: 32 additions & 21 deletions core/src/autogluon/core/models/abstract/abstract_model.py
@@ -5,6 +5,7 @@
 import pickle
 import sys
 import time
+import warnings
 from typing import Union
 
 import numpy as np
@@ -299,12 +300,17 @@ def _preprocess_fit_resources(self, silent=False, **kwargs):
         return kwargs
 
     def fit(self, **kwargs):
-        kwargs = self._preprocess_fit_args(**kwargs)
-        if 'time_limit' not in kwargs or kwargs['time_limit'] is None or kwargs['time_limit'] > 0:
-            self._fit(**kwargs)
-        else:
-            logger.warning(f'\tWarning: Model has no time left to train, skipping model... (Time Left = {round(kwargs["time_limit"], 1)}s)')
-            raise TimeLimitExceeded
+        with warnings.catch_warnings():
+            # Suppress numpy 1.20 warnings (downstream scipy is not updated yet)
+            # https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
+            warnings.filterwarnings('ignore', message='`np.*` is a deprecated alias for the builtin `.*`')
+
+            kwargs = self._preprocess_fit_args(**kwargs)
+            if 'time_limit' not in kwargs or kwargs['time_limit'] is None or kwargs['time_limit'] > 0:
+                self._fit(**kwargs)
+            else:
+                logger.warning(f'\tWarning: Model has no time left to train, skipping model... (Time Left = {round(kwargs["time_limit"], 1)}s)')
+                raise TimeLimitExceeded
 
     def _fit(self, X_train, y_train, **kwargs):
         # kwargs may contain: num_cpus, num_gpus
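Aside from the new warning filter, the wrapper keeps the existing time-budget guard: a non-positive time_limit skips training by raising TimeLimitExceeded rather than returning. A hypothetical caller (not part of this commit) would treat that as a control-flow signal:

    # Hypothetical trainer loop, for illustration only; candidate_models
    # and time_remaining() are assumed names, not AutoGluon internals.
    for model in candidate_models:
        try:
            model.fit(X_train=X, y_train=y, time_limit=time_remaining())
        except TimeLimitExceeded:
            continue  # budget exhausted for this model; try the next one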
@@ -326,23 +332,28 @@ def predict_proba(self, X, normalize=None, **kwargs):
         return y_pred_proba
 
     def _predict_proba(self, X, **kwargs):
-        X = self.preprocess(X, **kwargs)
-
-        if self.problem_type == REGRESSION:
-            return self.model.predict(X)
-
-        y_pred_proba = self.model.predict_proba(X)
-        if self.problem_type == BINARY:
-            if len(y_pred_proba.shape) == 1:
-                return y_pred_proba
-            elif y_pred_proba.shape[1] > 1:
-                return y_pred_proba[:, 1]
-            else:
-                return y_pred_proba
-        elif y_pred_proba.shape[1] > 2:
-            return y_pred_proba
-        else:
-            return y_pred_proba[:, 1]
+        with warnings.catch_warnings():
+            # Suppress numpy 1.20 warnings (downstream scipy is not updated yet)
+            # https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
+            warnings.filterwarnings('ignore', message='`np.*` is a deprecated alias for the builtin `.*`')
+
+            X = self.preprocess(X, **kwargs)
+
+            if self.problem_type == REGRESSION:
+                return self.model.predict(X)
+
+            y_pred_proba = self.model.predict_proba(X)
+            if self.problem_type == BINARY:
+                if len(y_pred_proba.shape) == 1:
+                    return y_pred_proba
+                elif y_pred_proba.shape[1] > 1:
+                    return y_pred_proba[:, 1]
+                else:
+                    return y_pred_proba
+            elif y_pred_proba.shape[1] > 2:
+                return y_pred_proba
+            else:
+                return y_pred_proba[:, 1]
 
     def score(self, X, y, eval_metric=None, metric_needs_y_pred=None, **kwargs):
         if eval_metric is None:
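The branching in _predict_proba normalizes binary output: predict_proba may return a 1-D array of positive-class probabilities or an (n, 2) matrix, and the (n, 2) case is reduced to its second column. A small self-contained check of that reduction (positive_class_proba is a hypothetical helper name, same logic as the binary branch above):

    import numpy as np

    def positive_class_proba(y_pred_proba):
        # Same reduction as the binary branch in _predict_proba.
        if y_pred_proba.ndim == 1:
            return y_pred_proba
        if y_pred_proba.shape[1] > 1:
            return y_pred_proba[:, 1]
        return y_pred_proba

    two_col = np.array([[0.8, 0.2], [0.3, 0.7]])  # shape (n, 2)
    one_col = np.array([0.2, 0.7])                # shape (n,)
    assert np.allclose(positive_class_proba(two_col), one_col)
    assert np.allclose(positive_class_proba(one_col), one_col)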
41 changes: 24 additions & 17 deletions tabular/src/autogluon/tabular/trainer/auto_trainer.py
@@ -1,4 +1,6 @@
 import logging
+import warnings
+
 import pandas as pd
 
 from autogluon.core.utils import generate_train_test_split
@@ -26,25 +28,30 @@ def get_models(self, hyperparameters, **kwargs):
                                  feature_metadata=feature_metadata, silent=silent, **kwargs)
 
     def fit(self, X_train, y_train, hyperparameters, X_val=None, y_val=None, X_unlabeled=None, feature_prune=False, holdout_frac=0.1, num_stack_levels=0, core_kwargs: dict = None, time_limit=None, **kwargs):
-        for key in kwargs:
-            logger.warning(f'Warning: Unknown argument passed to `AutoTrainer.fit()`. Argument: {key}')
+        with warnings.catch_warnings():
+            # Suppress numpy 1.20 warnings (downstream scipy is not updated yet)
+            # https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
+            warnings.filterwarnings('ignore', message='`np.*` is a deprecated alias for the builtin `.*`')
+
+            for key in kwargs:
+                logger.warning(f'Warning: Unknown argument passed to `AutoTrainer.fit()`. Argument: {key}')
 
-        if self.bagged_mode:
-            if (y_val is not None) and (X_val is not None):
-                # TODO: User could be intending to blend instead. Perhaps switch from OOF preds to X_val preds while still bagging? Doubt a user would want this.
-                logger.warning('Warning: Training AutoGluon in Bagged Mode but X_val is specified, concatenating X_train and X_val for cross-validation')
-                X_train = pd.concat([X_train, X_val], ignore_index=True)
-                y_train = pd.concat([y_train, y_val], ignore_index=True)
-                X_val = None
-                y_val = None
-        else:
-            if (y_val is None) or (X_val is None):
-                X_train, X_val, y_train, y_val = generate_train_test_split(X_train, y_train, problem_type=self.problem_type, test_size=holdout_frac, random_state=self.random_state)
-                logger.log(20, f'Automatically generating train/validation split with holdout_frac={holdout_frac}, Train Rows: {len(X_train)}, Val Rows: {len(X_val)}')
+            if self.bagged_mode:
+                if (y_val is not None) and (X_val is not None):
+                    # TODO: User could be intending to blend instead. Perhaps switch from OOF preds to X_val preds while still bagging? Doubt a user would want this.
+                    logger.warning('Warning: Training AutoGluon in Bagged Mode but X_val is specified, concatenating X_train and X_val for cross-validation')
+                    X_train = pd.concat([X_train, X_val], ignore_index=True)
+                    y_train = pd.concat([y_train, y_val], ignore_index=True)
+                    X_val = None
+                    y_val = None
+            else:
+                if (y_val is None) or (X_val is None):
+                    X_train, X_val, y_train, y_val = generate_train_test_split(X_train, y_train, problem_type=self.problem_type, test_size=holdout_frac, random_state=self.random_state)
+                    logger.log(20, f'Automatically generating train/validation split with holdout_frac={holdout_frac}, Train Rows: {len(X_train)}, Val Rows: {len(X_val)}')
 
-        self._train_multi_and_ensemble(X_train, y_train, X_val, y_val, X_unlabeled=X_unlabeled, hyperparameters=hyperparameters,
-                                       feature_prune=feature_prune,
-                                       num_stack_levels=num_stack_levels, time_limit=time_limit, core_kwargs=core_kwargs)
+            self._train_multi_and_ensemble(X_train, y_train, X_val, y_val, X_unlabeled=X_unlabeled, hyperparameters=hyperparameters,
+                                           feature_prune=feature_prune,
+                                           num_stack_levels=num_stack_levels, time_limit=time_limit, core_kwargs=core_kwargs)
 
     def get_models_distillation(self, hyperparameters, **kwargs):
         path = kwargs.pop('path', self.path)
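For reference, generate_train_test_split carves a validation holdout of holdout_frac rows out of the training data when no X_val/y_val is supplied. A rough stand-in using scikit-learn (the real AutoGluon helper additionally handles problem_type-aware splitting):

    import pandas as pd
    from sklearn.model_selection import train_test_split

    holdout_frac = 0.1
    X = pd.DataFrame({'feature': range(100)})
    y = pd.Series([0, 1] * 50)

    # Stratify on the label so the holdout keeps the class balance.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=holdout_frac, stratify=y, random_state=0)
    print(len(X_train), len(X_val))  # 90 10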
