Tabular: HPO bag fix (open-mmlab#912)
* Tabular: Fixed time usage in HPO, reduced memory usage of HPO, improved naming of HPO models.

* minor update

* minor fix
Innixma authored Feb 8, 2021
1 parent 14f8894 commit 6ef7d70
Showing 9 changed files with 66 additions and 51 deletions.
3 changes: 2 additions & 1 deletion core/src/autogluon/core/models/abstract/abstract_model.py
@@ -192,6 +192,7 @@ def _set_default_searchspace(self):
if self.params is not None:
self.params.update(def_search_space)

# TODO: v0.1 Change this to update path_root only, path change to property
def set_contexts(self, path_context):
self.path = self.create_contexts(path_context)
self.path_root = self.path.rsplit(self.path_suffix, 1)[0]
@@ -609,7 +610,7 @@ def _get_hpo_results(self, scheduler, scheduler_params: dict, time_start):
hpo_model_performances = {}
for trial in sorted(hpo_results['trial_info'].keys()):
# TODO: ignore models which were killed early by scheduler (eg. in Hyperband). How to ID these?
file_id = "trial_" + str(trial) # unique identifier to files from this trial
file_id = f"T{trial}" # unique identifier to files from this trial
trial_model_name = self.name + os.path.sep + file_id
trial_model_path = self.path_root + trial_model_name + os.path.sep
hpo_models[trial_model_name] = trial_model_path
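For illustration, a minimal sketch (not part of this diff) of how the shortened trial identifier composes into per-trial model names and paths; the model name and path_root values below are hypothetical:

import os

model_name = "LightGBM"                                    # hypothetical HPO'd model
path_root = "AutogluonModels/ag-20210208/models/"          # hypothetical path_root

for trial in range(3):
    file_id = f"T{trial}"                                  # was: "trial_" + str(trial)
    trial_model_name = model_name + os.path.sep + file_id  # e.g. "LightGBM/T0"
    trial_model_path = path_root + trial_model_name + os.path.sep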
14 changes: 11 additions & 3 deletions core/src/autogluon/core/models/abstract/model_trial.py
@@ -29,8 +29,16 @@ def model_trial(args, reporter: LocalStatusReporter):

fit_model_args = dict(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **util_args.get('fit_kwargs', dict()))
predict_proba_args = dict(X=X_val)
model = fit_and_save_model(model=model, params=args, fit_args=fit_model_args, predict_proba_args=predict_proba_args, y_val=y_val,
time_start=util_args.time_start, time_limit=util_args.get('time_limit', None), reporter=None)
model = fit_and_save_model(
model=model,
params=args,
fit_args=fit_model_args,
predict_proba_args=predict_proba_args,
y_val=y_val,
time_start=util_args.time_start,
time_limit=util_args.get('time_limit', None),
reporter=None,
)
except Exception as e:
if not isinstance(e, TimeLimitExceeded):
logger.exception(e, exc_info=True)
@@ -43,7 +51,7 @@ def prepare_inputs(args):
task_id = args.pop('task_id')
util_args = args.pop('util_args')

file_prefix = f"trial_{task_id}" # append to all file names created during this trial. Do NOT change!
file_prefix = f"T{task_id}" # append to all file names created during this trial. Do NOT change!
model = util_args.model # the model object must be passed into model_trial() here
model.name = model.name + os.path.sep + file_prefix
model.set_contexts(path_context=model.path_root + model.name + os.path.sep)
20 changes: 8 additions & 12 deletions core/src/autogluon/core/models/ensemble/bagged_ensemble_model.py
@@ -125,6 +125,7 @@ def _fit(self, X_train, y_train, k_fold=5, k_fold_start=0, k_fold_end=None, n_re
time_start = time.time()

model_base = self._get_model_base()
model_base.rename(name='')
if self.features is not None:
model_base.features = self.features
model_base.feature_metadata = self.feature_metadata # TODO: Don't pass this here
@@ -136,6 +137,7 @@ def _fit(self, X_train, y_train, k_fold=5, k_fold_start=0, k_fold_end=None, n_re
if k_fold == 1:
if self._n_repeats != 0:
raise ValueError(f'n_repeats must equal 0 when fitting a single model with k_fold < 2, values: ({self._n_repeats}, {k_fold})')
model_base.name = f'{model_base.name}S1F1'
model_base.set_contexts(path_context=self.path + model_base.name + os.path.sep)
time_start_fit = time.time()
model_base.fit(X_train=X_train, y_train=y_train, time_limit=time_limit, **kwargs)
@@ -171,6 +173,7 @@ def _fit(self, X_train, y_train, k_fold=5, k_fold_start=0, k_fold_end=None, n_re
fold_end_n_repeat = min(fold_start_n_repeat + k_fold, fold_end)
# TODO: Consider moving model fit inner for loop to a function to simply this code
for i in range(fold_start_n_repeat, fold_end_n_repeat): # For each fold
fold_num_in_repeat = i - (j * k_fold) # The fold in the current repeat set (first fold in set = 0)
folds_finished = i - fold_start
folds_left = fold_end - i
fold = kfolds[i]
@@ -194,7 +197,7 @@ def _fit(self, X_train, y_train, k_fold=5, k_fold_start=0, k_fold_end=None, n_re
X_train_fold, X_val_fold = X_train.iloc[train_index, :], X_train.iloc[val_index, :]
y_train_fold, y_val_fold = y_train.iloc[train_index], y_train.iloc[val_index]
fold_model = copy.deepcopy(model_base)
fold_model.name = f'{fold_model.name}_F{i+1}'
fold_model.name = f'{fold_model.name}S{j+1}F{fold_num_in_repeat+1}' # S5F3 = 3rd fold of the 5th repeat set
fold_model.set_contexts(self.path + fold_model.name + os.path.sep)
fold_model.fit(X_train=X_train_fold, y_train=y_train_fold, X_val=X_val_fold, y_val=y_val_fold, time_limit=time_limit_fold, **kwargs)
time_train_end_fold = time.time()
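For illustration, a minimal sketch (not part of this diff) of how the new fold names are derived during repeated bagging; because model_base.rename(name='') clears the base name above, each child's name is just the S/F tag itself:

k_fold = 5
for j in range(2):                                  # repeat sets (outer loop, assumed)
    for i in range(j * k_fold, (j + 1) * k_fold):   # fold index across all repeats
        fold_num_in_repeat = i - (j * k_fold)       # first fold in set = 0
        fold_model_name = f"S{j+1}F{fold_num_in_repeat+1}"
        # e.g. i=7, j=1 -> "S2F3" (3rd fold of the 2nd repeat set)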
@@ -350,14 +353,8 @@ def save_child(self, model, verbose=False):
def convert_to_refit_full_template(self):
init_args = self._get_init_args()
init_args['hyperparameters']['save_bag_folds'] = True # refit full models must save folds
model_base_name_orig = init_args['model_base'].name
init_args['model_base'] = self.convert_to_refitfull_template_child()
model_base_name_new = init_args['model_base'].name
if model_base_name_orig in init_args['name'] and model_base_name_orig != model_base_name_new:
init_args['name'] = init_args['name'].replace(model_base_name_orig, model_base_name_new, 1)
else:
init_args['name'] = init_args['name'] + '_FULL'

init_args['name'] = init_args['name'] + REFIT_FULL_SUFFIX
model_full_template = self.__class__(**init_args)
return model_full_template

@@ -366,8 +363,6 @@ def convert_to_refitfull_template_child(self):
child_compressed = copy.deepcopy(self._get_model_base())
child_compressed.feature_metadata = self.feature_metadata # TODO: Don't pass this here
child_compressed.params = compressed_params
child_compressed.name = child_compressed.name + REFIT_FULL_SUFFIX
child_compressed.set_contexts(self.path_root + child_compressed.name + os.path.sep)
return child_compressed

def _get_init_args(self):
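For illustration, a minimal sketch (not part of this diff) of the simplified refit-full naming: the suffix is now applied once to the bagged ensemble's name rather than also being baked into the child template. The bag name below is hypothetical and '_FULL' is the assumed value of the imported REFIT_FULL_SUFFIX constant (the removed code appended '_FULL' directly):

REFIT_FULL_SUFFIX = '_FULL'                      # assumed value of the imported constant
bag_name = "LightGBM_BAG_L1"                     # hypothetical bagged-model name
refit_bag_name = bag_name + REFIT_FULL_SUFFIX    # "LightGBM_BAG_L1_FULL"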
@@ -637,14 +632,15 @@ def _hyperparameter_tune(self, X_train, y_train, k_fold, scheduler_options, prep

# TODO: Create new Ensemble Here
bag = copy.deepcopy(self)
bag.name = bag.name + os.path.sep + str(i)
bag.rename(f"{bag.name}{os.path.sep}T{i}")
bag.set_contexts(self.path_root + bag.name + os.path.sep)

oof_pred_proba, oof_pred_model_repeats = self._construct_empty_oof(X=X_train, y=y_train)
oof_pred_proba[test_index] += y_pred_proba
oof_pred_model_repeats[test_index] += 1

bag.model_base = None
child.rename('')
child.set_contexts(bag.path + child.name + os.path.sep)
bag.save_model_base(child.convert_to_template())

@@ -653,7 +649,7 @@ def _hyperparameter_tune(self, X_train, y_train, k_fold, scheduler_options, prep
bag._n_repeats = 1
bag._oof_pred_proba = oof_pred_proba
bag._oof_pred_model_repeats = oof_pred_model_repeats
child.name = child.name + '_fold_0'
child.rename('S1F1')
child.set_contexts(bag.path + child.name + os.path.sep)
if not self.params.get('save_bag_folds', True):
child.model = None
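For illustration, a minimal sketch (not part of this diff) of the on-disk layout these hunks produce for each HPO trial of a bagged ensemble; the bag name and path_root are hypothetical:

import os

path_root = "AutogluonModels/ag-20210208/models/"   # hypothetical path_root
bag_name = "LightGBM_BAG_L1"                        # hypothetical original bag name

for i in range(2):                                          # two HPO trials
    trial_bag_name = f"{bag_name}{os.path.sep}T{i}"         # e.g. "LightGBM_BAG_L1/T0"
    trial_bag_path = path_root + trial_bag_name + os.path.sep
    child_path = trial_bag_path + "S1F1" + os.path.sep      # single fold fit during HPO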
24 changes: 11 additions & 13 deletions core/src/autogluon/core/utils/plots.py
@@ -37,6 +37,7 @@ def plot_performance_vs_trials(results, output_directory, save_file="Performance
print("Plot of HPO performance saved to file: %s" % outputfile)
plt.show()


def plot_summary_of_models(results, output_directory, save_file='SummaryOfModels.html', plot_title="Models produced during fit()"):
""" Plot dynamic scatterplot summary of each model encountered during fit(), based on the returned Results object.
"""
@@ -71,6 +72,7 @@ def plot_summary_of_models(results, output_directory, save_file='SummaryOfModels
if save_path is not None:
print("Plot summary of models saved to file: %s" % save_file)


def plot_tabular_models(results, output_directory=None, save_file="SummaryOfModels.html", plot_title="Models produced during fit()"):
""" Plot dynamic scatterplot of every single model trained during tabular_prediction.fit()
Args:
@@ -87,19 +89,15 @@ def plot_tabular_models(results, output_directory=None, save_file="SummaryOfMode
hidden_keys.append(model_types)
model_hyperparams = [_formatDict(results['model_hyperparams'][key]) for key in model_names]
datadict = {'performance': val_perfs, 'model': model_names, 'model_type': model_types, 'hyperparameters': model_hyperparams}
hpo_used = results['hyperparameter_tune']
if not hpo_used: # currently, times are only stored without HPO
leaderboard = results['leaderboard'].copy()
leaderboard['fit_time'] = leaderboard['fit_time'].fillna(0)
leaderboard['pred_time_val'] = leaderboard['pred_time_val'].fillna(0)
leaderboard = results['leaderboard'].copy()
leaderboard['fit_time'] = leaderboard['fit_time'].fillna(0)
leaderboard['pred_time_val'] = leaderboard['pred_time_val'].fillna(0)

datadict['inference_latency'] = [leaderboard['pred_time_val'][leaderboard['model'] == m].values[0] for m in model_names]
datadict['training_time'] = [leaderboard['fit_time'][leaderboard['model'] == m].values[0] for m in model_names]
mousover_plot(datadict, attr_x='inference_latency', attr_y='performance', attr_color='model_type',
save_file=save_path, plot_title=plot_title, hidden_keys=hidden_keys)

datadict['inference_latency'] = [leaderboard['pred_time_val'][leaderboard['model'] == m].values[0] for m in model_names]
datadict['training_time'] = [leaderboard['fit_time'][leaderboard['model'] == m].values[0] for m in model_names]
mousover_plot(datadict, attr_x='inference_latency', attr_y='performance', attr_color='model_type',
save_file=save_path, plot_title=plot_title, hidden_keys=hidden_keys)
else:
mousover_plot(datadict, attr_x='model_type', attr_y='performance',
save_file=save_path, plot_title=plot_title, hidden_keys=hidden_keys)

def _formatDict(d):
""" Returns dict as string with HTML new-line tags <br> between key-value pairs. """
@@ -109,6 +107,7 @@ def _formatDict(d):
s += new_s
return s[:-4]


def mousover_plot(datadict, attr_x, attr_y, attr_color=None, attr_size=None, save_file=None, plot_title="",
point_transparency = 0.5, point_size=20, default_color="#2222aa", hidden_keys = []):
""" Produces dynamic scatter plot that can be interacted with by mousing over each point to see its label
@@ -216,4 +215,3 @@ def mousover_plot(datadict, attr_x, attr_y, attr_color=None, attr_size=None, sav
p.add_layout(Legend(items=[LegendItem(label='Size of points based on "'+attr_size + '"')]), 'below')

show(p)

@@ -265,7 +265,6 @@ def _fit(self, X_train, y_train, X_val=None, y_val=None, time_limit=None, num_gp
self.model = init_model
else:
if (init_model_best_score > self.stopping_metric._optimum) or (final_model_best_score > self.stopping_metric._optimum):
logger.warning(f'Warning: Sign differs between AG metric and CatBoost metric variants: {self.stopping_metric.name}, flipping signs.')
init_model_best_score = -init_model_best_score
final_model_best_score = -final_model_best_score

@@ -25,14 +25,27 @@ def lgb_trial(args, reporter):
dataset_val = lgb.Dataset(util_args.directory + util_args.dataset_val_filename)
X_val, y_val = load_pkl.load(util_args.directory + util_args.dataset_val_pkl_filename)

reporter_fit = None # Set reporter_fit to reporter for per-iteration reporting, but will take up MUCH more space (can quickly lead to OOM).

fit_model_args = dict(dataset_train=dataset_train, dataset_val=dataset_val, **util_args.get('fit_kwargs', dict()))
predict_proba_args = dict(X=X_val)
model_trial.fit_and_save_model(model=model, params=args, fit_args=fit_model_args, predict_proba_args=predict_proba_args, y_val=y_val,
time_start=util_args.time_start, time_limit=util_args.get('time_limit', None), reporter=reporter)
model_trial.fit_and_save_model(
model=model,
params=args,
fit_args=fit_model_args,
predict_proba_args=predict_proba_args,
y_val=y_val,
time_start=util_args.time_start,
time_limit=util_args.get('time_limit', None),
reporter=reporter_fit,
)
except Exception as e:
if not isinstance(e, TimeLimitExceeded):
logger.exception(e, exc_info=True)
reporter.terminate()
else:
if reporter_fit is None:
reporter(epoch=1, validation_performance=model.val_score)

# FIXME: If stopping metric and eval metric differ, the previous reported scores will not align as they will be evaluated with stopping_metric, whereas this is evaluated with eval_metric
# This should only impact if the reporter data is used
@@ -316,7 +316,8 @@ def fit_summary(self, verbosity=3):
-------
Dict containing various detailed information. We do not recommend directly printing this dict as it may be very large.
"""
hpo_used = len(self._trainer.hpo_results) > 0
# hpo_used = len(self._trainer.hpo_results) > 0
hpo_used = False # Disabled until a more memory efficient hpo_results object is implemented.
model_types = self._trainer.get_models_attribute_dict(attribute='type')
model_inner_types = self._trainer.get_models_attribute_dict(attribute='type_inner')
model_typenames = {key: model_types[key].__name__ for key in model_types}
@@ -341,13 +342,11 @@ def fit_summary(self, verbosity=3):
'model_pred_times': self._trainer.get_models_attribute_dict('predict_time'),
'num_bag_folds': self._trainer.k_fold,
'max_stack_level': self._trainer.get_max_level(),
'feature_prune': self._trainer.feature_prune,
'hyperparameter_tune': hpo_used,
}
if self.problem_type != REGRESSION:
results['num_classes'] = self._trainer.num_classes
if hpo_used:
results['hpo_results'] = self._trainer.hpo_results
# if hpo_used:
# results['hpo_results'] = self._trainer.hpo_results
# get dict mapping model name to final hyperparameter values for each model:
model_hyperparams = {}
for model_name in self._trainer.get_model_names():
@@ -376,9 +375,9 @@ def fit_summary(self, verbosity=3):
num_stack_str = f" (with {results['max_stack_level']} levels)"
print("Multi-layer stack-ensembling used: %s %s" % (stacking_used, num_stack_str))
hpo_str = ""
if hpo_used and verbosity <= 2:
hpo_str = " (call fit_summary() with verbosity >= 3 to see detailed HPO info)"
print("Hyperparameter-tuning used: %s %s" % (hpo_used, hpo_str))
# if hpo_used and verbosity <= 2:
# hpo_str = " (call fit_summary() with verbosity >= 3 to see detailed HPO info)"
# print("Hyperparameter-tuning used: %s %s" % (hpo_used, hpo_str))
# TODO: uncomment once feature_prune is functional: self._summarize('feature_prune', 'feature-selection used', results)
print("Feature Metadata (Processed):")
print("(raw dtype, special dtypes):")
10 changes: 5 additions & 5 deletions tabular/src/autogluon/tabular/trainer/abstract_trainer.py
@@ -75,8 +75,6 @@ def __init__(self, path: str, problem_type: str, eval_metric=None,
self._model_full_dict_val_score = {} # Dict of FULL model -> normal model validation score in case the normal model had been deleted.
self.reset_paths = False

self.hpo_results = {} # Stores summary of HPO process

self._time_limit = None # Internal float of the total time limit allowed for a given fit call. Used in logging statements.
self._time_train_start = None # Internal timestamp of the time training started for a given fit call. Used in logging statements.

@@ -1029,7 +1027,8 @@ def _train_single_full(self, X_train, y_train, model: AbstractModel, X_unlabeled
del model
model_names_trained = []
else:
self.hpo_results[model.name] = hpo_results
# Commented out because it takes too much space (>>5 GB if run for an hour on a small-medium sized dataset)
# self.hpo_results[model.name] = hpo_results
model_names_trained = []
for model_hpo_name, model_path in hpo_models.items():
model_hpo = self.load_model(model_hpo_name, path=model_path, model_type=type(model))
@@ -1117,18 +1116,19 @@ def _train_multi_initial(self, X_train, y_train, models: List[AbstractModel], k_
hpo_enabled = True
break

hpo_time_ratio = 0.9
if hpo_enabled:
time_split = True
else:
time_split = False
if k_fold == 0:
time_ratio = 0.9 if hpo_enabled else 1
time_ratio = hpo_time_ratio if hpo_enabled else 1
models = self._train_multi_fold(models=models, hyperparameter_tune_kwargs=hyperparameter_tune_kwargs, feature_prune=feature_prune, time_limit=time_limit, time_split=time_split, time_ratio=time_ratio, **fit_args)
else:
k_fold_start = 0
if hpo_enabled or feature_prune:
time_start = time.time()
time_ratio = (1 - (1 / k_fold)) * 0.9
time_ratio = (1 / k_fold) * hpo_time_ratio
models = self._train_multi_fold(models=models, hyperparameter_tune_kwargs=hyperparameter_tune_kwargs, feature_prune=feature_prune,
k_fold_start=0, k_fold_end=1, n_repeats=n_repeats, n_repeat_start=0, time_limit=time_limit, time_split=time_split, time_ratio=time_ratio, **fit_args)
k_fold_start = 1
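For illustration (not part of this diff), the arithmetic behind the time-ratio fix: during HPO only the first of the k folds is fit (k_fold_start=0, k_fold_end=1), so the HPO stage's share of the budget now scales with 1/k_fold rather than 1 - 1/k_fold; the time_limit value below is hypothetical:

hpo_time_ratio = 0.9
k_fold = 8
time_limit = 3600                          # hypothetical seconds allotted to this model

old_ratio = (1 - (1 / k_fold)) * 0.9       # 0.7875 -> 2835 s for a single HPO fold
new_ratio = (1 / k_fold) * hpo_time_ratio  # 0.1125 ->  405 s for a single HPO fold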
13 changes: 7 additions & 6 deletions tabular/src/autogluon/tabular/trainer/model_presets/presets.py
@@ -264,21 +264,22 @@ def model_factory(
name_prefix = model[AG_ARGS].get('name_prefix', '')
name_suff = model[AG_ARGS].get('name_suffix', '')
name_orig = name_prefix + name_main + name_suff
if name_suffix is not None:
name_orig = name_orig + name_suffix
name = name_orig
name_stacker = None
num_increment = 2
if name_suffix is None:
name_suffix = ''
if ensemble_kwargs is None:
name = f'{name_orig}{name_suffix}'
while name in invalid_name_set: # Ensure name is unique
name = f'{name_orig}_{num_increment}'
name = f'{name_orig}_{num_increment}{name_suffix}'
num_increment += 1
else:
name = name_orig
name_bag_suffix = model[AG_ARGS].get('name_bag_suffix', '_BAG')
name_stacker = f'{name}{name_bag_suffix}_L{level}'
name_stacker = f'{name}{name_bag_suffix}_L{level}{name_suffix}'
while name_stacker in invalid_name_set: # Ensure name is unique
name = f'{name_orig}_{num_increment}'
name_stacker = f'{name}{name_bag_suffix}_L{level}'
name_stacker = f'{name}{name_bag_suffix}_L{level}{name_suffix}'
num_increment += 1
model_params = copy.deepcopy(model)
model_params.pop(AG_ARGS, None)
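For illustration, a minimal sketch (not part of this diff) of how the updated model_factory naming resolves collisions while keeping a user-supplied name_suffix at the very end; all names below are hypothetical:

invalid_name_set = {"CustomLGB_custom"}
name_orig, name_suffix, num_increment = "CustomLGB", "_custom", 2

# Non-bagged branch (ensemble_kwargs is None)
name = f"{name_orig}{name_suffix}"                       # "CustomLGB_custom" (taken)
while name in invalid_name_set:
    name = f"{name_orig}_{num_increment}{name_suffix}"   # "CustomLGB_2_custom"
    num_increment += 1

# Bagged/stacked branch: the suffix now also follows the level tag
level, name_bag_suffix = 1, "_BAG"
name_stacker = f"{name_orig}{name_bag_suffix}_L{level}{name_suffix}"  # "CustomLGB_BAG_L1_custom"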
