From dce4dd974a6a87d7a009bfd2dbf6a7f59f9876b1 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Sun, 15 Dec 2024 17:18:19 -0800
Subject: [PATCH] Updated support scripts for LightGBM 4 - #8 [skip ci]

Co-authored-by: Nuno Silva
---
 test/support/classifier.py |  2 +-
 test/support/cv.py         | 32 ++++++++++++++++----------------
 test/support/regressor.py  |  2 +-
 test/support/train.py      | 12 ++++++------
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/test/support/classifier.py b/test/support/classifier.py
index 1abb4f4..6601092 100644
--- a/test/support/classifier.py
+++ b/test/support/classifier.py
@@ -33,7 +33,7 @@ print()
 
 print('test_early_stopping')
 
-model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)], early_stopping_rounds=5, verbose=True)
+model.fit(X_train, ym_train, eval_set=[(X_test, ym_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
 
 print()
 print('test_missing_numeric')
diff --git a/test/support/cv.py b/test/support/cv.py
index b2b3916..a3b45a9 100644
--- a/test/support/cv.py
+++ b/test/support/cv.py
@@ -16,10 +16,10 @@
 regression_params = {'objective': 'regression', 'verbosity': -1}
 regression_train = lgb.Dataset(X_train, label=y_train)
 eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False)
-print(eval_hist['l2-mean'][0])
-print(eval_hist['l2-mean'][-1])
-print(eval_hist['l2-stdv'][0])
-print(eval_hist['l2-stdv'][-1])
+print(eval_hist['valid l2-mean'][0])
+print(eval_hist['valid l2-mean'][-1])
+print(eval_hist['valid l2-stdv'][0])
+print(eval_hist['valid l2-stdv'][-1])
 print()
 
 print('test_binary')
@@ -27,10 +27,10 @@
 binary_params = {'objective': 'binary', 'verbosity': -1}
 binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1))
 eval_hist = lgb.cv(binary_params, binary_train, shuffle=False, stratified=False)
-print(eval_hist['binary_logloss-mean'][0])
-print(eval_hist['binary_logloss-mean'][-1])
-print(eval_hist['binary_logloss-stdv'][0])
-print(eval_hist['binary_logloss-stdv'][-1])
+print(eval_hist['valid binary_logloss-mean'][0])
+print(eval_hist['valid binary_logloss-mean'][-1])
+print(eval_hist['valid binary_logloss-stdv'][0])
+print(eval_hist['valid binary_logloss-stdv'][-1])
 print()
 
 print('test_multiclass')
@@ -38,20 +38,20 @@
 multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1}
 multiclass_train = lgb.Dataset(X_train, label=y_train)
 eval_hist = lgb.cv(multiclass_params, multiclass_train, shuffle=False, stratified=False)
-print(eval_hist['multi_logloss-mean'][0])
-print(eval_hist['multi_logloss-mean'][-1])
-print(eval_hist['multi_logloss-stdv'][0])
-print(eval_hist['multi_logloss-stdv'][-1])
+print(eval_hist['valid multi_logloss-mean'][0])
+print(eval_hist['valid multi_logloss-mean'][-1])
+print(eval_hist['valid multi_logloss-stdv'][0])
+print(eval_hist['valid multi_logloss-stdv'][-1])
 print('')
 
 print('test_early_stopping_early')
 
-eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, verbose_eval=True, early_stopping_rounds=5)
-print(len(eval_hist['l2-mean']))
+eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=5)])
+print(len(eval_hist['valid l2-mean']))
 print('')
 
 print('test_early_stopping_not_early')
 
-eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, verbose_eval=True, early_stopping_rounds=500)
-print(len(eval_hist['l2-mean']))
+eval_hist = lgb.cv(regression_params, regression_train, shuffle=False, stratified=False, callbacks=[lgb.log_evaluation(), lgb.early_stopping(stopping_rounds=500)])
+print(len(eval_hist['valid l2-mean']))
diff --git a/test/support/regressor.py b/test/support/regressor.py
index 6f732a9..8e7181a 100644
--- a/test/support/regressor.py
+++ b/test/support/regressor.py
@@ -19,4 +19,4 @@ print('feature_importances', model.feature_importances_.tolist())
 
 print('early_stopping')
 
-model.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=True)
+model.fit(X_train, y_train, eval_set=[(X_test, y_test)], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
diff --git a/test/support/train.py b/test/support/train.py
index 7c764c5..7755b20 100644
--- a/test/support/train.py
+++ b/test/support/train.py
@@ -17,7 +17,7 @@
 regression_params = {'objective': 'regression', 'verbosity': -1}
 regression_train = lgb.Dataset(X_train, label=y_train)
 regression_test = lgb.Dataset(X_test, label=y_test)
-bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], verbose_eval=False)
+bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test])
 y_pred = bst.predict(X_test)
 print(np.sqrt(np.mean((y_pred - y_test)**2)))
 
@@ -27,7 +27,7 @@
 binary_params = {'objective': 'binary', 'verbosity': -1}
 binary_train = lgb.Dataset(X_train, label=y_train.replace(2, 1))
 binary_test = lgb.Dataset(X_test, label=y_test.replace(2, 1))
-bst = lgb.train(binary_params, binary_train, valid_sets=[binary_train, binary_test], verbose_eval=False)
+bst = lgb.train(binary_params, binary_train, valid_sets=[binary_train, binary_test])
 y_pred = bst.predict(X_test)
 print(y_pred[0])
 
@@ -37,20 +37,20 @@
 multiclass_params = {'objective': 'multiclass', 'num_class': 3, 'verbosity': -1}
 multiclass_train = lgb.Dataset(X_train, label=y_train)
 multiclass_test = lgb.Dataset(X_test, label=y_test)
-bst = lgb.train(multiclass_params, multiclass_train, valid_sets=[multiclass_train, multiclass_test], verbose_eval=False)
+bst = lgb.train(multiclass_params, multiclass_train, valid_sets=[multiclass_train, multiclass_test])
 y_pred = bst.predict(X_test)
 print(y_pred[0].tolist())
 print('')
 
 print('test_early_stopping_early')
 
-bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], early_stopping_rounds=5)
+bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()])
 print(bst.best_iteration)
 print('')
 
 print('test_early_stopping_not_early')
 
-bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], early_stopping_rounds=500)
+bst = lgb.train(regression_params, regression_train, valid_sets=[regression_train, regression_test], callbacks=[lgb.early_stopping(stopping_rounds=500), lgb.log_evaluation()])
 # appears to be using training set for best iteration instead of validation set
 print(bst.best_iteration)
 print('')
@@ -58,7 +58,7 @@
 print('test_early_stopping_early_higher_better')
 
 params = {'objective': 'binary', 'metric': 'auc', 'verbosity': -1}
-bst = lgb.train(params, binary_train, valid_sets=[binary_train, binary_test], early_stopping_rounds=5, verbose_eval=False)
+bst = lgb.train(params, binary_train, valid_sets=[binary_train, binary_test], callbacks=[lgb.early_stopping(stopping_rounds=5)])
 print(bst.best_iteration)
 print('')
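Note: the changes above move the scripts to the LightGBM 4 API, where the early_stopping_rounds, verbose_eval, and verbose keyword arguments are replaced by callbacks and the lgb.cv history keys gain a 'valid ' prefix. A minimal, self-contained sketch of that callback-based API follows; the synthetic data and parameter values are illustrative only and not taken from the test scripts.

# Illustrative sketch only (assumes lightgbm>=4.0 and numpy are installed);
# the data below is synthetic and not part of the test suite.
import lightgbm as lgb
import numpy as np

rng = np.random.default_rng(0)
X = rng.random((200, 5))
y = X @ rng.random(5)

train_set = lgb.Dataset(X, label=y)

# Early stopping and per-iteration logging are passed as callbacks in
# LightGBM 4, and cv() prefixes its history keys with the validation
# set name (e.g. 'valid l2-mean' instead of 'l2-mean').
eval_hist = lgb.cv(
    {'objective': 'regression', 'verbosity': -1},
    train_set,
    stratified=False,
    callbacks=[lgb.early_stopping(stopping_rounds=5), lgb.log_evaluation()],
)
print(len(eval_hist['valid l2-mean']))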