From e37f874b8f85306e4b2bb2e9ddad6e0388e5fbe7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 14 Mar 2020 00:48:10 -0500 Subject: [PATCH 01/10] [R-package] started implementing first_metric_only --- R-package/R/callback.R | 12 +++- R-package/R/lgb.cv.R | 1 + R-package/R/lgb.train.R | 40 +++++++++--- R-package/R/utils.R | 4 +- R-package/tests/testthat/test_basic.R | 93 +++++++++++++++++++++++++++ 5 files changed, 136 insertions(+), 14 deletions(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 3c8bb243783b..bb9074f21746 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -260,7 +260,7 @@ cb.record.evaluation <- function() { } -cb.early.stop <- function(stopping_rounds, verbose = TRUE) { +cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = TRUE) { # Initialize variables factor_to_bigger_better <- NULL @@ -317,8 +317,16 @@ cb.early.stop <- function(stopping_rounds, verbose = TRUE) { # Store iteration cur_iter <- env$iteration + # By default, any metric can trigger early stopping. This can be disabled + # with 'first_metric_only = TRUE' + if (isTRUE(first_metric_only)) { + evals_to_check <- 1L + } else { + evals_to_check <- seq_len(eval_len) + } + # Loop through evaluation - for (i in seq_len(eval_len)) { + for (i in evals_to_check) { # Store score score <- env$eval_list[[i]]$value * factor_to_bigger_better[i] diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 3433aade6594..efe6f0a6cdd9 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -257,6 +257,7 @@ lgb.cv <- function(params = list() callbacks , cb.early.stop( stopping_rounds = early_stopping_rounds + , first_metric_only = isTRUE(params[["first_metric_only"]]) , verbose = verbose ) ) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index d0dacecc0bd1..b018ce19620a 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -6,7 +6,17 @@ #' @param obj objective function, can be character or custom objective function. Examples include #' \code{regression}, \code{regression_l1}, \code{huber}, #' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function, can be (a list of) character or custom eval function +#' @param eval evaluation function(s). This can be a function or list of functions. Each provided function +#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named +#' list with three elements. +#' \itemize{ +#' \item{\code{name}: A string with the name of the metric, used for printing and storing results.} +#' \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} +#' \item{ +#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. +#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. 
+#' } +#' } #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param categorical_feature list of str or int @@ -89,7 +99,7 @@ lgb.train <- function(params = list(), params <- lgb.check.obj(params, obj) params <- lgb.check.eval(params, eval) fobj <- NULL - feval <- NULL + eval_functions <- NULL # Check for objective (function or not) if (is.function(params$objective)) { @@ -97,9 +107,14 @@ lgb.train <- function(params = list(), params$objective <- "NONE" } - # Check for loss (function or not) + # If loss is a single function, store it as a 1-element list + # (for backwards compatibility). If it is a list of functions, store + # all of them if (is.function(eval)) { - feval <- eval + eval_functions <- list(eval) + } + if (methods::is(eval, "list") & all(sapply(eval, is.function))){ + eval_functions <- eval } # Init predictor to empty @@ -117,6 +132,7 @@ lgb.train <- function(params = list(), if (!is.null(predictor)) { begin_iteration <- predictor$current_iter() + 1L } + # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one n_trees <- .PARAMETER_ALIASES()[["num_iterations"]] if (any(names(params) %in% n_trees)) { @@ -225,6 +241,7 @@ lgb.train <- function(params = list(), callbacks , cb.early.stop( stopping_rounds = early_stopping_rounds + , first_metric_only = isTRUE(params[["first_metric_only"]]) , verbose = verbose ) ) @@ -269,13 +286,16 @@ lgb.train <- function(params = list(), # Collection: Has validation dataset? if (length(valids) > 0L) { - # Validation has training dataset? - if (valid_contain_train) { - eval_list <- append(eval_list, booster$eval_train(feval = feval)) - } + for (eval_function in eval_functions){ - # Has no validation dataset - eval_list <- append(eval_list, booster$eval_valid(feval = feval)) + # Validation has training dataset? 
+ if (valid_contain_train) { + eval_list <- append(eval_list, booster$eval_train(feval = eval_function)) + } + + # Has no validation dataset + eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) + } } # Write evaluation result in environment diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 9b036f91db8d..df0ebe911eea 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -257,8 +257,8 @@ lgb.check.eval <- function(params, eval) { params$metric <- list() } - # If 'eval' is a list or character vector, store it in 'metric' - if (is.character(eval) || identical(class(eval), "list")) { + # If 'eval' is a list of strings or character vector, store it in 'metric' + if (is.character(eval) || (is.list(eval) && all(sapply(eval, is.character)))) { params$metric <- append(params$metric, eval) } diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index d35f257ddac9..3d588dafdf9b 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -571,3 +571,96 @@ test_that("lgb.train() works with early stopping for regression", { , early_stopping_rounds + 1L ) }) + + +test_that("lgb.train() only considers the first metric for early stopping if first_metric_only is provided", { + set.seed(708L) + trainDF <- data.frame( + "feat1" = rnorm(100) + , "target" = rnorm(100) + ) + validDF <- data.frame( + "feat1" = rnorm(50) + , "target" = rnorm(50) + ) + + .increasing_metric <- function(preds, dtrain){ + return(list( + name = "increasing_metric" + , value = as.double(Sys.time()) + , higher_better = TRUE + )) + } + + .constant_metric <- function(preds, dtrain){ + return(list( + name = "constant_metric" + , value = 0.2 + , higher_better = FALSE + )) + } + + dtrain <- lgb.Dataset( + data = as.matrix(trainDF[["feat1"]], drop = FALSE) + , label = trainDF[["target"]] + ) + dvalid <- lgb.Dataset( + data = as.matrix(validDF[["feat1"]], drop = FALSE) + , label = validDF[["target"]] + ) + nrounds <- 10L + + ################################ + # train with no early stopping # + ################################ + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "rmse" + , min_data_in_bin = 5L + ) + , data = dtrain + , nrounds = nrounds + , valids = list( + "valid1" = dvalid + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + , verbose = TRUE + ) + + # the best possible model should come from the first iteration, but + # all 10 training iterations should happen + expect_equal(bst$best_score, 55.0) + expect_equal(bst$best_iter, 1L) + expect_equal(length(bst$record_evals[["valid1"]][["rmse"]][["eval"]]), nrounds) + + ############################# + # train with early stopping # + ############################# + early_stopping_rounds <- 5L + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "rmse" + , min_data_in_bin = 5L + , early_stopping_rounds = early_stopping_rounds + ) + , data = dtrain + , nrounds = nrounds + , valids = list( + "valid1" = dvalid + ) + ) + + # the best model should be from the first iteration, and only 6 rounds + # should have happen (1 with improvement, 5 consecutive with no improvement) + expect_equal(bst$best_score, 55.0) + expect_equal(bst$best_iter, 1L) + expect_equal( + length(bst$record_evals[["valid1"]][["rmse"]][["eval"]]) + , early_stopping_rounds + 1L + ) +}) From c0954a9063efb02e79763a06d46e6bd75a81786d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 14 Mar 2020 14:53:28 -0500 Subject: [PATCH 02/10] trying stuff 
--- R-package/tests/testthat/test_basic.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 3d588dafdf9b..69fe7c295f03 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -528,8 +528,6 @@ test_that("lgb.train() works with early stopping for regression", { bst <- lgb.train( params = list( objective = "regression" - , metric = "rmse" - , min_data_in_bin = 5L ) , data = dtrain , nrounds = nrounds @@ -625,10 +623,10 @@ test_that("lgb.train() only considers the first metric for early stopping if fir "valid1" = dvalid ) , eval = list( - .increasing_metric - , .constant_metric + .constant_metric + , .increasing_metric ) - , verbose = TRUE + , verbose = 1 ) # the best possible model should come from the first iteration, but From eddb2e27e88d731e65cb19f058d446d86083cd8a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 14 Mar 2020 21:02:20 -0500 Subject: [PATCH 03/10] more changes --- R-package/R/callback.R | 2 +- R-package/R/lgb.train.R | 65 ++++- R-package/R/utils.R | 28 +- R-package/man/lgb.train.Rd | 45 ++- R-package/man/lgb_shared_params.Rd | 3 +- R-package/man/lightgbm.Rd | 3 +- R-package/tests/testthat/test_basic.R | 376 ++++++++++++++++++++++---- 7 files changed, 441 insertions(+), 81 deletions(-) diff --git a/R-package/R/callback.R b/R-package/R/callback.R index bb9074f21746..3ac62f1765b4 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -285,7 +285,7 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = cat("Will train until there is no improvement in ", stopping_rounds, " rounds.\n\n", sep = "") } - # Maximization or minimization task + # Internally treat everything as a maximization task factor_to_bigger_better <<- rep.int(1.0, eval_len) best_iter <<- rep.int(-1L, eval_len) best_score <<- rep.int(-Inf, eval_len) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index b018ce19620a..af1441c1b8d6 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -6,17 +6,33 @@ #' @param obj objective function, can be character or custom objective function. Examples include #' \code{regression}, \code{regression_l1}, \code{huber}, #' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function(s). This can be a function or list of functions. Each provided function -#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named -#' list with three elements. -#' \itemize{ -#' \item{\code{name}: A string with the name of the metric, used for printing and storing results.} -#' \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} -#' \item{ -#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. -#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. -#' } -#' } +#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of +#' strings and functions. +#' +#' \itemize{ +#' \item{\bold{a. character vector}: +#' If you provide a character vector to this argument, it should contain strings with valid +#' evaluation metrics. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{The "metric" section of the documentation} +#' for a list of valid metrics. +#' } +#' \item{\bold{b. 
function}: +#' You can provide a custom evaluation function. This +#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named +#' list with three elements: +#' \itemize{ +#' \item{\code{name}: A string with the name of the metric, used for printing and storing results.} +#' \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} +#' \item{ +#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. +#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. +#' } +#' } +#' } +#' \item{\bold{c. list}: +#' If a list is given, it should only contain character vectors and functions. These should follow the +#' requirements from the descriptions above. +#' } +#' } #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param categorical_feature list of str or int @@ -36,6 +52,21 @@ #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } +#' @section Early Stopping: +#' +#' "early stopping" refers to stopping the training process if the model's performance on a given +#' validation set does not improve for several consecutive iterations. +#' +#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable +#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all +#' metrics will be considered for early stopping. +#' +#' If you want to only consider the first metric for early stopping, pass +#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} +#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, +#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) +#' or \code{objective} (passed into \code{params}). +#' #' @return a trained booster model \code{lgb.Booster}. #' #' @examples @@ -99,7 +130,7 @@ lgb.train <- function(params = list(), params <- lgb.check.obj(params, obj) params <- lgb.check.eval(params, eval) fobj <- NULL - eval_functions <- NULL + eval_functions <- list(NULL) # Check for objective (function or not) if (is.function(params$objective)) { @@ -113,8 +144,13 @@ lgb.train <- function(params = list(), if (is.function(eval)) { eval_functions <- list(eval) } - if (methods::is(eval, "list") & all(sapply(eval, is.function))){ - eval_functions <- eval + if (methods::is(eval, "list")) { + eval_functions <- Filter( + f = function(eval_element){ + is.function(eval_element) + } + , x = eval + ) } # Init predictor to empty @@ -296,6 +332,7 @@ lgb.train <- function(params = list(), # Has no validation dataset eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) } + } # Write evaluation result in environment diff --git a/R-package/R/utils.R b/R-package/R/utils.R index df0ebe911eea..7d7f05a4727b 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -250,6 +250,10 @@ lgb.check.obj <- function(params, obj) { } +# [description] Take any character values from eval and store them +# in params$metric. 
This has to account for the fact that +# `eval` could be a character vector, a function, a list of functions, +# or a list with a mix of strings and functions lgb.check.eval <- function(params, eval) { # Check if metric is null, if yes put a list instead @@ -257,9 +261,27 @@ lgb.check.eval <- function(params, eval) { params$metric <- list() } - # If 'eval' is a list of strings or character vector, store it in 'metric' - if (is.character(eval) || (is.list(eval) && all(sapply(eval, is.character)))) { - params$metric <- append(params$metric, eval) + # if 'eval' is a character vector or list, find the character + # elements and add them to 'metric' + if (!is.function(eval)) { + for (i in seq_along(eval)) { + element <- eval[[i]] + if (is.character(element)) { + print(paste0("Adding '", element, "' to list of metrics")) + params$metric <- append(params$metric, element) + } + } + } + + # If more than one character metric was given, then "None" should + # not be included + if (length(params$metric) > 1){ + params$metric <- Filter( + f = function(metric){ + metric != "None" + } + , x = params$metric + ) } return(params) diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 98298ab6f954..65707450f5ad 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -38,7 +38,8 @@ may allow you to pass other types of data like \code{matrix} and then separately \code{regression}, \code{regression_l1}, \code{huber}, \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} -\item{eval}{evaluation function, can be (a list of) character or custom eval function} +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of +strings and functions.} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -82,6 +83,48 @@ a trained booster model \code{lgb.Booster}. \description{ Logic to train with LightGBM } +\section{Early Stopping}{ + + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing and storing results.} + \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. These should follow the + requirements from the descriptions above. + } + \item{\bold{Early stopping behavior}: + + If multiple arguments are given, their order will be preserved. If you enable early stopping by + setting \code{early_stopping_rounds} in \code{params}, by default all metrics will be + considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. 
Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). + } + } +} + \examples{ library(lightgbm) data(agaricus.train, package = "lightgbm") diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index ae2f61a86256..a7143179495e 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -4,7 +4,8 @@ \alias{lgb_shared_params} \title{Shared parameter docs} \arguments{ -\item{callbacks}{List of callback functions that are applied at each iteration.} +\item{callbacks}{list of callback functions +List of callback functions that are applied at each iteration.} \item{data}{a \code{lgb.Dataset} object, used for training. Some functions, such as \code{\link{lgb.cv}}, may allow you to pass other types of data like \code{matrix} and then separately supply diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 88d98d13525d..256a7dc6e8e9 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -45,7 +45,8 @@ If early stopping occurs, the model will have 'best_iter' field.} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} -\item{callbacks}{List of callback functions that are applied at each iteration.} +\item{callbacks}{list of callback functions +List of callback functions that are applied at each iteration.} \item{...}{Additional arguments passed to \code{\link{lgb.train}}. For example \itemize{ diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 69fe7c295f03..f0e98df4163f 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -7,6 +7,57 @@ test <- agaricus.test TOLERANCE <- 1e-6 +# [description] Every time this function is called, it adds 0.1 +# to an accumulator then returns the current value. 
+# This is used to mock the situation where an evaluation +# metric increases every iteration +ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" +assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) +.increasing_metric <- function(preds, dtrain){ + if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)){ + assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) + } + assign( + x = ACCUMULATOR_NAME + , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + 0.1 + , envir = .GlobalEnv + ) + return(list( + name = "increasing_metric" + , value = get(ACCUMULATOR_NAME, envir = .GlobalEnv) + , higher_better = TRUE + )) +} + +# [description] Evaluation function that always returns the +# same value +CONSTANT_METRIC_VALUE <- 0.2 +.constant_metric <- function(preds, dtrain){ + return(list( + name = "constant_metric" + , value = CONSTANT_METRIC_VALUE + , higher_better = FALSE + )) +} + +# sample datasets to test early stopping +DTRAIN_RANDOM_REGRESSION <- lgb.Dataset( + data = as.matrix(rnorm(100L), ncol = 1L, drop = FALSE) + , label = rnorm(100L) +) +DVALID_RANDOM_REGRESSION <- lgb.Dataset( + data = as.matrix(rnorm(50L), ncol = 1L, drop = FALSE) + , label = rnorm(50L) +) +DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset( + data = as.matrix(rnorm(120), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 120, replace = TRUE) +) +DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset( + data = as.matrix(rnorm(37), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 37, replace = TRUE) +) + test_that("train and predict binary classification", { nrounds <- 10L bst <- lightgbm( @@ -570,95 +621,300 @@ test_that("lgb.train() works with early stopping for regression", { ) }) - -test_that("lgb.train() only considers the first metric for early stopping if first_metric_only is provided", { +test_that("lgb.train() does not stop early if early_stopping_rounds is not given", { set.seed(708L) - trainDF <- data.frame( - "feat1" = rnorm(100) - , "target" = rnorm(100) + + increasing_metric_starting_value <- get( + ACCUMULATOR_NAME + , envir = .GlobalEnv ) - validDF <- data.frame( - "feat1" = rnorm(50) - , "target" = rnorm(50) + nrounds <- 10L + metrics <- list( + .constant_metric + , .increasing_metric + ) + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "None" + ) + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list("valid1" = DVALID_RANDOM_REGRESSION) + , eval = metrics ) - .increasing_metric <- function(preds, dtrain){ - return(list( - name = "increasing_metric" - , value = as.double(Sys.time()) - , higher_better = TRUE - )) - } + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) - .constant_metric <- function(preds, dtrain){ - return(list( - name = "constant_metric" - , value = 0.2 - , higher_better = FALSE - )) - } + # all 10 iterations should have happen, and the best_iter should be + # the first one (based on constant_metric) + best_iter <- 1L + expect_equal(bst$best_iter, best_iter) - dtrain <- lgb.Dataset( - data = as.matrix(trainDF[["feat1"]], drop = FALSE) - , label = trainDF[["target"]] + # best_score should be taken from the first metric + expect_equal( + bst$best_score + , bst$record_evals[["valid1"]][["constant_metric"]][["eval"]][[best_iter]] ) - dvalid <- lgb.Dataset( - data = as.matrix(validDF[["feat1"]], drop = FALSE) - , label = validDF[["target"]] + + # early stopping should not have happened. 
Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , nrounds ) - nrounds <- 10L + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) - ################################ - # train with no early stopping # - ################################ +test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to stop early based on all metrics", { + set.seed(708L) + + early_stopping_rounds <- 3L + param_variations <- list( + list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + ) + , list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = FALSE + ) + ) + + for (params in param_variations){ + + nrounds <- 10L + bst <- lgb.train( + params = params + , data = DTRAIN_RANDOM_REGRESSION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) + + # early stopping should have happened, and should have stopped early_stopping_rounds + 1 rounds in + # because constant_metric never improves + # + # the best iteration should be the last one, because increasing_metric was first + # and gets better every iteration + best_iter <- early_stopping_rounds + 1L + expect_equal(bst$best_iter, best_iter) + + # best_score should be taken from "increasing_metric" because it was first + expect_equal( + bst$best_score + , bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]][[best_iter]] + ) + + # early stopping should not have happened. 
even though increasing_metric kept + # getting better, early stopping should have happened because "constant_metric" + # did not improve + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , early_stopping_rounds + 1L + ) + } + +}) + +test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based on only the first metric", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) bst <- lgb.train( params = list( objective = "regression" - , metric = "rmse" - , min_data_in_bin = 5L + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE ) - , data = dtrain + , data = DTRAIN_RANDOM_REGRESSION , nrounds = nrounds , valids = list( - "valid1" = dvalid + "valid1" = DVALID_RANDOM_REGRESSION ) , eval = list( - .constant_metric - , .increasing_metric + .increasing_metric + , .constant_metric ) , verbose = 1 ) - # the best possible model should come from the first iteration, but - # all 10 training iterations should happen - expect_equal(bst$best_score, 55.0) - expect_equal(bst$best_iter, 1L) - expect_equal(length(bst$record_evals[["valid1"]][["rmse"]][["eval"]]), nrounds) + # Only the two functions provided to "eval" should have been evaluated + expect_equal(length(bst$record_evals[["valid1"]]), 2L) - ############################# - # train with early stopping # - ############################# - early_stopping_rounds <- 5L - bst <- lgb.train( + # all 10 iterations should happen, and the best_iter should be the final one + expect_equal(bst$best_iter, nrounds) + + # best_score should be taken from "increasing_metric" + expect_equal( + bst$best_score + , increasing_metric_starting_value + 0.1 * nrounds + ) + + # early stopping should not have happened. 
Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid1"]][["constant_metric"]][["eval"]]) + , nrounds + ) + expect_equal( + length(bst$record_evals[["valid1"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) + +test_that("lgb.train() works when a mixture of functions and strings are passed to eval", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( params = list( objective = "regression" - , metric = "rmse" - , min_data_in_bin = 5L - , early_stopping_rounds = early_stopping_rounds + , metric = "None" ) - , data = dtrain + , data = DTRAIN_RANDOM_REGRESSION , nrounds = nrounds , valids = list( - "valid1" = dvalid + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , "rmse" + , .constant_metric + , "l2" ) ) - # the best model should be from the first iteration, and only 6 rounds - # should have happen (1 with improvement, 5 consecutive with no improvement) - expect_equal(bst$best_score, 55.0) - expect_equal(bst$best_iter, 1L) - expect_equal( - length(bst$record_evals[["valid1"]][["rmse"]][["eval"]]) - , early_stopping_rounds + 1L + # all 4 metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("rmse", "l2", "increasing_metric", "constant_metric") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) + expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) + expected_increasing_metric <- increasing_metric_starting_value + 0.1 + expect_true( + abs( + results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric + ) < TOLERANCE ) + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + +}) + +test_that("lgb.train() works when a character vector is passed to eval", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = c( + "binary_error" + , "binary_logloss" + ) + , verbose = 1 + ) + + # all 4 metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("rmse", "l2", "increasing_metric", "constant_metric") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) + expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) + expected_increasing_metric <- increasing_metric_starting_value + 0.1 + expect_true( + abs( + results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric + ) < TOLERANCE + ) + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + +}) + +test_that("lgb.train() works when a list of strings is passed to eval", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + 
increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = list( + "binary_error" + , "binary_logloss" + ) + , verbose = 1 + ) + + # all 4 metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("rmse", "l2", "increasing_metric", "constant_metric") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) + expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) + expected_increasing_metric <- increasing_metric_starting_value + 0.1 + expect_true( + abs( + results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric + ) < TOLERANCE + ) + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + }) From 0800f675abf6d4f3d4736c96872bcf09988bfc48 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 14 Mar 2020 22:29:37 -0500 Subject: [PATCH 04/10] fixed handling of multiple metrics --- R-package/R/lgb.train.R | 35 +++++--- R-package/R/utils.R | 4 +- R-package/man/lgb.train.Rd | 82 ++++++++++-------- R-package/tests/testthat/test_basic.R | 120 ++++++++++++++++++++------ 4 files changed, 164 insertions(+), 77 deletions(-) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index af1441c1b8d6..0f6f104f2f66 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -12,7 +12,9 @@ #' \itemize{ #' \item{\bold{a. character vector}: #' If you provide a character vector to this argument, it should contain strings with valid -#' evaluation metrics. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{The "metric" section of the documentation} +#' evaluation metrics. +#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ +#' The "metric" section of the documentation} #' for a list of valid metrics. #' } #' \item{\bold{b. function}: @@ -20,8 +22,12 @@ #' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named #' list with three elements: #' \itemize{ -#' \item{\code{name}: A string with the name of the metric, used for printing and storing results.} -#' \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} +#' \item{\code{name}: A string with the name of the metric, used for printing +#' and storing results. +#' } +#' \item{\code{value}: A single number indicating the value of the metric for the +#' given predictions and true values +#' } #' \item{ #' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. #' For example, this would be \code{FALSE} for metrics like MAE or RMSE. @@ -29,8 +35,8 @@ #' } #' } #' \item{\bold{c. list}: -#' If a list is given, it should only contain character vectors and functions. These should follow the -#' requirements from the descriptions above. +#' If a list is given, it should only contain character vectors and functions. +#' These should follow the requirements from the descriptions above. 
#' } #' } #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} @@ -146,9 +152,7 @@ lgb.train <- function(params = list(), } if (methods::is(eval, "list")) { eval_functions <- Filter( - f = function(eval_element){ - is.function(eval_element) - } + f = is.function , x = eval ) } @@ -322,14 +326,25 @@ lgb.train <- function(params = list(), # Collection: Has validation dataset? if (length(valids) > 0L) { - for (eval_function in eval_functions){ + # Get evaluation results with passed-in functions + for (eval_function in eval_functions) { # Validation has training dataset? if (valid_contain_train) { eval_list <- append(eval_list, booster$eval_train(feval = eval_function)) } - # Has no validation dataset + eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) + } + + # Calling booster$eval_valid() will get + # evaluation results with the metrics in params$metric by calling LGBM_BoosterGetEval_R", + # so need to be sure that gets called, which it wouldn't be above if no functions + # were passed in + if (length(eval_functions) == 0L) { + if (valid_contain_train) { + eval_list <- append(eval_list, booster$eval_train(feval = eval_function)) + } eval_list <- append(eval_list, booster$eval_valid(feval = eval_function)) } diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 7d7f05a4727b..0be28f041f59 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -275,9 +275,9 @@ lgb.check.eval <- function(params, eval) { # If more than one character metric was given, then "None" should # not be included - if (length(params$metric) > 1){ + if (length(params$metric) > 1L) { params$metric <- Filter( - f = function(metric){ + f = function(metric) { metric != "None" } , x = params$metric diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 65707450f5ad..59c2e395a189 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -39,7 +39,38 @@ may allow you to pass other types of data like \code{matrix} and then separately \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} \item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of -strings and functions.} + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. 
+ } + }} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -86,43 +117,18 @@ Logic to train with LightGBM \section{Early Stopping}{ - \itemize{ - \item{\bold{a. character vector}: - If you provide a character vector to this argument, it should contain strings with valid - evaluation metrics. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{The "metric" section of the documentation} - for a list of valid metrics. - } - \item{\bold{b. function}: - You can provide a custom evaluation function. This - should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named - list with three elements: - - \itemize{ - \item{\code{name}: A string with the name of the metric, used for printing and storing results.} - \item{\code{value}: A single number indicating the value of the metric for the given predictions and true values} - \item{ - \code{higher_better}: A boolean indicating whether higher values indicate a better fit. - For example, this would be \code{FALSE} for metrics like MAE or RMSE. - } - } - } - \item{\bold{c. list}: - If a list is given, it should only contain character vectors and functions. These should follow the - requirements from the descriptions above. - } - \item{\bold{Early stopping behavior}: - - If multiple arguments are given, their order will be preserved. If you enable early stopping by - setting \code{early_stopping_rounds} in \code{params}, by default all metrics will be - considered for early stopping. - - If you want to only consider the first metric for early stopping, pass - \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} - in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, - a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) - or \code{objective} (passed into \code{params}). - } - } + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). 
} \examples{ diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index f0e98df4163f..4012af324f1f 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -13,8 +13,8 @@ TOLERANCE <- 1e-6 # metric increases every iteration ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) -.increasing_metric <- function(preds, dtrain){ - if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)){ +.increasing_metric <- function(preds, dtrain) { + if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) { assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) } assign( @@ -32,7 +32,7 @@ assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) # [description] Evaluation function that always returns the # same value CONSTANT_METRIC_VALUE <- 0.2 -.constant_metric <- function(preds, dtrain){ +.constant_metric <- function(preds, dtrain) { return(list( name = "constant_metric" , value = CONSTANT_METRIC_VALUE @@ -50,12 +50,12 @@ DVALID_RANDOM_REGRESSION <- lgb.Dataset( , label = rnorm(50L) ) DTRAIN_RANDOM_CLASSIFICATION <- lgb.Dataset( - data = as.matrix(rnorm(120), ncol = 1L, drop = FALSE) - , label = sample(c(0L, 1L), size = 120, replace = TRUE) + data = as.matrix(rnorm(120L), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 120L, replace = TRUE) ) DVALID_RANDOM_CLASSIFICATION <- lgb.Dataset( - data = as.matrix(rnorm(37), ncol = 1L, drop = FALSE) - , label = sample(c(0L, 1L), size = 37, replace = TRUE) + data = as.matrix(rnorm(37L), ncol = 1L, drop = FALSE) + , label = sample(c(0L, 1L), size = 37L, replace = TRUE) ) test_that("train and predict binary classification", { @@ -689,7 +689,7 @@ test_that("If first_metric_only is not given or is FALSE, lgb.train() decides to ) ) - for (params in param_variations){ + for (params in param_variations) { nrounds <- 10L bst <- lgb.train( @@ -758,7 +758,6 @@ test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based .increasing_metric , .constant_metric ) - , verbose = 1 ) # Only the two functions provided to "eval" should have been evaluated @@ -850,7 +849,6 @@ test_that("lgb.train() works when a character vector is passed to eval", { "binary_error" , "binary_logloss" ) - , verbose = 1 ) # all 4 metrics should have been used @@ -875,7 +873,55 @@ test_that("lgb.train() works when a character vector is passed to eval", { }) -test_that("lgb.train() works when a list of strings is passed to eval", { +test_that("lgb.train() works when a list of strings or a character vector is passed to eval", { + + # testing list and character vector, as well as length-1 and length-2 + eval_variations <- list( + c("binary_error", "binary_logloss") + , "binary_logloss" + , list("binary_error", "binary_logloss") + , list("binary_logloss") + ) + + for (eval_variation in eval_variations) { + + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = eval_variation + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = unlist(eval_variation) + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + 
if ("binary_error" %in% unlist(eval_variation)) { + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) + } + if ("binary_logloss" %in% unlist(eval_variation)) { + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) + } + } +}) + +test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { set.seed(708L) nrounds <- 10L early_stopping_rounds <- 3L @@ -883,38 +929,58 @@ test_that("lgb.train() works when a list of strings is passed to eval", { bst <- lgb.train( params = list( objective = "binary" - , metric = "None" + , metric = "binary_error" ) , data = DTRAIN_RANDOM_CLASSIFICATION , nrounds = nrounds , valids = list( "valid1" = DVALID_RANDOM_CLASSIFICATION ) - , eval = list( - "binary_error" - , "binary_logloss" - ) - , verbose = 1 + , eval = "binary_logloss" ) - # all 4 metrics should have been used + # both metrics should have been used expect_named( bst$record_evals[["valid1"]] - , expected = c("rmse", "l2", "increasing_metric", "constant_metric") + , expected = c("binary_error", "binary_logloss") , ignore.order = TRUE , ignore.case = FALSE ) # the difference metrics shouldn't have been mixed up with each other results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) - expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) - expected_increasing_metric <- increasing_metric_starting_value + 0.1 - expect_true( - abs( - results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric - ) < TOLERANCE + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) +}) + +test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { + set.seed(708L) + nrounds <- 10L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "binary_error" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = "binary_logloss" + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid1"]] + , expected = c("binary_error", "binary_logloss") + , ignore.order = TRUE + , ignore.case = FALSE ) - expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) }) From 68e784052f956d7a37d45de41f08d5f03c17a874 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 14 Mar 2020 23:02:10 -0500 Subject: [PATCH 05/10] fixed tests --- R-package/tests/testthat/test_basic.R | 86 ++++++++++----------------- 1 file changed, 31 insertions(+), 55 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 4012af324f1f..70345d519b03 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -6,6 +6,7 @@ train <- agaricus.train test <- agaricus.test TOLERANCE <- 1e-6 +set.seed(708L) # [description] Every time this function is called, it adds 0.1 # to an accumulator then returns the 
current value. @@ -788,7 +789,6 @@ test_that("If first_metric_only is TRUE, lgb.train() decides to stop early based test_that("lgb.train() works when a mixture of functions and strings are passed to eval", { set.seed(708L) nrounds <- 10L - early_stopping_rounds <- 3L increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) bst <- lgb.train( params = list( @@ -818,51 +818,8 @@ test_that("lgb.train() works when a mixture of functions and strings are passed # the difference metrics shouldn't have been mixed up with each other results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) - expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) - expected_increasing_metric <- increasing_metric_starting_value + 0.1 - expect_true( - abs( - results[["increasing_metric"]][["eval"]][[1L]] - expected_increasing_metric - ) < TOLERANCE - ) - expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) - -}) - -test_that("lgb.train() works when a character vector is passed to eval", { - set.seed(708L) - nrounds <- 10L - early_stopping_rounds <- 3L - increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) - bst <- lgb.train( - params = list( - objective = "binary" - , metric = "None" - ) - , data = DTRAIN_RANDOM_CLASSIFICATION - , nrounds = nrounds - , valids = list( - "valid1" = DVALID_RANDOM_CLASSIFICATION - ) - , eval = c( - "binary_error" - , "binary_logloss" - ) - ) - - # all 4 metrics should have been used - expect_named( - bst$record_evals[["valid1"]] - , expected = c("rmse", "l2", "increasing_metric", "constant_metric") - , ignore.order = TRUE - , ignore.case = FALSE - ) - - # the difference metrics shouldn't have been mixed up with each other - results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 0.9278173) < TOLERANCE) - expect_true(abs(results[["l2"]][["eval"]][[1L]] - 0.8608449) < TOLERANCE) + expect_true(abs(results[["rmse"]][["eval"]][[1L]] - 1.105012) < TOLERANCE) + expect_true(abs(results[["l2"]][["eval"]][[1L]] - 1.221051) < TOLERANCE) expected_increasing_metric <- increasing_metric_starting_value + 0.1 expect_true( abs( @@ -887,7 +844,6 @@ test_that("lgb.train() works when a list of strings or a character vector is pas set.seed(708L) nrounds <- 10L - early_stopping_rounds <- 3L increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) bst <- lgb.train( params = list( @@ -913,10 +869,10 @@ test_that("lgb.train() works when a list of strings or a character vector is pas # the difference metrics shouldn't have been mixed up with each other results <- bst$record_evals[["valid1"]] if ("binary_error" %in% unlist(eval_variation)) { - expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE) } if ("binary_logloss" %in% unlist(eval_variation)) { - expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) } } }) @@ -924,7 +880,6 @@ test_that("lgb.train() works when a list of strings or a character vector is pas test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { set.seed(708L) nrounds <- 10L - early_stopping_rounds <- 3L increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) bst <- lgb.train( params = 
list( @@ -949,14 +904,13 @@ test_that("lgb.train() works when you specify both 'metric' and 'eval' with stri # the difference metrics shouldn't have been mixed up with each other results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) - expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) }) test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { set.seed(708L) nrounds <- 10L - early_stopping_rounds <- 3L increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) bst <- lgb.train( params = list( @@ -981,6 +935,28 @@ test_that("lgb.train() works when you specify both 'metric' and 'eval' with stri # the difference metrics shouldn't have been mixed up with each other results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5135135) < TOLERANCE) - expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6992222) < TOLERANCE) + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.4864865) < TOLERANCE) + expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) +}) + +test_that("lgb.train() works when you give a function for eval", { + set.seed(708L) + nrounds <- 10L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.train( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_CLASSIFICATION + ) + , eval = .constant_metric + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid1"]] + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) }) From ac74501e0736d60a698befee420275a30bfd2b0c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 2 Aug 2020 23:35:25 -0500 Subject: [PATCH 06/10] remove duplicate tests --- R-package/tests/testthat/test_basic.R | 31 --------------------------- 1 file changed, 31 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index d73f1b62c308..22fdeda94237 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1086,37 +1086,6 @@ test_that("lgb.train() works when you specify both 'metric' and 'eval' with stri expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) }) -test_that("lgb.train() works when you specify both 'metric' and 'eval' with strings", { - set.seed(708L) - nrounds <- 10L - increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) - bst <- lgb.train( - params = list( - objective = "binary" - , metric = "binary_error" - ) - , data = DTRAIN_RANDOM_CLASSIFICATION - , nrounds = nrounds - , valids = list( - "valid1" = DVALID_RANDOM_CLASSIFICATION - ) - , eval = "binary_logloss" - ) - - # both metrics should have been used - expect_named( - bst$record_evals[["valid1"]] - , expected = c("binary_error", "binary_logloss") - , ignore.order = TRUE - , ignore.case = FALSE - ) - - # the difference metrics shouldn't have been mixed up with each other - results <- bst$record_evals[["valid1"]] - expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 
0.4864865) < TOLERANCE) - expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.6932548) < TOLERANCE) -}) - test_that("lgb.train() works when you give a function for eval", { set.seed(708L) nrounds <- 10L From 5c685e82c8dc63699f29061a6143d7e1ba37072a Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 26 Aug 2020 20:50:14 -0500 Subject: [PATCH 07/10] get training tests --- R-package/R/aliases.R | 14 ++++++++++++++ R-package/R/lgb.train.R | 3 +-- R-package/R/utils.R | 15 +++++++++------ R-package/tests/testthat/test_basic.R | 7 +++++-- R-package/tests/testthat/test_utils.R | 9 +++++++++ 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 09cb86629872..8176125b6f2f 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -108,3 +108,17 @@ ) return(c(learning_params, .DATASET_PARAMETERS())) } + +# [description] +# Per https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst#metric, +# a few different strings can be used to indicate "no metrics". +# [returns] +# A character vector +.NO_METRIC_STRINGS <- function() { + return(c( + "na" + , "None" + , "null" + , "custom" + )) +} diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 8c0e5e15af6d..10348153d171 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -173,7 +173,6 @@ lgb.train <- function(params = list(), if (!is.null(predictor)) { begin_iteration <- predictor$current_iter() + 1L } - # Check for number of rounds passed as parameter - in case there are multiple ones, take only the first one n_trees <- .PARAMETER_ALIASES()[["num_iterations"]] if (any(names(params) %in% n_trees)) { @@ -384,7 +383,7 @@ lgb.train <- function(params = list(), # when using a custom eval function, the metric name is returned from the # function, so figure it out from record_evals - if (!is.null(feval)) { + if (!is.null(eval_functions[1L])) { first_metric <- names(booster$record_evals[[first_valid_name]])[1L] } else { first_metric <- booster$.__enclos_env__$private$eval_names[1L] diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 5472313b4d66..770598810d2e 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -317,10 +317,11 @@ lgb.check.obj <- function(params, obj) { } -# [description] Take any character values from eval and store them -# in params$metric. This has to account for the fact that -# `eval` could be a character vector, a function, a list of functions, -# or a list with a mix of strings and functions +# [description] +# Take any character values from eval and store them in params$metric. 
+# This has to account for the fact that `eval` could be a character vector, +# a function, a list of functions, or a list with a mix of strings and +# functions lgb.check.eval <- function(params, eval) { if (is.null(params$metric)) { @@ -335,7 +336,6 @@ lgb.check.eval <- function(params, eval) { for (i in seq_along(eval)) { element <- eval[[i]] if (is.character(element)) { - print(paste0("Adding '", element, "' to list of metrics")) params$metric <- append(params$metric, element) } } @@ -346,7 +346,7 @@ lgb.check.eval <- function(params, eval) { if (length(params$metric) > 1L) { params$metric <- Filter( f = function(metric) { - metric != "None" + !(metric %in% .NO_METRIC_STRINGS()) } , x = params$metric ) @@ -356,5 +356,8 @@ lgb.check.eval <- function(params, eval) { params$metric <- append(params$metric, unlist(eval)) } + # duplicate metrics should be filtered out + params$metric <- as.list(unique(unlist(params$metric))) + return(params) } diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index c000b0d6df2a..2c82037a7cbb 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -13,7 +13,8 @@ set.seed(708L) # This is used to mock the situation where an evaluation # metric increases every iteration ACCUMULATOR_NAME <- "INCREASING_METRIC_ACUMULATOR" -assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) +assign(x = "INCREASING_METRIC_ACUMULATOR", value = 0.0, envir = .GlobalEnv) + .increasing_metric <- function(preds, dtrain) { if (!exists(ACCUMULATOR_NAME, envir = .GlobalEnv)) { assign(ACCUMULATOR_NAME, 0.0, envir = .GlobalEnv) @@ -758,6 +759,7 @@ test_that("lgb.train() works with early stopping for regression", { bst <- lgb.train( params = list( objective = "regression" + , metric = "rmse" ) , data = dtrain , nrounds = nrounds @@ -780,7 +782,6 @@ test_that("lgb.train() works with early stopping for regression", { params = list( objective = "regression" , metric = "rmse" - , min_data_in_bin = 5L , early_stopping_rounds = early_stopping_rounds ) , data = dtrain @@ -1168,6 +1169,7 @@ test_that("lgb.train() works with early stopping for regression with a metric th ) }) + test_that("lgb.train() supports non-ASCII feature names", { testthat::skip("UTF-8 feature names are not fully supported in the R package") dtrain <- lgb.Dataset( @@ -1555,3 +1557,4 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai expect_equal(pred1, pred2) }) + diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index af440118f936..ab2c21950d64 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -115,3 +115,12 @@ test_that("lgb.check.eval adds eval to metric in params if a list is provided", expect_named(params, "metric") expect_identical(params[["metric"]], list("auc", "binary_error", "binary_logloss")) }) + +test_that("lgb.check.eval drops duplicate metrics and preserves order", { + params <- lgb.check.eval( + params = list(metric = "l1") + , eval = list("l2", "rmse", "l1", "rmse") + ) + expect_named(params, "metric") + expect_identical(params[["metric"]], list("l1", "l2", "rmse")) +}) From ed44f9ee190944dc2019958a623ed7d1db2b7a4d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 26 Aug 2020 23:41:31 -0500 Subject: [PATCH 08/10] fixes for lgb.cv() --- R-package/R/lgb.cv.R | 38 ++++++++++-------- R-package/R/lgb.train.R | 52 +------------------------ R-package/R/lightgbm.R | 51 +++++++++++++++++++++++++ R-package/R/utils.R | 4 
-- R-package/man/lgb.cv.Rd | 55 +++++++++++++++++++++++++-- R-package/man/lgb_shared_params.Rd | 55 +++++++++++++++++++++++++++ R-package/man/lightgbm.Rd | 17 +++++++++ R-package/tests/testthat/test_basic.R | 1 - 8 files changed, 199 insertions(+), 74 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index e38a2d9a82b7..4734d4bad743 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -24,10 +24,6 @@ CVBooster <- R6::R6Class( #' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples. #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' @param weight vector of response values. If not NULL, will set to dataset -#' @param obj objective function, can be character or custom objective function. Examples include -#' \code{regression}, \code{regression_l1}, \code{huber}, -#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function, can be (list of) character or custom eval function #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param showsd \code{boolean}, whether to show standard deviation of cross validation #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified @@ -52,7 +48,7 @@ CVBooster <- R6::R6Class( #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } -#' +#' @inheritSection lgb_shared_params Early Stopping #' @return a trained model \code{lgb.CVBooster}. #' #' @examples @@ -114,7 +110,7 @@ lgb.cv <- function(params = list() params <- lgb.check.obj(params, obj) params <- lgb.check.eval(params, eval) fobj <- NULL - feval <- NULL + eval_functions <- list(NULL) # Check for objective (function or not) if (is.function(params$objective)) { @@ -122,9 +118,17 @@ lgb.cv <- function(params = list() params$objective <- "NONE" } - # Check for loss (function or not) + # If loss is a single function, store it as a 1-element list + # (for backwards compatibility). 
If it is a list of functions, store + # all of them if (is.function(eval)) { - feval <- eval + eval_functions <- list(eval) + } + if (methods::is(eval, "list")) { + eval_functions <- Filter( + f = is.function + , x = eval + ) } # Init predictor to empty @@ -358,7 +362,11 @@ lgb.cv <- function(params = list() # Update one boosting iteration msg <- lapply(cv_booster$boosters, function(fd) { fd$booster$update(fobj = fobj) - fd$booster$eval_valid(feval = feval) + out <- list() + for (eval_function in eval_functions) { + out <- append(out, fd$booster$eval_valid(feval = eval_function)) + } + return(out) }) # Prepare collection of evaluation results @@ -585,7 +593,6 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { ret_eval[[j]]$value <- mean(eval_result[[j]]) } - # Preinit evaluation error ret_eval_err <- NULL # Check for standard deviation @@ -604,10 +611,11 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { } - # Return errors - list( - eval_list = ret_eval - , eval_err_list = ret_eval_err - ) + return({ + list( + eval_list = ret_eval + , eval_err_list = ret_eval_err + ) + }) } diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 10348153d171..e1637ed3c007 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -3,42 +3,6 @@ #' @description Logic to train with LightGBM #' @inheritParams lgb_shared_params #' @param valids a list of \code{lgb.Dataset} objects, used for validation -#' @param obj objective function, can be character or custom objective function. Examples include -#' \code{regression}, \code{regression_l1}, \code{huber}, -#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} -#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of -#' strings and functions. -#' -#' \itemize{ -#' \item{\bold{a. character vector}: -#' If you provide a character vector to this argument, it should contain strings with valid -#' evaluation metrics. -#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ -#' The "metric" section of the documentation} -#' for a list of valid metrics. -#' } -#' \item{\bold{b. function}: -#' You can provide a custom evaluation function. This -#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named -#' list with three elements: -#' \itemize{ -#' \item{\code{name}: A string with the name of the metric, used for printing -#' and storing results. -#' } -#' \item{\code{value}: A single number indicating the value of the metric for the -#' given predictions and true values -#' } -#' \item{ -#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. -#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. -#' } -#' } -#' } -#' \item{\bold{c. list}: -#' If a list is given, it should only contain character vectors and functions. -#' These should follow the requirements from the descriptions above. 
-#' } -#' } #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals} #' @param colnames feature names, if not null, will use this to overwrite the names in dataset #' @param categorical_feature list of str or int @@ -58,21 +22,7 @@ #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } -#' @section Early Stopping: -#' -#' "early stopping" refers to stopping the training process if the model's performance on a given -#' validation set does not improve for several consecutive iterations. -#' -#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable -#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all -#' metrics will be considered for early stopping. -#' -#' If you want to only consider the first metric for early stopping, pass -#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} -#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, -#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) -#' or \code{objective} (passed into \code{params}). -#' +#' @inheritSection lgb_shared_params Early Stopping #' @return a trained booster model \code{lgb.Booster}. #' #' @examples diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 84e2503db2f6..2ea789278af6 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -10,11 +10,61 @@ #' and one metric. If there's more than one, will check all of them #' except the training data. Returns the model with (best_iter + early_stopping_rounds). #' If early stopping occurs, the model will have 'best_iter' field. +#' @param eval evaluation function(s). This can be a character vector, function, or list with a mixture of +#' strings and functions. +#' +#' \itemize{ +#' \item{\bold{a. character vector}: +#' If you provide a character vector to this argument, it should contain strings with valid +#' evaluation metrics. +#' See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ +#' The "metric" section of the documentation} +#' for a list of valid metrics. +#' } +#' \item{\bold{b. function}: +#' You can provide a custom evaluation function. This +#' should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named +#' list with three elements: +#' \itemize{ +#' \item{\code{name}: A string with the name of the metric, used for printing +#' and storing results. +#' } +#' \item{\code{value}: A single number indicating the value of the metric for the +#' given predictions and true values +#' } +#' \item{ +#' \code{higher_better}: A boolean indicating whether higher values indicate a better fit. +#' For example, this would be \code{FALSE} for metrics like MAE or RMSE. +#' } +#' } +#' } +#' \item{\bold{c. list}: +#' If a list is given, it should only contain character vectors and functions. +#' These should follow the requirements from the descriptions above. +#' } +#' } #' @param eval_freq evaluation output frequency, only effect when verbose > 0 #' @param init_model path of model file of \code{lgb.Booster} object, will continue training from this model #' @param nrounds number of training rounds +#' @param obj objective function, can be character or custom objective function. 
Examples include +#' \code{regression}, \code{regression_l1}, \code{huber}, +#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass} #' @param params List of parameters #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training +#' @section Early Stopping: +#' +#' "early stopping" refers to stopping the training process if the model's performance on a given +#' validation set does not improve for several consecutive iterations. +#' +#' If multiple arguments are given to \code{eval}, their order will be preserved. If you enable +#' early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all +#' metrics will be considered for early stopping. +#' +#' If you want to only consider the first metric for early stopping, pass +#' \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} +#' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, +#' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) +#' or \code{objective} (passed into \code{params}). #' @keywords internal NULL @@ -47,6 +97,7 @@ NULL #' the number of real CPU cores, not the number of threads (most #' CPU using hyper-threading to generate 2 threads per CPU core).} #' } +#' @inheritSection lgb_shared_params Early Stopping #' @export lightgbm <- function(data, label = NULL, diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 770598810d2e..b3816a6debd7 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -352,10 +352,6 @@ lgb.check.eval <- function(params, eval) { ) } - if (identical(class(eval), "list")) { - params$metric <- append(params$metric, unlist(eval)) - } - # duplicate metrics should be filtered out params$metric <- as.list(unique(unlist(params$metric))) diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index d4f62c2b2207..987b04ec1a0a 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -45,9 +45,41 @@ may allow you to pass other types of data like \code{matrix} and then separately \item{obj}{objective function, can be character or custom objective function. Examples include \code{regression}, \code{regression_l1}, \code{huber}, - \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} - -\item{eval}{evaluation function, can be (list of) character or custom eval function} +\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} + +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. 
+ For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. + } + }} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -99,6 +131,23 @@ a trained model \code{lgb.CVBooster}. \description{ Cross validation logic used by LightGBM } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + \examples{ \dontrun{ data(agaricus.train, package = "lightgbm") diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index 501997fd2e2d..ed1922bc2225 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -16,12 +16,50 @@ and one metric. If there's more than one, will check all of them except the training data. Returns the model with (best_iter + early_stopping_rounds). If early stopping occurs, the model will have 'best_iter' field.} +\item{eval}{evaluation function(s). This can be a character vector, function, or list with a mixture of + strings and functions. + + \itemize{ + \item{\bold{a. character vector}: + If you provide a character vector to this argument, it should contain strings with valid + evaluation metrics. + See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric}{ + The "metric" section of the documentation} + for a list of valid metrics. + } + \item{\bold{b. function}: + You can provide a custom evaluation function. This + should accept the keyword arguments \code{preds} and \code{dtrain} and should return a named + list with three elements: + \itemize{ + \item{\code{name}: A string with the name of the metric, used for printing + and storing results. + } + \item{\code{value}: A single number indicating the value of the metric for the + given predictions and true values + } + \item{ + \code{higher_better}: A boolean indicating whether higher values indicate a better fit. + For example, this would be \code{FALSE} for metrics like MAE or RMSE. + } + } + } + \item{\bold{c. list}: + If a list is given, it should only contain character vectors and functions. + These should follow the requirements from the descriptions above. + } + }} + \item{eval_freq}{evaluation output frequency, only effect when verbose > 0} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} \item{nrounds}{number of training rounds} +\item{obj}{objective function, can be character or custom objective function. 
Examples include +\code{regression}, \code{regression_l1}, \code{huber}, +\code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}} + \item{params}{List of parameters} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} @@ -29,4 +67,21 @@ If early stopping occurs, the model will have 'best_iter' field.} \description{ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). +} + \keyword{internal} diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 256a7dc6e8e9..13806fbc05a7 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -74,3 +74,20 @@ List of callback functions that are applied at each iteration.} \description{ Simple interface for training a LightGBM model. } +\section{Early Stopping}{ + + + "early stopping" refers to stopping the training process if the model's performance on a given + validation set does not improve for several consecutive iterations. + + If multiple arguments are given to \code{eval}, their order will be preserved. If you enable + early stopping by setting \code{early_stopping_rounds} in \code{params}, by default all + metrics will be considered for early stopping. + + If you want to only consider the first metric for early stopping, pass + \code{first_metric_only = TRUE} in \code{params}. Note that if you also specify \code{metric} + in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, + a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) + or \code{objective} (passed into \code{params}). 
+} + diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 2c82037a7cbb..69a7fb188beb 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1557,4 +1557,3 @@ test_that(paste0("lgb.train() gives same results when using interaction_constrai expect_equal(pred1, pred2) }) - From 62f4e793229e0368bb86f66a0645f5df883eba37 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 31 Aug 2020 01:02:04 -0500 Subject: [PATCH 09/10] fixes for lgb.cv() --- R-package/R/lgb.cv.R | 23 ++-- R-package/tests/testthat/test_basic.R | 166 ++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 8 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 4734d4bad743..671c0f10a850 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -393,7 +393,13 @@ lgb.cv <- function(params = list() # When early stopping is not activated, we compute the best iteration / score ourselves # based on the first first metric if (record && is.na(env$best_score)) { - first_metric <- cv_booster$boosters[[1L]][[1L]]$.__enclos_env__$private$eval_names[1L] + # when using a custom eval function, the metric name is returned from the + # function, so figure it out from record_evals + if (!is.null(eval_functions[1L])) { + first_metric <- names(cv_booster$record_evals[["valid"]])[1L] + } else { + first_metric <- cv_booster$.__enclos_env__$private$eval_names[1L] + } .find_best <- which.min if (isTRUE(env$eval_list[[1L]]$higher_better[1L])) { .find_best <- which.max @@ -585,7 +591,8 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { msg[[i]][[j]]$value })) }) - # Get evaluation + # Get evaluation. Just taking the first element here to + # get structture (name, higher_bettter, data_name) ret_eval <- msg[[1L]] # Go through evaluation length items @@ -593,6 +600,7 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { ret_eval[[j]]$value <- mean(eval_result[[j]]) } + # Preinit evaluation error ret_eval_err <- NULL # Check for standard deviation @@ -611,11 +619,10 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { } - return({ - list( - eval_list = ret_eval - , eval_err_list = ret_eval_err - ) - }) + # Return errors + list( + eval_list = ret_eval + , eval_err_list = ret_eval_err + ) } diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 69a7fb188beb..7a7545ce1f60 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1458,6 +1458,172 @@ test_that("using lightgbm() without early stopping, best_iter and best_score com expect_identical(bst$best_score, auc_scores[which.max(auc_scores)]) }) +test_that("lgb.cv() works when you specify both 'metric' and 'eval' with strings", { + set.seed(708L) + nrounds <- 10L + nfolds <- 4L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "binary" + , metric = "binary_error" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nrounds = nrounds + , nfold = nfolds + , eval = "binary_logloss" + ) + + # both metrics should have been used + expect_named( + bst$record_evals[["valid"]] + , expected = c("binary_error", "binary_logloss") + , ignore.order = TRUE + , ignore.case = FALSE + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid"]] + expect_true(abs(results[["binary_error"]][["eval"]][[1L]] - 0.5005654) < TOLERANCE) + 
expect_true(abs(results[["binary_logloss"]][["eval"]][[1L]] - 0.7011232) < TOLERANCE) + + # all boosters should have been created + expect_length(bst$boosters, nfolds) +}) + +test_that("lgb.cv() works when you give a function for eval", { + set.seed(708L) + nrounds <- 10L + nfolds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "binary" + , metric = "None" + ) + , data = DTRAIN_RANDOM_CLASSIFICATION + , nfold = nfolds + , nrounds = nrounds + , eval = .constant_metric + ) + + # the difference metrics shouldn't have been mixed up with each other + results <- bst$record_evals[["valid"]] + expect_true(abs(results[["constant_metric"]][["eval"]][[1L]] - CONSTANT_METRIC_VALUE) < TOLERANCE) + expect_named(results, "constant_metric") +}) + +test_that("If first_metric_only is TRUE, lgb.cv() decides to stop early based on only the first metric", { + set.seed(708L) + nrounds <- 10L + nfolds <- 5L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE + ) + , data = DTRAIN_RANDOM_REGRESSION + , nfold = nfolds + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .increasing_metric + , .constant_metric + ) + ) + + # Only the two functions provided to "eval" should have been evaluated + expect_named(bst$record_evals[["valid"]], c("increasing_metric", "constant_metric")) + + # all 10 iterations should happen, and the best_iter should be the final one + expect_equal(bst$best_iter, nrounds) + + # best_score should be taken from "increasing_metric" + # + # this expected value looks magical and confusing, but it's because + # evaluation metrics are averaged over all folds. + # + # consider 5-fold CV with a metric that adds 0.1 to a global accumulator + # each time it's called + # + # * iter 1: [0.1, 0.2, 0.3, 0.4, 0.5] (mean = 0.3) + # * iter 2: [0.6, 0.7, 0.8, 0.9, 1.0] (mean = 1.3) + # * iter 3: [1.1, 1.2, 1.3, 1.4, 1.5] (mean = 1.8) + # + cv_value <- increasing_metric_starting_value + mean(seq_len(nfolds) / 10.0) + (nrounds - 1L) * 0.1 * nfolds + expect_equal(bst$best_score, cv_value) + + # early stopping should not have happened. Even though constant_metric + # had 9 consecutive iterations with no improvement, it is ignored because of + # first_metric_only = TRUE + expect_equal( + length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]]) + , nrounds + ) + expect_equal( + length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]]) + , nrounds + ) +}) + +test_that("early stopping works with lgb.cv()", { + set.seed(708L) + nrounds <- 10L + nfolds <- 5L + early_stopping_rounds <- 3L + increasing_metric_starting_value <- get(ACCUMULATOR_NAME, envir = .GlobalEnv) + bst <- lgb.cv( + params = list( + objective = "regression" + , metric = "None" + , early_stopping_rounds = early_stopping_rounds + , first_metric_only = TRUE + ) + , data = DTRAIN_RANDOM_REGRESSION + , nfold = nfolds + , nrounds = nrounds + , valids = list( + "valid1" = DVALID_RANDOM_REGRESSION + ) + , eval = list( + .constant_metric + , .increasing_metric + ) + ) + + # only the two functions provided to "eval" should have been evaluated + expect_named(bst$record_evals[["valid"]], c("constant_metric", "increasing_metric")) + + # best_iter should be based on the first metric. 
 Since constant_metric
+  # never changes, its first iteration was the best one
+  expect_equal(bst$best_iter, 1L)
+
+  # best_score should be taken from the first metric
+  expect_equal(bst$best_score, 0.2)
+
+  # early stopping should have happened, since constant_metric was the first
+  # one passed to eval and it will not improve over consecutive iterations
+  #
+  # note that this test is identical to the previous one, but with the
+  # order of the eval metrics switched
+  expect_equal(
+    length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
+    , early_stopping_rounds + 1
+  )
+  expect_equal(
+    length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
+    , early_stopping_rounds + 1
+  )
+})
+
+context("interaction constraints")
+
 test_that("lgb.train() throws an informative error if interaction_constraints is not a list", {
   dtrain <- lgb.Dataset(train$data, label = train$label)
   params <- list(objective = "regression", interaction_constraints = "[1,2],[3]")

From ea88e44a90c97b15451391c44e9642dc038acca0 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 31 Aug 2020 20:42:01 -0500
Subject: [PATCH 10/10] fix linting

---
 R-package/tests/testthat/test_basic.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 7a7545ce1f60..638981e60cdf 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -1614,11 +1614,11 @@ test_that("early stopping works with lgb.cv()", {
   # order of the eval metrics switched
   expect_equal(
     length(bst$record_evals[["valid"]][["constant_metric"]][["eval"]])
-    , early_stopping_rounds + 1
+    , early_stopping_rounds + 1L
   )
   expect_equal(
     length(bst$record_evals[["valid"]][["increasing_metric"]][["eval"]])
-    , early_stopping_rounds + 1
+    , early_stopping_rounds + 1L
   )
 })
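
For readers wanting to try the behaviour these patches document and test, here is a minimal usage sketch. It is not part of the patch series: the helper name .custom_mae and the metric name "custom_mae" are invented for illustration, the agaricus example data is the dataset shipped with the package, and the getinfo() accessor is assumed to match the package API targeted by these patches. The custom function follows the preds/dtrain signature and the name/value/higher_better return contract described in the parameter docs above, and early stopping is configured through params as described in the "Early Stopping" section.

library(lightgbm)

# custom evaluation function: accepts 'preds' and 'dtrain' and returns a named
# list with 'name', 'value', and 'higher_better'
# (illustrative helper, not part of the patches)
.custom_mae <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  return(list(
    name = "custom_mae"
    , value = mean(abs(labels - preds))
    , higher_better = FALSE
  ))
}

data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dvalid <- lgb.Dataset.create.valid(dtrain, agaricus.test$data, label = agaricus.test$label)

bst <- lgb.train(
  params = list(
    objective = "regression"
    , metric = "l2"
    , early_stopping_rounds = 5L
    # 'metric' is given, so "l2" is treated as the "first" metric and, with
    # first_metric_only = TRUE, it is the only one that can trigger early stopping
    , first_metric_only = TRUE
  )
  , data = dtrain
  , nrounds = 10L
  , valids = list("valid1" = dvalid)
  # a mixed list of built-in metric names and custom functions
  , eval = list("l1", .custom_mae)
)

# each metric is recorded under its own name
names(bst$record_evals[["valid1"]])

The same eval argument and params entries carry over to lgb.cv(), whose fold-averaged results are recorded under record_evals[["valid"]], as exercised by the new tests above.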