feat: require unique learner ids in benchmark_grid (#1195)
* feat: require unique learner ids in benchmark_grid()

* ...

* ...

* ...

* ...

* ...
be-marc authored Nov 5, 2024
1 parent 2e5267a commit 0c46b69
Showing 13 changed files with 40 additions and 37 deletions.
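For context, a minimal sketch of the user-facing change (a hypothetical session; the error text comes from the new assertion in R/assertions.R below, and the ids rpart_a/rpart_b are illustrative):

library(mlr3)

# learners sharing an id are now rejected by benchmark_grid():
benchmark_grid(tsk("iris"), list(lrn("classif.rpart"), lrn("classif.rpart")), rsmp("holdout"))
# Error: Learners need to have unique IDs: classif.rpart, classif.rpart

# assigning distinct ids restores the old workflow:
learners = list(lrn("classif.rpart", id = "rpart_a"), lrn("classif.rpart", id = "rpart_b"))
design = benchmark_grid(tsk("iris"), learners, rsmp("holdout"))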
8 changes: 7 additions & 1 deletion R/assertions.R
@@ -107,7 +107,13 @@ test_matching_task_type = function(task_type, object, class) {
#' @export
#' @param learners (list of [Learner]).
#' @rdname mlr_assertions
assert_learners = function(learners, task = NULL, task_type = NULL, properties = character(), .var.name = vname(learners)) {
assert_learners = function(learners, task = NULL, task_type = NULL, properties = character(), unique_ids = FALSE, .var.name = vname(learners)) {
if (unique_ids) {
ids = map_chr(learners, "id")
if (!test_character(ids, unique = TRUE)) {
stopf("Learners need to have unique IDs: %s", str_collapse(ids))
}
}
invisible(lapply(learners, assert_learner, task = task, task_type = NULL, properties = properties, .var.name = .var.name))
}
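For illustration, the new check can also be triggered directly; a minimal sketch, assuming the exported assert_learners() above (both learners keep the default id "classif.debug"):

library(mlr3)

learners = list(lrn("classif.debug"), lrn("classif.debug"))
assert_learners(learners, unique_ids = TRUE)
# Error: Learners need to have unique IDs: classif.debug, classif.debug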

4 changes: 0 additions & 4 deletions R/benchmark.R
@@ -128,10 +128,6 @@ benchmark = function(design, store_models = FALSE, store_backends = TRUE, encaps
# learner = assert_learner(as_learner(learner, clone = TRUE))
assert_learnable(task, learner)

if (resampling$task_hash != task$hash) {
stopf("Resampling '%s' was not instantiated with task '%s'", resampling$id, task$id)
}

iters = resampling$iters
n_params = max(1L, length(param_values))
# insert constant values
5 changes: 3 additions & 2 deletions R/benchmark_grid.R
@@ -67,7 +67,7 @@
#'
benchmark_grid = function(tasks, learners, resamplings, param_values = NULL, paired = FALSE) {
tasks = assert_tasks(as_tasks(tasks))
learners = assert_learners(as_learners(learners))
learners = assert_learners(as_learners(learners), unique_ids = TRUE)
resamplings = assert_resamplings(as_resamplings(resamplings))
if (!is.null(param_values)) {
assert_param_values(param_values, n_learners = length(learners))
@@ -103,7 +103,8 @@ benchmark_grid = function(tasks, learners, resamplings, param_values = NULL, paired = FALSE) {
if (!identical(task_nrow, unique(map_int(resamplings, "task_nrow")))) {
stop("A Resampling is instantiated for a task with a different number of observations")
}
instances = pmap(grid, function(task, resampling) resamplings[[resampling]]$clone())
# clone resamplings for each task and update task hashes
instances = pmap(grid, function(task, resampling) resampling = resamplings[[resampling]]$clone())
} else {
instances = pmap(grid, function(task, resampling) resamplings[[resampling]]$clone()$instantiate(tasks[[task]]))
}
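As a usage sketch of the paired branch touched by this hunk (assuming, per the paired = TRUE contract, one pre-instantiated resampling per task):

library(mlr3)

tasks = list(tsk("iris"), tsk("sonar"))
resamplings = list(rsmp("cv", folds = 3), rsmp("cv", folds = 3))
resamplings[[1]]$instantiate(tasks[[1]])
resamplings[[2]]$instantiate(tasks[[2]])

# each task is crossed with the learners but paired with its own resampling:
design = benchmark_grid(tasks, lrn("classif.rpart"), resamplings, paired = TRUE)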
12 changes: 9 additions & 3 deletions R/helper_hashes.R
@@ -31,7 +31,7 @@ resampling_task_hashes = function(task, resampling, learner = NULL) {
task_hash = function(task, use_ids, test_ids = NULL, ignore_internal_valid_task = FALSE) {
# order matters: we first check for test_ids and then for the internal_valid_task
internal_valid_task_hash = if (!is.null(test_ids)) {
# this does the same as
# task$internal_valid_task = test_ids
# $internal_valid_task$hash
# but avoids the deep clone
@@ -40,6 +40,12 @@ task_hash = function(task, use_ids, test_ids = NULL, ignore_internal_valid_task = FALSE) {
task$internal_valid_task$hash
}

calculate_hash(class(task), task$id, task$backend$hash, task$col_info, use_ids, task$col_roles,
get_private(task)$.properties, internal_valid_task_hash)
calculate_hash(
class(task),
task$id,
task$backend$hash,
task$col_info,
use_ids,
get_private(task)$.properties,
internal_valid_task_hash)
}
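For illustration, calculate_hash() from mlr3misc digests all of its arguments into one hash, so any added, dropped, or changed component produces a different result (the string inputs here are placeholder values):

library(mlr3misc)

h1 = calculate_hash("TaskClassif", "iris", "some_backend_hash")
h2 = calculate_hash("TaskClassif", "iris", "another_backend_hash")
identical(h1, h2)  # FALSE: every component feeds into the hash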
4 changes: 0 additions & 4 deletions R/resample.R
@@ -71,10 +71,6 @@ resample = function(task, learner, resampling, store_models = FALSE, store_backe
resampling = resampling$instantiate(task)
}

if (resampling$task_hash != task$hash) {
stopf("Resampling '%s' was not instantiated with task '%s'", resampling$id, task$id)
}

n = resampling$iters
pb = if (isNamespaceLoaded("progressr")) {
# NB: the progress bar needs to be created in this env
4 changes: 4 additions & 0 deletions man/Resampling.Rd


3 changes: 3 additions & 0 deletions man/Task.Rd


1 change: 1 addition & 0 deletions man/mlr_assertions.Rd


15 changes: 6 additions & 9 deletions tests/testthat/test_benchmark.R
@@ -463,7 +463,7 @@ test_that("param_values in benchmark", {


# benchmark grid with multiple params and multiple learners
design = benchmark_grid(tasks, lrns(c("classif.debug", "classif.debug")), rsmp("holdout"), param_values = list(list(list(x = 1), list(x = 0.5)), list()))
design = benchmark_grid(tasks, lrns(c("classif.debug", "classif.rpart")), rsmp("holdout"), param_values = list(list(list(x = 1), list(x = 0.5)), list()))
bmr = benchmark(design)
expect_benchmark_result(bmr)
expect_equal(bmr$n_resample_results, 3)
@@ -582,14 +582,11 @@ test_that("score works with predictions and empty predictions", {
expect_equal(tab$classif.ce[1], NaN)
})

test_that("resampling was instantiated on the task", {
test_that("benchmark_grid only allows unique learner ids", {
task = tsk("iris")
learner = lrn("classif.rpart")
task = tsk("pima")
resampling = rsmp("cv", folds = 5)
resampling$instantiate(task)
task = tsk("spam")

design = data.table(task = list(task), learner = list(learner), resampling = list(resampling))
resampling = rsmp("holdout")

expect_error(benchmark(design), "not instantiated")
expect_error(benchmark_grid(task, list(learner, learner), resampling), "unique")
})

4 changes: 2 additions & 2 deletions tests/testthat/test_hotstart.R
@@ -154,7 +154,7 @@ test_that("learners are hotstarted when benchmark is called", {
resampling = rsmp("cv", folds = 3)
resampling$instantiate(task)

design = benchmark_grid(task, list(learner_1, learner_2), resampling)
design = data.table(task = list(task), learner = list(learner_1, learner_2), resampling = list(resampling))
bmr = benchmark(design, store_models = TRUE)

learners = unlist(map(seq_len(bmr$n_resample_results), function(i) bmr$resample_result(i)$learners))
@@ -183,7 +183,7 @@ test_that("learners are trained and hotstarted when benchmark is called", {
resampling = rsmp("cv", folds = 3)
resampling$instantiate(task)

design = benchmark_grid(task, list(learner_1, learner_2), resampling)
design = data.table(task = list(task), learner = list(learner_1, learner_2), resampling = list(resampling))
bmr = benchmark(design, store_models = TRUE)

learners = unlist(map(seq_len(bmr$n_resample_results), function(i) bmr$resample_result(i)$learners))
2 changes: 1 addition & 1 deletion tests/testthat/test_parallel.R
@@ -46,7 +46,7 @@ test_that("parallel benchmark", {
expect_equal(bmr$aggregate(conditions = TRUE)$warnings, 0L)
expect_equal(bmr$aggregate(conditions = TRUE)$errors, 0L)

grid = benchmark_grid(list(tsk("wine"), tsk("sonar")), replicate(2, lrn("classif.debug")), rsmp("cv", folds = 2))
grid = benchmark_grid(list(tsk("wine"), tsk("sonar")), list(lrn("classif.debug", id = "learner_1"), lrn("classif.debug", id = "learner_2")), rsmp("cv", folds = 2))
njobs = 3L
bmr = with_future(future::multisession, {
benchmark(grid, store_models = TRUE)
10 changes: 1 addition & 9 deletions tests/testthat/test_resample.R
@@ -376,6 +376,7 @@ test_that("can even use internal_valid predict set on learners that don't suppor
task = tsk("mtcars")
task$internal_valid_task = 1:10
rr = resample(task, lrn("regr.debug", predict_sets = "internal_valid"), rsmp("holdout"))
expect_warning(rr$score(), "only predicted on sets")
})

test_that("callr during prediction triggers marshaling", {
@@ -511,12 +511,3 @@ test_that("predict_time is 0 if no predict_set is specified", {
expect_true(all(times == 0))
})

test_that("resampling was instantiated on the task", {
learner = lrn("classif.rpart")
task = tsk("pima")
resampling = rsmp("cv", folds = 5)
resampling$instantiate(task)
task = tsk("spam")

expect_error(resample(task, learner, resampling), "not instantiated")
})
5 changes: 3 additions & 2 deletions tests/testthat/test_resultdata.R
@@ -48,13 +48,14 @@ test_that("results are ordered", {

test_that("mlr3tuning use case", {
task = tsk("iris")
learners = lrns(c("classif.rpart", "classif.rpart", "classif.rpart"))
learners = replicate(3, lrn("classif.rpart"), simplify = FALSE)
learners[[1]]$param_set$values = list(xval = 0, cp = 0.1)
learners[[2]]$param_set$values = list(xval = 0, cp = 0.2)
learners[[3]]$param_set$values = list(xval = 0, cp = 0.3)
resampling = rsmp("holdout")
resampling$instantiate(task)

bmr = benchmark(benchmark_grid(task, learners, resampling))
bmr = benchmark(data.table(task = list(task), learner = learners, resampling = list(resampling)))

rdata = get_private(bmr)$.data

