From 47f8cda6497be88c79a8360472e09ec9f2a89dc8 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Mon, 16 Oct 2023 16:43:30 +0200 Subject: [PATCH] fix: don't use delayedAssign this simply was not necessary --- R/learner_CoxBoost_surv_coxboost.R | 153 +++++---- R/learner_CoxBoost_surv_cv_coxboost.R | 271 ++++++++-------- R/learner_aorsf_surv_aorsf.R | 291 +++++++++-------- R/learner_flexsurv_surv_flexible.R | 133 ++++---- R/learner_gbm_surv_gbm.R | 143 ++++----- R/learner_glmnet_surv_cv_glmnet.R | 187 ++++++----- R/learner_glmnet_surv_glmnet.R | 183 ++++++----- R/learner_gss_dens_spline.R | 127 ++++---- R/learner_ks_dens_kde_ks.R | 113 ++++--- R/learner_locfit_dens_locfit.R | 119 ++++--- R/learner_logspline_dens_logspline.R | 123 ++++--- R/learner_mboost_surv_blackboost.R | 353 ++++++++++----------- R/learner_mboost_surv_gamboost.R | 314 +++++++++--------- R/learner_mboost_surv_glmboost.R | 305 +++++++++--------- R/learner_mboost_surv_mboost.R | 303 +++++++++--------- R/learner_np_dens_mixed.R | 159 +++++----- R/learner_obliqueRSF_surv_obliqueRSF.R | 145 +++++---- R/learner_partykit_surv_cforest.R | 243 +++++++------- R/learner_partykit_surv_ctree.R | 209 ++++++------ R/learner_penalized_surv_penalized.R | 147 +++++---- R/learner_pendensity_dens_pen.R | 129 ++++---- R/learner_plugdensity_dens_plug.R | 79 +++-- R/learner_randomForestSRC_surv_rfsrc.R | 315 +++++++++--------- R/learner_ranger_surv_ranger.R | 177 +++++------ R/learner_sm_dens_nonpar.R | 109 ++++--- R/learner_survival_surv_nelson.R | 67 ++-- R/learner_survival_surv_parametric.R | 165 +++++----- R/learner_survivalmodels_surv_akritas.R | 95 +++--- R/learner_survivalmodels_surv_coxtime.R | 229 +++++++------- R/learner_survivalmodels_surv_deephit.R | 245 +++++++------- R/learner_survivalmodels_surv_deepsurv.R | 217 +++++++------ R/learner_survivalmodels_surv_dnnsurv.R | 215 +++++++------ R/learner_survivalmodels_surv_loghaz.R | 241 +++++++------- R/learner_survivalmodels_surv_pchazard.R | 239 +++++++------- R/learner_survivalsvm_surv_svm.R | 143 ++++----- R/learner_xgboost_surv_xgboost.R | 387 +++++++++++------------ 36 files changed, 3483 insertions(+), 3590 deletions(-) diff --git a/R/learner_CoxBoost_surv_coxboost.R b/R/learner_CoxBoost_surv_coxboost.R index 83f5fe867..746ae2c00 100644 --- a/R/learner_CoxBoost_surv_coxboost.R +++ b/R/learner_CoxBoost_surv_coxboost.R @@ -24,96 +24,93 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvCoxboost", - R6Class("LearnerSurvCoxboost", - inherit = mlr3proba::LearnerSurv, +LearnerSurvCoxboost = R6Class("LearnerSurvCoxboost", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - unpen.index = p_uty(tags = "train"), - standardize = p_lgl(default = TRUE, tags = "train"), - stepno = p_int(default = 100, lower = 0, tags = "train"), - penalty = p_dbl(tags = "train"), - criterion = p_fct(default = "pscore", levels = c("pscore", "score", "hpscore", "hscore"), tags = "train"), - stepsize.factor = p_dbl(default = 1, tags = "train"), - sf.scheme = p_fct(default = "sigmoid", levels = c("sigmoid", "linear"), tags = "train"), - pendistmat = p_uty(tags = "train"), - connected.index = p_uty(tags = "train"), - x.is.01 = p_lgl(default = FALSE, tags = "train"), - return.score = p_lgl(default = TRUE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - at.step = p_uty(tags = "predict") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + unpen.index = p_uty(tags = "train"), + standardize = p_lgl(default = TRUE, tags = "train"), + stepno = p_int(default = 100, lower = 0, tags = "train"), + penalty = p_dbl(tags = "train"), + criterion = p_fct(default = "pscore", levels = c("pscore", "score", "hpscore", "hscore"), tags = "train"), + stepsize.factor = p_dbl(default = 1, tags = "train"), + sf.scheme = p_fct(default = "sigmoid", levels = c("sigmoid", "linear"), tags = "train"), + pendistmat = p_uty(tags = "train"), + connected.index = p_uty(tags = "train"), + x.is.01 = p_lgl(default = FALSE, tags = "train"), + return.score = p_lgl(default = TRUE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + at.step = p_uty(tags = "predict") + ) - super$initialize( - # see the mlr3book for a description: https://mlr3book.mlr-org.com/extending-mlr3.html - id = "surv.coxboost", - packages = c("mlr3extralearners", "CoxBoost", "pracma"), - feature_types = c("integer", "numeric"), - predict_types = c("distr", "crank", "lp"), - param_set = ps, - properties = "weights", - man = "mlr3extralearners::mlr_learners_surv.coxboost", - label = "Likelihood-based Boosting" - ) - } - ), + super$initialize( + # see the mlr3book for a description: https://mlr3book.mlr-org.com/extending-mlr3.html + id = "surv.coxboost", + packages = c("mlr3extralearners", "CoxBoost", "pracma"), + feature_types = c("integer", "numeric"), + predict_types = c("distr", "crank", "lp"), + param_set = ps, + properties = "weights", + man = "mlr3extralearners::mlr_learners_surv.coxboost", + label = "Likelihood-based Boosting" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") - if ("weights" %in% task$properties) { - pars$weights = as.numeric(task$weights$weight) - } + if ("weights" %in% task$properties) { + pars$weights = as.numeric(task$weights$weight) + } - data = task$data() - tn = task$target_names - time = data[[tn[1L]]] - status = data[[tn[2L]]] - data = as.matrix(data[, !tn, with = FALSE]) + data = task$data() + tn = task$target_names + time = data[[tn[1L]]] + status = data[[tn[2L]]] + data = as.matrix(data[, !tn, with = FALSE]) - with_package("CoxBoost", { - invoke( - CoxBoost::CoxBoost, - time = time, - status = status, - x = data, - .args = pars - ) - }) - }, + with_package("CoxBoost", { + invoke( + CoxBoost::CoxBoost, + time = time, + status = status, + x = data, + .args = pars + ) + }) + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") + pars = 
self$param_set$get_values(tags = "predict") - # get newdata and ensure same ordering in train and predict - newdata = as.matrix(ordered_features(task, self)) + # get newdata and ensure same ordering in train and predict + newdata = as.matrix(ordered_features(task, self)) - lp = as.numeric(invoke(predict, - self$model, - newdata = newdata, - .args = pars, - type = "lp")) + lp = as.numeric(invoke(predict, + self$model, + newdata = newdata, + .args = pars, + type = "lp")) - surv = invoke(predict, - self$model, - newdata = newdata, - .args = pars, - type = "risk", - times = sort(unique(self$model$time))) + surv = invoke(predict, + self$model, + newdata = newdata, + .args = pars, + type = "risk", + times = sort(unique(self$model$time))) - mlr3proba::.surv_return(times = sort(unique(self$model$time)), - surv = surv, - lp = lp) - } - ) + mlr3proba::.surv_return(times = sort(unique(self$model$time)), + surv = surv, + lp = lp) + } ) ) -.extralrns_dict$add("surv.coxboost", function() LearnerSurvCoxboost$new()) +.extralrns_dict$add("surv.coxboost", LearnerSurvCoxboost) diff --git a/R/learner_CoxBoost_surv_cv_coxboost.R b/R/learner_CoxBoost_surv_cv_coxboost.R index e287493e7..9315dab2f 100644 --- a/R/learner_CoxBoost_surv_cv_coxboost.R +++ b/R/learner_CoxBoost_surv_cv_coxboost.R @@ -28,150 +28,147 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvCVCoxboost", - R6Class("LearnerSurvCVCoxboost", - inherit = mlr3proba::LearnerSurv, - - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - maxstepno = p_int(default = 100, lower = 0, tags = "train"), - K = p_int(default = 10, lower = 2, tags = "train"), - type = p_fct(default = "verweij", levels = c("verweij", "naive"), tags = "train"), - folds = p_uty(default = NULL, tags = "train"), - minstepno = p_int(default = 50, lower = 0, tags = "train"), - start.penalty = p_dbl(tags = "train"), - iter.max = p_int(default = 10, lower = 1, tags = "train"), - upper.margin = p_dbl(default = 0.05, lower = 0, upper = 1, tags = "train"), - unpen.index = p_uty(tags = "train"), - standardize = p_lgl(default = TRUE, tags = "train"), - penalty = p_dbl(special_vals = list("optimCoxBoostPenalty"), tags = "train"), - criterion = p_fct(default = "pscore", levels = c("pscore", "score", "hpscore", "hscore"), tags = "train"), - stepsize.factor = p_dbl(default = 1, tags = "train"), - sf.scheme = p_fct(default = "sigmoid", levels = c("sigmoid", "linear"), tags = "train"), - pendistmat = p_uty(tags = "train"), - connected.index = p_uty(tags = "train"), - x.is.01 = p_lgl(default = FALSE, tags = "train"), - return.score = p_lgl(default = TRUE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - at.step = p_uty(tags = "predict") - ) - - super$initialize( - # see the mlr3book for a description: https://mlr3book.mlr-org.com/extending-mlr3.html - id = "surv.cv_coxboost", - packages = c("mlr3extralearners", "CoxBoost", "pracma"), - feature_types = c("integer", "numeric"), - predict_types = c("distr", "crank", "lp"), - param_set = ps, - properties = "weights", - # the help file name is the one used as @name in the roxygen2 block - man = "mlr3extralearners::mlr_learners_surv.cv_coxboost", - label = "Likelihood-based Boosting" - ) +LearnerSurvCVCoxboost = R6Class("LearnerSurvCVCoxboost", + inherit = mlr3proba::LearnerSurv, + + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + maxstepno = p_int(default = 100, lower = 0, tags = "train"), + K = p_int(default = 10, lower = 2, tags = "train"), + type = p_fct(default = "verweij", levels = c("verweij", "naive"), tags = "train"), + folds = p_uty(default = NULL, tags = "train"), + minstepno = p_int(default = 50, lower = 0, tags = "train"), + start.penalty = p_dbl(tags = "train"), + iter.max = p_int(default = 10, lower = 1, tags = "train"), + upper.margin = p_dbl(default = 0.05, lower = 0, upper = 1, tags = "train"), + unpen.index = p_uty(tags = "train"), + standardize = p_lgl(default = TRUE, tags = "train"), + penalty = p_dbl(special_vals = list("optimCoxBoostPenalty"), tags = "train"), + criterion = p_fct(default = "pscore", levels = c("pscore", "score", "hpscore", "hscore"), tags = "train"), + stepsize.factor = p_dbl(default = 1, tags = "train"), + sf.scheme = p_fct(default = "sigmoid", levels = c("sigmoid", "linear"), tags = "train"), + pendistmat = p_uty(tags = "train"), + connected.index = p_uty(tags = "train"), + x.is.01 = p_lgl(default = FALSE, tags = "train"), + return.score = p_lgl(default = TRUE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + at.step = p_uty(tags = "predict") + ) + + super$initialize( + # see the mlr3book for a description: https://mlr3book.mlr-org.com/extending-mlr3.html + id = "surv.cv_coxboost", + packages = c("mlr3extralearners", "CoxBoost", "pracma"), + feature_types = c("integer", "numeric"), + predict_types = c("distr", "crank", "lp"), + param_set = ps, + properties = "weights", + # the help file name is the one used as @name in the roxygen2 block + man = "mlr3extralearners::mlr_learners_surv.cv_coxboost", + label = "Likelihood-based Boosting" + ) + } + ), + + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") + + optim_args = c("minstepno", "start.penalty", "iter.max", "upper.margin", "penalty") + cv_args = c("maxstepno", "K", "type", "folds") + + opt_pars = pars[names(pars) %in% optim_args] + cv_pars = pars[names(pars) %in% cv_args] + cox_pars = pars[names(pars) %nin% c(names(opt_pars), names(cv_pars))] + + if ("weights" %in% task$properties) { + cox_pars$weights = as.numeric(task$weights$weight) } - ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") - - optim_args = c("minstepno", "start.penalty", "iter.max", "upper.margin", "penalty") - cv_args = c("maxstepno", "K", "type", "folds") - - opt_pars = pars[names(pars) %in% optim_args] - cv_pars = pars[names(pars) %in% cv_args] - cox_pars = pars[names(pars) %nin% c(names(opt_pars), names(cv_pars))] - - if ("weights" %in% task$properties) { - cox_pars$weights = as.numeric(task$weights$weight) + data = task$data() + tn = task$target_names + time = data[[tn[1L]]] + status = data[[tn[2L]]] + data = as.matrix(data[, !tn, with = FALSE]) + + pen_optim = FALSE + if (!is.null(opt_pars$penalty)) { + if (opt_pars$penalty == "optimCoxBoostPenalty") { + pen_optim = TRUE + opt_pars$penalty = NULL } + } else { + cv_pars = insert_named(cv_pars, list(penalty = NULL)) + } - data = task$data() - tn = task$target_names - time = data[[tn[1L]]] - status = data[[tn[2L]]] - data = as.matrix(data[, !tn, with = FALSE]) - - pen_optim = FALSE - if (!is.null(opt_pars$penalty)) { - if (opt_pars$penalty == "optimCoxBoostPenalty") { - pen_optim = TRUE - opt_pars$penalty = NULL - } + with_package("CoxBoost", { + if (pen_optim) { + optim = invoke( + CoxBoost::optimCoxBoostPenalty, + time = time, + status = status, + x 
= data, + .args = c(opt_pars, cv_pars) + ) + + return(invoke( + CoxBoost::CoxBoost, + time = time, + status = status, + x = data, + stepno = optim$cv.res$optimal.step, + penalty = optim$penalty, + .args = cox_pars + )) } else { - cv_pars = insert_named(cv_pars, list(penalty = NULL)) + optimal_step = invoke( + CoxBoost::cv.CoxBoost, + time = time, + status = status, + x = data, + .args = c(cv_pars, cox_pars) + )$optimal.step + + return(invoke( + CoxBoost::CoxBoost, + time = time, + status = status, + x = data, + stepno = optimal_step, + .args = cox_pars + )) } + }) + }, - with_package("CoxBoost", { - if (pen_optim) { - optim = invoke( - CoxBoost::optimCoxBoostPenalty, - time = time, - status = status, - x = data, - .args = c(opt_pars, cv_pars) - ) - - return(invoke( - CoxBoost::CoxBoost, - time = time, - status = status, - x = data, - stepno = optim$cv.res$optimal.step, - penalty = optim$penalty, - .args = cox_pars - )) - } else { - optimal_step = invoke( - CoxBoost::cv.CoxBoost, - time = time, - status = status, - x = data, - .args = c(cv_pars, cox_pars) - )$optimal.step - - return(invoke( - CoxBoost::CoxBoost, - time = time, - status = status, - x = data, - stepno = optimal_step, - .args = cox_pars - )) - } - }) - }, - - .predict = function(task) { - - pars = self$param_set$get_values(tags = "predict") - - # get newdata and ensure same ordering in train and predict - newdata = as.matrix(ordered_features(task, self)) - - lp = as.numeric(invoke(predict, - self$model, - newdata = newdata, - .args = pars, - type = "lp")) - - surv = invoke(predict, - self$model, - newdata = newdata, - .args = pars, - type = "risk", - times = sort(unique(self$model$time))) - - mlr3proba::.surv_return(times = sort(unique(self$model$time)), - surv = surv, - lp = lp) - } - ) + .predict = function(task) { + + pars = self$param_set$get_values(tags = "predict") + + # get newdata and ensure same ordering in train and predict + newdata = as.matrix(ordered_features(task, self)) + + lp = as.numeric(invoke(predict, + self$model, + newdata = newdata, + .args = pars, + type = "lp")) + + surv = invoke(predict, + self$model, + newdata = newdata, + .args = pars, + type = "risk", + times = sort(unique(self$model$time))) + + mlr3proba::.surv_return(times = sort(unique(self$model$time)), + surv = surv, + lp = lp) + } ) ) -.extralrns_dict$add("surv.cv_coxboost", function() LearnerSurvCVCoxboost$new()) +.extralrns_dict$add("surv.cv_coxboost", LearnerSurvCVCoxboost) diff --git a/R/learner_aorsf_surv_aorsf.R b/R/learner_aorsf_surv_aorsf.R index 2bdee937c..d76bbeed9 100644 --- a/R/learner_aorsf_surv_aorsf.R +++ b/R/learner_aorsf_surv_aorsf.R @@ -25,156 +25,153 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvAorsf", - R6Class("LearnerSurvAorsf", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - n_tree = p_int(default = 500L, lower = 1L, tags = "train"), - n_split = p_int(default = 5L, lower = 1L, tags = "train"), - n_retry = p_int(default = 3L, lower = 0L, tags = "train"), - mtry = p_int(default = NULL, lower = 1L, - special_vals = list(NULL), tags = "train"), - mtry_ratio = p_dbl(lower = 0, upper = 1, tags = "train"), - control_type = p_fct(levels = c("fast", "cph", "net"), - default = "fast", tags = "train"), - control_fast_do_scale = p_lgl(default = TRUE, tags = "train"), - control_fast_method = p_fct(levels = c("efron", "breslow"), - default = "efron", tags = "train"), - control_cph_method = p_fct(levels = c("efron", "breslow"), - default = "efron", tags = "train"), - control_cph_eps = p_dbl(default = 1e-9, lower = 0, tags = "train"), - control_cph_iter_max = p_int(default = 20L, lower = 1, tags = "train"), - control_net_alpha = p_dbl(default = 0.5, tags = "train"), - control_net_df_target = p_int(default = NULL, lower = 1L, - special_vals = list(NULL), - tags = "train"), - leaf_min_events = p_int(default = 1L, lower = 1L, tags = "train"), - leaf_min_obs = p_int(default = 5L, lower = 1L, tags = "train"), - split_min_events = p_int(default = 5L, lower = 1L, tags = "train"), - split_min_obs = p_int(default = 10, lower = 1L, tags = "train"), - split_min_stat = p_dbl(default = 3.841459, lower = 0, tags = "train"), - oobag_pred_type = p_fct(levels = c("none", "surv", "risk", "chf"), - default = "surv", tags = "train"), - importance = p_fct(levels = c("none", "anova", "negate", "permute"), - default = "anova", tags = "train"), - oobag_pred_horizon = p_dbl(default = NULL, special_vals = list(NULL), - tags = "train", lower = 0), - oobag_eval_every = p_int(default = NULL, special_vals = list(NULL), - lower = 1, tags = "train"), - attach_data = p_lgl(default = TRUE, tags = "train"), - verbose_progress = p_lgl(default = FALSE, tags = "train"), - na_action = p_fct(levels = c("fail", "omit", "impute_meanmode"), default = "fail", tags = "train")) +LearnerSurvAorsf = R6Class("LearnerSurvAorsf", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + n_tree = p_int(default = 500L, lower = 1L, tags = "train"), + n_split = p_int(default = 5L, lower = 1L, tags = "train"), + n_retry = p_int(default = 3L, lower = 0L, tags = "train"), + mtry = p_int(default = NULL, lower = 1L, + special_vals = list(NULL), tags = "train"), + mtry_ratio = p_dbl(lower = 0, upper = 1, tags = "train"), + control_type = p_fct(levels = c("fast", "cph", "net"), + default = "fast", tags = "train"), + control_fast_do_scale = p_lgl(default = TRUE, tags = "train"), + control_fast_method = p_fct(levels = c("efron", "breslow"), + default = "efron", tags = "train"), + control_cph_method = p_fct(levels = c("efron", "breslow"), + default = "efron", tags = "train"), + control_cph_eps = p_dbl(default = 1e-9, lower = 0, tags = "train"), + control_cph_iter_max = p_int(default = 20L, lower = 1, tags = "train"), + control_net_alpha = p_dbl(default = 0.5, tags = "train"), + control_net_df_target = p_int(default = NULL, lower = 1L, + special_vals = list(NULL), + tags = "train"), + leaf_min_events = p_int(default = 1L, lower = 1L, tags = "train"), + leaf_min_obs = p_int(default = 5L, lower = 1L, tags = "train"), + split_min_events = p_int(default = 5L, lower = 1L, tags = "train"), + split_min_obs = p_int(default = 10, lower = 1L, tags = "train"), + split_min_stat = p_dbl(default = 3.841459, lower = 0, tags = "train"), + oobag_pred_type = p_fct(levels = c("none", "surv", "risk", "chf"), + default = "surv", tags = "train"), + importance = p_fct(levels = c("none", "anova", "negate", "permute"), + default = "anova", tags = "train"), + oobag_pred_horizon = p_dbl(default = NULL, special_vals = list(NULL), + tags = "train", lower = 0), + oobag_eval_every = p_int(default = NULL, special_vals = list(NULL), + lower = 1, tags = "train"), + attach_data = p_lgl(default = TRUE, tags = "train"), + verbose_progress = p_lgl(default = FALSE, tags = "train"), + na_action = p_fct(levels = c("fail", "omit", "impute_meanmode"), default = "fail", tags = "train")) - super$initialize( - id = "surv.aorsf", - packages = c("mlr3extralearners", "aorsf", "pracma"), - feature_types = c("integer", "numeric", "factor", "ordered"), - predict_types = c("crank", "distr"), - param_set = ps, - properties = c("oob_error", "importance", "missings"), - man = "mlr3extralearners::mlr_learners_surv.aorsf", - label = "Oblique Random Forest" - ) - }, - #' @description - #' OOB concordance error extracted from the model slot - #' `eval_oobag$stat_values` - #' @return `numeric()`. - oob_error = function() { - nrows = nrow(self$model$eval_oobag$stat_values) - 1 - self$model$eval_oobag$stat_values[nrows, 1L] - }, - #' @description - #' The importance scores are extracted from the model. - #' @return Named `numeric()`. - importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - sort(aorsf::orsf_vi(self$model, group_factors = TRUE), - decreasing = TRUE) + super$initialize( + id = "surv.aorsf", + packages = c("mlr3extralearners", "aorsf", "pracma"), + feature_types = c("integer", "numeric", "factor", "ordered"), + predict_types = c("crank", "distr"), + param_set = ps, + properties = c("oob_error", "importance", "missings"), + man = "mlr3extralearners::mlr_learners_surv.aorsf", + label = "Oblique Random Forest" + ) + }, + #' @description + #' OOB concordance error extracted from the model slot + #' `eval_oobag$stat_values` + #' @return `numeric()`. 
+ oob_error = function() { + nrows = nrow(self$model$eval_oobag$stat_values) + 1 - self$model$eval_oobag$stat_values[nrows, 1L] + }, + #' @description + #' The importance scores are extracted from the model. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ), - private = list( - .train = function(task) { - # initialize - pv = self$param_set$get_values(tags = "train") - pv = convert_ratio(pv, "mtry", "mtry_ratio", - length(task$feature_names)) - # helper function to organize aorsf control function inputs - dflt_if_null = function(params, slot_name) { - out = params[[slot_name]] - if (is.null(out)) out <- self$param_set$default[[slot_name]] - out - } - # default value for oobag_eval_every is ntree, but putting - # default = ntree in p_int() above would be problematic, so: - if (is.null(pv$oobag_eval_every)) { - pv$oobag_eval_every = dflt_if_null(pv, "n_tree") - } - control = switch( - dflt_if_null(pv, "control_type"), - "fast" = { - aorsf::orsf_control_fast( - method = dflt_if_null(pv, "control_fast_method"), - do_scale = dflt_if_null(pv, "control_fast_do_scale") - ) - }, - "cph" = { - aorsf::orsf_control_cph( - method = dflt_if_null(pv, "control_cph_method"), - eps = dflt_if_null(pv, "control_cph_eps"), - iter_max = dflt_if_null(pv, "control_cph_iter_max") - ) - }, - "net" = { - aorsf::orsf_control_net( - alpha = dflt_if_null(pv, "control_net_alpha"), - df_target = dflt_if_null(pv, "control_net_df_target") - ) - } - ) - # these parameters are used to organize the control arguments - # above but are not used directly by aorsf::orsf(), so: - pv$control_type = NULL - pv$control_fast_do_scale = NULL - pv$control_fast_method = NULL - pv$control_cph_method = NULL - pv$control_cph_eps = NULL - pv$control_cph_iter_max = NULL - pv$control_net_alpha = NULL - pv$control_net_df_target = NULL - invoke( - aorsf::orsf, - data = task$data(), - formula = task$formula(), - weights = task$weights, - control = control, - no_fit = FALSE, - .args = pv - ) - }, - .predict = function(task) { - pv = self$param_set$get_values(tags = "predict") - time = self$model$data[[task$target_names[1]]] - status = self$model$data[[task$target_names[2]]] - utime = sort(unique(time[status == 1]), decreasing = FALSE) - surv = mlr3misc::invoke(predict, - self$model, - new_data = ordered_features(task, self), - pred_horizon = utime, - pred_type = "surv", - .args = pv - ) - mlr3proba::.surv_return(times = utime, surv = surv) + sort(aorsf::orsf_vi(self$model, group_factors = TRUE), + decreasing = TRUE) + } + ), + private = list( + .train = function(task) { + # initialize + pv = self$param_set$get_values(tags = "train") + pv = convert_ratio(pv, "mtry", "mtry_ratio", + length(task$feature_names)) + # helper function to organize aorsf control function inputs + dflt_if_null = function(params, slot_name) { + out = params[[slot_name]] + if (is.null(out)) out <- self$param_set$default[[slot_name]] + out } - ) + # default value for oobag_eval_every is ntree, but putting + # default = ntree in p_int() above would be problematic, so: + if (is.null(pv$oobag_eval_every)) { + pv$oobag_eval_every = dflt_if_null(pv, "n_tree") + } + control = switch( + dflt_if_null(pv, "control_type"), + "fast" = { + aorsf::orsf_control_fast( + method = dflt_if_null(pv, "control_fast_method"), + do_scale = dflt_if_null(pv, "control_fast_do_scale") + ) + }, + "cph" = { + aorsf::orsf_control_cph( + method = dflt_if_null(pv, "control_cph_method"), + eps = dflt_if_null(pv, "control_cph_eps"), + 
iter_max = dflt_if_null(pv, "control_cph_iter_max") + ) + }, + "net" = { + aorsf::orsf_control_net( + alpha = dflt_if_null(pv, "control_net_alpha"), + df_target = dflt_if_null(pv, "control_net_df_target") + ) + } + ) + # these parameters are used to organize the control arguments + # above but are not used directly by aorsf::orsf(), so: + pv$control_type = NULL + pv$control_fast_do_scale = NULL + pv$control_fast_method = NULL + pv$control_cph_method = NULL + pv$control_cph_eps = NULL + pv$control_cph_iter_max = NULL + pv$control_net_alpha = NULL + pv$control_net_df_target = NULL + invoke( + aorsf::orsf, + data = task$data(), + formula = task$formula(), + weights = task$weights, + control = control, + no_fit = FALSE, + .args = pv + ) + }, + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + time = self$model$data[[task$target_names[1]]] + status = self$model$data[[task$target_names[2]]] + utime = sort(unique(time[status == 1]), decreasing = FALSE) + surv = mlr3misc::invoke(predict, + self$model, + new_data = ordered_features(task, self), + pred_horizon = utime, + pred_type = "surv", + .args = pv + ) + mlr3proba::.surv_return(times = utime, surv = surv) + } ) ) -.extralrns_dict$add("surv.aorsf", function() LearnerSurvAorsf$new()) +.extralrns_dict$add("surv.aorsf", LearnerSurvAorsf) diff --git a/R/learner_flexsurv_surv_flexible.R b/R/learner_flexsurv_surv_flexible.R index a51527fe1..611e26fe7 100644 --- a/R/learner_flexsurv_surv_flexible.R +++ b/R/learner_flexsurv_surv_flexible.R @@ -34,75 +34,72 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvFlexible", - R6Class("LearnerSurvFlexible", - inherit = mlr3proba::LearnerSurv, - - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - bhazard = p_uty(tags = "train"), - k = p_int(default = 0L, lower = 0L, tags = "train"), - knots = p_uty(tags = "train"), - bknots = p_uty(tags = "train"), - scale = p_fct(default = "hazard", levels = c("hazard", "odds", "normal"), tags = "train"), - timescale = p_fct(default = "log", levels = c("log", "identity"), tags = "train"), - inits = p_uty(tags = "train"), - rtrunc = p_uty(tags = "train"), - fixedpars = p_uty(tags = "train"), - cl = p_dbl(default = 0.95, lower = 0, upper = 1, tags = "train"), - maxiter = p_int(default = 30L, tags = "train"), - rel.tolerance = p_dbl(default = 1e-09, tags = "train"), - toler.chol = p_dbl(default = 1e-10, tags = "train"), - debug = p_int(default = 0, lower = 0, upper = 1, tags = "train"), - outer.max = p_int(default = 10L, tags = "train") - ) - - # value of k is changed as the default is equivalent (and a much more inefficient) - # implementation of `surv.parametric` - ps$values = list(k = 1) - - super$initialize( - id = "surv.flexible", - packages = c("mlr3extralearners", "flexsurv", "pracma"), - feature_types = c("logical", "integer", "factor", "numeric"), - predict_types = c("distr", "crank", "lp"), - param_set = ps, - properties = "weights", - man = "mlr3extralearners::mlr_learners_surv.flexible", - label = "Flexible Parametric Splines" - ) +LearnerSurvFlexible = R6Class("LearnerSurvFlexible", + inherit = mlr3proba::LearnerSurv, + + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + bhazard = p_uty(tags = "train"), + k = p_int(default = 0L, lower = 0L, tags = "train"), + knots = p_uty(tags = "train"), + bknots = p_uty(tags = "train"), + scale = p_fct(default = "hazard", levels = c("hazard", "odds", "normal"), tags = "train"), + timescale = p_fct(default = "log", levels = c("log", "identity"), tags = "train"), + inits = p_uty(tags = "train"), + rtrunc = p_uty(tags = "train"), + fixedpars = p_uty(tags = "train"), + cl = p_dbl(default = 0.95, lower = 0, upper = 1, tags = "train"), + maxiter = p_int(default = 30L, tags = "train"), + rel.tolerance = p_dbl(default = 1e-09, tags = "train"), + toler.chol = p_dbl(default = 1e-10, tags = "train"), + debug = p_int(default = 0, lower = 0, upper = 1, tags = "train"), + outer.max = p_int(default = 10L, tags = "train") + ) + + # value of k is changed as the default is equivalent (and a much more inefficient) + # implementation of `surv.parametric` + ps$values = list(k = 1) + + super$initialize( + id = "surv.flexible", + packages = c("mlr3extralearners", "flexsurv", "pracma"), + feature_types = c("logical", "integer", "factor", "numeric"), + predict_types = c("distr", "crank", "lp"), + param_set = ps, + properties = "weights", + man = "mlr3extralearners::mlr_learners_surv.flexible", + label = "Flexible Parametric Splines" + ) + } + ), + + private = list( + .train = function(task) { + pars_train = self$param_set$get_values(tags = "train") + args_ctrl = formalArgs(survival::survreg.control) + pars_ctrl = pars_train[names(pars_train) %in% args_ctrl] + pars_train = pars_train[names(pars_train) %nin% args_ctrl] + pars_train$sr.control = invoke(survival::survreg.control, .args = pars_ctrl) + + if ("weights" %in% task$properties) { + pars_train$weights = task$weights$weight } - ), - private = list( - .train = function(task) { - pars_train = self$param_set$get_values(tags = "train") - args_ctrl = formalArgs(survival::survreg.control) - pars_ctrl = pars_train[names(pars_train) %in% args_ctrl] - pars_train = pars_train[names(pars_train) %nin% args_ctrl] - pars_train$sr.control = invoke(survival::survreg.control, .args = pars_ctrl) - - if ("weights" %in% task$properties) { - pars_train$weights = task$weights$weight - } - - invoke(flexsurv::flexsurvspline, - formula = task$formula(task$feature_names), - data = task$data(), .args = pars_train) - }, - - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - pred = invoke(predict_flexsurvreg, self$model, task, .args = pars, learner = self) - - # crank is defined as the mean of the survival distribution - list(distr = pred$distr, lp = pred$lp, crank = pred$lp) - } - ) + invoke(flexsurv::flexsurvspline, + formula = task$formula(task$feature_names), + data = task$data(), .args = pars_train) + }, + + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + pred = invoke(predict_flexsurvreg, self$model, task, .args = pars, learner = self) + + # crank is defined as the mean of the survival distribution + list(distr = pred$distr, lp = pred$lp, crank = pred$lp) + } ) ) @@ -209,4 +206,4 @@ predict_flexsurvreg = function(object, task, learner, ...) 
{ return(list(distr = distr, lp = lp)) } -.extralrns_dict$add("surv.flexible", function() LearnerSurvFlexible$new()) +.extralrns_dict$add("surv.flexible", LearnerSurvFlexible) diff --git a/R/learner_gbm_surv_gbm.R b/R/learner_gbm_surv_gbm.R index 0dd397edd..62109438b 100644 --- a/R/learner_gbm_surv_gbm.R +++ b/R/learner_gbm_surv_gbm.R @@ -30,90 +30,87 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvGBM", - R6Class("LearnerSurvGBM", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - distribution = p_fct(levels = c("coxph"), default = "coxph", tags = "train"), - n.trees = p_int(default = 100L, lower = 1L, tags = c("train", "predict")), - cv.folds = p_int(default = 0L, lower = 0L, tags = "train"), - interaction.depth = p_int(default = 1L, lower = 1L, tags = "train"), - n.minobsinnode = p_int(default = 10L, lower = 1L, tags = "train"), - shrinkage = p_dbl(default = 0.001, lower = 0, tags = "train"), - bag.fraction = p_dbl(default = 0.5, lower = 0, upper = 1, tags = "train"), - train.fraction = p_dbl(default = 1, lower = 0, upper = 1, tags = "train"), - keep.data = p_lgl(default = FALSE, tags = "train"), - verbose = p_lgl(default = FALSE, tags = "train"), - var.monotone = p_uty(tags = "train"), - n.cores = p_int(default = 1, tags = c("train", "threads")), - single.tree = p_lgl(default = FALSE, tags = "predict") - ) - ps$values = list(distribution = "coxph", keep.data = FALSE, n.cores = 1) +LearnerSurvGBM = R6Class("LearnerSurvGBM", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + distribution = p_fct(levels = c("coxph"), default = "coxph", tags = "train"), + n.trees = p_int(default = 100L, lower = 1L, tags = c("train", "predict")), + cv.folds = p_int(default = 0L, lower = 0L, tags = "train"), + interaction.depth = p_int(default = 1L, lower = 1L, tags = "train"), + n.minobsinnode = p_int(default = 10L, lower = 1L, tags = "train"), + shrinkage = p_dbl(default = 0.001, lower = 0, tags = "train"), + bag.fraction = p_dbl(default = 0.5, lower = 0, upper = 1, tags = "train"), + train.fraction = p_dbl(default = 1, lower = 0, upper = 1, tags = "train"), + keep.data = p_lgl(default = FALSE, tags = "train"), + verbose = p_lgl(default = FALSE, tags = "train"), + var.monotone = p_uty(tags = "train"), + n.cores = p_int(default = 1, tags = c("train", "threads")), + single.tree = p_lgl(default = FALSE, tags = "predict") + ) + ps$values = list(distribution = "coxph", keep.data = FALSE, n.cores = 1) - super$initialize( - id = "surv.gbm", - param_set = ps, - predict_types = c("crank", "lp"), - feature_types = c("integer", "numeric", "factor", "ordered"), - properties = c("missings", "weights", "importance"), - man = "mlr3extralearners::mlr_learners_surv.gbm", - packages = c("mlr3extralearners", "gbm"), - label = "Gradient Boosting" - ) - }, + super$initialize( + id = "surv.gbm", + param_set = ps, + predict_types = c("crank", "lp"), + feature_types = c("integer", "numeric", "factor", "ordered"), + properties = c("missings", "weights", "importance"), + man = "mlr3extralearners::mlr_learners_surv.gbm", + packages = c("mlr3extralearners", "gbm"), + label = "Gradient Boosting" + ) + }, - #' @description - #' The importance scores are extracted from the model slot `variable.importance`. - #' @return Named `numeric()`. 
- importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - sum = summary(self$model, plotit = FALSE) - relinf = sum$rel.inf - names(relinf) = sum$var - - relinf + #' @description + #' The importance scores are extracted from the model slot `variable.importance`. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ), + sum = summary(self$model, plotit = FALSE) + relinf = sum$rel.inf + names(relinf) = sum$var - private = list( - .train = function(task) { + relinf + } + ), - # hacky formula construction as gbm fails when "type" argument specified in Surv() + private = list( + .train = function(task) { - tn = task$target_names - lhs = sprintf("Surv(%s, %s)", tn[1L], tn[2L]) - f = formulate(lhs, task$feature_names, env = getNamespace("survival")) + # hacky formula construction as gbm fails when "type" argument specified in Surv() - # collect arguments for predict - pars = self$param_set$get_values(tags = "train") - pars = c(pars, list(weights = task$weights$weight)) + tn = task$target_names + lhs = sprintf("Surv(%s, %s)", tn[1L], tn[2L]) + f = formulate(lhs, task$feature_names, env = getNamespace("survival")) - invoke( - gbm::gbm, - formula = f, - data = task$data(), - .args = pars - ) - }, + # collect arguments for predict + pars = self$param_set$get_values(tags = "train") + pars = c(pars, list(weights = task$weights$weight)) - .predict = function(task) { - pv = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + invoke( + gbm::gbm, + formula = f, + data = task$data(), + .args = pars + ) + }, - # predict linear predictor - lp = invoke(predict, self$model, newdata = newdata, .args = pv) + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - list(crank = lp, lp = lp) - } - ) + # predict linear predictor + lp = invoke(predict, self$model, newdata = newdata, .args = pv) + + list(crank = lp, lp = lp) + } ) ) -.extralrns_dict$add("surv.gbm", function() LearnerSurvGBM$new()) +.extralrns_dict$add("surv.gbm", LearnerSurvGBM) diff --git a/R/learner_glmnet_surv_cv_glmnet.R b/R/learner_glmnet_surv_cv_glmnet.R index 84b9fdbdb..224986bef 100644 --- a/R/learner_glmnet_surv_cv_glmnet.R +++ b/R/learner_glmnet_surv_cv_glmnet.R @@ -18,109 +18,106 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvCVGlmnet", - R6Class("LearnerSurvCVGlmnet", - inherit = mlr3proba::LearnerSurv, +LearnerSurvCVGlmnet = R6Class("LearnerSurvCVGlmnet", + inherit = mlr3proba::LearnerSurv, - public = list( + public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), - alpha = p_dbl(0, 1, default = 1, tags = "train"), - big = p_dbl(default = 9.9e35, tags = "train"), - devmax = p_dbl(0, 1, default = 0.999, tags = "train"), - dfmax = p_int(0L, tags = "train"), - eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), - epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), - exclude = p_uty(tags = "train"), - exmx = p_dbl(default = 250.0, tags = "train"), - fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), - foldid = p_uty(default = NULL, tags = "train"), - gamma = p_uty(tags = "train"), - grouped = p_lgl(default = TRUE, tags = "train"), - intercept = p_lgl(default = TRUE, tags = "train"), - keep = p_lgl(default = FALSE, tags = "train"), - lambda = p_uty(tags = "train"), - lambda.min.ratio = p_dbl(0, 1, tags = "train"), - lower.limits = p_uty(default = -Inf, tags = "train"), - maxit = p_int(1L, default = 100000L, tags = "train"), - mnlam = p_int(1L, default = 5L, tags = "train"), - mxit = p_int(1L, default = 100L, tags = "train"), - mxitnr = p_int(1L, default = 25L, tags = "train"), - nfolds = p_int(3L, default = 10L, tags = "train"), - nlambda = p_int(1L, default = 100L, tags = "train"), - offset = p_uty(default = NULL, tags = "train"), - parallel = p_lgl(default = FALSE, tags = "train"), - penalty.factor = p_uty(tags = "train"), - pmax = p_int(0L, tags = "train"), - pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), - prec = p_dbl(default = 1e-10, tags = "train"), - predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), - relax = p_lgl(default = FALSE, tags = "train"), - s = p_dbl(0, 1, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"), - standardize = p_lgl(default = TRUE, tags = "train"), - standardize.response = p_lgl(default = FALSE, tags = "train"), - thresh = p_dbl(0, default = 1e-07, tags = "train"), - trace.it = p_int(0, 1, default = 0, tags = "train"), - type.gaussian = p_fct(c("covariance", "naive"), tags = "train"), - type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"), - type.measure = p_fct(c("deviance", "C"), default = "deviance", tags = "train"), - type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"), - upper.limits = p_uty(default = Inf, tags = "train") - ) + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), + alpha = p_dbl(0, 1, default = 1, tags = "train"), + big = p_dbl(default = 9.9e35, tags = "train"), + devmax = p_dbl(0, 1, default = 0.999, tags = "train"), + dfmax = p_int(0L, tags = "train"), + eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), + epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), + exclude = p_uty(tags = "train"), + exmx = p_dbl(default = 250.0, tags = "train"), + fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), + foldid = p_uty(default = NULL, tags = "train"), + gamma = p_uty(tags = "train"), + grouped = p_lgl(default = TRUE, tags = "train"), + intercept = p_lgl(default = TRUE, tags = "train"), + keep = p_lgl(default = FALSE, tags = "train"), + lambda = p_uty(tags = "train"), + lambda.min.ratio = p_dbl(0, 1, tags = "train"), + lower.limits = p_uty(default = -Inf, tags = "train"), + maxit = p_int(1L, default = 100000L, tags = "train"), + mnlam = p_int(1L, default = 5L, tags = "train"), + mxit = p_int(1L, default = 100L, tags = "train"), + mxitnr = p_int(1L, default = 25L, tags = "train"), + nfolds = p_int(3L, default = 10L, tags = "train"), + nlambda = p_int(1L, default = 100L, tags = "train"), + offset = p_uty(default = NULL, tags = "train"), + parallel = p_lgl(default = FALSE, tags = "train"), + penalty.factor = p_uty(tags = "train"), + pmax = p_int(0L, tags = "train"), + pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), + prec = p_dbl(default = 1e-10, tags = "train"), + predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), + relax = p_lgl(default = FALSE, tags = "train"), + s = p_dbl(0, 1, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"), + standardize = p_lgl(default = TRUE, tags = "train"), + standardize.response = p_lgl(default = FALSE, tags = "train"), + thresh = p_dbl(0, default = 1e-07, tags = "train"), + trace.it = p_int(0, 1, default = 0, tags = "train"), + type.gaussian = p_fct(c("covariance", "naive"), tags = "train"), + type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"), + type.measure = p_fct(c("deviance", "C"), default = "deviance", tags = "train"), + type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"), + upper.limits = p_uty(default = Inf, tags = "train") + ) - super$initialize( - id = "surv.cv_glmnet", - param_set = ps, - feature_types = c("logical", "integer", "numeric"), - predict_types = c("crank", "lp"), - properties = c("weights", "selected_features"), - packages = c("mlr3extralearners", "glmnet"), - man = "mlr3extralearners::mlr_learners_surv.cv_glmnet", - label = "Regularized Generalized Linear Model" - ) - }, + super$initialize( + id = "surv.cv_glmnet", + param_set = ps, + feature_types = c("logical", "integer", "numeric"), + predict_types = c("crank", "lp"), + properties = c("weights", "selected_features"), + packages = c("mlr3extralearners", "glmnet"), + man = "mlr3extralearners::mlr_learners_surv.cv_glmnet", + label = "Regularized Generalized Linear Model" + ) + }, - #' @description - #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] - #' with `type` set to `"nonzero"`. - #' - #' @param lambda (`numeric(1)`)\cr - #' Custom `lambda`, defaults to the active lambda depending on parameter set. - #' - #' @return (`character()`) of feature names. 
- selected_features = function(lambda = NULL) { - glmnet_selected_features(self, lambda) - } - ), + #' @description + #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] + #' with `type` set to `"nonzero"`. + #' + #' @param lambda (`numeric(1)`)\cr + #' Custom `lambda`, defaults to the active lambda depending on parameter set. + #' + #' @return (`character()`) of feature names. + selected_features = function(lambda = NULL) { + glmnet_selected_features(self, lambda) + } + ), - private = list( - .train = function(task) { - data = as.matrix(task$data(cols = task$feature_names)) - target = task$truth() - pv = self$param_set$get_values(tags = "train") - pv$family = "cox" - if ("weights" %in% task$properties) { - pv$weights = task$weights$weight - } + private = list( + .train = function(task) { + data = as.matrix(task$data(cols = task$feature_names)) + target = task$truth() + pv = self$param_set$get_values(tags = "train") + pv$family = "cox" + if ("weights" %in% task$properties) { + pv$weights = task$weights$weight + } - glmnet_invoke(data, target, pv, cv = TRUE) - }, + glmnet_invoke(data, target, pv, cv = TRUE) + }, - .predict = function(task) { - newdata = as.matrix(ordered_features(task, self)) - pv = self$param_set$get_values(tags = "predict") - pv = rename(pv, "predict.gamma", "gamma") + .predict = function(task) { + newdata = as.matrix(ordered_features(task, self)) + pv = self$param_set$get_values(tags = "predict") + pv = rename(pv, "predict.gamma", "gamma") - lp = as.numeric(invoke(predict, self$model, newx = newdata, type = "link", .args = pv)) - list(lp = lp, crank = lp) - } - ) + lp = as.numeric(invoke(predict, self$model, newx = newdata, type = "link", .args = pv)) + list(lp = lp, crank = lp) + } ) ) -.extralrns_dict$add("surv.cv_glmnet", function() LearnerSurvCVGlmnet$new()) +.extralrns_dict$add("surv.cv_glmnet", LearnerSurvCVGlmnet) diff --git a/R/learner_glmnet_surv_glmnet.R b/R/learner_glmnet_surv_glmnet.R index cd4c8bf18..2721c5165 100644 --- a/R/learner_glmnet_surv_glmnet.R +++ b/R/learner_glmnet_surv_glmnet.R @@ -35,108 +35,105 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvGlmnet", - R6Class("LearnerSurvGlmnet", - inherit = mlr3proba::LearnerSurv, +LearnerSurvGlmnet = R6Class("LearnerSurvGlmnet", + inherit = mlr3proba::LearnerSurv, - public = list( + public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), - alpha = p_dbl(0, 1, default = 1, tags = "train"), - big = p_dbl(default = 9.9e35, tags = "train"), - devmax = p_dbl(0, 1, default = 0.999, tags = "train"), - dfmax = p_int(0L, tags = "train"), - eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), - epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), - exact = p_lgl(default = FALSE, tags = "predict"), - exclude = p_uty(tags = "train"), - exmx = p_dbl(default = 250.0, tags = "train"), - fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), - gamma = p_uty(tags = "train"), - grouped = p_lgl(default = TRUE, tags = "train"), - intercept = p_lgl(default = TRUE, tags = "train"), - keep = p_lgl(default = FALSE, tags = "train"), - lambda = p_uty(tags = "train"), - lambda.min.ratio = p_dbl(0, 1, tags = "train"), - lower.limits = p_uty(default = -Inf, tags = "train"), - maxit = p_int(1L, default = 100000L, tags = "train"), - mnlam = p_int(1L, default = 5L, tags = "train"), - mxit = p_int(1L, default = 100L, tags = "train"), - mxitnr = p_int(1L, default = 25L, tags = "train"), - newoffset = p_uty(tags = "predict"), - nlambda = p_int(1L, default = 100L, tags = "train"), - offset = p_uty(default = NULL, tags = "train"), - parallel = p_lgl(default = FALSE, tags = "train"), - penalty.factor = p_uty(tags = "train"), - pmax = p_int(0L, tags = "train"), - pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), - prec = p_dbl(default = 1e-10, tags = "train"), - predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), - relax = p_lgl(default = FALSE, tags = "train"), - s = p_dbl(0, default = 0.01, tags = "predict"), - standardize = p_lgl(default = TRUE, tags = "train"), - thresh = p_dbl(0, default = 1e-07, tags = "train"), - trace.it = p_int(0, 1, default = 0, tags = "train"), - type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"), - type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"), - upper.limits = p_uty(default = Inf, tags = "train") - ) + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), + alpha = p_dbl(0, 1, default = 1, tags = "train"), + big = p_dbl(default = 9.9e35, tags = "train"), + devmax = p_dbl(0, 1, default = 0.999, tags = "train"), + dfmax = p_int(0L, tags = "train"), + eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), + epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), + exact = p_lgl(default = FALSE, tags = "predict"), + exclude = p_uty(tags = "train"), + exmx = p_dbl(default = 250.0, tags = "train"), + fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), + gamma = p_uty(tags = "train"), + grouped = p_lgl(default = TRUE, tags = "train"), + intercept = p_lgl(default = TRUE, tags = "train"), + keep = p_lgl(default = FALSE, tags = "train"), + lambda = p_uty(tags = "train"), + lambda.min.ratio = p_dbl(0, 1, tags = "train"), + lower.limits = p_uty(default = -Inf, tags = "train"), + maxit = p_int(1L, default = 100000L, tags = "train"), + mnlam = p_int(1L, default = 5L, tags = "train"), + mxit = p_int(1L, default = 100L, tags = "train"), + mxitnr = p_int(1L, default = 25L, tags = "train"), + newoffset = p_uty(tags = "predict"), + nlambda = p_int(1L, default = 100L, tags = "train"), + offset = p_uty(default = NULL, tags = "train"), + parallel = p_lgl(default = FALSE, tags = "train"), + penalty.factor = p_uty(tags = "train"), + pmax = p_int(0L, tags = "train"), + pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), + prec = p_dbl(default = 1e-10, tags = "train"), + predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), + relax = p_lgl(default = FALSE, tags = "train"), + s = p_dbl(0, default = 0.01, tags = "predict"), + standardize = p_lgl(default = TRUE, tags = "train"), + thresh = p_dbl(0, default = 1e-07, tags = "train"), + trace.it = p_int(0, 1, default = 0, tags = "train"), + type.logistic = p_fct(c("Newton", "modified.Newton"), default = "Newton", tags = "train"), + type.multinomial = p_fct(c("ungrouped", "grouped"), default = "ungrouped", tags = "train"), + upper.limits = p_uty(default = Inf, tags = "train") + ) - super$initialize( - id = "surv.glmnet", - param_set = ps, - feature_types = c("logical", "integer", "numeric"), - predict_types = c("crank", "lp"), - properties = c("weights", "selected_features"), - packages = c("mlr3extralearners", "glmnet"), - man = "mlr3extralearners::mlr_learners_surv.glmnet", - label = "Regularized Generalized Linear Model" - ) - }, + super$initialize( + id = "surv.glmnet", + param_set = ps, + feature_types = c("logical", "integer", "numeric"), + predict_types = c("crank", "lp"), + properties = c("weights", "selected_features"), + packages = c("mlr3extralearners", "glmnet"), + man = "mlr3extralearners::mlr_learners_surv.glmnet", + label = "Regularized Generalized Linear Model" + ) + }, - #' @description - #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] - #' with `type` set to `"nonzero"`. - #' - #' @param lambda (`numeric(1)`)\cr - #' Custom `lambda`, defaults to the active lambda depending on parameter set. - #' - #' @return (`character()`) of feature names. - selected_features = function(lambda = NULL) { - glmnet_selected_features(self, lambda) - } - ), + #' @description + #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] + #' with `type` set to `"nonzero"`. + #' + #' @param lambda (`numeric(1)`)\cr + #' Custom `lambda`, defaults to the active lambda depending on parameter set. 
+ #' + #' @return (`character()`) of feature names. + selected_features = function(lambda = NULL) { + glmnet_selected_features(self, lambda) + } + ), - private = list( - .train = function(task) { - data = as.matrix(task$data(cols = task$feature_names)) - target = task$truth() - pv = self$param_set$get_values(tags = "train") - pv$family = "cox" - if ("weights" %in% task$properties) { - pv$weights = task$weights$weight - } + private = list( + .train = function(task) { + data = as.matrix(task$data(cols = task$feature_names)) + target = task$truth() + pv = self$param_set$get_values(tags = "train") + pv$family = "cox" + if ("weights" %in% task$properties) { + pv$weights = task$weights$weight + } - glmnet_invoke(data, target, pv) - }, + glmnet_invoke(data, target, pv) + }, - .predict = function(task) { - newdata = as.matrix(ordered_features(task, self)) - pv = self$param_set$get_values(tags = "predict") - pv = rename(pv, "predict.gamma", "gamma") - pv$s = glmnet_get_lambda(self, pv) + .predict = function(task) { + newdata = as.matrix(ordered_features(task, self)) + pv = self$param_set$get_values(tags = "predict") + pv = rename(pv, "predict.gamma", "gamma") + pv$s = glmnet_get_lambda(self, pv) - lp = invoke(predict, self$model, newx = newdata, type = "link", .args = pv) + lp = invoke(predict, self$model, newx = newdata, type = "link", .args = pv) - list(crank = lp, lp = lp) - } - ) + list(crank = lp, lp = lp) + } ) ) -.extralrns_dict$add("surv.glmnet", function() LearnerSurvGlmnet$new()) +.extralrns_dict$add("surv.glmnet", LearnerSurvGlmnet) diff --git a/R/learner_gss_dens_spline.R b/R/learner_gss_dens_spline.R index eb43cab08..62b7d7ac9 100644 --- a/R/learner_gss_dens_spline.R +++ b/R/learner_gss_dens_spline.R @@ -15,83 +15,80 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensSpline", - R6Class("LearnerDensSpline", - inherit = mlr3proba::LearnerDens, +LearnerDensSpline = R6Class("LearnerDensSpline", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - type = p_uty(tags = "train"), - alpha = p_dbl(default = 1.4, tags = "train"), - weights = p_uty(tags = "train"), - na.action = p_uty(default = stats::na.omit, tags = "train"), - id.basis = p_uty(tags = "train"), - nbasis = p_int(tags = "train"), - seed = p_dbl(tags = "train"), - domain = p_uty(tags = "train"), - quad = p_uty(tags = "train"), - qdsz.depth = p_dbl(tags = "train"), - bias = p_uty(tags = "train"), - prec = p_dbl(default = 1e-7, tags = "train"), - maxiter = p_int(default = 30, lower = 1, tags = "train"), - skip.iter = p_lgl(tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + type = p_uty(tags = "train"), + alpha = p_dbl(default = 1.4, tags = "train"), + weights = p_uty(tags = "train"), + na.action = p_uty(default = stats::na.omit, tags = "train"), + id.basis = p_uty(tags = "train"), + nbasis = p_int(tags = "train"), + seed = p_dbl(tags = "train"), + domain = p_uty(tags = "train"), + quad = p_uty(tags = "train"), + qdsz.depth = p_dbl(tags = "train"), + bias = p_uty(tags = "train"), + prec = p_dbl(default = 1e-7, tags = "train"), + maxiter = p_int(default = 30, lower = 1, tags = "train"), + skip.iter = p_lgl(tags = "train") + ) - super$initialize( - id = "dens.spline", - packages = c("mlr3extralearners", "gss"), - feature_types = c("integer", "numeric"), - predict_types = c("pdf", "cdf"), - param_set = ps, - properties = "missings", - man = "mlr3extralearners::mlr_learners_dens.spline", - label = "Density Smoothing Splines" - ) - } - ), + super$initialize( + id = "dens.spline", + packages = c("mlr3extralearners", "gss"), + feature_types = c("integer", "numeric"), + predict_types = c("pdf", "cdf"), + param_set = ps, + properties = "missings", + man = "mlr3extralearners::mlr_learners_dens.spline", + label = "Density Smoothing Splines" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") + pars = self$param_set$get_values(tags = "train") - data = task$data()[[1]] + data = task$data()[[1]] - fit = invoke(gss::ssden, formula = ~data, .args = pars) + fit = invoke(gss::ssden, formula = ~data, .args = pars) - pdf = function(x) {} # nolint - body(pdf) = substitute({ - invoke(gss::dssden, object = fit, x = x) - }) + pdf = function(x) {} # nolint + body(pdf) = substitute({ + invoke(gss::dssden, object = fit, x = x) + }) - cdf = function(x) {} # nolint - body(cdf) = substitute({ - invoke(gss::pssden, object = fit, q = x) - }) + cdf = function(x) {} # nolint + body(cdf) = substitute({ + invoke(gss::pssden, object = fit, q = x) + }) - quantile = function(p) {} # nolint - body(quantile) = substitute({ - invoke(gss::qssden, object = fit, p = p) - }) + quantile = function(p) {} # nolint + body(quantile) = substitute({ + invoke(gss::qssden, object = fit, p = p) + }) - distr6::Distribution$new( - name = "Smoothing Spline Density Estimator", - short_name = "splineDens", - pdf = pdf, cdf = cdf, quantile = quantile, type = set6::Reals$new()) - }, + distr6::Distribution$new( + name = "Smoothing Spline Density Estimator", + short_name = "splineDens", + pdf = pdf, cdf = cdf, quantile = quantile, type = set6::Reals$new()) + }, - .predict = function(task) { - newdata = task$data()[[1]] - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$cdf(newdata), .args = pars) - } - ) + .predict = function(task) { + newdata = task$data()[[1]] + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$cdf(newdata), .args = pars) + } ) ) -.extralrns_dict$add("dens.spline", function() LearnerDensSpline$new()) +.extralrns_dict$add("dens.spline", LearnerDensSpline) diff --git a/R/learner_ks_dens_kde_ks.R b/R/learner_ks_dens_kde_ks.R index 1cb4ccc27..7fd4c79cb 100644 --- a/R/learner_ks_dens_kde_ks.R +++ b/R/learner_ks_dens_kde_ks.R @@ -15,71 +15,68 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensKDEks", - R6Class("LearnerDensKDEks", - inherit = mlr3proba::LearnerDens, +LearnerDensKDEks = 
R6Class("LearnerDensKDEks", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - h = p_dbl(lower = 0, tags = "train"), - H = p_uty(tags = "train"), - gridsize = p_uty(tags = "train"), - gridtype = p_uty(tags = "train"), - xmin = p_dbl(tags = "train"), - xmax = p_dbl(tags = "train"), - supp = p_dbl(default = 3.7, tags = "train"), - binned = p_dbl(tags = "train"), - bgridsize = p_uty(tags = "train"), - positive = p_lgl(default = FALSE, tags = "train"), - adj.positive = p_uty(tags = "train"), - w = p_uty(tags = "train"), - compute.cont = p_lgl(default = TRUE, tags = "train"), - approx.cont = p_lgl(default = TRUE, tags = "train"), - unit.interval = p_lgl(default = FALSE, tags = "train"), - verbose = p_lgl(default = FALSE, tags = "train"), - density = p_lgl(default = FALSE, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + h = p_dbl(lower = 0, tags = "train"), + H = p_uty(tags = "train"), + gridsize = p_uty(tags = "train"), + gridtype = p_uty(tags = "train"), + xmin = p_dbl(tags = "train"), + xmax = p_dbl(tags = "train"), + supp = p_dbl(default = 3.7, tags = "train"), + binned = p_dbl(tags = "train"), + bgridsize = p_uty(tags = "train"), + positive = p_lgl(default = FALSE, tags = "train"), + adj.positive = p_uty(tags = "train"), + w = p_uty(tags = "train"), + compute.cont = p_lgl(default = TRUE, tags = "train"), + approx.cont = p_lgl(default = TRUE, tags = "train"), + unit.interval = p_lgl(default = FALSE, tags = "train"), + verbose = p_lgl(default = FALSE, tags = "train"), + density = p_lgl(default = FALSE, tags = "train") + ) - super$initialize( - id = "dens.kde_ks", - packages = c("mlr3extralearners", "ks"), - feature_types = c("integer", "numeric"), - predict_types = "pdf", - param_set = ps, - man = "mlr3extralearners::mlr_learners_dens.kde_ks", - label = "Kernel Density Estimator" - ) - } - ), + super$initialize( + id = "dens.kde_ks", + packages = c("mlr3extralearners", "ks"), + feature_types = c("integer", "numeric"), + predict_types = "pdf", + param_set = ps, + man = "mlr3extralearners::mlr_learners_dens.kde_ks", + label = "Kernel Density Estimator" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") - data = task$data()[[1]] + data = task$data()[[1]] - pdf = function(x) { - } - body(pdf) = substitute({ - invoke(ks::kde, x = data, eval.points = x, .args = pars)$estimate - }) + pdf = function(x) { + } + body(pdf) = substitute({ + invoke(ks::kde, x = data, eval.points = x, .args = pars)$estimate + }) - distr6::Distribution$new( - name = "ks KDE", - short_name = "ksKDE", - pdf = pdf, type = set6::Reals$new()) - }, + distr6::Distribution$new( + name = "ks KDE", + short_name = "ksKDE", + pdf = pdf, type = set6::Reals$new()) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(task$data()[[1]]), .args = pars) - } - ) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(task$data()[[1]]), .args = pars) + } ) ) -.extralrns_dict$add("dens.kde_ks", function() LearnerDensKDEks$new()) +.extralrns_dict$add("dens.kde_ks", LearnerDensKDEks) diff --git a/R/learner_locfit_dens_locfit.R 
b/R/learner_locfit_dens_locfit.R index d351347ba..ec6b5b673 100644 --- a/R/learner_locfit_dens_locfit.R +++ b/R/learner_locfit_dens_locfit.R @@ -15,74 +15,71 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensLocfit", - R6Class("LearnerDensLocfit", - inherit = mlr3proba::LearnerDens, +LearnerDensLocfit = R6Class("LearnerDensLocfit", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - window = p_fct(levels = c( - "tcub", "rect", "trwt", - "tria", "epan", "bisq", - "gaus"), default = "gaus", tags = "train"), - width = p_dbl(tags = "train"), - from = p_dbl(tags = "train"), - to = p_dbl(tags = "train"), - cut = p_dbl(tags = "train"), - deg = p_dbl(default = 0, tags = "train"), - link = p_fct(default = "ident", tags = "train", - levels = c("ident", "log", "logit", "inverse", "sqrt", "arcsin")), - kern = p_fct(default = "tcub", tags = "train", - levels = c("rect", "trwt", "tria", "epan", "bisq", "gauss", "tcub")), - kt = p_fct(default = "sph", tags = "train", - levels = c("sph", "prod")), - renorm = p_lgl(default = FALSE, tags = "train"), - maxk = p_int(default = 100, lower = 0, tags = "train"), - itype = p_fct(levels = c("prod", "mult", "mlin", "haz"), tags = "train"), - mint = p_int(default = 20, lower = 1, tags = "train"), - maxit = p_int(default = 20, lower = 1, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + window = p_fct(levels = c( + "tcub", "rect", "trwt", + "tria", "epan", "bisq", + "gaus"), default = "gaus", tags = "train"), + width = p_dbl(tags = "train"), + from = p_dbl(tags = "train"), + to = p_dbl(tags = "train"), + cut = p_dbl(tags = "train"), + deg = p_dbl(default = 0, tags = "train"), + link = p_fct(default = "ident", tags = "train", + levels = c("ident", "log", "logit", "inverse", "sqrt", "arcsin")), + kern = p_fct(default = "tcub", tags = "train", + levels = c("rect", "trwt", "tria", "epan", "bisq", "gauss", "tcub")), + kt = p_fct(default = "sph", tags = "train", + levels = c("sph", "prod")), + renorm = p_lgl(default = FALSE, tags = "train"), + maxk = p_int(default = 100, lower = 0, tags = "train"), + itype = p_fct(levels = c("prod", "mult", "mlin", "haz"), tags = "train"), + mint = p_int(default = 20, lower = 1, tags = "train"), + maxit = p_int(default = 20, lower = 1, tags = "train") + ) - super$initialize( - id = "dens.locfit", - packages = c("mlr3extralearners", "locfit"), - feature_types = c("integer", "numeric"), - predict_types = "pdf", - param_set = ps, - man = "mlr3extralearners::mlr_learners_dens.locfit", - label = "Local Density Estimation" - ) - } - ), + super$initialize( + id = "dens.locfit", + packages = c("mlr3extralearners", "locfit"), + feature_types = c("integer", "numeric"), + predict_types = "pdf", + param_set = ps, + man = "mlr3extralearners::mlr_learners_dens.locfit", + label = "Local Density Estimation" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") - data = task$data()[[1]] + data = task$data()[[1]] - pdf = function(x) { - } - body(pdf) = substitute({ - invoke(locfit::density.lf, x = data, ev = x, .args = pars)$y - }) + pdf = function(x) { + } + body(pdf) = substitute({ + invoke(locfit::density.lf, x = data, ev = x, .args = 
pars)$y + }) - distr6::Distribution$new( - name = paste("LocFit Density", self$param_set$values$window), - short_name = paste0("LocFitDens", self$param_set$values$window), - pdf = pdf, - type = set6::Reals$new()) - }, + distr6::Distribution$new( + name = paste("LocFit Density", self$param_set$values$window), + short_name = paste0("LocFitDens", self$param_set$values$window), + pdf = pdf, + type = set6::Reals$new()) + }, - .predict = function(task) { - list(pdf = self$model$pdf(task$data()[[1]])) - } - ) + .predict = function(task) { + list(pdf = self$model$pdf(task$data()[[1]])) + } ) ) -.extralrns_dict$add("dens.locfit", function() LearnerDensLocfit$new()) +.extralrns_dict$add("dens.locfit", LearnerDensLocfit) diff --git a/R/learner_logspline_dens_logspline.R b/R/learner_logspline_dens_logspline.R index f4613b46f..6802638f4 100644 --- a/R/learner_logspline_dens_logspline.R +++ b/R/learner_logspline_dens_logspline.R @@ -15,82 +15,79 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensLogspline", - R6Class("LearnerDensLogspline", - inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - lbound = p_dbl(tags = "train"), - ubound = p_dbl(tags = "train"), - maxknots = p_dbl(default = 0, lower = 0, tags = "train"), - knots = p_uty(tags = "train"), - nknots = p_dbl(default = 0, lower = 0, tags = "train"), - penalty = p_uty(tags = "train"), - silent = p_lgl(default = TRUE, tags = "train"), - mind = p_dbl(default = -1, tags = "train"), - error.action = p_int(default = 2, lower = 0, upper = 2, tags = "train") - ) +LearnerDensLogspline = R6Class("LearnerDensLogspline", + inherit = mlr3proba::LearnerDens, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + lbound = p_dbl(tags = "train"), + ubound = p_dbl(tags = "train"), + maxknots = p_dbl(default = 0, lower = 0, tags = "train"), + knots = p_uty(tags = "train"), + nknots = p_dbl(default = 0, lower = 0, tags = "train"), + penalty = p_uty(tags = "train"), + silent = p_lgl(default = TRUE, tags = "train"), + mind = p_dbl(default = -1, tags = "train"), + error.action = p_int(default = 2, lower = 0, upper = 2, tags = "train") + ) - super$initialize( - id = "dens.logspline", - packages = c("mlr3extralearners", "logspline"), - feature_types = c("integer", "numeric"), - predict_types = c("pdf", "cdf"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_dens.logspline", - label = "Logspline Density Estimation" - ) - } - ), + super$initialize( + id = "dens.logspline", + packages = c("mlr3extralearners", "logspline"), + feature_types = c("integer", "numeric"), + predict_types = c("pdf", "cdf"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_dens.logspline", + label = "Logspline Density Estimation" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - data = task$data()[[1]] + data = task$data()[[1]] - pars = self$param_set$get_values(tags = "train") + pars = self$param_set$get_values(tags = "train") - fit = invoke(logspline::logspline, x = data, .args = pars) + fit = invoke(logspline::logspline, x = data, .args = pars) - pdf = function(x) {} # nolint - body(pdf) = substitute({ - invoke(logspline::dlogspline, q = x, fit = fit) - }) + pdf = function(x) {} # nolint + body(pdf) = substitute({ + invoke(logspline::dlogspline, q = x, fit = fit) + }) - cdf = function(x) {} # nolint - body(cdf) = substitute({ - invoke(logspline::plogspline, q = x, fit = fit) - }) + cdf = function(x) {} # nolint + body(cdf) = substitute({ + invoke(logspline::plogspline, q = x, fit = fit) + }) - quantile = function(p) {} # nolint - body(quantile) = substitute({ - invoke(logspline::qlogspline, p = p, fit = fit) - }) + quantile = function(p) {} # nolint + body(quantile) = substitute({ + invoke(logspline::qlogspline, p = p, fit = fit) + }) - rand = function(n) {} # nolint - body(rand) = substitute({ - invoke(logspline::rlogspline, n = n, fit = fit) - }) + rand = function(n) {} # nolint + body(rand) = substitute({ + invoke(logspline::rlogspline, n = n, fit = fit) + }) - distr6::Distribution$new( - name = "Logspline Density Estimator", - short_name = "LogsplineDens", - pdf = pdf, cdf = cdf, quantile = quantile, rand = rand, type = set6::Reals$new()) + distr6::Distribution$new( + name = "Logspline Density Estimator", + short_name = "LogsplineDens", + pdf = pdf, cdf = cdf, quantile = quantile, rand = rand, type = set6::Reals$new()) - }, + }, - .predict = function(task) { - newdata = task$data()[[1]] - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$cdf(newdata), .args = pars) - } - ) + .predict = function(task) { + newdata = task$data()[[1]] + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$cdf(newdata), .args = pars) + } ) ) -.extralrns_dict$add("dens.logspline", function() LearnerDensLogspline$new()) +.extralrns_dict$add("dens.logspline", LearnerDensLogspline) diff --git a/R/learner_mboost_surv_blackboost.R b/R/learner_mboost_surv_blackboost.R index df9d0a1b7..00b3f08d9 100644 --- a/R/learner_mboost_surv_blackboost.R +++ b/R/learner_mboost_surv_blackboost.R @@ -19,186 +19,183 @@ #' @export #' 
@template seealso_learner #' @template example -delayedAssign( - "LearnerSurvBlackBoost", - R6Class("LearnerSurvBlackBoost", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - family = p_fct(default = "coxph", - levels = c( - "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", - "custom"), tags = "train"), - custom.family = p_uty(tags = "train"), - nuirange = p_uty(default = c(0, 100), tags = "train"), - offset = p_uty(tags = "train"), - center = p_lgl(default = TRUE, tags = "train"), - mstop = p_int(default = 100L, lower = 0L, tags = "train"), - nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), - risk = p_fct(levels = c("inbag", "oobag", "none"), tags = "train"), - stopintern = p_lgl(default = FALSE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - oobweights = p_uty(tags = "train"), - teststat = p_fct( - default = "quadratic", levels = c("quadratic", "maximum"), - tags = "train"), - splitstat = p_fct( - default = "quadratic", levels = c("quadratic", "maximum"), - tags = "train"), - splittest = p_lgl(default = FALSE, tags = "train"), - testtype = p_fct( - default = "Bonferroni", - levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), tags = "train"), - maxpts = p_int(default = 25000L, lower = 1L, tags = "train"), - abseps = p_dbl(default = 0.001, tags = "train"), - releps = p_dbl(default = 0, tags = "train"), - nmax = p_uty(tags = "train"), - alpha = p_dbl(default = 0.05, lower = 0, upper = 1, tags = "train"), - mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, tags = "train"), - logmincriterion = p_dbl(default = log(0.95), upper = 0, tags = "train"), - minsplit = p_int(default = 20L, lower = 0L, tags = "train"), - minbucket = p_int(default = 7L, lower = 0L, tags = "train"), - minprob = p_dbl(default = 0.01, lower = 0, upper = 1, tags = "train"), - stump = p_lgl(default = FALSE, tags = "train"), - lookahead = p_lgl(default = FALSE, tags = "train"), - MIA = p_lgl(default = FALSE, tags = "train"), - nresample = p_int(default = 9999L, lower = 1L, tags = "train"), - tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, tags = "train"), - maxsurrogate = p_int(default = 0L, lower = 0L, tags = "train"), - mtry = p_int(lower = 0L, tags = "train"), - maxdepth = p_int(lower = 0L, tags = "train"), - multiway = p_lgl(default = FALSE, tags = "train"), - splittry = p_int(default = 2L, lower = 1L, tags = "train"), - intersplit = p_lgl(default = FALSE, tags = "train"), - majority = p_lgl(default = FALSE, tags = "train"), - caseweights = p_lgl(default = TRUE, tags = "train"), - sigma = p_dbl(default = 0.1, lower = 0, upper = 1, - tags = "train"), - ipcw = p_uty(default = 1, tags = "train"), - na.action = p_uty(default = stats::na.omit, tags = "train") - ) - - ps$values = list(family = "coxph") - ps$add_dep("sigma", "family", CondEqual$new("cindex")) - ps$add_dep("ipcw", "family", CondEqual$new("cindex")) - - super$initialize( - id = "surv.blackboost", - param_set = ps, - feature_types = c("integer", "numeric", "factor"), - predict_types = c("distr", "crank", "lp"), - properties = "weights", - packages = c("mlr3extralearners", "mboost", "pracma"), - man = "mlr3extralearners::mlr_learners_surv.blackboost", - label = "Gradient Boosting" - ) +LearnerSurvBlackBoost = R6Class("LearnerSurvBlackBoost", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this 
[R6][R6::R6Class] class. + initialize = function() { + ps = ps( + family = p_fct(default = "coxph", + levels = c( + "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", + "custom"), tags = "train"), + custom.family = p_uty(tags = "train"), + nuirange = p_uty(default = c(0, 100), tags = "train"), + offset = p_uty(tags = "train"), + center = p_lgl(default = TRUE, tags = "train"), + mstop = p_int(default = 100L, lower = 0L, tags = "train"), + nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), + risk = p_fct(levels = c("inbag", "oobag", "none"), tags = "train"), + stopintern = p_lgl(default = FALSE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + oobweights = p_uty(tags = "train"), + teststat = p_fct( + default = "quadratic", levels = c("quadratic", "maximum"), + tags = "train"), + splitstat = p_fct( + default = "quadratic", levels = c("quadratic", "maximum"), + tags = "train"), + splittest = p_lgl(default = FALSE, tags = "train"), + testtype = p_fct( + default = "Bonferroni", + levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), tags = "train"), + maxpts = p_int(default = 25000L, lower = 1L, tags = "train"), + abseps = p_dbl(default = 0.001, tags = "train"), + releps = p_dbl(default = 0, tags = "train"), + nmax = p_uty(tags = "train"), + alpha = p_dbl(default = 0.05, lower = 0, upper = 1, tags = "train"), + mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, tags = "train"), + logmincriterion = p_dbl(default = log(0.95), upper = 0, tags = "train"), + minsplit = p_int(default = 20L, lower = 0L, tags = "train"), + minbucket = p_int(default = 7L, lower = 0L, tags = "train"), + minprob = p_dbl(default = 0.01, lower = 0, upper = 1, tags = "train"), + stump = p_lgl(default = FALSE, tags = "train"), + lookahead = p_lgl(default = FALSE, tags = "train"), + MIA = p_lgl(default = FALSE, tags = "train"), + nresample = p_int(default = 9999L, lower = 1L, tags = "train"), + tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, tags = "train"), + maxsurrogate = p_int(default = 0L, lower = 0L, tags = "train"), + mtry = p_int(lower = 0L, tags = "train"), + maxdepth = p_int(lower = 0L, tags = "train"), + multiway = p_lgl(default = FALSE, tags = "train"), + splittry = p_int(default = 2L, lower = 1L, tags = "train"), + intersplit = p_lgl(default = FALSE, tags = "train"), + majority = p_lgl(default = FALSE, tags = "train"), + caseweights = p_lgl(default = TRUE, tags = "train"), + sigma = p_dbl(default = 0.1, lower = 0, upper = 1, + tags = "train"), + ipcw = p_uty(default = 1, tags = "train"), + na.action = p_uty(default = stats::na.omit, tags = "train") + ) + + ps$values = list(family = "coxph") + ps$add_dep("sigma", "family", CondEqual$new("cindex")) + ps$add_dep("ipcw", "family", CondEqual$new("cindex")) + + super$initialize( + id = "surv.blackboost", + param_set = ps, + feature_types = c("integer", "numeric", "factor"), + predict_types = c("distr", "crank", "lp"), + properties = "weights", + packages = c("mlr3extralearners", "mboost", "pracma"), + man = "mlr3extralearners::mlr_learners_surv.blackboost", + label = "Gradient Boosting" + ) + } + ), + + private = list( + .train = function(task) { + + # parameter custom.family takes precedence over family + if (!is.null(self$param_set$values$custom.family)) { + self$param_set$values$family = "custom" } - ), - - private = list( - .train = function(task) { - - # parameter custom.family takes precedence over family - if (!is.null(self$param_set$values$custom.family)) { - 
self$param_set$values$family = "custom" - } - - pars = self$param_set$get_values(tags = "train") - - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } - - # mboost control - # Save control settings and return on exit - saved_ctrl = mboost::boost_control() - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - # GenzBretz control - # Save control settings and return on exit - saved_ctrl = mvtnorm::GenzBretz() - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$pargs = do.call(mvtnorm::GenzBretz, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - # ctree control - # Save control settings and return on exit - saved_ctrl = partykit::ctree_control() - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$tree_controls = do.call(partykit::ctree_control, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - # if ("weights" %in% task$properties) - # pars$weights = task$weights$weight - - family = switch(pars$family, - coxph = mboost::CoxPH(), - weibull = invoke(mboost::Weibull, - .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), - loglog = invoke(mboost::Loglog, - .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), - lognormal = invoke(mboost::Lognormal, - .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), - gehan = mboost::Gehan(), - cindex = invoke(mboost::Cindex, - .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), - custom = pars$custom.family - ) - - # FIXME - until issue closes - pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] - pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] - pars = pars[!(names(pars) %in% c("family", "custom.family"))] - - invoke(mboost::blackboost, - formula = task$formula(task$feature_names), - data = task$data(), family = family, .args = pars) - }, - - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) - # predict linear predictor - lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", - .args = pars - )) - - # predict survival - if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { - survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) - } else { - mlr3proba::.surv_return(lp = -lp) - } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } + + pars = self$param_set$get_values(tags = "train") + + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight + } + + # mboost control + # Save control settings and return on exit + saved_ctrl = mboost::boost_control() + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] + } + + # GenzBretz control + # Save control settings and return on exit + saved_ctrl = 
mvtnorm::GenzBretz() + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$pargs = do.call(mvtnorm::GenzBretz, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] } - ) + + # ctree control + # Save control settings and return on exit + saved_ctrl = partykit::ctree_control() + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$tree_controls = do.call(partykit::ctree_control, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] + } + + # if ("weights" %in% task$properties) + # pars$weights = task$weights$weight + + family = switch(pars$family, + coxph = mboost::CoxPH(), + weibull = invoke(mboost::Weibull, + .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), + loglog = invoke(mboost::Loglog, + .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), + lognormal = invoke(mboost::Lognormal, + .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), + gehan = mboost::Gehan(), + cindex = invoke(mboost::Cindex, + .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), + custom = pars$custom.family + ) + + # FIXME - until issue closes + pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] + pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] + pars = pars[!(names(pars) %in% c("family", "custom.family"))] + + invoke(mboost::blackboost, + formula = task$formula(task$feature_names), + data = task$data(), family = family, .args = pars) + }, + + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) + # predict linear predictor + lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", + .args = pars + )) + + # predict survival + if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + survfit = invoke(mboost::survFit, self$model, newdata = newdata) + + mlr3proba::.surv_return(times = survfit$time, + surv = t(survfit$surv), + lp = lp) + } else { + mlr3proba::.surv_return(lp = -lp) + } + + + # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR + # response = NULL + # if (!is.null(self$param_set$values$family)) { + # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { + # response = exp(lp) + # } + # } + } ) ) -.extralrns_dict$add("surv.blackboost", function() LearnerSurvBlackBoost$new()) +.extralrns_dict$add("surv.blackboost", LearnerSurvBlackBoost) diff --git a/R/learner_mboost_surv_gamboost.R b/R/learner_mboost_surv_gamboost.R index ac3c4032d..94a9fa39a 100644 --- a/R/learner_mboost_surv_gamboost.R +++ b/R/learner_mboost_surv_gamboost.R @@ -18,166 +18,162 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvGAMBoost", - R6Class("LearnerSurvGAMBoost", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - family = p_fct(default = "coxph", - levels = c( - "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", - "custom"), tags = "train"), - custom.family = p_uty(tags = "train"), - nuirange = p_uty(default = c(0, 100), tags = "train"), - offset = p_dbl(tags = "train"), - center = p_lgl(default = TRUE, tags = "train"), - mstop = p_int(default = 100L, lower = 0L, tags = "train"), - nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), - risk = p_fct(default = "inbag", - levels = c("inbag", "oobag", "none"), tags = "train"), - stopintern = p_uty(default = FALSE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - oobweights = p_uty(default = NULL, tags = "train"), - baselearner = p_fct(default = "bbs", - levels = c("bbs", "bols", "btree"), tags = "train"), - dfbase = p_int(default = 4L, lower = 0L, tags = "train"), - sigma = p_dbl(default = 0.1, lower = 0, upper = 1, - tags = "train"), - ipcw = p_uty(default = 1, tags = "train"), - na.action = p_uty(default = stats::na.omit, tags = "train") - ) - - ps$values = list(family = "coxph") - ps$add_dep("sigma", "family", CondEqual$new("cindex")) - ps$add_dep("ipcw", "family", CondEqual$new("cindex")) - - super$initialize( - id = "surv.gamboost", - param_set = ps, - feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank", "lp"), - properties = c("weights", "importance", "selected_features"), - packages = c("mlr3extralearners", "mboost", "pracma"), - man = "mlr3extralearners::mlr_learners_surv.gamboost", - label = "Boosted Generalized Additive Model" - ) - }, - - #' @description - #' The importance scores are extracted with the function [mboost::varimp()] - #' with the default arguments. - #' @return Named `numeric()`. - importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - - vimp = as.numeric(mboost::varimp(self$model)) - names(vimp) = unname(stats::variable.names(self$model)) - - sort(vimp, decreasing = TRUE) - }, - - #' @description - #' Selected features are extracted with the function - #' [mboost::variable.names.mboost()], with - #' `used.only = TRUE`. - #' @return `character()`. - selected_features = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - - unname(stats::variable.names(self$model, usedonly = TRUE)) +LearnerSurvGAMBoost = R6Class("LearnerSurvGAMBoost", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + family = p_fct(default = "coxph", + levels = c( + "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", + "custom"), tags = "train"), + custom.family = p_uty(tags = "train"), + nuirange = p_uty(default = c(0, 100), tags = "train"), + offset = p_dbl(tags = "train"), + center = p_lgl(default = TRUE, tags = "train"), + mstop = p_int(default = 100L, lower = 0L, tags = "train"), + nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), + risk = p_fct(default = "inbag", + levels = c("inbag", "oobag", "none"), tags = "train"), + stopintern = p_uty(default = FALSE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + oobweights = p_uty(default = NULL, tags = "train"), + baselearner = p_fct(default = "bbs", + levels = c("bbs", "bols", "btree"), tags = "train"), + dfbase = p_int(default = 4L, lower = 0L, tags = "train"), + sigma = p_dbl(default = 0.1, lower = 0, upper = 1, + tags = "train"), + ipcw = p_uty(default = 1, tags = "train"), + na.action = p_uty(default = stats::na.omit, tags = "train") + ) + + ps$values = list(family = "coxph") + ps$add_dep("sigma", "family", CondEqual$new("cindex")) + ps$add_dep("ipcw", "family", CondEqual$new("cindex")) + + super$initialize( + id = "surv.gamboost", + param_set = ps, + feature_types = c("integer", "numeric", "factor", "logical"), + predict_types = c("distr", "crank", "lp"), + properties = c("weights", "importance", "selected_features"), + packages = c("mlr3extralearners", "mboost", "pracma"), + man = "mlr3extralearners::mlr_learners_surv.gamboost", + label = "Boosted Generalized Additive Model" + ) + }, + + #' @description + #' The importance scores are extracted with the function [mboost::varimp()] + #' with the default arguments. + #' @return Named `numeric()`. 
+ importance = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ), - - private = list( - .train = function(task) { - - # parameter custom.family takes precedence over family - if (!is.null(self$param_set$values$custom.family)) { - self$param_set$values$family = "custom" - } - - pars = self$param_set$get_values(tags = "train") - - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } - - # Save control settings and return on exit - saved_ctrl = mboost::boost_control() - on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - family = switch(pars$family, - coxph = mboost::CoxPH(), - weibull = invoke(mboost::Weibull, - .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), - loglog = invoke(mboost::Loglog, - .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), - lognormal = invoke(mboost::Lognormal, - .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), - gehan = mboost::Gehan(), - cindex = invoke(mboost::Cindex, - .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), - custom = pars$custom.family - ) - - # FIXME - until issue closes - pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] - pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] - pars = pars[!(names(pars) %in% c("family", "custom.family"))] - - - with_package("mboost", { - invoke(mboost::gamboost, - formula = task$formula(task$feature_names), - data = task$data(), family = family, .args = pars) - }) - }, - - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) - # predict linear predictor - lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", - .args = pars - )) - - # predict survival - if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { - survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) - } else { - mlr3proba::.surv_return(lp = -lp) - } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } + + vimp = as.numeric(mboost::varimp(self$model)) + names(vimp) = unname(stats::variable.names(self$model)) + + sort(vimp, decreasing = TRUE) + }, + + #' @description + #' Selected features are extracted with the function + #' [mboost::variable.names.mboost()], with + #' `used.only = TRUE`. + #' @return `character()`. 
+ selected_features = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + + unname(stats::variable.names(self$model, usedonly = TRUE)) + } + ), + + private = list( + .train = function(task) { + + # parameter custom.family takes precedence over family + if (!is.null(self$param_set$values$custom.family)) { + self$param_set$values$family = "custom" + } + + pars = self$param_set$get_values(tags = "train") + + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight } - ) + + # Save control settings and return on exit + saved_ctrl = mboost::boost_control() + on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] + } + + family = switch(pars$family, + coxph = mboost::CoxPH(), + weibull = invoke(mboost::Weibull, + .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), + loglog = invoke(mboost::Loglog, + .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), + lognormal = invoke(mboost::Lognormal, + .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), + gehan = mboost::Gehan(), + cindex = invoke(mboost::Cindex, + .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), + custom = pars$custom.family + ) + + # FIXME - until issue closes + pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] + pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] + pars = pars[!(names(pars) %in% c("family", "custom.family"))] + + + with_package("mboost", { + invoke(mboost::gamboost, + formula = task$formula(task$feature_names), + data = task$data(), family = family, .args = pars) + }) + }, + + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) + # predict linear predictor + lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link", + .args = pars + )) + + # predict survival + if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + survfit = invoke(mboost::survFit, self$model, newdata = newdata) + + mlr3proba::.surv_return(times = survfit$time, + surv = t(survfit$surv), + lp = lp) + } else { + mlr3proba::.surv_return(lp = -lp) + } + + + # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR + # response = NULL + # if (!is.null(self$param_set$values$family)) { + # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { + # response = exp(lp) + # } + # } + } ) ) - -.extralrns_dict$add("surv.gamboost", function() LearnerSurvGAMBoost$new()) +.extralrns_dict$add("surv.gamboost", LearnerSurvGAMBoost) diff --git a/R/learner_mboost_surv_glmboost.R b/R/learner_mboost_surv_glmboost.R index 95c32b926..56fe486b5 100644 --- a/R/learner_mboost_surv_glmboost.R +++ b/R/learner_mboost_surv_glmboost.R @@ -18,164 +18,161 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvGLMBoost", - R6Class("LearnerSurvGLMBoost", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - offset = p_dbl(tags = "train"), - family = p_fct(default = "coxph", - levels = c( - "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", - "custom"), tags = "train"), - custom.family = p_uty(tags = "train"), - nuirange = p_uty(default = c(0, 100), tags = "train"), - center = p_lgl(default = TRUE, tags = "train"), - mstop = p_int(default = 100L, lower = 0L, tags = "train"), - nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), - risk = p_fct(default = "inbag", - levels = c("inbag", "oobag", "none"), tags = "train"), - oobweights = p_uty(default = NULL, tags = "train"), - stopintern = p_lgl(default = FALSE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - sigma = p_dbl(default = 0.1, lower = 0, upper = 1, - tags = "train"), - ipcw = p_uty(default = 1, tags = "train"), - na.action = p_uty(default = stats::na.omit, tags = "train"), - contrasts.arg = p_uty(tags = "train") - ) - - ps$values = list(family = "coxph") - ps$add_dep("sigma", "family", CondEqual$new("cindex")) - ps$add_dep("ipcw", "family", CondEqual$new("cindex")) - ps$add_dep("oobweights", "risk", CondEqual$new("oobag")) - - super$initialize( - id = "surv.glmboost", - param_set = ps, - feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank", "lp"), - properties = "weights", - packages = c("mlr3extralearners", "mboost", "pracma"), - man = "mlr3extralearners::mlr_learners_surv.glmboost", - label = "Boosted Generalized Linear Model" - ) +LearnerSurvGLMBoost = R6Class("LearnerSurvGLMBoost", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + offset = p_dbl(tags = "train"), + family = p_fct(default = "coxph", + levels = c( + "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", + "custom"), tags = "train"), + custom.family = p_uty(tags = "train"), + nuirange = p_uty(default = c(0, 100), tags = "train"), + center = p_lgl(default = TRUE, tags = "train"), + mstop = p_int(default = 100L, lower = 0L, tags = "train"), + nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), + risk = p_fct(default = "inbag", + levels = c("inbag", "oobag", "none"), tags = "train"), + oobweights = p_uty(default = NULL, tags = "train"), + stopintern = p_lgl(default = FALSE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + sigma = p_dbl(default = 0.1, lower = 0, upper = 1, + tags = "train"), + ipcw = p_uty(default = 1, tags = "train"), + na.action = p_uty(default = stats::na.omit, tags = "train"), + contrasts.arg = p_uty(tags = "train") + ) + + ps$values = list(family = "coxph") + ps$add_dep("sigma", "family", CondEqual$new("cindex")) + ps$add_dep("ipcw", "family", CondEqual$new("cindex")) + ps$add_dep("oobweights", "risk", CondEqual$new("oobag")) + + super$initialize( + id = "surv.glmboost", + param_set = ps, + feature_types = c("integer", "numeric", "factor", "logical"), + predict_types = c("distr", "crank", "lp"), + properties = "weights", + packages = c("mlr3extralearners", "mboost", "pracma"), + man = "mlr3extralearners::mlr_learners_surv.glmboost", + label = "Boosted Generalized Linear Model" + ) + } + + #' Importance is supported but fails tests as internally data + #' is coerced to model + #' matrix and original names can't be recovered. 
+ #' + # importance = function() { + # if (is.null(self$model)) { + # stopf("No model stored") + # } + # + # sort(mboost::varimp(self$model)[-1], decreasing = TRUE) + # }, + + #' Importance is supported but fails tests as internally data + #' is coerced to model + #' matrix and original names can't be recovered. + #' + #' description + #' Selected features are extracted with the function + #' [mboost::variable.names.mboost()], with + #' `used.only = TRUE`. + #' return `character()`. + # selected_features = function() { + # if (is.null(self$model)) { + # stopf("No model stored") + # } + # + # sel = unique(names(self$model$model.frame())[self$model$xselect()]) + # sel = sel[!(sel %in% "(Intercept)")] + # + # return(sel) + # } + ), + + private = list( + .train = function(task) { + # parameter custom.family takes precedence over family + if (!is.null(self$param_set$values$custom.family)) { + self$param_set$values$family = "custom" } - #' Importance is supported but fails tests as internally data - #' is coerced to model - #' matrix and original names can't be recovered. - #' - # importance = function() { - # if (is.null(self$model)) { - # stopf("No model stored") - # } - # - # sort(mboost::varimp(self$model)[-1], decreasing = TRUE) - # }, - - #' Importance is supported but fails tests as internally data - #' is coerced to model - #' matrix and original names can't be recovered. - #' - #' description - #' Selected features are extracted with the function - #' [mboost::variable.names.mboost()], with - #' `used.only = TRUE`. - #' return `character()`. - # selected_features = function() { - # if (is.null(self$model)) { - # stopf("No model stored") + pars = self$param_set$get_values(tags = "train") + + saved_ctrl = mboost::boost_control() + on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] + } + + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight + } + + family = switch(pars$family, + coxph = mboost::CoxPH(), + weibull = invoke(mboost::Weibull, + .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), + loglog = invoke(mboost::Loglog, + .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), + lognormal = invoke(mboost::Lognormal, + .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), + gehan = mboost::Gehan(), + cindex = invoke(mboost::Cindex, + .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), + custom = pars$custom.family + ) + + # FIXME - until issue closes + pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] + pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] + pars = pars[!(names(pars) %in% c("family", "custom.family"))] + + invoke(mboost::glmboost, task$formula(task$feature_names), + data = task$data(), family = family, .args = pars) + }, + + .predict = function(task) { + newdata = ordered_features(task, self) + # predict linear predictor + pars = self$param_set$get_values(tags = "predict") + lp = as.numeric( + invoke(predict, self$model, newdata = newdata, type = "link", + .args = pars + )) + + # predict survival + if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + survfit = invoke(mboost::survFit, self$model, newdata = newdata) + + mlr3proba::.surv_return(times = survfit$time, + surv = t(survfit$surv), + lp = lp) + } else { + 
mlr3proba::.surv_return(lp = -lp) + } + + + # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR + # response = NULL + # if (!is.null(self$param_set$values$family)) { + # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { + # response = exp(lp) # } - # - # sel = unique(names(self$model$model.frame())[self$model$xselect()]) - # sel = sel[!(sel %in% "(Intercept)")] - # - # return(sel) # } - ), - - private = list( - .train = function(task) { - # parameter custom.family takes precedence over family - if (!is.null(self$param_set$values$custom.family)) { - self$param_set$values$family = "custom" - } - - pars = self$param_set$get_values(tags = "train") - - saved_ctrl = mboost::boost_control() - on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } - - family = switch(pars$family, - coxph = mboost::CoxPH(), - weibull = invoke(mboost::Weibull, - .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), - loglog = invoke(mboost::Loglog, - .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), - lognormal = invoke(mboost::Lognormal, - .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), - gehan = mboost::Gehan(), - cindex = invoke(mboost::Cindex, - .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), - custom = pars$custom.family - ) - - # FIXME - until issue closes - pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] - pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] - pars = pars[!(names(pars) %in% c("family", "custom.family"))] - - invoke(mboost::glmboost, task$formula(task$feature_names), - data = task$data(), family = family, .args = pars) - }, - - .predict = function(task) { - newdata = ordered_features(task, self) - # predict linear predictor - pars = self$param_set$get_values(tags = "predict") - lp = as.numeric( - invoke(predict, self$model, newdata = newdata, type = "link", - .args = pars - )) - - # predict survival - if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { - survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) - } else { - mlr3proba::.surv_return(lp = -lp) - } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } - } - ) + } ) ) -.extralrns_dict$add("surv.glmboost", function() LearnerSurvGLMBoost$new()) +.extralrns_dict$add("surv.glmboost", LearnerSurvGLMBoost) diff --git a/R/learner_mboost_surv_mboost.R b/R/learner_mboost_surv_mboost.R index b5f6e8240..ad5f18f28 100644 --- a/R/learner_mboost_surv_mboost.R +++ b/R/learner_mboost_surv_mboost.R @@ -18,161 +18,158 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvMBoost", - R6Class("LearnerSurvMBoost", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - family = p_fct(default = "coxph", - levels = c( - "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", - "custom"), tags = c("train", "predict")), - custom.family = p_uty(tags = "train"), - nuirange = p_uty(default = c(0, 100), tags = "train"), - offset = p_dbl(tags = "train"), - center = p_lgl(default = TRUE, tags = "train"), - mstop = p_int(default = 100L, lower = 0L, tags = "train"), - nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), - risk = p_fct(default = "inbag", - levels = c("inbag", "oobag", "none"), tags = "train"), - stopintern = p_lgl(default = FALSE, tags = "train"), - trace = p_lgl(default = FALSE, tags = "train"), - oobweights = p_uty(default = NULL, tags = "train"), - baselearner = p_fct(default = "bbs", - levels = c("bbs", "bols", "btree"), tags = "train"), - sigma = p_dbl(default = 0.1, lower = 0, upper = 1, - tags = "train"), - ipcw = p_uty(default = 1, tags = "train"), - na.action = p_uty(default = stats::na.omit, tags = "train") - ) - - ps$values = list(family = "coxph") - ps$add_dep("sigma", "family", CondEqual$new("cindex")) - ps$add_dep("ipcw", "family", CondEqual$new("cindex")) - - super$initialize( - id = "surv.mboost", - param_set = ps, - feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank", "lp"), - properties = c("weights", "importance", "selected_features"), - packages = c("mlr3extralearners", "mboost"), - man = "mlr3extralearners::mlr_learners_surv.mboost", - label = "Boosted Generalized Additive Model" - ) - }, - - #' @description - #' The importance scores are extracted with the function [mboost::varimp()] with the - #' default arguments. - #' @return Named `numeric()`. - importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - - vimp = as.numeric(mboost::varimp(self$model)) - names(vimp) = unname(stats::variable.names(self$model)) - - sort(vimp, decreasing = TRUE) - }, - - #' @description - #' Selected features are extracted with the function [mboost::variable.names.mboost()], with - #' `used.only = TRUE`. - #' @return `character()`. - selected_features = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - - unname(stats::variable.names(self$model, usedonly = TRUE)) +LearnerSurvMBoost = R6Class("LearnerSurvMBoost", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + family = p_fct(default = "coxph", + levels = c( + "coxph", "weibull", "loglog", "lognormal", "gehan", "cindex", + "custom"), tags = c("train", "predict")), + custom.family = p_uty(tags = "train"), + nuirange = p_uty(default = c(0, 100), tags = "train"), + offset = p_dbl(tags = "train"), + center = p_lgl(default = TRUE, tags = "train"), + mstop = p_int(default = 100L, lower = 0L, tags = "train"), + nu = p_dbl(default = 0.1, lower = 0, upper = 1, tags = "train"), + risk = p_fct(default = "inbag", + levels = c("inbag", "oobag", "none"), tags = "train"), + stopintern = p_lgl(default = FALSE, tags = "train"), + trace = p_lgl(default = FALSE, tags = "train"), + oobweights = p_uty(default = NULL, tags = "train"), + baselearner = p_fct(default = "bbs", + levels = c("bbs", "bols", "btree"), tags = "train"), + sigma = p_dbl(default = 0.1, lower = 0, upper = 1, + tags = "train"), + ipcw = p_uty(default = 1, tags = "train"), + na.action = p_uty(default = stats::na.omit, tags = "train") + ) + + ps$values = list(family = "coxph") + ps$add_dep("sigma", "family", CondEqual$new("cindex")) + ps$add_dep("ipcw", "family", CondEqual$new("cindex")) + + super$initialize( + id = "surv.mboost", + param_set = ps, + feature_types = c("integer", "numeric", "factor", "logical"), + predict_types = c("distr", "crank", "lp"), + properties = c("weights", "importance", "selected_features"), + packages = c("mlr3extralearners", "mboost"), + man = "mlr3extralearners::mlr_learners_surv.mboost", + label = "Boosted Generalized Additive Model" + ) + }, + + #' @description + #' The importance scores are extracted with the function [mboost::varimp()] with the + #' default arguments. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ), - - private = list( - .train = function(task) { - - # parameter custom.family takes precedence over family - if (!is.null(self$param_set$values$custom.family)) { - self$param_set$values$family = "custom" - } - - pars = self$param_set$get_values(tags = "train") - - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } - - # Save control settings and return on exit - saved_ctrl = mboost::boost_control() - on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) - is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) - - # ensure only relevant pars passed to fitted model - if (any(is_ctrl_pars)) { - pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) - pars = pars[!is_ctrl_pars] - } - - family = switch(pars$family, - coxph = mboost::CoxPH(), - weibull = invoke(mboost::Weibull, - .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), - loglog = invoke(mboost::Loglog, - .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), - lognormal = invoke(mboost::Lognormal, - .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), - gehan = mboost::Gehan(), - cindex = invoke(mboost::Cindex, - .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), - custom = pars$custom.family - ) - - # FIXME - until issue closes - pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] - pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] - pars = pars[!(names(pars) %in% c("family", "custom.family"))] - - with_package("mboost", { - invoke(mboost::mboost, - formula = task$formula(task$feature_names), - data = task$data(), family = family, .args = pars) - }) - }, - - .predict = function(task) { - - newdata = ordered_features(task, self) - # predict 
linear predictor - lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link")) - - # predict survival - if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { - survfit = invoke(mboost::survFit, self$model, newdata = newdata) - - mlr3proba::.surv_return(times = survfit$time, - surv = t(survfit$surv), - lp = lp) - } else { - mlr3proba::.surv_return(lp = -lp) - } - - - # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR - # response = NULL - # if (!is.null(self$param_set$values$family)) { - # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { - # response = exp(lp) - # } - # } + + vimp = as.numeric(mboost::varimp(self$model)) + names(vimp) = unname(stats::variable.names(self$model)) + + sort(vimp, decreasing = TRUE) + }, + + #' @description + #' Selected features are extracted with the function [mboost::variable.names.mboost()], with + #' `used.only = TRUE`. + #' @return `character()`. + selected_features = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ) + + unname(stats::variable.names(self$model, usedonly = TRUE)) + } + ), + + private = list( + .train = function(task) { + + # parameter custom.family takes precedence over family + if (!is.null(self$param_set$values$custom.family)) { + self$param_set$values$family = "custom" + } + + pars = self$param_set$get_values(tags = "train") + + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight + } + + # Save control settings and return on exit + saved_ctrl = mboost::boost_control() + on.exit(invoke(mboost::boost_control, .args = saved_ctrl)) + is_ctrl_pars = (names(pars) %in% names(saved_ctrl)) + + # ensure only relevant pars passed to fitted model + if (any(is_ctrl_pars)) { + pars$control = do.call(mboost::boost_control, pars[is_ctrl_pars]) + pars = pars[!is_ctrl_pars] + } + + family = switch(pars$family, + coxph = mboost::CoxPH(), + weibull = invoke(mboost::Weibull, + .args = pars[names(pars) %in% formalArgs(mboost::Weibull)]), + loglog = invoke(mboost::Loglog, + .args = pars[names(pars) %in% formalArgs(mboost::Loglog)]), + lognormal = invoke(mboost::Lognormal, + .args = pars[names(pars) %in% formalArgs(mboost::Lognormal)]), + gehan = mboost::Gehan(), + cindex = invoke(mboost::Cindex, + .args = pars[names(pars) %in% formalArgs(mboost::Cindex)]), + custom = pars$custom.family + ) + + # FIXME - until issue closes + pars = pars[!(names(pars) %in% formalArgs(mboost::Weibull))] + pars = pars[!(names(pars) %in% formalArgs(mboost::Cindex))] + pars = pars[!(names(pars) %in% c("family", "custom.family"))] + + with_package("mboost", { + invoke(mboost::mboost, + formula = task$formula(task$feature_names), + data = task$data(), family = family, .args = pars) + }) + }, + + .predict = function(task) { + + newdata = ordered_features(task, self) + # predict linear predictor + lp = as.numeric(invoke(predict, self$model, newdata = newdata, type = "link")) + + # predict survival + if (is.null(self$param_set$values$family) || self$param_set$values$family == "coxph") { + survfit = invoke(mboost::survFit, self$model, newdata = newdata) + + mlr3proba::.surv_return(times = survfit$time, + surv = t(survfit$surv), + lp = lp) + } else { + mlr3proba::.surv_return(lp = -lp) + } + + + # FIXME - RE-ADD ONCE INTERPRETATION IS CLEAR + # response = NULL + # if (!is.null(self$param_set$values$family)) { + # if (self$param_set$values$family %in% c("weibull", "loglog", "lognormal", "gehan")) { + # response = exp(lp) + # } + # } + } ) ) 
-.extralrns_dict$add("surv.mboost", function() LearnerSurvMBoost$new()) +.extralrns_dict$add("surv.mboost", LearnerSurvMBoost) diff --git a/R/learner_np_dens_mixed.R b/R/learner_np_dens_mixed.R index 437964395..c968ab0c8 100644 --- a/R/learner_np_dens_mixed.R +++ b/R/learner_np_dens_mixed.R @@ -15,92 +15,89 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensMixed", - R6Class("LearnerDensMixed", - inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - bws = p_uty(tags = "train"), - ckertype = p_fct( - default = "gaussian", - levels = c("gaussian", "epanechnikov", "uniform"), - tags = c("train")), - bwscaling = p_lgl(default = FALSE, tags = "train"), - bwmethod = p_fct( - default = "cv.ml", - levels = c("cv.ml", "cv.ls", "normal-reference"), - tags = "train"), - bwtype = p_fct( - default = "fixed", - levels = c("fixed", "generalized_nn", "adaptive_nn"), - tags = "train"), - bandwidth.compute = p_lgl(default = FALSE, tags = "train"), - ckerorder = p_int(default = 2, lower = 2, upper = 8, tags = "train"), - remin = p_lgl(default = TRUE, tags = "train"), - itmax = p_int(lower = 1, default = 10000, tags = "train"), - nmulti = p_int(lower = 1, tags = "train"), - ftol = p_dbl(default = 1.490116e-07, tags = "train"), - tol = p_dbl(default = 1.490116e-04, tags = "train"), - small = p_dbl(default = 1.490116e-05, tags = "train"), - lbc.dir = p_dbl(default = 0.5, tags = "train"), - dfc.dir = p_dbl(default = 0.5, tags = "train"), - cfac.dir = p_uty(default = 2.5 * (3.0 - sqrt(5)), tags = "train"), - initc.dir = p_dbl(default = 1.0, tags = "train"), - lbd.dir = p_dbl(default = 0.1, tags = "train"), - hbd.dir = p_dbl(default = 1, tags = "train"), - dfac.dir = p_uty(default = 0.25 * (3.0 - sqrt(5)), tags = "train"), - initd.dir = p_dbl(default = 1.0, tags = "train"), - lbc.init = p_dbl(default = 0.1, tags = "train"), - hbc.init = p_dbl(default = 2.0, tags = "train"), - cfac.init = p_dbl(default = 0.5, tags = "train"), - lbd.init = p_dbl(default = 0.1, tags = "train"), - hbd.init = p_dbl(default = 0.9, tags = "train"), - dfac.init = p_dbl(default = 0.37, tags = "train"), - ukertype = p_fct(levels = c("aitchisonaitken", "liracine"), tags = "train"), - okertype = p_fct(levels = c("wangvanryzin", "liracine"), tags = "train") - ) +LearnerDensMixed = R6Class("LearnerDensMixed", + inherit = mlr3proba::LearnerDens, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + bws = p_uty(tags = "train"), + ckertype = p_fct( + default = "gaussian", + levels = c("gaussian", "epanechnikov", "uniform"), + tags = c("train")), + bwscaling = p_lgl(default = FALSE, tags = "train"), + bwmethod = p_fct( + default = "cv.ml", + levels = c("cv.ml", "cv.ls", "normal-reference"), + tags = "train"), + bwtype = p_fct( + default = "fixed", + levels = c("fixed", "generalized_nn", "adaptive_nn"), + tags = "train"), + bandwidth.compute = p_lgl(default = FALSE, tags = "train"), + ckerorder = p_int(default = 2, lower = 2, upper = 8, tags = "train"), + remin = p_lgl(default = TRUE, tags = "train"), + itmax = p_int(lower = 1, default = 10000, tags = "train"), + nmulti = p_int(lower = 1, tags = "train"), + ftol = p_dbl(default = 1.490116e-07, tags = "train"), + tol = p_dbl(default = 1.490116e-04, tags = "train"), + small = p_dbl(default = 1.490116e-05, tags = "train"), + lbc.dir = p_dbl(default = 0.5, tags = "train"), + dfc.dir = p_dbl(default = 0.5, tags = "train"), + cfac.dir = p_uty(default = 2.5 * (3.0 - sqrt(5)), tags = "train"), + initc.dir = p_dbl(default = 1.0, tags = "train"), + lbd.dir = p_dbl(default = 0.1, tags = "train"), + hbd.dir = p_dbl(default = 1, tags = "train"), + dfac.dir = p_uty(default = 0.25 * (3.0 - sqrt(5)), tags = "train"), + initd.dir = p_dbl(default = 1.0, tags = "train"), + lbc.init = p_dbl(default = 0.1, tags = "train"), + hbc.init = p_dbl(default = 2.0, tags = "train"), + cfac.init = p_dbl(default = 0.5, tags = "train"), + lbd.init = p_dbl(default = 0.1, tags = "train"), + hbd.init = p_dbl(default = 0.9, tags = "train"), + dfac.init = p_dbl(default = 0.37, tags = "train"), + ukertype = p_fct(levels = c("aitchisonaitken", "liracine"), tags = "train"), + okertype = p_fct(levels = c("wangvanryzin", "liracine"), tags = "train") + ) - super$initialize( - id = "dens.mixed", - packages = c("mlr3extralearners", "np"), - feature_types = c("integer", "numeric"), - predict_types = "pdf", - param_set = ps, - man = "mlr3extralearners::mlr_learners_dens.mixed", - label = "Kernel Density Estimator" - ) - } - ), + super$initialize( + id = "dens.mixed", + packages = c("mlr3extralearners", "np"), + feature_types = c("integer", "numeric"), + predict_types = "pdf", + param_set = ps, + man = "mlr3extralearners::mlr_learners_dens.mixed", + label = "Kernel Density Estimator" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") - data = task$data()[[1]] + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") + data = task$data()[[1]] - pdf = function(x) {} # nolint - body(pdf) = substitute({ - with_package("np", invoke(np::npudens, - tdat = data.frame(data), - edat = data.frame(x), .args = pars)$dens) - }) + pdf = function(x) {} # nolint + body(pdf) = substitute({ + with_package("np", invoke(np::npudens, + tdat = data.frame(data), + edat = data.frame(x), .args = pars)$dens) + }) - kernel = if (is.null(pars$ckertype)) "gaussian" else pars$ckertype - distr6::Distribution$new( - name = paste("Mixed KDE", kernel), - short_name = paste0("MixedKDE_", kernel), - pdf = pdf, type = set6::Reals$new()) - }, + kernel = if (is.null(pars$ckertype)) "gaussian" else pars$ckertype + distr6::Distribution$new( + name = paste("Mixed KDE", kernel), + short_name = paste0("MixedKDE_", kernel), + pdf = pdf, type = set6::Reals$new()) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = 
self$model$pdf(task$data()[[1]]), .args = pars) - } - ) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(task$data()[[1]]), .args = pars) + } ) ) -.extralrns_dict$add("dens.mixed", function() LearnerDensMixed$new()) +.extralrns_dict$add("dens.mixed", LearnerDensMixed) diff --git a/R/learner_obliqueRSF_surv_obliqueRSF.R b/R/learner_obliqueRSF_surv_obliqueRSF.R index 74d5fb474..280cf4e5a 100644 --- a/R/learner_obliqueRSF_surv_obliqueRSF.R +++ b/R/learner_obliqueRSF_surv_obliqueRSF.R @@ -25,90 +25,87 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvObliqueRSF", - R6Class("LearnerSurvObliqueRSF", - inherit = mlr3proba::LearnerSurv, +LearnerSurvObliqueRSF = R6Class("LearnerSurvObliqueRSF", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - alpha = p_dbl(default = 0.5, tags = "train"), - ntree = p_int(default = 100L, lower = 1L, tags = "train"), - eval_times = p_uty(tags = "train"), - min_events_to_split_node = p_int(default = 5L, lower = 1L, tags = "train"), - min_obs_to_split_node = p_int(default = 10L, lower = 1L, tags = "train"), - min_obs_in_leaf_node = p_int(default = 5L, lower = 1L, tags = "train"), - min_events_in_leaf_node = p_int(default = 1L, lower = 1L, tags = "train"), - nsplit = p_int(default = 25L, lower = 1, tags = "train"), - gamma = p_dbl(default = 0.5, lower = 1e-16, tags = "train"), - max_pval_to_split_node = p_dbl(lower = 0, upper = 1, default = 0.5, - tags = "train"), - mtry = p_int(lower = 1, tags = "train"), - mtry_ratio = p_dbl(0, 1, tags = "train"), - dfmax = p_int(lower = 1, tags = "train"), - use.cv = p_lgl(default = FALSE, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - compute_oob_predictions = p_lgl(default = FALSE, tags = "train"), - random_seed = p_int(tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + alpha = p_dbl(default = 0.5, tags = "train"), + ntree = p_int(default = 100L, lower = 1L, tags = "train"), + eval_times = p_uty(tags = "train"), + min_events_to_split_node = p_int(default = 5L, lower = 1L, tags = "train"), + min_obs_to_split_node = p_int(default = 10L, lower = 1L, tags = "train"), + min_obs_in_leaf_node = p_int(default = 5L, lower = 1L, tags = "train"), + min_events_in_leaf_node = p_int(default = 1L, lower = 1L, tags = "train"), + nsplit = p_int(default = 25L, lower = 1, tags = "train"), + gamma = p_dbl(default = 0.5, lower = 1e-16, tags = "train"), + max_pval_to_split_node = p_dbl(lower = 0, upper = 1, default = 0.5, + tags = "train"), + mtry = p_int(lower = 1, tags = "train"), + mtry_ratio = p_dbl(0, 1, tags = "train"), + dfmax = p_int(lower = 1, tags = "train"), + use.cv = p_lgl(default = FALSE, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + compute_oob_predictions = p_lgl(default = FALSE, tags = "train"), + random_seed = p_int(tags = "train") + ) - ps$values = list(verbose = FALSE) + ps$values = list(verbose = FALSE) - super$initialize( - id = "surv.obliqueRSF", - packages = c("mlr3extralearners", "obliqueRSF", "pracma"), - feature_types = c("integer", "numeric", "factor", "ordered"), - predict_types = c("crank", "distr"), - param_set = ps, - properties = c("missings", "oob_error"), - man = "mlr3extralearners::mlr_learners_surv.obliqueRSF", - label = "Oblique Random Forest" - ) - }, + super$initialize( + id = "surv.obliqueRSF", + packages = c("mlr3extralearners", "obliqueRSF", "pracma"), + feature_types = c("integer", "numeric", "factor", "ordered"), + predict_types = c("crank", "distr"), + param_set = ps, + properties = c("missings", "oob_error"), + man = "mlr3extralearners::mlr_learners_surv.obliqueRSF", + label = "Oblique Random Forest" + ) + }, - #' @description - #' Integrated brier score OOB error extracted from the model slot `oob_error`. - #' Concordance is also available. - #' @return `numeric()`. - oob_error = function() { - self$model$oob_error$integrated_briscr[2, ] - } - ), + #' @description + #' Integrated brier score OOB error extracted from the model slot `oob_error`. + #' Concordance is also available. + #' @return `numeric()`. 
+ oob_error = function() { + self$model$oob_error$integrated_briscr[2, ] + } + ), - private = list( - .train = function(task) { - pv = self$param_set$get_values(tags = "train") - pv = convert_ratio(pv, "mtry", "mtry_ratio", length(task$feature_names)) - targets = task$target_names + private = list( + .train = function(task) { + pv = self$param_set$get_values(tags = "train") + pv = convert_ratio(pv, "mtry", "mtry_ratio", length(task$feature_names)) + targets = task$target_names - invoke( - obliqueRSF::ORSF, - data = data.table::setDF(task$data()), - time = targets[1L], - status = targets[2L], - .args = pv - ) - }, + invoke( + obliqueRSF::ORSF, + data = data.table::setDF(task$data()), + time = targets[1L], + status = targets[2L], + .args = pv + ) + }, - .predict = function(task) { + .predict = function(task) { - time = self$model$data[[task$target_names[1]]] - status = self$model$data[[task$target_names[2]]] - utime = unique(time[status == 1]) + time = self$model$data[[task$target_names[1]]] + status = self$model$data[[task$target_names[2]]] + utime = unique(time[status == 1]) - surv = mlr3misc::invoke(predict, - self$model, - newdata = ordered_features(task, self), - times = utime, - .args = self$param_set$get_values(tags = "predict")) + surv = mlr3misc::invoke(predict, + self$model, + newdata = ordered_features(task, self), + times = utime, + .args = self$param_set$get_values(tags = "predict")) - mlr3proba::.surv_return(times = utime, surv = surv) - } - ) + mlr3proba::.surv_return(times = utime, surv = surv) + } ) ) -.extralrns_dict$add("surv.obliqueRSF", function() LearnerSurvObliqueRSF$new()) +.extralrns_dict$add("surv.obliqueRSF", LearnerSurvObliqueRSF) diff --git a/R/learner_partykit_surv_cforest.R b/R/learner_partykit_surv_cforest.R index 1f891ba4a..4cd391545 100644 --- a/R/learner_partykit_surv_cforest.R +++ b/R/learner_partykit_surv_cforest.R @@ -17,142 +17,139 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvCForest", - R6Class("LearnerSurvCForest", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { +LearnerSurvCForest = R6Class("LearnerSurvCForest", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { - ps = ps( - ntree = p_int(default = 500L, lower = 1L, tags = "train"), - replace = p_lgl(default = FALSE, tags = "train"), - fraction = p_dbl(default = 0.632, lower = 0, upper = 1, - tags = "train"), - mtry = p_int(lower = 0L, special_vals = list(Inf), - tags = "train"), # default actually "ceiling(sqrt(nvar))" - mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"), - applyfun = p_uty(tags = c("train", "importance")), - cores = p_int(default = NULL, special_vals = list(NULL), - tags = c("train", "importance", "threads")), - trace = p_lgl(default = FALSE, tags = "train"), - offset = p_uty(tags = "train"), - cluster = p_uty(tags = "train"), - na.action = p_uty(default = "stats::na.pass", tags = "train"), - scores = p_uty(tags = "train"), + ps = ps( + ntree = p_int(default = 500L, lower = 1L, tags = "train"), + replace = p_lgl(default = FALSE, tags = "train"), + fraction = p_dbl(default = 0.632, lower = 0, upper = 1, + tags = "train"), + mtry = p_int(lower = 0L, special_vals = list(Inf), + tags = "train"), # default actually "ceiling(sqrt(nvar))" + mtryratio = p_dbl(lower = 0, upper = 1, tags = "train"), + applyfun = p_uty(tags = c("train", "importance")), + cores = p_int(default = NULL, special_vals = list(NULL), + tags = c("train", "importance", "threads")), + trace = p_lgl(default = FALSE, tags = "train"), + offset = p_uty(tags = "train"), + cluster = p_uty(tags = "train"), + na.action = p_uty(default = "stats::na.pass", tags = "train"), + scores = p_uty(tags = "train"), - teststat = p_fct(default = "quadratic", - levels = c("quadratic", "maximum"), tags = "train"), - splitstat = p_fct(default = "quadratic", - levels = c("quadratic", "maximum"), tags = "train"), - splittest = p_lgl(default = FALSE, tags = "train"), - testtype = p_fct(default = "Univariate", - levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), - tags = "train"), - nmax = p_uty(tags = "train"), - alpha = p_dbl(default = 0.05, lower = 0, upper = 1, - tags = "train"), - mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, - tags = "train"), - logmincriterion = p_dbl(default = log(0.95), tags = "train"), - minsplit = p_int(lower = 1L, default = 20L, tags = "train"), - minbucket = p_int(lower = 1L, default = 7L, tags = "train"), - minprob = p_dbl(default = 0.01, lower = 0, upper = 1, - tags = "train"), - stump = p_lgl(default = FALSE, tags = "train"), - lookahead = p_lgl(default = FALSE, tags = "train"), - MIA = p_lgl(default = FALSE, tags = "train"), - nresample = p_int(default = 9999L, lower = 1L, tags = "train"), - tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, - tags = "train"), - maxsurrogate = p_int(default = 0L, lower = 0L, tags = "train"), - numsurrogate = p_lgl(default = FALSE, tags = "train"), - maxdepth = p_int(default = Inf, lower = 0L, - special_vals = list(Inf), tags = "train"), - multiway = p_lgl(default = FALSE, tags = "train"), - splittry = p_int(default = 2L, lower = 0L, tags = "train"), - intersplit = p_lgl(default = FALSE, tags = "train"), - majority = p_lgl(default = FALSE, tags = "train"), - caseweights = p_lgl(default = TRUE, tags = "train"), - saveinfo = p_lgl(default = FALSE, tags = "train"), - update = p_lgl(default = FALSE, tags = "train"), - splitflavour = p_fct(default = "ctree", - levels = c("ctree", "exhaustive"), tags = "train"), - maxvar = p_int(lower = 1L, tags = "train"), + teststat = p_fct(default = "quadratic", + levels = c("quadratic", "maximum"), tags = "train"), + splitstat = p_fct(default = "quadratic", + levels = 
c("quadratic", "maximum"), tags = "train"), + splittest = p_lgl(default = FALSE, tags = "train"), + testtype = p_fct(default = "Univariate", + levels = c("Bonferroni", "MonteCarlo", "Univariate", "Teststatistic"), + tags = "train"), + nmax = p_uty(tags = "train"), + alpha = p_dbl(default = 0.05, lower = 0, upper = 1, + tags = "train"), + mincriterion = p_dbl(default = 0.95, lower = 0, upper = 1, + tags = "train"), + logmincriterion = p_dbl(default = log(0.95), tags = "train"), + minsplit = p_int(lower = 1L, default = 20L, tags = "train"), + minbucket = p_int(lower = 1L, default = 7L, tags = "train"), + minprob = p_dbl(default = 0.01, lower = 0, upper = 1, + tags = "train"), + stump = p_lgl(default = FALSE, tags = "train"), + lookahead = p_lgl(default = FALSE, tags = "train"), + MIA = p_lgl(default = FALSE, tags = "train"), + nresample = p_int(default = 9999L, lower = 1L, tags = "train"), + tol = p_dbl(default = sqrt(.Machine$double.eps), lower = 0, + tags = "train"), + maxsurrogate = p_int(default = 0L, lower = 0L, tags = "train"), + numsurrogate = p_lgl(default = FALSE, tags = "train"), + maxdepth = p_int(default = Inf, lower = 0L, + special_vals = list(Inf), tags = "train"), + multiway = p_lgl(default = FALSE, tags = "train"), + splittry = p_int(default = 2L, lower = 0L, tags = "train"), + intersplit = p_lgl(default = FALSE, tags = "train"), + majority = p_lgl(default = FALSE, tags = "train"), + caseweights = p_lgl(default = TRUE, tags = "train"), + saveinfo = p_lgl(default = FALSE, tags = "train"), + update = p_lgl(default = FALSE, tags = "train"), + splitflavour = p_fct(default = "ctree", + levels = c("ctree", "exhaustive"), tags = "train"), + maxvar = p_int(lower = 1L, tags = "train"), - # predict; missing FUN and simplify (not needed here) - OOB = p_lgl(default = FALSE, tags = c("predict", "importance")), - simplify = p_lgl(default = TRUE, tags = "predict"), - scale = p_lgl(default = TRUE, tags = "predict"), + # predict; missing FUN and simplify (not needed here) + OOB = p_lgl(default = FALSE, tags = c("predict", "importance")), + simplify = p_lgl(default = TRUE, tags = "predict"), + scale = p_lgl(default = TRUE, tags = "predict"), - # importance; OOB see predict, applyfun, cores see train - # nperm = p_int(default = 1L, lower = 0L, tags = c("train", "importance")), - # risk = p_fct(default = "loglik", levels = c("loglik", "misclassification"), - # tags = c("train", "importance")), - # conditional = p_lgl(default = FALSE, tags = c("train", "importance")), - # threshold = p_dbl(default = 0.2, tags = c("train", "importance")), + # importance; OOB see predict, applyfun, cores see train + # nperm = p_int(default = 1L, lower = 0L, tags = c("train", "importance")), + # risk = p_fct(default = "loglik", levels = c("loglik", "misclassification"), + # tags = c("train", "importance")), + # conditional = p_lgl(default = FALSE, tags = c("train", "importance")), + # threshold = p_dbl(default = 0.2, tags = c("train", "importance")), - maxpts = p_int(default = 25000L, tags = "train"), - abseps = p_dbl(default = 0.001, lower = 0, tags = "train"), - releps = p_dbl(default = 0, lower = 0, tags = "train") - ) + maxpts = p_int(default = 25000L, tags = "train"), + abseps = p_dbl(default = 0.001, lower = 0, tags = "train"), + releps = p_dbl(default = 0, lower = 0, tags = "train") + ) - ps$add_dep("nresample", on = "testtype", - cond = CondEqual$new("MonteCarlo")) - # ps$add_dep("nperm", on = "conditional", cond = CondEqual$new(TRUE)) - # ps$add_dep("threshold", on = "conditional", cond = 
CondEqual$new(TRUE)) + ps$add_dep("nresample", on = "testtype", + cond = CondEqual$new("MonteCarlo")) + # ps$add_dep("nperm", on = "conditional", cond = CondEqual$new(TRUE)) + # ps$add_dep("threshold", on = "conditional", cond = CondEqual$new(TRUE)) - # set the cforest specific ctree_control parameters - ps$values$teststat = "quadratic" - ps$values$testtype = "Univariate" - ps$values$mincriterion = 0 - ps$values$saveinfo = FALSE + # set the cforest specific ctree_control parameters + ps$values$teststat = "quadratic" + ps$values$testtype = "Univariate" + ps$values$mincriterion = 0 + ps$values$saveinfo = FALSE - super$initialize( - id = "surv.cforest", - param_set = ps, - # can also predict weights, node, but not really useful here - predict_types = c("distr", "crank"), - feature_types = c("integer", "numeric", "factor", "ordered"), - properties = c("weights"), - packages = c("mlr3extralearners", "partykit", "sandwich", "coin"), - man = "mlr3extralearners::mlr_learners_surv.cforest", - label = "Conditional Random Forest" - ) - } - ), + super$initialize( + id = "surv.cforest", + param_set = ps, + # can also predict weights, node, but not really useful here + predict_types = c("distr", "crank"), + feature_types = c("integer", "numeric", "factor", "ordered"), + properties = c("weights"), + packages = c("mlr3extralearners", "partykit", "sandwich", "coin"), + man = "mlr3extralearners::mlr_learners_surv.cforest", + label = "Conditional Random Forest" + ) + } + ), - private = list( - .train = function(task) { - train_cforest(self, task) - }, + private = list( + .train = function(task) { + train_cforest(self, task) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) - preds = invoke(predict, object = self$model, newdata = newdata, - type = "prob", .args = pars) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) + preds = invoke(predict, object = self$model, newdata = newdata, + type = "prob", .args = pars) - # Define WeightedDiscrete distr6 distribution from the survival function - x = lapply(preds, function(z) { - time = c(0, z$time, max(z$time) + 1e-3) - surv = c(1, z$surv, 0) - data.frame(x = time, cdf = 1 - surv) - }) - distr = distr6::VectorDistribution$new( - distribution = "WeightedDiscrete", - params = x, - decorators = c("CoreStatistics", "ExoticStatistics")) + # Define WeightedDiscrete distr6 distribution from the survival function + x = lapply(preds, function(z) { + time = c(0, z$time, max(z$time) + 1e-3) + surv = c(1, z$surv, 0) + data.frame(x = time, cdf = 1 - surv) + }) + distr = distr6::VectorDistribution$new( + distribution = "WeightedDiscrete", + params = x, + decorators = c("CoreStatistics", "ExoticStatistics")) - # Define crank as the mean of the survival distribution - crank = -vapply(x, function(z) sum(z[, 1] * c(z[, 2][1], diff(z[, 2]))), numeric(1)) + # Define crank as the mean of the survival distribution + crank = -vapply(x, function(z) sum(z[, 1] * c(z[, 2][1], diff(z[, 2]))), numeric(1)) - list(crank = crank, distr = distr) - } - ) + list(crank = crank, distr = distr) + } ) ) -.extralrns_dict$add("surv.cforest", function() LearnerSurvCForest$new()) +.extralrns_dict$add("surv.cforest", LearnerSurvCForest) diff --git a/R/learner_partykit_surv_ctree.R b/R/learner_partykit_surv_ctree.R index a78558809..249991ae7 100644 --- a/R/learner_partykit_surv_ctree.R +++ b/R/learner_partykit_surv_ctree.R @@ -16,124 +16,121 @@ #' @export #' 
@template seealso_learner #' @template example -delayedAssign( - "LearnerSurvCTree", - R6Class("LearnerSurvCTree", - inherit = mlr3proba::LearnerSurv, - public = list( +LearnerSurvCTree = R6Class("LearnerSurvCTree", + inherit = mlr3proba::LearnerSurv, + public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - teststat = p_fct(levels = c("quadratic", "maximum"), - default = "quadratic", tags = "train"), - splitstat = p_fct(levels = c("quadratic", "maximum"), - default = "quadratic", tags = "train"), - splittest = p_lgl(default = FALSE, tags = "train"), - testtype = p_fct(levels = c("Bonferroni", "MonteCarlo", - "Univariate", "Teststatistic"), default = "Bonferroni", + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + teststat = p_fct(levels = c("quadratic", "maximum"), + default = "quadratic", tags = "train"), + splitstat = p_fct(levels = c("quadratic", "maximum"), + default = "quadratic", tags = "train"), + splittest = p_lgl(default = FALSE, tags = "train"), + testtype = p_fct(levels = c("Bonferroni", "MonteCarlo", + "Univariate", "Teststatistic"), default = "Bonferroni", + tags = "train"), + nmax = p_uty(tags = "train"), + alpha = p_dbl(lower = 0, upper = 1, default = 0.05, tags = "train"), - nmax = p_uty(tags = "train"), - alpha = p_dbl(lower = 0, upper = 1, default = 0.05, - tags = "train"), - mincriterion = p_dbl(lower = 0, upper = 1, default = 0.95, - tags = "train"), - logmincriterion = p_dbl(tags = "train"), - minsplit = p_int(lower = 1L, default = 20L, tags = "train"), - minbucket = p_int(lower = 1L, default = 7L, tags = "train"), - minprob = p_dbl(lower = 0, default = 0.01, tags = "train"), - stump = p_lgl(default = FALSE, tags = "train"), - lookahead = p_lgl(default = FALSE, tags = "train"), - MIA = p_lgl(default = FALSE, tags = "train"), - nresample = p_int(lower = 1L, default = 9999L, - tags = "train"), - tol = p_dbl(lower = 0, tags = "train"), - maxsurrogate = p_int(lower = 0L, default = 0L, - tags = "train"), - numsurrogate = p_lgl(default = FALSE, tags = "train"), - mtry = p_int(lower = 0L, special_vals = list(Inf), - default = Inf, tags = "train"), - maxdepth = p_int(lower = 0L, special_vals = list(Inf), - default = Inf, tags = "train"), - maxvar = p_int(lower = 1L, tags = "train"), - multiway = p_lgl(default = FALSE, tags = "train"), - splittry = p_int(lower = 0L, default = 2L, tags = "train"), - intersplit = p_lgl(default = FALSE, tags = "train"), - majority = p_lgl(default = FALSE, tags = "train"), - caseweights = p_lgl(default = FALSE, tags = "train"), - applyfun = p_uty(tags = "train"), - cores = p_int(special_vals = list(NULL), default = NULL, - tags = c("train", "threads")), - saveinfo = p_lgl(default = TRUE, tags = "train"), - update = p_lgl(default = FALSE, tags = "train"), - splitflavour = p_fct(default = "ctree", # goes into control - levels = c("ctree", "exhaustive"), tags = "train"), - offset = p_uty(tags = "train"), - cluster = p_uty(tags = "train"), - scores = p_uty(tags = "train"), - doFit = p_lgl(default = TRUE, tags = "train"), - maxpts = p_int(default = 25000L, tags = "train"), - abseps = p_dbl(default = 0.001, lower = 0, tags = "train"), - releps = p_dbl(default = 0, lower = 0, tags = "train") - ) + mincriterion = p_dbl(lower = 0, upper = 1, default = 0.95, + tags = "train"), + logmincriterion = p_dbl(tags = "train"), + minsplit = p_int(lower = 1L, default = 20L, tags = "train"), + minbucket = p_int(lower = 1L, 
default = 7L, tags = "train"), + minprob = p_dbl(lower = 0, default = 0.01, tags = "train"), + stump = p_lgl(default = FALSE, tags = "train"), + lookahead = p_lgl(default = FALSE, tags = "train"), + MIA = p_lgl(default = FALSE, tags = "train"), + nresample = p_int(lower = 1L, default = 9999L, + tags = "train"), + tol = p_dbl(lower = 0, tags = "train"), + maxsurrogate = p_int(lower = 0L, default = 0L, + tags = "train"), + numsurrogate = p_lgl(default = FALSE, tags = "train"), + mtry = p_int(lower = 0L, special_vals = list(Inf), + default = Inf, tags = "train"), + maxdepth = p_int(lower = 0L, special_vals = list(Inf), + default = Inf, tags = "train"), + maxvar = p_int(lower = 1L, tags = "train"), + multiway = p_lgl(default = FALSE, tags = "train"), + splittry = p_int(lower = 0L, default = 2L, tags = "train"), + intersplit = p_lgl(default = FALSE, tags = "train"), + majority = p_lgl(default = FALSE, tags = "train"), + caseweights = p_lgl(default = FALSE, tags = "train"), + applyfun = p_uty(tags = "train"), + cores = p_int(special_vals = list(NULL), default = NULL, + tags = c("train", "threads")), + saveinfo = p_lgl(default = TRUE, tags = "train"), + update = p_lgl(default = FALSE, tags = "train"), + splitflavour = p_fct(default = "ctree", # goes into control + levels = c("ctree", "exhaustive"), tags = "train"), + offset = p_uty(tags = "train"), + cluster = p_uty(tags = "train"), + scores = p_uty(tags = "train"), + doFit = p_lgl(default = TRUE, tags = "train"), + maxpts = p_int(default = 25000L, tags = "train"), + abseps = p_dbl(default = 0.001, lower = 0, tags = "train"), + releps = p_dbl(default = 0, lower = 0, tags = "train") + ) - ps$add_dep("nresample", "testtype", CondEqual$new("MonteCarlo")) + ps$add_dep("nresample", "testtype", CondEqual$new("MonteCarlo")) - super$initialize( - id = "surv.ctree", - packages = c("mlr3extralearners", "partykit", "coin", "sandwich"), - feature_types = c("integer", "numeric", "factor", "ordered"), - predict_types = c("distr", "crank"), - param_set = ps, - properties = "weights", - man = "mlr3extralearners::mlr_learners_surv.ctree", - label = "Conditional Inference Tree" - ) - } - ), + super$initialize( + id = "surv.ctree", + packages = c("mlr3extralearners", "partykit", "coin", "sandwich"), + feature_types = c("integer", "numeric", "factor", "ordered"), + predict_types = c("distr", "crank"), + param_set = ps, + properties = "weights", + man = "mlr3extralearners::mlr_learners_surv.ctree", + label = "Conditional Inference Tree" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") - pars_pargs = pars[names(pars) %in% formalArgs(mvtnorm::GenzBretz)] - pars = pars[names(pars) %nin% formalArgs(mvtnorm::GenzBretz)] + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") + pars_pargs = pars[names(pars) %in% formalArgs(mvtnorm::GenzBretz)] + pars = pars[names(pars) %nin% formalArgs(mvtnorm::GenzBretz)] - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight + } - pars$pargs = invoke(mvtnorm::GenzBretz, pars_pargs) + pars$pargs = invoke(mvtnorm::GenzBretz, pars_pargs) - invoke(partykit::ctree, formula = task$formula(), - data = task$data(), .args = pars) - }, + invoke(partykit::ctree, formula = task$formula(), + data = task$data(), .args = pars) + }, - .predict = function(task) { - newdata = ordered_features(task, self) - pars = self$param_set$get_values(tags = 
"predict") - preds = invoke(predict, self$model, type = "prob", newdata = newdata, - .args = pars - ) + .predict = function(task) { + newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + preds = invoke(predict, self$model, type = "prob", newdata = newdata, + .args = pars + ) - # Define WeightedDiscrete distr6 distribution from the survival function - x = lapply(preds, function(z) { - time = c(0, z$time, max(z$time) + 1e-3) - surv = c(1, z$surv, 0) - data.frame(x = time, cdf = 1 - surv) - }) - distr = distr6::VectorDistribution$new( - distribution = "WeightedDiscrete", - params = x, - decorators = c("CoreStatistics", "ExoticStatistics")) + # Define WeightedDiscrete distr6 distribution from the survival function + x = lapply(preds, function(z) { + time = c(0, z$time, max(z$time) + 1e-3) + surv = c(1, z$surv, 0) + data.frame(x = time, cdf = 1 - surv) + }) + distr = distr6::VectorDistribution$new( + distribution = "WeightedDiscrete", + params = x, + decorators = c("CoreStatistics", "ExoticStatistics")) - # Define crank as the mean of the survival distribution - crank = -vapply(x, function(z) sum(z[, 1] * c(z[, 2][1], diff(z[, 2]))), numeric(1)) + # Define crank as the mean of the survival distribution + crank = -vapply(x, function(z) sum(z[, 1] * c(z[, 2][1], diff(z[, 2]))), numeric(1)) - list(crank = crank, distr = distr) - } - ) + list(crank = crank, distr = distr) + } ) ) -.extralrns_dict$add("surv.ctree", function() LearnerSurvCTree$new()) +.extralrns_dict$add("surv.ctree", LearnerSurvCTree) diff --git a/R/learner_penalized_surv_penalized.R b/R/learner_penalized_surv_penalized.R index 3fe7de491..1da9c4536 100644 --- a/R/learner_penalized_surv_penalized.R +++ b/R/learner_penalized_surv_penalized.R @@ -21,92 +21,89 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvPenalized", - R6Class("LearnerSurvPenalized", - inherit = mlr3proba::LearnerSurv, +LearnerSurvPenalized = R6Class("LearnerSurvPenalized", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - unpenalized = p_uty(tags = c("train", "predict")), - lambda1 = p_uty(default = 0, tags = "train"), - lambda2 = p_uty(default = 0, tags = "train"), - positive = p_lgl(default = FALSE, tags = "train"), - fusedl = p_lgl(default = FALSE, tags = "train"), - startbeta = p_dbl(tags = "train"), - startgamma = p_dbl(tags = "train"), - steps = p_int(lower = 1L, default = 1L, tags = "train"), - epsilon = p_dbl(default = 1.0e-10, lower = 0, upper = 1, tags = "train"), - maxiter = p_int(lower = 1, tags = "train"), - standardize = p_lgl(default = FALSE, tags = "train"), - trace = p_lgl(default = TRUE, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + unpenalized = p_uty(tags = c("train", "predict")), + lambda1 = p_uty(default = 0, tags = "train"), + lambda2 = p_uty(default = 0, tags = "train"), + positive = p_lgl(default = FALSE, tags = "train"), + fusedl = p_lgl(default = FALSE, tags = "train"), + startbeta = p_dbl(tags = "train"), + startgamma = p_dbl(tags = "train"), + steps = p_int(lower = 1L, default = 1L, tags = "train"), + epsilon = p_dbl(default = 1.0e-10, lower = 0, upper = 1, tags = "train"), + maxiter = p_int(lower = 1, tags = "train"), + standardize = p_lgl(default = FALSE, tags = "train"), + trace = p_lgl(default = TRUE, tags = "train") + ) - super$initialize( - id = "surv.penalized", - packages = c("mlr3extralearners", "penalized", "pracma"), - feature_types = c("integer", "numeric", "factor", "logical"), - predict_types = c("distr", "crank"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.penalized", - label = "Penalized Regression" - ) - } - ), + super$initialize( + id = "surv.penalized", + packages = c("mlr3extralearners", "penalized", "pracma"), + feature_types = c("integer", "numeric", "factor", "logical"), + predict_types = c("distr", "crank"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.penalized", + label = "Penalized Regression" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - # Checks missing data early to prevent crashing, which is not caught earlier by task/train + # Checks missing data early to prevent crashing, which is not caught earlier by task/train - if (any(task$missings() > 0)) { - stop("Missing data is not supported by ", self$id) - } + if (any(task$missings() > 0)) { + stop("Missing data is not supported by ", self$id) + } - # Changes the structure of the penalized and unpenalized parameters to be more user friendly. - # Now the user supplies the column names as a vector and these are added to the formula as - # required. - pars = self$param_set$get_values(tags = "train") - if (length(pars$unpenalized) == 0) { - penalized = formulate(rhs = task$feature_names) - } else { - penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized]) - pars$unpenalized = formulate(rhs = pars$unpenalized) - } + # Changes the structure of the penalized and unpenalized parameters to be more user friendly. + # Now the user supplies the column names as a vector and these are added to the formula as + # required. 
+ pars = self$param_set$get_values(tags = "train") + if (length(pars$unpenalized) == 0) { + penalized = formulate(rhs = task$feature_names) + } else { + penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized]) + pars$unpenalized = formulate(rhs = pars$unpenalized) + } - with_package("penalized", { - invoke(penalized::penalized, - response = task$truth(), penalized = penalized, - data = task$data(cols = task$feature_names), model = "cox", .args = pars) - }) - }, + with_package("penalized", { + invoke(penalized::penalized, + response = task$truth(), penalized = penalized, + data = task$data(cols = task$feature_names), model = "cox", .args = pars) + }) + }, - .predict = function(task) { - # Again the penalized and unpenalized covariates are automatically converted to the - # correct formula + .predict = function(task) { + # Again the penalized and unpenalized covariates are automatically converted to the + # correct formula - pars = self$param_set$get_values(tags = "predict") - if (length(pars$unpenalized) == 0) { - penalized = formulate(rhs = task$feature_names) - } else { - penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized]) - pars$unpenalized = formulate(rhs = pars$unpenalized) - } + pars = self$param_set$get_values(tags = "predict") + if (length(pars$unpenalized) == 0) { + penalized = formulate(rhs = task$feature_names) + } else { + penalized = formulate(rhs = task$feature_names[task$feature_names %nin% pars$unpenalized]) + pars$unpenalized = formulate(rhs = pars$unpenalized) + } - surv = with_package("penalized", { - invoke(penalized::predict, self$model, - penalized = penalized, - data = ordered_features(task, self), - .args = pars) - }) + surv = with_package("penalized", { + invoke(penalized::predict, self$model, + penalized = penalized, + data = ordered_features(task, self), + .args = pars) + }) - mlr3proba::.surv_return(times = surv@time, surv = surv@curves) - } - ) + mlr3proba::.surv_return(times = surv@time, surv = surv@curves) + } ) ) -.extralrns_dict$add("surv.penalized", function() LearnerSurvPenalized$new()) +.extralrns_dict$add("surv.penalized", LearnerSurvPenalized) diff --git a/R/learner_pendensity_dens_pen.R b/R/learner_pendensity_dens_pen.R index 540085b28..6fb04d946 100644 --- a/R/learner_pendensity_dens_pen.R +++ b/R/learner_pendensity_dens_pen.R @@ -15,80 +15,77 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensPenalized", - R6Class("LearnerDensPenalized", - inherit = mlr3proba::LearnerDens, +LearnerDensPenalized = R6Class("LearnerDensPenalized", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - base = p_fct(default = "bspline", - levels = c("bspline", "gaussian"), tags = "train"), - no.base = p_dbl(default = 41, tags = "train"), - max.iter = p_dbl(default = 20, tags = "train"), - lambda0 = p_dbl(default = 500, tags = "train"), - q = p_dbl(default = 3, tags = "train"), - sort = p_lgl(default = TRUE, tags = "train"), - with.border = p_uty(tags = "train"), - m = p_dbl(default = 3, tags = "train"), - eps = p_dbl(default = 0.01, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + base = p_fct(default = "bspline", + levels = c("bspline", "gaussian"), tags = "train"), + no.base = p_dbl(default = 41, tags = "train"), + max.iter = p_dbl(default = 20, tags = "train"), + lambda0 = p_dbl(default = 500, tags = "train"), + q = p_dbl(default = 3, tags = "train"), + sort = p_lgl(default = TRUE, tags = "train"), + with.border = p_uty(tags = "train"), + m = p_dbl(default = 3, tags = "train"), + eps = p_dbl(default = 0.01, tags = "train") + ) - super$initialize( - id = "dens.pen", - packages = c("mlr3extralearners", "pendensity"), - feature_types = c("integer", "numeric"), - predict_types = c("pdf", "cdf"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_dens.pen", - label = "Penalized Density Estimation" - ) - } - ), - - private = list( - .train = function(task) { + super$initialize( + id = "dens.pen", + packages = c("mlr3extralearners", "pendensity"), + feature_types = c("integer", "numeric"), + predict_types = c("pdf", "cdf"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_dens.pen", + label = "Penalized Density Estimation" + ) + } + ), - pars = self$param_set$get_values(tags = "train") - fit = invoke(pendensity::pendensity, - form = task$data()[[1]] ~ 1, - .args = pars) + private = list( + .train = function(task) { - pdf = function(x) {} # nolint - body(pdf) = substitute({ - invoke(pendensity::dpendensity, - x = fit, - val = x) - }) + pars = self$param_set$get_values(tags = "train") + fit = invoke(pendensity::pendensity, + form = task$data()[[1]] ~ 1, + .args = pars) - cdf = function(x) {} # nolint - body(cdf) = substitute({ - invoke(pendensity::ppendensity, - x = fit, - val = x) - }) + pdf = function(x) {} # nolint + body(pdf) = substitute({ + invoke(pendensity::dpendensity, + x = fit, + val = x) + }) - base = if (is.null(pars$base)) { - "gaussian" - } else { - pars$base - } - distr6::Distribution$new( - name = paste("Penalized Density", base), - short_name = paste0("PenDens_", base), - pdf = pdf, cdf = cdf, type = set6::Reals$new()) - }, + cdf = function(x) {} # nolint + body(cdf) = substitute({ + invoke(pendensity::ppendensity, + x = fit, + val = x) + }) - .predict = function(task) { - newdata = task$data()[[1]] - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$pdf(newdata), .args = pars) + base = if (is.null(pars$base)) { + "gaussian" + } else { + pars$base } - ) + distr6::Distribution$new( + name = paste("Penalized Density", base), + short_name = paste0("PenDens_", base), + pdf = pdf, cdf = cdf, type = set6::Reals$new()) + }, + + .predict = function(task) { + newdata = task$data()[[1]] + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(newdata), cdf = self$model$pdf(newdata), .args = pars) + } ) ) -.extralrns_dict$add("dens.pen", function() LearnerDensPenalized$new()) +.extralrns_dict$add("dens.pen", LearnerDensPenalized) diff --git a/R/learner_plugdensity_dens_plug.R b/R/learner_plugdensity_dens_plug.R index 1a016191c..7132c45ad 100644 --- a/R/learner_plugdensity_dens_plug.R +++ b/R/learner_plugdensity_dens_plug.R @@ -15,52 +15,49 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensPlugin", - R6Class("LearnerDensPlugin", - inherit = mlr3proba::LearnerDens, +LearnerDensPlugin = R6Class("LearnerDensPlugin", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - na.rm = p_lgl(default = FALSE, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + na.rm = p_lgl(default = FALSE, tags = "train") + ) - super$initialize( - id = "dens.plug", - packages = c("mlr3extralearners", "plugdensity"), - feature_types = "numeric", - predict_types = "pdf", - param_set = ps, - properties = "missings", - man = "mlr3extralearners::mlr_learners_dens.plug", - label = "Kernel Density Estimator" - ) - } - ), + super$initialize( + id = "dens.plug", + packages = c("mlr3extralearners", "plugdensity"), + feature_types = "numeric", + predict_types = "pdf", + param_set = ps, + properties = "missings", + man = "mlr3extralearners::mlr_learners_dens.plug", + label = "Kernel Density Estimator" + ) + } + ), - private = list( - .train = function(task) { - pdf = function(x) {} # nolint - body(pdf) = substitute({ - invoke(plugdensity::plugin.density, x = data, xout = x, na.rm = TRUE)$y - }, list(data = task$data()[[1]])) + private = list( + .train = function(task) { + pdf = function(x) {} # nolint + body(pdf) = substitute({ + invoke(plugdensity::plugin.density, x = data, xout = x, na.rm = TRUE)$y + }, list(data = task$data()[[1]])) - distr6::Distribution$new( - name = "Plugin KDE", - short_name = "PluginKDE", - pdf = pdf, - type = set6::Reals$new()) - }, + distr6::Distribution$new( + name = "Plugin KDE", + short_name = "PluginKDE", + pdf = pdf, + type = set6::Reals$new()) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(task$data()[[1]]), .args = pars) - } - ) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(task$data()[[1]]), .args = pars) + } ) ) -.extralrns_dict$add("dens.plug", function() LearnerDensPlugin$new()) +.extralrns_dict$add("dens.plug", LearnerDensPlugin) diff --git a/R/learner_randomForestSRC_surv_rfsrc.R b/R/learner_randomForestSRC_surv_rfsrc.R index 9044eb5bd..037307912 100644 --- a/R/learner_randomForestSRC_surv_rfsrc.R +++ b/R/learner_randomForestSRC_surv_rfsrc.R @@ -24,167 +24,164 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvRandomForestSRC", - R6Class("LearnerSurvRandomForestSRC", - inherit = mlr3proba::LearnerSurv, - - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - ps = ps( - ntree = p_int(default = 1000, lower = 1L, tags = "train"), - mtry = p_int(lower = 1L, tags = "train"), - mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), - nodesize = p_int(default = 15L, lower = 1L, tags = "train"), - nodedepth = p_int(lower = 1L, tags = "train"), - splitrule = p_fct( - levels = c("logrank", "bs.gradient", "logrankscore"), - default = "logrank", tags = "train"), - nsplit = p_int(lower = 0, default = 10, tags = "train"), - importance = p_fct( - default = "FALSE", - levels = c("FALSE", "TRUE", "none", "permute", "random", "anti"), - tags = c("train", "predict")), - block.size = p_int(default = 10L, lower = 1L, tags = c("train", "predict")), - bootstrap = p_fct( - default = "by.root", - levels = c("by.root", "by.node", "none", "by.user"), tags = "train"), - samptype = p_fct( - default = "swor", levels = c("swor", "swr"), - tags = "train"), - samp = p_uty(tags = "train"), - membership = p_lgl(default = FALSE, tags = c("train", "predict")), - sampsize = p_uty(tags = "train"), - sampsize.ratio = p_dbl(0, 1, tags = "train"), - na.action = p_fct( - default = "na.omit", levels = c("na.omit", "na.impute"), - tags = c("train", "predict")), - nimpute = p_int(default = 1L, lower = 1L, tags = "train"), - ntime = p_int(lower = 1L, tags = "train"), - cause = p_int(lower = 1L, tags = "train"), - proximity = p_fct( - default = "FALSE", - levels = c("FALSE", "TRUE", "inbag", "oob", "all"), - tags = c("train", "predict")), - distance = p_fct( - default = "FALSE", - levels = c("FALSE", "TRUE", "inbag", "oob", "all"), - tags = c("train", "predict")), - forest.wt = p_fct( - default = "FALSE", - levels = c("FALSE", "TRUE", "inbag", "oob", "all"), - tags = c("train", "predict")), - xvar.wt = p_uty(tags = "train"), - split.wt = p_uty(tags = "train"), - forest = p_lgl(default = TRUE, tags = "train"), - var.used = p_fct( - default = "FALSE", - levels = c("FALSE", "all.trees", "by.tree"), tags = c("train", "predict")), - split.depth = p_fct( - default = "FALSE", - levels = c("FALSE", "all.trees", "by.tree"), tags = c("train", "predict")), - seed = p_int(upper = -1L, tags = c("train", "predict")), - do.trace = p_lgl(default = FALSE, tags = c("train", "predict")), - statistics = p_lgl(default = FALSE, tags = c("train", "predict")), - get.tree = p_uty(tags = "predict"), - outcome = p_fct( - default = "train", levels = c("train", "test"), - tags = "predict"), - ptn.count = p_int(default = 0L, lower = 0L, tags = "predict"), - estimator = p_fct(default = "nelson", levels = c("nelson", "kaplan"), - tags = "predict"), - cores = p_int(default = 1L, lower = 1L, tags = c("train", "predict", "threads")), - save.memory = p_lgl(default = FALSE, tags = "train"), - perf.type = p_fct(levels = "none", tags = "train") - ) - - super$initialize( - id = "surv.rfsrc", - packages = c("mlr3extralearners", "randomForestSRC", "pracma"), - feature_types = c("logical", "integer", "numeric", "factor"), - predict_types = c("crank", "distr"), - param_set = ps, - # selected features is possible but there's a bug somewhere in rfsrc so that the model - # can be trained but not predicted. so public method retained but property not included - properties = c("weights", "missings", "importance", "oob_error"), - man = "mlr3extralearners::mlr_learners_surv.rfsrc", - label = "Random Forest" - ) - }, - - #' @description - #' The importance scores are extracted from the model slot `importance`. - #' @return Named `numeric()`. 
- importance = function() { - if (is.null(self$model$importance) & !is.null(self$model)) { - stopf("Set 'importance' to one of: {'TRUE', 'permute', 'random', 'anti'}.") - } - - sort(self$model$importance, decreasing = TRUE) - }, - - #' @description - #' Selected features are extracted from the model slot `var.used`. - #' @return `character()`. - selected_features = function() { - if (is.null(self$model$var.used) & !is.null(self$model)) { - stopf("Set 'var.used' to one of: {'all.trees', 'by.tree'}.") - } - - self$model$var.used - }, - - #' @description - #' OOB error extracted from the model slot `err.rate`. - #' @return `numeric()`. - oob_error = function() { - self$model$err.rate[self$model$ntree] +LearnerSurvRandomForestSRC = R6Class("LearnerSurvRandomForestSRC", + inherit = mlr3proba::LearnerSurv, + + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + ntree = p_int(default = 1000, lower = 1L, tags = "train"), + mtry = p_int(lower = 1L, tags = "train"), + mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), + nodesize = p_int(default = 15L, lower = 1L, tags = "train"), + nodedepth = p_int(lower = 1L, tags = "train"), + splitrule = p_fct( + levels = c("logrank", "bs.gradient", "logrankscore"), + default = "logrank", tags = "train"), + nsplit = p_int(lower = 0, default = 10, tags = "train"), + importance = p_fct( + default = "FALSE", + levels = c("FALSE", "TRUE", "none", "permute", "random", "anti"), + tags = c("train", "predict")), + block.size = p_int(default = 10L, lower = 1L, tags = c("train", "predict")), + bootstrap = p_fct( + default = "by.root", + levels = c("by.root", "by.node", "none", "by.user"), tags = "train"), + samptype = p_fct( + default = "swor", levels = c("swor", "swr"), + tags = "train"), + samp = p_uty(tags = "train"), + membership = p_lgl(default = FALSE, tags = c("train", "predict")), + sampsize = p_uty(tags = "train"), + sampsize.ratio = p_dbl(0, 1, tags = "train"), + na.action = p_fct( + default = "na.omit", levels = c("na.omit", "na.impute"), + tags = c("train", "predict")), + nimpute = p_int(default = 1L, lower = 1L, tags = "train"), + ntime = p_int(lower = 1L, tags = "train"), + cause = p_int(lower = 1L, tags = "train"), + proximity = p_fct( + default = "FALSE", + levels = c("FALSE", "TRUE", "inbag", "oob", "all"), + tags = c("train", "predict")), + distance = p_fct( + default = "FALSE", + levels = c("FALSE", "TRUE", "inbag", "oob", "all"), + tags = c("train", "predict")), + forest.wt = p_fct( + default = "FALSE", + levels = c("FALSE", "TRUE", "inbag", "oob", "all"), + tags = c("train", "predict")), + xvar.wt = p_uty(tags = "train"), + split.wt = p_uty(tags = "train"), + forest = p_lgl(default = TRUE, tags = "train"), + var.used = p_fct( + default = "FALSE", + levels = c("FALSE", "all.trees", "by.tree"), tags = c("train", "predict")), + split.depth = p_fct( + default = "FALSE", + levels = c("FALSE", "all.trees", "by.tree"), tags = c("train", "predict")), + seed = p_int(upper = -1L, tags = c("train", "predict")), + do.trace = p_lgl(default = FALSE, tags = c("train", "predict")), + statistics = p_lgl(default = FALSE, tags = c("train", "predict")), + get.tree = p_uty(tags = "predict"), + outcome = p_fct( + default = "train", levels = c("train", "test"), + tags = "predict"), + ptn.count = p_int(default = 0L, lower = 0L, tags = "predict"), + estimator = p_fct(default = "nelson", levels = c("nelson", "kaplan"), + tags = "predict"), + cores = p_int(default = 1L, lower = 1L, tags = 
c("train", "predict", "threads")), + save.memory = p_lgl(default = FALSE, tags = "train"), + perf.type = p_fct(levels = "none", tags = "train") + ) + + super$initialize( + id = "surv.rfsrc", + packages = c("mlr3extralearners", "randomForestSRC", "pracma"), + feature_types = c("logical", "integer", "numeric", "factor"), + predict_types = c("crank", "distr"), + param_set = ps, + # selected features is possible but there's a bug somewhere in rfsrc so that the model + # can be trained but not predicted. so public method retained but property not included + properties = c("weights", "missings", "importance", "oob_error"), + man = "mlr3extralearners::mlr_learners_surv.rfsrc", + label = "Random Forest" + ) + }, + + #' @description + #' The importance scores are extracted from the model slot `importance`. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model$importance) & !is.null(self$model)) { + stopf("Set 'importance' to one of: {'TRUE', 'permute', 'random', 'anti'}.") } - ), - - private = list( - .train = function(task) { - pv = self$param_set$get_values(tags = "train") - pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names)) - pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow) - cores = pv$cores %??% 1L - pv$cores = NULL - - if ("weights" %in% task$properties) { - pv$case.wt = as.numeric(task$weights$weight) # nolint - } - - invoke(randomForestSRC::rfsrc, - formula = task$formula(), data = task$data(), - .args = pv, .opts = list(rf.cores = cores)) - }, - - .predict = function(task) { - newdata = ordered_features(task, self) - pars_predict = self$param_set$get_values(tags = "predict") - # default estimator is nelson, hence nelson selected if NULL - estimator = pars_predict$estimator %??% "nelson" - pars_predict$estimator = NULL - pars_predict$var.used = "FALSE" - cores = pars_predict$cores %??% 1L # additionaly implemented by author - pars_predict$cores = NULL - - p = invoke(predict, object = self$model, newdata = newdata, .args = pars_predict, - .opts = list(rf.cores = cores)) - - # rfsrc uses Nelson-Aalen in chf and Kaplan-Meier for survival, as these - # don't give equivalent results one must be chosen and the relevant functions are transformed - # as required. - - - surv = if (estimator == "nelson") exp(-p$chf) else p$survival - - mlr3proba::.surv_return(times = self$model$time.interest, surv = surv) + + sort(self$model$importance, decreasing = TRUE) + }, + + #' @description + #' Selected features are extracted from the model slot `var.used`. + #' @return `character()`. + selected_features = function() { + if (is.null(self$model$var.used) & !is.null(self$model)) { + stopf("Set 'var.used' to one of: {'all.trees', 'by.tree'}.") + } + + self$model$var.used + }, + + #' @description + #' OOB error extracted from the model slot `err.rate`. + #' @return `numeric()`. 
+ oob_error = function() { + self$model$err.rate[self$model$ntree] + } + ), + + private = list( + .train = function(task) { + pv = self$param_set$get_values(tags = "train") + pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names)) + pv = convert_ratio(pv, "sampsize", "sampsize.ratio", task$nrow) + cores = pv$cores %??% 1L + pv$cores = NULL + + if ("weights" %in% task$properties) { + pv$case.wt = as.numeric(task$weights$weight) # nolint } - ) + + invoke(randomForestSRC::rfsrc, + formula = task$formula(), data = task$data(), + .args = pv, .opts = list(rf.cores = cores)) + }, + + .predict = function(task) { + newdata = ordered_features(task, self) + pars_predict = self$param_set$get_values(tags = "predict") + # default estimator is nelson, hence nelson selected if NULL + estimator = pars_predict$estimator %??% "nelson" + pars_predict$estimator = NULL + pars_predict$var.used = "FALSE" + cores = pars_predict$cores %??% 1L # additionaly implemented by author + pars_predict$cores = NULL + + p = invoke(predict, object = self$model, newdata = newdata, .args = pars_predict, + .opts = list(rf.cores = cores)) + + # rfsrc uses Nelson-Aalen in chf and Kaplan-Meier for survival, as these + # don't give equivalent results one must be chosen and the relevant functions are transformed + # as required. + + + surv = if (estimator == "nelson") exp(-p$chf) else p$survival + + mlr3proba::.surv_return(times = self$model$time.interest, surv = surv) + } ) ) -.extralrns_dict$add("surv.rfsrc", function() LearnerSurvRandomForestSRC$new()) +.extralrns_dict$add("surv.rfsrc", LearnerSurvRandomForestSRC) diff --git a/R/learner_ranger_surv_ranger.R b/R/learner_ranger_surv_ranger.R index b471e7965..e7406b37e 100644 --- a/R/learner_ranger_surv_ranger.R +++ b/R/learner_ranger_surv_ranger.R @@ -24,104 +24,101 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvRanger", - R6Class("LearnerSurvRanger", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - alpha = p_dbl(default = 0.5, tags = "train"), - always.split.variables = p_uty(tags = "train"), - holdout = p_lgl(default = FALSE, tags = "train"), # FIXME: do we need this? 
- importance = p_fct(c("none", "impurity", "impurity_corrected", "permutation"), tags = "train"), - keep.inbag = p_lgl(default = FALSE, tags = "train"), - max.depth = p_int(default = NULL, lower = 0L, special_vals = list(NULL), tags = "train"), - min.node.size = p_int(1L, default = 5L, tags = "train"), - minprop = p_dbl(default = 0.1, tags = "train"), - mtry = p_int(lower = 1L, tags = "train"), - mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), - num.random.splits = p_int(1L, default = 1L, tags = "train"), # requires = quote(splitrule == "extratrees") - num.threads = p_int(1L, default = 1L, tags = c("train", "predict", "threads")), - num.trees = p_int(1L, default = 500L, tags = c("train", "predict")), - oob.error = p_lgl(default = TRUE, tags = "train"), - regularization.factor = p_uty(default = 1, tags = "train"), - regularization.usedepth = p_lgl(default = FALSE, tags = "train"), - replace = p_lgl(default = TRUE, tags = "train"), - respect.unordered.factors = p_fct(c("ignore", "order", "partition"), default = "ignore", tags = "train"), # for splitrule == "extratrees", def = partition - sample.fraction = p_dbl(0L, 1L, tags = "train"), # for replace == FALSE, def = 0.632 - save.memory = p_lgl(default = FALSE, tags = "train"), - scale.permutation.importance = p_lgl(default = FALSE, tags = "train"), # requires = quote(importance == "permutation") - seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "predict")), - split.select.weights = p_dbl(0, 1, tags = "train"), - splitrule = p_fct(c("logrank", "extratrees", "C", "maxstat"), default = "logrank", tags = "train"), - verbose = p_lgl(default = TRUE, tags = c("train", "predict")), - write.forest = p_lgl(default = TRUE, tags = "train"), - min.bucket = p_int(default = 3, tags = "train") - ) +LearnerSurvRanger = R6Class("LearnerSurvRanger", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + ps = ps( + alpha = p_dbl(default = 0.5, tags = "train"), + always.split.variables = p_uty(tags = "train"), + holdout = p_lgl(default = FALSE, tags = "train"), # FIXME: do we need this? 
+ importance = p_fct(c("none", "impurity", "impurity_corrected", "permutation"), tags = "train"), + keep.inbag = p_lgl(default = FALSE, tags = "train"), + max.depth = p_int(default = NULL, lower = 0L, special_vals = list(NULL), tags = "train"), + min.node.size = p_int(1L, default = 5L, tags = "train"), + minprop = p_dbl(default = 0.1, tags = "train"), + mtry = p_int(lower = 1L, tags = "train"), + mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), + num.random.splits = p_int(1L, default = 1L, tags = "train"), # requires = quote(splitrule == "extratrees") + num.threads = p_int(1L, default = 1L, tags = c("train", "predict", "threads")), + num.trees = p_int(1L, default = 500L, tags = c("train", "predict")), + oob.error = p_lgl(default = TRUE, tags = "train"), + regularization.factor = p_uty(default = 1, tags = "train"), + regularization.usedepth = p_lgl(default = FALSE, tags = "train"), + replace = p_lgl(default = TRUE, tags = "train"), + respect.unordered.factors = p_fct(c("ignore", "order", "partition"), default = "ignore", tags = "train"), # for splitrule == "extratrees", def = partition + sample.fraction = p_dbl(0L, 1L, tags = "train"), # for replace == FALSE, def = 0.632 + save.memory = p_lgl(default = FALSE, tags = "train"), + scale.permutation.importance = p_lgl(default = FALSE, tags = "train"), # requires = quote(importance == "permutation") + seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "predict")), + split.select.weights = p_dbl(0, 1, tags = "train"), + splitrule = p_fct(c("logrank", "extratrees", "C", "maxstat"), default = "logrank", tags = "train"), + verbose = p_lgl(default = TRUE, tags = c("train", "predict")), + write.forest = p_lgl(default = TRUE, tags = "train"), + min.bucket = p_int(default = 3, tags = "train") + ) - ps$values = list(num.threads = 1L) + ps$values = list(num.threads = 1L) - super$initialize( - id = "surv.ranger", - param_set = ps, - predict_types = c("distr", "crank"), - feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), - properties = c("weights", "importance", "oob_error"), - packages = c("mlr3extralearners", "ranger"), - man = "mlr3extralearners::mlr_learners_surv.ranger", - label = "Random Forest" - ) - }, + super$initialize( + id = "surv.ranger", + param_set = ps, + predict_types = c("distr", "crank"), + feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), + properties = c("weights", "importance", "oob_error"), + packages = c("mlr3extralearners", "ranger"), + man = "mlr3extralearners::mlr_learners_surv.ranger", + label = "Random Forest" + ) + }, - #' @description - #' The importance scores are extracted from the model slot `variable.importance`. - #' @return Named `numeric()`. - importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - if (self$model$importance.mode == "none") { - stopf("No importance stored") - } + #' @description + #' The importance scores are extracted from the model slot `variable.importance`. + #' @return Named `numeric()`. + importance = function() { + if (is.null(self$model)) { + stopf("No model stored") + } + if (self$model$importance.mode == "none") { + stopf("No importance stored") + } - sort(self$model$variable.importance, decreasing = TRUE) - }, + sort(self$model$variable.importance, decreasing = TRUE) + }, - #' @description - #' The out-of-bag error is extracted from the model slot `prediction.error`. - #' @return `numeric(1)`. 
- oob_error = function() { - self$model$prediction.error - } - ), + #' @description + #' The out-of-bag error is extracted from the model slot `prediction.error`. + #' @return `numeric(1)`. + oob_error = function() { + self$model$prediction.error + } + ), - private = list( - .train = function(task) { - pv = self$param_set$get_values(tags = "train") - pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names)) - targets = task$target_names + private = list( + .train = function(task) { + pv = self$param_set$get_values(tags = "train") + pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names)) + targets = task$target_names - invoke(ranger::ranger, - formula = NULL, - dependent.variable.name = targets[1L], - status.variable.name = targets[2L], - data = task$data(), - case.weights = task$weights$weight, - .args = pv - ) - }, + invoke(ranger::ranger, + formula = NULL, + dependent.variable.name = targets[1L], + status.variable.name = targets[2L], + data = task$data(), + case.weights = task$weights$weight, + .args = pv + ) + }, - .predict = function(task) { - pv = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) - prediction = invoke(predict, object = self$model, data = newdata, .args = pv) - mlr3proba::.surv_return(times = prediction$unique.death.times, surv = prediction$survival) - } - ) + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) + prediction = invoke(predict, object = self$model, data = newdata, .args = pv) + mlr3proba::.surv_return(times = prediction$unique.death.times, surv = prediction$survival) + } ) ) -.extralrns_dict$add("surv.ranger", function() LearnerSurvRanger$new()) +.extralrns_dict$add("surv.ranger", LearnerSurvRanger) diff --git a/R/learner_sm_dens_nonpar.R b/R/learner_sm_dens_nonpar.R index 7a2039aef..b28586fed 100644 --- a/R/learner_sm_dens_nonpar.R +++ b/R/learner_sm_dens_nonpar.R @@ -15,69 +15,66 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerDensNonparametric", - R6Class("LearnerDensNonparametric", - inherit = mlr3proba::LearnerDens, +LearnerDensNonparametric = R6Class("LearnerDensNonparametric", + inherit = mlr3proba::LearnerDens, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - h = p_dbl(tags = "train"), - group = p_uty(tags = "train"), - delta = p_dbl(tags = "train"), - h.weights = p_dbl(default = 1, tags = "train"), - hmult = p_uty(default = 1, tags = "train"), - method = p_fct(default = "normal", - levels = c("normal", "cv", "sj", "df", "aicc"), tags = "train"), - positive = p_lgl(default = FALSE, tags = "train"), - verbose = p_uty(default = 1, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + h = p_dbl(tags = "train"), + group = p_uty(tags = "train"), + delta = p_dbl(tags = "train"), + h.weights = p_dbl(default = 1, tags = "train"), + hmult = p_uty(default = 1, tags = "train"), + method = p_fct(default = "normal", + levels = c("normal", "cv", "sj", "df", "aicc"), tags = "train"), + positive = p_lgl(default = FALSE, tags = "train"), + verbose = p_uty(default = 1, tags = "train") + ) - super$initialize( - id = "dens.nonpar", - packages = c("mlr3extralearners", "sm"), - feature_types = c("integer", "numeric"), - predict_types = "pdf", - param_set = ps, - properties = "weights", - man = "mlr3extralearners::mlr_learners_dens.nonpar", - label = "Nonparametric Density Estimation" - ) - } - ), + super$initialize( + id = "dens.nonpar", + packages = c("mlr3extralearners", "sm"), + feature_types = c("integer", "numeric"), + predict_types = "pdf", + param_set = ps, + properties = "weights", + man = "mlr3extralearners::mlr_learners_dens.nonpar", + label = "Nonparametric Density Estimation" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") - if ("weights" %in% task$properties) { - pars$weights = task$weights$weight - } + if ("weights" %in% task$properties) { + pars$weights = task$weights$weight + } - # TODO: Why is it done that way?? - pdf = function(x) {} # nolint - body(pdf) = substitute({ - invoke(sm::sm.density, - x = data, eval.points = x, display = "none", show.script = FALSE, - .args = pars)$estimate - }, list(data = task$data(cols = task$feature_names)[[1]])) + # TODO: Why is it done that way?? + pdf = function(x) {} # nolint + body(pdf) = substitute({ + invoke(sm::sm.density, + x = data, eval.points = x, display = "none", show.script = FALSE, + .args = pars)$estimate + }, list(data = task$data(cols = task$feature_names)[[1]])) - distr6::Distribution$new( - name = "Nonparametric Density", - short_name = "NonparDens", - type = set6::Reals$new(), - pdf = pdf) - }, + distr6::Distribution$new( + name = "Nonparametric Density", + short_name = "NonparDens", + type = set6::Reals$new(), + pdf = pdf) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - invoke(list, pdf = self$model$pdf(task$data(cols = task$feature_names)[[1]]), .args = pars) - } - ) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + invoke(list, pdf = self$model$pdf(task$data(cols = task$feature_names)[[1]]), .args = pars) + } ) ) -.extralrns_dict$add("dens.nonpar", function() LearnerDensNonparametric$new()) +.extralrns_dict$add("dens.nonpar", LearnerDensNonparametric) diff --git a/R/learner_survival_surv_nelson.R b/R/learner_survival_surv_nelson.R index 6e328b703..b433b94ec 100644 --- a/R/learner_survival_surv_nelson.R +++ b/R/learner_survival_surv_nelson.R @@ -15,45 +15,42 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvNelson", - R6Class("LearnerSurvNelson", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. 
- initialize = function() { - super$initialize( - id = "surv.nelson", - predict_types = c("crank", "distr"), - feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), - properties = "missings", - packages = c("mlr3extralearners", "survival", "pracma"), - man = "mlr3extralearners::mlr_learners_surv.nelson", - label = "Nelson-Aalen Estimator" - ) - } - ), +LearnerSurvNelson = R6Class("LearnerSurvNelson", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + super$initialize( + id = "surv.nelson", + predict_types = c("crank", "distr"), + feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), + properties = "missings", + packages = c("mlr3extralearners", "survival", "pracma"), + man = "mlr3extralearners::mlr_learners_surv.nelson", + label = "Nelson-Aalen Estimator" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke(survival::survfit, formula = task$formula(1), data = task$data(), - .args = pars - ) - }, + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") + invoke(survival::survfit, formula = task$formula(1), data = task$data(), + .args = pars + ) + }, - .predict = function(task) { + .predict = function(task) { - times = self$model$time - surv = matrix(rep(exp(-self$model$cumhaz), task$nrow), - ncol = length(times), nrow = task$nrow, - byrow = TRUE) + times = self$model$time + surv = matrix(rep(exp(-self$model$cumhaz), task$nrow), + ncol = length(times), nrow = task$nrow, + byrow = TRUE) - mlr3proba::.surv_return(times = times, surv = surv) - } - ) + mlr3proba::.surv_return(times = times, surv = surv) + } ) ) -.extralrns_dict$add("surv.nelson", function() LearnerSurvNelson$new()) +.extralrns_dict$add("surv.nelson", LearnerSurvNelson) diff --git a/R/learner_survival_surv_parametric.R b/R/learner_survival_surv_parametric.R index 4588e6cb9..83acaafd8 100644 --- a/R/learner_survival_surv_parametric.R +++ b/R/learner_survival_surv_parametric.R @@ -50,100 +50,97 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvParametric", - R6Class("LearnerSurvParametric", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - type = p_fct(default = "aft", levels = c("aft", "ph", "po", "tobit"), - tags = "predict"), - na.action = p_uty(tags = "train"), - dist = p_fct(default = "weibull", - levels = c("weibull", "exponential", "gaussian", - "lognormal", "loglogistic"), tags = "train"), - parms = p_uty(tags = "train"), - init = p_uty(tags = "train"), - scale = p_dbl(default = 0, lower = 0, tags = "train"), - maxiter = p_int(default = 30L, tags = "train"), - rel.tolerance = p_dbl(default = 1e-09, tags = "train"), - toler.chol = p_dbl(default = 1e-10, tags = "train"), - debug = p_int(default = 0, lower = 0, upper = 1, tags = "train"), - outer.max = p_int(default = 10L, tags = "train"), - robust = p_lgl(default = FALSE, tags = "train"), - score = p_lgl(default = FALSE, tags = "train"), - cluster = p_uty(tags = "train") - ) +LearnerSurvParametric = R6Class("LearnerSurvParametric", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + type = p_fct(default = "aft", levels = c("aft", "ph", "po", "tobit"), + tags = "predict"), + na.action = p_uty(tags = "train"), + dist = p_fct(default = "weibull", + levels = c("weibull", "exponential", "gaussian", + "lognormal", "loglogistic"), tags = "train"), + parms = p_uty(tags = "train"), + init = p_uty(tags = "train"), + scale = p_dbl(default = 0, lower = 0, tags = "train"), + maxiter = p_int(default = 30L, tags = "train"), + rel.tolerance = p_dbl(default = 1e-09, tags = "train"), + toler.chol = p_dbl(default = 1e-10, tags = "train"), + debug = p_int(default = 0, lower = 0, upper = 1, tags = "train"), + outer.max = p_int(default = 10L, tags = "train"), + robust = p_lgl(default = FALSE, tags = "train"), + score = p_lgl(default = FALSE, tags = "train"), + cluster = p_uty(tags = "train") + ) - super$initialize( - id = "surv.parametric", - param_set = ps, - predict_types = c("distr", "lp", "crank"), - feature_types = c("logical", "integer", "numeric", "factor"), - properties = "weights", - packages = c("mlr3extralearners", "survival", "pracma"), - man = "mlr3extralearners::mlr_learners_surv.parametric", - label = "Fully Parametric Learner" - ) - } - ), + super$initialize( + id = "surv.parametric", + param_set = ps, + predict_types = c("distr", "lp", "crank"), + feature_types = c("logical", "integer", "numeric", "factor"), + properties = "weights", + packages = c("mlr3extralearners", "survival", "pracma"), + man = "mlr3extralearners::mlr_learners_surv.parametric", + label = "Fully Parametric Learner" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pv = self$param_set$get_values(tags = "train") + pv = self$param_set$get_values(tags = "train") - if ("weights" %in% task$properties) { - pv$weights = task$weights$weight - } + if ("weights" %in% task$properties) { + pv$weights = task$weights$weight + } - fit = invoke(survival::survreg, formula = task$formula(), data = task$data(), - .args = pv) + fit = invoke(survival::survreg, formula = task$formula(), data = task$data(), + .args = pv) - # Fits the baseline distribution by reparameterising the fitted coefficients. - # These were mostly derived numerically as precise documentation on the parameterisations is - # hard to find. - location = as.numeric(fit$coefficients[1]) - scale = fit$scale - eps = 1e-15 + # Fits the baseline distribution by reparameterising the fitted coefficients. + # These were mostly derived numerically as precise documentation on the parameterisations is + # hard to find. 
+ location = as.numeric(fit$coefficients[1]) + scale = fit$scale + eps = 1e-15 - if (scale < eps) { - scale = eps - } else if (scale > .Machine$double.xmax) { - scale = .Machine$double.xmax - } + if (scale < eps) { + scale = eps + } else if (scale > .Machine$double.xmax) { + scale = .Machine$double.xmax + } - if (location < -709 & fit$dist %in% c("weibull", "exponential", "loglogistic")) { - location = -709 - } + if (location < -709 & fit$dist %in% c("weibull", "exponential", "loglogistic")) { + location = -709 + } - basedist = switch(fit$dist, - "weibull" = distr6::Weibull$new(shape = 1 / scale, scale = exp(location), - decorators = "ExoticStatistics"), - "exponential" = distr6::Exponential$new(scale = exp(location), - decorators = "ExoticStatistics"), - "gaussian" = distr6::Normal$new(mean = location, sd = scale, - decorators = "ExoticStatistics"), - "lognormal" = distr6::Lognormal$new(meanlog = location, sdlog = scale, - decorators = "ExoticStatistics"), - "loglogistic" = distr6::Loglogistic$new(scale = exp(location), - shape = 1 / scale, - decorators = "ExoticStatistics") - ) + basedist = switch(fit$dist, + "weibull" = distr6::Weibull$new(shape = 1 / scale, scale = exp(location), + decorators = "ExoticStatistics"), + "exponential" = distr6::Exponential$new(scale = exp(location), + decorators = "ExoticStatistics"), + "gaussian" = distr6::Normal$new(mean = location, sd = scale, + decorators = "ExoticStatistics"), + "lognormal" = distr6::Lognormal$new(meanlog = location, sdlog = scale, + decorators = "ExoticStatistics"), + "loglogistic" = distr6::Loglogistic$new(scale = exp(location), + shape = 1 / scale, + decorators = "ExoticStatistics") + ) - set_class(list(fit = fit, basedist = basedist), "surv.parametric") - }, + set_class(list(fit = fit, basedist = basedist), "surv.parametric") + }, - .predict = function(task) { - pv = self$param_set$get_values(tags = "predict") - pred = invoke(.predict_survreg, object = self$model, task = task, learner = self, .args = pv) - # lp is aft-style, where higher value = lower risk, opposite needed for crank - list(distr = pred$distr, crank = -pred$lp, lp = -pred$lp) - } - ) + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + pred = invoke(.predict_survreg, object = self$model, task = task, learner = self, .args = pv) + # lp is aft-style, where higher value = lower risk, opposite needed for crank + list(distr = pred$distr, crank = -pred$lp, lp = -pred$lp) + } ) ) @@ -287,4 +284,4 @@ delayedAssign( list(lp = as.numeric(lp), distr = distr) } -.extralrns_dict$add("surv.parametric", function() LearnerSurvParametric$new()) +.extralrns_dict$add("surv.parametric", LearnerSurvParametric) diff --git a/R/learner_survivalmodels_surv_akritas.R b/R/learner_survivalmodels_surv_akritas.R index a39b93a52..461fc46dc 100644 --- a/R/learner_survivalmodels_surv_akritas.R +++ b/R/learner_survivalmodels_surv_akritas.R @@ -17,61 +17,58 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvAkritas", - R6Class("LearnerSurvAkritas", - inherit = mlr3proba::LearnerSurv, +LearnerSurvAkritas = R6Class("LearnerSurvAkritas", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - lambda = p_dbl(default = 0.5, lower = 0, upper = 1, tags = "predict"), - reverse = p_lgl(default = FALSE, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + lambda = p_dbl(default = 0.5, lower = 0, upper = 1, tags = "predict"), + reverse = p_lgl(default = FALSE, tags = "train") + ) - super$initialize( - id = "surv.akritas", - feature_types = c("logical", "integer", "character", "numeric", "factor"), - predict_types = c("crank", "distr"), - param_set = ps, - packages = c("mlr3extralearners", "survivalmodels", "distr6"), - man = "mlr3extralearners::mlr_learners_surv.akritas", - label = "Akritas Estimator" - ) - } - ), + super$initialize( + id = "surv.akritas", + feature_types = c("logical", "integer", "character", "numeric", "factor"), + predict_types = c("crank", "distr"), + param_set = ps, + packages = c("mlr3extralearners", "survivalmodels", "distr6"), + man = "mlr3extralearners::mlr_learners_surv.akritas", + label = "Akritas Estimator" + ) + } + ), - private = list( - .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::akritas, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) - }, + private = list( + .train = function(task) { + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::akritas, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) + }, - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) - } - ) + list(crank = pred$risk, distr = pred$surv) + } ) ) -.extralrns_dict$add("surv.akritas", function() LearnerSurvAkritas$new()) +.extralrns_dict$add("surv.akritas", LearnerSurvAkritas) diff --git a/R/learner_survivalmodels_surv_coxtime.R b/R/learner_survivalmodels_surv_coxtime.R index 00a39a762..aebcea917 100644 --- a/R/learner_survivalmodels_surv_coxtime.R +++ b/R/learner_survivalmodels_surv_coxtime.R @@ -17,136 +17,133 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvCoxtime", - R6Class("LearnerSurvCoxtime", - inherit = mlr3proba::LearnerSurv, +LearnerSurvCoxtime = R6Class("LearnerSurvCoxtime", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { - ps = ps( - frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - standardize_time = p_lgl(default = FALSE, tags = "train"), - log_duration = p_lgl(default = FALSE, tags = "train"), - with_mean = p_lgl(default = TRUE, tags = "train"), - with_std = p_lgl(default = TRUE, tags = "train"), - num_nodes = p_uty(default = c(32L, 32L), tags = "train"), - batch_norm = p_lgl(default = TRUE, tags = "train"), - dropout = p_dbl(lower = 0, upper = 1, tags = "train"), - activation = p_fct(default = "relu", - levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", - "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", - "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", - "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), - tags = "train"), - device = p_uty(tags = "train"), - shrink = p_dbl(default = 0, lower = 0, tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"), tags = "train"), - rho = p_dbl(default = 0.9, tags = "train"), - eps = p_dbl(default = 1e-8, tags = "train"), - lr = p_dbl(default = 1, tags = "train"), - weight_decay = p_dbl(default = 0, tags = "train"), - learning_rate = p_dbl(default = 1e-2, tags = "train"), - lr_decay = p_dbl(default = 0, tags = "train"), - betas = p_uty(default = c(0.9, 0.999), tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), - alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), - t0 = p_dbl(default = 1e6, tags = "train"), - momentum = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = TRUE, tags = "train"), - etas = p_uty(default = c(0.5, 1.2), tags = "train"), - step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), - dampening = p_dbl(default = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 256L, tags = c("train", "predict")), - epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), - shuffle = p_lgl(default = TRUE, tags = "train"), - best_weights = p_lgl(default = FALSE, tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, tags = "train"), - patience = p_int(default = 10, tags = "train") - ) + ps = ps( + frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + standardize_time = p_lgl(default = FALSE, tags = "train"), + log_duration = p_lgl(default = FALSE, tags = "train"), + with_mean = p_lgl(default = TRUE, tags = "train"), + with_std = p_lgl(default = TRUE, tags = "train"), + num_nodes = p_uty(default = c(32L, 32L), tags = "train"), + batch_norm = p_lgl(default = TRUE, tags = "train"), + dropout = p_dbl(lower = 0, upper = 1, tags = "train"), + activation = p_fct(default = "relu", + levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", + "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", + "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", + "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), + tags = "train"), + device = p_uty(tags = "train"), + shrink = p_dbl(default = 0, lower = 0, tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", 
"adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"), tags = "train"), + rho = p_dbl(default = 0.9, tags = "train"), + eps = p_dbl(default = 1e-8, tags = "train"), + lr = p_dbl(default = 1, tags = "train"), + weight_decay = p_dbl(default = 0, tags = "train"), + learning_rate = p_dbl(default = 1e-2, tags = "train"), + lr_decay = p_dbl(default = 0, tags = "train"), + betas = p_uty(default = c(0.9, 0.999), tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), + alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), + t0 = p_dbl(default = 1e6, tags = "train"), + momentum = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = TRUE, tags = "train"), + etas = p_uty(default = c(0.5, 1.2), tags = "train"), + step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), + dampening = p_dbl(default = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 256L, tags = c("train", "predict")), + epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), + shuffle = p_lgl(default = TRUE, tags = "train"), + best_weights = p_lgl(default = FALSE, tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, tags = "train"), + patience = p_int(default = 10, tags = "train") + ) - ps$add_dep("log_duration", "standardize_time", CondEqual$new(TRUE)) - ps$add_dep("with_mean", "standardize_time", CondEqual$new(TRUE)) - ps$add_dep("with_std", "standardize_time", CondEqual$new(TRUE)) + ps$add_dep("log_duration", "standardize_time", CondEqual$new(TRUE)) + ps$add_dep("with_mean", "standardize_time", CondEqual$new(TRUE)) + ps$add_dep("with_std", "standardize_time", CondEqual$new(TRUE)) - ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", - "adamw", "rmsprop", "sparse_adam"))) - ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("weight_decay", "optimizer", - CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "sgd"))) - ps$add_dep("learning_rate", "optimizer", CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", - "asgd", "rmsprop", "rprop", "sgd", "sparse_adam"))) - ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) - ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) - ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) - ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) - ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) - ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) - ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) - ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) - ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) - ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", + "adamw", "rmsprop", "sparse_adam"))) + ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("weight_decay", "optimizer", + CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", 
"adamw", "asgd", "rmsprop", "sgd"))) + ps$add_dep("learning_rate", "optimizer", CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", + "asgd", "rmsprop", "rprop", "sgd", "sparse_adam"))) + ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) + ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) + ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) + ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) + ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) + ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) + ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) + ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) + ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) + ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) - ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - super$initialize( - id = "surv.coxtime", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.coxtime", - packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), - label = "Cox-Time Estimator" - ) - } - ), + super$initialize( + id = "surv.coxtime", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.coxtime", + packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), + label = "Cox-Time Estimator" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::coxtime, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::coxtime, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) + list(crank = pred$risk, distr = pred$surv) - } - ) + } ) ) -.extralrns_dict$add("surv.coxtime", function() LearnerSurvCoxtime$new()) +.extralrns_dict$add("surv.coxtime", LearnerSurvCoxtime) diff --git a/R/learner_survivalmodels_surv_deephit.R b/R/learner_survivalmodels_surv_deephit.R index ee71e05d5..0df2325eb 100644 --- a/R/learner_survivalmodels_surv_deephit.R +++ b/R/learner_survivalmodels_surv_deephit.R @@ -23,144 +23,141 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvDeephit", - R6Class("LearnerSurvDeephit", - inherit = mlr3proba::LearnerSurv, +LearnerSurvDeephit = 
R6Class("LearnerSurvDeephit", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { - ps = ps( - frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - cuts = p_int(default = 10L, lower = 1L, tags = "train"), - cutpoints = p_uty(tags = "train"), - scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), - tags = "train"), - cut_min = p_dbl(default = 0, lower = 0, tags = "train"), - num_nodes = p_uty(default = c(32L, 32L), tags = "train"), - batch_norm = p_lgl(default = TRUE, tags = "train"), - dropout = p_dbl(lower = 0, upper = 1, tags = "train"), - activation = p_fct(default = "relu", - levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", - "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", - "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", - "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), - tags = "train"), - custom_net = p_uty(tags = "train"), - device = p_uty(tags = "train"), - mod_alpha = p_dbl(default = 0.2, lower = 0, upper = 1, tags = "train"), - sigma = p_dbl(default = 0.1, lower = 0, tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"), tags = "train"), - rho = p_dbl(default = 0.9, tags = "train"), - eps = p_dbl(default = 1e-8, tags = "train"), - lr = p_dbl(default = 1, tags = "train"), - weight_decay = p_dbl(default = 0, tags = "train"), - learning_rate = p_dbl(default = 1e-2, tags = "train"), - lr_decay = p_dbl(default = 0, tags = "train"), - betas = p_uty(default = c(0.9, 0.999), tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), - alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), - t0 = p_dbl(default = 1e6, tags = "train"), - momentum = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = TRUE, tags = "train"), - etas = p_uty(default = c(0.5, 1.2), tags = "train"), - step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), - dampening = p_dbl(default = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 256L, tags = c("train", "predict")), - epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), - shuffle = p_lgl(default = TRUE, tags = "train"), - best_weights = p_lgl(default = FALSE, tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, tags = "train"), - patience = p_int(default = 10, tags = "train"), - interpolate = p_lgl(default = FALSE, tags = "predict"), - inter_scheme = p_fct(default = "const_hazard", - levels = c("const_hazard", "const_pdf"), tags = "predict"), - sub = p_int(default = 10L, lower = 1L, tags = "predict") - ) + ps = ps( + frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + cuts = p_int(default = 10L, lower = 1L, tags = "train"), + cutpoints = p_uty(tags = "train"), + scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), + tags = "train"), + cut_min = p_dbl(default = 0, lower = 0, tags = "train"), + 
num_nodes = p_uty(default = c(32L, 32L), tags = "train"), + batch_norm = p_lgl(default = TRUE, tags = "train"), + dropout = p_dbl(lower = 0, upper = 1, tags = "train"), + activation = p_fct(default = "relu", + levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", + "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", + "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", + "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), + tags = "train"), + custom_net = p_uty(tags = "train"), + device = p_uty(tags = "train"), + mod_alpha = p_dbl(default = 0.2, lower = 0, upper = 1, tags = "train"), + sigma = p_dbl(default = 0.1, lower = 0, tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"), tags = "train"), + rho = p_dbl(default = 0.9, tags = "train"), + eps = p_dbl(default = 1e-8, tags = "train"), + lr = p_dbl(default = 1, tags = "train"), + weight_decay = p_dbl(default = 0, tags = "train"), + learning_rate = p_dbl(default = 1e-2, tags = "train"), + lr_decay = p_dbl(default = 0, tags = "train"), + betas = p_uty(default = c(0.9, 0.999), tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), + alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), + t0 = p_dbl(default = 1e6, tags = "train"), + momentum = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = TRUE, tags = "train"), + etas = p_uty(default = c(0.5, 1.2), tags = "train"), + step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), + dampening = p_dbl(default = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 256L, tags = c("train", "predict")), + epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), + shuffle = p_lgl(default = TRUE, tags = "train"), + best_weights = p_lgl(default = FALSE, tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, tags = "train"), + patience = p_int(default = 10, tags = "train"), + interpolate = p_lgl(default = FALSE, tags = "predict"), + inter_scheme = p_fct(default = "const_hazard", + levels = c("const_hazard", "const_pdf"), tags = "predict"), + sub = p_int(default = 10L, lower = 1L, tags = "predict") + ) - ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", - "adamw", "rmsprop", "sparse_adam"))) - ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("weight_decay", "optimizer", - CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", - "asgd", "rmsprop", "sgd"))) - ps$add_dep("learning_rate", "optimizer", - CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"))) - ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) - ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) - ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) - ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) - ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) - ps$add_dep("centered", 
"optimizer", CondEqual$new("rmsprop")) - ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) - ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) - ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) - ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", + "adamw", "rmsprop", "sparse_adam"))) + ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("weight_decay", "optimizer", + CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", + "asgd", "rmsprop", "sgd"))) + ps$add_dep("learning_rate", "optimizer", + CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"))) + ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) + ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) + ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) + ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) + ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) + ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) + ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) + ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) + ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) + ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) - ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("sub", "interpolate", CondEqual$new(TRUE)) - ps$add_dep("inter_scheme", "interpolate", CondEqual$new(TRUE)) + ps$add_dep("sub", "interpolate", CondEqual$new(TRUE)) + ps$add_dep("inter_scheme", "interpolate", CondEqual$new(TRUE)) - super$initialize( - id = "surv.deephit", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.deephit", - packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), - label = "Neural Network" - ) - } - ), + super$initialize( + id = "surv.deephit", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.deephit", + packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), + label = "Neural Network" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::deephit, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::deephit, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = 
newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) + list(crank = pred$risk, distr = pred$surv) - } - ) + } ) ) -.extralrns_dict$add("surv.deephit", function() LearnerSurvDeephit$new()) +.extralrns_dict$add("surv.deephit", LearnerSurvDeephit) diff --git a/R/learner_survivalmodels_surv_deepsurv.R b/R/learner_survivalmodels_surv_deepsurv.R index 1456f8232..fdd65067c 100644 --- a/R/learner_survivalmodels_surv_deepsurv.R +++ b/R/learner_survivalmodels_surv_deepsurv.R @@ -17,129 +17,126 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvDeepsurv", - R6Class("LearnerSurvDeepsurv", - inherit = mlr3proba::LearnerSurv, +LearnerSurvDeepsurv = R6Class("LearnerSurvDeepsurv", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { - ps = ps( - frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - num_nodes = p_uty(default = c(32L, 32L), tags = "train"), - batch_norm = p_lgl(default = TRUE, tags = "train"), - dropout = p_dbl(lower = 0, upper = 1, tags = "train"), - activation = p_fct(default = "relu", - levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", - "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", - "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", - "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), - tags = "train"), - device = p_uty(tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"), tags = "train"), - rho = p_dbl(default = 0.9, tags = "train"), - eps = p_dbl(default = 1e-8, tags = "train"), - lr = p_dbl(default = 1, tags = "train"), - weight_decay = p_dbl(default = 0, tags = "train"), - learning_rate = p_dbl(default = 1e-2, tags = "train"), - lr_decay = p_dbl(default = 0, tags = "train"), - betas = p_uty(default = c(0.9, 0.999), tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), - alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), - t0 = p_dbl(default = 1e6, tags = "train"), - momentum = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = TRUE, tags = "train"), - etas = p_uty(default = c(0.5, 1.2), tags = "train"), - step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), - dampening = p_dbl(default = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 256L, tags = c("train", "predict")), - epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), - shuffle = p_lgl(default = TRUE, tags = "train"), - best_weights = p_lgl(default = FALSE, tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, tags = "train"), - patience = p_int(default = 10, tags = "train") - ) + ps = ps( + frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + num_nodes = p_uty(default = c(32L, 32L), tags = 
"train"), + batch_norm = p_lgl(default = TRUE, tags = "train"), + dropout = p_dbl(lower = 0, upper = 1, tags = "train"), + activation = p_fct(default = "relu", + levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", + "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", + "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", + "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), + tags = "train"), + device = p_uty(tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"), tags = "train"), + rho = p_dbl(default = 0.9, tags = "train"), + eps = p_dbl(default = 1e-8, tags = "train"), + lr = p_dbl(default = 1, tags = "train"), + weight_decay = p_dbl(default = 0, tags = "train"), + learning_rate = p_dbl(default = 1e-2, tags = "train"), + lr_decay = p_dbl(default = 0, tags = "train"), + betas = p_uty(default = c(0.9, 0.999), tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), + alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), + t0 = p_dbl(default = 1e6, tags = "train"), + momentum = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = TRUE, tags = "train"), + etas = p_uty(default = c(0.5, 1.2), tags = "train"), + step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), + dampening = p_dbl(default = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 256L, tags = c("train", "predict")), + epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), + shuffle = p_lgl(default = TRUE, tags = "train"), + best_weights = p_lgl(default = FALSE, tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, tags = "train"), + patience = p_int(default = 10, tags = "train") + ) - ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", - "adamw", "rmsprop", "sparse_adam"))) - ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("weight_decay", "optimizer", - CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", - "asgd", "rmsprop", "sgd"))) - ps$add_dep("learning_rate", "optimizer", - CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"))) - ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) - ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) - ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) - ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) - ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) - ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) - ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) - ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) - ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) - ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", + "adamw", 
"rmsprop", "sparse_adam"))) + ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("weight_decay", "optimizer", + CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", + "asgd", "rmsprop", "sgd"))) + ps$add_dep("learning_rate", "optimizer", + CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"))) + ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) + ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) + ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) + ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) + ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) + ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) + ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) + ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) + ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) + ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) - ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - super$initialize( - id = "surv.deepsurv", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.deepsurv", - packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), - label = "Neural Network" - ) - } - ), + super$initialize( + id = "surv.deepsurv", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.deepsurv", + packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), + label = "Neural Network" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "fit") - invoke( - survivalmodels::deepsurv, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "fit") + invoke( + survivalmodels::deepsurv, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) + list(crank = pred$risk, distr = pred$surv) - } - ) + } ) ) -.extralrns_dict$add("surv.deepsurv", function() LearnerSurvDeepsurv$new()) +.extralrns_dict$add("surv.deepsurv", LearnerSurvDeepsurv) diff --git a/R/learner_survivalmodels_surv_dnnsurv.R b/R/learner_survivalmodels_surv_dnnsurv.R index 131902689..a942455dd 100644 --- a/R/learner_survivalmodels_surv_dnnsurv.R +++ b/R/learner_survivalmodels_surv_dnnsurv.R @@ -26,128 +26,125 @@ #' @template 
seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvDNNSurv", - R6Class("LearnerSurvDNNSurv", - inherit = mlr3proba::LearnerSurv, +LearnerSurvDNNSurv = R6Class("LearnerSurvDNNSurv", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { - ps = ps( - cuts = p_int(default = 5, lower = 1, tags = "train"), - cutpoints = p_uty(tags = "train"), - custom_model = p_uty(tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adamax", "adam", "nadam", "rmsprop", "sgd"), - tags = "train"), - lr = p_dbl(default = 0.02, lower = 0, tags = "train"), - beta_1 = p_dbl(default = 0.9, lower = 0, upper = 1, tags = "train"), - beta_2 = p_dbl(default = 0.999, lower = 0, upper = 1, tags = "train"), - epsilon = p_dbl(lower = 0, tags = "train"), - decay = p_dbl(default = 0, lower = 0, tags = "train"), - clipnorm = p_dbl(tags = "train"), - clipvalue = p_dbl(tags = "train"), - momentum = p_dbl(default = 0, lower = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - loss_weights = p_uty(tags = "train"), - weighted_metrics = p_uty(tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, lower = 0, tags = "train"), - patience = p_int(default = 0L, lower = 0, tags = "train"), - verbose = p_int(default = 0L, lower = 0, upper = 2, tags = c("train", "predict")), - baseline = p_dbl(tags = "train"), - restore_best_weights = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 32L, lower = 1, tags = c("train", "predict")), - epochs = p_int(default = 10L, lower = 1, tags = "train"), - validation_split = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - shuffle = p_lgl(default = TRUE, tags = "train"), - sample_weight = p_uty(tags = "train"), - initial_epoch = p_int(default = 0L, lower = 0, tags = "train"), - steps_per_epoch = p_int(lower = 1, tags = "train"), - validation_steps = p_int(lower = 1, tags = "train"), - steps = p_int(lower = 0, tags = "predict"), - callbacks = p_uty(tags = "predict"), - rho = p_dbl(default = 0.95, tags = "train"), - global_clipnorm = p_dbl(tags = "train"), - use_ema = p_lgl(tags = "train"), - ema_momentum = p_dbl(default = 0.99, tags = "train"), - ema_overwrite_frequency = p_dbl(tags = "train"), - jit_compile = p_lgl(default = TRUE, tags = "train"), - initial_accumultator_value = p_dbl(default = 0.1, tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lr_power = p_dbl(default = -0.5, tags = "train"), - l1_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), - l2_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), - l2_shrinkage_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), - beta = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = FALSE, tags = "train") - ) + ps = ps( + cuts = p_int(default = 5, lower = 1, tags = "train"), + cutpoints = p_uty(tags = "train"), + custom_model = p_uty(tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", "adagrad", "adamax", "adam", "nadam", "rmsprop", "sgd"), + tags = "train"), + lr = p_dbl(default = 0.02, lower = 0, tags = "train"), + beta_1 = p_dbl(default = 0.9, lower = 0, upper = 1, tags = "train"), + beta_2 = p_dbl(default = 0.999, lower = 0, upper = 
1, tags = "train"), + epsilon = p_dbl(lower = 0, tags = "train"), + decay = p_dbl(default = 0, lower = 0, tags = "train"), + clipnorm = p_dbl(tags = "train"), + clipvalue = p_dbl(tags = "train"), + momentum = p_dbl(default = 0, lower = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + loss_weights = p_uty(tags = "train"), + weighted_metrics = p_uty(tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, lower = 0, tags = "train"), + patience = p_int(default = 0L, lower = 0, tags = "train"), + verbose = p_int(default = 0L, lower = 0, upper = 2, tags = c("train", "predict")), + baseline = p_dbl(tags = "train"), + restore_best_weights = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 32L, lower = 1, tags = c("train", "predict")), + epochs = p_int(default = 10L, lower = 1, tags = "train"), + validation_split = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + shuffle = p_lgl(default = TRUE, tags = "train"), + sample_weight = p_uty(tags = "train"), + initial_epoch = p_int(default = 0L, lower = 0, tags = "train"), + steps_per_epoch = p_int(lower = 1, tags = "train"), + validation_steps = p_int(lower = 1, tags = "train"), + steps = p_int(lower = 0, tags = "predict"), + callbacks = p_uty(tags = "predict"), + rho = p_dbl(default = 0.95, tags = "train"), + global_clipnorm = p_dbl(tags = "train"), + use_ema = p_lgl(tags = "train"), + ema_momentum = p_dbl(default = 0.99, tags = "train"), + ema_overwrite_frequency = p_dbl(tags = "train"), + jit_compile = p_lgl(default = TRUE, tags = "train"), + initial_accumultator_value = p_dbl(default = 0.1, tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lr_power = p_dbl(default = -0.5, tags = "train"), + l1_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), + l2_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), + l2_shrinkage_regularization_strength = p_dbl(lower = 0, default = 0, tags = "train"), + beta = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = FALSE, tags = "train") + ) - ps$add_dep("lr", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "rmsprop", "sgd"))) - ps$add_dep("beta_1", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) - ps$add_dep("beta_2", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) - ps$add_dep("epsilon", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) - ps$add_dep("decay", "optimizer", CondAnyOf$new(c("adamax", "adam", "sgd"))) - ps$add_dep("clipnorm", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "sgd"))) - ps$add_dep("clipvalue", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "sgd"))) - ps$add_dep("momentum", "optimizer", CondEqual$new("sgd")) - ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + ps$add_dep("lr", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "rmsprop", "sgd"))) + ps$add_dep("beta_1", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) + ps$add_dep("beta_2", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) + ps$add_dep("epsilon", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam"))) + ps$add_dep("decay", "optimizer", CondAnyOf$new(c("adamax", "adam", "sgd"))) + ps$add_dep("clipnorm", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "sgd"))) + ps$add_dep("clipvalue", "optimizer", CondAnyOf$new(c("adamax", "adam", "nadam", "sgd"))) + ps$add_dep("momentum", "optimizer", CondEqual$new("sgd")) + ps$add_dep("nesterov", "optimizer", 
CondEqual$new("sgd")) - ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("baseline", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("restore_best_weights", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("baseline", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("restore_best_weights", "early_stopping", CondEqual$new(TRUE)) - # verbose default changed to prevent plotting - ps$values$verbose = 0L + # verbose default changed to prevent plotting + ps$values$verbose = 0L - super$initialize( - id = "surv.dnnsurv", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.dnnsurv", - packages = c("mlr3extralearners", "survivalmodels", "keras", "pseudo", "tensorflow", "distr6"), - label = "Neural Network" - ) - } - ), + super$initialize( + id = "surv.dnnsurv", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.dnnsurv", + packages = c("mlr3extralearners", "survivalmodels", "keras", "pseudo", "tensorflow", "distr6"), + label = "Neural Network" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::dnnsurv, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::dnnsurv, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) - } - ) + list(crank = pred$risk, distr = pred$surv) + } ) ) -.extralrns_dict$add("surv.dnnsurv", function() LearnerSurvDNNSurv$new()) +.extralrns_dict$add("surv.dnnsurv", LearnerSurvDNNSurv) diff --git a/R/learner_survivalmodels_surv_loghaz.R b/R/learner_survivalmodels_surv_loghaz.R index 3320248f6..38ec352ba 100644 --- a/R/learner_survivalmodels_surv_loghaz.R +++ b/R/learner_survivalmodels_surv_loghaz.R @@ -23,142 +23,139 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvLogisticHazard", - R6Class("LearnerSurvLogisticHazard", - inherit = mlr3proba::LearnerSurv, +LearnerSurvLogisticHazard = R6Class("LearnerSurvLogisticHazard", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { - param_set = ps( - frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - cuts = p_int(default = 10L, lower = 1L, tags = "train"), - cutpoints = p_uty(tags = "train"), - scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), - tags = "train"), - cut_min = p_dbl(default = 0, lower = 0, tags = "train"), - num_nodes = p_uty(default = c(32L, 32L), tags = "train"), - batch_norm = p_lgl(default = TRUE, tags = "train"), - dropout = p_dbl(lower = 0, upper = 1, tags = "train"), - activation = p_fct(default = "relu", - levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", - "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", - "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", - "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), - tags = "train"), - custom_net = p_uty(tags = "train"), - device = p_uty(tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"), tags = "train"), - rho = p_dbl(default = 0.9, tags = "train"), - eps = p_dbl(default = 1e-8, tags = "train"), - lr = p_dbl(default = 1, tags = "train"), - weight_decay = p_dbl(default = 0, tags = "train"), - learning_rate = p_dbl(default = 1e-2, tags = "train"), - lr_decay = p_dbl(default = 0, tags = "train"), - betas = p_uty(default = c(0.9, 0.999), tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), - alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), - t0 = p_dbl(default = 1e6, tags = "train"), - momentum = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = TRUE, tags = "train"), - etas = p_uty(default = c(0.5, 1.2), tags = "train"), - step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), - dampening = p_dbl(default = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 256L, tags = c("train", "predict")), - epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), - shuffle = p_lgl(default = TRUE, tags = "train"), - best_weights = p_lgl(default = FALSE, tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, tags = "train"), - patience = p_int(default = 10, tags = "train"), - interpolate = p_lgl(default = FALSE, tags = "predict"), - inter_scheme = p_fct(default = "const_hazard", - levels = c("const_hazard", "const_pdf"), tags = "predict"), - sub = p_int(default = 10L, lower = 1L, tags = "predict") - ) + param_set = ps( + frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + cuts = p_int(default = 10L, lower = 1L, tags = "train"), + cutpoints = p_uty(tags = "train"), + scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), + tags = "train"), + cut_min = p_dbl(default = 0, lower = 0, tags = "train"), + num_nodes = p_uty(default = c(32L, 32L), tags = "train"), + batch_norm = p_lgl(default = TRUE, tags = "train"), + dropout = p_dbl(lower = 0, upper = 1, tags = "train"), + activation = p_fct(default = "relu", + levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", + "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", + "rrelu", "relu", "selu", "sigmoid", 
"softmax", "softmax2d", "softmin", + "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), + tags = "train"), + custom_net = p_uty(tags = "train"), + device = p_uty(tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"), tags = "train"), + rho = p_dbl(default = 0.9, tags = "train"), + eps = p_dbl(default = 1e-8, tags = "train"), + lr = p_dbl(default = 1, tags = "train"), + weight_decay = p_dbl(default = 0, tags = "train"), + learning_rate = p_dbl(default = 1e-2, tags = "train"), + lr_decay = p_dbl(default = 0, tags = "train"), + betas = p_uty(default = c(0.9, 0.999), tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), + alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), + t0 = p_dbl(default = 1e6, tags = "train"), + momentum = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = TRUE, tags = "train"), + etas = p_uty(default = c(0.5, 1.2), tags = "train"), + step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), + dampening = p_dbl(default = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 256L, tags = c("train", "predict")), + epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), + shuffle = p_lgl(default = TRUE, tags = "train"), + best_weights = p_lgl(default = FALSE, tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, tags = "train"), + patience = p_int(default = 10, tags = "train"), + interpolate = p_lgl(default = FALSE, tags = "predict"), + inter_scheme = p_fct(default = "const_hazard", + levels = c("const_hazard", "const_pdf"), tags = "predict"), + sub = p_int(default = 10L, lower = 1L, tags = "predict") + ) - param_set$add_dep("rho", "optimizer", CondEqual$new("adadelta")) - param_set$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", - "adamw", "rmsprop", "sparse_adam"))) - param_set$add_dep("lr", "optimizer", CondEqual$new("adadelta")) - param_set$add_dep("weight_decay", "optimizer", - CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", - "asgd", "rmsprop", "sgd"))) - param_set$add_dep("learning_rate", "optimizer", - CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"))) - param_set$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) - param_set$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) - param_set$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) - param_set$add_dep("lambd", "optimizer", CondEqual$new("asgd")) - param_set$add_dep("t0", "optimizer", CondEqual$new("asgd")) - param_set$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) - param_set$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) - param_set$add_dep("etas", "optimizer", CondEqual$new("rprop")) - param_set$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) - param_set$add_dep("dampening", "optimizer", CondEqual$new("sgd")) - param_set$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + param_set$add_dep("rho", "optimizer", CondEqual$new("adadelta")) + param_set$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", 
"adagrad", "adam", "adamax", + "adamw", "rmsprop", "sparse_adam"))) + param_set$add_dep("lr", "optimizer", CondEqual$new("adadelta")) + param_set$add_dep("weight_decay", "optimizer", + CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", + "asgd", "rmsprop", "sgd"))) + param_set$add_dep("learning_rate", "optimizer", + CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"))) + param_set$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) + param_set$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) + param_set$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) + param_set$add_dep("lambd", "optimizer", CondEqual$new("asgd")) + param_set$add_dep("t0", "optimizer", CondEqual$new("asgd")) + param_set$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) + param_set$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) + param_set$add_dep("etas", "optimizer", CondEqual$new("rprop")) + param_set$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) + param_set$add_dep("dampening", "optimizer", CondEqual$new("sgd")) + param_set$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) - param_set$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - param_set$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + param_set$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + param_set$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - param_set$add_dep("sub", "interpolate", CondEqual$new(TRUE)) - param_set$add_dep("inter_scheme", "interpolate", CondEqual$new(TRUE)) + param_set$add_dep("sub", "interpolate", CondEqual$new(TRUE)) + param_set$add_dep("inter_scheme", "interpolate", CondEqual$new(TRUE)) - super$initialize( - id = "surv.loghaz", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = param_set, - man = "mlr3extralearners::mlr_learners_surv.loghaz", - packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), - label = "Logistic-Hazard Learner" - ) - } - ), + super$initialize( + id = "surv.loghaz", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = param_set, + man = "mlr3extralearners::mlr_learners_surv.loghaz", + packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), + label = "Logistic-Hazard Learner" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::loghaz, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::loghaz, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) + 
list(crank = pred$risk, distr = pred$surv) - } - ) + } ) ) -.extralrns_dict$add("surv.loghaz", function() LearnerSurvLogisticHazard$new()) +.extralrns_dict$add("surv.loghaz", LearnerSurvLogisticHazard) diff --git a/R/learner_survivalmodels_surv_pchazard.R b/R/learner_survivalmodels_surv_pchazard.R index e4d4df0aa..61a00b9ae 100644 --- a/R/learner_survivalmodels_surv_pchazard.R +++ b/R/learner_survivalmodels_surv_pchazard.R @@ -24,141 +24,138 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvPCHazard", - R6Class("LearnerSurvPCHazard", - inherit = mlr3proba::LearnerSurv, +LearnerSurvPCHazard = R6Class("LearnerSurvPCHazard", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { - ps = ps( - frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), - cuts = p_int(default = 10L, lower = 1L, tags = "train"), - cutpoints = p_uty(tags = "train"), - scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), - tags = "train"), - cut_min = p_dbl(default = 0, lower = 0, tags = "train"), - num_nodes = p_uty(default = c(32L, 32L), tags = "train"), - batch_norm = p_lgl(default = TRUE, tags = "train"), - reduction = p_fct(default = "mean", levels = c("mean", "none", "sum"), - tags = "train"), - dropout = p_dbl(lower = 0, upper = 1, tags = "train"), - activation = p_fct(default = "relu", - levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", - "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", - "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", - "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), - tags = "train"), - custom_net = p_uty(tags = "train"), - device = p_uty(tags = "train"), - optimizer = p_fct(default = "adam", - levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"), tags = "train"), - rho = p_dbl(default = 0.9, tags = "train"), - eps = p_dbl(default = 1e-8, tags = "train"), - lr = p_dbl(default = 1, tags = "train"), - weight_decay = p_dbl(default = 0, tags = "train"), - learning_rate = p_dbl(default = 1e-2, tags = "train"), - lr_decay = p_dbl(default = 0, tags = "train"), - betas = p_uty(default = c(0.9, 0.999), tags = "train"), - amsgrad = p_lgl(default = FALSE, tags = "train"), - lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), - alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), - t0 = p_dbl(default = 1e6, tags = "train"), - momentum = p_dbl(default = 0, tags = "train"), - centered = p_lgl(default = TRUE, tags = "train"), - etas = p_uty(default = c(0.5, 1.2), tags = "train"), - step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), - dampening = p_dbl(default = 0, tags = "train"), - nesterov = p_lgl(default = FALSE, tags = "train"), - batch_size = p_int(default = 256L, tags = c("train", "predict")), - epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), - verbose = p_lgl(default = TRUE, tags = "train"), - num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), - shuffle = p_lgl(default = TRUE, tags = "train"), - best_weights = p_lgl(default = FALSE, tags = "train"), - early_stopping = p_lgl(default = FALSE, tags = "train"), - min_delta = p_dbl(default = 0, tags = "train"), - patience = 
p_int(default = 10, tags = "train"), - interpolate = p_lgl(default = FALSE, tags = "predict"), - sub = p_int(default = 10L, lower = 1L, tags = "predict") - ) + ps = ps( + frac = p_dbl(default = 0, lower = 0, upper = 1, tags = "train"), + cuts = p_int(default = 10L, lower = 1L, tags = "train"), + cutpoints = p_uty(tags = "train"), + scheme = p_fct(default = "equidistant", levels = c("equidistant", "quantiles"), + tags = "train"), + cut_min = p_dbl(default = 0, lower = 0, tags = "train"), + num_nodes = p_uty(default = c(32L, 32L), tags = "train"), + batch_norm = p_lgl(default = TRUE, tags = "train"), + reduction = p_fct(default = "mean", levels = c("mean", "none", "sum"), + tags = "train"), + dropout = p_dbl(lower = 0, upper = 1, tags = "train"), + activation = p_fct(default = "relu", + levels = c("celu", "elu", "gelu", "glu", "hardshrink", "hardsigmoid", "hardswish", + "hardtanh", "relu6", "leakyrelu", "logsigmoid", "logsoftmax", "prelu", + "rrelu", "relu", "selu", "sigmoid", "softmax", "softmax2d", "softmin", + "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold"), + tags = "train"), + custom_net = p_uty(tags = "train"), + device = p_uty(tags = "train"), + optimizer = p_fct(default = "adam", + levels = c("adadelta", "adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"), tags = "train"), + rho = p_dbl(default = 0.9, tags = "train"), + eps = p_dbl(default = 1e-8, tags = "train"), + lr = p_dbl(default = 1, tags = "train"), + weight_decay = p_dbl(default = 0, tags = "train"), + learning_rate = p_dbl(default = 1e-2, tags = "train"), + lr_decay = p_dbl(default = 0, tags = "train"), + betas = p_uty(default = c(0.9, 0.999), tags = "train"), + amsgrad = p_lgl(default = FALSE, tags = "train"), + lambd = p_dbl(default = 1e-4, lower = 0, tags = "train"), + alpha = p_dbl(default = 0.75, lower = 0, tags = "train"), + t0 = p_dbl(default = 1e6, tags = "train"), + momentum = p_dbl(default = 0, tags = "train"), + centered = p_lgl(default = TRUE, tags = "train"), + etas = p_uty(default = c(0.5, 1.2), tags = "train"), + step_sizes = p_uty(default = c(1e-6, 50), tags = "train"), + dampening = p_dbl(default = 0, tags = "train"), + nesterov = p_lgl(default = FALSE, tags = "train"), + batch_size = p_int(default = 256L, tags = c("train", "predict")), + epochs = p_int(lower = 1L, upper = Inf, default = 1, tags = "train"), + verbose = p_lgl(default = TRUE, tags = "train"), + num_workers = p_int(default = 0L, tags = c("train", "predict", "threads")), + shuffle = p_lgl(default = TRUE, tags = "train"), + best_weights = p_lgl(default = FALSE, tags = "train"), + early_stopping = p_lgl(default = FALSE, tags = "train"), + min_delta = p_dbl(default = 0, tags = "train"), + patience = p_int(default = 10, tags = "train"), + interpolate = p_lgl(default = FALSE, tags = "predict"), + sub = p_int(default = 10L, lower = 1L, tags = "predict") + ) - ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", - "adamw", "rmsprop", "sparse_adam"))) - ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("weight_decay", "optimizer", - CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", - "asgd", "rmsprop", "sgd"))) - ps$add_dep("learning_rate", "optimizer", - CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", - "sgd", "sparse_adam"))) - ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) - ps$add_dep("betas", "optimizer", 
CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) - ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) - ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) - ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) - ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) - ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) - ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) - ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) - ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) - ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) + ps$add_dep("rho", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("eps", "optimizer", CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", + "adamw", "rmsprop", "sparse_adam"))) + ps$add_dep("lr", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("weight_decay", "optimizer", + CondAnyOf$new(c("adadelta", "adagrad", "adam", "adamax", "adamw", + "asgd", "rmsprop", "sgd"))) + ps$add_dep("learning_rate", "optimizer", + CondAnyOf$new(c("adagrad", "adam", "adamax", "adamw", "asgd", "rmsprop", "rprop", + "sgd", "sparse_adam"))) + ps$add_dep("lr_decay", "optimizer", CondEqual$new("adadelta")) + ps$add_dep("betas", "optimizer", CondAnyOf$new(c("adam", "adamax", "adamw", "sparse_adam"))) + ps$add_dep("amsgrad", "optimizer", CondAnyOf$new(c("adam", "adamw"))) + ps$add_dep("lambd", "optimizer", CondEqual$new("asgd")) + ps$add_dep("t0", "optimizer", CondEqual$new("asgd")) + ps$add_dep("momentum", "optimizer", CondAnyOf$new(c("sgd", "rmsprop"))) + ps$add_dep("centered", "optimizer", CondEqual$new("rmsprop")) + ps$add_dep("etas", "optimizer", CondEqual$new("rprop")) + ps$add_dep("step_sizes", "optimizer", CondEqual$new("rprop")) + ps$add_dep("dampening", "optimizer", CondEqual$new("sgd")) + ps$add_dep("nesterov", "optimizer", CondEqual$new("sgd")) - ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("min_delta", "early_stopping", CondEqual$new(TRUE)) + ps$add_dep("patience", "early_stopping", CondEqual$new(TRUE)) - ps$add_dep("sub", "interpolate", CondEqual$new(TRUE)) + ps$add_dep("sub", "interpolate", CondEqual$new(TRUE)) - super$initialize( - id = "surv.pchazard", - feature_types = c("integer", "numeric"), - predict_types = c("crank", "distr"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.pchazard", - packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), - label = "PC-Hazard Learner" - ) - } - ), + super$initialize( + id = "surv.pchazard", + feature_types = c("integer", "numeric"), + predict_types = c("crank", "distr"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.pchazard", + packages = c("mlr3extralearners", "survivalmodels", "distr6", "reticulate"), + label = "PC-Hazard Learner" + ) + } + ), - private = list( - .train = function(task) { + private = list( + .train = function(task) { - pars = self$param_set$get_values(tags = "train") - invoke( - survivalmodels::pchazard, - data = data.table::setDF(task$data()), - time_variable = task$target_names[1L], - status_variable = task$target_names[2L], - .args = pars - ) + pars = self$param_set$get_values(tags = "train") + invoke( + survivalmodels::pchazard, + data = data.table::setDF(task$data()), + time_variable = task$target_names[1L], + status_variable = task$target_names[2L], + .args = pars + ) - }, + }, - .predict = function(task) { + .predict = function(task) { - pars = 
self$param_set$get_values(tags = "predict") - newdata = ordered_features(task, self) + pars = self$param_set$get_values(tags = "predict") + newdata = ordered_features(task, self) - pred = invoke( - predict, - self$model, - newdata = newdata, - distr6 = FALSE, - type = "all", - .args = pars - ) + pred = invoke( + predict, + self$model, + newdata = newdata, + distr6 = FALSE, + type = "all", + .args = pars + ) - list(crank = pred$risk, distr = pred$surv) + list(crank = pred$risk, distr = pred$surv) - } - ) + } ) ) -.extralrns_dict$add("surv.pchazard", function() LearnerSurvPCHazard$new()) +.extralrns_dict$add("surv.pchazard", LearnerSurvPCHazard) diff --git a/R/learner_survivalsvm_surv_svm.R b/R/learner_survivalsvm_surv_svm.R index 0a4fee6bf..5363c3763 100644 --- a/R/learner_survivalsvm_surv_svm.R +++ b/R/learner_survivalsvm_surv_svm.R @@ -24,86 +24,83 @@ #' @template seealso_learner #' @template example #' @export -delayedAssign( - "LearnerSurvSVM", - R6Class("LearnerSurvSVM", - inherit = mlr3proba::LearnerSurv, +LearnerSurvSVM = R6Class("LearnerSurvSVM", + inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - ps = ps( - type = p_fct( - default = "regression", - levels = c("regression", "vanbelle1", "vanbelle2", "hybrid"), - tags = "train"), - diff.meth = p_fct( - levels = c("makediff1", "makediff2", "makediff3"), - tags = c("train")), - gamma.mu = p_uty(tags = c("train", "required")), - opt.meth = p_fct( - default = "quadprog", levels = c("quadprog", "ipop"), - tags = "train"), - kernel = p_fct( - default = "lin_kernel", - levels = c("lin_kernel", "add_kernel", "rbf_kernel", "poly_kernel"), - tags = "train"), - kernel.pars = p_uty(tags = "train"), - sgf.sv = p_int(default = 5L, lower = 0L, tags = "train"), - sigf = p_int(default = 7L, lower = 0L, tags = "train"), - maxiter = p_int(default = 20L, lower = 0L, tags = "train"), - margin = p_dbl(default = 0.05, lower = 0, tags = "train"), - bound = p_dbl(default = 10, lower = 0, tags = "train"), - eig.tol = p_dbl(default = 1e-06, lower = 0, tags = "train"), - conv.tol = p_dbl(default = 1e-07, lower = 0, tags = "train"), - posd.tol = p_dbl(default = 1e-08, lower = 0, tags = "train") - ) + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + ps = ps( + type = p_fct( + default = "regression", + levels = c("regression", "vanbelle1", "vanbelle2", "hybrid"), + tags = "train"), + diff.meth = p_fct( + levels = c("makediff1", "makediff2", "makediff3"), + tags = c("train")), + gamma.mu = p_uty(tags = c("train", "required")), + opt.meth = p_fct( + default = "quadprog", levels = c("quadprog", "ipop"), + tags = "train"), + kernel = p_fct( + default = "lin_kernel", + levels = c("lin_kernel", "add_kernel", "rbf_kernel", "poly_kernel"), + tags = "train"), + kernel.pars = p_uty(tags = "train"), + sgf.sv = p_int(default = 5L, lower = 0L, tags = "train"), + sigf = p_int(default = 7L, lower = 0L, tags = "train"), + maxiter = p_int(default = 20L, lower = 0L, tags = "train"), + margin = p_dbl(default = 0.05, lower = 0, tags = "train"), + bound = p_dbl(default = 10, lower = 0, tags = "train"), + eig.tol = p_dbl(default = 1e-06, lower = 0, tags = "train"), + conv.tol = p_dbl(default = 1e-07, lower = 0, tags = "train"), + posd.tol = p_dbl(default = 1e-08, lower = 0, tags = "train") + ) - ps$add_dep("diff.meth", "type", CondAnyOf$new(c("vanbelle1", "vanbelle2", "hybrid"))) + ps$add_dep("diff.meth", "type", CondAnyOf$new(c("vanbelle1", "vanbelle2", "hybrid"))) - super$initialize( - id = "surv.svm", - packages = c("mlr3extralearners", "survivalsvm"), - feature_types = c("integer", "numeric", "character", "factor", "logical"), - predict_types = c("crank", "response"), - param_set = ps, - man = "mlr3extralearners::mlr_learners_surv.svm", - label = "Support Vector Machine" - ) - } - ), - - private = list( - .train = function(task) { - with_package("survivalsvm", { - invoke(survivalsvm::survivalsvm, - formula = task$formula(), - data = task$data(), - .args = self$param_set$get_values(tags = "train")) - }) - }, + super$initialize( + id = "surv.svm", + packages = c("mlr3extralearners", "survivalsvm"), + feature_types = c("integer", "numeric", "character", "factor", "logical"), + predict_types = c("crank", "response"), + param_set = ps, + man = "mlr3extralearners::mlr_learners_surv.svm", + label = "Support Vector Machine" + ) + } + ), - .predict = function(task) { - pars = self$param_set$get_values(tags = "predict") - fit = predict(self$model, newdata = ordered_features(task, self), - .args = pars - ) - crank = as.numeric(fit$predicted) + private = list( + .train = function(task) { + with_package("survivalsvm", { + invoke(survivalsvm::survivalsvm, + formula = task$formula(), + data = task$data(), + .args = self$param_set$get_values(tags = "train")) + }) + }, - if (is.null(self$param_set$values$type) || - (self$param_set$values$type %in% c("regression", "hybrid"))) { - # higher survival time = lower risk - response = crank - } else { - response = NULL - } + .predict = function(task) { + pars = self$param_set$get_values(tags = "predict") + fit = predict(self$model, newdata = ordered_features(task, self), + .args = pars + ) + crank = as.numeric(fit$predicted) - # higher rank = higher risk - list(crank = -crank, response = response) + if (is.null(self$param_set$values$type) || + (self$param_set$values$type %in% c("regression", "hybrid"))) { + # higher survival time = lower risk + response = crank + } else { + response = NULL } - ) + + # higher rank = higher risk + list(crank = -crank, response = response) + } ) ) -.extralrns_dict$add("surv.svm", function() LearnerSurvSVM$new()) +.extralrns_dict$add("surv.svm", LearnerSurvSVM) diff --git a/R/learner_xgboost_surv_xgboost.R b/R/learner_xgboost_surv_xgboost.R index 4a3d0687a..1176d8ec5 
100644 --- a/R/learner_xgboost_surv_xgboost.R +++ b/R/learner_xgboost_surv_xgboost.R @@ -31,202 +31,201 @@ #' @export #' @template seealso_learner #' @template example -delayedAssign( - "LearnerSurvXgboost", R6Class("LearnerSurvXgboost", - inherit = mlr3proba::LearnerSurv, - public = list( - #' @description - #' Creates a new instance of this [R6][R6::R6Class] class. - initialize = function() { - - ps = ps( - aft_loss_distribution = p_fct(c("normal", "logistic", "extreme"), default = "normal", tags = "train"), - aft_loss_distribution_scale = p_dbl(tags = "train"), - alpha = p_dbl(0, default = 0, tags = "train"), - base_score = p_dbl(default = 0.5, tags = "train"), - booster = p_fct(c("gbtree", "gblinear", "dart"), default = "gbtree", tags = "train"), - callbacks = p_uty(default = list(), tags = "train"), - colsample_bylevel = p_dbl(0, 1, default = 1, tags = "train"), - colsample_bynode = p_dbl(0, 1, default = 1, tags = "train"), - colsample_bytree = p_dbl(0, 1, default = 1, tags = "train"), - disable_default_eval_metric = p_lgl(default = FALSE, tags = "train"), - early_stopping_rounds = p_int(1L, default = NULL, special_vals = list(NULL), tags = "train"), - early_stopping_set = p_fct(c("none", "train", "test"), default = "none", tags = "train"), - eta = p_dbl(0, 1, default = 0.3, tags = "train"), - feature_selector = p_fct(c("cyclic", "shuffle", "random", "greedy", "thrifty"), default = "cyclic", tags = "train"), - feval = p_uty(default = NULL, tags = "train"), - gamma = p_dbl(0, default = 0, tags = "train"), - grow_policy = p_fct(c("depthwise", "lossguide"), default = "depthwise", tags = "train"), - interaction_constraints = p_uty(tags = "train"), - iterationrange = p_uty(tags = "predict"), - lambda = p_dbl(0, default = 1, tags = "train"), - lambda_bias = p_dbl(0, default = 0, tags = "train"), - max_bin = p_int(2L, default = 256L, tags = "train"), - max_delta_step = p_dbl(0, default = 0, tags = "train"), - max_depth = p_int(0L, default = 6L, tags = "train"), - max_leaves = p_int(0L, default = 0L, tags = "train"), - maximize = p_lgl(default = NULL, special_vals = list(NULL), tags = "train"), - min_child_weight = p_dbl(0, default = 1, tags = "train"), - missing = p_dbl(default = NA, tags = c("train", "predict"), special_vals = list(NA, NA_real_, NULL)), - monotone_constraints = p_int(-1L, 1L, default = 0L, tags = "train"), - normalize_type = p_fct(c("tree", "forest"), default = "tree", tags = "train"), - nrounds = p_int(1L, tags = "train"), - nthread = p_int(1L, default = 1L, tags = c("train", "threads")), - ntreelimit = p_int(1L, tags = "predict"), - num_parallel_tree = p_int(1L, default = 1L, tags = "train"), - objective = p_fct(c("survival:cox", "survival:aft"), default = "survival:cox", tags = c("train", "predict")), - one_drop = p_lgl(default = FALSE, tags = "train"), - print_every_n = p_int(1L, default = 1L, tags = "train"), - process_type = p_fct(c("default", "update"), default = "default", tags = "train"), - rate_drop = p_dbl(0, 1, default = 0, tags = "train"), - refresh_leaf = p_lgl(default = TRUE, tags = "train"), - sampling_method = p_fct(c("uniform", "gradient_based"), default = "uniform", tags = "train"), - sample_type = p_fct(c("uniform", "weighted"), default = "uniform", tags = "train"), - save_name = p_uty(tags = "train"), - save_period = p_int(0L, tags = "train"), - scale_pos_weight = p_dbl(default = 1, tags = "train"), - seed_per_iteration = p_lgl(default = FALSE, tags = "train"), - skip_drop = p_dbl(0, 1, default = 0, tags = "train"), - strict_shape = p_lgl(default = 
FALSE, tags = "predict"), - subsample = p_dbl(0, 1, default = 1, tags = "train"), - top_k = p_int(0, default = 0, tags = "train"), - tree_method = p_fct(c("auto", "exact", "approx", "hist", "gpu_hist"), default = "auto", tags = "train"), - tweedie_variance_power = p_dbl(1, 2, default = 1.5, tags = "train"), - updater = p_uty(tags = "train"), # Default depends on the selected booster - verbose = p_int(0L, 2L, default = 1L, tags = "train"), - watchlist = p_uty(default = NULL, tags = "train"), - xgb_model = p_uty(tags = "train"), - device = p_uty(tags = "train") - ) - # param deps - ps$add_dep("print_every_n", "verbose", CondEqual$new(1L)) - ps$add_dep("sample_type", "booster", CondEqual$new("dart")) - ps$add_dep("normalize_type", "booster", CondEqual$new("dart")) - ps$add_dep("rate_drop", "booster", CondEqual$new("dart")) - ps$add_dep("skip_drop", "booster", CondEqual$new("dart")) - ps$add_dep("one_drop", "booster", CondEqual$new("dart")) - ps$add_dep("tree_method", "booster", CondAnyOf$new(c("gbtree", "dart"))) - ps$add_dep("grow_policy", "tree_method", CondEqual$new("hist")) - ps$add_dep("max_leaves", "grow_policy", CondEqual$new("lossguide")) - ps$add_dep("max_bin", "tree_method", CondEqual$new("hist")) - ps$add_dep("feature_selector", "booster", CondEqual$new("gblinear")) - ps$add_dep("top_k", "booster", CondEqual$new("gblinear")) - ps$add_dep("top_k", "feature_selector", CondAnyOf$new(c("greedy", "thrifty"))) - ps$add_dep("aft_loss_distribution", "objective", CondEqual$new("survival:aft")) - ps$add_dep("aft_loss_distribution_scale", "objective", CondEqual$new("survival:aft")) - - # custom defaults - ps$values = list(nrounds = 1L, nthread = 1L, verbose = 0L, early_stopping_set = "none") - - super$initialize( - id = "surv.xgboost", - param_set = ps, - predict_types = c("crank", "lp"), - feature_types = c("integer", "numeric"), - properties = c("weights", "missings", "importance"), - packages = c("mlr3extralearners", "xgboost"), - man = "mlr3extralearners::mlr_learners_surv.xgboost", - label = "Gradient Boosting" - ) - }, - - #' @description - #' The importance scores are calculated with [xgboost::xgb.importance()]. - #' - #' @return Named `numeric()`. - importance = function() { - if (is.null(self$model)) { - stopf("No model stored") - } - - imp = xgboost::xgb.importance( - model = self$model - ) - set_names(imp$Gain, imp$Feature) +LearnerSurvXgboost = R6Class("LearnerSurvXgboost", + inherit = mlr3proba::LearnerSurv, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + + ps = ps( + aft_loss_distribution = p_fct(c("normal", "logistic", "extreme"), default = "normal", tags = "train"), + aft_loss_distribution_scale = p_dbl(tags = "train"), + alpha = p_dbl(0, default = 0, tags = "train"), + base_score = p_dbl(default = 0.5, tags = "train"), + booster = p_fct(c("gbtree", "gblinear", "dart"), default = "gbtree", tags = "train"), + callbacks = p_uty(default = list(), tags = "train"), + colsample_bylevel = p_dbl(0, 1, default = 1, tags = "train"), + colsample_bynode = p_dbl(0, 1, default = 1, tags = "train"), + colsample_bytree = p_dbl(0, 1, default = 1, tags = "train"), + disable_default_eval_metric = p_lgl(default = FALSE, tags = "train"), + early_stopping_rounds = p_int(1L, default = NULL, special_vals = list(NULL), tags = "train"), + early_stopping_set = p_fct(c("none", "train", "test"), default = "none", tags = "train"), + eta = p_dbl(0, 1, default = 0.3, tags = "train"), + feature_selector = p_fct(c("cyclic", "shuffle", "random", "greedy", "thrifty"), default = "cyclic", tags = "train"), + feval = p_uty(default = NULL, tags = "train"), + gamma = p_dbl(0, default = 0, tags = "train"), + grow_policy = p_fct(c("depthwise", "lossguide"), default = "depthwise", tags = "train"), + interaction_constraints = p_uty(tags = "train"), + iterationrange = p_uty(tags = "predict"), + lambda = p_dbl(0, default = 1, tags = "train"), + lambda_bias = p_dbl(0, default = 0, tags = "train"), + max_bin = p_int(2L, default = 256L, tags = "train"), + max_delta_step = p_dbl(0, default = 0, tags = "train"), + max_depth = p_int(0L, default = 6L, tags = "train"), + max_leaves = p_int(0L, default = 0L, tags = "train"), + maximize = p_lgl(default = NULL, special_vals = list(NULL), tags = "train"), + min_child_weight = p_dbl(0, default = 1, tags = "train"), + missing = p_dbl(default = NA, tags = c("train", "predict"), special_vals = list(NA, NA_real_, NULL)), + monotone_constraints = p_int(-1L, 1L, default = 0L, tags = "train"), + normalize_type = p_fct(c("tree", "forest"), default = "tree", tags = "train"), + nrounds = p_int(1L, tags = "train"), + nthread = p_int(1L, default = 1L, tags = c("train", "threads")), + ntreelimit = p_int(1L, tags = "predict"), + num_parallel_tree = p_int(1L, default = 1L, tags = "train"), + objective = p_fct(c("survival:cox", "survival:aft"), default = "survival:cox", tags = c("train", "predict")), + one_drop = p_lgl(default = FALSE, tags = "train"), + print_every_n = p_int(1L, default = 1L, tags = "train"), + process_type = p_fct(c("default", "update"), default = "default", tags = "train"), + rate_drop = p_dbl(0, 1, default = 0, tags = "train"), + refresh_leaf = p_lgl(default = TRUE, tags = "train"), + sampling_method = p_fct(c("uniform", "gradient_based"), default = "uniform", tags = "train"), + sample_type = p_fct(c("uniform", "weighted"), default = "uniform", tags = "train"), + save_name = p_uty(tags = "train"), + save_period = p_int(0L, tags = "train"), + scale_pos_weight = p_dbl(default = 1, tags = "train"), + seed_per_iteration = p_lgl(default = FALSE, tags = "train"), + skip_drop = p_dbl(0, 1, default = 0, tags = "train"), + strict_shape = p_lgl(default = FALSE, tags = "predict"), + subsample = p_dbl(0, 1, default = 1, tags = "train"), + top_k = p_int(0, default = 0, tags = "train"), + tree_method = p_fct(c("auto", "exact", "approx", "hist", "gpu_hist"), default = "auto", tags = "train"), + tweedie_variance_power = p_dbl(1, 2, default = 1.5, tags = "train"), + updater = p_uty(tags = "train"), # Default depends on the 
selected booster + verbose = p_int(0L, 2L, default = 1L, tags = "train"), + watchlist = p_uty(default = NULL, tags = "train"), + xgb_model = p_uty(tags = "train"), + device = p_uty(tags = "train") + ) + # param deps + ps$add_dep("print_every_n", "verbose", CondEqual$new(1L)) + ps$add_dep("sample_type", "booster", CondEqual$new("dart")) + ps$add_dep("normalize_type", "booster", CondEqual$new("dart")) + ps$add_dep("rate_drop", "booster", CondEqual$new("dart")) + ps$add_dep("skip_drop", "booster", CondEqual$new("dart")) + ps$add_dep("one_drop", "booster", CondEqual$new("dart")) + ps$add_dep("tree_method", "booster", CondAnyOf$new(c("gbtree", "dart"))) + ps$add_dep("grow_policy", "tree_method", CondEqual$new("hist")) + ps$add_dep("max_leaves", "grow_policy", CondEqual$new("lossguide")) + ps$add_dep("max_bin", "tree_method", CondEqual$new("hist")) + ps$add_dep("feature_selector", "booster", CondEqual$new("gblinear")) + ps$add_dep("top_k", "booster", CondEqual$new("gblinear")) + ps$add_dep("top_k", "feature_selector", CondAnyOf$new(c("greedy", "thrifty"))) + ps$add_dep("aft_loss_distribution", "objective", CondEqual$new("survival:aft")) + ps$add_dep("aft_loss_distribution_scale", "objective", CondEqual$new("survival:aft")) + + # custom defaults + ps$values = list(nrounds = 1L, nthread = 1L, verbose = 0L, early_stopping_set = "none") + + super$initialize( + id = "surv.xgboost", + param_set = ps, + predict_types = c("crank", "lp"), + feature_types = c("integer", "numeric"), + properties = c("weights", "missings", "importance"), + packages = c("mlr3extralearners", "xgboost"), + man = "mlr3extralearners::mlr_learners_surv.xgboost", + label = "Gradient Boosting" + ) + }, + + #' @description + #' The importance scores are calculated with [xgboost::xgb.importance()]. + #' + #' @return Named `numeric()`. 
+ importance = function() { + if (is.null(self$model)) { + stopf("No model stored") } - ), - - private = list( - # helper function to construct an `xgb.DMatrix` object - .get_data = function(task, pv, row_ids = NULL) { - # use all task rows if `rows_ids` is not specified - if (is.null(row_ids)) - row_ids = task$row_ids - - data = task$data(rows = row_ids, cols = task$feature_names) - target = task$data(rows = row_ids, cols = task$target_names) - targets = task$target_names - label = target[[targets[1]]] # time - status = target[[targets[2]]] - - if (pv$objective == "survival:cox") { - label[status != 1] = -1L * label[status != 1] - data = xgboost::xgb.DMatrix( - data = as_numeric_matrix(data), - label = label) - } else { - y_lower_bound = y_upper_bound = label - y_upper_bound[status == 0] = Inf - - data = xgboost::xgb.DMatrix(as_numeric_matrix(data)) - xgboost::setinfo(data, "label_lower_bound", y_lower_bound) - xgboost::setinfo(data, "label_upper_bound", y_upper_bound) - } - data - }, - - .train = function(task) { - - pv = self$param_set$get_values(tags = "train") - - if (is.null(pv$objective)) { - pv$objective = "survival:cox" - } - - if (pv$objective == "survival:cox") { - pv$eval_metric = "cox-nloglik" - } else { - pv$eval_metric = "aft-nloglik" - } - - data = private$.get_data(task, pv) - - if ("weights" %in% task$properties) { - xgboost::setinfo(data, "weight", task$weights$weight) - } - - # XGBoost uses the last element in the watchlist as - # the early stopping set - if (pv$early_stopping_set != "none") { - pv$watchlist = c(pv$watchlist, list(train = data)) - } - - if (pv$early_stopping_set == "test" && !is.null(task$row_roles$test)) { - test_data = private$.get_data(task, pv, task$row_roles$test) - pv$watchlist = c(pv$watchlist, list(test = test_data)) - } - pv$early_stopping_set = NULL - - invoke(xgboost::xgb.train, data = data, .args = pv) - }, - - .predict = function(task) { - pv = self$param_set$get_values(tags = "predict") + + imp = xgboost::xgb.importance( model = self$model - newdata = as_numeric_matrix(ordered_features(task, self)) - lp = log(invoke( - predict, model, - newdata = newdata, - .args = pv - )) - - if (!is.null(pv$objective) && pv$objective == "survival:aft") { - lp = -lp - } - - list(crank = lp, lp = lp) + ) + set_names(imp$Gain, imp$Feature) + } + ), + + private = list( + # helper function to construct an `xgb.DMatrix` object + .get_data = function(task, pv, row_ids = NULL) { + # use all task rows if `rows_ids` is not specified + if (is.null(row_ids)) + row_ids = task$row_ids + + data = task$data(rows = row_ids, cols = task$feature_names) + target = task$data(rows = row_ids, cols = task$target_names) + targets = task$target_names + label = target[[targets[1]]] # time + status = target[[targets[2]]] + + if (pv$objective == "survival:cox") { + label[status != 1] = -1L * label[status != 1] + data = xgboost::xgb.DMatrix( + data = as_numeric_matrix(data), + label = label) + } else { + y_lower_bound = y_upper_bound = label + y_upper_bound[status == 0] = Inf + + data = xgboost::xgb.DMatrix(as_numeric_matrix(data)) + xgboost::setinfo(data, "label_lower_bound", y_lower_bound) + xgboost::setinfo(data, "label_upper_bound", y_upper_bound) + } + data + }, + + .train = function(task) { + + pv = self$param_set$get_values(tags = "train") + + if (is.null(pv$objective)) { + pv$objective = "survival:cox" } - ) + + if (pv$objective == "survival:cox") { + pv$eval_metric = "cox-nloglik" + } else { + pv$eval_metric = "aft-nloglik" + } + + data = private$.get_data(task, pv) + + if 
("weights" %in% task$properties) { + xgboost::setinfo(data, "weight", task$weights$weight) + } + + # XGBoost uses the last element in the watchlist as + # the early stopping set + if (pv$early_stopping_set != "none") { + pv$watchlist = c(pv$watchlist, list(train = data)) + } + + if (pv$early_stopping_set == "test" && !is.null(task$row_roles$test)) { + test_data = private$.get_data(task, pv, task$row_roles$test) + pv$watchlist = c(pv$watchlist, list(test = test_data)) + } + pv$early_stopping_set = NULL + + invoke(xgboost::xgb.train, data = data, .args = pv) + }, + + .predict = function(task) { + pv = self$param_set$get_values(tags = "predict") + model = self$model + newdata = as_numeric_matrix(ordered_features(task, self)) + lp = log(invoke( + predict, model, + newdata = newdata, + .args = pv + )) + + if (!is.null(pv$objective) && pv$objective == "survival:aft") { + lp = -lp + } + + list(crank = lp, lp = lp) + } ) ) -.extralrns_dict$add("surv.xgboost", function() LearnerSurvXgboost$new()) + +.extralrns_dict$add("surv.xgboost", LearnerSurvXgboost)