diff --git a/DESCRIPTION b/DESCRIPTION index 9389a34fa..381391143 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,7 @@ Imports: data.table, digest, lgr, - mlr3 (>= 0.6.0), + mlr3 (>= 0.11.0), mlr3misc (>= 0.7.0), paradox, R6, @@ -102,6 +102,7 @@ Collate: 'LearnerAvg.R' 'NO_OP.R' 'PipeOpTaskPreproc.R' + 'PipeOpAggregate.R' 'PipeOpBoxCox.R' 'PipeOpBranch.R' 'PipeOpChunk.R' diff --git a/NAMESPACE b/NAMESPACE index f4c424ba8..a7cc92ea3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(LearnerRegrAvg) export(Multiplicity) export(NO_OP) export(PipeOp) +export(PipeOpAggregate) export(PipeOpBoxCox) export(PipeOpBranch) export(PipeOpChunk) diff --git a/NEWS.md b/NEWS.md index f56e76b75..d6e04b898 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ # mlr3pipelines 0.3.4-9000 +* Changed PipeOps: + - PipeOpLearnerCV now also wraps a Resampling allowing for a wider use of resampling methods +* New PipeOps: + - PipeOpAggregate # mlr3pipelines 0.3.4 diff --git a/R/PipeOpAggregate.R b/R/PipeOpAggregate.R new file mode 100644 index 000000000..e57b057f7 --- /dev/null +++ b/R/PipeOpAggregate.R @@ -0,0 +1,148 @@ +#' @title Aggregate Features Row-Wise +#' +#' @usage NULL +#' @name mlr_pipeops_aggregate +#' @format [`R6Class`] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Aggregates features row-wise based on multiple observations indicated via a column of role `row_reference` according to expressions given as formulas. +#' Typically used after [`PipeOpLearnerCV`] and prior to [`PipeOpFeatureUnion`] if the resampling method returned multiple predictions per row id. +#' However, note that not all [`Resampling`][mlr3::Resampling] methods result in at least one prediction per original row id. +#' +#' @section Construction: +#' ``` +#' PipeOpAggregate$new(id = "aggregate", param_vals = list()) +#' ``` +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"aggregate"`. 
+#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output is a [`Task`][mlr3::Task] with the same target as the input [`Task`][mlr3::Task], with features aggregated as specified. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `aggregation` :: named `list` of `formula`\cr +#' Expressions for how features should be aggregated, in the form of `formula`. +#' Each element of the list is a `formula` with the name of the element naming the feature to aggregate and the formula expression determining the result. +#' Each formula is evaluated within [`data.table`] environments of the [`Task`][mlr3::Task] that contain all features split via the `by` argument (see below). +#' Initialized to `list()`, i.e., no aggregation is performed. +#' * `by` :: `character(1)` | `NULL`\cr +#' Name of the `row_reference` column of the [`Task`][mlr3::Task] that should be the row-wise basis for the aggregation. +#' Initialized to `NULL`, i.e., no aggregation is performed. +#' +#' @section Internals: +#' A `formula` created using the `~` operator always contains a reference to the `environment` in which +#' the `formula` is created. This makes it possible for the `~`-expressions to +#' reference both column names and variables from that environment. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. 
+#'
+#' @family PipeOps
+#' @seealso https://mlr3book.mlr-org.com/list-pipeops.html
+#' @include PipeOpTaskPreproc.R
+#' @export
+#' @examples
+#' library("mlr3")
+#' calculate_mode = function(x) {
+#'   unique_x = unique(x)
+#'   unique_x[which.max(tabulate(match(x, unique_x)))]
+#' }
+#'
+#' task = tsk("iris")
+#' learner = lrn("classif.rpart")
+#'
+#' lrnloo_po = po("learner_cv", learner, rsmp("loo"))
+#' nop = mlr_pipeops$get("nop")
+#' agg_po = po("aggregate",
+#'   aggregation = list(
+#'     classif.rpart.response = ~ calculate_mode(classif.rpart.response)
+#'   ),
+#'   by = "pre.classif.rpart")
+#'
+#' graph = gunion(list(
+#'   lrnloo_po %>>% agg_po,
+#'   nop
+#' )) %>>% po("featureunion")
+#'
+#' graph$train(task)
+#'
+#' graph$pipeops$classif.rpart$learner$predict_type = "prob"
+#' graph$param_set$values$aggregate.aggregation = list(
+#'   classif.rpart.prob.setosa = ~ mean(classif.rpart.prob.setosa),
+#'   classif.rpart.prob.versicolor = ~ mean(classif.rpart.prob.versicolor),
+#'   classif.rpart.prob.virginica = ~ mean(classif.rpart.prob.virginica)
+#' )
+#' graph$train(task)
+PipeOpAggregate = R6Class("PipeOpAggregate",
+  inherit = PipeOpTaskPreprocSimple,
+  public = list(
+    initialize = function(id = "aggregate", param_vals = list()) {
+      ps = ParamSet$new(params = list(
+        ParamUty$new("aggregation", tags = c("train", "predict", "required"), custom_check = check_aggregation_formulae),
+        ParamUty$new("by", tags = c("train", "predict", "required"), custom_check = function(x) check_string(x, null.ok = TRUE))
+      ))
+      ps$values = list(aggregation = list(), by = NULL)
+      super$initialize(id, ps, param_vals = param_vals, tags = "ensemble")
+    }
+  ),
+  private = list(
+    .transform = function(task) {
+      # aggregate every feature per value of the `by` column and return a new Task of the same type as the input
+      pv = self$param_set$values
+      if (length(pv$aggregation) == 0L || is.null(pv$by)) {
+        return(task)  # early exit
+      }
+
+      assert_set_equal(names(pv$aggregation), task$feature_names)
+      assert_choice(pv$by, choices = task$col_roles$row_reference)
+      # features plus row_reference columns, split into one chunk per value of `by`
+      taskdata = task$data(cols = c(task$feature_names, task$col_roles$row_reference))
+      taskdata_split = split(taskdata, by = pv$by)
+      # one row per value of `by`; `by` was validated above and is selected directly (looking it up through match() on the row_reference roles breaks as soon as there is more than one such column)
+      newdata = unique(task$data(cols = c(task$target_names, pv$by)), by = pv$by)
+      # the rhs of each formula is evaluated within every chunk, with the environment of the formula as enclosure
+      nms = names(pv$aggregation)
+      for (i in seq_along(nms)) {
+        frm = pv$aggregation[[i]]
+        set(newdata, j = nms[i], value = unlist(map(taskdata_split, .f = function(chunk) eval(frm[[2L]], envir = chunk, enclos = environment(frm)))))
+      }
+      setnames(newdata, old = pv$by, new = task$backend$primary_key)
+      # get task_type from mlr_reflections and call constructor
+      constructor = get(mlr_reflections$task_types[["task"]][chmatch(task$task_type, table = mlr_reflections$task_types[["type"]], nomatch = 0L)][[1L]])
+      newtask = invoke(constructor$new, id = task$id, backend = as_data_backend(newdata, primary_key = task$backend$primary_key), target = task$target_names, .args = task$extra_args)
+      newtask$extra_args = task$extra_args
+
+      newtask
+    }
+  )
+)
+
+mlr_pipeops$add("aggregate", PipeOpAggregate)
+
+# check the `aggregation` parameter of PipeOpAggregate (used as its `custom_check`)
+# @param x [list] whatever `aggregation` is being set to
+# checks that `aggregation` is
+# * a uniquely named list of `formula`
+# * that each element has only a rhs (a one-sided formula has length 2)
+check_aggregation_formulae = function(x) {
+  check_list(x, types = "formula", names = "unique") %check&&%
+    Reduce(`%check&&%`, lapply(x, function(xel) {
+      if (length(xel) != 2L) {
+        return(sprintf("formula %s must not have a left hand side.",
+          deparse(xel, nlines = 1L, width.cutoff = 500L)))
+      }
+      TRUE
+    }), TRUE)
+}
+
diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R
index 7afd4c433..8c2c11d6a 100644
--- a/R/PipeOpLearnerCV.R
+++ b/R/PipeOpLearnerCV.R
@@ -1,13 +1,13 @@
-#' @title Wrap a Learner into a PipeOp with Cross-validated Predictions as Features
+#' @title Wrap a
Learner into a PipeOp with Resampled Predictions as Features #' #' @usage NULL #' @name mlr_pipeops_learner_cv #' @format [`R6Class`] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Wraps an [`mlr3::Learner`] into a [`PipeOp`]. +#' Wraps a [`mlr3::Learner`] and [`mlr3::Resampling`] into a [`PipeOp`]. #' -#' Returns cross-validated predictions during training as a [`Task`][mlr3::Task] and stores a model of the +#' Returns resampled predictions during training as a [`Task`][mlr3::Task] and stores a model of the #' [`Learner`][mlr3::Learner] trained on the whole data in `$state`. This is used to create a similar #' [`Task`][mlr3::Task] during prediction. #' @@ -16,21 +16,29 @@ #' for `$predict.type` `"prob"` the `.prob.` features are created, and for `$predict.type` `"se"` the new columns #' are `.response` and `.se`. `` denotes the `$id` of the [`PipeOpLearnerCV`] object. #' -#' Inherits the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] it is constructed from. +#' In the case of the resampling method returning multiple predictions per row id, the predictions +#' are returned unaltered. The output [`Task`][mlr3::Task] always gains a `row_reference` column +#' named `pre.` indicating the original row id prior to the resampling process. [`PipeOpAggregate`] should then +#' be used to aggregate these multiple predictions per row id. +#' +#' Inherits both the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] and +#' [`Resampling`][mlr3::Resampling] it is constructed from. The parameter ids of the latter one are prefixed with `"resampling."` +#' and the tags of these parameters are extended by `"train"`. #' #' [`PipeOpLearnerCV`] can be used to create "stacking" or "super learning" [`Graph`]s that use the output of one [`Learner`][mlr3::Learner] -#' as feature for another [`Learner`][mlr3::Learner]. 
Because the [`PipeOpLearnerCV`] erases the original input features, it is often +#' as features for another [`Learner`][mlr3::Learner]. Because the [`PipeOpLearnerCV`] erases the original input features, it is often #' useful to use [`PipeOpFeatureUnion`] to bind the prediction [`Task`][mlr3::Task] to the original input [`Task`][mlr3::Task]. #' #' @section Construction: #' ``` -#' PipeOpLearnerCV$new(learner, id = NULL, param_vals = list()) +#' PipeOpLearnerCV$new(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) #' ``` #' #' * `learner` :: [`Learner`][mlr3::Learner] \cr -#' [`Learner`][mlr3::Learner] to use for cross validation / prediction, or a string identifying a -#' [`Learner`][mlr3::Learner] in the [`mlr3::mlr_learners`] [`Dictionary`][mlr3misc::Dictionary]. -#' * `id` :: `character(1)` +#' [`Learner`][mlr3::Learner] to use for resampling / prediction. +#' * `resampling` :: [`Resampling`][mlr3::Resampling] \cr +#' [`Resampling`][mlr3::Resampling] to use for resampling. Initialized to 3-fold cross-validation. +#' * `id` :: `character(1)`\cr #' Identifier of the resulting object, internally defaulting to the `id` of the [`Learner`][mlr3::Learner] being wrapped. #' * `param_vals` :: named `list`\cr #' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. @@ -42,7 +50,7 @@ #' [`PipeOpLearnerCV`] has one output channel named `"output"`, producing a [`Task`][mlr3::Task] specific to the [`Learner`][mlr3::Learner] #' type given to `learner` during construction; both during training and prediction. #' -#' The output is a task with the same target as the input task, with features replaced by predictions made by the [`Learner`][mlr3::Learner]. +#' The output is a [`Task`][mlr3::Task] with the same target as the input [`Task`][mlr3::Task], with features replaced by predictions made by the [`Learner`][mlr3::Learner]. 
#' During training, this prediction is the out-of-sample prediction made by [`resample`][mlr3::resample], during prediction, this is the #' ordinary prediction made on the data by a [`Learner`][mlr3::Learner] trained on the training phase data. #' @@ -61,13 +69,9 @@ #' Prediction time, in seconds. #' #' @section Parameters: -#' The parameters are the parameters inherited from the [`PipeOpTaskPreproc`], as well as the parameters of the [`Learner`][mlr3::Learner] wrapped by this object. +#' The parameters are the parameters inherited from the [`PipeOpTaskPreproc`], as well as the parameters of the [`Learner`][mlr3::Learner] and +#' [`Resampling`][mlr3::Resampling] wrapped by this object. #' Besides that, parameters introduced are: -#' * `resampling.method` :: `character(1)`\cr -#' Which resampling method do we want to use. Currently only supports `"cv"` and `"insample"`. `"insample"` generates -#' predictions with the model trained on all training data. -#' * `resampling.folds` :: `numeric(1)`\cr -#' Number of cross validation folds. Initialized to 3. Only used for `resampling.method = "cv"`. #' * `keep_response` :: `logical(1)`\cr #' Only effective during `"prob"` prediction: Whether to keep response values, if available. Initialized to `FALSE`. #' @@ -80,6 +84,8 @@ #' [`Learner`][mlr3::Learner] that is being wrapped. Read-only. #' * `learner_model` :: [`Learner`][mlr3::Learner]\cr #' [`Learner`][mlr3::Learner] that is being wrapped. This learner contains the model if the `PipeOp` is trained. Read-only. +#' * `resampling` :: [`Resampling`][mlr3::Resampling]\cr +#' [`Resampling`][mlr3::Resampling] that is being wrapped. Read-only. #' #' @section Methods: #' Methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
@@ -95,7 +101,7 @@ #' task = tsk("iris") #' learner = lrn("classif.rpart") #' -#' lrncv_po = po("learner_cv", learner) +#' lrncv_po = po("learner_cv", learner, rsmp("cv")) #' lrncv_po$learner$predict_type = "response" #' #' nop = mlr_pipeops$get("nop") @@ -113,29 +119,29 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", inherit = PipeOpTaskPreproc, public = list( - initialize = function(learner, id = NULL, param_vals = list()) { + initialize = function(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) { private$.learner = as_learner(learner, clone = TRUE) private$.learner$param_set$set_id = "" - id = id %??% private$.learner$id - # FIXME: can be changed when mlr-org/mlr3#470 has an answer + private$.resampling = as_resampling(resampling, clone = TRUE) + private$.resampling$param_set$set_id = "resampling" + + # tags of resampling parameters should include "train"; we fix this here + for (i in seq_along(private$.resampling$param_set$params)) { + private$.resampling$param_set$params[[i]]$tags = c("train", private$.resampling$param_set$params[[i]]$tags) + } + + + id = id %??% self$learner$id task_type = mlr_reflections$task_types[get("type") == private$.learner$task_type][order(get("package"))][1L]$task - private$.crossval_param_set = ParamSet$new(params = list( - ParamFct$new("method", levels = c("cv", "insample"), tags = c("train", "required")), - ParamInt$new("folds", lower = 2L, upper = Inf, tags = c("train", "required")), + private$.additional_param_set = ParamSet$new(params = list( ParamLgl$new("keep_response", tags = c("train", "required")) )) - private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) - private$.crossval_param_set$set_id = "resampling" - # Dependencies in paradox have been broken from the start and this is known since at least a year: - # https://github.com/mlr-org/paradox/issues/216 - # The following would make it _impossible_ to set "method" to "insample", because then "folds" - # is 
both _required_ (required tag above) and at the same time must be unset (because of this - # dependency). We will opt for the least annoying behaviour here and just not use dependencies - # in PipeOp ParamSets. - # private$.crossval_param_set$add_dep("folds", "method", CondEqual$new("cv")) # don't do this. + private$.additional_param_set$values = list(keep_response = FALSE) + private$.additional_param_set$set_id = "" - super$initialize(id, alist(private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) + super$initialize(id, param_set = alist(private$.resampling$param_set, private$.additional_param_set, private$.learner$param_set), + param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) } ), @@ -159,53 +165,71 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", } else { multiplicity_recurse(self$state, clone_with_state, learner = private$.learner) } + }, + resampling = function(val) { + if (!missing(val)) { + if (!identical(val, private$.resampling)) { + stop("$resampling is read-only.") + } + } + private$.resampling } ), private = list( .train_task = function(task) { on.exit({private$.learner$state = NULL}) - - # Train a learner for predicting + # train a learner for predicting self$state = private$.learner$train(task)$state - pv = private$.crossval_param_set$values - # Compute CV Predictions - if (pv$method != "insample") { - rdesc = mlr_resamplings$get(pv$method) - if (pv$method == "cv") rdesc$param_set$values = list(folds = pv$folds) - rr = resample(task, private$.learner, rdesc) - prds = as.data.table(rr$prediction(predict_sets = "test")) - } else { - prds = as.data.table(private$.learner$predict(task)) - } + # compute resampled predictions + rr = resample(task, private$.learner, private$.resampling) + prds = as.data.table(rr$prediction(predict_sets = "test")) - private$pred_to_task(prds, task) + 
private$.pred_to_task(prds, task) }, .predict_task = function(task) { on.exit({private$.learner$state = NULL}) private$.learner$state = self$state - prediction = as.data.table(private$.learner$predict(task)) - private$pred_to_task(prediction, task) + prds = as.data.table(private$.learner$predict(task)) + private$.pred_to_task(prds, task) }, - pred_to_task = function(prds, task) { - if (!is.null(prds$truth)) prds[, truth := NULL] - if (!self$param_set$values$resampling.keep_response && self$learner$predict_type == "prob") { + .pred_to_task = function(prds, task) { + if (!self$param_set$values$keep_response && self$learner$predict_type == "prob") { prds[, response := NULL] } - renaming = setdiff(colnames(prds), c("row_id", "row_ids")) - setnames(prds, renaming, sprintf("%s.%s", self$id, renaming)) + renaming = setdiff(colnames(prds), c("row_ids", "truth")) + setnames(prds, old = renaming, new = sprintf("%s.%s", self$id, renaming)) + setnames(prds, old = "truth", new = task$target_names) + row_reference = paste0("pre.", self$id) + while (row_reference %in% task$col_info$id) { + row_reference = paste0(row_reference, ".") + } + setnames(prds, old = "row_ids", new = row_reference) - # This can be simplified for mlr3 >= 0.11.0; - # will be always "row_ids" - row_id_col = intersect(colnames(prds), c("row_id", "row_ids")) - setnames(prds, old = row_id_col, new = task$backend$primary_key) - task$select(character(0))$cbind(prds) + # the following is needed to pertain correct row ids in the case of e.g. 
cv + # here we do not necessarily apply PipeOpAggregate later + backend = if (identical(sort(prds[[row_reference]]), sort(task$row_ids))) { + set(prds, j = task$backend$primary_key, value = prds[[row_reference]]) + as_data_backend(prds, primary_key = task$backend$primary_key) + } else { + as_data_backend(prds) + } + + # get task_type from mlr_reflections and call constructor + constructor = get(mlr_reflections$task_types[["task"]][chmatch(task$task_type, table = mlr_reflections$task_types[["type"]], nomatch = 0L)][[1L]]) + newtask = invoke(constructor$new, id = task$id, backend = backend, target = task$target_names, .args = task$extra_args) + newtask$extra_args = task$extra_args + newtask$set_col_roles(row_reference, "row_reference") + + newtask }, - .crossval_param_set = NULL, - .learner = NULL + .additional_param_set = NULL, + .learner = NULL, + .resampling = NULL ) ) mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) + diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 1990c5c61..39522762a 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -143,7 +143,12 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", }, .task_to_prediction = function(input) { prob = as.matrix(input$data(cols = input$feature_names)) - colnames(prob) = unlist(input$levels()) + # setting the column names the following way is safer + nms = map_chr(strsplit(colnames(prob), "\\."), function(x) x[length(x)]) + if (!setequal(nms, input$levels(input$target_names)[[input$target_names]])) { + stopf("Cannot assign correct class levels to probability columns.") + } + colnames(prob) = map_chr(strsplit(colnames(prob), "\\."), function(x) x[length(x)]) PredictionClassif$new(input, row_ids = input$row_ids, truth = input$truth(), response = factor(colnames(prob)[max.col(prob, ties.method = "random")], levels = unlist(input$levels())), prob = prob) diff 
--git a/R/zzz.R b/R/zzz.R index 885b08e68..40afa1d20 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,6 +15,9 @@ register_mlr3 = function() { c("abstract", "meta", "missings", "feature selection", "imbalanced data", "data transform", "target transform", "ensemble", "robustify", "learner", "encode", "multiplicity"))) + if (!all(grepl("row_reference", x$task_col_roles))) { + x$task_col_roles = map(x$task_col_roles, function(col_roles) c(col_roles, "row_reference")) + } } .onLoad = function(libname, pkgname) { # nocov start diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 15c71495a..b252e56e7 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -225,6 +225,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index f9dc38e0e..f7bc22365 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -102,6 +102,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 2e254b0c8..e29fcc67b 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -132,6 +132,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 9a567930c..539cfa103 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -143,6 +143,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 54d44c0bb..6b4ac96b1 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -192,6 +192,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index 73d30ad7e..7058f250a 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index 156975a4d..e2b3b3452 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_aggregate.Rd b/man/mlr_pipeops_aggregate.Rd new file mode 100644 index 000000000..2c087840a --- /dev/null +++ b/man/mlr_pipeops_aggregate.Rd @@ -0,0 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpAggregate.R +\name{mlr_pipeops_aggregate} +\alias{mlr_pipeops_aggregate} +\alias{PipeOpAggregate} +\title{Aggregate Features Row-Wise} +\format{ +\code{\link{R6Class}} object inheriting from 
\code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Aggregates features row-wise based on multiple observations indicated via a column of role \code{row_reference} according to expressions given as formulas. +Typically used after \code{\link{PipeOpLearnerCV}} and prior to \code{\link{PipeOpFeatureUnion}} if the resampling method returned multiple predictions per row id. +However, note that not all \code{\link[mlr3:Resampling]{Resampling}} methods result in at least one prediction per original row id. +} +\section{Construction}{ +\preformatted{PipeOpAggregate$new(id = "aggregate", param_vals = list()) +} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"aggregate"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. +The output is a \code{\link[mlr3:Task]{Task}} with the same target as the input \code{\link[mlr3:Task]{Task}}, with features aggregated as specified. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{aggregation} :: named \code{list} of \code{formula}\cr +Expressions for how features should be aggregated, in the form of \code{formula}. +Each element of the list is a \code{formula} with the name of the element naming the feature to aggregate and the formula expression determining the result. 
+Each formula is evaluated within \code{\link{data.table}} environments of the \code{\link[mlr3:Task]{Task}} that contain all features split via the \code{by} argument (see below). +Initialized to \code{list()}, i.e., no aggregation is performed. +\item \code{by} :: \code{character(1)} | \code{NULL}\cr +Column indicating the \code{row_reference} column of the \code{\link[mlr3:Task]{Task}} that should be the row-wise basis for the aggregation. +Initialized to \code{NULL}, i.e., no aggregation is performed. +} +} + +\section{Internals}{ + +A \code{formula} created using the \code{~} operator always contains a reference to the \code{environment} in which +the \code{formula} is created. This makes it possible to use variables in the \code{~}-expressions that both +reference either column names or variable names. +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. 
+} + +\examples{ +library("mlr3") +calculate_mode = function(x) { + unique_x = unique(x) + unique_x[which.max(tabulate(match(x, unique_x)))] +} + +task = tsk("iris") +learner = lrn("classif.rpart") + +lrnloo_po = po("learner_cv", learner, rsmp("loo")) +nop = mlr_pipeops$get("nop") +agg_po = po("aggregate", + aggregation = list( + classif.rpart.response = ~ calculate_mode(classif.rpart.response) + ), + by = "pre.classif.rpart") + +graph = gunion(list( + lrnloo_po \%>>\% agg_po, + nop +)) \%>>\% po("featureunion") + +graph$train(task) + +graph$pipeops$classif.rpart$learner$predict_type = "prob" +graph$param_set$values$aggregate.aggregation = list( + classif.rpart.prob.setosa = ~ mean(classif.rpart.prob.setosa), + classif.rpart.prob.versicolor = ~ mean(classif.rpart.prob.versicolor), + classif.rpart.prob.virginica = ~ mean(classif.rpart.prob.virginica) +) +graph$train(task) +} +\seealso{ +https://mlr3book.mlr-org.com/list-pipeops.html + +Other PipeOps: +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOp}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, 
+\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}}, +\code{\link{mlr_pipeops}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index cf7b8b976..a6d52e3f4 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -85,6 +85,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, 
+\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index 256afebab..e5242bba4 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index e7dc01689..3c0787cef 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -84,6 +84,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 4e87e9ac5..3fe479b1e 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -125,6 +125,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index f9aab7eb4..e51f59e0d 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -99,6 +99,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classweights.Rd 
b/man/mlr_pipeops_classweights.Rd index deed5fcb7..91fcc2282 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -93,6 +93,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index ec8ff0d99..fdfa50a5e 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -114,6 +114,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 4404732c0..e06bc020b 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -81,6 +81,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index f342d33d5..89c08da05 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index 02ae18124..3bf4aae61 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -103,6 +103,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5c84d7451..c35830cdc 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 80e336189..5ee052258 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -106,6 +106,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 0be88b7da..9f2a9afc0 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -98,6 +98,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index aebf5291b..8b84935ce 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git 
a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index c99233a66..6f5c10dd3 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -118,6 +118,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a87ccb638..fdb9d8ef1 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -127,6 +127,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 66a9226da..61ebd0b39 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index 7cef85cce..0eabd0a4a 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -85,6 +85,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index da0800c70..cae4243ba 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -111,6 +111,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index 4ffd9ecdb..5392bf6e8 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index 43a3beb86..ea5dd8a94 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -72,6 +72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index f86074f27..f4eada177 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 9a34246aa..15016de56 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -72,6 +72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, 
\code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index b89c02ee3..82df3dd15 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -72,6 +72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 1ec28fc65..c82b59fe4 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -79,6 +79,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index c141c4d33..cf07c0d3a 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index e31bcb461..1617527a8 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -74,6 +74,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index 1b426e65d..85ec21d15 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ 
b/man/mlr_pipeops_kernelpca.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 9a5a12024..09787d973 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index 27f2ce86a..1eb3457e4 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -3,14 +3,14 @@ \name{mlr_pipeops_learner_cv} \alias{mlr_pipeops_learner_cv} \alias{PipeOpLearnerCV} -\title{Wrap a Learner into a PipeOp with Cross-validated Predictions as Features} +\title{Wrap a Learner into a PipeOp with Resampled Predictions as Features} \format{ \code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \description{ -Wraps an \code{\link[mlr3:Learner]{mlr3::Learner}} into a \code{\link{PipeOp}}. +Wraps a \code{\link[mlr3:Learner]{mlr3::Learner}} and \code{\link[mlr3:Resampling]{mlr3::Resampling}} into a \code{\link{PipeOp}}. -Returns cross-validated predictions during training as a \code{\link[mlr3:Task]{Task}} and stores a model of the +Returns resampled predictions during training as a \code{\link[mlr3:Task]{Task}} and stores a model of the \code{\link[mlr3:Learner]{Learner}} trained on the whole data in \verb{$state}. This is used to create a similar \code{\link[mlr3:Task]{Task}} during prediction. 
@@ -19,20 +19,28 @@ The \code{\link[mlr3:Task]{Task}} gets features depending on the capsuled \code{ for \verb{$predict.type} \code{"prob"} the \verb{<ID>.prob.<CLASS>} features are created, and for \verb{$predict.type} \code{"se"} the new columns are \verb{<ID>.response} and \verb{<ID>.se}. \verb{<ID>} denotes the \verb{$id} of the \code{\link{PipeOpLearnerCV}} object. -Inherits the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} it is constructed from. +In the case of the resampling method returning multiple predictions per row id, the predictions +are returned unaltered. The output \code{\link[mlr3:Task]{Task}} always gains a \code{row_reference} column +named \verb{pre.<ID>} indicating the original row id prior to the resampling process. \code{\link{PipeOpAggregate}} should then +be used to aggregate these multiple predictions per row id. + +Inherits both the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} and +\code{\link[mlr3:Resampling]{Resampling}} it is constructed from. The parameter ids of the latter one are prefixed with \code{"resampling."} +and the tags of these parameters are extended by \code{"train"}. \code{\link{PipeOpLearnerCV}} can be used to create "stacking" or "super learning" \code{\link{Graph}}s that use the output of one \code{\link[mlr3:Learner]{Learner}} -as feature for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often +as features for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often useful to use \code{\link{PipeOpFeatureUnion}} to bind the prediction \code{\link[mlr3:Task]{Task}} to the original input \code{\link[mlr3:Task]{Task}}. 
} \section{Construction}{ -\preformatted{PipeOpLearnerCV$new(learner, id = NULL, param_vals = list()) +\preformatted{PipeOpLearnerCV$new(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) } \itemize{ \item \code{learner} :: \code{\link[mlr3:Learner]{Learner}} \cr -\code{\link[mlr3:Learner]{Learner}} to use for cross validation / prediction, or a string identifying a -\code{\link[mlr3:Learner]{Learner}} in the \code{\link[mlr3:mlr_learners]{mlr3::mlr_learners}} \code{\link[mlr3misc:Dictionary]{Dictionary}}. -\item \code{id} :: \code{character(1)} +\code{\link[mlr3:Learner]{Learner}} to use for resampling / prediction. +\item \code{resampling} :: \code{\link[mlr3:Resampling]{Resampling}} \cr +\code{\link[mlr3:Resampling]{Resampling}} to use for resampling. Initialized to 3-fold cross-validation. +\item \code{id} :: \code{character(1)}\cr Identifier of the resulting object, internally defaulting to the \code{id} of the \code{\link[mlr3:Learner]{Learner}} being wrapped. \item \code{param_vals} :: named \code{list}\cr List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. @@ -47,7 +55,7 @@ type given to \code{learner} during construction; both during training and predi \code{\link{PipeOpLearnerCV}} has one output channel named \code{"output"}, producing a \code{\link[mlr3:Task]{Task}} specific to the \code{\link[mlr3:Learner]{Learner}} type given to \code{learner} during construction; both during training and prediction. -The output is a task with the same target as the input task, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. +The output is a \code{\link[mlr3:Task]{Task}} with the same target as the input \code{\link[mlr3:Task]{Task}}, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. 
During training, this prediction is the out-of-sample prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the ordinary prediction made on the data by a \code{\link[mlr3:Learner]{Learner}} trained on the training phase data. } @@ -72,14 +80,10 @@ Prediction time, in seconds. \section{Parameters}{ -The parameters are the parameters inherited from the \code{\link{PipeOpTaskPreproc}}, as well as the parameters of the \code{\link[mlr3:Learner]{Learner}} wrapped by this object. +The parameters are the parameters inherited from the \code{\link{PipeOpTaskPreproc}}, as well as the parameters of the \code{\link[mlr3:Learner]{Learner}} and +\code{\link[mlr3:Resampling]{Resampling}} wrapped by this object. Besides that, parameters introduced are: \itemize{ -\item \code{resampling.method} :: \code{character(1)}\cr -Which resampling method do we want to use. Currently only supports \code{"cv"} and \code{"insample"}. \code{"insample"} generates -predictions with the model trained on all training data. -\item \code{resampling.folds} :: \code{numeric(1)}\cr -Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}. \item \code{keep_response} :: \code{logical(1)}\cr Only effective during \code{"prob"} prediction: Whether to keep response values, if available. Initialized to \code{FALSE}. } @@ -98,6 +102,8 @@ Fields inherited from \code{\link{PipeOp}}, as well as: \code{\link[mlr3:Learner]{Learner}} that is being wrapped. Read-only. \item \code{learner_model} :: \code{\link[mlr3:Learner]{Learner}}\cr \code{\link[mlr3:Learner]{Learner}} that is being wrapped. This learner contains the model if the \code{PipeOp} is trained. Read-only. +\item \code{resampling} :: \code{\link[mlr3:Resampling]{Resampling}}\cr +\code{\link[mlr3:Resampling]{Resampling}} that is being wrapped. Read-only. 
} } @@ -112,7 +118,7 @@ library("mlr3") task = tsk("iris") learner = lrn("classif.rpart") -lrncv_po = po("learner_cv", learner) +lrncv_po = po("learner_cv", learner, rsmp("cv")) lrncv_po$learner$predict_type = "response" nop = mlr_pipeops$get("nop") diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index 2e04a6645..00f6b7589 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index a001d496c..35fcee80f 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -78,6 +78,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index bd0398108..9aa10147b 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -84,6 +84,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index e7fa51394..1b820c814 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -90,6 +90,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, 
\code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index d8b9aa8d4..9554ced38 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -95,6 +95,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 69de35de1..eb3602e6b 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -124,6 +124,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index 72e23ec84..29633ce14 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -80,6 +80,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 7d7e62379..172815f29 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -95,6 +95,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index 4c58a76fe..64ffaff54 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -90,6 +90,7 
@@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index df07ac656..a968adccd 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index 343e40014..a110c3a7f 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 59c70c60e..113d10669 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -77,6 +77,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 7567e8ef0..96dd4906d 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, 
\code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index 557be29e7..9191ea642 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -104,6 +104,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index 054da76d8..f25ab5a40 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -90,6 +90,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index e4743aff6..e5a318c03 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -82,6 +82,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 714611a68..299595f29 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -81,6 +81,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 5a5a4ab15..dea415fac 100644 --- 
a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -74,6 +74,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 1189e238b..718c68032 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -96,6 +96,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index cf765c8dc..d7c72eb6f 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -71,6 +71,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 34c58e39d..93c2a01bb 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -76,6 +76,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index ffaf3c5a7..df47a817c 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -92,6 +92,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, 
\code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index c6870bda0..59fd8e0d3 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -93,6 +93,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index e8b2ee70c..eeb735863 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -71,6 +71,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index a66619dd4..2f4c2e5ea 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index e76f0f094..33b7c9d02 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -71,6 +71,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index 6c4953cdb..fc437d1e3 100644 --- 
a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index 53f983901..c3bf733d9 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -83,6 +83,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index fccc3503c..c392f396f 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -181,6 +181,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 8aa23ccc0..e6129b226 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -76,6 +76,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 56947c7ef..f51cf126b 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -97,6 +97,7 @@ Other PipeOps: 
\code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 8cbb4dacc..2a0f63dc7 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -83,6 +83,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 245314651..29525b78f 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -96,6 +96,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index d2747fbcb..e45abd615 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -149,6 +149,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 32eb7f47c..5dba9be0b 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, 
\code{\link{mlr_pipeops_chunk}},
diff --git a/tests/testthat/test_conversion.R b/tests/testthat/test_conversion.R
index 68526a694..d7ce9480d 100644
--- a/tests/testthat/test_conversion.R
+++ b/tests/testthat/test_conversion.R
@@ -155,7 +155,7 @@ test_that("PipeOp to GraphLearner", {
 
   expect_equal(r1, r3)
 
-  po_cv = po("learner_cv", learner = po, param_vals = list(resampling.method = "insample"))
+  po_cv = po("learner_cv", learner = po, resampling = rsmp("insample"))
   expect_true("GraphLearner" %in% class(po_cv$learner))
 
   train_out = po_cv$train(list(task))
diff --git a/tests/testthat/test_pipeop_aggregate.R b/tests/testthat/test_pipeop_aggregate.R
new file mode 100644
index 000000000..da6bbcf68
--- /dev/null
+++ b/tests/testthat/test_pipeop_aggregate.R
@@ -0,0 +1,159 @@
+context("PipeOpAggregate")
+
+test_that("PipeOpAggregate - basic properties", {
+  op = PipeOpAggregate$new()
+  expect_pipeop(op)
+
+  # generic tests
+  task = tsk("iris")
+  task$select(cols = "Petal.Length")
+  expect_datapreproc_pipeop_class(PipeOpAggregate, task = task)
+
+  op$param_set$values$aggregation = list(NO_DEF = ~ mean(NO_DEF))
+  expect_equal(task$data(), op$train(list(task))[[1L]]$data())
+
+  op$param_set$values$aggregation = list()
+  op$param_set$values$by = "NO_DEF"
+  expect_equal(task$data(), op$train(list(task))[[1L]]$data())
+
+  op$param_set$values$aggregation = list(NO_DEF = ~ mean(NO_DEF))
+  expect_error(op$train(list(task)), regexp = "Must be equal to")
+  op$param_set$values$aggregation = list(Petal.Length = ~ mean(Petal.Length))
+  expect_error(op$train(list(task)), regexp = "Must be element of")
+
+  # toy aggregation works: three groups of 50 rows, numeric mean and categorical mode per group
+  calculate_mode = function(x) {
+    unique_x = unique(x)
+    unique_x[which.max(tabulate(match(x, unique_x)))]
+  }
+  task$cbind(data.table(row_reference = rep(1:3, each = 50L)))
+  task$cbind(data.table(categorical = as.factor(rep(c("a", "b", "c"), 50L))))
+  task$set_col_roles("row_reference", roles = "row_reference")
+  op$param_set$values$aggregation = list(Petal.Length = ~ mean(Petal.Length), categorical = ~ calculate_mode(categorical))
+  op$param_set$values$by = "row_reference"
+  train_out = op$train(list(task))[[1L]]
+  expect_data_table(train_out$data(), nrows = 3L, ncols = 3L)
+  expect_equal(train_out$data(cols = "Petal.Length")[["Petal.Length"]],
+    aggregate(Petal.Length ~ row_reference, FUN = mean, data = task$data(cols = c(task$feature_names, task$col_roles$row_reference)))[["Petal.Length"]])
+  expect_equal(train_out$data(cols = "categorical")[["categorical"]],
+    aggregate(categorical ~ row_reference, FUN = calculate_mode, data = task$data(cols = c(task$feature_names, task$col_roles$row_reference)))[["categorical"]])
+})
+
+test_that("PipeOpLearnerCV and PipeOpAggregate- different methods", {
+  skip_on_cran() # takes too long
+
+  calculate_mode = function(x) {
+    unique_x = unique(x)
+    unique_x[which.max(tabulate(match(x, unique_x)))]
+  }
+
+  # helper: trains polrn, then poagg, on task with the given predict_type and checks row ids and prediction columns
+  test_valid_resampled_task = function(polrn, poagg, task, predict_type) {
+    polrn$learner$predict_type = predict_type
+
+    lrn_out = polrn$train(list(task))[[1L]]
+    rsmp_class = class(polrn$resampling)[[1L]] # NOTE(review): assumes a `$resampling` accessor on PipeOpLearnerCV; if absent this is "NULL" and the lenient branches run
+    if (rsmp_class %in% c("ResamplingCV", "ResamplingInsample", "ResamplingLoo")) {
+      expect_identical(lrn_out$row_ids, task$row_ids)
+    } else {
+      expect_subset(lrn_out$data(cols = lrn_out$col_roles$row_reference)[[lrn_out$col_roles$row_reference]], task$row_ids)
+    }
+
+    agg_out = poagg$train(list(lrn_out))[[1L]]
+    if (rsmp_class %in% c("ResamplingCV", "ResamplingInsample", "ResamplingLoo", "ResamplingRepeatedCV")) {
+      expect_identical(agg_out$row_ids, task$row_ids)
+    } else {
+      expect_subset(agg_out$row_ids, task$row_ids)
+    }
+
+    if (task$task_type == "classif") {
+      if (polrn$learner$predict_type == "response") {
+        feature = agg_out$data(cols = grep("\\.response$", agg_out$feature_names, value = TRUE))[[1L]]
+        expect_true(is.factor(feature))
+        expect_identical(task$class_names, levels(feature))
+      } else { # "prob"
+        features = agg_out$data(cols = grep("\\.prob\\.", agg_out$feature_names, value = TRUE))
+        sums = rowSums(is.na(features))
+        expect_true(all(sums == 0 | sums == NCOL(features))) # either all or none missing
+        features = features[sums == 0, ]
+        expect_true(all(apply(features, MARGIN = 2L, function(x) x >= 0 & x <= 1))) # between 0 and 1
+        expect_equal(rowSums(features), rep_len(1, length.out = NROW(features))) # sum is 1
+      }
+    } else { # "regr"
+      if (polrn$learner$predict_type == "response") {
+        feature = agg_out$data(cols = grep("\\.response$", agg_out$feature_names, value = TRUE))[[1L]]
+        expect_true(is.numeric(feature))
+      } else { # "se"
+        features = agg_out$data(cols = grep("\\.(response|se)$", agg_out$feature_names, value = TRUE))
+        expect_true(all(vapply(features, is.numeric, logical(1L)))) # per-column check, no matrix coercion
+      }
+    }
+  }
+
+  set.seed(1234)
+  # faster training
+  taskc = tsk("german_credit")$filter(sample(1000, 50))
+  taskc$select("age")
+  taskr = tsk("boston_housing")$filter(sample(sample(506, 50)))
+  taskr$select("rad")
+
+  poaggcr = PipeOpAggregate$new(
+    param_vals = list(aggregation = list(classif.rpart.response = ~ calculate_mode(classif.rpart.response)),
+    by = "pre.classif.rpart"))
+  poaggcp = PipeOpAggregate$new(
+    param_vals = list(aggregation = list(classif.rpart.prob.bad = ~ mean(classif.rpart.prob.bad), classif.rpart.prob.good = ~ mean(classif.rpart.prob.good)),
+    by = "pre.classif.rpart"))
+  poaggrs = PipeOpAggregate$new(
+    param_vals = list(aggregation = list(regr.lm.response = ~ mean(regr.lm.response), regr.lm.se = ~ mean(regr.lm.se)),
+    by = "pre.regr.lm"))
+
+  # cv
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("cv", folds = 2L))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("cv", folds = 2L))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # insample
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("insample"))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("insample"))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # bootstrap
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("bootstrap", repeats = 2L))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("bootstrap", repeats = 2L))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # holdout
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("holdout"))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("holdout"))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # loo
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("loo"))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("loo"))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # repeated_cv
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("repeated_cv", folds = 2L, repeats = 2L))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("repeated_cv", folds = 2L, repeats = 2L))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+
+  # subsampling
+  polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("subsampling", repeats = 2L))
+  polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("subsampling", repeats = 2L))
+  test_valid_resampled_task(polrnc, poaggcr, taskc, "response")
+  test_valid_resampled_task(polrnc, poaggcp, taskc, "prob")
+  test_valid_resampled_task(polrnr, poaggrs, taskr, "se")
+})
+
diff --git a/tests/testthat/test_pipeop_colroles.R b/tests/testthat/test_pipeop_colroles.R
index 0257f8b8d..adab8a9cf 100644
--- a/tests/testthat/test_pipeop_colroles.R
+++ b/tests/testthat/test_pipeop_colroles.R
@@ -34,7 +34,7 @@ test_that("PipeOpColRoles - functionality works", {
   train_out = train_pipeop(op, inputs = list(task))$output
   expect_equal(train_out$col_roles,
     list(feature = c("Sepal.Length", "Sepal.Width"), target = "Species", name = "Petal.Length",
-      order = "Petal.Length", stratum = character(), group = character(), weight = character(), uri = character(0)
+      order = "Petal.Length", stratum = character(0L), group = character(0L), weight = character(0L), uri = character(0L), row_reference = character(0L)
     )
   )
   expect_equal(train_out$row_names$row_name, task$data(cols = "Petal.Length")[[1L]])
diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R
index 62595d6c4..1112f3e13 100644
--- a/tests/testthat/test_pipeop_learnercv.R
+++ b/tests/testthat/test_pipeop_learnercv.R
@@ -4,18 +4,18 @@ test_that("PipeOpLearnerCV - basic properties", {
   lrn = mlr_learners$get("classif.featureless")
   po = PipeOpLearnerCV$new(lrn)
   expect_pipeop(po$clone(), check_ps_default_values = FALSE)
-  expect_data_table(po$input, nrows = 1)
-  expect_data_table(po$output, nrows = 1)
+  expect_data_table(po$input, nrows = 1L)
+  expect_data_table(po$output, nrows = 1L)
 
   task = mlr_tasks$get("iris")
-  tsk = train_pipeop(po, list(task = task))[[1]]
+  tsk = train_pipeop(po, list(task = task))[[1L]]
   expect_class(tsk, "Task")
   expect_true(tsk$nrow == 150L)
   expect_true(tsk$ncol == 2L)
   expect_equal(task$target_names, tsk$target_names)
   expect_equal(task$class_names, tsk$class_names)
   vals = factor(unique(tsk$data(cols = tsk$feature_names)$response))
-  expect_character(setdiff(vals,
task$class_names), len = 0) + expect_character(setdiff(vals, task$class_names), len = 0L) tsk = predict_pipeop(po, list(task = task))[[1]] expect_class(tsk, "Task") @@ -24,50 +24,49 @@ test_that("PipeOpLearnerCV - basic properties", { expect_equal(task$target_names, tsk$target_names) expect_equal(task$class_names, tsk$class_names) vals = factor(unique(tsk$data(cols = tsk$feature_names)$response)) - expect_character(setdiff(vals, task$class_names), len = 0) + expect_character(setdiff(vals, task$class_names), len = 0L) lrn = mlr_learners$get("classif.featureless") iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE, check_ps_default_values = FALSE) + list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE, affect_context_independent = FALSE, check_ps_default_values = FALSE) # 'insample' PipeOpLearnerCV with deterministic Learner is deterministic in every regard! 
expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn, param_vals = list(resampling.method = "insample")), iris_with_unambiguous_mode, check_ps_default_values = FALSE) + list(lrn, resampling = rsmp("insample")), iris_with_unambiguous_mode, affect_context_independent = FALSE, check_ps_default_values = FALSE) expect_error(PipeOpLearnerCV$new()) - }) test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) - expect_subset(c("minsplit", "resampling.method", "resampling.folds"), names(polrn$param_set$params)) - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, xval = 0)) - polrn$param_set$values$minsplit = 2 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) - polrn$param_set$values$resampling.folds = 4 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 4, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) + expect_subset(c("minsplit", "resampling.folds", "keep_response"), names(polrn$param_set$params)) + expect_equal(polrn$param_set$values, list(resampling.folds = 3L, keep_response = FALSE, xval = 0)) + polrn$param_set$values$minsplit = 2L + expect_equal(polrn$param_set$values, list(resampling.folds = 3L, keep_response = FALSE, minsplit = 2L, xval = 0)) + polrn$param_set$values$resampling.folds = 4L + expect_equal(polrn$param_set$values, list(resampling.folds = 4L, keep_response = FALSE, minsplit = 2L, xval = 0)) }) test_that("PipeOpLearnerCV - within resampling", { lrn = mlr_learners$get("classif.rpart") gr = GraphLearner$new(PipeOpLearnerCV$new(lrn) %>>% po(id = "l2", lrn)) - resample(tsk("iris"), gr, rsmp("holdout")) + expect_r6(resample(tsk("iris"), gr, rsmp("holdout")), classes = "ResampleResult") }) test_that("PipeOpLearnerCV - insample resampling", { lrn = 
mlr_learners$get("classif.featureless") iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness - polrn = PipeOpLearnerCV$new(lrn, param_vals = list(resampling.method = "insample")) - expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]]$data(), + polrn = PipeOpLearnerCV$new(lrn, rsmp("insample")) + expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1L]]$data(), cbind(iris_with_unambiguous_mode$data(cols = "Species"), classif.featureless.response = factor("virginica", levels = levels(iris[[5]])))) lrn = mlr_learners$get("classif.rpart") - polrn = PipeOpLearnerCV$new(lrn, param_vals = list(resampling.method = "insample")) - expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]], - polrn$predict(list(iris_with_unambiguous_mode))[[1]]) + polrn = PipeOpLearnerCV$new(lrn, rsmp("insample")) + expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1L]], + polrn$predict(list(iris_with_unambiguous_mode))[[1L]]) }) test_that("PipeOpLearnerCV - graph but no id", { @@ -98,3 +97,4 @@ test_that("PipeOpLearnerCV - model active binding to state", { expect_null(po$learner$state) expect_equal(po$learner_model$state, po$state) }) + diff --git a/tests/testthat/test_usecases.R b/tests/testthat/test_usecases.R index 40117175b..baf2d0dfb 100644 --- a/tests/testthat/test_usecases.R +++ b/tests/testthat/test_usecases.R @@ -152,7 +152,7 @@ test_that("stacking", { pipe$pipeops$classif.rpart$learner$predict_type = "prob" pipe$pipeops$classif.featureless$learner$predict_type = "prob" - pipe$pipeops$classif.featureless$param_set$values$resampling.keep_response = TRUE + pipe$pipeops$classif.featureless$param_set$values$keep_response = TRUE result = pipe$train(task)[[1]]