From 2efdb99918cc92738e7de275fcda5a1e4ff88d1b Mon Sep 17 00:00:00 2001 From: sumny Date: Wed, 30 Sep 2020 22:42:15 +0200 Subject: [PATCH 1/8] add experimental other resamplings, handle duplicates and missings --- R/PipeOpLearnerCV.R | 81 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index b8aba7bf6..fc1134081 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -110,6 +110,7 @@ #' graph$pipeops$classif.rpart$learner$predict_type = "prob" #' #' graph$train(task) +# FIXME: docs and tests PipeOpLearnerCV = R6Class("PipeOpLearnerCV", inherit = PipeOpTaskPreproc, public = list( @@ -121,11 +122,15 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", task_type = mlr_reflections$task_types[get("type") == private$.learner$task_type][order(get("package"))][1L]$task private$.crossval_param_set = ParamSet$new(params = list( - ParamFct$new("method", levels = c("cv", "insample"), tags = c("train", "required")), + ParamFct$new("method", levels = c("bootstrap", "custom", "cv", "holdout", "insample", "loo", "repeated_cv", "subsampling"), tags = c("train", "required")), + ParamInt$new("repeats", lower = 1L, tags = c("train", "required")), ParamInt$new("folds", lower = 2L, upper = Inf, tags = c("train", "required")), - ParamLgl$new("keep_response", tags = c("train", "required")) + ParamDbl$new("ratio", lower = 0, upper = 1, tags = c("train", "required")), + ParamLgl$new("keep_response", tags = c("train", "required")), + ParamUty$new("train_sets", tags = "train", custom_check = function(x) check_list(types = "atomicvector", any.missing = FALSE)), + ParamUty$new("test_sets", tags = "train", custom_check = function(x) check_list(types = "atomicvector", any.missing = FALSE)) )) - private$.crossval_param_set$values = list(method = "cv", folds = 3, keep_response = FALSE) + private$.crossval_param_set$values = list(method = "cv", repeats = 30L, folds = 3, ratio = 2 / 3, keep_response = FALSE) private$.crossval_param_set$set_id = "resampling" # Dependencies in paradox have been broken from the start and this is known since at least a year: # https://github.com/mlr-org/paradox/issues/216 @@ -169,14 +174,45 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", self$state = private$.learner$train(task)$state pv = private$.crossval_param_set$values - # Compute CV Predictions - if (pv$method != "insample") { - rdesc = mlr_resamplings$get(pv$method) - if (pv$method == "cv") rdesc$param_set$values = list(folds = pv$folds) - rr = resample(task, private$.learner, rdesc) - prds = as.data.table(rr$prediction(predict_sets = "test")) - } else { - prds = as.data.table(private$.learner$predict(task)) + if (pv$method == "insample") { + return(private$pred_to_task(as.data.table(private$.learner$predict(task)), task)) # early exit + } + + # Compute resampled Predictions + rdesc = mlr_resamplings$get(pv$method) + rdesc$param_set$values = switch(pv$method, + "bootstrap" = list(repeats = pv$repeats, ratio = pv$ratio), + "custom" = list(), + "cv" = list(folds = pv$folds), + "holdout" = list(ratio = pv$ratio), + "loo" = list(), + "repeated_cv" = list(repeats = pv$repeats, folds = pv$folds), + "subsampling" = list(repeats = pv$repeats, ratio = pv$ratio)) + if (pv$method == "custom") { + rdesc$instantiate(task, train_sets = private$.crossval_param_set$values$train_sets, test_sets = private$.crossval_param_set$values$test_sets) + } + rr = resample(task, private$.learner, rdesc) + prds = as.data.table(rr$prediction(predict_sets = "test")) + 
nrows_duplicated = length(prds$row_id[duplicated(prds$row_id)]) + missing_rows = setdiff(task$row_ids, prds$row_id) + nrows_missing = length(setdiff(task$row_ids, prds$row_id)) + + if (nrows_duplicated || nrows_missing) { # duplicates or missings + SDcols = setdiff(colnames(prds), c("row_id", "truth")) + prds_corrected = if (nrows_duplicated) { + prds[, map(.SD, aggregation), by = "row_id", .SDcols = SDcols] + } else { + setNames(data.table(matrix(nrow = 0L, ncol = NCOL(prds))), colnames(prds)) + } + prds_extended = as.list(prds_corrected)[SDcols] + prds_extended = map(prds_extended, add_missings, len = nrows_missing) + prds_extended[["row_id"]] = c(prds_corrected[["row_id"]], missing_rows) + prds = setDT(prds_extended) + + target = task$truth(prds$row_id) + if (task$task_type == "classif") { + prds$response = factor(prds$response, levels = levels(target), ordered = is.ordered(target)) + } } private$pred_to_task(prds, task) @@ -204,4 +240,27 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) +# Helper function for aggregating predictions if duplicated rows are present: +# - handles response, prob etc. naturally +# - if x is a factor (e.g., response if classif) take the mode and return this level as a character (factor fix is applied later) +# - if x is numeric (e.g., response if regr, or prob or se), take the mean (for prob this is invariant w.r.t to [0, 1] boundaries) +aggregation = function(x) { + if (length(x) == 1L) { + return(x) # early exit + } + if (is.factor(x)) { + tt = table(x) + names(tt[which.max(tt)]) + } else { + mean(x, na.rm = TRUE) + } +} + +# Helper function to add missings to predictions based on their storage mode +add_missings = function(x, len) { + c(x, switch(typeof(x), + "character" = rep_len(NA_character_, length.out = len), + "double" = rep_len(NA_real_, length.out = len))) +} + mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) From b8e1deb157ae343a15df1060898ef24950fef3e6 Mon Sep 17 00:00:00 2001 From: sumny Date: Thu, 1 Oct 2020 16:51:08 +0200 Subject: [PATCH 2/8] extend PipeOpLearnerCV to other resamplings, add tests, update docs --- R/PipeOpLearnerCV.R | 126 +++++++++++++-------- man/Graph.Rd | 4 +- man/mlr_pipeops_histbin.Rd | 2 +- man/mlr_pipeops_learner_cv.Rd | 31 ++++-- man/mlr_pipeops_nmf.Rd | 2 +- man/mlr_pipeops_targetmutate.Rd | 2 +- man/mlr_pipeops_tunethreshold.Rd | 4 +- tests/testthat/test_pipeop_learnercv.R | 145 ++++++++++++++++++++++++- 8 files changed, 250 insertions(+), 66 deletions(-) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index fc1134081..365bec0d2 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -7,7 +7,7 @@ #' @description #' Wraps an [`mlr3::Learner`] into a [`PipeOp`]. #' -#' Returns cross-validated predictions during training as a [`Task`][mlr3::Task] and stores a model of the +#' Returns resampled predictions during training as a [`Task`][mlr3::Task] and stores a model of the #' [`Learner`][mlr3::Learner] trained on the whole data in `$state`. This is used to create a similar #' [`Task`][mlr3::Task] during prediction. #' @@ -19,7 +19,7 @@ #' Inherits the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] it is constructed from. #' #' [`PipeOpLearnerCV`] can be used to create "stacking" or "super learning" [`Graph`]s that use the output of one [`Learner`][mlr3::Learner] -#' as feature for another [`Learner`][mlr3::Learner]. 
Because the [`PipeOpLearnerCV`] erases the original input features, it is often +#' as features for another [`Learner`][mlr3::Learner]. Because the [`PipeOpLearnerCV`] erases the original input features, it is often #' useful to use [`PipeOpFeatureUnion`] to bind the prediction [`Task`][mlr3::Task] to the original input [`Task`][mlr3::Task]. #' #' @section Construction: @@ -28,8 +28,7 @@ #' ``` #' #' * `learner` :: [`Learner`][mlr3::Learner] \cr -#' [`Learner`][mlr3::Learner] to use for cross validation / prediction, or a string identifying a -#' [`Learner`][mlr3::Learner] in the [`mlr3::mlr_learners`] [`Dictionary`][mlr3misc::Dictionary]. +#' [`Learner`][mlr3::Learner] to use for resampling / prediction. #' * `id` :: `character(1)` #' Identifier of the resulting object, internally defaulting to the `id` of the [`Learner`][mlr3::Learner] being wrapped. #' * `param_vals` :: named `list`\cr @@ -43,7 +42,7 @@ #' type given to `learner` during construction; both during training and prediction. #' #' The output is a task with the same target as the input task, with features replaced by predictions made by the [`Learner`][mlr3::Learner]. -#' During training, this prediction is the out-of-sample prediction made by [`resample`][mlr3::resample], during prediction, this is the +#' During training, this prediction is the prediction made by [`resample`][mlr3::resample], during prediction, this is the #' ordinary prediction made on the data by a [`Learner`][mlr3::Learner] trained on the training phase data. #' #' @section State: @@ -64,10 +63,24 @@ #' The parameters are the parameters inherited from the [`PipeOpTaskPreproc`], as well as the parameters of the [`Learner`][mlr3::Learner] wrapped by this object. #' Besides that, parameters introduced are: #' * `resampling.method` :: `character(1)`\cr -#' Which resampling method do we want to use. Currently only supports `"cv"` and `"insample"`. `"insample"` generates -#' predictions with the model trained on all training data. -#' * `resampling.folds` :: `numeric(1)`\cr -#' Number of cross validation folds. Initialized to 3. Only used for `resampling.method = "cv"`. +#' Which resampling method to use. Supports `"cv"`,`"bootstrap"`, `"holdout"`, `"loo"`, `"repeated_cv"`, `"subsampling"`, `"custom"` and `"insample"`. +#' See [`mlr_resamplings`][mlr3::mlr_resamplings]. +#' `"insample"` generates predictions with the model trained on all training data. +#' In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean +#' (execpt for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). +#' In the case of the resampling method not returning predictions for all row ids as given in the input [`Task`][mlr3::Task], these predictions are added as missing. +#' * `resampling.repeats` :: `integer(1)`\cr +#' Number of repetitions. Initialized to 30. Only used for `resampling.method = "bootstrap"`, or `"repeated_cv"`, or `"subsampling"`. +#' * `resampling.folds` :: `integer(1)`\cr +#' Number of cross validation folds. Initialized to 3. Only used for `resampling.method = "cv"`, or `"repeated_cv"`. +#' * `resampling.ratio` :: `numeric(1)`\cr +#' Ratio of observations to put into the training set. Initialized to 2/3. Only used for `resampling.method = "bootstrap"`, or `"holdout"` or `"subsampling"`. +#' * `resampling.custom.train_sets` :: `list()`\cr +#' List with row ids for training, one list element per iteration. 
Must have the same length as `resampling.custom.test_sets`. +#' Only used for `resampling.method = "custom"`. +#' * `resampling.custom.test_sets` :: `list()`\cr +#' List with row ids for testing, one list element per iteration. Must have the same length as `resampling.custom.train_sets`. +#' Only used for `resampling.method = "custom"`. #' * `keep_response` :: `logical(1)`\cr #' Only effective during `"prob"` prediction: Whether to keep response values, if available. Initialized to `FALSE`. #' @@ -110,7 +123,6 @@ #' graph$pipeops$classif.rpart$learner$predict_type = "prob" #' #' graph$train(task) -# FIXME: docs and tests PipeOpLearnerCV = R6Class("PipeOpLearnerCV", inherit = PipeOpTaskPreproc, public = list( @@ -127,10 +139,10 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ParamInt$new("folds", lower = 2L, upper = Inf, tags = c("train", "required")), ParamDbl$new("ratio", lower = 0, upper = 1, tags = c("train", "required")), ParamLgl$new("keep_response", tags = c("train", "required")), - ParamUty$new("train_sets", tags = "train", custom_check = function(x) check_list(types = "atomicvector", any.missing = FALSE)), - ParamUty$new("test_sets", tags = "train", custom_check = function(x) check_list(types = "atomicvector", any.missing = FALSE)) + ParamUty$new("custom.train_sets", tags = "train", custom_check = function(x) check_list(x, types = "atomicvector", any.missing = FALSE)), + ParamUty$new("custom.test_sets", tags = "train", custom_check = function(x) check_list(x, types = "atomicvector", any.missing = FALSE)) )) - private$.crossval_param_set$values = list(method = "cv", repeats = 30L, folds = 3, ratio = 2 / 3, keep_response = FALSE) + private$.crossval_param_set$values = list(method = "cv", repeats = 30L, folds = 3L, ratio = 2 / 3, keep_response = FALSE) private$.crossval_param_set$set_id = "resampling" # Dependencies in paradox have been broken from the start and this is known since at least a year: # https://github.com/mlr-org/paradox/issues/216 @@ -189,33 +201,68 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", "repeated_cv" = list(repeats = pv$repeats, folds = pv$folds), "subsampling" = list(repeats = pv$repeats, ratio = pv$ratio)) if (pv$method == "custom") { - rdesc$instantiate(task, train_sets = private$.crossval_param_set$values$train_sets, test_sets = private$.crossval_param_set$values$test_sets) + rdesc$instantiate(task, train_sets = private$.crossval_param_set$values$custom.train_sets, test_sets = private$.crossval_param_set$values$custom.test_sets) } + # FIXME: we may want to instantiate here in general for safety reasons rr = resample(task, private$.learner, rdesc) prds = as.data.table(rr$prediction(predict_sets = "test")) - nrows_duplicated = length(prds$row_id[duplicated(prds$row_id)]) + nrows_multiple = length(prds$row_id[duplicated(prds$row_id)]) missing_rows = setdiff(task$row_ids, prds$row_id) - nrows_missing = length(setdiff(task$row_ids, prds$row_id)) + nrows_missing = length(missing_rows) - if (nrows_duplicated || nrows_missing) { # duplicates or missings - SDcols = setdiff(colnames(prds), c("row_id", "truth")) - prds_corrected = if (nrows_duplicated) { - prds[, map(.SD, aggregation), by = "row_id", .SDcols = SDcols] + if (!nrows_multiple && !nrows_missing) { + return(private$pred_to_task(prds, task)) # early exit + } + + # Some resamplings will result in rows being sampled multiple times and some being missing + task_type = task$task_type + prds_names = colnames(prds) + + prds_corrected = if (nrows_multiple) { + # classif: prob, regr: response, (se) + 
SDcols_multiple = setdiff(prds_names, if (task_type == "classif") c("row_id", "truth", "response") else c("row_id", "truth")) + + # aggregation functions: + # - mean for prob, response (regr), se + # - mode for response (classif) + prds_corrected = prds[, map(.SD, function(x) { + if (length(x) == 1L) return(x) # early exit + mean(x, na.rm = TRUE) + }), by = "row_id", .SDcols = SDcols_multiple] + + if (NROW(prds_corrected) == 0L) prds_corrected = unique(prds[, "row_id"]) + + if (task_type == "classif") { + cbind(prds_corrected, prds[, map(.SD, function(x) { + if (length(x) == 1L) return(as.character(x)) # early exit + tt = table(x) + names(tt[which.max(tt)]) + }), by = "row_id", .SDcols = "response"][, "response"]) } else { - setNames(data.table(matrix(nrow = 0L, ncol = NCOL(prds))), colnames(prds)) + prds_corrected } - prds_extended = as.list(prds_corrected)[SDcols] - prds_extended = map(prds_extended, add_missings, len = nrows_missing) - prds_extended[["row_id"]] = c(prds_corrected[["row_id"]], missing_rows) - prds = setDT(prds_extended) - - target = task$truth(prds$row_id) - if (task$task_type == "classif") { - prds$response = factor(prds$response, levels = levels(target), ordered = is.ordered(target)) + } else { + if (task_type == "classif") { + prds[, "response" := as.character(response)] } + prds[, !"truth"] } - private$pred_to_task(prds, task) + if (nrows_missing) { + SDcols_missing = setdiff(prds_names, "truth") + # add missings + prds_corrected = prds_corrected[, map(.SD, add_missings, len = nrows_missing), .SDcols = SDcols_missing] + prds_corrected$row_id[is.na(prds_corrected$row_id)] = missing_rows + } + + if (task_type == "classif") { + target = task$truth(prds_corrected$row_id) + prds_corrected$response = factor(prds_corrected$response, levels = levels(target), ordered = is.ordered(target)) + } + + # FIXME: safety cheks? + + private$pred_to_task(prds_corrected, task) }, .predict_task = function(task) { @@ -240,27 +287,12 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -# Helper function for aggregating predictions if duplicated rows are present: -# - handles response, prob etc. naturally -# - if x is a factor (e.g., response if classif) take the mode and return this level as a character (factor fix is applied later) -# - if x is numeric (e.g., response if regr, or prob or se), take the mean (for prob this is invariant w.r.t to [0, 1] boundaries) -aggregation = function(x) { - if (length(x) == 1L) { - return(x) # early exit - } - if (is.factor(x)) { - tt = table(x) - names(tt[which.max(tt)]) - } else { - mean(x, na.rm = TRUE) - } -} - # Helper function to add missings to predictions based on their storage mode add_missings = function(x, len) { c(x, switch(typeof(x), "character" = rep_len(NA_character_, length.out = len), - "double" = rep_len(NA_real_, length.out = len))) + "double" = rep_len(NA_real_, length.out = len), + "integer" = rep_len(NA_integer_, length.out = len))) } mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) diff --git a/man/Graph.Rd b/man/Graph.Rd index 91b9fecfc..599473314 100644 --- a/man/Graph.Rd +++ b/man/Graph.Rd @@ -94,8 +94,8 @@ are therefore unambiguous, they can be omitted (i.e. left as \code{NULL}). 
\item \code{plot(html)} \cr (\code{logical(1)}) -> \code{NULL} \cr Plot the \code{\link{Graph}}, using either the \pkg{igraph} package (for \code{html = FALSE}, default) or -the \code{visNetwork} package for \code{html = TRUE} producing a \code{\link[htmlwidgets:htmlwidgets]{htmlWidget}}. -The \code{\link[htmlwidgets:htmlwidgets]{htmlWidget}} can be rescaled using \code{\link[visNetwork:visOptions]{visOptions}}. +the \code{visNetwork} package for \code{html = TRUE} producing a \code{\link[htmlwidgets:htmlwidgets-package]{htmlWidget}}. +The \code{\link[htmlwidgets:htmlwidgets-package]{htmlWidget}} can be rescaled using \code{\link[visNetwork:visOptions]{visOptions}}. \item \code{print(dot = FALSE, dotname = "dot", fontsize = 24L)} \cr (\code{logical(1)}, \code{character(1)}, \code{integer(1)}) -> \code{NULL} \cr Print a representation of the \code{\link{Graph}} on the console. If \code{dot} is \code{FALSE}, output is a table with one row for each contained \code{\link{PipeOp}} and diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index bf4d1423f..2b50a748b 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -49,7 +49,7 @@ Either a \code{character(1)} string naming an algorithm to compute the number of a \code{numeric(1)} giving the number of breaks for the histogram, a vector \code{numeric} giving the breakpoints between the histogram cells, or a \code{function} to compute the vector of breakpoints or to compute the number -of cells. Default is algorithm \code{"Sturges"} (see \code{\link[grDevices:nclass.Sturges]{grDevices::nclass.Sturges()}}). +of cells. Default is algorithm \code{"Sturges"} (see \code{\link[grDevices:nclass]{grDevices::nclass.Sturges()}}). For details see \code{\link[graphics:hist]{hist()}}. } } diff --git a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index 202904de1..db0aae79c 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -10,7 +10,7 @@ \description{ Wraps an \code{\link[mlr3:Learner]{mlr3::Learner}} into a \code{\link{PipeOp}}. -Returns cross-validated predictions during training as a \code{\link[mlr3:Task]{Task}} and stores a model of the +Returns resampled predictions during training as a \code{\link[mlr3:Task]{Task}} and stores a model of the \code{\link[mlr3:Learner]{Learner}} trained on the whole data in \verb{$state}. This is used to create a similar \code{\link[mlr3:Task]{Task}} during prediction. @@ -22,7 +22,7 @@ are \verb{.response} and \verb{.se}. \verb{} denotes the \verb{$id} Inherits the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} it is constructed from. \code{\link{PipeOpLearnerCV}} can be used to create "stacking" or "super learning" \code{\link{Graph}}s that use the output of one \code{\link[mlr3:Learner]{Learner}} -as feature for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often +as features for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often useful to use \code{\link{PipeOpFeatureUnion}} to bind the prediction \code{\link[mlr3:Task]{Task}} to the original input \code{\link[mlr3:Task]{Task}}. 
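The stacking pattern described above can be put together in a few lines. The following is only a minimal sketch, assuming the mlr3, mlr3pipelines, and rpart packages are attached; the choice of task, level-0 learner, and meta-learner is arbitrary and purely illustrative:

library(mlr3)
library(mlr3pipelines)

task = tsk("iris")

# level 0: cross-validated rpart class probabilities replace the original features;
# po("nop") passes the original features through so featureunion can re-attach them
graph = gunion(list(
  po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob"), id = "rpart_cv"),
  po("nop")
)) %>>%
  po("featureunion") %>>%
  lrn("classif.rpart")

glrn = GraphLearner$new(graph)
glrn$train(task)
glrn$predict(task)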
} \section{Construction}{ @@ -30,8 +30,7 @@ useful to use \code{\link{PipeOpFeatureUnion}} to bind the prediction \code{\lin } \itemize{ \item \code{learner} :: \code{\link[mlr3:Learner]{Learner}} \cr -\code{\link[mlr3:Learner]{Learner}} to use for cross validation / prediction, or a string identifying a -\code{\link[mlr3:Learner]{Learner}} in the \code{\link[mlr3:mlr_learners]{mlr3::mlr_learners}} \code{\link[mlr3misc:Dictionary]{Dictionary}}. +\code{\link[mlr3:Learner]{Learner}} to use for resampling / prediction. \item \code{id} :: \code{character(1)} Identifier of the resulting object, internally defaulting to the \code{id} of the \code{\link[mlr3:Learner]{Learner}} being wrapped. \item \code{param_vals} :: named \code{list}\cr @@ -48,7 +47,7 @@ type given to \code{learner} during construction; both during training and predi type given to \code{learner} during construction; both during training and prediction. The output is a task with the same target as the input task, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. -During training, this prediction is the out-of-sample prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the +During training, this prediction is the prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the ordinary prediction made on the data by a \code{\link[mlr3:Learner]{Learner}} trained on the training phase data. } @@ -76,10 +75,24 @@ The parameters are the parameters inherited from the \code{\link{PipeOpTaskPrepr Besides that, parameters introduced are: \itemize{ \item \code{resampling.method} :: \code{character(1)}\cr -Which resampling method do we want to use. Currently only supports \code{"cv"} and \code{"insample"}. \code{"insample"} generates -predictions with the model trained on all training data. -\item \code{resampling.folds} :: \code{numeric(1)}\cr -Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}. +Which resampling method to use. Supports \code{"cv"},\code{"bootstrap"}, \code{"holdout"}, \code{"loo"}, \code{"repeated_cv"}, \code{"subsampling"}, \code{"custom"} and \code{"insample"}. +See \code{\link[mlr3:mlr_resamplings]{mlr_resamplings}}. +\code{"insample"} generates predictions with the model trained on all training data. +In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean +(execpt for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). +In the case of the resampling method not returning predictions for all row ids as given in the input \code{\link[mlr3:Task]{Task}}, these predictions are added as missing. +\item \code{resampling.repeats} :: \code{integer(1)}\cr +Number of repetitions. Initialized to 30. Only used for \code{resampling.method = "bootstrap"}, or \code{"repeated_cv"}, or \code{"subsampling"}. +\item \code{resampling.folds} :: \code{integer(1)}\cr +Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}, or \code{"repeated_cv"}. +\item \code{resampling.ratio} :: \code{numeric(1)}\cr +Ratio of observations to put into the training set. Initialized to 2/3. Only used for \code{resampling.method = "bootstrap"}, or \code{"holdout"} or \code{"subsampling"}. +\item \code{resampling.custom.train_sets} :: \code{list()}\cr +List with row ids for training, one list element per iteration. 
Must have the same length as \code{resampling.custom.test_sets}. +Only used for \code{resampling.method = "custom"}. +\item \code{resampling.custom.test_sets} :: \code{list()}\cr +List with row ids for testing, one list element per iteration. Must have the same length as \code{resampling.custom.train_sets}. +Only used for \code{resampling.method = "custom"}. \item \code{keep_response} :: \code{logical(1)}\cr Only effective during \code{"prob"} prediction: Whether to keep response values, if available. Initialized to \code{FALSE}. } diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 721f2a45f..78a4a5140 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -59,7 +59,7 @@ to use \code{mlr3}'s \code{future}-based parallelization. \section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf}} function as well as \code{\link[NMF:basis]{basis}}, \code{\link[NMF:coef]{coef}} and +Uses the \code{\link[NMF:nmf]{nmf}} function as well as \code{\link[NMF:basis-coef-methods]{basis}}, \code{\link[NMF:basis-coef-methods]{coef}} and \code{\link[MASS:ginv]{ginv}}. } diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index 12f0589d2..75ca70a6c 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -44,7 +44,7 @@ The parameters are the parameters inherited from \code{\link{PipeOpTargetTrafo}} Transformation function for the target. Should only be a function of the target, i.e., taking a single \code{data.table} argument, typically with one column. The return value is used as the new target of the resulting \code{\link[mlr3:Task]{Task}}. To change target names, change the column name of the data -using e.g. \code{\link[data.table:setnames]{setnames()}}.\cr +using e.g. \code{\link[data.table:setattr]{setnames()}}.\cr Note that this function also gets called during prediction and should thus gracefully handle \code{NA} values.\cr Initialized to \code{identity()}. \item \code{inverter} :: \code{function} \code{data.table} -> \code{data.table} | named \code{list}\cr diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index a46a84846..7f104e6f8 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -19,7 +19,7 @@ Returns a single \code{\link[mlr3:PredictionClassif]{PredictionClassif}}. This PipeOp should be used in conjunction with \code{\link{PipeOpLearnerCV}} in order to optimize thresholds of cross-validated predictions. In order to optimize thresholds without cross-validation, use \code{\link{PipeOpLearnerCV}} -in conjunction with \code{\link[mlr3:ResamplingInsample]{ResamplingInsample}}. +in conjunction with \code{\link[mlr3:mlr_resamplings_insample]{ResamplingInsample}}. } \section{Construction}{ \preformatted{* `PipeOpTuneThreshold$new(id = "tunethreshold", param_vals = list())` \\cr @@ -58,7 +58,7 @@ Initialized to \code{"classif.ce"}, i.e. misclassification error. \item \code{optimizer} :: \code{\link[bbotk:Optimizer]{Optimizer}}|\code{character(1)}\cr \code{\link[bbotk:Optimizer]{Optimizer}} used to find optimal thresholds. If \code{character}, converts to \code{\link[bbotk:Optimizer]{Optimizer}} -via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:OptimizerGenSA]{OptimizerGenSA}}. +via \code{\link[bbotk:opt]{opt}}. Initialized to \code{\link[bbotk:mlr_optimizers_gensa]{OptimizerGenSA}}. \item \code{log_level} :: \code{character(1)} | \code{integer(1)}\cr Set a temporary log-level for \code{lgr::get_logger("bbotk")}. Initialized to: "warn". 
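To make the interplay with PipeOpLearnerCV mentioned above concrete, here is a minimal sketch of the threshold-tuning pattern; it assumes mlr3, mlr3pipelines, rpart, and the bbotk/GenSA optimization backend are installed, and the task and learner are arbitrary placeholders:

library(mlr3)
library(mlr3pipelines)

task = tsk("german_credit")

# cross-validated class probabilities from learner_cv feed the threshold search
graph = po("learner_cv", learner = lrn("classif.rpart", predict_type = "prob")) %>>%
  po("tunethreshold")

glrn = GraphLearner$new(graph)
glrn$train(task)
glrn$predict(task)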
} diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 34d0e475a..038c3fc14 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -42,11 +42,11 @@ test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) expect_subset(c("minsplit", "resampling.method", "resampling.folds"), names(polrn$param_set$params)) - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 3, resampling.ratio = 2/3, resampling.keep_response = FALSE, xval = 0)) polrn$param_set$values$minsplit = 2 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 3, resampling.ratio = 2/3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) polrn$param_set$values$resampling.folds = 4 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.folds = 4, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 4, resampling.ratio = 2/3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) }) test_that("PipeOpLearnerCV - within resampling", { @@ -98,3 +98,142 @@ test_that("PipeOpLearnerCV - model active binding to state", { expect_null(po$learner$state) expect_equal(po$learner_model$state, po$state) }) + +test_that("PipeOpLearnerCV - different methods", { + skip_on_cran() # takes too long + # Helper + test_valid_resampled_task = function(polrn, task, predict_type) { + polrn$param_set$values$resampling.keep_response = FALSE + polrn$learner$predict_type = predict_type + + train_out = polrn$train(list(task))[[1]] + train_out_data = train_out$data() + expect_identical(task$row_ids, train_out$row_ids) + + if (task$task_type == "classif") { + if (polrn$learner$predict_type == "response") { + feature = train_out$data(cols = grep("*.response", train_out$feature_names, value = TRUE))[[1L]] + expect_true(is.factor(feature)) + expect_identical(task$class_names, levels(feature)) + } else { # "prob" + features = train_out$data(cols = grep("*.prob*", train_out$feature_names, value = TRUE)) + sums = rowSums(is.na(features)) + expect_true(all(sums == 0 | sums == NCOL(features))) # either all or none missing + features = features[sums == 0, ] + expect_true(all(apply(features, MARGIN = 2L, function(x) x >= 0 & x <= 1))) # between 0 and 1 + expect_equal(rowSums(features), rep_len(1, length.out = NROW(features))) # sum is 1 + } + } else { # "regr" + if (polrn$learner$predict_type == "response") { + feature = train_out$data(cols = grep("*.response", train_out$feature_names, value = TRUE))[[1L]] + expect_true(is.numeric(feature)) + } else { # "se" + features = train_out$data(cols = grep("*.response|*.se", train_out$feature_names, value = TRUE)) + expect_true(all(apply(features, MARGIN = 2L, is.numeric))) + } + } + } + + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), param_vals = list(resampling.method = "cv", resampling.folds = 2, resampling.repeats = 2)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), param_vals = 
list(resampling.method = "cv", resampling.folds = 2, resampling.repeats = 2)) + + set.seed(1234) + # faster training + taskc = tsk("german_credit")$filter(sample(1000, 50)) + taskc$select("age") + taskr = tsk("boston_housing")$filter(sample(sample(506, 50))) + taskr$select("rad") + + # cv (see params above) + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # bootstrap + polrnc$param_set$values$resampling.method = "bootstrap" + polrnr$param_set$values$resampling.method = "bootstrap" + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # holdout + polrnc$param_set$values$resampling.method = "holdout" + polrnr$param_set$values$resampling.method = "holdout" + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # loo + polrnc$param_set$values$resampling.method = "loo" + polrnr$param_set$values$resampling.method = "loo" + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # repeated_cv + polrnc$param_set$values$resampling.method = "repeated_cv" + polrnr$param_set$values$resampling.method = "repeated_cv" + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # subsampling + polrnc$param_set$values$resampling.method = "subsampling" + polrnr$param_set$values$resampling.method = "subsampling" + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + test_valid_resampled_task(polrnr, taskr, "se") + + # custom + # classif + polrnc$param_set$values$resampling.method = "custom" + polrnc$param_set$values$resampling.custom.train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]) + polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]) # no multiples no missings + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + + polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[1:50]) # multiples but no missings + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + + polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:45]) # no multiples but missings + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + polrnc$learner$predict_type = "response" + feature_out = polrnc$train(list(taskc))[[1L]]$data(cols = "classif.rpart.response")[[1L]] + expect_true(all(which(is.na(feature_out)) == 46:50)) + polrnc$learner$predict_type = "prob" + features_out = polrnc$train(list(taskc))[[1L]]$data(cols = c("classif.rpart.prob.good", "classif.rpart.prob.bad")) + expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) + + polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[20:45]) # multiples and missings + test_valid_resampled_task(polrnc, taskc, "response") + test_valid_resampled_task(polrnc, taskc, "prob") + polrnc$learner$predict_type = "response" + feature_out = polrnc$train(list(taskc))[[1L]]$data(cols = 
"classif.rpart.response")[[1L]] + expect_true(all(which(is.na(feature_out)) == 46:50)) + polrnc$learner$predict_type = "prob" + features_out = polrnc$train(list(taskc))[[1L]]$data(cols = c("classif.rpart.prob.good", "classif.rpart.prob.bad")) + expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) + + # regr + polrnr$param_set$values$resampling.method = "custom" + polrnr$param_set$values$resampling.custom.train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]) + polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]) # no multiples no missings + test_valid_resampled_task(polrnr, taskr, "se") + + polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[1:50]) # multiples but no missings + test_valid_resampled_task(polrnr, taskr, "se") + + polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:45]) # no multiples but missings + test_valid_resampled_task(polrnr, taskr, "se") + polrnr$learner$predict_type = "se" + features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", "regr.lm.se")) + expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) + + polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[20:45]) # multiples and missings + test_valid_resampled_task(polrnr, taskr, "se") + polrnr$learner$predict_type = "se" + features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", "regr.lm.se")) + expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) +}) From a90616ca7157d5322b020bda61044e6c10d9ada2 Mon Sep 17 00:00:00 2001 From: sumny Date: Sun, 4 Oct 2020 20:24:31 +0200 Subject: [PATCH 3/8] allow for more flexible Resampling, fix tests and docs accordingly --- R/PipeOpLearnerCV.R | 111 ++++++++++--------------- man/mlr_pipeops_learner_cv.Rd | 42 ++++------ tests/testthat/test_pipeop_learnercv.R | 67 ++++++++------- 3 files changed, 93 insertions(+), 127 deletions(-) diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index 365bec0d2..66494fddf 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -5,7 +5,7 @@ #' @format [`R6Class`] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Wraps an [`mlr3::Learner`] into a [`PipeOp`]. +#' Wraps a [`mlr3::Learner`] and [`mlr3::Resampling`] into a [`PipeOp`]. #' #' Returns resampled predictions during training as a [`Task`][mlr3::Task] and stores a model of the #' [`Learner`][mlr3::Learner] trained on the whole data in `$state`. This is used to create a similar @@ -16,7 +16,13 @@ #' for `$predict.type` `"prob"` the `.prob.` features are created, and for `$predict.type` `"se"` the new columns #' are `.response` and `.se`. `` denotes the `$id` of the [`PipeOpLearnerCV`] object. #' -#' Inherits the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] it is constructed from. +#' In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean +#' (execpt for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). +#' In the case of the resampling method not returning predictions for all row ids as given in the input [`Task`][mlr3::Task], +#' these predictions are added as missing. 
+#' +#' Inherits both the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] and +#' [`Resampling`][mlr3::Resampling] it is constructed from. The parameter ids of the latter one are prefixed with `"resampling."`. #' #' [`PipeOpLearnerCV`] can be used to create "stacking" or "super learning" [`Graph`]s that use the output of one [`Learner`][mlr3::Learner] #' as features for another [`Learner`][mlr3::Learner]. Because the [`PipeOpLearnerCV`] erases the original input features, it is often @@ -24,12 +30,14 @@ #' #' @section Construction: #' ``` -#' PipeOpLearnerCV$new(learner, id = NULL, param_vals = list()) +#' PipeOpLearnerCV$new(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) #' ``` #' #' * `learner` :: [`Learner`][mlr3::Learner] \cr #' [`Learner`][mlr3::Learner] to use for resampling / prediction. -#' * `id` :: `character(1)` +#' * `resampling` :: [`Resampling`][mlr3::Resampling] \cr +#' [`Resamling`][mlr3::Resampling] to use for resampling. Initialized to 3-fold cross-validation. +#' * `id` :: `character(1)`\cr #' Identifier of the resulting object, internally defaulting to the `id` of the [`Learner`][mlr3::Learner] being wrapped. #' * `param_vals` :: named `list`\cr #' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. @@ -42,7 +50,7 @@ #' type given to `learner` during construction; both during training and prediction. #' #' The output is a task with the same target as the input task, with features replaced by predictions made by the [`Learner`][mlr3::Learner]. -#' During training, this prediction is the prediction made by [`resample`][mlr3::resample], during prediction, this is the +#' During training, this prediction is the out-of-sample prediction made by [`resample`][mlr3::resample], during prediction, this is the #' ordinary prediction made on the data by a [`Learner`][mlr3::Learner] trained on the training phase data. #' #' @section State: @@ -60,27 +68,9 @@ #' Prediction time, in seconds. #' #' @section Parameters: -#' The parameters are the parameters inherited from the [`PipeOpTaskPreproc`], as well as the parameters of the [`Learner`][mlr3::Learner] wrapped by this object. +#' The parameters are the parameters inherited from the [`PipeOpTaskPreproc`], as well as the parameters of the [`Learner`][mlr3::Learner] and +#' [`Resampling`][mlr3::Resampling] wrapped by this object. #' Besides that, parameters introduced are: -#' * `resampling.method` :: `character(1)`\cr -#' Which resampling method to use. Supports `"cv"`,`"bootstrap"`, `"holdout"`, `"loo"`, `"repeated_cv"`, `"subsampling"`, `"custom"` and `"insample"`. -#' See [`mlr_resamplings`][mlr3::mlr_resamplings]. -#' `"insample"` generates predictions with the model trained on all training data. -#' In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean -#' (execpt for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). -#' In the case of the resampling method not returning predictions for all row ids as given in the input [`Task`][mlr3::Task], these predictions are added as missing. -#' * `resampling.repeats` :: `integer(1)`\cr -#' Number of repetitions. Initialized to 30. Only used for `resampling.method = "bootstrap"`, or `"repeated_cv"`, or `"subsampling"`. -#' * `resampling.folds` :: `integer(1)`\cr -#' Number of cross validation folds. 
Initialized to 3. Only used for `resampling.method = "cv"`, or `"repeated_cv"`. -#' * `resampling.ratio` :: `numeric(1)`\cr -#' Ratio of observations to put into the training set. Initialized to 2/3. Only used for `resampling.method = "bootstrap"`, or `"holdout"` or `"subsampling"`. -#' * `resampling.custom.train_sets` :: `list()`\cr -#' List with row ids for training, one list element per iteration. Must have the same length as `resampling.custom.test_sets`. -#' Only used for `resampling.method = "custom"`. -#' * `resampling.custom.test_sets` :: `list()`\cr -#' List with row ids for testing, one list element per iteration. Must have the same length as `resampling.custom.train_sets`. -#' Only used for `resampling.method = "custom"`. #' * `keep_response` :: `logical(1)`\cr #' Only effective during `"prob"` prediction: Whether to keep response values, if available. Initialized to `FALSE`. #' @@ -93,6 +83,8 @@ #' [`Learner`][mlr3::Learner] that is being wrapped. Read-only. #' * `learner_model` :: [`Learner`][mlr3::Learner]\cr #' [`Learner`][mlr3::Learner] that is being wrapped. This learner contains the model if the `PipeOp` is trained. Read-only. +#' * `resampling` :: [`Resampling`][mlr3::Resampling]\cr +#' [`Resampling`][mlr3::Resampling] that is being wrapped. Read-only. #' #' @section Methods: #' Methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -126,33 +118,24 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", inherit = PipeOpTaskPreproc, public = list( - initialize = function(learner, id = NULL, param_vals = list()) { + initialize = function(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) { private$.learner = as_learner(learner, clone = TRUE) private$.learner$param_set$set_id = "" + private$.resampling = as_resampling(resampling, clone = TRUE) + private$.resampling$param_set$set_id = "resampling" + id = id %??% private$.learner$id - # FIXME: can be changed when mlr-org/mlr3#470 has an answer + # FIXME: probably should restrict to only classif and regr task_type = mlr_reflections$task_types[get("type") == private$.learner$task_type][order(get("package"))][1L]$task - private$.crossval_param_set = ParamSet$new(params = list( - ParamFct$new("method", levels = c("bootstrap", "custom", "cv", "holdout", "insample", "loo", "repeated_cv", "subsampling"), tags = c("train", "required")), - ParamInt$new("repeats", lower = 1L, tags = c("train", "required")), - ParamInt$new("folds", lower = 2L, upper = Inf, tags = c("train", "required")), - ParamDbl$new("ratio", lower = 0, upper = 1, tags = c("train", "required")), - ParamLgl$new("keep_response", tags = c("train", "required")), - ParamUty$new("custom.train_sets", tags = "train", custom_check = function(x) check_list(x, types = "atomicvector", any.missing = FALSE)), - ParamUty$new("custom.test_sets", tags = "train", custom_check = function(x) check_list(x, types = "atomicvector", any.missing = FALSE)) + private$.additional_param_set = ParamSet$new(params = list( + ParamLgl$new("keep_response", tags = c("train", "required")) )) - private$.crossval_param_set$values = list(method = "cv", repeats = 30L, folds = 3L, ratio = 2 / 3, keep_response = FALSE) - private$.crossval_param_set$set_id = "resampling" - # Dependencies in paradox have been broken from the start and this is known since at least a year: - # https://github.com/mlr-org/paradox/issues/216 - # The following would make it _impossible_ to set "method" to "insample", because then "folds" - # is both _required_ (required tag above) and at the same time must be 
unset (because of this - # dependency). We will opt for the least annoying behaviour here and just not use dependencies - # in PipeOp ParamSets. - # private$.crossval_param_set$add_dep("folds", "method", CondEqual$new("cv")) # don't do this. + private$.additional_param_set$values = list(keep_response = FALSE) + private$.additional_param_set$set_id = "" - super$initialize(id, alist(private$.crossval_param_set, private$.learner$param_set), param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) + super$initialize(id, param_set = alist(private$.resampling$param_set, private$.additional_param_set, private$.learner$param_set), + param_vals = param_vals, can_subset_cols = TRUE, task_type = task_type, tags = c("learner", "ensemble")) } ), @@ -176,6 +159,14 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", } else { multiplicity_recurse(self$state, clone_with_state, learner = private$.learner) } + }, + resampling = function(val) { + if (!missing(val)) { + if (!identical(val, private$.resampling)) { + stop("$resampling is read-only.") + } + } + private$.resampling } ), private = list( @@ -184,28 +175,12 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", # Train a learner for predicting self$state = private$.learner$train(task)$state - pv = private$.crossval_param_set$values - - if (pv$method == "insample") { - return(private$pred_to_task(as.data.table(private$.learner$predict(task)), task)) # early exit - } # Compute resampled Predictions - rdesc = mlr_resamplings$get(pv$method) - rdesc$param_set$values = switch(pv$method, - "bootstrap" = list(repeats = pv$repeats, ratio = pv$ratio), - "custom" = list(), - "cv" = list(folds = pv$folds), - "holdout" = list(ratio = pv$ratio), - "loo" = list(), - "repeated_cv" = list(repeats = pv$repeats, folds = pv$folds), - "subsampling" = list(repeats = pv$repeats, ratio = pv$ratio)) - if (pv$method == "custom") { - rdesc$instantiate(task, train_sets = private$.crossval_param_set$values$custom.train_sets, test_sets = private$.crossval_param_set$values$custom.test_sets) - } - # FIXME: we may want to instantiate here in general for safety reasons - rr = resample(task, private$.learner, rdesc) + rr = resample(task, private$.learner, private$.resampling) prds = as.data.table(rr$prediction(predict_sets = "test")) + + # Some resamplings will result in rows being sampled multiple times and some being missing nrows_multiple = length(prds$row_id[duplicated(prds$row_id)]) missing_rows = setdiff(task$row_ids, prds$row_id) nrows_missing = length(missing_rows) @@ -214,7 +189,6 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", return(private$pred_to_task(prds, task)) # early exit } - # Some resamplings will result in rows being sampled multiple times and some being missing task_type = task$task_type prds_names = colnames(prds) @@ -274,7 +248,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", pred_to_task = function(prds, task) { if (!is.null(prds$truth)) prds[, truth := NULL] - if (!self$param_set$values$resampling.keep_response && self$learner$predict_type == "prob") { + if (!self$param_set$values$keep_response && self$learner$predict_type == "prob") { prds[, response := NULL] } renaming = setdiff(colnames(prds), "row_id") @@ -282,8 +256,9 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", setnames(prds, "row_id", task$backend$primary_key) task$select(character(0))$cbind(prds) }, - .crossval_param_set = NULL, - .learner = NULL + .additional_param_set = NULL, + .learner = NULL, + .resampling = NULL ) ) diff --git 
a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index db0aae79c..f88e4bab0 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -8,7 +8,7 @@ \code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } \description{ -Wraps an \code{\link[mlr3:Learner]{mlr3::Learner}} into a \code{\link{PipeOp}}. +Wraps a \code{\link[mlr3:Learner]{mlr3::Learner}} and \code{\link[mlr3:Resampling]{mlr3::Resampling}} into a \code{\link{PipeOp}}. Returns resampled predictions during training as a \code{\link[mlr3:Task]{Task}} and stores a model of the \code{\link[mlr3:Learner]{Learner}} trained on the whole data in \verb{$state}. This is used to create a similar @@ -19,19 +19,27 @@ The \code{\link[mlr3:Task]{Task}} gets features depending on the capsuled \code{ for \verb{$predict.type} \code{"prob"} the \verb{.prob.} features are created, and for \verb{$predict.type} \code{"se"} the new columns are \verb{.response} and \verb{.se}. \verb{} denotes the \verb{$id} of the \code{\link{PipeOpLearnerCV}} object. -Inherits the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} it is constructed from. +In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean +(execpt for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). +In the case of the resampling method not returning predictions for all row ids as given in the input \code{\link[mlr3:Task]{Task}}, +these predictions are added as missing. + +Inherits both the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} and +\code{\link[mlr3:Resampling]{Resampling}} it is constructed from. The parameter ids of the latter one are prefixed with \code{"resampling."}. \code{\link{PipeOpLearnerCV}} can be used to create "stacking" or "super learning" \code{\link{Graph}}s that use the output of one \code{\link[mlr3:Learner]{Learner}} as features for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often useful to use \code{\link{PipeOpFeatureUnion}} to bind the prediction \code{\link[mlr3:Task]{Task}} to the original input \code{\link[mlr3:Task]{Task}}. } \section{Construction}{ -\preformatted{PipeOpLearnerCV$new(learner, id = NULL, param_vals = list()) +\preformatted{PipeOpLearnerCV$new(learner, resampling = rsmp("cv", folds = 3), id = NULL, param_vals = list()) } \itemize{ \item \code{learner} :: \code{\link[mlr3:Learner]{Learner}} \cr \code{\link[mlr3:Learner]{Learner}} to use for resampling / prediction. -\item \code{id} :: \code{character(1)} +\item \code{resampling} :: \code{\link[mlr3:Resampling]{Resampling}} \cr +\code{\link[mlr3:Resampling]{Resamling}} to use for resampling. Initialized to 3-fold cross-validation. +\item \code{id} :: \code{character(1)}\cr Identifier of the resulting object, internally defaulting to the \code{id} of the \code{\link[mlr3:Learner]{Learner}} being wrapped. \item \code{param_vals} :: named \code{list}\cr List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. @@ -47,7 +55,7 @@ type given to \code{learner} during construction; both during training and predi type given to \code{learner} during construction; both during training and prediction. 
The output is a task with the same target as the input task, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. -During training, this prediction is the prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the +During training, this prediction is the out-of-sample prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the ordinary prediction made on the data by a \code{\link[mlr3:Learner]{Learner}} trained on the training phase data. } @@ -71,28 +79,10 @@ Prediction time, in seconds. \section{Parameters}{ -The parameters are the parameters inherited from the \code{\link{PipeOpTaskPreproc}}, as well as the parameters of the \code{\link[mlr3:Learner]{Learner}} wrapped by this object. +The parameters are the parameters inherited from the \code{\link{PipeOpTaskPreproc}}, as well as the parameters of the \code{\link[mlr3:Learner]{Learner}} and +\code{\link[mlr3:Resampling]{Resampling}} wrapped by this object. Besides that, parameters introduced are: \itemize{ -\item \code{resampling.method} :: \code{character(1)}\cr -Which resampling method to use. Supports \code{"cv"},\code{"bootstrap"}, \code{"holdout"}, \code{"loo"}, \code{"repeated_cv"}, \code{"subsampling"}, \code{"custom"} and \code{"insample"}. -See \code{\link[mlr3:mlr_resamplings]{mlr_resamplings}}. -\code{"insample"} generates predictions with the model trained on all training data. -In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean -(execpt for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). -In the case of the resampling method not returning predictions for all row ids as given in the input \code{\link[mlr3:Task]{Task}}, these predictions are added as missing. -\item \code{resampling.repeats} :: \code{integer(1)}\cr -Number of repetitions. Initialized to 30. Only used for \code{resampling.method = "bootstrap"}, or \code{"repeated_cv"}, or \code{"subsampling"}. -\item \code{resampling.folds} :: \code{integer(1)}\cr -Number of cross validation folds. Initialized to 3. Only used for \code{resampling.method = "cv"}, or \code{"repeated_cv"}. -\item \code{resampling.ratio} :: \code{numeric(1)}\cr -Ratio of observations to put into the training set. Initialized to 2/3. Only used for \code{resampling.method = "bootstrap"}, or \code{"holdout"} or \code{"subsampling"}. -\item \code{resampling.custom.train_sets} :: \code{list()}\cr -List with row ids for training, one list element per iteration. Must have the same length as \code{resampling.custom.test_sets}. -Only used for \code{resampling.method = "custom"}. -\item \code{resampling.custom.test_sets} :: \code{list()}\cr -List with row ids for testing, one list element per iteration. Must have the same length as \code{resampling.custom.train_sets}. -Only used for \code{resampling.method = "custom"}. \item \code{keep_response} :: \code{logical(1)}\cr Only effective during \code{"prob"} prediction: Whether to keep response values, if available. Initialized to \code{FALSE}. } @@ -111,6 +101,8 @@ Fields inherited from \code{\link{PipeOp}}, as well as: \code{\link[mlr3:Learner]{Learner}} that is being wrapped. Read-only. \item \code{learner_model} :: \code{\link[mlr3:Learner]{Learner}}\cr \code{\link[mlr3:Learner]{Learner}} that is being wrapped. This learner contains the model if the \code{PipeOp} is trained. Read-only. 
+\item \code{resampling} :: \code{\link[mlr3:Resampling]{Resampling}}\cr +\code{\link[mlr3:Resampling]{Resampling}} that is being wrapped. Read-only. } } diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 038c3fc14..b64daa81b 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -32,7 +32,7 @@ test_that("PipeOpLearnerCV - basic properties", { list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE) # 'insample' PipeOpLearnerCV with deterministic Learner is deterministic in every regard! expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn, param_vals = list(resampling.method = "insample")), iris_with_unambiguous_mode) + list(lrn, resampling = rsmp("insample")), iris_with_unambiguous_mode) expect_error(PipeOpLearnerCV$new()) @@ -41,12 +41,12 @@ test_that("PipeOpLearnerCV - basic properties", { test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) - expect_subset(c("minsplit", "resampling.method", "resampling.folds"), names(polrn$param_set$params)) - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 3, resampling.ratio = 2/3, resampling.keep_response = FALSE, xval = 0)) + expect_subset(c("minsplit", "resampling.folds", "keep_response"), names(polrn$param_set$params)) + expect_equal(polrn$param_set$values, list(resampling.folds = 3, keep_response = FALSE, xval = 0)) polrn$param_set$values$minsplit = 2 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 3, resampling.ratio = 2/3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.folds = 3, keep_response = FALSE, minsplit = 2, xval = 0)) polrn$param_set$values$resampling.folds = 4 - expect_equal(polrn$param_set$values, list(resampling.method = "cv", resampling.repeats = 30, resampling.folds = 4, resampling.ratio = 2/3, resampling.keep_response = FALSE, minsplit = 2, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.folds = 4, keep_response = FALSE, minsplit = 2, xval = 0)) }) test_that("PipeOpLearnerCV - within resampling", { @@ -59,13 +59,13 @@ test_that("PipeOpLearnerCV - insample resampling", { lrn = mlr_learners$get("classif.featureless") iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness - polrn = PipeOpLearnerCV$new(lrn, param_vals = list(resampling.method = "insample")) + polrn = PipeOpLearnerCV$new(lrn, rsmp("insample")) expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]]$data(), cbind(iris_with_unambiguous_mode$data(cols = "Species"), classif.featureless.response = factor("virginica", levels = levels(iris[[5]])))) lrn = mlr_learners$get("classif.rpart") - polrn = PipeOpLearnerCV$new(lrn, param_vals = list(resampling.method = "insample")) + polrn = PipeOpLearnerCV$new(lrn, rsmp("insample")) expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]], polrn$predict(list(iris_with_unambiguous_mode))[[1]]) }) @@ -101,9 +101,9 @@ test_that("PipeOpLearnerCV - model active binding to state", { test_that("PipeOpLearnerCV - different methods", { skip_on_cran() # takes too long + # Helper test_valid_resampled_task = function(polrn, task, predict_type) { - polrn$param_set$values$resampling.keep_response = FALSE polrn$learner$predict_type = predict_type 
train_out = polrn$train(list(task))[[1]] @@ -134,9 +134,6 @@ test_that("PipeOpLearnerCV - different methods", { } } - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), param_vals = list(resampling.method = "cv", resampling.folds = 2, resampling.repeats = 2)) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), param_vals = list(resampling.method = "cv", resampling.folds = 2, resampling.repeats = 2)) - set.seed(1234) # faster training taskc = tsk("german_credit")$filter(sample(1000, 50)) @@ -144,59 +141,61 @@ test_that("PipeOpLearnerCV - different methods", { taskr = tsk("boston_housing")$filter(sample(sample(506, 50))) taskr$select("rad") - # cv (see params above) + # cv + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("cv", folds = 2)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("cv", folds = 2)) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # bootstrap - polrnc$param_set$values$resampling.method = "bootstrap" - polrnr$param_set$values$resampling.method = "bootstrap" + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("bootstrap", repeats = 2)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("bootstrap", repeats = 2)) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # holdout - polrnc$param_set$values$resampling.method = "holdout" - polrnr$param_set$values$resampling.method = "holdout" + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("holdout")) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("holdout")) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # loo - polrnc$param_set$values$resampling.method = "loo" - polrnr$param_set$values$resampling.method = "loo" + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("loo")) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("loo")) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # repeated_cv - polrnc$param_set$values$resampling.method = "repeated_cv" - polrnr$param_set$values$resampling.method = "repeated_cv" + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("repeated_cv", folds = 2, repeats = 2)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("repeated_cv", folds = 2, repeats = 2)) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # subsampling - polrnc$param_set$values$resampling.method = "subsampling" - polrnr$param_set$values$resampling.method = "subsampling" + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("subsampling", repeats = 2)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("subsampling", repeats = 2)) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") test_valid_resampled_task(polrnr, taskr, "se") # custom # classif - polrnc$param_set$values$resampling.method = "custom" - polrnc$param_set$values$resampling.custom.train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]) - polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], 
taskc$row_ids[26:50]) # no multiples no missings + rcm = rsmp("custom") + rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50])) # no multiples no missings + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rcm) test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") - polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[1:50]) # multiples but no missings + rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[1:50])) # multiples but no missings test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") - polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:45]) # no multiples but missings + rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:45])) # no multiples but missings test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") polrnc$learner$predict_type = "response" @@ -206,7 +205,7 @@ test_that("PipeOpLearnerCV - different methods", { features_out = polrnc$train(list(taskc))[[1L]]$data(cols = c("classif.rpart.prob.good", "classif.rpart.prob.bad")) expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) - polrnc$param_set$values$resampling.custom.test_sets = list(taskc$row_ids[1:25], taskc$row_ids[20:45]) # multiples and missings + rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[20:45])) # multiples and missings test_valid_resampled_task(polrnc, taskc, "response") test_valid_resampled_task(polrnc, taskc, "prob") polrnc$learner$predict_type = "response" @@ -217,21 +216,21 @@ test_that("PipeOpLearnerCV - different methods", { expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) # regr - polrnr$param_set$values$resampling.method = "custom" - polrnr$param_set$values$resampling.custom.train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]) - polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]) # no multiples no missings + rcm = rsmp("custom") + rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50])) # no multiples no missings + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rcm) test_valid_resampled_task(polrnr, taskr, "se") - polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[1:50]) # multiples but no missings + rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[1:50])) # multiples but no missings test_valid_resampled_task(polrnr, taskr, "se") - polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:45]) # no multiples but missings + rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:45])) # no multiples but missings test_valid_resampled_task(polrnr, taskr, "se") polrnr$learner$predict_type = "se" features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", 
"regr.lm.se")) expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) - polrnr$param_set$values$resampling.custom.test_sets = list(taskr$row_ids[1:25], taskr$row_ids[20:45]) # multiples and missings + rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[20:45])) # multiples and missings test_valid_resampled_task(polrnr, taskr, "se") polrnr$learner$predict_type = "se" features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", "regr.lm.se")) From 7c3c301ac56e1f3aa4886f1bfcb1a9733c1617d4 Mon Sep 17 00:00:00 2001 From: sumny Date: Sun, 4 Oct 2020 20:32:07 +0200 Subject: [PATCH 4/8] update NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2b03fdcb9..9abdc6366 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ # mlr3pipelines 0.3.0-9000 +* Changed PipeOps: + - PipeOpLearnerCV now also wraps a Resampling allowing for a wider use of resampling methods # mlr3pipelines 0.3.0 From d7f89697f05bcb442f875d4f6db3ceebde6864fa Mon Sep 17 00:00:00 2001 From: sumny Date: Thu, 15 Oct 2020 17:03:28 +0200 Subject: [PATCH 5/8] fix conversion learnercv test, rerun docs --- man/mlr_pipeops_nmf.Rd | 4 ++-- tests/testthat/test_conversion.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 3df9f0433..23a039b44 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -94,8 +94,8 @@ See \code{\link[NMF:nmf]{nmf()}}. \section{Internals}{ -Uses the \code{\link[NMF:nmf]{nmf}} function as well as \code{\link[NMF:basis-coef-methods]{basis}}, \code{\link[NMF:basis-coef-methods]{coef}} and -\code{\link[MASS:ginv]{ginv}}. +Uses the \code{\link[NMF:nmf]{nmf()}} function as well as \code{\link[NMF:basis-coef-methods]{basis()}}, \code{\link[NMF:basis-coef-methods]{coef()}} and +\code{\link[MASS:ginv]{ginv()}}. 
} \section{Methods}{ diff --git a/tests/testthat/test_conversion.R b/tests/testthat/test_conversion.R index 68526a694..d7ce9480d 100644 --- a/tests/testthat/test_conversion.R +++ b/tests/testthat/test_conversion.R @@ -155,7 +155,7 @@ test_that("PipeOp to GraphLearner", { expect_equal(r1, r3) - po_cv = po("learner_cv", learner = po, param_vals = list(resampling.method = "insample")) + po_cv = po("learner_cv", learner = po, resampling = rsmp("insample")) expect_true("GraphLearner" %in% class(po_cv$learner)) train_out = po_cv$train(list(task)) From c7b8a3c895ec2bcc0cc829d32d313a484f398556 Mon Sep 17 00:00:00 2001 From: sumny Date: Tue, 20 Oct 2020 17:31:57 +0200 Subject: [PATCH 6/8] fix news, fix tags of resampling params, fix tests --- NEWS.md | 1 - R/PipeOpLearnerCV.R | 21 ++++++++++++++------- man/mlr_pipeops_learner_cv.Rd | 9 +++++---- tests/testthat/test_pipeop_learnercv.R | 2 +- tests/testthat/test_usecases.R | 2 +- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1ec4012d7..77180c22b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,6 @@ * Changed PipeOps: - PipeOpLearnerCV now also wraps a Resampling allowing for a wider use of resampling methods - - PipeOpNMF: now exposes all parameters previously in .options - PipeOpMissInd now also allows for setting type = integer - PipeOpNMF: now exposes all parameters previously in .options * Changed mlr_graphs: diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index 66494fddf..d5ef530d7 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -16,13 +16,14 @@ #' for `$predict.type` `"prob"` the `.prob.` features are created, and for `$predict.type` `"se"` the new columns #' are `.response` and `.se`. `` denotes the `$id` of the [`PipeOpLearnerCV`] object. #' -#' In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean -#' (execpt for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). +#' In the case of the resampling method returning multiple predictions per row id, the predictions are aggregated via their mean +#' (except for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). #' In the case of the resampling method not returning predictions for all row ids as given in the input [`Task`][mlr3::Task], #' these predictions are added as missing. #' #' Inherits both the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] and -#' [`Resampling`][mlr3::Resampling] it is constructed from. The parameter ids of the latter one are prefixed with `"resampling."`. +#' [`Resampling`][mlr3::Resampling] it is constructed from. The parameter ids of the latter one are prefixed with `"resampling."` +#' and the tags of these parameters are extended by `"train"`. #' #' [`PipeOpLearnerCV`] can be used to create "stacking" or "super learning" [`Graph`]s that use the output of one [`Learner`][mlr3::Learner] #' as features for another [`Learner`][mlr3::Learner]. Because the [`PipeOpLearnerCV`] erases the original input features, it is often @@ -36,7 +37,7 @@ #' * `learner` :: [`Learner`][mlr3::Learner] \cr #' [`Learner`][mlr3::Learner] to use for resampling / prediction. #' * `resampling` :: [`Resampling`][mlr3::Resampling] \cr -#' [`Resamling`][mlr3::Resampling] to use for resampling. Initialized to 3-fold cross-validation. +#' [`Resampling`][mlr3::Resampling] to use for resampling. 
Initialized to 3-fold cross-validation. #' * `id` :: `character(1)`\cr #' Identifier of the resulting object, internally defaulting to the `id` of the [`Learner`][mlr3::Learner] being wrapped. #' * `param_vals` :: named `list`\cr @@ -124,8 +125,14 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", private$.resampling = as_resampling(resampling, clone = TRUE) private$.resampling$param_set$set_id = "resampling" + # tags of resampling parameters should include "train"; we fix this here + for (i in seq_along(private$.resampling$param_set$params)) { + private$.resampling$param_set$params[[i]]$tags = c("train", private$.resampling$param_set$params[[i]]$tags) + } + + id = id %??% private$.learner$id - # FIXME: probably should restrict to only classif and regr + # FIXME: probably should restrict to only classif and regr because of the potential aggregation being done below task_type = mlr_reflections$task_types[get("type") == private$.learner$task_type][order(get("package"))][1L]$task private$.additional_param_set = ParamSet$new(params = list( @@ -234,7 +241,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", prds_corrected$response = factor(prds_corrected$response, levels = levels(target), ordered = is.ordered(target)) } - # FIXME: safety cheks? + # FIXME: do we need additional safety checks here? private$pred_to_task(prds_corrected, task) }, @@ -262,7 +269,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -# Helper function to add missings to predictions based on their storage mode +# helper function to add missings to predictions based on their storage mode add_missings = function(x, len) { c(x, switch(typeof(x), "character" = rep_len(NA_character_, length.out = len), diff --git a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index f88e4bab0..c46448579 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -19,13 +19,14 @@ The \code{\link[mlr3:Task]{Task}} gets features depending on the capsuled \code{ for \verb{$predict.type} \code{"prob"} the \verb{.prob.} features are created, and for \verb{$predict.type} \code{"se"} the new columns are \verb{.response} and \verb{.se}. \verb{} denotes the \verb{$id} of the \code{\link{PipeOpLearnerCV}} object. -In the case of the resampling method returing multiple predictions per row id, the predictions are aggregated via their mean -(execpt for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). +In the case of the resampling method returning multiple predictions per row id, the predictions are aggregated via their mean +(except for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). In the case of the resampling method not returning predictions for all row ids as given in the input \code{\link[mlr3:Task]{Task}}, these predictions are added as missing. Inherits both the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} and -\code{\link[mlr3:Resampling]{Resampling}} it is constructed from. The parameter ids of the latter one are prefixed with \code{"resampling."}. +\code{\link[mlr3:Resampling]{Resampling}} it is constructed from. The parameter ids of the latter one are prefixed with \code{"resampling."} +and the tags of these parameters are extended by \code{"train"}. 
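A small sketch of the construction and parameter prefixing described above, consistent with the examples and tests elsewhere in this patch: the Resampling is passed at construction and its parameters appear in the PipeOp's param_set with the "resampling." prefix, tagged "train".

library("mlr3")
library("mlr3pipelines")
po_cv = po("learner_cv", lrn("classif.rpart"), rsmp("cv", folds = 5))
po_cv$param_set$values$resampling.folds      # 5, exposed via the "resampling." prefix
po_cv$param_set$values$resampling.folds = 3  # can be changed like any other hyperparameter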
\code{\link{PipeOpLearnerCV}} can be used to create "stacking" or "super learning" \code{\link{Graph}}s that use the output of one \code{\link[mlr3:Learner]{Learner}} as features for another \code{\link[mlr3:Learner]{Learner}}. Because the \code{\link{PipeOpLearnerCV}} erases the original input features, it is often @@ -38,7 +39,7 @@ useful to use \code{\link{PipeOpFeatureUnion}} to bind the prediction \code{\lin \item \code{learner} :: \code{\link[mlr3:Learner]{Learner}} \cr \code{\link[mlr3:Learner]{Learner}} to use for resampling / prediction. \item \code{resampling} :: \code{\link[mlr3:Resampling]{Resampling}} \cr -\code{\link[mlr3:Resampling]{Resamling}} to use for resampling. Initialized to 3-fold cross-validation. +\code{\link[mlr3:Resampling]{Resampling}} to use for resampling. Initialized to 3-fold cross-validation. \item \code{id} :: \code{character(1)}\cr Identifier of the resulting object, internally defaulting to the \code{id} of the \code{\link[mlr3:Learner]{Learner}} being wrapped. \item \code{param_vals} :: named \code{list}\cr diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 80d6bb047..67510d8ee 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -32,7 +32,7 @@ test_that("PipeOpLearnerCV - basic properties", { list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE, check_ps_default_values = FALSE) # 'insample' PipeOpLearnerCV with deterministic Learner is deterministic in every regard! expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn, param_vals = list(resampling.method = "insample")), iris_with_unambiguous_mode, check_ps_default_values = FALSE) + list(lrn, resampling = rsmp("insample")), iris_with_unambiguous_mode, check_ps_default_values = FALSE) expect_error(PipeOpLearnerCV$new()) diff --git a/tests/testthat/test_usecases.R b/tests/testthat/test_usecases.R index 40117175b..baf2d0dfb 100644 --- a/tests/testthat/test_usecases.R +++ b/tests/testthat/test_usecases.R @@ -152,7 +152,7 @@ test_that("stacking", { pipe$pipeops$classif.rpart$learner$predict_type = "prob" pipe$pipeops$classif.featureless$learner$predict_type = "prob" - pipe$pipeops$classif.featureless$param_set$values$resampling.keep_response = TRUE + pipe$pipeops$classif.featureless$param_set$values$keep_response = TRUE result = pipe$train(task)[[1]] From 2f99db84e61214bd83507715d6ce4c4095f5150a Mon Sep 17 00:00:00 2001 From: sumny Date: Thu, 11 Mar 2021 12:32:13 +0100 Subject: [PATCH 7/8] rework --- DESCRIPTION | 3 +- NAMESPACE | 1 + NEWS.md | 5 +- R/PipeOpAggregate.R | 148 +++++++++++++++++++ R/PipeOpLearnerCV.R | 127 +++++----------- R/PipeOpTuneThreshold.R | 7 +- R/zzz.R | 3 + man/PipeOp.Rd | 1 + man/PipeOpEnsemble.Rd | 1 + man/PipeOpImpute.Rd | 1 + man/PipeOpTargetTrafo.Rd | 1 + man/PipeOpTaskPreproc.Rd | 1 + man/PipeOpTaskPreprocSimple.Rd | 1 + man/mlr_pipeops.Rd | 1 + man/mlr_pipeops_aggregate.Rd | 178 +++++++++++++++++++++++ man/mlr_pipeops_boxcox.Rd | 1 + man/mlr_pipeops_branch.Rd | 1 + man/mlr_pipeops_chunk.Rd | 1 + man/mlr_pipeops_classbalancing.Rd | 1 + man/mlr_pipeops_classifavg.Rd | 1 + man/mlr_pipeops_classweights.Rd | 1 + man/mlr_pipeops_colapply.Rd | 1 + man/mlr_pipeops_collapsefactors.Rd | 1 + man/mlr_pipeops_colroles.Rd | 1 + man/mlr_pipeops_copy.Rd | 1 + man/mlr_pipeops_datefeatures.Rd | 1 + man/mlr_pipeops_encode.Rd | 1 + man/mlr_pipeops_encodeimpact.Rd | 1 + man/mlr_pipeops_encodelmer.Rd | 1 + man/mlr_pipeops_featureunion.Rd | 1 + 
man/mlr_pipeops_filter.Rd | 1 + man/mlr_pipeops_fixfactors.Rd | 1 + man/mlr_pipeops_histbin.Rd | 1 + man/mlr_pipeops_ica.Rd | 1 + man/mlr_pipeops_imputeconstant.Rd | 1 + man/mlr_pipeops_imputehist.Rd | 1 + man/mlr_pipeops_imputelearner.Rd | 1 + man/mlr_pipeops_imputemean.Rd | 1 + man/mlr_pipeops_imputemedian.Rd | 1 + man/mlr_pipeops_imputemode.Rd | 1 + man/mlr_pipeops_imputeoor.Rd | 1 + man/mlr_pipeops_imputesample.Rd | 1 + man/mlr_pipeops_kernelpca.Rd | 1 + man/mlr_pipeops_learner.Rd | 1 + man/mlr_pipeops_learner_cv.Rd | 14 +- man/mlr_pipeops_missind.Rd | 1 + man/mlr_pipeops_modelmatrix.Rd | 1 + man/mlr_pipeops_multiplicityexply.Rd | 1 + man/mlr_pipeops_multiplicityimply.Rd | 1 + man/mlr_pipeops_mutate.Rd | 1 + man/mlr_pipeops_nmf.Rd | 1 + man/mlr_pipeops_nop.Rd | 1 + man/mlr_pipeops_ovrsplit.Rd | 1 + man/mlr_pipeops_ovrunite.Rd | 1 + man/mlr_pipeops_pca.Rd | 1 + man/mlr_pipeops_proxy.Rd | 1 + man/mlr_pipeops_quantilebin.Rd | 1 + man/mlr_pipeops_randomprojection.Rd | 1 + man/mlr_pipeops_randomresponse.Rd | 1 + man/mlr_pipeops_regravg.Rd | 1 + man/mlr_pipeops_removeconstants.Rd | 1 + man/mlr_pipeops_renamecolumns.Rd | 1 + man/mlr_pipeops_replicate.Rd | 1 + man/mlr_pipeops_scale.Rd | 1 + man/mlr_pipeops_scalemaxabs.Rd | 1 + man/mlr_pipeops_scalerange.Rd | 1 + man/mlr_pipeops_select.Rd | 1 + man/mlr_pipeops_smote.Rd | 1 + man/mlr_pipeops_spatialsign.Rd | 1 + man/mlr_pipeops_subsample.Rd | 1 + man/mlr_pipeops_targetinvert.Rd | 1 + man/mlr_pipeops_targetmutate.Rd | 1 + man/mlr_pipeops_targettrafoscalerange.Rd | 1 + man/mlr_pipeops_textvectorizer.Rd | 1 + man/mlr_pipeops_threshold.Rd | 1 + man/mlr_pipeops_tunethreshold.Rd | 1 + man/mlr_pipeops_unbranch.Rd | 1 + man/mlr_pipeops_updatetarget.Rd | 1 + man/mlr_pipeops_vtreat.Rd | 1 + man/mlr_pipeops_yeojohnson.Rd | 1 + tests/testthat/test_pipeop_aggregate.R | 159 ++++++++++++++++++++ tests/testthat/test_pipeop_colroles.R | 2 +- tests/testthat/test_pipeop_learnercv.R | 170 ++-------------------- 83 files changed, 635 insertions(+), 253 deletions(-) create mode 100644 R/PipeOpAggregate.R create mode 100644 man/mlr_pipeops_aggregate.Rd create mode 100644 tests/testthat/test_pipeop_aggregate.R diff --git a/DESCRIPTION b/DESCRIPTION index 9389a34fa..381391143 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,7 +47,7 @@ Imports: data.table, digest, lgr, - mlr3 (>= 0.6.0), + mlr3 (>= 0.11.0), mlr3misc (>= 0.7.0), paradox, R6, @@ -102,6 +102,7 @@ Collate: 'LearnerAvg.R' 'NO_OP.R' 'PipeOpTaskPreproc.R' + 'PipeOpAggregate.R' 'PipeOpBoxCox.R' 'PipeOpBranch.R' 'PipeOpChunk.R' diff --git a/NAMESPACE b/NAMESPACE index f4c424ba8..a7cc92ea3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(LearnerRegrAvg) export(Multiplicity) export(NO_OP) export(PipeOp) +export(PipeOpAggregate) export(PipeOpBoxCox) export(PipeOpBranch) export(PipeOpChunk) diff --git a/NEWS.md b/NEWS.md index 9e3910f0d..d6e04b898 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ # mlr3pipelines 0.3.4-9000 +* Changed PipeOps: + - PipeOpLearnerCV now also wraps a Resampling allowing for a wider use of resampling methods +* New PipeOps: + - PipeOpAggregate # mlr3pipelines 0.3.4 @@ -18,7 +22,6 @@ # mlr3pipelines 0.3.1 * Changed PipeOps: - - PipeOpLearnerCV now also wraps a Resampling allowing for a wider use of resampling methods - PipeOpMissInd now also allows for setting type = integer - PipeOpNMF: now exposes all parameters previously in .options * Changed mlr_graphs: diff --git a/R/PipeOpAggregate.R b/R/PipeOpAggregate.R new file mode 100644 index 000000000..0f96a10da --- /dev/null 
+++ b/R/PipeOpAggregate.R @@ -0,0 +1,148 @@ +#' @title Aggregate Features Row-Wise +#' +#' @usage NULL +#' @name mlr_pipeops_aggregate +#' @format [`R6Class`] object inheriting from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Aggregates features row-wise based on multiple observations indicated via a column of role `row_reference` according to expressions given as formulas. +#' Typically used after [`PipeOpLearnerCV`] and prior to [`PipeOpFeatureUnion`] if the resampling method returned multiple predictions per row id. +#' However, note that not all [`Resampling`][mlr3::Resampling] methods result in at least one prediction per original row id. +#' +#' @section Construction: +#' ``` +#' PipeOpAggregate$new(id = "aggregate", param_vals = list()) +#' ``` +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"aggregate"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +# +#' The output is a [`Task`][mlr3::Task] with the same target as the input [`Task`][mlr3::Task], with features aggregated as specified. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `aggregation` :: named `list` of `formula`\cr +#' Expressions for how features should be aggregated, in the form of `formula`. +#' Each element of the list is a `formula` with the name of the element naming the feature to aggregate and the formula expression determining the result. +#' Each formula is evaluated within [`data.table`] environments of the [`Task`][mlr3::Task] that contain all features split via the `by` argument (see below). +#' Initialized to `list()`, i.e., no aggregation is performed. +#' * `by` :: `character(1)` | `NULL`\cr +#' Column indicating the `row_reference` column of the [`Task`][mlr3::Task] that should be the row-wise basis for the aggregation. +#' Initialized to `NULL`, i.e., no aggregation is performed. +#' +#' @section Internals: +#' A `formula` created using the `~` operator always contains a reference to the `environment` in which +#' the `formula` is created. This makes it possible to use variables in the `~`-expressions that both +#' reference either column names or variable names. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreprocSimple`]/[`PipeOpTaskPreproc`]/[`PipeOp`]. 
+#'
+#' @family PipeOps
+#' @seealso https://mlr3book.mlr-org.com/list-pipeops.html
+#' @include PipeOpTaskPreproc.R
+#' @export
+#' @examples
+#' library("mlr3")
+#' calculate_mode = function(x) {
+#'   unique_x = unique(x)
+#'   unique_x[which.max(tabulate(match(x, unique_x)))]
+#' }
+#'
+#' task = tsk("iris")
+#' learner = lrn("classif.rpart")
+#'
+#' lrnloo_po = po("learner_cv", learner, rsmp("loo"))
+#' nop = mlr_pipeops$get("nop")
+#' agg_po = po("aggregate",
+#'   aggregation = list(
+#'     classif.rpart.response = ~ calculate_mode(classif.rpart.response)
+#'   ),
+#'   by = "pre.classif.rpart")
+#'
+#' graph = gunion(list(
+#'   lrnloo_po %>>% agg_po,
+#'   nop
+#' )) %>>% po("featureunion")
+#'
+#' graph$train(task)
+#'
+#' graph$pipeops$classif.rpart$learner$predict_type = "prob"
+#' graph$param_set$values$aggregate.aggregation = list(
+#'   classif.rpart.prob.setosa = ~ mean(classif.rpart.prob.setosa),
+#'   classif.rpart.prob.versicolor = ~ mean(classif.rpart.prob.versicolor),
+#'   classif.rpart.prob.virginica = ~ mean(classif.rpart.prob.virginica)
+#' )
+#' graph$train(task)
+PipeOpAggregate = R6Class("PipeOpAggregate",
+  inherit = PipeOpTaskPreprocSimple,
+  public = list(
+    initialize = function(id = "aggregate", param_vals = list()) {
+      ps = ParamSet$new(params = list(
+        ParamUty$new("aggregation", tags = c("train", "predict", "required"), custom_check = check_aggregation_formulae),
+        ParamUty$new("by", tags = c("train", "predict", "required"), custom_check = function(x) check_string(x, null.ok = TRUE))
+      ))
+      ps$values = list(aggregation = list(), by = NULL)
+      super$initialize(id, ps, param_vals = param_vals, tags = "ensemble")
+    }
+  ),
+  private = list(
+    .transform = function(task) {
+
+      if (length(self$param_set$values$aggregation) == 0L || is.null(self$param_set$values$by)) {
+        return(task) # early exit
+      }
+
+      assert_set_equal(names(self$param_set$values$aggregation), task$feature_names)
+      assert_choice(self$param_set$values$by, choices = task$col_roles$row_reference)
+
+      taskdata = task$data(cols = c(task$feature_names, task$col_roles$row_reference))
+      taskdata_split = split(taskdata, by = self$param_set$values$by)
+
+      newdata = unique(task$data(cols = c(task$target_names, task$col_roles$row_reference[match(task$col_roles$row_reference, self$param_set$values$by)])), by = self$param_set$values$by)
+
+      nms = names(self$param_set$values$aggregation)
+      for (i in seq_along(nms)) {
+        frm = self$param_set$values$aggregation[[i]]
+        set(newdata, j = nms[i], value = unlist(map(taskdata_split, .f = function(split) eval(frm[[2L]], envir = split, enclos = environment(frm)))))
+      }
+      setnames(newdata, old = self$param_set$values$by, new = task$backend$primary_key)
+
+      # get task_type from mlr_reflections and call constructor
+      constructor = get(mlr_reflections$task_types[["task"]][chmatch(task$task_type, table = mlr_reflections$task_types[["type"]], nomatch = 0L)][[1L]])
+      newtask = invoke(constructor$new, id = task$id, backend = as_data_backend(newdata, primary_key = task$backend$primary_key), target = task$target_names, .args = task$extra_args)
+      newtask$extra_args = task$extra_args
+
+      newtask
+    }
+  )
+)
+
+mlr_pipeops$add("aggregate", PipeOpAggregate)
+
+# check the `aggregation` parameter of PipeOpAggregate
+# @param x [list] whatever `aggregation` is being set to
+# checks that `aggregation` is
+# * a named list of `formula`
+# * that each element is a one-sided formula, i.e. has no left-hand side
+check_aggregation_formulae = function(x) {
+  check_list(x, types = "formula", names = "unique") %check&&%
+    Reduce(`%check&&%`, lapply(x, 
function(xel) { + if (length(xel) != 2) { + return(sprintf("formula %s must not have a left hand side.", + deparse(xel, nlines = 1L, width.cutoff = 500L))) + } + TRUE + }), TRUE) +} + diff --git a/R/PipeOpLearnerCV.R b/R/PipeOpLearnerCV.R index 350956398..8c2c11d6a 100644 --- a/R/PipeOpLearnerCV.R +++ b/R/PipeOpLearnerCV.R @@ -1,4 +1,4 @@ -#' @title Wrap a Learner into a PipeOp with Cross-validated Predictions as Features +#' @title Wrap a Learner into a PipeOp with Resampled Predictions as Features #' #' @usage NULL #' @name mlr_pipeops_learner_cv @@ -16,10 +16,10 @@ #' for `$predict.type` `"prob"` the `.prob.` features are created, and for `$predict.type` `"se"` the new columns #' are `.response` and `.se`. `` denotes the `$id` of the [`PipeOpLearnerCV`] object. #' -#' In the case of the resampling method returning multiple predictions per row id, the predictions are aggregated via their mean -#' (except for the `"response"` in the case of a [classification Task][mlr3::TaskClassif] which is aggregated using the mode). -#' In the case of the resampling method not returning predictions for all row ids as given in the input [`Task`][mlr3::Task], -#' these predictions are added as missing. +#' In the case of the resampling method returning multiple predictions per row id, the predictions +#' are returned unaltered. The output [`Task`][mlr3::Task] always gains a `row_reference` column +#' named `pre.` indicating the original row id prior to the resampling process. [`PipeOpAggregate`] should then +#' be used to aggregate these multiple predictions per row id. #' #' Inherits both the `$param_set` (and therefore `$param_set$values`) from the [`Learner`][mlr3::Learner] and #' [`Resampling`][mlr3::Resampling] it is constructed from. The parameter ids of the latter one are prefixed with `"resampling."` @@ -50,7 +50,7 @@ #' [`PipeOpLearnerCV`] has one output channel named `"output"`, producing a [`Task`][mlr3::Task] specific to the [`Learner`][mlr3::Learner] #' type given to `learner` during construction; both during training and prediction. #' -#' The output is a task with the same target as the input task, with features replaced by predictions made by the [`Learner`][mlr3::Learner]. +#' The output is a [`Task`][mlr3::Task] with the same target as the input [`Task`][mlr3::Task], with features replaced by predictions made by the [`Learner`][mlr3::Learner]. #' During training, this prediction is the out-of-sample prediction made by [`resample`][mlr3::resample], during prediction, this is the #' ordinary prediction made on the data by a [`Learner`][mlr3::Learner] trained on the training phase data. 
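The following sketch illustrates the behaviour described above for a resampling that predicts some rows more than once (here bootstrap); it assumes the reworked PipeOpLearnerCV from this patch, with the default id taken from the wrapped learner.

library("mlr3")
library("mlr3pipelines")
po_lrn = po("learner_cv", lrn("classif.rpart"), rsmp("bootstrap", repeats = 2))
out = po_lrn$train(list(tsk("iris")))[[1]]
out$col_roles$row_reference  # "pre.classif.rpart": original row ids prior to resampling
out$nrow                     # one row per test-set prediction, so typically not 150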
#' @@ -101,7 +101,7 @@ #' task = tsk("iris") #' learner = lrn("classif.rpart") #' -#' lrncv_po = po("learner_cv", learner) +#' lrncv_po = po("learner_cv", learner, rsmp("cv")) #' lrncv_po$learner$predict_type = "response" #' #' nop = mlr_pipeops$get("nop") @@ -131,8 +131,7 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", } - id = id %??% private$.learner$id - # FIXME: probably should restrict to only classif and regr because of the potential aggregation being done below + id = id %??% self$learner$id task_type = mlr_reflections$task_types[get("type") == private$.learner$task_type][order(get("package"))][1L]$task private$.additional_param_set = ParamSet$new(params = list( @@ -179,93 +178,52 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", private = list( .train_task = function(task) { on.exit({private$.learner$state = NULL}) - - # Train a learner for predicting + # train a learner for predicting self$state = private$.learner$train(task)$state - # Compute resampled Predictions + # compute resampled predictions rr = resample(task, private$.learner, private$.resampling) prds = as.data.table(rr$prediction(predict_sets = "test")) - # Some resamplings will result in rows being sampled multiple times and some being missing - nrows_multiple = length(prds$row_id[duplicated(prds$row_id)]) - missing_rows = setdiff(task$row_ids, prds$row_id) - nrows_missing = length(missing_rows) - - if (!nrows_multiple && !nrows_missing) { - return(private$pred_to_task(prds, task)) # early exit - } - - task_type = task$task_type - prds_names = colnames(prds) - - prds_corrected = if (nrows_multiple) { - # classif: prob, regr: response, (se) - SDcols_multiple = setdiff(prds_names, if (task_type == "classif") c("row_id", "truth", "response") else c("row_id", "truth")) - - # aggregation functions: - # - mean for prob, response (regr), se - # - mode for response (classif) - prds_corrected = prds[, map(.SD, function(x) { - if (length(x) == 1L) return(x) # early exit - mean(x, na.rm = TRUE) - }), by = "row_id", .SDcols = SDcols_multiple] - - if (NROW(prds_corrected) == 0L) prds_corrected = unique(prds[, "row_id"]) - - if (task_type == "classif") { - cbind(prds_corrected, prds[, map(.SD, function(x) { - if (length(x) == 1L) return(as.character(x)) # early exit - tt = table(x) - names(tt[which.max(tt)]) - }), by = "row_id", .SDcols = "response"][, "response"]) - } else { - prds_corrected - } - } else { - if (task_type == "classif") { - prds[, "response" := as.character(response)] - } - prds[, !"truth"] - } - - if (nrows_missing) { - SDcols_missing = setdiff(prds_names, "truth") - # add missings - prds_corrected = prds_corrected[, map(.SD, add_missings, len = nrows_missing), .SDcols = SDcols_missing] - prds_corrected$row_id[is.na(prds_corrected$row_id)] = missing_rows - } - - if (task_type == "classif") { - target = task$truth(prds_corrected$row_id) - prds_corrected$response = factor(prds_corrected$response, levels = levels(target), ordered = is.ordered(target)) - } - - # FIXME: do we need additional safety checks here? 
- - private$pred_to_task(prds_corrected, task) + private$.pred_to_task(prds, task) }, .predict_task = function(task) { on.exit({private$.learner$state = NULL}) private$.learner$state = self$state - prediction = as.data.table(private$.learner$predict(task)) - private$pred_to_task(prediction, task) + prds = as.data.table(private$.learner$predict(task)) + private$.pred_to_task(prds, task) }, - pred_to_task = function(prds, task) { - if (!is.null(prds$truth)) prds[, truth := NULL] + .pred_to_task = function(prds, task) { if (!self$param_set$values$keep_response && self$learner$predict_type == "prob") { prds[, response := NULL] } - renaming = setdiff(colnames(prds), c("row_id", "row_ids")) - setnames(prds, renaming, sprintf("%s.%s", self$id, renaming)) + renaming = setdiff(colnames(prds), c("row_ids", "truth")) + setnames(prds, old = renaming, new = sprintf("%s.%s", self$id, renaming)) + setnames(prds, old = "truth", new = task$target_names) + row_reference = paste0("pre.", self$id) + while (row_reference %in% task$col_info$id) { + row_reference = paste0(row_reference, ".") + } + setnames(prds, old = "row_ids", new = row_reference) - # This can be simplified for mlr3 >= 0.11.0; - # will be always "row_ids" - row_id_col = intersect(colnames(prds), c("row_id", "row_ids")) - setnames(prds, old = row_id_col, new = task$backend$primary_key) - task$select(character(0))$cbind(prds) + # the following is needed to pertain correct row ids in the case of e.g. cv + # here we do not necessarily apply PipeOpAggregate later + backend = if (identical(sort(prds[[row_reference]]), sort(task$row_ids))) { + set(prds, j = task$backend$primary_key, value = prds[[row_reference]]) + as_data_backend(prds, primary_key = task$backend$primary_key) + } else { + as_data_backend(prds) + } + + # get task_type from mlr_reflections and call constructor + constructor = get(mlr_reflections$task_types[["task"]][chmatch(task$task_type, table = mlr_reflections$task_types[["type"]], nomatch = 0L)][[1L]]) + newtask = invoke(constructor$new, id = task$id, backend = backend, target = task$target_names, .args = task$extra_args) + newtask$extra_args = task$extra_args + newtask$set_col_roles(row_reference, "row_reference") + + newtask }, .additional_param_set = NULL, .learner = NULL, @@ -273,12 +231,5 @@ PipeOpLearnerCV = R6Class("PipeOpLearnerCV", ) ) -# helper function to add missings to predictions based on their storage mode -add_missings = function(x, len) { - c(x, switch(typeof(x), - "character" = rep_len(NA_character_, length.out = len), - "double" = rep_len(NA_real_, length.out = len), - "integer" = rep_len(NA_integer_, length.out = len))) -} - mlr_pipeops$add("learner_cv", PipeOpLearnerCV, list(R6Class("Learner", public = list(id = "learner_cv", task_type = "classif", param_set = ParamSet$new()))$new())) + diff --git a/R/PipeOpTuneThreshold.R b/R/PipeOpTuneThreshold.R index 1990c5c61..39522762a 100644 --- a/R/PipeOpTuneThreshold.R +++ b/R/PipeOpTuneThreshold.R @@ -143,7 +143,12 @@ PipeOpTuneThreshold = R6Class("PipeOpTuneThreshold", }, .task_to_prediction = function(input) { prob = as.matrix(input$data(cols = input$feature_names)) - colnames(prob) = unlist(input$levels()) + # setting the column names the following way is safer + nms = map_chr(strsplit(colnames(prob), "\\."), function(x) x[length(x)]) + if (!setequal(nms, input$levels(input$target_names)[[input$target_names]])) { + stopf("Cannot assign correct class levels to probability columns.") + } + colnames(prob) = map_chr(strsplit(colnames(prob), "\\."), function(x) 
x[length(x)]) PredictionClassif$new(input, row_ids = input$row_ids, truth = input$truth(), response = factor(colnames(prob)[max.col(prob, ties.method = "random")], levels = unlist(input$levels())), prob = prob) diff --git a/R/zzz.R b/R/zzz.R index 885b08e68..40afa1d20 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -15,6 +15,9 @@ register_mlr3 = function() { c("abstract", "meta", "missings", "feature selection", "imbalanced data", "data transform", "target transform", "ensemble", "robustify", "learner", "encode", "multiplicity"))) + if (!all(grepl("row_reference", x$task_col_roles))) { + x$task_col_roles = map(x$task_col_roles, function(col_roles) c(col_roles, "row_reference")) + } } .onLoad = function(libname, pkgname) { # nocov start diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 15c71495a..b252e56e7 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -225,6 +225,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index f9dc38e0e..f7bc22365 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -102,6 +102,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 2e254b0c8..e29fcc67b 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -132,6 +132,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 9a567930c..539cfa103 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -143,6 +143,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 54d44c0bb..6b4ac96b1 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -192,6 +192,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index 73d30ad7e..7058f250a 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -135,6 +135,7 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index 156975a4d..e2b3b3452 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, 
+\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_aggregate.Rd b/man/mlr_pipeops_aggregate.Rd new file mode 100644 index 000000000..2c087840a --- /dev/null +++ b/man/mlr_pipeops_aggregate.Rd @@ -0,0 +1,178 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpAggregate.R +\name{mlr_pipeops_aggregate} +\alias{mlr_pipeops_aggregate} +\alias{PipeOpAggregate} +\title{Aggregate Features Row-Wise} +\format{ +\code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Aggregates features row-wise based on multiple observations indicated via a column of role \code{row_reference} according to expressions given as formulas. +Typically used after \code{\link{PipeOpLearnerCV}} and prior to \code{\link{PipeOpFeatureUnion}} if the resampling method returned multiple predictions per row id. +However, note that not all \code{\link[mlr3:Resampling]{Resampling}} methods result in at least one prediction per original row id. +} +\section{Construction}{ +\preformatted{PipeOpAggregate$new(id = "aggregate", param_vals = list()) +} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"aggregate"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. +The output is a \code{\link[mlr3:Task]{Task}} with the same target as the input \code{\link[mlr3:Task]{Task}}, with features aggregated as specified. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{aggregation} :: named \code{list} of \code{formula}\cr +Expressions for how features should be aggregated, in the form of \code{formula}. +Each element of the list is a \code{formula} with the name of the element naming the feature to aggregate and the formula expression determining the result. +Each formula is evaluated within \code{\link{data.table}} environments of the \code{\link[mlr3:Task]{Task}} that contain all features split via the \code{by} argument (see below). +Initialized to \code{list()}, i.e., no aggregation is performed. +\item \code{by} :: \code{character(1)} | \code{NULL}\cr +Column indicating the \code{row_reference} column of the \code{\link[mlr3:Task]{Task}} that should be the row-wise basis for the aggregation. +Initialized to \code{NULL}, i.e., no aggregation is performed. +} +} + +\section{Internals}{ + +A \code{formula} created using the \code{~} operator always contains a reference to the \code{environment} in which +the \code{formula} is created. This makes it possible to use variables in the \code{~}-expressions that both +reference either column names or variable names. +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreprocSimple}}/\code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. 
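As a complement to the classification example shown below, a hedged sketch of aggregating resampled regression predictions; it assumes the PipeOps introduced in this patch, the `<id>.<column>` naming scheme of PipeOpLearnerCV, and that mlr3learners provides `regr.lm`.

library("mlr3")
library("mlr3pipelines")
library("mlr3learners")
po_lrn = po("learner_cv", lrn("regr.lm", predict_type = "se"), rsmp("subsampling", repeats = 3))
po_agg = po("aggregate",
  aggregation = list(
    regr.lm.response = ~ mean(regr.lm.response),
    regr.lm.se = ~ mean(regr.lm.se)
  ),
  by = "pre.regr.lm")
graph = po_lrn %>>% po_agg
graph$train(tsk("mtcars"))  # multiple predictions per row id are averaged per original row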
+} + +\examples{ +library("mlr3") +calculate_mode = function(x) { + unique_x = unique(x) + unique_x[which.max(tabulate(match(x, unique_x)))] +} + +task = tsk("iris") +learner = lrn("classif.rpart") + +lrnloo_po = po("learner_cv", learner, rsmp("loo")) +nop = mlr_pipeops$get("nop") +agg_po = po("aggregate", + aggregation = list( + classif.rpart.response = ~ calculate_mode(classif.rpart.response) + ), + by = "pre.classif.rpart") + +graph = gunion(list( + lrnloo_po \%>>\% agg_po, + nop +)) \%>>\% po("featureunion") + +graph$train(task) + +graph$pipeops$classif.rpart$learner$predict_type = "prob" +graph$param_set$values$aggregate.aggregation = list( + classif.rpart.prob.setosa = ~ mean(classif.rpart.prob.setosa), + classif.rpart.prob.versicolor = ~ mean(classif.rpart.prob.versicolor), + classif.rpart.prob.virginica = ~ mean(classif.rpart.prob.virginica) +) +graph$train(task) +} +\seealso{ +https://mlr3book.mlr-org.com/list-pipeops.html + +Other PipeOps: +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOp}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, 
+\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}}, +\code{\link{mlr_pipeops}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index cf7b8b976..a6d52e3f4 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -85,6 +85,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index 256afebab..e5242bba4 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index e7dc01689..3c0787cef 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -84,6 +84,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 4e87e9ac5..3fe479b1e 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -125,6 +125,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index f9aab7eb4..e51f59e0d 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -99,6 +99,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index deed5fcb7..91fcc2282 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -93,6 +93,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index ec8ff0d99..fdfa50a5e 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -114,6 +114,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 4404732c0..e06bc020b 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -81,6 +81,7 @@ Other PipeOps: 
\code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index f342d33d5..89c08da05 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index 02ae18124..3bf4aae61 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -103,6 +103,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5c84d7451..c35830cdc 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 80e336189..5ee052258 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -106,6 +106,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 0be88b7da..9f2a9afc0 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -98,6 +98,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index aebf5291b..8b84935ce 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index c99233a66..6f5c10dd3 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -118,6 +118,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a87ccb638..fdb9d8ef1 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -127,6 +127,7 @@ Other PipeOps: 
\code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 66a9226da..61ebd0b39 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -73,6 +73,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index 7cef85cce..0eabd0a4a 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -85,6 +85,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index da0800c70..cae4243ba 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -111,6 +111,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index 4ffd9ecdb..5392bf6e8 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index 43a3beb86..ea5dd8a94 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -72,6 +72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index f86074f27..f4eada177 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 9a34246aa..15016de56 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -72,6 +72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index b89c02ee3..82df3dd15 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -72,6 
+72,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 1ec28fc65..c82b59fe4 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -79,6 +79,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index c141c4d33..cf07c0d3a 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index e31bcb461..1617527a8 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -74,6 +74,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index 1b426e65d..85ec21d15 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 9a5a12024..09787d973 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_learner_cv.Rd b/man/mlr_pipeops_learner_cv.Rd index 9bcadeeec..1eb3457e4 100644 --- a/man/mlr_pipeops_learner_cv.Rd +++ b/man/mlr_pipeops_learner_cv.Rd @@ -3,7 +3,7 @@ \name{mlr_pipeops_learner_cv} \alias{mlr_pipeops_learner_cv} \alias{PipeOpLearnerCV} -\title{Wrap a Learner into a PipeOp with Cross-validated Predictions as Features} +\title{Wrap a Learner into a PipeOp with Resampled Predictions as Features} \format{ \code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. } @@ -19,10 +19,10 @@ The \code{\link[mlr3:Task]{Task}} gets features depending on the capsuled \code{ for \verb{$predict.type} \code{"prob"} the \verb{.prob.} features are created, and for \verb{$predict.type} \code{"se"} the new columns are \verb{.response} and \verb{.se}. \verb{} denotes the \verb{$id} of the \code{\link{PipeOpLearnerCV}} object. 
-In the case of the resampling method returning multiple predictions per row id, the predictions are aggregated via their mean -(except for the \code{"response"} in the case of a \link[mlr3:TaskClassif]{classification Task} which is aggregated using the mode). -In the case of the resampling method not returning predictions for all row ids as given in the input \code{\link[mlr3:Task]{Task}}, -these predictions are added as missing. +If the resampling method returns multiple predictions per row id, the predictions +are returned unaltered. The output \code{\link[mlr3:Task]{Task}} always gains a \code{row_reference} column +named \verb{pre.<ID>} (with \verb{<ID>} the \verb{$id} of this \code{\link{PipeOpLearnerCV}}) indicating the original row id prior to the resampling process. \code{\link{PipeOpAggregate}} should then +be used to aggregate these multiple predictions per row id. Inherits both the \verb{$param_set} (and therefore \verb{$param_set$values}) from the \code{\link[mlr3:Learner]{Learner}} and \code{\link[mlr3:Resampling]{Resampling}} it is constructed from. The parameter ids of the latter one are prefixed with \code{"resampling."} @@ -55,7 +55,7 @@ type given to \code{learner} during construction; both during training and predi \code{\link{PipeOpLearnerCV}} has one output channel named \code{"output"}, producing a \code{\link[mlr3:Task]{Task}} specific to the \code{\link[mlr3:Learner]{Learner}} type given to \code{learner} during construction; both during training and prediction. -The output is a task with the same target as the input task, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. +The output is a \code{\link[mlr3:Task]{Task}} with the same target as the input \code{\link[mlr3:Task]{Task}}, with features replaced by predictions made by the \code{\link[mlr3:Learner]{Learner}}. During training, this prediction is the out-of-sample prediction made by \code{\link[mlr3:resample]{resample}}, during prediction, this is the ordinary prediction made on the data by a \code{\link[mlr3:Learner]{Learner}} trained on the training phase data.
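As a rough usage sketch (not part of the patch itself; it only mirrors the tests added further below, and the "classif.rpart"-based column names are assumptions following the default naming scheme), a resampling that yields several predictions per original row can be aggregated back to one row per row id like this:

library("mlr3")
library("mlr3pipelines")

task = tsk("iris")
# bootstrap produces several test-set predictions per original row
polrn = PipeOpLearnerCV$new(lrn("classif.rpart"), rsmp("bootstrap", repeats = 2L))
train_out = polrn$train(list(task))[[1L]]
# train_out has one row per prediction; the row_reference column
# "pre.classif.rpart" maps each row back to its original row id

# aggregate the resampled predictions per original row (mode of the response),
# using the same mode helper as in the tests of this patch
calculate_mode = function(x) {
  unique_x = unique(x)
  unique_x[which.max(tabulate(match(x, unique_x)))]
}
poagg = PipeOpAggregate$new(param_vals = list(
  aggregation = list(classif.rpart.response = ~ calculate_mode(classif.rpart.response)),
  by = "pre.classif.rpart"
))
agg_out = poagg$train(list(train_out))[[1L]]  # one row per original row id again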
} @@ -118,7 +118,7 @@ library("mlr3") task = tsk("iris") learner = lrn("classif.rpart") -lrncv_po = po("learner_cv", learner) +lrncv_po = po("learner_cv", learner, rsmp("cv")) lrncv_po$learner$predict_type = "response" nop = mlr_pipeops$get("nop") diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index 2e04a6645..00f6b7589 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index a001d496c..35fcee80f 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -78,6 +78,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index bd0398108..9aa10147b 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -84,6 +84,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index e7fa51394..1b820c814 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -90,6 +90,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index d8b9aa8d4..9554ced38 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -95,6 +95,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 69de35de1..eb3602e6b 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -124,6 +124,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index 72e23ec84..29633ce14 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -80,6 +80,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 7d7e62379..172815f29 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -95,6 +95,7 @@ Other 
PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index 4c58a76fe..64ffaff54 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -90,6 +90,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index df07ac656..a968adccd 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index 343e40014..a110c3a7f 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 59c70c60e..113d10669 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -77,6 +77,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 7567e8ef0..96dd4906d 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index 557be29e7..9191ea642 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -104,6 +104,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index 054da76d8..f25ab5a40 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -90,6 +90,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index e4743aff6..e5a318c03 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ 
-82,6 +82,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 714611a68..299595f29 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -81,6 +81,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 5a5a4ab15..dea415fac 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -74,6 +74,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 1189e238b..718c68032 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -96,6 +96,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index cf765c8dc..d7c72eb6f 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -71,6 +71,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 34c58e39d..93c2a01bb 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -76,6 +76,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index ffaf3c5a7..df47a817c 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -92,6 +92,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index c6870bda0..59fd8e0d3 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -93,6 +93,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index e8b2ee70c..eeb735863 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -71,6 +71,7 @@ 
Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index a66619dd4..2f4c2e5ea 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index e76f0f094..33b7c9d02 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -71,6 +71,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index 6c4953cdb..fc437d1e3 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index 53f983901..c3bf733d9 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -83,6 +83,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index fccc3503c..c392f396f 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -181,6 +181,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 8aa23ccc0..e6129b226 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -76,6 +76,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 56947c7ef..f51cf126b 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -97,6 +97,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 
8cbb4dacc..2a0f63dc7 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -83,6 +83,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 245314651..29525b78f 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -96,6 +96,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index d2747fbcb..e45abd615 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -149,6 +149,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 32eb7f47c..5dba9be0b 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOp}}, +\code{\link{mlr_pipeops_aggregate}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/tests/testthat/test_pipeop_aggregate.R b/tests/testthat/test_pipeop_aggregate.R new file mode 100644 index 000000000..da6bbcf68 --- /dev/null +++ b/tests/testthat/test_pipeop_aggregate.R @@ -0,0 +1,159 @@ +context("PipeOpAggregate") + +test_that("PipeOpAggregate - basic properties", { + op = PipeOpAggregate$new() + expect_pipeop(op) + + # generic tests + task = tsk("iris") + task$select(cols = "Petal.Length") + expect_datapreproc_pipeop_class(PipeOpAggregate, task = task) + + op$param_set$values$aggregation = list(NO_DEF = ~ mean(NO_DEF)) + expect_equal(task$data(), op$train(list(task))[[1L]]$data()) + + op$param_set$values$aggregation = list() + op$param_set$values$by = "NO_DEF" + expect_equal(task$data(), op$train(list(task))[[1L]]$data()) + + op$param_set$values$aggregation = list(NO_DEF = ~ mean(NO_DEF)) + expect_error(op$train(list(task)), regexp = "Must be equal to") + op$param_set$values$aggregation = list(Petal.Length = ~ mean(Petal.Length)) + expect_error(op$train(list(task)), regexp = "Must be element of") + + # toy aggregation works + calculate_mode = function(x) { + unique_x = unique(x) + unique_x[which.max(tabulate(match(x, unique_x)))] + } + task$cbind(data.table(row_reference = rep(1:3, each = 50L))) + task$cbind(data.table(categorical = as.factor(rep(c("a", "b", "c"), 50L)))) + task$set_col_roles("row_reference", roles = "row_reference") + op$param_set$values$aggregation = list(Petal.Length = ~ mean(Petal.Length), categorical = ~ calculate_mode(categorical)) + op$param_set$values$by = "row_reference" + train_out = op$train(list(task))[[1L]] + expect_data_table(train_out$data(), nrows = 3L, ncols = 3L) + expect_equal(train_out$data(cols = "Petal.Length")[["Petal.Length"]], + aggregate(Petal.Length ~ row_reference, FUN = mean, data = task$data(cols = c(task$feature_names, 
task$col_roles$row_reference)))[["Petal.Length"]]) + expect_equal(train_out$data(cols = "categorical")[["categorical"]], + aggregate(categorical ~ row_reference, FUN = calculate_mode, data = task$data(cols = c(task$feature_names, task$col_roles$row_reference)))[["categorical"]]) +}) + +test_that("PipeOpLearnerCV and PipeOpAggregate- different methods", { + skip_on_cran() # takes too long + + calculate_mode = function(x) { + unique_x = unique(x) + unique_x[which.max(tabulate(match(x, unique_x)))] + } + + # helper + test_valid_resampled_task = function(polrn, poagg, task, predict_type) { + polrn$learner$predict_type = predict_type + + lrn_out = polrn$train(list(task))[[1L]] + lrn_out_data = lrn_out$data() + if (class(polrn)[[1L]] %in% c("ResamplingCV", "ResamplingInsample", "ResamplingLoo")) { + expect_identical(lrn_out$row_ids, task$row_ids) + } else { + expect_subset(lrn_out$data(cols = lrn_out$col_roles$row_reference)[[lrn_out$col_roles$row_reference]], task$row_ids) + } + + agg_out = poagg$train(list(lrn_out))[[1L]] + if (class(polrn)[[1L]] %in% c("ResamplingCV", "ResamplingInsample", "ResamplingLoo", "ResamplingRepeatedCV")) { + expect_identical(agg_out$row_ids, task$row_ids) + } else { + expect_subset(agg_out$row_ids, task$row_ids) + } + + if (task$task_type == "classif") { + if (polrn$learner$predict_type == "response") { + feature = agg_out$data(cols = grep("*.response", agg_out$feature_names, value = TRUE))[[1L]] + expect_true(is.factor(feature)) + expect_identical(task$class_names, levels(feature)) + } else { # "prob" + features = agg_out$data(cols = grep("*.prob*", agg_out$feature_names, value = TRUE)) + sums = rowSums(is.na(features)) + expect_true(all(sums == 0 | sums == NCOL(features))) # either all or none missing + features = features[sums == 0, ] + expect_true(all(apply(features, MARGIN = 2L, function(x) x >= 0 & x <= 1))) # between 0 and 1 + expect_equal(rowSums(features), rep_len(1, length.out = NROW(features))) # sum is 1 + } + } else { # "regr" + if (polrn$learner$predict_type == "response") { + feature = agg_out$data(cols = grep("*.response", agg_out$feature_names, value = TRUE))[[1L]] + expect_true(is.numeric(feature)) + } else { # "se" + features = agg_out$data(cols = grep("*.response|*.se", agg_out$feature_names, value = TRUE)) + expect_true(all(apply(features, MARGIN = 2L, is.numeric))) + } + } + } + + set.seed(1234) + # faster training + taskc = tsk("german_credit")$filter(sample(1000, 50)) + taskc$select("age") + taskr = tsk("boston_housing")$filter(sample(sample(506, 50))) + taskr$select("rad") + + poaggcr = PipeOpAggregate$new( + param_vals = list(aggregation = list(classif.rpart.response = ~ calculate_mode(classif.rpart.response)), + by = "pre.classif.rpart")) + poaggcp = PipeOpAggregate$new( + param_vals = list(aggregation = list(classif.rpart.prob.bad = ~ mean(classif.rpart.prob.bad), classif.rpart.prob.good = ~ mean(classif.rpart.prob.good)), + by = "pre.classif.rpart")) + poaggrs = PipeOpAggregate$new( + param_vals = list(aggregation = list(regr.lm.response = ~ mean(regr.lm.response), regr.lm.se = ~ mean(regr.lm.se)), + by = "pre.regr.lm")) + + # cv + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("cv", folds = 2L)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("cv", folds = 2L)) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # insample + polrnc = 
PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("insample")) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("insample")) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # bootstrap + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("bootstrap", repeats = 2L)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("bootstrap", repeats = 2L)) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # holdout + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("holdout")) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("holdout")) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # loo + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("loo")) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("loo")) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # repeated_cv + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("repeated_cv", folds = 2L, repeats = 2L)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("repeated_cv", folds = 2L, repeats = 2L)) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") + + # subsampling + polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("subsampling", repeats = 2L)) + polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("subsampling", repeats = 2L)) + test_valid_resampled_task(polrnc, poaggcr, taskc, "response") + test_valid_resampled_task(polrnc, poaggcp, taskc, "prob") + test_valid_resampled_task(polrnr, poaggrs, taskr, "se") +}) + diff --git a/tests/testthat/test_pipeop_colroles.R b/tests/testthat/test_pipeop_colroles.R index 0257f8b8d..adab8a9cf 100644 --- a/tests/testthat/test_pipeop_colroles.R +++ b/tests/testthat/test_pipeop_colroles.R @@ -34,7 +34,7 @@ test_that("PipeOpColRoles - functionality works", { train_out = train_pipeop(op, inputs = list(task))$output expect_equal(train_out$col_roles, list(feature = c("Sepal.Length", "Sepal.Width"), target = "Species", name = "Petal.Length", - order = "Petal.Length", stratum = character(), group = character(), weight = character(), uri = character(0) + order = "Petal.Length", stratum = character(0L), group = character(0L), weight = character(0L), uri = character(0L), row_reference = character(0L) ) ) expect_equal(train_out$row_names$row_name, task$data(cols = "Petal.Length")[[1L]]) diff --git a/tests/testthat/test_pipeop_learnercv.R b/tests/testthat/test_pipeop_learnercv.R index 67510d8ee..1112f3e13 100644 --- a/tests/testthat/test_pipeop_learnercv.R +++ b/tests/testthat/test_pipeop_learnercv.R @@ -4,18 +4,18 @@ test_that("PipeOpLearnerCV - basic properties", { lrn = mlr_learners$get("classif.featureless") po = PipeOpLearnerCV$new(lrn) expect_pipeop(po$clone(), check_ps_default_values = FALSE) - expect_data_table(po$input, nrows = 1) - expect_data_table(po$output, nrows = 1) + 
expect_data_table(po$input, nrows = 1L) + expect_data_table(po$output, nrows = 1L) task = mlr_tasks$get("iris") - tsk = train_pipeop(po, list(task = task))[[1]] + tsk = train_pipeop(po, list(task = task))[[1L]] expect_class(tsk, "Task") expect_true(tsk$nrow == 150L) expect_true(tsk$ncol == 2L) expect_equal(task$target_names, tsk$target_names) expect_equal(task$class_names, tsk$class_names) vals = factor(unique(tsk$data(cols = tsk$feature_names)$response)) - expect_character(setdiff(vals, task$class_names), len = 0) + expect_character(setdiff(vals, task$class_names), len = 0L) tsk = predict_pipeop(po, list(task = task))[[1]] expect_class(tsk, "Task") @@ -24,35 +24,34 @@ test_that("PipeOpLearnerCV - basic properties", { expect_equal(task$target_names, tsk$target_names) expect_equal(task$class_names, tsk$class_names) vals = factor(unique(tsk$data(cols = tsk$feature_names)$response)) - expect_character(setdiff(vals, task$class_names), len = 0) + expect_character(setdiff(vals, task$class_names), len = 0L) lrn = mlr_learners$get("classif.featureless") iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE, check_ps_default_values = FALSE) + list(lrn), iris_with_unambiguous_mode, predict_like_train = FALSE, deterministic_train = FALSE, affect_context_independent = FALSE, check_ps_default_values = FALSE) # 'insample' PipeOpLearnerCV with deterministic Learner is deterministic in every regard! expect_datapreproc_pipeop_class(PipeOpLearnerCV, - list(lrn, resampling = rsmp("insample")), iris_with_unambiguous_mode, check_ps_default_values = FALSE) + list(lrn, resampling = rsmp("insample")), iris_with_unambiguous_mode, affect_context_independent = FALSE, check_ps_default_values = FALSE) expect_error(PipeOpLearnerCV$new()) - }) test_that("PipeOpLearnerCV - param values", { lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn) expect_subset(c("minsplit", "resampling.folds", "keep_response"), names(polrn$param_set$params)) - expect_equal(polrn$param_set$values, list(resampling.folds = 3, keep_response = FALSE, xval = 0)) - polrn$param_set$values$minsplit = 2 - expect_equal(polrn$param_set$values, list(resampling.folds = 3, keep_response = FALSE, minsplit = 2, xval = 0)) - polrn$param_set$values$resampling.folds = 4 - expect_equal(polrn$param_set$values, list(resampling.folds = 4, keep_response = FALSE, minsplit = 2, xval = 0)) + expect_equal(polrn$param_set$values, list(resampling.folds = 3L, keep_response = FALSE, xval = 0)) + polrn$param_set$values$minsplit = 2L + expect_equal(polrn$param_set$values, list(resampling.folds = 3L, keep_response = FALSE, minsplit = 2L, xval = 0)) + polrn$param_set$values$resampling.folds = 4L + expect_equal(polrn$param_set$values, list(resampling.folds = 4L, keep_response = FALSE, minsplit = 2L, xval = 0)) }) test_that("PipeOpLearnerCV - within resampling", { lrn = mlr_learners$get("classif.rpart") gr = GraphLearner$new(PipeOpLearnerCV$new(lrn) %>>% po(id = "l2", lrn)) - resample(tsk("iris"), gr, rsmp("holdout")) + expect_r6(resample(tsk("iris"), gr, rsmp("holdout")), classes = "ResampleResult") }) test_that("PipeOpLearnerCV - insample resampling", { @@ -60,14 +59,14 @@ test_that("PipeOpLearnerCV - insample resampling", { iris_with_unambiguous_mode = mlr_tasks$get("iris")$filter(c(1:49, 52:150)) # want featureless learner without randomness polrn 
= PipeOpLearnerCV$new(lrn, rsmp("insample")) - expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]]$data(), + expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1L]]$data(), cbind(iris_with_unambiguous_mode$data(cols = "Species"), classif.featureless.response = factor("virginica", levels = levels(iris[[5]])))) lrn = mlr_learners$get("classif.rpart") polrn = PipeOpLearnerCV$new(lrn, rsmp("insample")) - expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1]], - polrn$predict(list(iris_with_unambiguous_mode))[[1]]) + expect_equal(polrn$train(list(iris_with_unambiguous_mode))[[1L]], + polrn$predict(list(iris_with_unambiguous_mode))[[1L]]) }) test_that("PipeOpLearnerCV - graph but no id", { @@ -99,140 +98,3 @@ test_that("PipeOpLearnerCV - model active binding to state", { expect_equal(po$learner_model$state, po$state) }) -test_that("PipeOpLearnerCV - different methods", { - skip_on_cran() # takes too long - - # Helper - test_valid_resampled_task = function(polrn, task, predict_type) { - polrn$learner$predict_type = predict_type - - train_out = polrn$train(list(task))[[1]] - train_out_data = train_out$data() - expect_identical(task$row_ids, train_out$row_ids) - - if (task$task_type == "classif") { - if (polrn$learner$predict_type == "response") { - feature = train_out$data(cols = grep("*.response", train_out$feature_names, value = TRUE))[[1L]] - expect_true(is.factor(feature)) - expect_identical(task$class_names, levels(feature)) - } else { # "prob" - features = train_out$data(cols = grep("*.prob*", train_out$feature_names, value = TRUE)) - sums = rowSums(is.na(features)) - expect_true(all(sums == 0 | sums == NCOL(features))) # either all or none missing - features = features[sums == 0, ] - expect_true(all(apply(features, MARGIN = 2L, function(x) x >= 0 & x <= 1))) # between 0 and 1 - expect_equal(rowSums(features), rep_len(1, length.out = NROW(features))) # sum is 1 - } - } else { # "regr" - if (polrn$learner$predict_type == "response") { - feature = train_out$data(cols = grep("*.response", train_out$feature_names, value = TRUE))[[1L]] - expect_true(is.numeric(feature)) - } else { # "se" - features = train_out$data(cols = grep("*.response|*.se", train_out$feature_names, value = TRUE)) - expect_true(all(apply(features, MARGIN = 2L, is.numeric))) - } - } - } - - set.seed(1234) - # faster training - taskc = tsk("german_credit")$filter(sample(1000, 50)) - taskc$select("age") - taskr = tsk("boston_housing")$filter(sample(sample(506, 50))) - taskr$select("rad") - - # cv - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("cv", folds = 2)) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("cv", folds = 2)) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # bootstrap - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("bootstrap", repeats = 2)) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("bootstrap", repeats = 2)) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # holdout - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("holdout")) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("holdout")) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # loo - 
polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("loo")) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("loo")) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # repeated_cv - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("repeated_cv", folds = 2, repeats = 2)) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("repeated_cv", folds = 2, repeats = 2)) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # subsampling - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rsmp("subsampling", repeats = 2)) - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rsmp("subsampling", repeats = 2)) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - test_valid_resampled_task(polrnr, taskr, "se") - - # custom - # classif - rcm = rsmp("custom") - rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50])) # no multiples no missings - polrnc = PipeOpLearnerCV$new(LearnerClassifRpart$new(), rcm) - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - - rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[1:50])) # multiples but no missings - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - - rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:45])) # no multiples but missings - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - polrnc$learner$predict_type = "response" - feature_out = polrnc$train(list(taskc))[[1L]]$data(cols = "classif.rpart.response")[[1L]] - expect_true(all(which(is.na(feature_out)) == 46:50)) - polrnc$learner$predict_type = "prob" - features_out = polrnc$train(list(taskc))[[1L]]$data(cols = c("classif.rpart.prob.good", "classif.rpart.prob.bad")) - expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) - - rcm$instantiate(taskc, train_sets = list(taskc$row_ids[1:25], taskc$row_ids[26:50]), test_sets = list(taskc$row_ids[1:25], taskc$row_ids[20:45])) # multiples and missings - test_valid_resampled_task(polrnc, taskc, "response") - test_valid_resampled_task(polrnc, taskc, "prob") - polrnc$learner$predict_type = "response" - feature_out = polrnc$train(list(taskc))[[1L]]$data(cols = "classif.rpart.response")[[1L]] - expect_true(all(which(is.na(feature_out)) == 46:50)) - polrnc$learner$predict_type = "prob" - features_out = polrnc$train(list(taskc))[[1L]]$data(cols = c("classif.rpart.prob.good", "classif.rpart.prob.bad")) - expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) - - # regr - rcm = rsmp("custom") - rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50])) # no multiples no missings - polrnr = PipeOpLearnerCV$new(mlr3learners::LearnerRegrLM$new(), rcm) - test_valid_resampled_task(polrnr, taskr, "se") - - rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), 
test_sets = list(taskr$row_ids[1:25], taskr$row_ids[1:50])) # multiples but no missings - test_valid_resampled_task(polrnr, taskr, "se") - - rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:45])) # no multiples but missings - test_valid_resampled_task(polrnr, taskr, "se") - polrnr$learner$predict_type = "se" - features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", "regr.lm.se")) - expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) - - rcm$instantiate(taskr, train_sets = list(taskr$row_ids[1:25], taskr$row_ids[26:50]), test_sets = list(taskr$row_ids[1:25], taskr$row_ids[20:45])) # multiples and missings - test_valid_resampled_task(polrnr, taskr, "se") - polrnr$learner$predict_type = "se" - features_out = polrnr$train(list(taskr))[[1L]]$data(cols = c("regr.lm.response", "regr.lm.se")) - expect_true(all(which(rowSums(is.na(features_out)) == 2L) == 46:50)) -}) From 6431bd95523295567eefbb159db0058c1775e8e2 Mon Sep 17 00:00:00 2001 From: sumny Date: Thu, 11 Mar 2021 15:16:47 +0100 Subject: [PATCH 8/8] .. --- R/PipeOpAggregate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpAggregate.R b/R/PipeOpAggregate.R index 0f96a10da..e57b057f7 100644 --- a/R/PipeOpAggregate.R +++ b/R/PipeOpAggregate.R @@ -134,11 +134,11 @@ mlr_pipeops$add("aggregate", PipeOpAggregate) # @param x [list] whatever `aggregation` is being set to # checks that `aggregation` is # * a named list of `formula` -# * that each element has only a lhs +# * that each element has only a rhs check_aggregation_formulae = function(x) { check_list(x, types = "formula", names = "unique") %check&&% Reduce(`%check&&%`, lapply(x, function(xel) { - if (length(xel) != 2) { + if (length(xel) != 2L) { return(sprintf("formula %s must not have a left hand side.", deparse(xel, nlines = 1L, width.cutoff = 500L))) }
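# Illustration only (not part of the patch): given the documented contract above,
# the check accepts a uniquely named list of right-hand-side-only formulas and
# flags any formula that carries a left-hand side (assuming the remainder of the
# function, cut off here, adds no further constraints), roughly:
#
#   check_aggregation_formulae(list(Petal.Length = ~ mean(Petal.Length)))
#   # expected: TRUE
#   check_aggregation_formulae(list(Petal.Length = Petal.Length ~ mean(Petal.Length)))
#   # expected: a message that the formula "must not have a left hand side."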