feat: adabag learner from old mlr2 (#440)

AnnaNzrv · sebffischer · be-marc · web-flow · commit df48e60b948a · 2025-10-01T12:01:09.000+02:00
Thank you for contributing a learner to the mlr3 ecosystem.
Please make sure that:

- [x] The added learner(s) are sufficiently tested
- [x] All the CI tests are passing (including the CodeFactor)
- [x] You ran `devtools::document()`
- [x] You updated the `NEWS.md` file to include the addition of the
learner
- [x] You did not modify anything **not** related to the new learner
- [x] You are listed as a contributor in the `DESCRIPTION` of the R
package

---------

Co-authored-by: Sebastian Fischer <sebf.fischer@gmail.com>
Co-authored-by: be-marc <marcbecker@posteo.de>
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -47,6 +47,7 @@ Imports:
 Suggests:
     abess,
     actuar,
+    adabag,
     ada,
     aorsf (>= 0.1.5),
     apcluster,
diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ S3method(unmarshal_model,tabpfn_model_marshaled)
 S3method(unmarshal_model,xgboost_cox_model_marshaled)
 export(LearnerClassifAbess)
 export(LearnerClassifAdaBoostM1)
+export(LearnerClassifAdabag)
 export(LearnerClassifAdaBoosting)
 export(LearnerClassifBart)
 export(LearnerClassifBayesNet)
diff --git a/NEWS.md b/NEWS.md
@@ -3,12 +3,14 @@
 ## New Features
 
 * New Learners:
-
+  - `LearnerCompRisksRandomForestSRC`
+  - `LearnerSurvBlockForest`
   - `Learner{Classif,Regr,Surv}BlockForest`
   - `Learner{Classif,Regr}ExhaustiveSearch`
   - `LearnerClassifFastai`
   - `Learner{Classif,Regr}Penalized`
   - `Learner{Classif,Regr}Bst`
+  - `LearnerClassifAdabag`
   - `LearnerClassifAdaBoosting`
   - `Learner{Classif,Regr}Evtree`
   - `LearnerClassifKnn`
@@ -21,11 +23,16 @@
 * Add new `control_custom_fun` parameter in `surv.aorsf`
 * New function `learner_is_runnable()` to check whether the
   required packages to train a learner are available.
+* Added `selected_features` property to RandomForestSRC learners (prediction doesn't work if `vars.used = 'all.trees'`)
 
 ## Bug fixes
 
 * Tests are now skipped when a suggested package is not available.
   This will make local development much more convenient.
+* Removed parameters from RandomForestSRC learners that weren't used + optimized tests
+* Removed `discrete` parameter from `surv.parametric`, so that it is impossible to return `distr6::VectorDistribution` survival predictions (softly deprecated in `mlr3proba@v0.8.1`)
+
+
 
 ## Breaking Changes
 
diff --git a/R/bibentries.R b/R/bibentries.R
@@ -812,6 +812,17 @@ bibentries = c( # nolint start
     booktitle = "International Conference on Learning Representations 2023",
     year = "2023"
   ),
+  adabag2013 = bibentry("article",
+    title = "adabag: An R Package for Classification with Boosting and Bagging",
+    volume = "54",
+    url = "https://www.jstatsoft.org/index.php/jss/article/view/v054i02",
+    doi = "10.18637/jss.v054.i02",
+    number = "2",
+    journal = "Journal of Statistical Software",
+    author = "Alfaro, Esteban and Gamez, Matias and Garc\xc3\xada, Noelia",
+    year = "2013",
+    pages = "1-35"
+  ),
   park2008plr = bibentry("article",
     title = "Penalized logistic regression for detecting gene interactions",
     author = "Park, Mee Young and Hastie, Trevor",
diff --git a/R/learner_adabag_classif_adabag.R b/R/learner_adabag_classif_adabag.R
@@ -0,0 +1,129 @@
+#' @title Classification Boosting Learner
+#' @author annanzrv
+#' @name mlr_learners_classif.adabag
+#'
+#' @description
+#' Classification boosting algorithm.
+#' Calls [adabag::boosting()] from \CRANpkg{adabag}.
+#'
+#' @section Initial parameter values:
+#' - `xval`:
+#'   * Actual default: 10L
+#'   * Initial value: 0L
+#'   * Reason for change: Set to 0 for speed.
+#'
+#' @references
+#' `r format_bib("adabag2013")`
+#'
+#' @templateVar id classif.adabag
+#' @template learner
+#'
+#'
+#' @template seealso_learner
+#' @template example
+#' @export
+LearnerClassifAdabag = R6Class("LearnerClassifAdabag",
+  inherit = LearnerClassif,
+  public = list(
+    #' @description
+    #' Creates a new instance of this [R6][R6::R6Class] class.
+    initialize = function() {
+      param_set = ps(
+        boos           = p_lgl(default = TRUE, tags = "train"),
+        coeflearn      = p_fct(default = "Breiman", levels = c("Breiman", "Freund", "Zhu"), tags = "train"),
+        cp             = p_dbl(default = 0.01, lower = 0, upper = 1, tags = "train"),
+        maxcompete     = p_int(default = 4L, lower = 0L, tags = "train"),
+        maxdepth       = p_int(default = 30L, lower = 1L, upper = 30L, tags = "train"),
+        maxsurrogate   = p_int(default = 5L, lower = 0L, tags = "train"),
+        mfinal         = p_int(default = 100L, lower = 1L, tags = "train"),
+        minbucket      = p_int(lower = 1L, tags = "train"),
+        minsplit       = p_int(default = 20L, lower = 1L, tags = "train"),
+        newmfinal      = p_int(tags = "predict"),
+        surrogatestyle = p_int(default = 0L, lower = 0L, upper = 1L, tags = "train"),
+        usesurrogate   = p_int(default = 2L, lower = 0L, upper = 2L, tags = "train"),
+        # `default` records the actual default (10L, see "Initial parameter
+        # values" above); the value the learner starts with is set to 0L below.
+        xval           = p_int(default = 10L, lower = 0L, tags = "train")
+      )
+      # Initial value differs from the actual default: set to 0 for speed.
+      param_set$values = list(xval = 0L)
+
+      super$initialize(
+        id = "classif.adabag",
+        packages = c("adabag", "rpart"),
+        feature_types = c("integer", "numeric", "factor"),
+        predict_types = c("response", "prob"),
+        param_set = param_set,
+        properties = c("importance", "missings", "multiclass", "twoclass"),
+        man = "mlr3extralearners::mlr_learners_classif.adabag",
+        label = "Adabag Boosting"
+      )
+    },
+    #' @description
+    #' The importance scores are extracted from the model.
+    #' @return Named `numeric()`.
+    importance = function() {
+      if (is.null(self$model)) {
+        stopf("No model stored")
+      }
+      # Most important features first.
+      sort(self$model$importance, decreasing = TRUE)
+    }
+  ),
+
+  private = list(
+    # Fits the boosting model on `task`.
+    # Parameters tagged "train" are split in two groups: those that are formal
+    # arguments of `rpart::rpart.control()` are bundled into a control object
+    # and passed as `control`; the remaining ones go to `adabag::boosting()`
+    # directly. Returns the fitted model.
+    .train = function(task) {
+      # get parameters for training
+      pars = self$param_set$get_values(tags = "train")
+
+      args_ctrl = formalArgs(rpart::rpart.control)
+      pars_ctrl = pars[names(pars) %in% args_ctrl]
+
+      # Create rpart control object
+      ctrl = invoke(
+        rpart::rpart.control,
+        .args = pars_ctrl
+      )
+
+      # Remove rpart control parameters from pars
+      pars = pars[names(pars) %nin% args_ctrl]
+
+      # Add control to pars
+      pars$control = ctrl
+
+      # Get formula and data
+      formula = task$formula()
+      data = task$data()
+
+      # Train model
+      invoke(adabag::boosting,
+        formula = formula,
+        data = data,
+        .args = pars
+      )
+    },
+    # Predicts on `task` with the stored model.
+    # Returns `list(prob = ...)` when the predict type is "prob" and
+    # `list(response = ...)` otherwise.
+    .predict = function(task) {
+      # get parameters with tag "predict"
+      pars = self$param_set$get_values(tags = "predict")
+
+      # get newdata and ensure same ordering in train and predict
+      newdata = ordered_features(task, self)
+
+      # Calculate predictions for the selected predict type
+      type = self$predict_type
+
+      # adaboost requires target column: add a placeholder response with the
+      # task's class levels. It is stored under the task's own target name
+      # (the response of the formula used in `.train`) instead of a hard-coded
+      # "target", which would not match tasks whose target is named
+      # differently and could overwrite a feature that is called "target".
+      target_name = task$target_names
+      newdata[, target_name] = factor(rep(1, nrow(newdata)), levels = task$class_names)
+
+      pred = invoke(predict, self$model, newdata = newdata, .args = pars)
+
+      if (type == "prob") {
+        # Label the probability columns with the task's class names
+        prob = mlr3misc::set_col_names(pred$prob, task$class_names)
+        list(prob = prob)
+      } else {
+        # Create response factor with correct levels
+        response = factor(pred$class, levels = task$class_names)
+        list(response = response)
+      }
+    }
+  )
+)
+
+# Add the learner class to `.extralrns_dict` under the key "classif.adabag".
+.extralrns_dict$add("classif.adabag", LearnerClassifAdabag)
diff --git a/man/mlr_learners_classif.adabag.Rd b/man/mlr_learners_classif.adabag.Rd
diff --git a/tests/testthat/test_adabag_classif_adabag.R b/tests/testthat/test_adabag_classif_adabag.R
@@ -0,0 +1,9 @@
+# The whole file is skipped when the adabag package is not installed.
+skip_if_not_installed("adabag")
+
+test_that("autotest", {
+  adabag_learner = lrn("classif.adabag")
+  expect_learner(adabag_learner)
+  # run the autotest with "utf8_feature_names" passed as `exclude`
+  autotest_outcome = run_autotest(adabag_learner, exclude = "utf8_feature_names")
+  expect_true(autotest_outcome, info = autotest_outcome$error)
+})
diff --git a/tests/testthat/test_paramtest_adabag_classif_adabag.R b/tests/testthat/test_paramtest_adabag_classif_adabag.R