diff --git a/autoemulate/calibration/history_matching.py b/autoemulate/calibration/history_matching.py index 2cca23bdd..05d23e6fa 100644 --- a/autoemulate/calibration/history_matching.py +++ b/autoemulate/calibration/history_matching.py @@ -12,10 +12,11 @@ from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.logging_config import get_configured_logger from autoemulate.core.plotting import display_figure +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.core.results import Result from autoemulate.core.types import DeviceLike, DistributionLike, TensorLike from autoemulate.data.utils import set_random_seed -from autoemulate.emulators import TransformedEmulator, get_emulator_class +from autoemulate.emulators import Emulator from autoemulate.simulations.base import Simulator logger = logging.getLogger("autoemulate") @@ -284,13 +285,15 @@ class HistoryMatchingWorkflow(HistoryMatching): def __init__( self, simulator: Simulator, - result: Result, observations: dict[str, tuple[float, float]] | dict[str, float], + emulator: Emulator | None = None, + result: Result | None = None, threshold: float = 3.0, model_discrepancy: float = 0.0, rank: int = 1, train_x: TensorLike | None = None, train_y: TensorLike | None = None, + transformed_emulator_params: dict | None = None, calibration_params: list[str] | None = None, device: DeviceLike | None = None, random_seed: int | None = None, @@ -303,12 +306,17 @@ def __init__( ---------- simulator: Simulator A simulator. - result: Result - A Result object containing the pre-trained emulator and its hyperparameters. observations: dict[str, tuple[float, float] | dict[str, float] For each output variable, specifies observed [value, noise] (with noise specified as variances). In case of no uncertainty in observations, provides just the observed value. + emulator: Emulator | None + An Emulator object containing the pre-trained emulator. If not provided, a + Result object must be provided instead. Defaults to None. + result: Result | None + A Result object containing the pre-trained emulator and its hyperparameters. + If not provided, an Emulator object must be provided instead. + Defaults to None. threshold: float Implausibility threshold (query points with implausibility scores that exceed this value are ruled out). Defaults to 3, which is considered @@ -324,6 +332,11 @@ def __init__( Optional tensor of input data the emulator was trained on. train_y: TensorLike | None Optional tensor of output data the emulator was trained on. + transformed_emulator_params: dict | None + Optional dictionary of parameters for TransformedEmulator. These are + already contained in a Result object, so only needed if a + TransformedEmulator instance with non-default params is provided. + Defaults to None. calibration_params: list[str] | None Optional subset of parameters to calibrate. These have to correspond to the parameters that the emulator was trained on. If None, calibrate all @@ -342,8 +355,15 @@ def __init__( set_random_seed(seed=random_seed) self.logger, self.progress_bar = get_configured_logger(log_level) - self.result = result - self.emulator = result.model + if result is not None: + self.emulator = result.model + elif emulator is not None: + self.emulator = emulator + else: + msg = "Either `emulator` or `result` must be provided." 
+ raise ValueError(msg) + + self.transformed_emulator_params = transformed_emulator_params or {} self.emulator.device = self.device # New data is simulated in `run()` and appended here @@ -623,19 +643,16 @@ def refit_emulator(self, x: TensorLike, y: TensorLike) -> None: y: TensorLike Tensor of output data to refit the emulator on. """ - # Create a fresh model with the same configuration - self.emulator = TransformedEmulator( - x.float(), - y.float(), - model=get_emulator_class(self.result.model_name), - x_transforms=self.result.x_transforms, - y_transforms=self.result.y_transforms, + x = x.float().to(self.device) + y = y.float().to(self.device) + self.emulator = fit_from_reinitialized( + x, + y, + emulator=self.emulator, + transformed_emulator_params=self.transformed_emulator_params, device=self.device, - **self.result.params, ) - self.emulator.fit(x, y) - def run( self, n_simulations: int = 100, diff --git a/autoemulate/core/compare.py b/autoemulate/core/compare.py index 5c2fb3e38..9be163b31 100644 --- a/autoemulate/core/compare.py +++ b/autoemulate/core/compare.py @@ -19,6 +19,7 @@ display_figure, plot_xy, ) +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.core.results import Result, Results from autoemulate.core.save import ModelSerialiser from autoemulate.core.tuner import Tuner @@ -566,34 +567,21 @@ def fit_from_reinitialized( # Get the result to use result = self.best_result() if result_id is None else self.get_result(result_id) - # Set the random seed for initialization - if random_seed is not None: - set_random_seed(seed=random_seed) - # Convert and move the new data to device x_tensor, y_tensor = self._convert_to_tensors(x, y) x_tensor, y_tensor = self._move_tensors_to_device(x_tensor, y_tensor) - # Get the model class from the model name - model_class = get_emulator_class(result.model_name) - - # Create a fresh model with the same configuration - fresh_model = TransformedEmulator( + # NOTE: function passes data to the Emulator model which handles conversion to + # tensors and device handling + return fit_from_reinitialized( x_tensor, y_tensor, - model=model_class, - x_transforms=result.x_transforms, - y_transforms=result.y_transforms, + emulator=result.model, + transformed_emulator_params=transformed_emulator_params, device=self.device, - **result.params, - **transformed_emulator_params, + random_seed=random_seed, ) - # Fit the fresh model on the new data - fresh_model.fit(x_tensor, y_tensor) - - return fresh_model - def plot( # noqa: PLR0912, PLR0915 self, model_obj: int | Emulator | Result, diff --git a/autoemulate/core/reinitialize.py b/autoemulate/core/reinitialize.py new file mode 100644 index 000000000..571ab50dc --- /dev/null +++ b/autoemulate/core/reinitialize.py @@ -0,0 +1,99 @@ +import inspect + +from autoemulate.core.types import DeviceLike, TensorLike +from autoemulate.data.utils import set_random_seed +from autoemulate.emulators import Emulator, TransformedEmulator, get_emulator_class + + +def fit_from_reinitialized( + x: TensorLike, + y: TensorLike, + emulator: Emulator, + transformed_emulator_params: dict | None = None, + device: DeviceLike | None = None, + random_seed: int | None = None, +): + """ + Fit a fresh model with reinitialized parameters using the best configuration. + + This method creates a new model instance with the same configuration as the + best (or specified) model from the comparison, but with freshly initialized + parameters fitted on the provided data. 
+
+    Parameters
+    ----------
+    x: TensorLike
+        Input features for training the fresh model.
+    y: TensorLike
+        Target values for training the fresh model.
+    emulator: Emulator
+        An Emulator object containing the pre-trained emulator.
+    transformed_emulator_params: dict | None
+        Additional parameters passed to the TransformedEmulator wrapper. When
+        None, no additional parameters are passed. Defaults to None.
+    device: DeviceLike | None
+        Device to use for model fitting (e.g., 'cpu' or 'cuda'). If None, the default
+        device is used. Defaults to None.
+    random_seed: int | None
+        Random seed for parameter initialization. Defaults to None.
+
+    Returns
+    -------
+    TransformedEmulator
+        A new model instance with the same configuration but fresh parameters
+        fitted on the provided data.
+
+    Notes
+    -----
+    Unlike TransformedEmulator.refit(), which retrains an existing model,
+    this function creates a completely new model instance with reinitialized
+    parameters. This ensures that the same initialization conditions are applied
+    when fitting on new data. This can have an effect, for example, on kernel
+    initialization in Gaussian Processes or weight initialization in neural
+    networks.
+    """
+    if random_seed is not None:
+        set_random_seed(seed=random_seed)
+
+    # Extract emulator and its parameters from Emulator instance
+    if isinstance(emulator, TransformedEmulator):
+        model = emulator.model
+        emulator_name = emulator.untransformed_model_name
+        x_transforms = emulator.x_transforms
+        y_transforms = emulator.y_transforms
+    else:
+        model = emulator
+        emulator_name = emulator.model_name()
+        x_transforms = None
+        y_transforms = None
+
+    # Extract parameters from the provided emulator instance
+    model_cls = get_emulator_class(emulator_name)
+    init_sig = inspect.signature(model_cls.__init__)
+    emulator_params = {}
+    for param_name in init_sig.parameters:
+        if param_name in ["self", "x", "y", "device"]:
+            continue
+        # NOTE: some emulators have standardize_x/y params option
+        # this is different to TransformedEmulator x/y transforms
+        if param_name == "standardize_x":
+            emulator_params["standardize_x"] = bool(model.x_transform)
+        if param_name == "standardize_y":
+            emulator_params["standardize_y"] = bool(model.y_transform)
+        if hasattr(model, param_name):
+            emulator_params[param_name] = getattr(model, param_name)
+
+    transformed_emulator_params = transformed_emulator_params or {}
+
+    new_emulator = TransformedEmulator(
+        x.float(),
+        y.float(),
+        model=model_cls,
+        x_transforms=x_transforms,
+        y_transforms=y_transforms,
+        device=device,
+        **emulator_params,
+        **transformed_emulator_params,
+    )
+
+    new_emulator.fit(x.float(), y.float())
+    return new_emulator
diff --git a/autoemulate/emulators/base.py b/autoemulate/emulators/base.py
index c352f4eef..f38711b40 100644
--- a/autoemulate/emulators/base.py
+++ b/autoemulate/emulators/base.py
@@ -43,24 +43,22 @@ def _fit(self, x: TensorLike, y: TensorLike): ...
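For reference, a minimal usage sketch of the new helper (reviewer illustration, not part of the patch): the import paths, `MLP`, and the `fit_from_reinitialized` signature are taken from this diff; the toy tensors and the `epochs` value are made up.

```python
import torch

from autoemulate.core.reinitialize import fit_from_reinitialized
from autoemulate.emulators.nn.mlp import MLP

# Toy data (hypothetical): 50 samples, 3 inputs, 2 outputs.
x, y = torch.randn(50, 3), torch.randn(50, 2)
emulator = MLP(x, y, epochs=50)
emulator.fit(x, y)

# New data arrives: rebuild an emulator with the same configuration but freshly
# initialized parameters, fitted on the combined data.
x_new, y_new = torch.randn(20, 3), torch.randn(20, 2)
refit = fit_from_reinitialized(
    torch.cat([x, x_new]),
    torch.cat([y, y_new]),
    emulator=emulator,
    random_seed=0,
)
```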
def fit(self, x: TensorLike, y: TensorLike): """Fit the emulator to the provided data.""" - if isinstance(x, TensorLike) and isinstance(y, TensorLike): - self._check(x, y) - # Ensure x and y are tensors and 2D - x, y = self._convert_to_tensors(x, y) - - # Move to device - x, y = self._move_tensors_to_device(x, y) - - # Fit transforms - if self.x_transform is not None: - self.x_transform.fit(x) - if self.y_transform is not None: - self.y_transform.fit(y) - x = self.x_transform(x) if self.x_transform is not None else x - y = self.y_transform(y) if self.y_transform is not None else y - - # Fit emulator - self._fit(x, y) + # Ensure x and y are tensors and 2D + x, y = self._convert_to_tensors(x, y) + + # Move to device + x, y = self._move_tensors_to_device(x, y) + + # Fit transforms + if self.x_transform is not None: + self.x_transform.fit(x) + if self.y_transform is not None: + self.y_transform.fit(y) + x = self.x_transform(x) if self.x_transform is not None else x + y = self.y_transform(y) if self.y_transform is not None else y + + # Fit emulator + self._fit(x, y) self.is_fitted_ = True @abstractmethod @@ -280,7 +278,7 @@ def get_random_params(cls): } @classmethod - def scheduler_params(cls) -> dict: + def get_scheduler_params(cls) -> dict: """ Return a random parameters for the learning rate scheduler. @@ -290,37 +288,37 @@ def scheduler_params(cls) -> dict: all_params = [ { "scheduler_cls": [None], - "scheduler_kwargs": [{}], + "scheduler_params": [{}], }, { "scheduler_cls": [ExponentialLR], - "scheduler_kwargs": [ + "scheduler_params": [ {"gamma": 0.9}, {"gamma": 0.95}, ], }, { "scheduler_cls": [LRScheduler], - "scheduler_kwargs": [ + "scheduler_params": [ {"policy": "ReduceLROnPlateau", "patience": 5, "factor": 0.5} ], }, # TODO: investigate these suggestions from copilot, issue: #597 # { # "scheduler_cls": [CosineAnnealingLR], - # "scheduler_kwargs": [{"T_max": 10, "eta_min": 0.01}], + # "scheduler_params": [{"T_max": 10, "eta_min": 0.01}], # }, # { # "scheduler_cls": [ReduceLROnPlateau], - # "scheduler_kwargs": [{"mode": "min", "factor": 0.1, "patience": 5}], + # "scheduler_params": [{"mode": "min", "factor": 0.1, "patience": 5}], # }, # { # "scheduler_cls": [StepLR], - # "scheduler_kwargs": [{"step_size": 10, "gamma": 0.1}], + # "scheduler_params": [{"step_size": 10, "gamma": 0.1}], # }, # { # "scheduler_cls": [CyclicLR], - # "scheduler_kwargs": [{ + # "scheduler_params": [{ # "base_lr": 1e-3, # "max_lr": 1e-1, # "step_size_up": 5, @@ -329,7 +327,7 @@ def scheduler_params(cls) -> dict: # }, # { # "scheduler_cls": [OneCycleLR], - # "scheduler_kwargs": [{ + # "scheduler_params": [{ # "max_lr": 1e-1, # "total_steps": self.epochs, # "pct_start": 0.3, @@ -340,40 +338,28 @@ def scheduler_params(cls) -> dict: # Randomly select one of the parameter sets return random.choice(all_params) - def scheduler_setup(self, kwargs: dict | None = None): + def scheduler_setup(self, scheduler_params: dict | None = None): """ Set up the learning rate scheduler for the emulator. Parameters ---------- - kwargs : dict | None - Keyword arguments for the model. This should include scheduler_kwargs. + scheduler_params : dict | None + Keyword arguments for the scheduler. """ - if kwargs is None: - msg = ( - "Provide a kwargs dictionary including " - "scheduler_kwargs to set up the scheduler." - ) + if scheduler_params is None: + msg = "Provide scheduler_params to set up the scheduler." raise ValueError(msg) if not hasattr(self, "optimizer"): msg = "Optimizer must be set before setting up the scheduler." 
raise RuntimeError(msg) - # Extract scheduler-specific kwargs if present - try: - assert type(kwargs) is dict - scheduler_kwargs = kwargs.pop("scheduler_kwargs", {}) - except AttributeError: - # If kwargs does not contain scheduler_kwargs, throw an error - msg = "No kwargs for scheduler setup detected." - raise ValueError(msg) from None - # Set up the scheduler if a scheduler class is defined if self.scheduler_cls is None: self.scheduler = None else: - self.scheduler = self.scheduler_cls(self.optimizer, **scheduler_kwargs) # type: ignore[call-arg] + self.scheduler = self.scheduler_cls(self.optimizer, **scheduler_params) # type: ignore[call-arg] class DeterministicEmulator(Emulator): diff --git a/autoemulate/emulators/ensemble.py b/autoemulate/emulators/ensemble.py index 555595f2f..79dad8703 100644 --- a/autoemulate/emulators/ensemble.py +++ b/autoemulate/emulators/ensemble.py @@ -4,17 +4,8 @@ from torch import Tensor from autoemulate.core.device import TorchDeviceMixin -from autoemulate.core.types import ( - DeviceLike, - GaussianLike, - TensorLike, - TuneParams, -) -from autoemulate.emulators.base import ( - DropoutTorchBackend, - Emulator, - GaussianEmulator, -) +from autoemulate.core.types import DeviceLike, GaussianLike, TensorLike, TuneParams +from autoemulate.emulators.base import DropoutTorchBackend, Emulator, GaussianEmulator from autoemulate.emulators.nn.mlp import MLP from autoemulate.transforms.standardize import StandardizeTransform from autoemulate.transforms.utils import make_positive_definite @@ -148,7 +139,7 @@ def __init__( standardize_y: bool = True, n_emulators: int = 4, device: DeviceLike | None = None, - **mlp_kwargs, + mlp_kwargs: dict | None = None, ): """ Initialize an ensemble of MLPs. @@ -167,9 +158,10 @@ def __init__( Number of MLP emulators to create in the ensemble. Defaults to 4. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda"). Defaults to None. - **mlp_kwargs: dict + mlp_kwargs: dict | None Additional keyword arguments for the MLP constructor. """ + self.mlp_kwargs = mlp_kwargs or {} emulators = [ MLP( x, @@ -177,7 +169,7 @@ def __init__( standardize_x=standardize_x, standardize_y=standardize_y, device=device, - **mlp_kwargs, + **self.mlp_kwargs, ) for i in range(n_emulators) ] @@ -305,7 +297,7 @@ def __init__( standardize_y: bool = True, dropout_prob: float = 0.2, device: DeviceLike | None = None, - **mlp_kwargs, + mlp_kwargs: dict | None = None, ): """ Initialize an ensemble of MLPs with dropout. @@ -324,9 +316,10 @@ def __init__( Dropout probability to use in the MLP layers. Defaults to 0.2. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda"). Defaults to None. - **mlp_kwargs: dict + mlp_kwargs: dict | None Additional keyword arguments for the MLP constructor. 
""" + self.mlp_kwargs = mlp_kwargs or {} super().__init__( MLP( x, @@ -335,7 +328,7 @@ def __init__( standardize_y=standardize_y, dropout_prob=dropout_prob, device=device, - **mlp_kwargs, + **self.mlp_kwargs, ), device=device, ) diff --git a/autoemulate/emulators/gaussian_process/exact.py b/autoemulate/emulators/gaussian_process/exact.py index f4d639a7c..cbc00f043 100644 --- a/autoemulate/emulators/gaussian_process/exact.py +++ b/autoemulate/emulators/gaussian_process/exact.py @@ -70,7 +70,8 @@ def __init__( lr: float = 2e-1, early_stopping: EarlyStopping | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Initialize the GaussianProcess emulator. @@ -108,7 +109,10 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device (usually CPU or GPU). Defaults to None. - scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None Additional keyword arguments for the learning rate scheduler. """ # Init device @@ -123,8 +127,10 @@ def __init__( num_tasks_torch = torch.Size([num_tasks]) # Initialize the mean and covariance modules - mean_module = mean_module_fn(n_features, num_tasks_torch) - covar_module = covar_module_fn(n_features, num_tasks_torch) + self.mean_module_fn = mean_module_fn + self.covar_module_fn = covar_module_fn + mean_module = self.mean_module_fn(n_features, num_tasks_torch) + covar_module = self.covar_module_fn(n_features, num_tasks_torch) # If the combined kernel is not a ScaleKernel, wrap it in one covar_module = ( @@ -134,7 +140,8 @@ def __init__( ) # Init likelihood - likelihood = likelihood_cls(num_tasks=num_tasks) + self.likelihood_cls = likelihood_cls + likelihood = self.likelihood_cls(num_tasks=num_tasks) likelihood = likelihood.to(self.device) # Init must be called with preprocessed data @@ -149,15 +156,19 @@ def __init__( self.epochs = epochs self.lr = lr self.optimizer = self.optimizer_cls(self.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.early_stopping = early_stopping self.posterior_predictive = posterior_predictive self.num_tasks = num_tasks self.to(self.device) # Fix mean and kernel if required - self._fix_module_params(self.mean_module, fixed_mean_params) - self._fix_module_params(self.covar_module, fixed_covar_params) + self.fixed_mean_params = fixed_mean_params + self.fixed_covar_params = fixed_covar_params + self._fix_module_params(self.mean_module, self.fixed_mean_params) + self._fix_module_params(self.covar_module, self.fixed_covar_params) @staticmethod def _fix_module_params(module: nn.Module, fixed_params: bool): @@ -293,7 +304,7 @@ def _predict(self, x: TensorLike, with_grad: bool): @staticmethod def get_tune_params(): """Return the hyperparameters to tune for the Gaussian Process model.""" - scheduler_params = GaussianProcess.scheduler_params() + scheduler_specs = GaussianProcess.get_scheduler_params() return { "mean_module_fn": [ constant_mean, @@ -314,8 +325,8 @@ def get_tune_params(): "epochs": [50, 100, 200], "lr": [5e-1, 1e-1, 5e-2, 1e-2], "likelihood_cls": [MultitaskGaussianLikelihood], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": 
scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } @@ -349,7 +360,8 @@ def __init__( early_stopping: EarlyStopping | None = None, seed: int | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Initialize the GaussianProcessCorrelated emulator. @@ -391,7 +403,9 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device (usually CPU or GPU). Defaults to None. - scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + scheduler_params: dict Additional keyword arguments for the learning rate scheduler. """ # Init device @@ -405,8 +419,10 @@ def __init__( # Initialize the mean and covariance modules n_features = tuple(x.shape)[1] - mean_module = mean_module_fn(n_features, None) - covar_module = covar_module_fn(n_features, None) + self.mean_module_fn = mean_module_fn + self.covar_module_fn = covar_module_fn + mean_module = self.mean_module_fn(n_features, None) + covar_module = self.covar_module_fn(n_features, None) # Mean and covariance modules for multitask num_tasks = tuple(y.shape)[1] @@ -424,8 +440,10 @@ def __init__( likelihood = likelihood.to(self.device) # Fix mean and kernel if required - self._fix_module_params(mean_module, fixed_mean_params) - self._fix_module_params(covar_module, fixed_covar_params) + self.fixed_mean_params = fixed_mean_params + self.fixed_covar_params = fixed_covar_params + self._fix_module_params(mean_module, self.fixed_mean_params) + self._fix_module_params(covar_module, self.fixed_covar_params) # Init must be called with preprocessed data gpytorch.models.ExactGP.__init__( @@ -442,7 +460,9 @@ def __init__( self.epochs = epochs self.lr = lr self.optimizer = self.optimizer_cls(self.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.early_stopping = early_stopping self.posterior_predictive = posterior_predictive self.num_tasks = num_tasks @@ -536,7 +556,7 @@ def __init__( lr: float = lr, early_stopping: EarlyStopping | None = early_stopping, device: DeviceLike | None = device, - **scheduler_kwargs, + **scheduler_params, ): super().__init__( x, @@ -553,7 +573,7 @@ def __init__( lr, early_stopping, device, - **scheduler_kwargs, + **scheduler_params, ) @staticmethod diff --git a/autoemulate/emulators/nn/mlp.py b/autoemulate/emulators/nn/mlp.py index a1fa9d5fe..6c493e3e0 100644 --- a/autoemulate/emulators/nn/mlp.py +++ b/autoemulate/emulators/nn/mlp.py @@ -1,4 +1,5 @@ from torch import nn +from torch.optim.lr_scheduler import LRScheduler from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.types import DeviceLike, TensorLike @@ -34,7 +35,8 @@ def __init__( lr: float = 1e-2, random_seed: int | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Multi-Layer Perceptron (MLP) emulator. @@ -77,7 +79,10 @@ def __init__( Random seed for reproducibility. If None, no seed is set. Defaults to None. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda", "mps"). Defaults to None. 
- **scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None Additional keyword arguments related to the scheduler. Raises @@ -95,29 +100,40 @@ def __init__( x, y = self._convert_to_tensors(x, y) # Construct the MLP layers - layer_dims = [x.shape[1], *layer_dims] if layer_dims else [x.shape[1], 32, 16] + self.layer_dims = ( + [x.shape[1], *layer_dims] if layer_dims else [x.shape[1], 32, 16] + ) + self.dropout_prob = dropout_prob + self.activation_cls = activation_cls + layers = [] - for idx, dim in enumerate(layer_dims[1:]): - layers.append(nn.Linear(layer_dims[idx], dim, device=self.device)) - layers.append(activation_cls()) - if dropout_prob is not None: - layers.append(nn.Dropout(p=dropout_prob)) + for idx, dim in enumerate(self.layer_dims[1:]): + layers.append(nn.Linear(self.layer_dims[idx], dim, device=self.device)) + layers.append(self.activation_cls()) + if self.dropout_prob is not None: + layers.append(nn.Dropout(p=self.dropout_prob)) # Add final layer without activation num_tasks = y.shape[1] - layers.append(nn.Linear(layer_dims[-1], num_tasks, device=self.device)) + layers.append(nn.Linear(self.layer_dims[-1], num_tasks, device=self.device)) self.nn = nn.Sequential(*layers) # Finalize initialization - self._initialize_weights(weight_init, scale, bias_init) + self.weight_init = weight_init + self.scale = scale + self.bias_init = bias_init + self._initialize_weights(self.weight_init, self.scale, self.bias_init) self.x_transform = StandardizeTransform() if standardize_x else None self.y_transform = StandardizeTransform() if standardize_y else None self.epochs = epochs + self.loss_fn_cls = loss_fn_cls self.loss_fn = loss_fn_cls() self.lr = lr self.batch_size = batch_size self.optimizer = self.optimizer_cls(self.nn.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.to(self.device) def forward(self, x): @@ -132,7 +148,7 @@ def is_multioutput() -> bool: @staticmethod def get_tune_params(): """Return a dictionary of hyperparameters to tune.""" - scheduler_params = MLP.scheduler_params() + scheduler_specs = MLP.get_scheduler_params() return { "epochs": [100, 200], "layer_dims": [[8, 4], [16, 8], [32, 16], [64, 32, 16]], @@ -142,6 +158,6 @@ def get_tune_params(): "scale": [0.1, 1.0], "bias_init": ["default", "zeros"], "dropout_prob": [0.3, None], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } diff --git a/autoemulate/emulators/polynomials.py b/autoemulate/emulators/polynomials.py index dd11d2210..84f7ea3bf 100644 --- a/autoemulate/emulators/polynomials.py +++ b/autoemulate/emulators/polynomials.py @@ -1,5 +1,6 @@ import torch from torch import nn +from torch.optim.lr_scheduler import LRScheduler from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.types import DeviceLike, TensorLike @@ -29,7 +30,8 @@ def __init__( batch_size: int = 16, random_seed: int | None = None, device: DeviceLike | None = None, - **kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """Initialize a PolynomialRegression emulator. 
@@ -56,8 +58,11 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device. Defaults to None. - **kwargs: dict - Additional keyword arguments. + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None + Additional keyword arguments related to the scheduler. """ super().__init__() TorchDeviceMixin.__init__(self, device=device) @@ -80,7 +85,9 @@ def __init__( self.poly.n_output_features, self.n_outputs, bias=False ).to(self.device) self.optimizer = self.optimizer_cls(self.linear.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward pass through for polynomial regression.""" @@ -96,11 +103,11 @@ def is_multioutput() -> bool: @staticmethod def get_tune_params(): """Return a dictionary of hyperparameters to tune.""" - scheduler_params = PolynomialRegression.scheduler_params() + scheduler_specs = PolynomialRegression.get_scheduler_params() return { "lr": [1e-3, 1e-2, 1e-1, 2e-1], "epochs": [50, 100, 200, 500, 1000], "batch_size": [8, 16, 32], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } diff --git a/autoemulate/emulators/random_forest.py b/autoemulate/emulators/random_forest.py index 09e03555c..28cdc4d59 100644 --- a/autoemulate/emulators/random_forest.py +++ b/autoemulate/emulators/random_forest.py @@ -35,7 +35,6 @@ def __init__( max_samples: int | None = None, random_seed: int | None = None, device: DeviceLike = "cpu", - **kwargs, ): """Initialize a RandomForest emulator. diff --git a/autoemulate/emulators/transformed/base.py b/autoemulate/emulators/transformed/base.py index f626d124f..2665c5f2b 100644 --- a/autoemulate/emulators/transformed/base.py +++ b/autoemulate/emulators/transformed/base.py @@ -11,19 +11,16 @@ OutputLike, TensorLike, ) -from autoemulate.data.utils import ValidationMixin +from autoemulate.data.utils import ConversionMixin, ValidationMixin from autoemulate.emulators.base import Emulator from autoemulate.emulators.transformed.delta_method import ( delta_method, delta_method_mean_only, ) -from autoemulate.transforms.base import ( - AutoEmulateTransform, - is_affine, -) +from autoemulate.transforms.base import AutoEmulateTransform, is_affine -class TransformedEmulator(Emulator, ValidationMixin): +class TransformedEmulator(Emulator, ValidationMixin, ConversionMixin): """ A transformed emulator that applies transformations to input and target data. 
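The scheduler changes above (polynomials, MLP, and the GP emulators) replace the old `**scheduler_kwargs` catch-all with an explicit `scheduler_cls` plus a `scheduler_params` dict. A rough sketch of the new call pattern (reviewer illustration, not part of the patch), assuming `PolynomialRegression` accepts `(x, y)` positionally like the other emulators; the toy data are made up:

```python
import torch
from torch.optim.lr_scheduler import ExponentialLR

from autoemulate.emulators.polynomials import PolynomialRegression

# Toy data (hypothetical).
x, y = torch.randn(40, 2), torch.randn(40, 1)

# Scheduler configuration is now passed as explicit keyword arguments rather
# than via **scheduler_kwargs.
model = PolynomialRegression(
    x,
    y,
    scheduler_cls=ExponentialLR,
    scheduler_params={"gamma": 0.9},
)
model.fit(x, y)
```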
@@ -128,10 +125,18 @@ def __init__( """ self.x_transforms = x_transforms or [] self.y_transforms = y_transforms or [] + + # Convert and move the new data to device + TorchDeviceMixin.__init__(self, device=device) + x, y = self._move_tensors_to_device(x, y) + self._fit_transforms(x, y) self.untransformed_model_name = model.model_name() self.model = model( - self._transform_x(x), self._transform_y_tensor(y), device=device, **kwargs + self._transform_x(x), + self._transform_y_tensor(y), + device=device, + **kwargs, ) # Cache for constant Jacobian of inverse y-transform when affine self._fixed_jacobian_y_inv = None @@ -148,7 +153,7 @@ def __init__( raise RuntimeError(msg) self.n_samples = n_samples self.full_covariance = full_covariance - TorchDeviceMixin.__init__(self, device=device) + # TODO: add API to indicate that pdf not valid when not all transforms bijective self.supports_grad = self.model.supports_grad self.supports_uq = self.model.supports_uq @@ -406,9 +411,11 @@ def predict_mean( out = delta_method_mean_only( ComposeTransform(self.y_transforms).inv, y_t_pred.mean, - y_t_pred.covariance_matrix - if isinstance(y_t_pred, GaussianLike) - else y_t_pred.variance, + ( + y_t_pred.covariance_matrix + if isinstance(y_t_pred, GaussianLike) + else y_t_pred.variance + ), True, ) return out["mean_total"].detach() if not with_grad else out["mean_total"] diff --git a/autoemulate/learners/base.py b/autoemulate/learners/base.py index 48af6a9dc..db1359661 100644 --- a/autoemulate/learners/base.py +++ b/autoemulate/learners/base.py @@ -4,15 +4,15 @@ import torch from anytree import Node, RenderTree -from torch.distributions import MultivariateNormal from torcheval.metrics import MeanSquaredError, R2Score from autoemulate.core.logging_config import get_configured_logger +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.data.utils import ValidationMixin from autoemulate.emulators.base import Emulator from autoemulate.simulations.base import Simulator -from ..core.types import GaussianLike, TensorLike +from ..core.types import DistributionLike, TensorLike @dataclass(kw_only=True) @@ -40,6 +40,7 @@ class Learner(ValidationMixin, ABC): x_train: TensorLike y_train: TensorLike log_level: str = "progress_bar" + fit_from_reinitialized: bool = True in_dim: int = field(init=False) out_dim: int = field(init=False) @@ -48,7 +49,15 @@ def __post_init__(self): log_level = getattr(self, "log_level", "progress_bar") self.logger, self.progress_bar = get_configured_logger(log_level) self.logger.info("Initializing Learner with training data.") - self.emulator.fit(self.x_train, self.y_train) + if self.fit_from_reinitialized: + self.emulator = fit_from_reinitialized( + self.x_train, + self.y_train, + emulator=self.emulator, + device=self.emulator.device, + ) + else: + self.emulator.fit(self.x_train, self.y_train) self.logger.info("Emulator fitted with initial training data.") self.in_dim = self.x_train.shape[1] self.out_dim = self.y_train.shape[1] @@ -140,11 +149,9 @@ def fit(self, *args): x, output, extra = self.query(*args) if isinstance(output, TensorLike): y_pred = output - elif isinstance(output, GaussianLike): + elif isinstance(output, DistributionLike): assert output.variance.ndim == 2 - y_pred, _ = output.mean, output.variance - elif isinstance(output, GaussianLike): - y_pred, _ = output.loc, None + y_pred = output.mean else: msg = ( f"Output must be either `Tensor` or `MultivariateNormal` but got " @@ -159,7 +166,15 @@ def fit(self, *args): assert isinstance(y_true, TensorLike) 
self.x_train = torch.cat([self.x_train, x]) self.y_train = torch.cat([self.y_train, y_true]) - self.emulator.fit(self.x_train, self.y_train) + if self.fit_from_reinitialized: + self.emulator = fit_from_reinitialized( + self.x_train, + self.y_train, + emulator=self.emulator, + device=self.emulator.device, + ) + else: + self.emulator.fit(self.x_train, self.y_train) self.mse.update(y_pred, y_true) self.r2.update(y_pred, y_true) self.n_queries += 1 @@ -179,9 +194,16 @@ def fit(self, *args): self.metrics["n_queries"].append(self.n_queries) self.logger.info("Metrics updated: MSE=%s, R2=%s", mse_val, r2_val) - # If Gaussian output + # If distribution output # TODO: check generality for other GPs (e.g. with full covariance) - if isinstance(output, MultivariateNormal): + if isinstance(output, DistributionLike): + if not hasattr(output, "variance"): + msg = ( + f"Output of type {type(output)} does not have a 'variance'" + "property. This may occur if output is a PyTorch " + "TransformedDistribution." + ) + raise AttributeError(msg) assert isinstance(output.variance, TensorLike) assert output.variance.ndim == 2 assert output.variance.shape[1] == self.out_dim @@ -257,7 +279,11 @@ def summary(self): @abstractmethod def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + TensorLike | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Abstract method to query new samples. @@ -268,7 +294,7 @@ def query( Returns ------- - tuple[TensorLike or None, TensorLike, TensorLike, Dict[str, list[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if no query is made), - The predicted outputs, diff --git a/autoemulate/learners/stream.py b/autoemulate/learners/stream.py index a939d62bd..b18cad72c 100644 --- a/autoemulate/learners/stream.py +++ b/autoemulate/learners/stream.py @@ -7,7 +7,7 @@ from autoemulate.data.utils import set_random_seed -from ..core.types import GaussianLike, TensorLike +from ..core.types import DistributionLike, TensorLike from .base import Active @@ -26,7 +26,7 @@ class Stream(Active): @abstractmethod def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]]: """ Abstract method to query new samples from a stream. @@ -37,7 +37,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if no query is made), - The predicted outputs, @@ -64,9 +64,7 @@ def fit_samples(self, x: torch.Tensor): ) ): self.fit(xi.reshape(1, -1)) - pb.set_postfix( - ordered_dict={key: val[-1] for key, val in self.metrics.items()} - ) + pb.set_postfix(ordered_dict={k: v[-1] for k, v in self.metrics.items()}) def fit_batches(self, x: torch.Tensor, batch_size: int): """ @@ -114,7 +112,11 @@ def query( self, x: TensorLike | None = None, random_seed: int | None = None, - ) -> tuple[torch.Tensor | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + torch.Tensor | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Query new samples randomly based on a fixed probability. 
@@ -125,7 +127,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if the random condition is not met), - The predicted outputs, @@ -135,8 +137,7 @@ def query( assert isinstance(x, TensorLike) # TODO: move handling to check method in base class output = self.emulator.predict(x) - assert isinstance(output, TensorLike | GaussianLike) - # assert isinstance(output, TensorLike | DistributionLike) + assert isinstance(output, TensorLike | DistributionLike) if random_seed is not None: set_random_seed(seed=random_seed) x = x if np.random.rand() < self.p_query else None @@ -199,7 +200,7 @@ def query( self, x: TensorLike | None = None ) -> tuple[ torch.Tensor | None, - torch.Tensor | GaussianLike, + torch.Tensor | DistributionLike, dict[str, float], ]: """ @@ -212,7 +213,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if the score does not exceed the threshold), - The predicted outputs, @@ -222,7 +223,7 @@ def query( # TODO: move handling to check method in base class assert isinstance(x, torch.Tensor) output = self.emulator.predict(x) - assert isinstance(output, GaussianLike) + assert isinstance(output, DistributionLike) assert isinstance(output.variance, torch.Tensor) score = self.score(x, output.mean, output.variance) x = x if score > self.threshold else None @@ -262,13 +263,17 @@ def score( ---------- x: torch.Tensor Input samples. + y: torch.Tensor + Predicted outputs (not used). + Sigma: torch.Tensor + Covariance estimates (not used). Returns ------- float The average minimum distance. """ - _, _, _ = x, y, Sigma # Unused variables + _, _ = y, Sigma # Unused variables distances = torch.cdist(x, self.x_train) min_dists, _ = distances.min(dim=1) return min_dists.mean() @@ -306,11 +311,11 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor Predicted outputs (not used). Sigma: torch.Tensor - Covariance estimates (not used). + Covariance estimates. Returns ------- @@ -342,9 +347,9 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor - Predicted outputs. + Predicted outputs (not used). Sigma: torch.Tensor Covariance estimates. @@ -378,9 +383,9 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor - Predicted outputs. + Predicted outputs (not used). Sigma: torch.Tensor Covariance estimates. @@ -457,7 +462,11 @@ def __post_init__(self): def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + TensorLike | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Query new samples based on the adaptive threshold. 
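To illustrate the new `fit_from_reinitialized` flag on `Learner` (default `True`), a hypothetical stream-learner setup that opts back into in-place refitting (not part of the patch). Class, method, and module names come from this diff; sample sizes and the learning rate are arbitrary:

```python
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF
from autoemulate.learners import stream
from autoemulate.simulations.projectile import ProjectileMultioutput

simulator = ProjectileMultioutput()
x_train = simulator.sample_inputs(5)
y_train, _ = simulator.forward_batch(x_train)

# fit_from_reinitialized=True (the new default) rebuilds the emulator with
# freshly initialized parameters on every refit; False keeps in-place refitting.
learner = stream.Random(
    simulator=simulator,
    emulator=GaussianProcessRBF(x_train, y_train, lr=0.1),
    x_train=x_train,
    y_train=y_train,
    p_query=0.25,
    fit_from_reinitialized=False,
)
learner.fit_samples(simulator.sample_inputs(50))
```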
@@ -471,7 +480,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples, - The predicted outputs, diff --git a/docs/tutorials/simulator/02_active_learning.ipynb b/docs/tutorials/simulator/02_active_learning.ipynb index d6c63503b..768d4e825 100644 --- a/docs/tutorials/simulator/02_active_learning.ipynb +++ b/docs/tutorials/simulator/02_active_learning.ipynb @@ -205,8 +205,7 @@ " x_train, \n", " y_train, \n", " lr=lr, \n", - " posterior_predictive=True, \n", - " standardize_y=False\n", + " standardize_y=False,\n", " )\n", "\n", "emulator = make_gp(x_train, y_train)\n", @@ -265,7 +264,7 @@ "\n", "x_train = simulator.sample_inputs(5)\n", "y_train, _ = simulator.forward_batch(x_train)\n", - "emulator = make_gp(x_train, y_train, 0.01)\n", + "emulator = make_gp(x_train, y_train, 0.1)\n", "\n", "# Learner itself!\n", "learner = stream.Random(\n", @@ -603,10 +602,7 @@ "source": [ "### Summarizing Results\n", "\n", - "After all learners have been evaluated, we use the `compute_statistics` function to produce a compact summary of performance across trials.\n", - "\n", - "```python\n", - "compute_statistics(summary: List[Dict]) -> pd.DataFrame\n" + "After all learners have been evaluated, we use the `compute_statistics` function to produce a compact summary of performance across trials." ] }, { @@ -687,7 +683,7 @@ " n_stream_samples=500,\n", " adaptive_only=True,\n", " batch_size=None,\n", - " lr=2e-2\n", + " lr=0.1\n", ")" ] }, diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 8ac4870f1..734e06212 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -3,9 +3,7 @@ import pytest import torch from autoemulate.callbacks.early_stopping import EarlyStopping -from autoemulate.emulators.gaussian_process.exact import ( - GaussianProcess, -) +from autoemulate.emulators.gaussian_process.exact import GaussianProcess from gpytorch.likelihoods import MultitaskGaussianLikelihood @@ -18,7 +16,6 @@ def gp_exact(): y=y, likelihood_cls=MultitaskGaussianLikelihood, epochs=5, - batch_size=2, lr=0.1, early_stopping=None, ) diff --git a/tests/emulators/test_base.py b/tests/emulators/test_base.py index cbc9201a9..ad9b5a8dd 100644 --- a/tests/emulators/test_base.py +++ b/tests/emulators/test_base.py @@ -52,7 +52,7 @@ def setup_method(self): Define the PyTorchBackend instance. """ self.model = self.DummyModel( - scheduler_cls=ExponentialLR, scheduler_kwargs={"gamma": 0.9} + scheduler_cls=ExponentialLR, scheduler_params={"gamma": 0.9} ) def test_model_name(self): @@ -152,16 +152,16 @@ def test_fit_predict_deterministic_with_seed(self): def test_scheduler_setup(self): # Should raise ValueError if kwargs is None - with pytest.raises(ValueError, match="Provide a kwargs dictionary including"): + with pytest.raises( + ValueError, match="Provide scheduler_params to set up the scheduler." 
+ ): self.model.scheduler_setup(None) # Should raise RuntimeError if optimizer is missing model_no_opt = self.DummyModel() delattr(model_no_opt, "optimizer") with pytest.raises(RuntimeError, match="Optimizer must be set before"): - model_no_opt.scheduler_setup( - {"scheduler_cls": ExponentialLR, "scheduler_kwargs": {"gamma": 0.9}} - ) + model_no_opt.scheduler_setup({"gamma": 0.9}) # Should set scheduler to None if scheduler_cls is None model_none_sched = self.DummyModel() @@ -170,7 +170,7 @@ def test_scheduler_setup(self): model_none_sched.parameters(), lr=model_none_sched.lr, # type: ignore[call-arg] ) - model_none_sched.scheduler_setup({"scheduler_kwargs": {}}) + model_none_sched.scheduler_setup({}) assert model_none_sched.scheduler is None # Should set scheduler if scheduler_cls is valid @@ -180,5 +180,5 @@ def test_scheduler_setup(self): model_valid_sched.parameters(), lr=model_valid_sched.lr, # type: ignore[call-arg] ) - model_valid_sched.scheduler_setup({"scheduler_kwargs": {"gamma": 0.9}}) + model_valid_sched.scheduler_setup({"gamma": 0.9}) assert isinstance(model_valid_sched.scheduler, ExponentialLR) diff --git a/tests/learners/test_learners.py b/tests/learners/test_learners.py index 8ba52056e..7e88eded3 100644 --- a/tests/learners/test_learners.py +++ b/tests/learners/test_learners.py @@ -3,7 +3,7 @@ import numpy as np import torch from autoemulate.core.types import TensorLike -from autoemulate.emulators.gaussian_process.exact import GaussianProcess +from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF from autoemulate.learners import stream from autoemulate.simulations.base import Simulator from autoemulate.simulations.projectile import ProjectileMultioutput @@ -24,7 +24,7 @@ def learners( assert isinstance(y_train, TensorLike) yield stream.Random( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, p_query=0.25, @@ -32,35 +32,35 @@ def learners( if not adaptive_only: yield stream.Distance( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.5, ) yield stream.A_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1.0, ) yield stream.D_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=-4.2, ) yield stream.E_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1.0, ) yield stream.Adaptive_Distance( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.5, @@ -75,7 +75,7 @@ def learners( ) yield stream.Adaptive_A_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1e-1, @@ -90,7 +90,7 @@ def learners( ) yield stream.Adaptive_D_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, 
y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=-4.0, @@ -105,7 +105,7 @@ def learners( ) yield stream.Adaptive_E_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.75 if isinstance(simulator, Sin) else 1000,
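Finally, a sketch of the headline `HistoryMatchingWorkflow` change: a pre-trained emulator can now be passed via `emulator=` instead of a `Result` (reviewer illustration, not part of the patch). The constructor and `run()` signatures are from this diff; the simulator choice, output names, and observation values are placeholders:

```python
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF
from autoemulate.simulations.projectile import ProjectileMultioutput

simulator = ProjectileMultioutput()
x_train = simulator.sample_inputs(20)
y_train, _ = simulator.forward_batch(x_train)

emulator = GaussianProcessRBF(x_train, y_train)
emulator.fit(x_train, y_train)

# Placeholder output names and (value, variance) observations.
observations = {"output_0": (100.0, 10.0), "output_1": (-20.0, 4.0)}

# Previously a Result was required; a pre-trained emulator is now enough.
hmw = HistoryMatchingWorkflow(
    simulator=simulator,
    observations=observations,
    emulator=emulator,
    train_x=x_train,
    train_y=y_train,
)
hmw.run(n_simulations=50)  # remaining arguments left at their defaults
```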