diff --git a/autoemulate/calibration/history_matching.py b/autoemulate/calibration/history_matching.py index 2cca23bdd..05d23e6fa 100644 --- a/autoemulate/calibration/history_matching.py +++ b/autoemulate/calibration/history_matching.py @@ -12,10 +12,11 @@ from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.logging_config import get_configured_logger from autoemulate.core.plotting import display_figure +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.core.results import Result from autoemulate.core.types import DeviceLike, DistributionLike, TensorLike from autoemulate.data.utils import set_random_seed -from autoemulate.emulators import TransformedEmulator, get_emulator_class +from autoemulate.emulators import Emulator from autoemulate.simulations.base import Simulator logger = logging.getLogger("autoemulate") @@ -284,13 +285,15 @@ class HistoryMatchingWorkflow(HistoryMatching): def __init__( self, simulator: Simulator, - result: Result, observations: dict[str, tuple[float, float]] | dict[str, float], + emulator: Emulator | None = None, + result: Result | None = None, threshold: float = 3.0, model_discrepancy: float = 0.0, rank: int = 1, train_x: TensorLike | None = None, train_y: TensorLike | None = None, + transformed_emulator_params: dict | None = None, calibration_params: list[str] | None = None, device: DeviceLike | None = None, random_seed: int | None = None, @@ -303,12 +306,17 @@ def __init__( ---------- simulator: Simulator A simulator. - result: Result - A Result object containing the pre-trained emulator and its hyperparameters. observations: dict[str, tuple[float, float] | dict[str, float] For each output variable, specifies observed [value, noise] (with noise specified as variances). In case of no uncertainty in observations, provides just the observed value. + emulator: Emulator | None + An Emulator object containing the pre-trained emulator. If not provided, a + Result object must be provided instead. Defaults to None. + result: Result | None + A Result object containing the pre-trained emulator and its hyperparameters. + If not provided, an Emulator object must be provided instead. + Defaults to None. threshold: float Implausibility threshold (query points with implausibility scores that exceed this value are ruled out). Defaults to 3, which is considered @@ -324,6 +332,11 @@ def __init__( Optional tensor of input data the emulator was trained on. train_y: TensorLike | None Optional tensor of output data the emulator was trained on. + transformed_emulator_params: dict | None + Optional dictionary of parameters for TransformedEmulator. These are + already contained in a Result object, so only needed if a + TransformedEmulator instance with non-default params is provided. + Defaults to None. calibration_params: list[str] | None Optional subset of parameters to calibrate. These have to correspond to the parameters that the emulator was trained on. If None, calibrate all @@ -342,8 +355,15 @@ def __init__( set_random_seed(seed=random_seed) self.logger, self.progress_bar = get_configured_logger(log_level) - self.result = result - self.emulator = result.model + if result is not None: + self.emulator = result.model + elif emulator is not None: + self.emulator = emulator + else: + msg = "Either `emulator` or `result` must be provided." 
+ raise ValueError(msg) + + self.transformed_emulator_params = transformed_emulator_params or {} self.emulator.device = self.device # New data is simulated in `run()` and appended here @@ -623,19 +643,16 @@ def refit_emulator(self, x: TensorLike, y: TensorLike) -> None: y: TensorLike Tensor of output data to refit the emulator on. """ - # Create a fresh model with the same configuration - self.emulator = TransformedEmulator( - x.float(), - y.float(), - model=get_emulator_class(self.result.model_name), - x_transforms=self.result.x_transforms, - y_transforms=self.result.y_transforms, + x = x.float().to(self.device) + y = y.float().to(self.device) + self.emulator = fit_from_reinitialized( + x, + y, + emulator=self.emulator, + transformed_emulator_params=self.transformed_emulator_params, device=self.device, - **self.result.params, ) - self.emulator.fit(x, y) - def run( self, n_simulations: int = 100, diff --git a/autoemulate/core/compare.py b/autoemulate/core/compare.py index 5c2fb3e38..9be163b31 100644 --- a/autoemulate/core/compare.py +++ b/autoemulate/core/compare.py @@ -19,6 +19,7 @@ display_figure, plot_xy, ) +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.core.results import Result, Results from autoemulate.core.save import ModelSerialiser from autoemulate.core.tuner import Tuner @@ -566,34 +567,21 @@ def fit_from_reinitialized( # Get the result to use result = self.best_result() if result_id is None else self.get_result(result_id) - # Set the random seed for initialization - if random_seed is not None: - set_random_seed(seed=random_seed) - # Convert and move the new data to device x_tensor, y_tensor = self._convert_to_tensors(x, y) x_tensor, y_tensor = self._move_tensors_to_device(x_tensor, y_tensor) - # Get the model class from the model name - model_class = get_emulator_class(result.model_name) - - # Create a fresh model with the same configuration - fresh_model = TransformedEmulator( + # NOTE: function passes data to the Emulator model which handles conversion to + # tensors and device handling + return fit_from_reinitialized( x_tensor, y_tensor, - model=model_class, - x_transforms=result.x_transforms, - y_transforms=result.y_transforms, + emulator=result.model, + transformed_emulator_params=transformed_emulator_params, device=self.device, - **result.params, - **transformed_emulator_params, + random_seed=random_seed, ) - # Fit the fresh model on the new data - fresh_model.fit(x_tensor, y_tensor) - - return fresh_model - def plot( # noqa: PLR0912, PLR0915 self, model_obj: int | Emulator | Result, diff --git a/autoemulate/core/reinitialize.py b/autoemulate/core/reinitialize.py new file mode 100644 index 000000000..571ab50dc --- /dev/null +++ b/autoemulate/core/reinitialize.py @@ -0,0 +1,99 @@ +import inspect + +from autoemulate.core.types import DeviceLike, TensorLike +from autoemulate.data.utils import set_random_seed +from autoemulate.emulators import Emulator, TransformedEmulator, get_emulator_class + + +def fit_from_reinitialized( + x: TensorLike, + y: TensorLike, + emulator: Emulator, + transformed_emulator_params: dict | None = None, + device: DeviceLike | None = None, + random_seed: int | None = None, +): + """ + Fit a fresh model with reinitialized parameters using the best configuration. + + This method creates a new model instance with the same configuration as the + best (or specified) model from the comparison, but with freshly initialized + parameters fitted on the provided data. 
+
+    Parameters
+    ----------
+    x: TensorLike
+        Input features for training the fresh model.
+    y: TensorLike
+        Target values for training the fresh model.
+    emulator: Emulator
+        An Emulator object containing the pre-trained emulator.
+    transformed_emulator_params: dict | None
+        Additional parameters passed to the TransformedEmulator wrapper. When
+        None, no additional parameters are passed. Defaults to None.
+    device: DeviceLike | None
+        Device to use for model fitting (e.g., 'cpu' or 'cuda'). If None, the default
+        device is used. Defaults to None.
+    random_seed: int | None
+        Random seed for parameter initialization. Defaults to None.
+
+    Returns
+    -------
+    TransformedEmulator
+        A new model instance with the same configuration but fresh parameters
+        fitted on the provided data.
+
+    Notes
+    -----
+    Unlike TransformedEmulator.refit(), which retrains an existing model,
+    this function creates a completely new model instance with reinitialized
+    parameters. This ensures that the same initialization conditions are applied
+    when fitting on new data. This can have an effect, for example, on kernel
+    initialization in Gaussian Processes or weight initialization in neural
+    networks.
+    """
+    if random_seed is not None:
+        set_random_seed(seed=random_seed)
+
+    # Extract emulator and its parameters from Emulator instance
+    if isinstance(emulator, TransformedEmulator):
+        model = emulator.model
+        emulator_name = emulator.untransformed_model_name
+        x_transforms = emulator.x_transforms
+        y_transforms = emulator.y_transforms
+    else:
+        model = emulator
+        emulator_name = emulator.model_name()
+        x_transforms = None
+        y_transforms = None
+
+    # Extract parameters from the provided emulator instance
+    model_cls = get_emulator_class(emulator_name)
+    init_sig = inspect.signature(model_cls.__init__)
+    emulator_params = {}
+    for param_name in init_sig.parameters:
+        if param_name in ["self", "x", "y", "device"]:
+            continue
+        # NOTE: some emulators have standardize_x/y params option
+        # this is different to TransformedEmulator x/y transforms
+        if param_name == "standardize_x":
+            emulator_params["standardize_x"] = bool(model.x_transform)
+        if param_name == "standardize_y":
+            emulator_params["standardize_y"] = bool(model.y_transform)
+        if hasattr(model, param_name):
+            emulator_params[param_name] = getattr(model, param_name)
+
+    transformed_emulator_params = transformed_emulator_params or {}
+
+    new_emulator = TransformedEmulator(
+        x.float(),
+        y.float(),
+        model=model_cls,
+        x_transforms=x_transforms,
+        y_transforms=y_transforms,
+        device=device,
+        **emulator_params,
+        **transformed_emulator_params,
+    )
+
+    new_emulator.fit(x.float(), y.float())
+    return new_emulator
diff --git a/autoemulate/emulators/base.py b/autoemulate/emulators/base.py
index c352f4eef..f38711b40 100644
--- a/autoemulate/emulators/base.py
+++ b/autoemulate/emulators/base.py
@@ -43,24 +43,22 @@ def _fit(self, x: TensorLike, y: TensorLike): ...
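For reference, a minimal usage sketch of the new helper (reviewer illustration, not part of the patch): the import paths, `MLP`, and the `fit_from_reinitialized` signature are taken from this diff; the toy tensors and the `epochs` value are made up.

```python
import torch

from autoemulate.core.reinitialize import fit_from_reinitialized
from autoemulate.emulators.nn.mlp import MLP

# Toy data (hypothetical): 50 samples, 3 inputs, 2 outputs.
x, y = torch.randn(50, 3), torch.randn(50, 2)
emulator = MLP(x, y, epochs=50)
emulator.fit(x, y)

# New data arrives: rebuild an emulator with the same configuration but freshly
# initialized parameters, fitted on the combined data.
x_new, y_new = torch.randn(20, 3), torch.randn(20, 2)
refit = fit_from_reinitialized(
    torch.cat([x, x_new]),
    torch.cat([y, y_new]),
    emulator=emulator,
    random_seed=0,
)
```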
def fit(self, x: TensorLike, y: TensorLike): """Fit the emulator to the provided data.""" - if isinstance(x, TensorLike) and isinstance(y, TensorLike): - self._check(x, y) - # Ensure x and y are tensors and 2D - x, y = self._convert_to_tensors(x, y) - - # Move to device - x, y = self._move_tensors_to_device(x, y) - - # Fit transforms - if self.x_transform is not None: - self.x_transform.fit(x) - if self.y_transform is not None: - self.y_transform.fit(y) - x = self.x_transform(x) if self.x_transform is not None else x - y = self.y_transform(y) if self.y_transform is not None else y - - # Fit emulator - self._fit(x, y) + # Ensure x and y are tensors and 2D + x, y = self._convert_to_tensors(x, y) + + # Move to device + x, y = self._move_tensors_to_device(x, y) + + # Fit transforms + if self.x_transform is not None: + self.x_transform.fit(x) + if self.y_transform is not None: + self.y_transform.fit(y) + x = self.x_transform(x) if self.x_transform is not None else x + y = self.y_transform(y) if self.y_transform is not None else y + + # Fit emulator + self._fit(x, y) self.is_fitted_ = True @abstractmethod @@ -280,7 +278,7 @@ def get_random_params(cls): } @classmethod - def scheduler_params(cls) -> dict: + def get_scheduler_params(cls) -> dict: """ Return a random parameters for the learning rate scheduler. @@ -290,37 +288,37 @@ def scheduler_params(cls) -> dict: all_params = [ { "scheduler_cls": [None], - "scheduler_kwargs": [{}], + "scheduler_params": [{}], }, { "scheduler_cls": [ExponentialLR], - "scheduler_kwargs": [ + "scheduler_params": [ {"gamma": 0.9}, {"gamma": 0.95}, ], }, { "scheduler_cls": [LRScheduler], - "scheduler_kwargs": [ + "scheduler_params": [ {"policy": "ReduceLROnPlateau", "patience": 5, "factor": 0.5} ], }, # TODO: investigate these suggestions from copilot, issue: #597 # { # "scheduler_cls": [CosineAnnealingLR], - # "scheduler_kwargs": [{"T_max": 10, "eta_min": 0.01}], + # "scheduler_params": [{"T_max": 10, "eta_min": 0.01}], # }, # { # "scheduler_cls": [ReduceLROnPlateau], - # "scheduler_kwargs": [{"mode": "min", "factor": 0.1, "patience": 5}], + # "scheduler_params": [{"mode": "min", "factor": 0.1, "patience": 5}], # }, # { # "scheduler_cls": [StepLR], - # "scheduler_kwargs": [{"step_size": 10, "gamma": 0.1}], + # "scheduler_params": [{"step_size": 10, "gamma": 0.1}], # }, # { # "scheduler_cls": [CyclicLR], - # "scheduler_kwargs": [{ + # "scheduler_params": [{ # "base_lr": 1e-3, # "max_lr": 1e-1, # "step_size_up": 5, @@ -329,7 +327,7 @@ def scheduler_params(cls) -> dict: # }, # { # "scheduler_cls": [OneCycleLR], - # "scheduler_kwargs": [{ + # "scheduler_params": [{ # "max_lr": 1e-1, # "total_steps": self.epochs, # "pct_start": 0.3, @@ -340,40 +338,28 @@ def scheduler_params(cls) -> dict: # Randomly select one of the parameter sets return random.choice(all_params) - def scheduler_setup(self, kwargs: dict | None = None): + def scheduler_setup(self, scheduler_params: dict | None = None): """ Set up the learning rate scheduler for the emulator. Parameters ---------- - kwargs : dict | None - Keyword arguments for the model. This should include scheduler_kwargs. + scheduler_params : dict | None + Keyword arguments for the scheduler. """ - if kwargs is None: - msg = ( - "Provide a kwargs dictionary including " - "scheduler_kwargs to set up the scheduler." - ) + if scheduler_params is None: + msg = "Provide scheduler_params to set up the scheduler." raise ValueError(msg) if not hasattr(self, "optimizer"): msg = "Optimizer must be set before setting up the scheduler." 
raise RuntimeError(msg) - # Extract scheduler-specific kwargs if present - try: - assert type(kwargs) is dict - scheduler_kwargs = kwargs.pop("scheduler_kwargs", {}) - except AttributeError: - # If kwargs does not contain scheduler_kwargs, throw an error - msg = "No kwargs for scheduler setup detected." - raise ValueError(msg) from None - # Set up the scheduler if a scheduler class is defined if self.scheduler_cls is None: self.scheduler = None else: - self.scheduler = self.scheduler_cls(self.optimizer, **scheduler_kwargs) # type: ignore[call-arg] + self.scheduler = self.scheduler_cls(self.optimizer, **scheduler_params) # type: ignore[call-arg] class DeterministicEmulator(Emulator): diff --git a/autoemulate/emulators/ensemble.py b/autoemulate/emulators/ensemble.py index 555595f2f..79dad8703 100644 --- a/autoemulate/emulators/ensemble.py +++ b/autoemulate/emulators/ensemble.py @@ -4,17 +4,8 @@ from torch import Tensor from autoemulate.core.device import TorchDeviceMixin -from autoemulate.core.types import ( - DeviceLike, - GaussianLike, - TensorLike, - TuneParams, -) -from autoemulate.emulators.base import ( - DropoutTorchBackend, - Emulator, - GaussianEmulator, -) +from autoemulate.core.types import DeviceLike, GaussianLike, TensorLike, TuneParams +from autoemulate.emulators.base import DropoutTorchBackend, Emulator, GaussianEmulator from autoemulate.emulators.nn.mlp import MLP from autoemulate.transforms.standardize import StandardizeTransform from autoemulate.transforms.utils import make_positive_definite @@ -148,7 +139,7 @@ def __init__( standardize_y: bool = True, n_emulators: int = 4, device: DeviceLike | None = None, - **mlp_kwargs, + mlp_kwargs: dict | None = None, ): """ Initialize an ensemble of MLPs. @@ -167,9 +158,10 @@ def __init__( Number of MLP emulators to create in the ensemble. Defaults to 4. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda"). Defaults to None. - **mlp_kwargs: dict + mlp_kwargs: dict | None Additional keyword arguments for the MLP constructor. """ + self.mlp_kwargs = mlp_kwargs or {} emulators = [ MLP( x, @@ -177,7 +169,7 @@ def __init__( standardize_x=standardize_x, standardize_y=standardize_y, device=device, - **mlp_kwargs, + **self.mlp_kwargs, ) for i in range(n_emulators) ] @@ -305,7 +297,7 @@ def __init__( standardize_y: bool = True, dropout_prob: float = 0.2, device: DeviceLike | None = None, - **mlp_kwargs, + mlp_kwargs: dict | None = None, ): """ Initialize an ensemble of MLPs with dropout. @@ -324,9 +316,10 @@ def __init__( Dropout probability to use in the MLP layers. Defaults to 0.2. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda"). Defaults to None. - **mlp_kwargs: dict + mlp_kwargs: dict | None Additional keyword arguments for the MLP constructor. 
""" + self.mlp_kwargs = mlp_kwargs or {} super().__init__( MLP( x, @@ -335,7 +328,7 @@ def __init__( standardize_y=standardize_y, dropout_prob=dropout_prob, device=device, - **mlp_kwargs, + **self.mlp_kwargs, ), device=device, ) diff --git a/autoemulate/emulators/gaussian_process/exact.py b/autoemulate/emulators/gaussian_process/exact.py index f4d639a7c..cbc00f043 100644 --- a/autoemulate/emulators/gaussian_process/exact.py +++ b/autoemulate/emulators/gaussian_process/exact.py @@ -70,7 +70,8 @@ def __init__( lr: float = 2e-1, early_stopping: EarlyStopping | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Initialize the GaussianProcess emulator. @@ -108,7 +109,10 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device (usually CPU or GPU). Defaults to None. - scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None Additional keyword arguments for the learning rate scheduler. """ # Init device @@ -123,8 +127,10 @@ def __init__( num_tasks_torch = torch.Size([num_tasks]) # Initialize the mean and covariance modules - mean_module = mean_module_fn(n_features, num_tasks_torch) - covar_module = covar_module_fn(n_features, num_tasks_torch) + self.mean_module_fn = mean_module_fn + self.covar_module_fn = covar_module_fn + mean_module = self.mean_module_fn(n_features, num_tasks_torch) + covar_module = self.covar_module_fn(n_features, num_tasks_torch) # If the combined kernel is not a ScaleKernel, wrap it in one covar_module = ( @@ -134,7 +140,8 @@ def __init__( ) # Init likelihood - likelihood = likelihood_cls(num_tasks=num_tasks) + self.likelihood_cls = likelihood_cls + likelihood = self.likelihood_cls(num_tasks=num_tasks) likelihood = likelihood.to(self.device) # Init must be called with preprocessed data @@ -149,15 +156,19 @@ def __init__( self.epochs = epochs self.lr = lr self.optimizer = self.optimizer_cls(self.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.early_stopping = early_stopping self.posterior_predictive = posterior_predictive self.num_tasks = num_tasks self.to(self.device) # Fix mean and kernel if required - self._fix_module_params(self.mean_module, fixed_mean_params) - self._fix_module_params(self.covar_module, fixed_covar_params) + self.fixed_mean_params = fixed_mean_params + self.fixed_covar_params = fixed_covar_params + self._fix_module_params(self.mean_module, self.fixed_mean_params) + self._fix_module_params(self.covar_module, self.fixed_covar_params) @staticmethod def _fix_module_params(module: nn.Module, fixed_params: bool): @@ -293,7 +304,7 @@ def _predict(self, x: TensorLike, with_grad: bool): @staticmethod def get_tune_params(): """Return the hyperparameters to tune for the Gaussian Process model.""" - scheduler_params = GaussianProcess.scheduler_params() + scheduler_specs = GaussianProcess.get_scheduler_params() return { "mean_module_fn": [ constant_mean, @@ -314,8 +325,8 @@ def get_tune_params(): "epochs": [50, 100, 200], "lr": [5e-1, 1e-1, 5e-2, 1e-2], "likelihood_cls": [MultitaskGaussianLikelihood], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": 
scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } @@ -349,7 +360,8 @@ def __init__( early_stopping: EarlyStopping | None = None, seed: int | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Initialize the GaussianProcessCorrelated emulator. @@ -391,7 +403,9 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device (usually CPU or GPU). Defaults to None. - scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + scheduler_params: dict Additional keyword arguments for the learning rate scheduler. """ # Init device @@ -405,8 +419,10 @@ def __init__( # Initialize the mean and covariance modules n_features = tuple(x.shape)[1] - mean_module = mean_module_fn(n_features, None) - covar_module = covar_module_fn(n_features, None) + self.mean_module_fn = mean_module_fn + self.covar_module_fn = covar_module_fn + mean_module = self.mean_module_fn(n_features, None) + covar_module = self.covar_module_fn(n_features, None) # Mean and covariance modules for multitask num_tasks = tuple(y.shape)[1] @@ -424,8 +440,10 @@ def __init__( likelihood = likelihood.to(self.device) # Fix mean and kernel if required - self._fix_module_params(mean_module, fixed_mean_params) - self._fix_module_params(covar_module, fixed_covar_params) + self.fixed_mean_params = fixed_mean_params + self.fixed_covar_params = fixed_covar_params + self._fix_module_params(mean_module, self.fixed_mean_params) + self._fix_module_params(covar_module, self.fixed_covar_params) # Init must be called with preprocessed data gpytorch.models.ExactGP.__init__( @@ -442,7 +460,9 @@ def __init__( self.epochs = epochs self.lr = lr self.optimizer = self.optimizer_cls(self.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.early_stopping = early_stopping self.posterior_predictive = posterior_predictive self.num_tasks = num_tasks @@ -536,7 +556,7 @@ def __init__( lr: float = lr, early_stopping: EarlyStopping | None = early_stopping, device: DeviceLike | None = device, - **scheduler_kwargs, + **scheduler_params, ): super().__init__( x, @@ -553,7 +573,7 @@ def __init__( lr, early_stopping, device, - **scheduler_kwargs, + **scheduler_params, ) @staticmethod diff --git a/autoemulate/emulators/nn/mlp.py b/autoemulate/emulators/nn/mlp.py index a1fa9d5fe..6c493e3e0 100644 --- a/autoemulate/emulators/nn/mlp.py +++ b/autoemulate/emulators/nn/mlp.py @@ -1,4 +1,5 @@ from torch import nn +from torch.optim.lr_scheduler import LRScheduler from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.types import DeviceLike, TensorLike @@ -34,7 +35,8 @@ def __init__( lr: float = 1e-2, random_seed: int | None = None, device: DeviceLike | None = None, - **scheduler_kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """ Multi-Layer Perceptron (MLP) emulator. @@ -77,7 +79,10 @@ def __init__( Random seed for reproducibility. If None, no seed is set. Defaults to None. device: DeviceLike | None Device to run the model on (e.g., "cpu", "cuda", "mps"). Defaults to None. 
- **scheduler_kwargs: dict + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None Additional keyword arguments related to the scheduler. Raises @@ -95,29 +100,40 @@ def __init__( x, y = self._convert_to_tensors(x, y) # Construct the MLP layers - layer_dims = [x.shape[1], *layer_dims] if layer_dims else [x.shape[1], 32, 16] + self.layer_dims = ( + [x.shape[1], *layer_dims] if layer_dims else [x.shape[1], 32, 16] + ) + self.dropout_prob = dropout_prob + self.activation_cls = activation_cls + layers = [] - for idx, dim in enumerate(layer_dims[1:]): - layers.append(nn.Linear(layer_dims[idx], dim, device=self.device)) - layers.append(activation_cls()) - if dropout_prob is not None: - layers.append(nn.Dropout(p=dropout_prob)) + for idx, dim in enumerate(self.layer_dims[1:]): + layers.append(nn.Linear(self.layer_dims[idx], dim, device=self.device)) + layers.append(self.activation_cls()) + if self.dropout_prob is not None: + layers.append(nn.Dropout(p=self.dropout_prob)) # Add final layer without activation num_tasks = y.shape[1] - layers.append(nn.Linear(layer_dims[-1], num_tasks, device=self.device)) + layers.append(nn.Linear(self.layer_dims[-1], num_tasks, device=self.device)) self.nn = nn.Sequential(*layers) # Finalize initialization - self._initialize_weights(weight_init, scale, bias_init) + self.weight_init = weight_init + self.scale = scale + self.bias_init = bias_init + self._initialize_weights(self.weight_init, self.scale, self.bias_init) self.x_transform = StandardizeTransform() if standardize_x else None self.y_transform = StandardizeTransform() if standardize_y else None self.epochs = epochs + self.loss_fn_cls = loss_fn_cls self.loss_fn = loss_fn_cls() self.lr = lr self.batch_size = batch_size self.optimizer = self.optimizer_cls(self.nn.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(scheduler_kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) self.to(self.device) def forward(self, x): @@ -132,7 +148,7 @@ def is_multioutput() -> bool: @staticmethod def get_tune_params(): """Return a dictionary of hyperparameters to tune.""" - scheduler_params = MLP.scheduler_params() + scheduler_specs = MLP.get_scheduler_params() return { "epochs": [100, 200], "layer_dims": [[8, 4], [16, 8], [32, 16], [64, 32, 16]], @@ -142,6 +158,6 @@ def get_tune_params(): "scale": [0.1, 1.0], "bias_init": ["default", "zeros"], "dropout_prob": [0.3, None], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } diff --git a/autoemulate/emulators/polynomials.py b/autoemulate/emulators/polynomials.py index dd11d2210..84f7ea3bf 100644 --- a/autoemulate/emulators/polynomials.py +++ b/autoemulate/emulators/polynomials.py @@ -1,5 +1,6 @@ import torch from torch import nn +from torch.optim.lr_scheduler import LRScheduler from autoemulate.core.device import TorchDeviceMixin from autoemulate.core.types import DeviceLike, TensorLike @@ -29,7 +30,8 @@ def __init__( batch_size: int = 16, random_seed: int | None = None, device: DeviceLike | None = None, - **kwargs, + scheduler_cls: type[LRScheduler] | None = None, + scheduler_params: dict | None = None, ): """Initialize a PolynomialRegression emulator. 
@@ -56,8 +58,11 @@ def __init__( device: DeviceLike | None Device to run the model on. If None, uses the default device. Defaults to None. - **kwargs: dict - Additional keyword arguments. + scheduler_cls: type[LRScheduler] | None + Learning rate scheduler class. If None, no scheduler is used. Defaults to + None. + scheduler_params: dict | None + Additional keyword arguments related to the scheduler. """ super().__init__() TorchDeviceMixin.__init__(self, device=device) @@ -80,7 +85,9 @@ def __init__( self.poly.n_output_features, self.n_outputs, bias=False ).to(self.device) self.optimizer = self.optimizer_cls(self.linear.parameters(), lr=self.lr) # type: ignore[call-arg] since all optimizers include lr - self.scheduler_setup(kwargs) + self.scheduler_cls = scheduler_cls + self.scheduler_params = scheduler_params or {} + self.scheduler_setup(self.scheduler_params) def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward pass through for polynomial regression.""" @@ -96,11 +103,11 @@ def is_multioutput() -> bool: @staticmethod def get_tune_params(): """Return a dictionary of hyperparameters to tune.""" - scheduler_params = PolynomialRegression.scheduler_params() + scheduler_specs = PolynomialRegression.get_scheduler_params() return { "lr": [1e-3, 1e-2, 1e-1, 2e-1], "epochs": [50, 100, 200, 500, 1000], "batch_size": [8, 16, 32], - "scheduler_cls": scheduler_params["scheduler_cls"], - "scheduler_kwargs": scheduler_params["scheduler_kwargs"], + "scheduler_cls": scheduler_specs["scheduler_cls"], + "scheduler_params": scheduler_specs["scheduler_params"], } diff --git a/autoemulate/emulators/random_forest.py b/autoemulate/emulators/random_forest.py index 09e03555c..28cdc4d59 100644 --- a/autoemulate/emulators/random_forest.py +++ b/autoemulate/emulators/random_forest.py @@ -35,7 +35,6 @@ def __init__( max_samples: int | None = None, random_seed: int | None = None, device: DeviceLike = "cpu", - **kwargs, ): """Initialize a RandomForest emulator. diff --git a/autoemulate/emulators/transformed/base.py b/autoemulate/emulators/transformed/base.py index f626d124f..2665c5f2b 100644 --- a/autoemulate/emulators/transformed/base.py +++ b/autoemulate/emulators/transformed/base.py @@ -11,19 +11,16 @@ OutputLike, TensorLike, ) -from autoemulate.data.utils import ValidationMixin +from autoemulate.data.utils import ConversionMixin, ValidationMixin from autoemulate.emulators.base import Emulator from autoemulate.emulators.transformed.delta_method import ( delta_method, delta_method_mean_only, ) -from autoemulate.transforms.base import ( - AutoEmulateTransform, - is_affine, -) +from autoemulate.transforms.base import AutoEmulateTransform, is_affine -class TransformedEmulator(Emulator, ValidationMixin): +class TransformedEmulator(Emulator, ValidationMixin, ConversionMixin): """ A transformed emulator that applies transformations to input and target data. 
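The scheduler changes above (polynomials, MLP, and the GP emulators) replace the old `**scheduler_kwargs` catch-all with an explicit `scheduler_cls` plus a `scheduler_params` dict. A rough sketch of the new call pattern (reviewer illustration, not part of the patch), assuming `PolynomialRegression` accepts `(x, y)` positionally like the other emulators; the toy data are made up:

```python
import torch
from torch.optim.lr_scheduler import ExponentialLR

from autoemulate.emulators.polynomials import PolynomialRegression

# Toy data (hypothetical).
x, y = torch.randn(40, 2), torch.randn(40, 1)

# Scheduler configuration is now passed as explicit keyword arguments rather
# than via **scheduler_kwargs.
model = PolynomialRegression(
    x,
    y,
    scheduler_cls=ExponentialLR,
    scheduler_params={"gamma": 0.9},
)
model.fit(x, y)
```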
@@ -128,10 +125,18 @@ def __init__( """ self.x_transforms = x_transforms or [] self.y_transforms = y_transforms or [] + + # Convert and move the new data to device + TorchDeviceMixin.__init__(self, device=device) + x, y = self._move_tensors_to_device(x, y) + self._fit_transforms(x, y) self.untransformed_model_name = model.model_name() self.model = model( - self._transform_x(x), self._transform_y_tensor(y), device=device, **kwargs + self._transform_x(x), + self._transform_y_tensor(y), + device=device, + **kwargs, ) # Cache for constant Jacobian of inverse y-transform when affine self._fixed_jacobian_y_inv = None @@ -148,7 +153,7 @@ def __init__( raise RuntimeError(msg) self.n_samples = n_samples self.full_covariance = full_covariance - TorchDeviceMixin.__init__(self, device=device) + # TODO: add API to indicate that pdf not valid when not all transforms bijective self.supports_grad = self.model.supports_grad self.supports_uq = self.model.supports_uq @@ -406,9 +411,11 @@ def predict_mean( out = delta_method_mean_only( ComposeTransform(self.y_transforms).inv, y_t_pred.mean, - y_t_pred.covariance_matrix - if isinstance(y_t_pred, GaussianLike) - else y_t_pred.variance, + ( + y_t_pred.covariance_matrix + if isinstance(y_t_pred, GaussianLike) + else y_t_pred.variance + ), True, ) return out["mean_total"].detach() if not with_grad else out["mean_total"] diff --git a/autoemulate/learners/base.py b/autoemulate/learners/base.py index 48af6a9dc..db1359661 100644 --- a/autoemulate/learners/base.py +++ b/autoemulate/learners/base.py @@ -4,15 +4,15 @@ import torch from anytree import Node, RenderTree -from torch.distributions import MultivariateNormal from torcheval.metrics import MeanSquaredError, R2Score from autoemulate.core.logging_config import get_configured_logger +from autoemulate.core.reinitialize import fit_from_reinitialized from autoemulate.data.utils import ValidationMixin from autoemulate.emulators.base import Emulator from autoemulate.simulations.base import Simulator -from ..core.types import GaussianLike, TensorLike +from ..core.types import DistributionLike, TensorLike @dataclass(kw_only=True) @@ -40,6 +40,7 @@ class Learner(ValidationMixin, ABC): x_train: TensorLike y_train: TensorLike log_level: str = "progress_bar" + fit_from_reinitialized: bool = True in_dim: int = field(init=False) out_dim: int = field(init=False) @@ -48,7 +49,15 @@ def __post_init__(self): log_level = getattr(self, "log_level", "progress_bar") self.logger, self.progress_bar = get_configured_logger(log_level) self.logger.info("Initializing Learner with training data.") - self.emulator.fit(self.x_train, self.y_train) + if self.fit_from_reinitialized: + self.emulator = fit_from_reinitialized( + self.x_train, + self.y_train, + emulator=self.emulator, + device=self.emulator.device, + ) + else: + self.emulator.fit(self.x_train, self.y_train) self.logger.info("Emulator fitted with initial training data.") self.in_dim = self.x_train.shape[1] self.out_dim = self.y_train.shape[1] @@ -140,11 +149,9 @@ def fit(self, *args): x, output, extra = self.query(*args) if isinstance(output, TensorLike): y_pred = output - elif isinstance(output, GaussianLike): + elif isinstance(output, DistributionLike): assert output.variance.ndim == 2 - y_pred, _ = output.mean, output.variance - elif isinstance(output, GaussianLike): - y_pred, _ = output.loc, None + y_pred = output.mean else: msg = ( f"Output must be either `Tensor` or `MultivariateNormal` but got " @@ -159,7 +166,15 @@ def fit(self, *args): assert isinstance(y_true, TensorLike) 
self.x_train = torch.cat([self.x_train, x]) self.y_train = torch.cat([self.y_train, y_true]) - self.emulator.fit(self.x_train, self.y_train) + if self.fit_from_reinitialized: + self.emulator = fit_from_reinitialized( + self.x_train, + self.y_train, + emulator=self.emulator, + device=self.emulator.device, + ) + else: + self.emulator.fit(self.x_train, self.y_train) self.mse.update(y_pred, y_true) self.r2.update(y_pred, y_true) self.n_queries += 1 @@ -179,9 +194,16 @@ def fit(self, *args): self.metrics["n_queries"].append(self.n_queries) self.logger.info("Metrics updated: MSE=%s, R2=%s", mse_val, r2_val) - # If Gaussian output + # If distribution output # TODO: check generality for other GPs (e.g. with full covariance) - if isinstance(output, MultivariateNormal): + if isinstance(output, DistributionLike): + if not hasattr(output, "variance"): + msg = ( + f"Output of type {type(output)} does not have a 'variance'" + "property. This may occur if output is a PyTorch " + "TransformedDistribution." + ) + raise AttributeError(msg) assert isinstance(output.variance, TensorLike) assert output.variance.ndim == 2 assert output.variance.shape[1] == self.out_dim @@ -257,7 +279,11 @@ def summary(self): @abstractmethod def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + TensorLike | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Abstract method to query new samples. @@ -268,7 +294,7 @@ def query( Returns ------- - tuple[TensorLike or None, TensorLike, TensorLike, Dict[str, list[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if no query is made), - The predicted outputs, diff --git a/autoemulate/learners/stream.py b/autoemulate/learners/stream.py index a939d62bd..b18cad72c 100644 --- a/autoemulate/learners/stream.py +++ b/autoemulate/learners/stream.py @@ -7,7 +7,7 @@ from autoemulate.data.utils import set_random_seed -from ..core.types import GaussianLike, TensorLike +from ..core.types import DistributionLike, TensorLike from .base import Active @@ -26,7 +26,7 @@ class Stream(Active): @abstractmethod def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]]: """ Abstract method to query new samples from a stream. @@ -37,7 +37,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if no query is made), - The predicted outputs, @@ -64,9 +64,7 @@ def fit_samples(self, x: torch.Tensor): ) ): self.fit(xi.reshape(1, -1)) - pb.set_postfix( - ordered_dict={key: val[-1] for key, val in self.metrics.items()} - ) + pb.set_postfix(ordered_dict={k: v[-1] for k, v in self.metrics.items()}) def fit_batches(self, x: torch.Tensor, batch_size: int): """ @@ -114,7 +112,11 @@ def query( self, x: TensorLike | None = None, random_seed: int | None = None, - ) -> tuple[torch.Tensor | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + torch.Tensor | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Query new samples randomly based on a fixed probability. 
@@ -125,7 +127,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if the random condition is not met), - The predicted outputs, @@ -135,8 +137,7 @@ def query( assert isinstance(x, TensorLike) # TODO: move handling to check method in base class output = self.emulator.predict(x) - assert isinstance(output, TensorLike | GaussianLike) - # assert isinstance(output, TensorLike | DistributionLike) + assert isinstance(output, TensorLike | DistributionLike) if random_seed is not None: set_random_seed(seed=random_seed) x = x if np.random.rand() < self.p_query else None @@ -199,7 +200,7 @@ def query( self, x: TensorLike | None = None ) -> tuple[ torch.Tensor | None, - torch.Tensor | GaussianLike, + torch.Tensor | DistributionLike, dict[str, float], ]: """ @@ -212,7 +213,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples (or None if the score does not exceed the threshold), - The predicted outputs, @@ -222,7 +223,7 @@ def query( # TODO: move handling to check method in base class assert isinstance(x, torch.Tensor) output = self.emulator.predict(x) - assert isinstance(output, GaussianLike) + assert isinstance(output, DistributionLike) assert isinstance(output.variance, torch.Tensor) score = self.score(x, output.mean, output.variance) x = x if score > self.threshold else None @@ -262,13 +263,17 @@ def score( ---------- x: torch.Tensor Input samples. + y: torch.Tensor + Predicted outputs (not used). + Sigma: torch.Tensor + Covariance estimates (not used). Returns ------- float The average minimum distance. """ - _, _, _ = x, y, Sigma # Unused variables + _, _ = y, Sigma # Unused variables distances = torch.cdist(x, self.x_train) min_dists, _ = distances.min(dim=1) return min_dists.mean() @@ -306,11 +311,11 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor Predicted outputs (not used). Sigma: torch.Tensor - Covariance estimates (not used). + Covariance estimates. Returns ------- @@ -342,9 +347,9 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor - Predicted outputs. + Predicted outputs (not used). Sigma: torch.Tensor Covariance estimates. @@ -378,9 +383,9 @@ def score( Parameters ---------- x: torch.Tensor - Input samples. + Input samples (not used). y: torch.Tensor - Predicted outputs. + Predicted outputs (not used). Sigma: torch.Tensor Covariance estimates. @@ -457,7 +462,11 @@ def __post_init__(self): def query( self, x: TensorLike | None = None - ) -> tuple[TensorLike | None, TensorLike | GaussianLike, dict[str, float]]: + ) -> tuple[ + TensorLike | None, + TensorLike | DistributionLike, + dict[str, float], + ]: """ Query new samples based on the adaptive threshold. 
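To illustrate the new `fit_from_reinitialized` flag on `Learner` (default `True`), a hypothetical stream-learner setup that opts back into in-place refitting (not part of the patch). Class, method, and module names come from this diff; sample sizes and the learning rate are arbitrary:

```python
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF
from autoemulate.learners import stream
from autoemulate.simulations.projectile import ProjectileMultioutput

simulator = ProjectileMultioutput()
x_train = simulator.sample_inputs(5)
y_train, _ = simulator.forward_batch(x_train)

# fit_from_reinitialized=True (the new default) rebuilds the emulator with
# freshly initialized parameters on every refit; False keeps in-place refitting.
learner = stream.Random(
    simulator=simulator,
    emulator=GaussianProcessRBF(x_train, y_train, lr=0.1),
    x_train=x_train,
    y_train=y_train,
    p_query=0.25,
    fit_from_reinitialized=False,
)
learner.fit_samples(simulator.sample_inputs(50))
```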
@@ -471,7 +480,7 @@ def query( Returns ------- - Tuple[torch.Tensor or None, torch.Tensor, torch.Tensor, Dict[str, List[Any]]] + tuple[TensorLike | None, TensorLike | DistributionLike, dict[str, float]] A tuple containing: - The queried samples, - The predicted outputs, diff --git a/docs/tutorials/simulator/02_active_learning.ipynb b/docs/tutorials/simulator/02_active_learning.ipynb index d6c63503b..768d4e825 100644 --- a/docs/tutorials/simulator/02_active_learning.ipynb +++ b/docs/tutorials/simulator/02_active_learning.ipynb @@ -205,8 +205,7 @@ " x_train, \n", " y_train, \n", " lr=lr, \n", - " posterior_predictive=True, \n", - " standardize_y=False\n", + " standardize_y=False,\n", " )\n", "\n", "emulator = make_gp(x_train, y_train)\n", @@ -265,7 +264,7 @@ "\n", "x_train = simulator.sample_inputs(5)\n", "y_train, _ = simulator.forward_batch(x_train)\n", - "emulator = make_gp(x_train, y_train, 0.01)\n", + "emulator = make_gp(x_train, y_train, 0.1)\n", "\n", "# Learner itself!\n", "learner = stream.Random(\n", @@ -603,10 +602,7 @@ "source": [ "### Summarizing Results\n", "\n", - "After all learners have been evaluated, we use the `compute_statistics` function to produce a compact summary of performance across trials.\n", - "\n", - "```python\n", - "compute_statistics(summary: List[Dict]) -> pd.DataFrame\n" + "After all learners have been evaluated, we use the `compute_statistics` function to produce a compact summary of performance across trials." ] }, { @@ -687,7 +683,7 @@ " n_stream_samples=500,\n", " adaptive_only=True,\n", " batch_size=None,\n", - " lr=2e-2\n", + " lr=0.1\n", ")" ] }, diff --git a/tests/callbacks/test_early_stopping.py b/tests/callbacks/test_early_stopping.py index 8ac4870f1..734e06212 100644 --- a/tests/callbacks/test_early_stopping.py +++ b/tests/callbacks/test_early_stopping.py @@ -3,9 +3,7 @@ import pytest import torch from autoemulate.callbacks.early_stopping import EarlyStopping -from autoemulate.emulators.gaussian_process.exact import ( - GaussianProcess, -) +from autoemulate.emulators.gaussian_process.exact import GaussianProcess from gpytorch.likelihoods import MultitaskGaussianLikelihood @@ -18,7 +16,6 @@ def gp_exact(): y=y, likelihood_cls=MultitaskGaussianLikelihood, epochs=5, - batch_size=2, lr=0.1, early_stopping=None, ) diff --git a/tests/emulators/test_base.py b/tests/emulators/test_base.py index cbc9201a9..ad9b5a8dd 100644 --- a/tests/emulators/test_base.py +++ b/tests/emulators/test_base.py @@ -52,7 +52,7 @@ def setup_method(self): Define the PyTorchBackend instance. """ self.model = self.DummyModel( - scheduler_cls=ExponentialLR, scheduler_kwargs={"gamma": 0.9} + scheduler_cls=ExponentialLR, scheduler_params={"gamma": 0.9} ) def test_model_name(self): @@ -152,16 +152,16 @@ def test_fit_predict_deterministic_with_seed(self): def test_scheduler_setup(self): # Should raise ValueError if kwargs is None - with pytest.raises(ValueError, match="Provide a kwargs dictionary including"): + with pytest.raises( + ValueError, match="Provide scheduler_params to set up the scheduler." 
+ ): self.model.scheduler_setup(None) # Should raise RuntimeError if optimizer is missing model_no_opt = self.DummyModel() delattr(model_no_opt, "optimizer") with pytest.raises(RuntimeError, match="Optimizer must be set before"): - model_no_opt.scheduler_setup( - {"scheduler_cls": ExponentialLR, "scheduler_kwargs": {"gamma": 0.9}} - ) + model_no_opt.scheduler_setup({"gamma": 0.9}) # Should set scheduler to None if scheduler_cls is None model_none_sched = self.DummyModel() @@ -170,7 +170,7 @@ def test_scheduler_setup(self): model_none_sched.parameters(), lr=model_none_sched.lr, # type: ignore[call-arg] ) - model_none_sched.scheduler_setup({"scheduler_kwargs": {}}) + model_none_sched.scheduler_setup({}) assert model_none_sched.scheduler is None # Should set scheduler if scheduler_cls is valid @@ -180,5 +180,5 @@ def test_scheduler_setup(self): model_valid_sched.parameters(), lr=model_valid_sched.lr, # type: ignore[call-arg] ) - model_valid_sched.scheduler_setup({"scheduler_kwargs": {"gamma": 0.9}}) + model_valid_sched.scheduler_setup({"gamma": 0.9}) assert isinstance(model_valid_sched.scheduler, ExponentialLR) diff --git a/tests/learners/test_learners.py b/tests/learners/test_learners.py index 8ba52056e..7e88eded3 100644 --- a/tests/learners/test_learners.py +++ b/tests/learners/test_learners.py @@ -3,7 +3,7 @@ import numpy as np import torch from autoemulate.core.types import TensorLike -from autoemulate.emulators.gaussian_process.exact import GaussianProcess +from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF from autoemulate.learners import stream from autoemulate.simulations.base import Simulator from autoemulate.simulations.projectile import ProjectileMultioutput @@ -24,7 +24,7 @@ def learners( assert isinstance(y_train, TensorLike) yield stream.Random( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, p_query=0.25, @@ -32,35 +32,35 @@ def learners( if not adaptive_only: yield stream.Distance( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.5, ) yield stream.A_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1.0, ) yield stream.D_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=-4.2, ) yield stream.E_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1.0, ) yield stream.Adaptive_Distance( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.5, @@ -75,7 +75,7 @@ def learners( ) yield stream.Adaptive_A_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=1e-1, @@ -90,7 +90,7 @@ def learners( ) yield stream.Adaptive_D_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, 
y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=-4.0, @@ -105,7 +105,7 @@ def learners( ) yield stream.Adaptive_E_Optimal( simulator=simulator, - emulator=GaussianProcess(x_train, y_train, lr=0.001), + emulator=GaussianProcessRBF(x_train, y_train, lr=0.001), x_train=x_train, y_train=y_train, threshold=0.75 if isinstance(simulator, Sin) else 1000,
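Finally, a sketch of the headline `HistoryMatchingWorkflow` change: a pre-trained emulator can now be passed via `emulator=` instead of a `Result` (reviewer illustration, not part of the patch). The constructor and `run()` signatures are from this diff; the simulator choice, output names, and observation values are placeholders:

```python
from autoemulate.calibration.history_matching import HistoryMatchingWorkflow
from autoemulate.emulators.gaussian_process.exact import GaussianProcessRBF
from autoemulate.simulations.projectile import ProjectileMultioutput

simulator = ProjectileMultioutput()
x_train = simulator.sample_inputs(20)
y_train, _ = simulator.forward_batch(x_train)

emulator = GaussianProcessRBF(x_train, y_train)
emulator.fit(x_train, y_train)

# Placeholder output names and (value, variance) observations.
observations = {"output_0": (100.0, 10.0), "output_1": (-20.0, 4.0)}

# Previously a Result was required; a pre-trained emulator is now enough.
hmw = HistoryMatchingWorkflow(
    simulator=simulator,
    observations=observations,
    emulator=emulator,
    train_x=x_train,
    train_y=y_train,
)
hmw.run(n_simulations=50)  # remaining arguments left at their defaults
```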