feat: Improve dashboard performance and Claude terminal context
- Add trial limiting (300 max) and reduce polling to 15s for large studies
- Make dashboard layout wider with col-span adjustments
- Claude terminal now runs from Atomizer root for CLAUDE.md/skills access
- Add study context display in terminal on connect
- Add KaTeX math rendering styles for study reports
- Add surrogate tuner module for hyperparameter optimization
- Fix backend proxy to port 8001

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
optimization_engine/surrogate_tuner.py (new file, +800 lines)
@@ -0,0 +1,800 @@
"""
Hyperparameter Tuning for Neural Network Surrogates

This module provides automatic hyperparameter optimization for MLP surrogates
using Optuna, with proper train/validation splits and early stopping.

Key Features:
1. Optuna-based hyperparameter search
2. K-fold cross-validation
3. Early stopping to prevent overfitting
4. Ensemble model support
5. Proper uncertainty quantification

Usage:
    from optimization_engine.surrogate_tuner import SurrogateHyperparameterTuner

    tuner = SurrogateHyperparameterTuner(
        input_dim=11,
        output_dim=3,
        n_trials=50
    )
    best_config = tuner.tune(X_train, Y_train)
    model = tuner.create_tuned_model(best_config)
"""

import logging
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, field
from pathlib import Path

logger = logging.getLogger(__name__)

try:
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    logger.warning("PyTorch not installed")

try:
    import optuna
    from optuna.samplers import TPESampler
    from optuna.pruners import MedianPruner
    OPTUNA_AVAILABLE = True
except ImportError:
    OPTUNA_AVAILABLE = False
    logger.warning("Optuna not installed")


@dataclass
class SurrogateConfig:
    """Configuration for a tuned surrogate model."""
    hidden_dims: List[int] = field(default_factory=lambda: [128, 256, 128])
    dropout: float = 0.1
    activation: str = 'relu'
    use_batch_norm: bool = True
    learning_rate: float = 1e-3
    weight_decay: float = 1e-4
    batch_size: int = 16
    max_epochs: int = 500
    early_stopping_patience: int = 30

    # Normalization stats (filled during training)
    input_mean: Optional[np.ndarray] = None
    input_std: Optional[np.ndarray] = None
    output_mean: Optional[np.ndarray] = None
    output_std: Optional[np.ndarray] = None

    # Validation metrics
    val_loss: float = float('inf')
    val_r2: Dict[str, float] = field(default_factory=dict)


class TunableMLP(nn.Module):
    """Flexible MLP with configurable architecture."""

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        hidden_dims: List[int],
        dropout: float = 0.1,
        activation: str = 'relu',
        use_batch_norm: bool = True
    ):
        super().__init__()

        self.input_dim = input_dim
        self.output_dim = output_dim

        # Activation function
        activations = {
            'relu': nn.ReLU(),
            'leaky_relu': nn.LeakyReLU(0.1),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'gelu': nn.GELU(),
            'swish': nn.SiLU()
        }
        act_fn = activations.get(activation, nn.ReLU())

        # Build layers
        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(hidden_dim))
            layers.append(act_fn)
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            prev_dim = hidden_dim

        layers.append(nn.Linear(prev_dim, output_dim))
        self.network = nn.Sequential(*layers)

        self._init_weights()

    def _init_weights(self):
        """Initialize weights using Kaiming initialization."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        return self.network(x)
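

# Illustrative sketch (not referenced elsewhere in this module): how a
# `hidden_dims` list maps onto the TunableMLP layer stack. The dimensions and
# dummy batch below are arbitrary example values, not project defaults.
def _example_tunable_mlp_forward() -> torch.Size:
    """Build a small TunableMLP and push a dummy batch through it."""
    model = TunableMLP(
        input_dim=11,                  # e.g. 11 design variables
        output_dim=3,                  # e.g. 3 objectives
        hidden_dims=[128, 256, 128],   # Linear -> BatchNorm -> ReLU -> Dropout per entry
        dropout=0.1,
    )
    model.eval()                       # eval mode so BatchNorm1d uses running stats
    with torch.no_grad():
        y = model(torch.randn(4, 11))  # dummy batch of 4 design points
    return y.shape                     # torch.Size([4, 3])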


class EarlyStopping:
    """Early stopping to prevent overfitting."""

    def __init__(self, patience: int = 20, min_delta: float = 1e-5):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')
        self.best_model_state = None
        self.should_stop = False

    def __call__(self, val_loss: float, model: nn.Module) -> bool:
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True

        return self.should_stop

    def restore_best(self, model: nn.Module):
        """Restore model to best state."""
        if self.best_model_state is not None:
            model.load_state_dict(self.best_model_state)
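

# Minimal sketch of the EarlyStopping protocol used by the training loops below:
# call the instance once per epoch with the validation loss, stop when it
# returns True, then restore the best weights. The tiny model and random data
# here are placeholders for illustration only.
def _example_early_stopping_loop() -> float:
    model = nn.Linear(4, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    x, y = torch.randn(32, 4), torch.randn(32, 1)
    stopper = EarlyStopping(patience=5)

    for _ in range(100):
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()
        if stopper(loss.item(), model):  # here train loss stands in for val loss
            break

    stopper.restore_best(model)          # roll back to the best epoch seen
    return stopper.best_loss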


class SurrogateHyperparameterTuner:
    """
    Automatic hyperparameter tuning for neural network surrogates.

    Uses Optuna for Bayesian optimization of:
    - Network architecture (layers, widths)
    - Regularization (dropout, weight decay)
    - Learning rate and batch size
    - Activation functions
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_trials: int = 50,
        n_cv_folds: int = 5,
        device: str = 'auto',
        seed: int = 42,
        timeout_seconds: Optional[int] = None
    ):
        """
        Initialize hyperparameter tuner.

        Args:
            input_dim: Number of input features (design variables)
            output_dim: Number of outputs (objectives)
            n_trials: Number of Optuna trials for hyperparameter search
            n_cv_folds: Number of cross-validation folds
            device: Computing device ('cuda', 'cpu', or 'auto')
            seed: Random seed for reproducibility
            timeout_seconds: Optional timeout for tuning
        """
        if not TORCH_AVAILABLE:
            raise ImportError("PyTorch required for surrogate tuning")
        if not OPTUNA_AVAILABLE:
            raise ImportError("Optuna required for hyperparameter tuning")

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_trials = n_trials
        self.n_cv_folds = n_cv_folds
        self.seed = seed
        self.timeout = timeout_seconds

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        self.best_config: Optional[SurrogateConfig] = None
        self.study: Optional[optuna.Study] = None

        # Set seeds
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)

    def _suggest_hyperparameters(self, trial: optuna.Trial) -> SurrogateConfig:
        """Suggest hyperparameters for a trial."""

        # Architecture
        n_layers = trial.suggest_int('n_layers', 2, 5)
        hidden_dims = []
        for i in range(n_layers):
            dim = trial.suggest_int(f'hidden_dim_{i}', 32, 512, step=32)
            hidden_dims.append(dim)

        # Regularization
        dropout = trial.suggest_float('dropout', 0.0, 0.5)
        weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)

        # Training
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
        batch_size = trial.suggest_categorical('batch_size', [8, 16, 32, 64])

        # Activation
        activation = trial.suggest_categorical(
            'activation', ['relu', 'leaky_relu', 'elu', 'gelu', 'swish'])

        # Batch norm
        use_batch_norm = trial.suggest_categorical('use_batch_norm', [True, False])

        return SurrogateConfig(
            hidden_dims=hidden_dims,
            dropout=dropout,
            activation=activation,
            use_batch_norm=use_batch_norm,
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            batch_size=batch_size
        )

    def _train_fold(
        self,
        config: SurrogateConfig,
        X_train: np.ndarray,
        Y_train: np.ndarray,
        X_val: np.ndarray,
        Y_val: np.ndarray,
        trial: Optional[optuna.Trial] = None
    ) -> Tuple[float, Dict[str, float]]:
        """Train model on one fold and return validation metrics."""

        # Create model
        model = TunableMLP(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            hidden_dims=config.hidden_dims,
            dropout=config.dropout,
            activation=config.activation,
            use_batch_norm=config.use_batch_norm
        ).to(self.device)

        # Prepare data
        X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
        Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
        X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
        Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)

        train_dataset = TensorDataset(X_train_t, Y_train_t)
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

        # Optimizer and scheduler
        optimizer = torch.optim.AdamW(
            model.parameters(),
            lr=config.learning_rate,
            weight_decay=config.weight_decay
        )
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=config.max_epochs
        )

        early_stopping = EarlyStopping(patience=config.early_stopping_patience)

        # Training loop
        for epoch in range(config.max_epochs):
            model.train()
            for X_batch, Y_batch in train_loader:
                optimizer.zero_grad()
                pred = model(X_batch)
                loss = nn.functional.mse_loss(pred, Y_batch)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()

            scheduler.step()

            # Validation
            model.eval()
            with torch.no_grad():
                val_pred = model(X_val_t)
                val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()

            # Early stopping
            if early_stopping(val_loss, model):
                break

            # Optuna pruning: report validation loss every 10 epochs so the
            # pruner can stop unpromising trials early
            if trial is not None and epoch % 10 == 0:
                trial.report(val_loss, epoch // 10)
                if trial.should_prune():
                    raise optuna.TrialPruned()

        # Restore best model
        early_stopping.restore_best(model)

        # Final validation metrics
        model.eval()
        with torch.no_grad():
            val_pred = model(X_val_t).cpu().numpy()
            Y_val_np = Y_val_t.cpu().numpy()

        val_loss = float(np.mean((val_pred - Y_val_np) ** 2))

        # R² per output
        r2_scores = {}
        for i in range(self.output_dim):
            ss_res = np.sum((Y_val_np[:, i] - val_pred[:, i]) ** 2)
            ss_tot = np.sum((Y_val_np[:, i] - Y_val_np[:, i].mean()) ** 2)
            r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0
            r2_scores[f'output_{i}'] = r2

        return val_loss, r2_scores

    def _cross_validate(
        self,
        config: SurrogateConfig,
        X: np.ndarray,
        Y: np.ndarray,
        trial: Optional[optuna.Trial] = None
    ) -> Tuple[float, Dict[str, float]]:
        """Perform k-fold cross-validation."""

        n_samples = len(X)
        indices = np.random.permutation(n_samples)
        fold_size = n_samples // self.n_cv_folds

        fold_losses = []
        fold_r2s = {f'output_{i}': [] for i in range(self.output_dim)}

        for fold in range(self.n_cv_folds):
            # Split indices
            val_start = fold * fold_size
            val_end = val_start + fold_size if fold < self.n_cv_folds - 1 else n_samples

            val_indices = indices[val_start:val_end]
            train_indices = np.concatenate([indices[:val_start], indices[val_end:]])

            X_train, Y_train = X[train_indices], Y[train_indices]
            X_val, Y_val = X[val_indices], Y[val_indices]

            # Skip fold if too few samples
            if len(X_train) < 10 or len(X_val) < 2:
                continue

            val_loss, r2_scores = self._train_fold(
                config, X_train, Y_train, X_val, Y_val, trial
            )

            fold_losses.append(val_loss)
            for key, val in r2_scores.items():
                fold_r2s[key].append(val)

        if not fold_losses:
            # All folds were skipped (dataset too small for CV); signal a bad trial.
            return float('inf'), {k: 0.0 for k in fold_r2s}

        mean_loss = float(np.mean(fold_losses))
        mean_r2 = {k: np.mean(v) for k, v in fold_r2s.items()}

        return mean_loss, mean_r2

    def tune(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        output_names: Optional[List[str]] = None
    ) -> SurrogateConfig:
        """
        Tune hyperparameters using Optuna.

        Args:
            X: Input features [n_samples, input_dim]
            Y: Outputs [n_samples, output_dim]
            output_names: Optional names for outputs (for logging)

        Returns:
            Best SurrogateConfig found
        """
        logger.info(f"Starting hyperparameter tuning with {self.n_trials} trials...")
        logger.info(f"Data: {len(X)} samples, {self.n_cv_folds}-fold CV")

        # Normalize data
        self.input_mean = X.mean(axis=0)
        self.input_std = X.std(axis=0) + 1e-8
        self.output_mean = Y.mean(axis=0)
        self.output_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.input_mean) / self.input_std
        Y_norm = (Y - self.output_mean) / self.output_std

        def objective(trial: optuna.Trial) -> float:
            config = self._suggest_hyperparameters(trial)
            val_loss, r2_scores = self._cross_validate(config, X_norm, Y_norm, trial)

            # Log R² scores
            for key, val in r2_scores.items():
                trial.set_user_attr(f'r2_{key}', val)

            return val_loss

        # Create study
        self.study = optuna.create_study(
            direction='minimize',
            sampler=TPESampler(seed=self.seed, n_startup_trials=10),
            pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=20)
        )

        self.study.optimize(
            objective,
            n_trials=self.n_trials,
            timeout=self.timeout,
            show_progress_bar=True,
            catch=(RuntimeError,)  # Catch GPU OOM errors
        )

        # Build best config
        best_trial = self.study.best_trial
        self.best_config = self._suggest_hyperparameters_from_params(best_trial.params)
        self.best_config.val_loss = best_trial.value
        self.best_config.val_r2 = {
            k.replace('r2_', ''): v
            for k, v in best_trial.user_attrs.items()
            if k.startswith('r2_')
        }

        # Store normalization
        self.best_config.input_mean = self.input_mean
        self.best_config.input_std = self.input_std
        self.best_config.output_mean = self.output_mean
        self.best_config.output_std = self.output_std

        # Log results
        logger.info("\nBest hyperparameters found:")
        logger.info(f"  Hidden dims: {self.best_config.hidden_dims}")
        logger.info(f"  Dropout: {self.best_config.dropout:.3f}")
        logger.info(f"  Activation: {self.best_config.activation}")
        logger.info(f"  Batch norm: {self.best_config.use_batch_norm}")
        logger.info(f"  Learning rate: {self.best_config.learning_rate:.2e}")
        logger.info(f"  Weight decay: {self.best_config.weight_decay:.2e}")
        logger.info(f"  Batch size: {self.best_config.batch_size}")
        logger.info(f"  Validation loss: {self.best_config.val_loss:.6f}")

        if output_names:
            for i, name in enumerate(output_names):
                r2 = self.best_config.val_r2.get(f'output_{i}', 0)
                logger.info(f"  {name} R² (CV): {r2:.4f}")

        return self.best_config

    def _suggest_hyperparameters_from_params(self, params: Dict[str, Any]) -> SurrogateConfig:
        """Reconstruct config from Optuna params dict."""
        n_layers = params['n_layers']
        hidden_dims = [params[f'hidden_dim_{i}'] for i in range(n_layers)]

        return SurrogateConfig(
            hidden_dims=hidden_dims,
            dropout=params['dropout'],
            activation=params['activation'],
            use_batch_norm=params['use_batch_norm'],
            learning_rate=params['learning_rate'],
            weight_decay=params['weight_decay'],
            batch_size=params['batch_size']
        )

    def create_tuned_model(
        self,
        config: Optional[SurrogateConfig] = None
    ) -> TunableMLP:
        """Create a model with tuned hyperparameters."""
        if config is None:
            config = self.best_config
        if config is None:
            raise ValueError("No config available. Run tune() first.")

        return TunableMLP(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            hidden_dims=config.hidden_dims,
            dropout=config.dropout,
            activation=config.activation,
            use_batch_norm=config.use_batch_norm
        )
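

# Illustrative sketch (not referenced elsewhere in this module): the intended
# tuning workflow on a small synthetic dataset. Array sizes, trial counts and
# the RNG seed are placeholder values. Note that create_tuned_model() returns a
# freshly initialized network with the tuned architecture; it still has to be
# trained on normalized data, which is what TunedEnsembleSurrogate (defined
# below) does using the normalization stats stored on the tuned config.
def _example_tuning_workflow():
    rng = np.random.default_rng(0)
    X = rng.uniform(size=(80, 11))   # 80 samples, 11 design variables
    Y = rng.uniform(size=(80, 3))    # 3 objectives

    tuner = SurrogateHyperparameterTuner(input_dim=11, output_dim=3, n_trials=10)
    best_config = tuner.tune(X, Y)

    # The raw Optuna search-space values remain available on the study object.
    logger.info("Best trial params: %s", tuner.study.best_params)

    ensemble = TunedEnsembleSurrogate(best_config, input_dim=11, output_dim=3, n_models=3)
    ensemble.train(X, Y)
    return ensemble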


class TunedEnsembleSurrogate:
    """
    Ensemble of tuned surrogate models for better uncertainty quantification.

    Trains multiple models with different random seeds and aggregates predictions.
    """

    def __init__(
        self,
        config: SurrogateConfig,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        device: str = 'auto'
    ):
        """
        Initialize ensemble surrogate.

        Args:
            config: Tuned configuration to use for all models
            input_dim: Number of input features
            output_dim: Number of outputs
            n_models: Number of models in ensemble
            device: Computing device
        """
        self.config = config
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        self.models: List[TunableMLP] = []
        self.trained = False

    def train(self, X: np.ndarray, Y: np.ndarray, val_split: float = 0.2):
        """Train all models in the ensemble."""
        logger.info(f"Training ensemble of {self.n_models} models...")

        # Normalize using config stats
        X_norm = (X - self.config.input_mean) / self.config.input_std
        Y_norm = (Y - self.config.output_mean) / self.config.output_std

        # Split data
        n_val = int(len(X) * val_split)
        indices = np.random.permutation(len(X))
        train_idx, val_idx = indices[n_val:], indices[:n_val]

        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
        Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
        X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
        Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)

        train_dataset = TensorDataset(X_train_t, Y_train_t)

        self.models = []

        for i in range(self.n_models):
            torch.manual_seed(42 + i)

            model = TunableMLP(
                input_dim=self.input_dim,
                output_dim=self.output_dim,
                hidden_dims=self.config.hidden_dims,
                dropout=self.config.dropout,
                activation=self.config.activation,
                use_batch_norm=self.config.use_batch_norm
            ).to(self.device)

            optimizer = torch.optim.AdamW(
                model.parameters(),
                lr=self.config.learning_rate,
                weight_decay=self.config.weight_decay
            )
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=self.config.max_epochs
            )

            train_loader = DataLoader(
                train_dataset,
                batch_size=self.config.batch_size,
                shuffle=True
            )
            early_stopping = EarlyStopping(patience=self.config.early_stopping_patience)

            for epoch in range(self.config.max_epochs):
                model.train()
                for X_batch, Y_batch in train_loader:
                    optimizer.zero_grad()
                    pred = model(X_batch)
                    loss = nn.functional.mse_loss(pred, Y_batch)
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                    optimizer.step()

                scheduler.step()

                model.eval()
                with torch.no_grad():
                    val_pred = model(X_val_t)
                    val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()

                if early_stopping(val_loss, model):
                    break

            early_stopping.restore_best(model)
            model.eval()
            self.models.append(model)

            logger.info(f"  Model {i+1}/{self.n_models}: val_loss = {early_stopping.best_loss:.6f}")

        self.trained = True
        logger.info("Ensemble training complete")

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty estimation.

        Args:
            X: Input features [n_samples, input_dim]

        Returns:
            Tuple of (mean_predictions, std_predictions)
        """
        if not self.trained:
            raise RuntimeError("Ensemble not trained. Call train() first.")

        # Normalize input
        X_norm = (X - self.config.input_mean) / self.config.input_std
        X_t = torch.tensor(X_norm, dtype=torch.float32, device=self.device)

        # Collect predictions from all models
        predictions = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            # Denormalize
            pred = pred * self.config.output_std + self.config.output_mean
            predictions.append(pred)

        predictions = np.array(predictions)  # [n_models, n_samples, output_dim]

        mean_pred = predictions.mean(axis=0)
        std_pred = predictions.std(axis=0)

        return mean_pred, std_pred

    def predict_single(self, params: Dict[str, float], var_names: List[str]) -> Tuple[Dict[str, float], float]:
        """
        Predict for a single point with uncertainty.

        Args:
            params: Dictionary of input parameters
            var_names: List of variable names in order

        Returns:
            Tuple of (predictions dict, total uncertainty)
        """
        X = np.array([[params[name] for name in var_names]])
        mean, std = self.predict(X)

        pred_dict = {f'output_{i}': mean[0, i] for i in range(self.output_dim)}
        uncertainty = float(np.sum(std[0]))

        return pred_dict, uncertainty

    def save(self, path: Path):
        """Save ensemble to disk."""
        state = {
            'config': {
                'hidden_dims': self.config.hidden_dims,
                'dropout': self.config.dropout,
                'activation': self.config.activation,
                'use_batch_norm': self.config.use_batch_norm,
                'learning_rate': self.config.learning_rate,
                'weight_decay': self.config.weight_decay,
                'batch_size': self.config.batch_size,
                'input_mean': self.config.input_mean.tolist(),
                'input_std': self.config.input_std.tolist(),
                'output_mean': self.config.output_mean.tolist(),
                'output_std': self.config.output_std.tolist(),
            },
            'n_models': self.n_models,
            'model_states': [m.state_dict() for m in self.models]
        }
        torch.save(state, path)
        logger.info(f"Saved ensemble to {path}")

    def load(self, path: Path):
        """Load ensemble from disk."""
        state = torch.load(path, map_location=self.device)

        # Restore config
        cfg = state['config']
        self.config = SurrogateConfig(
            hidden_dims=cfg['hidden_dims'],
            dropout=cfg['dropout'],
            activation=cfg['activation'],
            use_batch_norm=cfg['use_batch_norm'],
            learning_rate=cfg['learning_rate'],
            weight_decay=cfg['weight_decay'],
            batch_size=cfg['batch_size'],
            input_mean=np.array(cfg['input_mean']),
            input_std=np.array(cfg['input_std']),
            output_mean=np.array(cfg['output_mean']),
            output_std=np.array(cfg['output_std'])
        )

        self.n_models = state['n_models']
        self.models = []

        for model_state in state['model_states']:
            model = TunableMLP(
                input_dim=self.input_dim,
                output_dim=self.output_dim,
                hidden_dims=self.config.hidden_dims,
                dropout=self.config.dropout,
                activation=self.config.activation,
                use_batch_norm=self.config.use_batch_norm
            ).to(self.device)
            model.load_state_dict(model_state)
            model.eval()
            self.models.append(model)

        self.trained = True
        logger.info(f"Loaded ensemble with {self.n_models} models from {path}")
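

# Illustrative sketch (not referenced elsewhere in this module): save/load
# roundtrip for a trained ensemble. The path is supplied by the caller; the
# input and output dimensions of the new instance must match the saved models,
# since load() rebuilds each TunableMLP from self.input_dim / self.output_dim.
def _example_ensemble_roundtrip(trained: 'TunedEnsembleSurrogate', path: Path) -> 'TunedEnsembleSurrogate':
    trained.save(path)

    # The config argument is only a placeholder here; load() replaces it with
    # the configuration stored in the checkpoint.
    restored = TunedEnsembleSurrogate(
        config=SurrogateConfig(),
        input_dim=trained.input_dim,
        output_dim=trained.output_dim,
        n_models=trained.n_models
    )
    restored.load(path)
    return restored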


def tune_surrogate_for_study(
    fea_data: List[Dict],
    design_var_names: List[str],
    objective_names: List[str],
    n_tuning_trials: int = 50,
    n_ensemble_models: int = 5
) -> TunedEnsembleSurrogate:
    """
    Convenience function to tune and create ensemble surrogate.

    Args:
        fea_data: List of FEA results with 'params' and 'objectives' keys
        design_var_names: List of design variable names
        objective_names: List of objective names
        n_tuning_trials: Number of Optuna trials
        n_ensemble_models: Number of models in ensemble

    Returns:
        Trained TunedEnsembleSurrogate
    """
    # Prepare data
    X = np.array([[d['params'][name] for name in design_var_names] for d in fea_data])
    Y = np.array([[d['objectives'][name] for name in objective_names] for d in fea_data])

    logger.info(f"Tuning surrogate on {len(X)} samples...")
    logger.info(f"Input: {len(design_var_names)} design variables")
    logger.info(f"Output: {len(objective_names)} objectives")

    # Tune hyperparameters
    tuner = SurrogateHyperparameterTuner(
        input_dim=len(design_var_names),
        output_dim=len(objective_names),
        n_trials=n_tuning_trials,
        n_cv_folds=5
    )

    best_config = tuner.tune(X, Y, output_names=objective_names)

    # Create and train ensemble
    ensemble = TunedEnsembleSurrogate(
        config=best_config,
        input_dim=len(design_var_names),
        output_dim=len(objective_names),
        n_models=n_ensemble_models
    )

    ensemble.train(X, Y)

    return ensemble
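

if __name__ == "__main__":
    # Minimal smoke test on synthetic data (illustrative only; real callers pass
    # FEA results). Sizes and trial counts are kept small so the script finishes
    # in a reasonable time on CPU, but it still exercises tuning, ensembling,
    # and prediction end to end. Variable and objective names are placeholders.
    logging.basicConfig(level=logging.INFO)

    rng = np.random.default_rng(0)
    demo_vars = ['x0', 'x1', 'x2']
    demo_objs = ['mass', 'stress']
    demo_data = []
    for _ in range(80):
        p = {name: float(rng.uniform()) for name in demo_vars}
        demo_data.append({
            'params': p,
            # Simple analytic objectives with a little noise, for illustration.
            'objectives': {
                'mass': p['x0'] + 0.5 * p['x1'] + 0.05 * float(rng.normal()),
                'stress': p['x1'] * p['x2'] + 0.05 * float(rng.normal()),
            }
        })

    surrogate = tune_surrogate_for_study(
        demo_data, demo_vars, demo_objs,
        n_tuning_trials=5, n_ensemble_models=2
    )
    preds, unc = surrogate.predict_single({'x0': 0.5, 'x1': 0.5, 'x2': 0.5}, demo_vars)
    print(f"Predictions: {preds}, total uncertainty: {unc:.4f}")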