#!/usr/bin/env python3
"""
Ensemble Surrogate with Uncertainty Quantification

Addresses the V5 failure mode where single MLPs gave overconfident
predictions in out-of-distribution regions, leading L-BFGS to fake optima.

Key features:
1. Ensemble of N MLPs - disagreement = uncertainty
2. OOD detection - reject predictions far from training data
3. Confidence bounds - never trust point predictions alone
4. Active learning - prioritize FEA in uncertain regions

Author: Atomizer
Created: 2025-12-28
"""

import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np

logger = logging.getLogger(__name__)

try:
    import torch
    import torch.nn as nn
    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False
    logger.warning("PyTorch not available - ensemble features limited")


class _BruteForceKNN:
    """Minimal drop-in replacement for sklearn.neighbors.NearestNeighbors.

    Euclidean metric, brute-force search. Implements only the
    ``fit``/``kneighbors`` subset used by :class:`OODDetector`, so the module
    degrades gracefully when scikit-learn is not installed (mirroring the
    HAS_TORCH pattern above).
    """

    def __init__(self, n_neighbors: int = 5):
        self.n_neighbors = n_neighbors
        self._X: Optional[np.ndarray] = None

    def fit(self, X: np.ndarray) -> '_BruteForceKNN':
        """Store the reference points."""
        self._X = np.asarray(X, dtype=float)
        return self

    def kneighbors(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Return (distances, indices) of the k nearest stored points.

        Matches sklearn's return shape: both arrays are
        (n_queries, n_neighbors), distances sorted ascending per row.
        """
        queries = np.atleast_2d(np.asarray(X, dtype=float))
        # Pairwise Euclidean distances: (n_queries, n_stored)
        dists = np.linalg.norm(queries[:, None, :] - self._X[None, :, :], axis=2)
        idx = np.argsort(dists, axis=1)[:, :self.n_neighbors]
        return np.take_along_axis(dists, idx, axis=1), idx


try:
    from sklearn.neighbors import NearestNeighbors
    HAS_SKLEARN = True
except ImportError:
    # BUG FIX: the original imported sklearn unconditionally, so the whole
    # module failed to import without it. Fall back to a numpy implementation.
    HAS_SKLEARN = False
    NearestNeighbors = _BruteForceKNN  # type: ignore[assignment,misc]
    logger.warning("scikit-learn not available - using numpy brute-force KNN")


if HAS_TORCH:

    class MLP(nn.Module):
        """Single MLP ensemble member.

        Architecture: [Linear -> ReLU -> Dropout(0.1)] for each hidden layer,
        followed by a final Linear projection to ``output_dim``.
        """

        def __init__(self, input_dim: int, output_dim: int,
                     hidden_dims: Optional[List[int]] = None):
            super().__init__()
            hidden_dims = hidden_dims or [64, 32]

            layers: List[nn.Module] = []
            in_dim = input_dim
            for h_dim in hidden_dims:
                layers.append(nn.Linear(in_dim, h_dim))
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(0.1))
                in_dim = h_dim
            layers.append(nn.Linear(in_dim, output_dim))

            self.net = nn.Sequential(*layers)

        def forward(self, x):
            return self.net(x)

else:

    class MLP:  # type: ignore[no-redef]
        """Placeholder used when PyTorch is unavailable.

        BUG FIX: the original defined ``class MLP(nn.Module)`` unconditionally,
        which raised NameError at import time whenever torch was missing and
        defeated the HAS_TORCH guard. This stub defers the failure to first use
        with a clear message instead.
        """

        def __init__(self, *args, **kwargs):
            raise ImportError("PyTorch is required to use MLP / EnsembleSurrogate")


class OODDetector:
    """
    Out-of-Distribution detector using multiple methods:
    1. Z-score check (is input within N std of training mean)
    2. KNN distance (is input close to training points)

    All checks operate in whatever space ``X_train`` is given in; the
    surrogate below passes *normalized* inputs, so queries must be
    normalized the same way.
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        """
        Args:
            X_train: (n_samples, n_features) reference (training) inputs
            z_threshold: max per-feature |z| before a point is flagged OOD
            knn_k: number of neighbors for the local-density check
        """
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k

        # Per-feature training statistics; std floored to avoid div-by-zero
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8

        # Fit KNN for local density estimation
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)

        # Typical KNN distance within the training set (self-distance 0 is
        # included for every point; the median makes this robust to outliers)
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))

        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is within z_threshold std of training mean.

        Returns (is_ok, max_abs_z); scalars for a single query point,
        arrays when several points are passed at once.
        """
        x = np.atleast_2d(x)
        z_scores = np.abs((x - self.mean) / self.std)
        max_z = z_scores.max(axis=1)
        is_ok = max_z < self.z_threshold
        return (is_ok[0] if len(is_ok) == 1 else is_ok,
                max_z[0] if len(max_z) == 1 else max_z)

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is close enough to training data.

        Returns (is_ok, mean_knn_distance) with the same scalar/array
        convention as :meth:`z_score_check`.
        """
        x = np.atleast_2d(x)
        distances, _ = self.knn.kneighbors(x)
        avg_dist = distances.mean(axis=1)
        # Allow up to 3x the typical within-training distance
        is_ok = avg_dist < 3 * self.typical_knn_dist
        return (is_ok[0] if len(is_ok) == 1 else is_ok,
                avg_dist[0] if len(avg_dist) == 1 else avg_dist)

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Combined OOD check: a point must pass BOTH sub-checks.

        Intended for a single query point (the details dict casts to
        scalars). Returns (in_distribution, details).
        """
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)
        is_ok = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(is_ok),
        }
        return is_ok, details


class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree in
    well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: Optional[List[int]] = None,
        device: str = 'auto'
    ):
        """
        Args:
            input_dim: number of input features
            output_dim: number of predicted outputs
            n_models: ensemble size
            hidden_dims: hidden layer widths (default [64, 32])
            device: 'auto' (cuda if available), 'cpu', or 'cuda'
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        # BUG FIX: original passed the raw `hidden_dims` argument (possibly
        # None) rather than the resolved `self.hidden_dims`; it only worked
        # because MLP duplicated the same default.
        self.models = [
            MLP(input_dim, output_dim, self.hidden_dims).to(self.device)
            for _ in range(n_models)
        ]

        # Input/output normalization statistics (set by train()/load())
        self.x_mean: Optional[np.ndarray] = None
        self.x_std: Optional[np.ndarray] = None
        self.y_mean: Optional[np.ndarray] = None
        self.y_std: Optional[np.ndarray] = None

        # OOD detector in normalized input space (set by train()/load())
        self.ood_detector: Optional[OODDetector] = None

        # Training state flag
        self.is_trained = False

        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def _require_trained(self):
        """Raise a clear error when prediction is attempted before train()/load().

        BUG FIX: the original crashed with an obscure TypeError on
        ``None`` normalization stats instead.
        """
        if not self.is_trained:
            raise RuntimeError(
                "EnsembleSurrogate is not trained - call train() or load() first")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) inputs
            Y: (n_samples, output_dim) targets
            epochs: max full-batch epochs per model
            lr: AdamW learning rate
            val_split: validation fraction (at least 5 points are held out)
            patience: early-stopping patience in epochs

        Returns:
            Validation metrics dict: 'r2', 'mae', 'avg_ensemble_std',
            'n_val', plus 'val_losses' (best val loss per model).

        Note: reseeds numpy's and torch's global RNGs per model.
        """
        # Normalization stats; std floored at 1e-8 to avoid division by zero
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std

        # Train/validation split (random permutation, >= 5 validation points)
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]
        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        # Convert to tensors once; training is full-batch
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)

        all_val_losses = []
        for i, model in enumerate(self.models):
            # Per-model seed makes dropout masks reproducible per member.
            # (Weight inits already differ: they were drawn at construction.)
            torch.manual_seed(42 + i * 1000)
            np.random.seed(42 + i * 1000)

            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()

            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None

            for epoch in range(epochs):
                # Full-batch training step
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()

                # Validation
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()

                # Early stopping: track best checkpoint on CPU
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break

            # Restore the best checkpoint seen during training
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)

            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")

        # OOD detector operates in normalized input space
        self.ood_detector = OODDetector(X_norm)
        self.is_trained = True

        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses
        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on the (normalized)
        validation set."""
        mean, std = self.predict_normalized(X_val)

        # R² per output dimension
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)

        # MAE per output dimension
        mae = np.abs(Y_val - mean).mean(axis=0)

        # Average ensemble disagreement (epistemic uncertainty proxy)
        avg_std = std.mean()

        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val),
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns (mean, std) across ensemble members, each of shape
        (n_samples, n_outputs). Models are put in eval mode (dropout off).
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)

        preds = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)
        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)

        Raises:
            RuntimeError: if called before train()/load().
        """
        self._require_trained()
        X = np.atleast_2d(X)

        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std

        # Ensemble prediction in normalized space
        mean_norm, std_norm = self.predict_normalized(X_norm)

        # Denormalize; std scales with y_std (mean shift cancels)
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std
        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.

        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result per point
        - recommendation: 'trust', 'verify', or 'reject' per point

        Raises:
            RuntimeError: if called before train()/load(), or if no OOD
            detector is available (e.g. a legacy checkpoint without OOD data).
        """
        self._require_trained()
        if self.ood_detector is None:
            raise RuntimeError(
                "No OOD detector available - retrain or re-save the ensemble")

        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # OOD check in normalized input space, one point at a time
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]

        # Confidence score (0 = no confidence, 1 = high confidence), based on
        # relative std (lower = better) and OOD membership (in = better).
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)

        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty

        # Recommendations derived from the confidence tiers above
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead

        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations,
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Simplified improvement-based acquisition function (minimization).

        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point

        Raises:
            RuntimeError: if called before train()/load() or without an
            OOD detector.
        """
        self._require_trained()
        if self.ood_detector is None:
            raise RuntimeError(
                "No OOD detector available - retrain or re-save the ensemble")

        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # For minimization: improvement = best - predicted.
        # Use the first objective (weighted sum) for acquisition.
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]

        improvement = best_so_far - mean

        # Exploitation + exploration bonus (higher std = more exploration)
        scores = improvement + xi * std

        # Heavy penalty for OOD points.
        # BUG FIX: the original always multiplied by 0.1, which *raised*
        # negative scores toward zero and made OOD points look better than
        # in-distribution ones whenever scores were negative. Scale away
        # from zero for negative scores instead.
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                scores[i] = scores[i] * 0.1 if scores[i] > 0 else scores[i] * 10.0

        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates
            scores: acquisition scores of the selected candidates
        """
        scores = self.acquisition_score(candidates, best_so_far)

        # Greedy selection with a diversity bonus
        selected: List[int] = []
        remaining = list(range(len(candidates)))

        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick the highest-scoring candidate
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to already-selected points
                def combined_score(i):
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    return scores[i] + diversity_weight * min_dist

                best_idx = max(remaining, key=combined_score)

            selected.append(best_idx)
            remaining.remove(best_idx)

        # dtype=int so indexing works even for an empty selection
        # (BUG FIX: np.array([]) defaults to float and cannot index scores)
        selected_arr = np.array(selected, dtype=int)
        return selected_arr, scores[selected_arr]

    def save(self, path: Path):
        """Save ensemble (weights, normalization stats, OOD training data)
        to a directory."""
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # One state-dict file per ensemble member
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")

        # Normalization stats and config.
        # BUG FIX: the original did not persist the OOD detector, so loaded
        # surrogates crashed in predict_with_confidence/acquisition_score.
        # 'ood_train' is additive, so old checkpoints still load.
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
            'ood_train': (self.ood_detector.X_train.tolist()
                          if self.ood_detector is not None else None),
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        Restores weights, normalization stats, and (when present in the
        checkpoint) the OOD detector.
        """
        path = Path(path)

        with open(path / "config.json") as f:
            config = json.load(f)

        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )

        # Normalization stats (explicit None checks, not truthiness)
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] is not None else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] is not None else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] is not None else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] is not None else None

        # OOD detector (absent in checkpoints written by older versions)
        ood_train = config.get('ood_train')
        if ood_train is not None:
            surrogate.ood_detector = OODDetector(np.array(ood_train))

        # Model weights
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)

        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")
        return surrogate


# Convenience function for quick usage
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Create and train an ensemble surrogate.

    Accepts 1-D Y (reshaped to a single output column) or 2-D Y.
    """
    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=Y.shape[1] if Y.ndim > 1 else 1,
        n_models=n_models
    )

    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    metrics = surrogate.train(X, Y, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")
    return surrogate