Updates before optimization_engine migration: - Updated migration plan to v2.1 with complete file inventory - Added OP_07 disk optimization protocol - Added SYS_16 self-aware turbo protocol - Added study archiver and cleanup utilities - Added ensemble surrogate module - Updated NX solver and session manager - Updated zernike HTML generator - Added context engineering plan - LAC session insights updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
541 lines
17 KiB
Python
541 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Ensemble Surrogate with Uncertainty Quantification
|
|
|
|
Addresses the V5 failure mode where single MLPs gave overconfident predictions
|
|
in out-of-distribution regions, leading L-BFGS to fake optima.
|
|
|
|
Key features:
|
|
1. Ensemble of N MLPs - disagreement = uncertainty
|
|
2. OOD detection - reject predictions far from training data
|
|
3. Confidence bounds - never trust point predictions alone
|
|
4. Active learning - prioritize FEA in uncertain regions
|
|
|
|
Author: Atomizer
|
|
Created: 2025-12-28
|
|
"""
|
|
|
|
import numpy as np
|
|
from typing import Tuple, List, Dict, Optional
|
|
from pathlib import Path
|
|
import json
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import torch
|
|
import torch.nn as nn
|
|
HAS_TORCH = True
|
|
except ImportError:
|
|
HAS_TORCH = False
|
|
logger.warning("PyTorch not available - ensemble features limited")
|
|
|
|
from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
|
class MLP(nn.Module):
    """Feed-forward regressor used as a single member of the ensemble.

    Architecture: [Linear -> ReLU -> Dropout(0.1)] per hidden width,
    followed by a final Linear projection to ``output_dim``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        if hidden_dims is None:
            hidden_dims = [64, 32]

        modules = []
        prev_width = input_dim
        for width in hidden_dims:
            modules.extend([nn.Linear(prev_width, width), nn.ReLU(), nn.Dropout(0.1)])
            prev_width = width
        modules.append(nn.Linear(prev_width, output_dim))

        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run the input batch through the sequential stack."""
        return self.net(x)
|
|
|
|
|
|
class OODDetector:
    """
    Flags query points that fall outside the training distribution.

    Two complementary checks are combined:
    1. Z-score: every feature must lie within ``z_threshold`` standard
       deviations of the training mean.
    2. KNN distance: the mean distance to the k nearest training points
       must stay below 3x the median neighbor distance observed inside
       the training set itself.
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k

        # Per-feature statistics; epsilon guards constant features.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8

        # KNN model for local-density distance checks (k capped at dataset size).
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)

        # Baseline: median over training points of the mean neighbor distance.
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))

        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Return (ok, max_z); ok iff every feature is within z_threshold sigma."""
        x = np.atleast_2d(x)
        max_z = np.abs((x - self.mean) / self.std).max(axis=1)
        is_ok = max_z < self.z_threshold
        # Single-row queries collapse to scalars for caller convenience.
        if len(is_ok) == 1:
            return is_ok[0], max_z[0]
        return is_ok, max_z

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Return (ok, avg_dist); ok iff mean KNN distance < 3x training baseline."""
        x = np.atleast_2d(x)
        distances, _ = self.knn.kneighbors(x)
        avg_dist = distances.mean(axis=1)
        is_ok = avg_dist < 3 * self.typical_knn_dist
        if len(is_ok) == 1:
            return is_ok[0], avg_dist[0]
        return is_ok, avg_dist

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Run both checks and return (in_distribution, details dict).

        NOTE(review): ``z_ok and knn_ok`` assumes a single query point; a
        multi-row batch makes the sub-checks return arrays and ``and`` would
        raise. Callers in this file pass one point at a time — confirm before
        batching.
        """
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)

        verdict = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }

        return verdict, details
|
|
|
|
|
|
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.

    Typical lifecycle: construct -> ``train(X, Y)`` -> ``predict`` /
    ``predict_with_confidence`` / ``acquisition_score`` -> ``save``/``load``.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        """Build ``n_models`` untrained MLPs on the chosen device.

        Args:
            input_dim: number of input features.
            output_dim: number of regression targets.
            n_models: ensemble size (disagreement source).
            hidden_dims: hidden-layer widths; defaults to [64, 32].
            device: 'auto' picks CUDA when available, else a torch device string.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        # Create ensemble
        # NOTE(review): passes the raw ``hidden_dims`` (possibly None) rather than
        # ``self.hidden_dims``; harmless only because MLP applies the same
        # [64, 32] default.
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]

        # Normalization stats (set by train(); None until then)
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None

        # OOD detector (built on normalized training inputs in train())
        self.ood_detector = None

        # Training state
        self.is_trained = False

        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Inputs and targets are z-score normalized; a random validation split
        (at least 5 points) drives per-model early stopping. Returns the
        validation metrics dict from ``_compute_metrics`` plus per-model
        ``val_losses``.
        """

        # Compute normalization (epsilon avoids divide-by-zero on constant columns)
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std

        # Split data
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]

        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        # Convert to tensors (full-batch training; no DataLoader)
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)

        # Train each model with different seed
        all_val_losses = []
        for i, model in enumerate(self.models):
            torch.manual_seed(42 + i * 1000)  # Different init per model
            # NOTE(review): reseeding the *global* numpy RNG here affects any
            # caller relying on np.random state after train().
            np.random.seed(42 + i * 1000)

            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()

            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None

            for epoch in range(epochs):
                # Train
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()

                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot weights on CPU so the copy survives optimizer updates
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break

            # Restore best
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)

            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")

        # Initialize OOD detector on the *normalized* inputs — predict paths
        # must normalize before querying it.
        self.ood_detector = OODDetector(X_norm)

        self.is_trained = True

        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses

        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        Both inputs are already normalized (called from ``train``), so the
        scores are in normalized units.
        """
        mean, std = self.predict_normalized(X_val)

        # R² for each output
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)

        # MAE
        mae = np.abs(Y_val - mean).mean(axis=0)

        # Average ensemble disagreement
        avg_std = std.mean()

        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns the per-sample mean and std over the ensemble members
        (std = epistemic uncertainty proxy).
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)

        preds = []
        for model in self.models:
            model.eval()  # disable dropout for deterministic inference
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)

        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.

        Requires ``train()`` (or ``load()`` of a trained ensemble) first so
        the normalization stats are set.

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)

        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std

        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)

        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std

        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.

        NOTE(review): requires ``self.ood_detector`` — set only by ``train()``,
        not restored by ``load()``; calling this on a loaded ensemble raises.

        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result
        - recommendation: 'trust', 'verify', or 'reject'
        """
        X = np.atleast_2d(X)

        mean, std = self.predict(X)

        # OOD check (per point, in normalized input space)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]

        # Compute confidence score (0 = no confidence, 1 = high confidence)
        # Based on: relative std (lower = better) and OOD (in = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)

        # Banded confidence: thresholds 0.3 / 0.1 on relative ensemble std
        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty

        # Recommendations (thresholds 0.7 / 0.3 map to the bands above)
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead

        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected Improvement acquisition function.

        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]

        improvement = best_so_far - mean

        # Expected improvement with exploration bonus
        # Higher std = more exploration value
        # NOTE(review): ``z`` is computed but never used below — the actual
        # score is the simple linear combination, not analytic EI.
        z = improvement / (std + 1e-8)

        # Simple acquisition: exploitation + exploration
        scores = improvement + xi * std

        # Penalize OOD points
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                # NOTE(review): multiplying by 0.1 only *shrinks* the score;
                # for negative scores it makes them larger — verify intent.
                scores[i] *= 0.1  # Heavy penalty for OOD

        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Greedy: first pick the max-score candidate, then repeatedly pick the
        candidate maximizing score + diversity_weight * (min distance to the
        already-selected set).

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates
            scores: acquisition scores
        """
        scores = self.acquisition_score(candidates, best_so_far)

        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))

        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to selected
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist

                best_idx = max(remaining, key=combined_score)

            selected.append(best_idx)
            remaining.remove(best_idx)

        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble to disk.

        Writes one ``model_{i}.pt`` state dict per member plus a
        ``config.json`` with architecture and normalization stats. The OOD
        detector is not persisted.
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")

        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk (counterpart of ``save``).

        NOTE(review): ``self.ood_detector`` is left as None, so
        ``predict_with_confidence``/``acquisition_score`` fail on a loaded
        ensemble until retrained.
        """
        path = Path(path)

        with open(path / "config.json") as f:
            config = json.load(f)

        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )

        # Load normalization
        # NOTE(review): truthiness check — an all-zeros list would load fine,
        # but this differs from save()'s explicit ``is not None`` test.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None

        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)

        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")

        return surrogate
|
|
|
|
|
|
# Convenience function for quick usage
|
|
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Build an EnsembleSurrogate sized to the data, train it, and return it."""
    n_outputs = Y.shape[1] if Y.ndim > 1 else 1
    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=n_outputs,
        n_models=n_models
    )

    # train() expects 2-D targets; promote a 1-D vector to a column.
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    metrics = surrogate.train(X, Y, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")

    return surrogate
|