Updates before optimization_engine migration: - Updated migration plan to v2.1 with complete file inventory - Added OP_07 disk optimization protocol - Added SYS_16 self-aware turbo protocol - Added study archiver and cleanup utilities - Added ensemble surrogate module - Updated NX solver and session manager - Updated zernike HTML generator - Added context engineering plan - LAC session insights updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
541 lines
17 KiB
Python
541 lines
17 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Ensemble Surrogate with Uncertainty Quantification
|
|
|
|
Addresses the V5 failure mode where single MLPs gave overconfident predictions
|
|
in out-of-distribution regions, leading L-BFGS to fake optima.
|
|
|
|
Key features:
|
|
1. Ensemble of N MLPs - disagreement = uncertainty
|
|
2. OOD detection - reject predictions far from training data
|
|
3. Confidence bounds - never trust point predictions alone
|
|
4. Active learning - prioritize FEA in uncertain regions
|
|
|
|
Author: Atomizer
|
|
Created: 2025-12-28
|
|
"""
|
|
|
|
import numpy as np
|
|
from typing import Tuple, List, Dict, Optional
|
|
from pathlib import Path
|
|
import json
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
try:
|
|
import torch
|
|
import torch.nn as nn
|
|
HAS_TORCH = True
|
|
except ImportError:
|
|
HAS_TORCH = False
|
|
logger.warning("PyTorch not available - ensemble features limited")
|
|
|
|
from sklearn.neighbors import NearestNeighbors
|
|
|
|
|
|
class MLP(nn.Module):
    """Feed-forward regressor used as a single member of the ensemble.

    Architecture: [Linear -> ReLU -> Dropout(0.1)] per hidden width,
    followed by a final Linear projection to ``output_dim``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        if hidden_dims is None:
            hidden_dims = [64, 32]

        modules = []
        prev_width = input_dim
        for width in hidden_dims:
            modules.extend([nn.Linear(prev_width, width), nn.ReLU(), nn.Dropout(0.1)])
            prev_width = width
        modules.append(nn.Linear(prev_width, output_dim))

        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run the input batch through the sequential stack."""
        return self.net(x)
|
|
|
|
|
|
class OODDetector:
    """
    Flags query points that fall outside the training distribution.

    Two complementary checks are combined:
    1. Z-score: every feature must lie within ``z_threshold`` standard
       deviations of the training mean.
    2. KNN distance: the mean distance to the k nearest training points
       must stay below 3x the median neighbor distance observed inside
       the training set itself.
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k

        # Per-feature statistics; epsilon guards constant features.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8

        # KNN model for local-density distance checks (k capped at dataset size).
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)

        # Baseline: median over training points of the mean neighbor distance.
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))

        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Return (ok, max_z); ok iff every feature is within z_threshold sigma."""
        x = np.atleast_2d(x)
        max_z = np.abs((x - self.mean) / self.std).max(axis=1)
        is_ok = max_z < self.z_threshold
        # Single-row queries collapse to scalars for caller convenience.
        if len(is_ok) == 1:
            return is_ok[0], max_z[0]
        return is_ok, max_z

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Return (ok, avg_dist); ok iff mean KNN distance < 3x training baseline."""
        x = np.atleast_2d(x)
        distances, _ = self.knn.kneighbors(x)
        avg_dist = distances.mean(axis=1)
        is_ok = avg_dist < 3 * self.typical_knn_dist
        if len(is_ok) == 1:
            return is_ok[0], avg_dist[0]
        return is_ok, avg_dist

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Run both checks and return (in_distribution, details dict).

        NOTE(review): ``z_ok and knn_ok`` assumes a single query point; a
        multi-row batch makes the sub-checks return arrays and ``and`` would
        raise. Callers in this file pass one point at a time — confirm before
        batching.
        """
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)

        verdict = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }

        return verdict, details
|
|
|
|
|
|
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.

    Typical lifecycle: construct -> ``train(X, Y)`` -> ``predict`` /
    ``predict_with_confidence`` / ``acquisition_score`` -> ``save``/``load``.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        """Build ``n_models`` untrained MLPs on the chosen device.

        Args:
            input_dim: number of input features.
            output_dim: number of regression targets.
            n_models: ensemble size (disagreement source).
            hidden_dims: hidden-layer widths; defaults to [64, 32].
            device: 'auto' picks CUDA when available, else a torch device string.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        # Create ensemble
        # NOTE(review): passes the raw ``hidden_dims`` (possibly None) rather than
        # ``self.hidden_dims``; harmless only because MLP applies the same
        # [64, 32] default.
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]

        # Normalization stats (set by train(); None until then)
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None

        # OOD detector (built on normalized training inputs in train())
        self.ood_detector = None

        # Training state
        self.is_trained = False

        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Inputs and targets are z-score normalized; a random validation split
        (at least 5 points) drives per-model early stopping. Returns the
        validation metrics dict from ``_compute_metrics`` plus per-model
        ``val_losses``.
        """

        # Compute normalization (epsilon avoids divide-by-zero on constant columns)
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std

        # Split data
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]

        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        # Convert to tensors (full-batch training; no DataLoader)
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)

        # Train each model with different seed
        all_val_losses = []
        for i, model in enumerate(self.models):
            torch.manual_seed(42 + i * 1000)  # Different init per model
            # NOTE(review): reseeding the *global* numpy RNG here affects any
            # caller relying on np.random state after train().
            np.random.seed(42 + i * 1000)

            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()

            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None

            for epoch in range(epochs):
                # Train
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()

                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()

                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot weights on CPU so the copy survives optimizer updates
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break

            # Restore best
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)

            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")

        # Initialize OOD detector on the *normalized* inputs — predict paths
        # must normalize before querying it.
        self.ood_detector = OODDetector(X_norm)

        self.is_trained = True

        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses

        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        Both inputs are already normalized (called from ``train``), so the
        scores are in normalized units.
        """
        mean, std = self.predict_normalized(X_val)

        # R² for each output
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)

        # MAE
        mae = np.abs(Y_val - mean).mean(axis=0)

        # Average ensemble disagreement
        avg_std = std.mean()

        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns the per-sample mean and std over the ensemble members
        (std = epistemic uncertainty proxy).
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)

        preds = []
        for model in self.models:
            model.eval()  # disable dropout for deterministic inference
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)

        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.

        Requires ``train()`` (or ``load()`` of a trained ensemble) first so
        the normalization stats are set.

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)

        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std

        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)

        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std

        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.

        NOTE(review): requires ``self.ood_detector`` — set only by ``train()``,
        not restored by ``load()``; calling this on a loaded ensemble raises.

        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result
        - recommendation: 'trust', 'verify', or 'reject'
        """
        X = np.atleast_2d(X)

        mean, std = self.predict(X)

        # OOD check (per point, in normalized input space)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]

        # Compute confidence score (0 = no confidence, 1 = high confidence)
        # Based on: relative std (lower = better) and OOD (in = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)

        # Banded confidence: thresholds 0.3 / 0.1 on relative ensemble std
        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty

        # Recommendations (thresholds 0.7 / 0.3 map to the bands above)
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead

        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected Improvement acquisition function.

        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]

        improvement = best_so_far - mean

        # Expected improvement with exploration bonus
        # Higher std = more exploration value
        # NOTE(review): ``z`` is computed but never used below — the actual
        # score is the simple linear combination, not analytic EI.
        z = improvement / (std + 1e-8)

        # Simple acquisition: exploitation + exploration
        scores = improvement + xi * std

        # Penalize OOD points
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                # NOTE(review): multiplying by 0.1 only *shrinks* the score;
                # for negative scores it makes them larger — verify intent.
                scores[i] *= 0.1  # Heavy penalty for OOD

        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Greedy: first pick the max-score candidate, then repeatedly pick the
        candidate maximizing score + diversity_weight * (min distance to the
        already-selected set).

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates
            scores: acquisition scores
        """
        scores = self.acquisition_score(candidates, best_so_far)

        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))

        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to selected
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist

                best_idx = max(remaining, key=combined_score)

            selected.append(best_idx)
            remaining.remove(best_idx)

        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble to disk.

        Writes one ``model_{i}.pt`` state dict per member plus a
        ``config.json`` with architecture and normalization stats. The OOD
        detector is not persisted.
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")

        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk (counterpart of ``save``).

        NOTE(review): ``self.ood_detector`` is left as None, so
        ``predict_with_confidence``/``acquisition_score`` fail on a loaded
        ensemble until retrained.
        """
        path = Path(path)

        with open(path / "config.json") as f:
            config = json.load(f)

        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )

        # Load normalization
        # NOTE(review): truthiness check — an all-zeros list would load fine,
        # but this differs from save()'s explicit ``is not None`` test.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None

        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)

        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")

        return surrogate
|
|
|
|
|
|
# Convenience function for quick usage
|
|
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Build an EnsembleSurrogate sized to the data, train it, and return it."""
    n_outputs = Y.shape[1] if Y.ndim > 1 else 1
    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=n_outputs,
        n_models=n_models
    )

    # train() expects 2-D targets; promote a 1-D vector to a column.
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    metrics = surrogate.train(X, Y, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")

    return surrogate
|