feat: Pre-migration checkpoint - updated docs and utilities

Updates before optimization_engine migration:
- Updated migration plan to v2.1 with complete file inventory
- Added OP_07 disk optimization protocol
- Added SYS_16 self-aware turbo protocol
- Added study archiver and cleanup utilities
- Added ensemble surrogate module
- Updated NX solver and session manager
- Updated zernike HTML generator
- Added context engineering plan
- LAC session insights updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 10:22:45 -05:00
parent faa7779a43
commit 82f36689b7
21 changed files with 6304 additions and 890 deletions

View File

@@ -0,0 +1,19 @@
"""
Surrogate models for FEA acceleration.
Available surrogates:
- EnsembleSurrogate: Multiple MLPs with uncertainty quantification
- OODDetector: Out-of-distribution detection
"""
from .ensemble_surrogate import (
EnsembleSurrogate,
OODDetector,
create_and_train_ensemble
)
__all__ = [
'EnsembleSurrogate',
'OODDetector',
'create_and_train_ensemble'
]

View File

@@ -0,0 +1,540 @@
#!/usr/bin/env python3
"""
Ensemble Surrogate with Uncertainty Quantification
Addresses the V5 failure mode where single MLPs gave overconfident predictions
in out-of-distribution regions, leading L-BFGS to fake optima.
Key features:
1. Ensemble of N MLPs - disagreement = uncertainty
2. OOD detection - reject predictions far from training data
3. Confidence bounds - never trust point predictions alone
4. Active learning - prioritize FEA in uncertain regions
Author: Atomizer
Created: 2025-12-28
"""
import numpy as np
from typing import Tuple, List, Dict, Optional
from pathlib import Path
import json
import logging

# Module-level logger per stdlib convention.
logger = logging.getLogger(__name__)

# PyTorch is treated as optional at import time; HAS_TORCH records whether
# it was found.
try:
    import torch
    import torch.nn as nn
    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False
    logger.warning("PyTorch not available - ensemble features limited")
    # NOTE(review): the class definitions below subclass nn.Module
    # unconditionally, so importing this module without PyTorch will still
    # raise NameError at class-creation time despite this guard — confirm
    # the intended degradation behavior.

from sklearn.neighbors import NearestNeighbors
class MLP(nn.Module):
    """A single feed-forward network used as one member of the ensemble.

    Architecture: for each hidden width, a Linear -> ReLU -> Dropout(0.1)
    stage, followed by a final Linear projection to ``output_dim``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        # Fall back to the standard two-layer configuration when no widths
        # are supplied.
        widths = hidden_dims or [64, 32]
        modules = []
        prev = input_dim
        for width in widths:
            modules += [nn.Linear(prev, width), nn.ReLU(), nn.Dropout(0.1)]
            prev = width
        modules.append(nn.Linear(prev, output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Apply the network to a batch of inputs."""
        return self.net(x)
class OODDetector:
    """Flags query points that lie outside the training distribution.

    Two complementary tests are combined:
      1. Per-feature z-score against the training mean/std.
      2. Mean distance to the k nearest training points, compared against
         the typical (median) kNN distance observed inside the training set.
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k
        # Per-feature statistics; the epsilon keeps division safe for
        # constant features.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8
        # kNN model for local-density estimation (k capped by sample count).
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)
        # Baseline scale: median of the mean-kNN-distance over the training
        # points themselves.
        reference_dists, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(reference_dists.mean(axis=1))
        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    @staticmethod
    def _unwrap(flags, values):
        """Collapse length-1 result arrays to scalars (single-point query)."""
        if len(flags) == 1:
            return flags[0], values[0]
        return flags, values

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Is the point within z_threshold std of the training mean on every feature?"""
        query = np.atleast_2d(x)
        worst_z = np.abs((query - self.mean) / self.std).max(axis=1)
        return self._unwrap(worst_z < self.z_threshold, worst_z)

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Is the point's mean kNN distance within 3x the training baseline?"""
        query = np.atleast_2d(x)
        dists, _ = self.knn.kneighbors(query)
        mean_dist = dists.mean(axis=1)
        return self._unwrap(mean_dist < 3 * self.typical_knn_dist, mean_dist)

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Run both checks; a point is in-distribution only if both pass.

        Intended for single-point queries: the detail dict coerces the
        check outputs to scalars.
        """
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)
        verdict = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }
        return verdict, details
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.

    Typical lifecycle: construct -> train() -> predict()/predict_with_confidence()
    -> save()/load(). Normalization statistics and the OOD detector are only
    available after train() (or load() for the statistics).
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        # input_dim/output_dim: feature and target dimensionality.
        # n_models: ensemble size; more members smooth the disagreement signal.
        # hidden_dims: hidden layer widths per MLP (default [64, 32]).
        # device: 'auto' picks CUDA when available, otherwise CPU.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]
        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)
        # Create ensemble
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]
        # Normalization stats (populated by train() / load())
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None
        # OOD detector (populated by train(); operates in normalized space)
        self.ood_detector = None
        # Training state
        self.is_trained = False
        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) raw inputs.
            Y: (n_samples, output_dim) raw targets.
            epochs: maximum full-batch epochs per model.
            lr: AdamW learning rate.
            val_split: fraction held out for validation (at least 5 samples).
            patience: early-stopping patience, in epochs.

        Returns:
            Metrics dict from _compute_metrics() plus per-model 'val_losses'.
        """
        # Compute normalization (epsilon guards constant columns)
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8
        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std
        # Split data (one shared random split for all members)
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]
        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]
        # Convert to tensors
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)
        # Train each model with different seed
        all_val_losses = []
        for i, model in enumerate(self.models):
            # NOTE(review): the models were already initialized in __init__,
            # so this seed varies the dropout masks during training rather
            # than the weight init — confirm the "different init" intent.
            torch.manual_seed(42 + i * 1000)  # Different init per model
            np.random.seed(42 + i * 1000)
            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()
            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None
            for epoch in range(epochs):
                # Train (single full-batch gradient step per epoch)
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()
                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()
                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot best weights on CPU so they can be restored
                    # after training ends.
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break
            # Restore best
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)
            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")
        # Initialize OOD detector (fit on normalized inputs; all later OOD
        # queries must therefore also be normalized)
        self.ood_detector = OODDetector(X_norm)
        self.is_trained = True
        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses
        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        Inputs are already normalized, so MAE and the disagreement value are
        in normalized target units (R² is scale-invariant).
        """
        mean, std = self.predict_normalized(X_val)
        # R² for each output
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)
        # MAE
        mae = np.abs(Y_val - mean).mean(axis=0)
        # Average ensemble disagreement
        avg_std = std.mean()
        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns the per-point mean and std across ensemble members; the std
        is the raw member disagreement (epistemic uncertainty proxy).
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)
        preds = []
        for model in self.models:
            model.eval()  # disable dropout for deterministic predictions
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
                preds.append(pred)
        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)
        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.

        Requires normalization stats (train() or load() must have run).

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)
        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std
        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)
        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std
        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.

        Requires train() to have run (the OOD detector is only created
        there; a load()-ed ensemble has no detector and would fail here).

        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result
        - recommendation: 'trust', 'verify', or 'reject'
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # OOD check (the detector was fit in normalized input space)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]
        # Compute confidence score (0 = no confidence, 1 = high confidence)
        # Based on: relative std (lower = better) and OOD (in = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)
        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty
        # Recommendations
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead
        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected Improvement acquisition function.
        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]
        improvement = best_so_far - mean
        # Expected improvement with exploration bonus
        # Higher std = more exploration value
        # NOTE(review): z is computed but never used — the analytic EI
        # formula appears to have been replaced by the simpler linear score
        # below; confirm whether z should be removed or EI restored.
        z = improvement / (std + 1e-8)
        # Simple acquisition: exploitation + exploration
        scores = improvement + xi * std
        # Penalize OOD points
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                # NOTE(review): multiplying scales toward zero, but a
                # negative score becomes LESS negative — verify that is the
                # intended "penalty" for OOD candidates.
                scores[i] *= 0.1  # Heavy penalty for OOD
        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates (into `candidates`)
            scores: acquisition scores of the selected candidates
        """
        scores = self.acquisition_score(candidates, best_so_far)
        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))
        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to selected
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist
                best_idx = max(remaining, key=combined_score)
            selected.append(best_idx)
            remaining.remove(best_idx)
        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble to disk.

        Writes one `model_{i}.pt` state dict per member plus a
        `config.json` with dimensions and normalization statistics.
        The OOD detector is NOT persisted (see predict_with_confidence).
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")
        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)
        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        Rebuilds the ensemble from `config.json` and per-member state dicts
        written by save(). Note the loaded instance has no OOD detector.
        """
        path = Path(path)
        with open(path / "config.json") as f:
            config = json.load(f)
        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )
        # Load normalization
        # NOTE(review): truthiness check — a stored-but-empty list would be
        # treated as "missing"; in practice the stats are non-empty arrays.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None
        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)
        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")
        return surrogate
# Convenience wrapper: build an ensemble sized to the data and train it.
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Create and train an ensemble surrogate.

    Accepts 1-D targets (reshaped to a single-output column) or 2-D targets.
    """
    # Promote 1-D targets to a single-output column matrix up front so the
    # output dimension and the training data agree.
    targets = Y.reshape(-1, 1) if Y.ndim == 1 else Y
    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=targets.shape[1],
        n_models=n_models
    )
    metrics = surrogate.train(X, targets, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")
    return surrogate