refactor: Reorganize code structure and create tests directory
- Consolidate surrogates module to processors/surrogates/
- Move ensemble_surrogate.py to proper location
- Add deprecation shim for old import path
- Create tests/ directory with pytest structure
- Move test files from archive/test_scripts/
- Add conftest.py with shared fixtures

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -59,6 +59,15 @@ def __getattr__(name):
|
||||
elif name == 'create_exporter_from_config':
|
||||
from .training_data_exporter import create_exporter_from_config
|
||||
return create_exporter_from_config
|
||||
elif name == 'EnsembleSurrogate':
|
||||
from .ensemble_surrogate import EnsembleSurrogate
|
||||
return EnsembleSurrogate
|
||||
elif name == 'OODDetector':
|
||||
from .ensemble_surrogate import OODDetector
|
||||
return OODDetector
|
||||
elif name == 'create_and_train_ensemble':
|
||||
from .ensemble_surrogate import create_and_train_ensemble
|
||||
return create_and_train_ensemble
|
||||
|
||||
raise AttributeError(f"module 'optimization_engine.processors.surrogates' has no attribute '{name}'")
|
||||
|
||||
@@ -76,4 +85,7 @@ __all__ = [
|
||||
'AutoTrainer',
|
||||
'TrainingDataExporter',
|
||||
'create_exporter_from_config',
|
||||
'EnsembleSurrogate',
|
||||
'OODDetector',
|
||||
'create_and_train_ensemble',
|
||||
]
|
||||
|
||||
540
optimization_engine/processors/surrogates/ensemble_surrogate.py
Normal file
540
optimization_engine/processors/surrogates/ensemble_surrogate.py
Normal file
@@ -0,0 +1,540 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ensemble Surrogate with Uncertainty Quantification
|
||||
|
||||
Addresses the V5 failure mode where single MLPs gave overconfident predictions
|
||||
in out-of-distribution regions, leading L-BFGS to fake optima.
|
||||
|
||||
Key features:
|
||||
1. Ensemble of N MLPs - disagreement = uncertainty
|
||||
2. OOD detection - reject predictions far from training data
|
||||
3. Confidence bounds - never trust point predictions alone
|
||||
4. Active learning - prioritize FEA in uncertain regions
|
||||
|
||||
Author: Atomizer
|
||||
Created: 2025-12-28
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Tuple, List, Dict, Optional
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
HAS_TORCH = True
|
||||
except ImportError:
|
||||
HAS_TORCH = False
|
||||
logger.warning("PyTorch not available - ensemble features limited")
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
|
||||
|
||||
class MLP(nn.Module):
    """Single feed-forward MLP used as one member of the ensemble.

    Architecture: for each hidden width, Linear -> ReLU -> Dropout(0.1),
    followed by a final Linear projection to ``output_dim``.

    Args:
        input_dim: number of input features.
        output_dim: number of regression outputs.
        hidden_dims: hidden layer widths; defaults to [64, 32].
    """

    def __init__(self, input_dim: int, output_dim: int,
                 hidden_dims: Optional[List[int]] = None):
        super().__init__()
        hidden_dims = hidden_dims or [64, 32]

        layers = []
        in_dim = input_dim
        for h_dim in hidden_dims:
            layers.append(nn.Linear(in_dim, h_dim))
            layers.append(nn.ReLU())
            # Light dropout regularizes each member and adds diversity
            # between ensemble members during training.
            layers.append(nn.Dropout(0.1))
            in_dim = h_dim
        layers.append(nn.Linear(in_dim, output_dim))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map a (batch, input_dim) tensor to (batch, output_dim)."""
        return self.net(x)
|
||||
|
||||
|
||||
class OODDetector:
    """
    Out-of-Distribution detector using multiple methods:
    1. Z-score check (is input within N std of training mean)
    2. KNN distance (is input close to training points)
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        """Fit detector statistics on the training inputs.

        Args:
            X_train: (n_samples, n_features) training inputs.
            z_threshold: max allowed per-feature z-score before a point is OOD.
            knn_k: neighbours used for the local-density check.
        """
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k

        # Per-feature statistics of the training cloud; the epsilon keeps
        # constant features from causing division by zero.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8

        # Local density model over the training set.
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)

        # Baseline scale: median of the mean-kNN distance over training points.
        dists, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(dists.mean(axis=1))

        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    @staticmethod
    def _unwrap(flags, values):
        # Preserve the calling convention: a single-row query returns
        # scalars, a batch query returns arrays.
        out_flags = flags[0] if len(flags) == 1 else flags
        out_values = values[0] if len(values) == 1 else values
        return out_flags, out_values

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is within z_threshold std of training mean."""
        batch = np.atleast_2d(x)
        worst_z = np.abs((batch - self.mean) / self.std).max(axis=1)
        return self._unwrap(worst_z < self.z_threshold, worst_z)

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is close enough to training data."""
        batch = np.atleast_2d(x)
        dists, _ = self.knn.kneighbors(batch)
        mean_dist = dists.mean(axis=1)
        # Allow up to 3x typical distance
        return self._unwrap(mean_dist < 3 * self.typical_knn_dist, mean_dist)

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Combined OOD check."""
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)

        verdict = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }

        return verdict, details
|
||||
|
||||
|
||||
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: Optional[List[int]] = None,
        device: str = 'auto'
    ):
        """
        Args:
            input_dim: number of input features.
            output_dim: number of predicted outputs.
            n_models: ensemble size (more members = smoother uncertainty).
            hidden_dims: hidden layer widths for each MLP (default [64, 32]).
            device: 'auto' picks CUDA when available; otherwise any torch
                device string ('cpu', 'cuda:0', ...).
        """
        # NOTE(review): torch is used unconditionally here even though the
        # module import is guarded by HAS_TORCH — confirm callers check it.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        # Create ensemble: identical architectures, independent initializations.
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]

        # Normalization stats (populated by train() / load()).
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None

        # OOD detector fitted on normalized training inputs (set by train()).
        self.ood_detector = None

        # Training state
        self.is_trained = False

        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) raw inputs.
            Y: (n_samples, output_dim) raw targets.
            epochs: max full-batch gradient steps per model.
            lr: AdamW learning rate.
            val_split: fraction held out for validation (at least 5 points).
            patience: early-stopping patience in epochs.

        Returns:
            Metrics dict with per-output 'r2' and 'mae', 'avg_ensemble_std',
            'n_val', and per-model 'val_losses'.
        """
        # Compute normalization (epsilon guards constant columns).
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std

        # Split data — always keep at least 5 validation points.
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]

        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        # Convert to tensors once; training below is full-batch.
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)

        # Train each model with different seed
        all_val_losses = []
        for i, model in enumerate(self.models):
            # NOTE(review): the models were already initialized in __init__,
            # so these seeds mainly affect dropout masks, not the weight
            # init — confirm the "different init per model" intent. Also
            # note this reseeds the *global* torch/numpy RNGs.
            torch.manual_seed(42 + i * 1000)
            np.random.seed(42 + i * 1000)

            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()

            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None

            for epoch in range(epochs):
                # Train (single full-batch step per epoch)
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()

                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()

                # Early stopping: keep a CPU copy of the best weights so far.
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break

            # Restore best weights (explicit None check: a state dict is
            # always truthy here, but `is not None` states the intent).
            if best_state is not None:
                model.load_state_dict(best_state)
                model.to(self.device)

            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")

        # Initialize OOD detector on the *normalized* inputs — predict paths
        # must normalize before querying it.
        self.ood_detector = OODDetector(X_norm)

        self.is_trained = True

        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses

        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on the (normalized) validation set."""
        mean, std = self.predict_normalized(X_val)

        # R² for each output (epsilon guards degenerate, constant targets).
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)

        # MAE per output
        mae = np.abs(Y_val - mean).mean(axis=0)

        # Average ensemble disagreement (epistemic uncertainty proxy)
        avg_std = std.mean()

        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns the ensemble mean and the member disagreement (std) with
        shapes (n_samples, n_outputs) each.
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)

        preds = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)

        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty. Requires train() or load() to have run
        (normalization stats must be set).

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)

        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std

        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)

        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std

        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment. Requires a fitted
        ood_detector (i.e. train() must have been called).

        Returns dict with:
            - mean: predicted values
            - std: uncertainty
            - confidence: 0-1 score (higher = more reliable)
            - in_distribution: OOD check result
            - recommendation: 'trust', 'verify', or 'reject'
        """
        X = np.atleast_2d(X)

        mean, std = self.predict(X)

        # OOD check on normalized inputs (detector was fit on normalized data)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]

        # Confidence heuristic: relative std (lower = better) bucketed at
        # 10% / 30%, zeroed entirely for OOD points.
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)

        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty

        # Recommendations: trust >= 0.7, verify >= 0.3, else reject.
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead

        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Simplified expected-improvement-style acquisition function
        (improvement + xi * std — not the closed-form Gaussian EI).

        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]

        improvement = best_so_far - mean

        # Exploitation plus an exploration bonus proportional to uncertainty.
        scores = improvement + xi * std

        # Penalize OOD points
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                scores[i] *= 0.1  # Heavy penalty for OOD

        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates
            scores: acquisition scores of the selected candidates
        """
        scores = self.acquisition_score(candidates, best_so_far)

        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))

        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to already-selected points
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist

                best_idx = max(remaining, key=combined_score)

            selected.append(best_idx)
            remaining.remove(best_idx)

        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble weights and normalization config to a directory."""
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")

        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        Note: the OOD detector is NOT restored (training inputs aren't
        saved), so predict_with_confidence/acquisition_score need a
        retrain to work after load.
        """
        path = Path(path)

        with open(path / "config.json") as f:
            config = json.load(f)

        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )

        # Load normalization — explicit None checks: save() writes null for
        # unset stats, and truthiness would misfire on empty lists.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] is not None else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] is not None else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] is not None else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] is not None else None

        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)

        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")

        return surrogate
|
||||
|
||||
|
||||
# Convenience function for quick usage
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Build an EnsembleSurrogate sized to the data, fit it, and return it."""
    # Single-output targets arrive as 1-D arrays; the surrogate wants 2-D.
    out_dim = 1 if Y.ndim == 1 else Y.shape[1]
    targets = Y.reshape(-1, 1) if Y.ndim == 1 else Y

    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=out_dim,
        n_models=n_models
    )

    metrics = surrogate.train(X, targets, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")

    return surrogate
|
||||
Reference in New Issue
Block a user