refactor: Major reorganization of optimization_engine module structure

BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 12:30:59 -05:00
parent 82f36689b7
commit eabcc4c3ca
120 changed files with 1127 additions and 637 deletions

View File

@@ -0,0 +1,25 @@
"""
Optimization Processors
=======================
Data processing algorithms and ML models.
Submodules:
- surrogates/: Neural network surrogate models
- dynamic_response/: Dynamic response processing (random vib, sine sweep)
"""
# Lazy import for surrogates to avoid import errors
def __getattr__(name):
if name == 'surrogates':
from . import surrogates
return surrogates
elif name == 'AdaptiveCharacterization':
from .adaptive_characterization import AdaptiveCharacterization
return AdaptiveCharacterization
raise AttributeError(f"module 'optimization_engine.processors' has no attribute '{name}'")
__all__ = [
'surrogates',
'AdaptiveCharacterization',
]

View File

@@ -0,0 +1,415 @@
"""
Adaptive Characterization Module - Intelligent stopping for landscape characterization.
This module implements adaptive stopping criteria for the characterization phase
that intelligently determines when enough landscape exploration has been done.
Simple problems (smooth, unimodal) -> stop early (~10-15 trials)
Complex problems (multimodal, rugged) -> continue longer (~20-30 trials)
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""
import numpy as np
import optuna
from typing import Dict, List, Optional
from dataclasses import dataclass
import json
from pathlib import Path
from datetime import datetime
@dataclass
class LandscapeMetricSnapshot:
"""Snapshot of landscape metrics at a given trial."""
trial_number: int
smoothness: float
multimodal: bool
n_modes: int
noise_level: float
landscape_type: str
overall_confidence: float
class CharacterizationStoppingCriterion:
"""
Intelligently determines when characterization phase has gathered enough information.
Key Features:
1. Progressive landscape analysis (every 5 trials starting at trial 10)
2. Metric convergence detection (are metrics stabilizing?)
3. Complexity-aware sample adequacy (complex problems need more trials)
4. Parameter space coverage assessment
5. Confidence scoring (combines all factors)
Stopping Decision:
- Simple problems: Stop at ~10-15 trials when metrics converge
- Complex problems: Continue to ~20-30 trials for adequate coverage
"""
def __init__(
self,
min_trials: int = 10,
max_trials: int = 30,
confidence_threshold: float = 0.85,
check_interval: int = 5,
verbose: bool = True,
tracking_dir: Optional[Path] = None
):
"""
Args:
min_trials: Minimum trials before considering stopping
max_trials: Maximum trials (stop even if not converged)
confidence_threshold: Confidence needed to stop (0-1)
check_interval: How often to check stopping criteria
verbose: Print progress reports
tracking_dir: Directory to save characterization tracking
"""
self.min_trials = min_trials
self.max_trials = max_trials
self.confidence_threshold = confidence_threshold
self.check_interval = check_interval
self.verbose = verbose
self.tracking_dir = tracking_dir
# Track metric history across analyses
self.metric_history: List[LandscapeMetricSnapshot] = []
self.should_stop_flag = False
self.stop_reason = ""
self.final_confidence = 0.0
# Initialize tracking
if tracking_dir:
self.tracking_dir = Path(tracking_dir)
self.tracking_dir.mkdir(parents=True, exist_ok=True)
self.characterization_log = self.tracking_dir / "characterization_progress.json"
def update(self, landscape: Dict, trial_number: int):
"""
Update with latest landscape analysis.
Args:
landscape: Landscape analysis dictionary
trial_number: Current trial number
"""
if not landscape.get('ready', False):
return
# Create snapshot
snapshot = LandscapeMetricSnapshot(
trial_number=trial_number,
smoothness=landscape['smoothness'],
multimodal=landscape['multimodal'],
n_modes=landscape['n_modes'],
noise_level=landscape['noise_level'],
landscape_type=landscape['landscape_type'],
overall_confidence=0.0 # Will be calculated
)
self.metric_history.append(snapshot)
# Calculate confidence
confidence = self._calculate_confidence(landscape, trial_number)
snapshot.overall_confidence = confidence
# Save progress
self._save_progress()
# Print report
if self.verbose:
self._print_progress_report(trial_number, landscape, confidence)
# Check stopping criteria
if trial_number >= self.min_trials:
self._evaluate_stopping_criteria(landscape, trial_number, confidence)
def should_stop(self, study: optuna.Study) -> bool:
"""
Check if characterization should stop.
Args:
study: Optuna study
Returns:
True if should stop characterization
"""
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
n_trials = len(completed_trials)
# Force stop at max trials
if n_trials >= self.max_trials:
self.should_stop_flag = True
self.stop_reason = f"Maximum characterization trials reached ({self.max_trials})"
return True
return self.should_stop_flag
def _calculate_confidence(self, landscape: Dict, trial_number: int) -> float:
"""
Calculate confidence score for stopping decision.
Confidence Components (weighted sum):
1. Metric Stability (40%): Are metrics converging?
2. Parameter Coverage (30%): Explored enough space?
3. Sample Adequacy (20%): Enough samples for complexity?
4. Landscape Clarity (10%): Clear classification?
"""
if trial_number < self.min_trials:
return 0.0
# 1. Metric Stability Score
stability_score = self._compute_metric_stability()
# 2. Parameter Coverage Score
coverage_score = self._compute_parameter_coverage(landscape)
# 3. Sample Adequacy Score
adequacy_score = self._compute_sample_adequacy(landscape, trial_number)
# 4. Landscape Clarity Score
clarity_score = self._compute_landscape_clarity(landscape)
# Weighted confidence
confidence = (
0.40 * stability_score +
0.30 * coverage_score +
0.20 * adequacy_score +
0.10 * clarity_score
)
return confidence
def _compute_metric_stability(self) -> float:
"""
Compute how stable landscape metrics are.
High stability = metrics have converged (good for stopping)
Low stability = metrics still changing (need more trials)
"""
if len(self.metric_history) < 3:
return 0.0
# Look at last 3 analyses
recent_snapshots = self.metric_history[-3:]
# Check smoothness stability
smoothness_values = [s.smoothness for s in recent_snapshots]
smoothness_std = np.std(smoothness_values)
smoothness_stable = smoothness_std < 0.05 # Stable if std < 0.05
# Check noise stability
noise_values = [s.noise_level for s in recent_snapshots]
noise_std = np.std(noise_values)
noise_stable = noise_std < 0.1 # Stable if std < 0.1
# Check landscape type consistency
landscape_types = [s.landscape_type for s in recent_snapshots]
type_consistent = len(set(landscape_types)) == 1 # All same type
# Check n_modes stability
n_modes = [s.n_modes for s in recent_snapshots]
modes_consistent = len(set(n_modes)) <= 1 # Same or ±1
# Combine stability indicators
stability_indicators = [
1.0 if smoothness_stable else 0.0,
1.0 if noise_stable else 0.0,
1.0 if type_consistent else 0.0,
1.0 if modes_consistent else 0.0
]
stability_score = np.mean(stability_indicators)
return stability_score
def _compute_parameter_coverage(self, landscape: Dict) -> float:
"""
Compute how well parameter space has been explored.
High coverage = explored wide range of each parameter
"""
param_ranges = landscape.get('parameter_ranges', {})
if not param_ranges:
return 0.5 # Unknown
coverage_scores = []
for param, ranges in param_ranges.items():
coverage = ranges['coverage'] # Already computed in landscape analyzer
coverage_scores.append(coverage)
avg_coverage = np.mean(coverage_scores)
# Normalize: 50% coverage = 0.5 score, 100% coverage = 1.0 score
coverage_score = min(1.0, avg_coverage / 0.5)
return coverage_score
def _compute_sample_adequacy(self, landscape: Dict, trial_number: int) -> float:
"""
Compute if we have enough samples for the detected complexity.
Simple problems: 10 trials sufficient
Complex problems: 20-30 trials needed
"""
dimensionality = landscape.get('dimensionality', 2)
multimodal = landscape.get('multimodal', False)
n_modes = landscape.get('n_modes', 1)
# Calculate required samples based on complexity
if multimodal and n_modes > 2:
# Complex multimodal: need more samples
required_samples = 10 + 5 * n_modes + 2 * dimensionality
elif multimodal:
# Simple multimodal: moderate samples
required_samples = 15 + 2 * dimensionality
else:
# Unimodal: fewer samples needed
required_samples = 10 + dimensionality
# Cap at max_trials
required_samples = min(required_samples, self.max_trials)
# Score based on how many samples we have vs required
adequacy_score = min(1.0, trial_number / required_samples)
return adequacy_score
def _compute_landscape_clarity(self, landscape: Dict) -> float:
"""
Compute how clearly we can classify the landscape.
Clear classification = high confidence in landscape type
"""
smoothness = landscape.get('smoothness', 0.5)
noise_level = landscape.get('noise_level', 0.5)
# Clear cases:
# - Very smooth (> 0.7) or very rugged (< 0.3)
# - Low noise (< 0.3) or high noise (> 0.7)
smoothness_clarity = max(
abs(smoothness - 0.7), # Distance from smooth threshold
abs(smoothness - 0.3) # Distance from rugged threshold
)
noise_clarity = max(
abs(noise_level - 0.3), # Distance from low noise threshold
abs(noise_level - 0.7) # Distance from high noise threshold
)
# Normalize to 0-1
clarity_score = min(1.0, (smoothness_clarity + noise_clarity) / 0.8)
return clarity_score
def _evaluate_stopping_criteria(self, landscape: Dict, trial_number: int, confidence: float):
"""
Evaluate if we should stop characterization.
Stop if:
1. Confidence threshold met
2. OR maximum trials reached
"""
if confidence >= self.confidence_threshold:
self.should_stop_flag = True
self.stop_reason = f"Characterization confidence threshold met ({confidence:.1%})"
self.final_confidence = confidence
if self.verbose:
print(f"\n{'='*70}")
print(f" CHARACTERIZATION COMPLETE")
print(f"{'='*70}")
print(f" Trial #{trial_number}")
print(f" Confidence: {confidence:.1%}")
print(f" Landscape Type: {landscape['landscape_type'].upper()}")
print(f" Ready for strategy selection")
print(f"{'='*70}\n")
def _print_progress_report(self, trial_number: int, landscape: Dict, confidence: float):
"""Print characterization progress report."""
print(f"\n{'='*70}")
print(f" CHARACTERIZATION PROGRESS - Trial #{trial_number}")
print(f"{'='*70}")
print(f" Landscape Type: {landscape['landscape_type']}")
print(f" Smoothness: {landscape['smoothness']:.2f}")
print(f" Multimodal: {'YES' if landscape['multimodal'] else 'NO'} ({landscape['n_modes']} modes)")
print(f" Noise: {landscape['noise_level']:.2f}")
print(f" Characterization Confidence: {confidence:.1%}")
if confidence >= self.confidence_threshold:
print(f" Status: READY TO STOP (confidence >= {self.confidence_threshold:.0%})")
else:
remaining = self.confidence_threshold - confidence
print(f" Status: CONTINUE (need +{remaining:.1%} confidence)")
print(f"{'='*70}\n")
def _save_progress(self):
"""Save characterization progress to JSON."""
if not self.tracking_dir:
return
progress_data = {
'min_trials': self.min_trials,
'max_trials': self.max_trials,
'confidence_threshold': self.confidence_threshold,
'metric_history': [
{
'trial_number': s.trial_number,
'smoothness': s.smoothness,
'multimodal': s.multimodal,
'n_modes': s.n_modes,
'noise_level': s.noise_level,
'landscape_type': s.landscape_type,
'confidence': s.overall_confidence
}
for s in self.metric_history
],
'should_stop': self.should_stop_flag,
'stop_reason': self.stop_reason,
'final_confidence': self.final_confidence,
'timestamp': datetime.now().isoformat()
}
try:
with open(self.characterization_log, 'w') as f:
json.dump(progress_data, f, indent=2)
except Exception as e:
if self.verbose:
print(f" Warning: Failed to save characterization progress: {e}")
def get_summary_report(self) -> str:
"""Generate summary report of characterization phase."""
if not self.metric_history:
return "No characterization data available"
final_snapshot = self.metric_history[-1]
report = "\n" + "="*70 + "\n"
report += " CHARACTERIZATION PHASE SUMMARY\n"
report += "="*70 + "\n"
report += f" Total Trials: {final_snapshot.trial_number}\n"
report += f" Final Confidence: {final_snapshot.overall_confidence:.1%}\n"
report += f" Stop Reason: {self.stop_reason}\n"
report += f"\n FINAL LANDSCAPE CLASSIFICATION:\n"
report += f" Type: {final_snapshot.landscape_type.upper()}\n"
report += f" Smoothness: {final_snapshot.smoothness:.2f}\n"
report += f" Multimodal: {'YES' if final_snapshot.multimodal else 'NO'} ({final_snapshot.n_modes} modes)\n"
report += f" Noise Level: {final_snapshot.noise_level:.2f}\n"
if len(self.metric_history) >= 2:
report += f"\n METRIC CONVERGENCE:\n"
# Show how metrics evolved
first = self.metric_history[0]
last = self.metric_history[-1]
smoothness_change = abs(last.smoothness - first.smoothness)
report += f" Smoothness stability: {smoothness_change:.3f} (lower = more stable)\n"
type_changes = len(set(s.landscape_type for s in self.metric_history))
report += f" Landscape type changes: {type_changes - 1}\n"
report += "="*70 + "\n"
return report

View File

@@ -0,0 +1,79 @@
"""
Surrogate Models
================
Neural network and ML surrogate models for FEA acceleration.
Available modules:
- neural_surrogate: AtomizerField neural network surrogate
- generic_surrogate: Flexible surrogate interface
- adaptive_surrogate: Self-improving surrogate
- simple_mlp_surrogate: Simple multi-layer perceptron
- active_learning_surrogate: Active learning surrogate
- surrogate_tuner: Hyperparameter tuning
- auto_trainer: Automatic model training
- training_data_exporter: Export training data from studies
Note: Imports are done on-demand to avoid import errors from optional dependencies.
"""
# Lazy imports to avoid circular dependencies and optional dependency issues
def __getattr__(name):
"""Lazy import mechanism for surrogate modules."""
if name == 'NeuralSurrogate':
from .neural_surrogate import NeuralSurrogate
return NeuralSurrogate
elif name == 'create_surrogate_for_study':
from .neural_surrogate import create_surrogate_for_study
return create_surrogate_for_study
elif name == 'GenericSurrogate':
from .generic_surrogate import GenericSurrogate
return GenericSurrogate
elif name == 'ConfigDrivenSurrogate':
from .generic_surrogate import ConfigDrivenSurrogate
return ConfigDrivenSurrogate
elif name == 'create_surrogate':
from .generic_surrogate import create_surrogate
return create_surrogate
elif name == 'AdaptiveSurrogate':
from .adaptive_surrogate import AdaptiveSurrogate
return AdaptiveSurrogate
elif name == 'SimpleSurrogate':
from .simple_mlp_surrogate import SimpleSurrogate
return SimpleSurrogate
elif name == 'ActiveLearningSurrogate':
from .active_learning_surrogate import ActiveLearningSurrogate
return ActiveLearningSurrogate
elif name == 'SurrogateHyperparameterTuner':
from .surrogate_tuner import SurrogateHyperparameterTuner
return SurrogateHyperparameterTuner
elif name == 'tune_surrogate_for_study':
from .surrogate_tuner import tune_surrogate_for_study
return tune_surrogate_for_study
elif name == 'AutoTrainer':
from .auto_trainer import AutoTrainer
return AutoTrainer
elif name == 'TrainingDataExporter':
from .training_data_exporter import TrainingDataExporter
return TrainingDataExporter
elif name == 'create_exporter_from_config':
from .training_data_exporter import create_exporter_from_config
return create_exporter_from_config
raise AttributeError(f"module 'optimization_engine.processors.surrogates' has no attribute '{name}'")
__all__ = [
'NeuralSurrogate',
'create_surrogate_for_study',
'GenericSurrogate',
'ConfigDrivenSurrogate',
'create_surrogate',
'AdaptiveSurrogate',
'SimpleSurrogate',
'ActiveLearningSurrogate',
'SurrogateHyperparameterTuner',
'tune_surrogate_for_study',
'AutoTrainer',
'TrainingDataExporter',
'create_exporter_from_config',
]

View File

@@ -0,0 +1,591 @@
"""
Active Learning Surrogate with Uncertainty Estimation
This module implements an ensemble-based neural network surrogate that:
1. Provides uncertainty estimates via ensemble disagreement
2. Supports active learning for strategic FEA validation
3. Tracks confidence and knows when predictions are reliable
Key Concept:
- Train multiple NNs (ensemble) on slightly different data (bootstrap)
- Uncertainty = disagreement between ensemble members
- High uncertainty regions need FEA validation
- Low uncertainty + good accuracy = ready for optimization
"""
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import json
import logging
logger = logging.getLogger(__name__)
class EnsembleMLP(nn.Module):
"""Single MLP member of the ensemble."""
def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = [64, 64, 32]):
super().__init__()
layers = []
prev_dim = input_dim
for hidden_dim in hidden_dims:
layers.extend([
nn.Linear(prev_dim, hidden_dim),
nn.ReLU(),
nn.Dropout(0.1)
])
prev_dim = hidden_dim
layers.append(nn.Linear(prev_dim, output_dim))
self.network = nn.Sequential(*layers)
def forward(self, x):
return self.network(x)
class ActiveLearningSurrogate:
"""
Ensemble-based surrogate with uncertainty estimation for active learning.
Strategy:
1. Use ensemble of 5-10 neural networks
2. Each trained on bootstrap sample of data
3. Uncertainty = std dev of predictions across ensemble
4. Select high-uncertainty designs for FEA validation
"""
def __init__(
self,
n_ensemble: int = 5,
hidden_dims: List[int] = [64, 64, 32],
device: str = 'cpu'
):
self.n_ensemble = n_ensemble
self.hidden_dims = hidden_dims
self.device = device
self.models: List[EnsembleMLP] = []
self.design_var_names: List[str] = []
self.objective_names: List[str] = ['mass', 'frequency', 'max_displacement', 'max_stress']
# Normalization parameters
self.input_mean = None
self.input_std = None
self.output_mean = None
self.output_std = None
# Training history for each ensemble member
self.training_history = []
# Confidence tracking
self.validation_errors = [] # Track FEA validation errors
self.confidence_score = 0.0
def _normalize_input(self, x: np.ndarray) -> torch.Tensor:
"""Normalize input features."""
x_norm = (x - self.input_mean) / (self.input_std + 1e-8)
return torch.FloatTensor(x_norm).to(self.device)
def _denormalize_output(self, y: torch.Tensor) -> np.ndarray:
"""Denormalize output predictions."""
y_np = y.cpu().numpy()
return y_np * (self.output_std + 1e-8) + self.output_mean
def train(
self,
design_params: np.ndarray,
objectives: np.ndarray,
design_var_names: List[str],
epochs: int = 200,
lr: float = 0.001,
batch_size: int = 32,
val_split: float = 0.2
):
"""
Train ensemble on the data with bootstrap sampling.
Args:
design_params: (N, D) array of design parameters
objectives: (N, O) array of objective values
design_var_names: Names of design variables
epochs: Training epochs per ensemble member
lr: Learning rate
batch_size: Batch size
val_split: Validation split ratio
"""
self.design_var_names = design_var_names
n_samples = len(design_params)
input_dim = design_params.shape[1]
output_dim = objectives.shape[1]
# Compute normalization parameters from full dataset
self.input_mean = design_params.mean(axis=0)
self.input_std = design_params.std(axis=0)
self.output_mean = objectives.mean(axis=0)
self.output_std = objectives.std(axis=0)
# Train each ensemble member on bootstrap sample
self.models = []
self.training_history = []
for i in range(self.n_ensemble):
logger.info(f"Training ensemble member {i+1}/{self.n_ensemble}")
# Bootstrap sampling (sample with replacement)
bootstrap_idx = np.random.choice(n_samples, size=n_samples, replace=True)
X_boot = design_params[bootstrap_idx]
y_boot = objectives[bootstrap_idx]
# Split into train/val
n_val = int(len(X_boot) * val_split)
indices = np.random.permutation(len(X_boot))
train_idx, val_idx = indices[n_val:], indices[:n_val]
X_train = self._normalize_input(X_boot[train_idx])
y_train = torch.FloatTensor((y_boot[train_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
X_val = self._normalize_input(X_boot[val_idx])
y_val = torch.FloatTensor((y_boot[val_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
# Create and train model
model = EnsembleMLP(input_dim, output_dim, self.hidden_dims).to(self.device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss()
best_val_loss = float('inf')
patience_counter = 0
best_state = None
for epoch in range(epochs):
model.train()
# Mini-batch training
perm = torch.randperm(len(X_train))
epoch_loss = 0.0
n_batches = 0
for j in range(0, len(X_train), batch_size):
batch_idx = perm[j:j+batch_size]
X_batch = X_train[batch_idx]
y_batch = y_train[batch_idx]
optimizer.zero_grad()
pred = model(X_batch)
loss = criterion(pred, y_batch)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
n_batches += 1
# Validation
model.eval()
with torch.no_grad():
val_pred = model(X_val)
val_loss = criterion(val_pred, y_val).item()
# Early stopping
if val_loss < best_val_loss:
best_val_loss = val_loss
best_state = model.state_dict().copy()
patience_counter = 0
else:
patience_counter += 1
if patience_counter >= 20:
break
# Restore best model
if best_state is not None:
model.load_state_dict(best_state)
self.models.append(model)
self.training_history.append({
'member': i,
'best_val_loss': best_val_loss,
'epochs_trained': epoch + 1
})
logger.info(f" Member {i+1}: val_loss={best_val_loss:.6f}, epochs={epoch+1}")
def predict(self, params: Dict[str, float]) -> Dict[str, float]:
"""
Predict objectives for a single design.
Returns dict with predictions and uncertainty estimates.
"""
# Convert to array
x = np.array([[params.get(name, 0.0) for name in self.design_var_names]], dtype=np.float32)
# Get predictions from all ensemble members
predictions = []
for model in self.models:
model.eval()
with torch.no_grad():
x_norm = self._normalize_input(x)
pred_norm = model(x_norm)
pred = self._denormalize_output(pred_norm)
predictions.append(pred[0])
predictions = np.array(predictions) # (n_ensemble, n_objectives)
# Mean prediction and uncertainty (std dev)
mean_pred = predictions.mean(axis=0)
std_pred = predictions.std(axis=0)
result = {}
for i, name in enumerate(self.objective_names):
result[name] = float(mean_pred[i])
result[f'{name}_uncertainty'] = float(std_pred[i])
# Overall uncertainty score (normalized)
result['total_uncertainty'] = float(np.mean(std_pred / (self.output_std + 1e-8)))
return result
def predict_batch(self, params_list: List[Dict[str, float]]) -> List[Dict[str, float]]:
"""Predict for multiple designs efficiently."""
return [self.predict(p) for p in params_list]
def select_designs_for_validation(
self,
candidate_designs: List[Dict[str, float]],
n_select: int = 5,
strategy: str = 'uncertainty'
) -> List[Tuple[int, Dict[str, float], float]]:
"""
Select designs that should be validated with FEA.
Strategies:
- 'uncertainty': Select highest uncertainty designs
- 'pareto_uncertainty': Select from Pareto front with high uncertainty
- 'diverse': Select diverse designs with moderate uncertainty
Returns: List of (index, params, uncertainty_score)
"""
# Get predictions with uncertainty
predictions = self.predict_batch(candidate_designs)
# Score each design
scored = []
for i, (design, pred) in enumerate(zip(candidate_designs, predictions)):
uncertainty = pred['total_uncertainty']
scored.append((i, design, pred, uncertainty))
if strategy == 'uncertainty':
# Simply select highest uncertainty
scored.sort(key=lambda x: x[3], reverse=True)
elif strategy == 'pareto_uncertainty':
# Prefer Pareto-optimal designs with uncertainty
# Simple proxy: designs with low mass and high frequency predictions
for item in scored:
pred = item[2]
# Bonus for potentially good designs
pareto_score = -pred['mass'] / 1000 + pred['frequency'] / 10
# Combined score: uncertainty * pareto_potential
item = (item[0], item[1], item[2], item[3] * (1 + 0.5 * pareto_score))
scored.sort(key=lambda x: x[3], reverse=True)
elif strategy == 'diverse':
# Select diverse designs using simple greedy selection
selected = []
remaining = scored.copy()
# First, select highest uncertainty
remaining.sort(key=lambda x: x[3], reverse=True)
selected.append(remaining.pop(0))
while len(selected) < n_select and remaining:
# Find design most different from selected ones
best_idx = 0
best_min_dist = 0
for i, item in enumerate(remaining):
design = item[1]
min_dist = float('inf')
for sel_item in selected:
sel_design = sel_item[1]
dist = sum((design.get(k, 0) - sel_design.get(k, 0))**2
for k in self.design_var_names)
min_dist = min(min_dist, dist)
# Weight by uncertainty too
weighted_dist = min_dist * (1 + item[3])
if weighted_dist > best_min_dist:
best_min_dist = weighted_dist
best_idx = i
selected.append(remaining.pop(best_idx))
return [(s[0], s[1], s[3]) for s in selected]
return [(s[0], s[1], s[3]) for s in scored[:n_select]]
def update_with_validation(
self,
validated_designs: List[Dict[str, float]],
fea_results: List[Dict[str, float]]
):
"""
Update validation error tracking with new FEA results.
This doesn't retrain the model, just tracks prediction accuracy.
"""
for design, fea_result in zip(validated_designs, fea_results):
pred = self.predict(design)
errors = {}
for name in ['mass', 'frequency']:
if name in fea_result:
pred_val = pred[name]
actual_val = fea_result[name]
error = abs(pred_val - actual_val) / (abs(actual_val) + 1e-8)
errors[name] = error
self.validation_errors.append({
'design': design,
'predicted': {k: pred[k] for k in self.objective_names},
'actual': fea_result,
'errors': errors,
'uncertainty': pred['total_uncertainty']
})
# Update confidence score
self._update_confidence()
def _update_confidence(self):
"""Calculate overall confidence score based on validation history."""
if not self.validation_errors:
self.confidence_score = 0.0
return
recent_errors = self.validation_errors[-20:] # Last 20 validations
mass_errors = [e['errors'].get('mass', 1.0) for e in recent_errors]
freq_errors = [e['errors'].get('frequency', 1.0) for e in recent_errors]
# Confidence based on MAPE < 10%
mass_conf = sum(1 for e in mass_errors if e < 0.10) / len(mass_errors)
freq_conf = sum(1 for e in freq_errors if e < 0.10) / len(freq_errors)
# Combined confidence (frequency is harder, weight less)
self.confidence_score = 0.6 * mass_conf + 0.4 * freq_conf
def get_confidence_report(self) -> Dict:
"""Get detailed confidence metrics."""
if not self.validation_errors:
return {
'confidence_score': 0.0,
'n_validations': 0,
'status': 'NO_DATA',
'recommendation': 'Need FEA validation data'
}
recent = self.validation_errors[-20:]
mass_mape = np.mean([e['errors'].get('mass', 1.0) for e in recent]) * 100
freq_mape = np.mean([e['errors'].get('frequency', 1.0) for e in recent]) * 100
# Correlation between uncertainty and error
uncertainties = [e['uncertainty'] for e in recent]
total_errors = [np.mean(list(e['errors'].values())) for e in recent]
if len(set(uncertainties)) > 1 and len(set(total_errors)) > 1:
correlation = np.corrcoef(uncertainties, total_errors)[0, 1]
else:
correlation = 0.0
# Determine status
if self.confidence_score >= 0.8 and mass_mape < 5 and freq_mape < 15:
status = 'HIGH_CONFIDENCE'
recommendation = 'NN ready for optimization'
elif self.confidence_score >= 0.5:
status = 'MEDIUM_CONFIDENCE'
recommendation = 'Continue targeted FEA validation in high-uncertainty regions'
else:
status = 'LOW_CONFIDENCE'
recommendation = 'Need more FEA training data, especially in unexplored regions'
return {
'confidence_score': self.confidence_score,
'n_validations': len(self.validation_errors),
'mass_mape': mass_mape,
'freq_mape': freq_mape,
'uncertainty_error_correlation': correlation,
'status': status,
'recommendation': recommendation
}
def is_ready_for_optimization(self, threshold: float = 0.7) -> bool:
"""Check if NN is confident enough for optimization."""
return self.confidence_score >= threshold
def save(self, path: str):
"""Save the ensemble model."""
path = Path(path)
state = {
'n_ensemble': self.n_ensemble,
'hidden_dims': self.hidden_dims,
'design_var_names': self.design_var_names,
'objective_names': self.objective_names,
'input_mean': self.input_mean.tolist() if self.input_mean is not None else None,
'input_std': self.input_std.tolist() if self.input_std is not None else None,
'output_mean': self.output_mean.tolist() if self.output_mean is not None else None,
'output_std': self.output_std.tolist() if self.output_std is not None else None,
'validation_errors': self.validation_errors,
'confidence_score': self.confidence_score,
'training_history': self.training_history,
'models': [m.state_dict() for m in self.models]
}
torch.save(state, path)
logger.info(f"Saved ensemble surrogate to {path}")
@classmethod
def load(cls, path: str) -> 'ActiveLearningSurrogate':
"""Load the ensemble model."""
path = Path(path)
if not path.exists():
raise FileNotFoundError(f"Model not found: {path}")
state = torch.load(path, map_location='cpu')
surrogate = cls(
n_ensemble=state['n_ensemble'],
hidden_dims=state['hidden_dims']
)
surrogate.design_var_names = state['design_var_names']
surrogate.objective_names = state['objective_names']
surrogate.input_mean = np.array(state['input_mean']) if state['input_mean'] else None
surrogate.input_std = np.array(state['input_std']) if state['input_std'] else None
surrogate.output_mean = np.array(state['output_mean']) if state['output_mean'] else None
surrogate.output_std = np.array(state['output_std']) if state['output_std'] else None
surrogate.validation_errors = state.get('validation_errors', [])
surrogate.confidence_score = state.get('confidence_score', 0.0)
surrogate.training_history = state.get('training_history', [])
# Reconstruct models
input_dim = len(surrogate.design_var_names)
output_dim = len(surrogate.objective_names)
for model_state in state['models']:
model = EnsembleMLP(input_dim, output_dim, surrogate.hidden_dims)
model.load_state_dict(model_state)
surrogate.models.append(model)
logger.info(f"Loaded ensemble surrogate from {path}")
return surrogate
def extract_training_data_from_study(db_path: str, study_name: str):
"""Extract training data from Optuna study database."""
import optuna
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
study = optuna.load_study(study_name=study_name, storage=storage)
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed_trials:
raise ValueError("No completed trials found")
# Infer design variable names
design_var_names = list(completed_trials[0].params.keys())
design_params_list = []
objectives_list = []
for trial in completed_trials:
if len(trial.values) < 2:
continue
mass = trial.values[0]
raw_freq = trial.values[1]
frequency = -raw_freq if raw_freq < 0 else raw_freq
max_disp = trial.user_attrs.get('max_displacement', 0.0)
max_stress = trial.user_attrs.get('max_stress', 0.0)
# Skip invalid
if any(np.isinf(v) or np.isnan(v) for v in [mass, frequency]):
continue
if frequency <= 0:
continue
params = [trial.params.get(name, 0.0) for name in design_var_names]
design_params_list.append(params)
objectives_list.append([mass, frequency, max_disp, max_stress])
return (
np.array(design_params_list, dtype=np.float32),
np.array(objectives_list, dtype=np.float32),
design_var_names
)
if __name__ == '__main__':
import sys
logging.basicConfig(level=logging.INFO)
project_root = Path(__file__).parent.parent
# Find database
db_path = project_root / "studies/uav_arm_optimization/2_results/study.db"
study_name = "uav_arm_optimization"
if not db_path.exists():
db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db"
study_name = "uav_arm_atomizerfield_test"
print("="*60)
print("Training Active Learning Surrogate (Ensemble)")
print("="*60)
# Extract data
print(f"\nLoading data from {db_path}")
design_params, objectives, design_var_names = extract_training_data_from_study(
str(db_path), study_name
)
print(f"Loaded {len(design_params)} samples")
print(f"Design variables: {design_var_names}")
# Train ensemble
print("\nTraining 5-member ensemble...")
surrogate = ActiveLearningSurrogate(n_ensemble=5)
surrogate.train(design_params, objectives, design_var_names, epochs=200)
# Test predictions with uncertainty
print("\n" + "="*60)
print("Testing Predictions with Uncertainty")
print("="*60)
# Test on a few samples
test_designs = [
{'beam_half_core_thickness': 2.0, 'beam_face_thickness': 1.0, 'holes_diameter': 5.0, 'hole_count': 10},
{'beam_half_core_thickness': 5.0, 'beam_face_thickness': 2.0, 'holes_diameter': 20.0, 'hole_count': 8},
{'beam_half_core_thickness': 1.0, 'beam_face_thickness': 0.5, 'holes_diameter': 2.0, 'hole_count': 6}, # Low data region
]
for i, design in enumerate(test_designs):
pred = surrogate.predict(design)
print(f"\nDesign {i+1}: {design}")
print(f" Mass: {pred['mass']:.1f}g +/- {pred['mass_uncertainty']:.1f}g")
print(f" Freq: {pred['frequency']:.1f}Hz +/- {pred['frequency_uncertainty']:.1f}Hz")
print(f" Total Uncertainty: {pred['total_uncertainty']:.3f}")
# Save model
save_path = project_root / "active_learning_surrogate.pt"
surrogate.save(str(save_path))
print(f"\nSaved to {save_path}")
# Get confidence report
print("\n" + "="*60)
print("Confidence Report")
print("="*60)
report = surrogate.get_confidence_report()
for k, v in report.items():
print(f" {k}: {v}")

View File

@@ -0,0 +1,393 @@
"""
Adaptive surrogate modeling with confidence-based exploration-exploitation transitions.
This module implements state-of-the-art Bayesian optimization strategies that
dynamically adjust exploration vs exploitation based on surrogate model confidence.
"""
import numpy as np
from typing import Optional, Dict, List
import optuna
from scipy.stats import variation
import json
from pathlib import Path
class SurrogateConfidenceMetrics:
"""Calculate confidence metrics for surrogate model quality.
STUDY-AWARE: Uses study.trials directly instead of session-based history
to properly track confidence across multiple optimization runs.
"""
def __init__(self, min_trials_for_confidence: int = 15):
self.min_trials = min_trials_for_confidence
def update(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
"""Update metrics after each trial (no-op for study-aware implementation)."""
pass # Study-aware: we read directly from study.trials
def calculate_confidence(self, study: optuna.Study) -> Dict[str, float]:
"""
Calculate comprehensive surrogate confidence metrics.
STUDY-AWARE: Uses ALL completed trials from the study database,
not just trials from the current session.
PROTOCOL 11: Multi-objective studies are NOT supported by adaptive
characterization. Return immediately with max confidence to skip
characterization phase.
Returns:
Dict with confidence scores:
- 'overall_confidence': 0-1 score, where 1 = high confidence
- 'convergence_score': How stable recent improvements are
- 'exploration_coverage': How well parameter space is covered
- 'prediction_stability': How consistent the model predictions are
"""
# [Protocol 11] Multi-objective NOT supported by adaptive characterization
is_multi_objective = len(study.directions) > 1
if is_multi_objective:
return {
'overall_confidence': 1.0, # Skip characterization
'convergence_score': 1.0,
'exploration_coverage': 1.0,
'prediction_stability': 1.0,
'ready_for_exploitation': True, # Go straight to NSGA-II
'total_trials': len(study.trials),
'message': '[Protocol 11] Multi-objective: skipping adaptive characterization, using NSGA-II directly'
}
# Get ALL completed trials from study (study-aware)
all_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if len(all_trials) < self.min_trials:
return {
'overall_confidence': 0.0,
'convergence_score': 0.0,
'exploration_coverage': 0.0,
'prediction_stability': 0.0,
'ready_for_exploitation': False,
'total_trials': len(all_trials),
'message': f'Need {self.min_trials - len(all_trials)} more trials for confidence assessment (currently {len(all_trials)} trials)'
}
# 1. Convergence Score - are we finding consistent improvements?
recent_window = 10
recent_trials = all_trials[-recent_window:]
recent_values = [t.value for t in recent_trials] # Safe: single-objective only
# Calculate improvement rate
improvements = []
for i in range(1, len(recent_values)):
if recent_values[i] < recent_values[i-1]:
improvement = (recent_values[i-1] - recent_values[i]) / abs(recent_values[i-1])
improvements.append(improvement)
# If we're consistently finding improvements, convergence is good
if improvements:
avg_improvement = np.mean(improvements)
improvement_consistency = 1.0 - variation(improvements) if len(improvements) > 1 else 0.5
convergence_score = min(1.0, avg_improvement * improvement_consistency * 10)
else:
convergence_score = 0.0
# 2. Exploration Coverage - how well have we covered parameter space?
# Use coefficient of variation for each parameter
param_names = list(all_trials[0].params.keys()) if all_trials else []
coverage_scores = []
for param in param_names:
values = [t.params[param] for t in all_trials]
# Get parameter bounds
distribution = all_trials[0].distributions[param]
param_range = distribution.high - distribution.low
# Calculate spread relative to bounds
spread = max(values) - min(values)
coverage = spread / param_range
coverage_scores.append(coverage)
exploration_coverage = np.mean(coverage_scores)
# 3. Prediction Stability - are recent trials clustered in good regions?
recent_best_values = []
current_best = float('inf')
for trial in recent_trials:
current_best = min(current_best, trial.value)
recent_best_values.append(current_best)
# If best hasn't improved much recently, model is stable
if len(recent_best_values) > 1:
best_stability = 1.0 - (recent_best_values[0] - recent_best_values[-1]) / (recent_best_values[0] + 1e-10)
prediction_stability = max(0.0, min(1.0, best_stability))
else:
prediction_stability = 0.0
# 4. Overall Confidence - weighted combination
overall_confidence = (
0.4 * convergence_score +
0.3 * exploration_coverage +
0.3 * prediction_stability
)
# Decision: Ready for intensive exploitation?
ready_for_exploitation = (
overall_confidence >= 0.65 and
exploration_coverage >= 0.5 and
len(all_trials) >= self.min_trials
)
message = self._get_confidence_message(overall_confidence, ready_for_exploitation)
return {
'overall_confidence': overall_confidence,
'convergence_score': convergence_score,
'exploration_coverage': exploration_coverage,
'prediction_stability': prediction_stability,
'ready_for_exploitation': ready_for_exploitation,
'total_trials': len(all_trials),
'message': message
}
def _get_confidence_message(self, confidence: float, ready: bool) -> str:
"""Generate human-readable confidence assessment."""
if ready:
return f"HIGH CONFIDENCE ({confidence:.1%}) - Transitioning to exploitation phase"
elif confidence >= 0.5:
return f"MEDIUM CONFIDENCE ({confidence:.1%}) - Continue exploration with some exploitation"
elif confidence >= 0.3:
return f"LOW CONFIDENCE ({confidence:.1%}) - Focus on exploration"
else:
return f"VERY LOW CONFIDENCE ({confidence:.1%}) - Need more diverse exploration"
class AdaptiveExploitationCallback:
"""
Dynamically adjust sampler behavior based on surrogate confidence.
This callback monitors surrogate model confidence and adapts the optimization
strategy from exploration-heavy to exploitation-heavy as confidence increases.
STUDY-AWARE: Tracks phase transitions across multiple optimization runs
and persists confidence history to JSON files.
"""
def __init__(
self,
target_value: Optional[float] = None,
tolerance: float = 0.1,
min_confidence_for_exploitation: float = 0.65,
min_trials: int = 15,
verbose: bool = True,
tracking_dir: Optional[Path] = None
):
"""
Args:
target_value: Target objective value (if known)
tolerance: Acceptable error from target
min_confidence_for_exploitation: Confidence threshold to enable intensive exploitation
min_trials: Minimum trials before confidence assessment
verbose: Print confidence updates
tracking_dir: Directory to save phase transition tracking files
"""
self.target_value = target_value
self.tolerance = tolerance
self.min_confidence = min_confidence_for_exploitation
self.verbose = verbose
self.tracking_dir = tracking_dir
self.metrics = SurrogateConfidenceMetrics(min_trials_for_confidence=min_trials)
self.consecutive_successes = 0
# Initialize phase transition tracking
self.phase_transition_file = None
self.confidence_history_file = None
if tracking_dir:
self.tracking_dir = Path(tracking_dir)
self.tracking_dir.mkdir(parents=True, exist_ok=True)
self.phase_transition_file = self.tracking_dir / "phase_transitions.json"
self.confidence_history_file = self.tracking_dir / "confidence_history.json"
# Load existing phase transition data if available
self.phase_transitions = self._load_phase_transitions()
self.confidence_history = self._load_confidence_history()
# Determine current phase from history
self.phase = self._get_current_phase()
def _load_phase_transitions(self) -> List[Dict]:
"""Load existing phase transition history from JSON."""
if self.phase_transition_file and self.phase_transition_file.exists():
try:
with open(self.phase_transition_file, 'r') as f:
return json.load(f)
except Exception:
return []
return []
def _load_confidence_history(self) -> List[Dict]:
"""Load existing confidence history from JSON."""
if self.confidence_history_file and self.confidence_history_file.exists():
try:
with open(self.confidence_history_file, 'r') as f:
return json.load(f)
except Exception:
return []
return []
def _get_current_phase(self) -> str:
"""Determine current phase from transition history."""
if not self.phase_transitions:
return "exploration"
# If any transition to exploitation exists, we're in exploitation
for transition in self.phase_transitions:
if transition.get('to_phase') == 'exploitation':
return "exploitation"
return "exploration"
def _save_phase_transition(self, trial_number: int, confidence: Dict):
"""Save phase transition event to JSON."""
if not self.phase_transition_file:
return
transition_event = {
'trial_number': trial_number,
'from_phase': 'exploration',
'to_phase': 'exploitation',
'confidence_metrics': {
'overall_confidence': confidence['overall_confidence'],
'convergence_score': confidence['convergence_score'],
'exploration_coverage': confidence['exploration_coverage'],
'prediction_stability': confidence['prediction_stability']
},
'total_trials': confidence.get('total_trials', trial_number + 1)
}
self.phase_transitions.append(transition_event)
try:
with open(self.phase_transition_file, 'w') as f:
json.dump(self.phase_transitions, f, indent=2)
except Exception as e:
if self.verbose:
print(f" Warning: Failed to save phase transition: {e}")
def _save_confidence_snapshot(self, trial_number: int, confidence: Dict):
"""Save confidence metrics snapshot to history."""
if not self.confidence_history_file:
return
snapshot = {
'trial_number': trial_number,
'phase': self.phase,
'confidence_metrics': {
'overall_confidence': confidence['overall_confidence'],
'convergence_score': confidence['convergence_score'],
'exploration_coverage': confidence['exploration_coverage'],
'prediction_stability': confidence['prediction_stability']
},
'total_trials': confidence.get('total_trials', trial_number + 1)
}
self.confidence_history.append(snapshot)
try:
with open(self.confidence_history_file, 'w') as f:
json.dump(self.confidence_history, f, indent=2)
except Exception as e:
if self.verbose:
print(f" Warning: Failed to save confidence history: {e}")
def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
"""Called after each trial completes."""
# Skip failed trials
if trial.state != optuna.trial.TrialState.COMPLETE:
return
# Update metrics (no-op for study-aware implementation)
self.metrics.update(study, trial)
# Calculate confidence
confidence = self.metrics.calculate_confidence(study)
# Save confidence snapshot every 5 trials
if trial.number % 5 == 0:
self._save_confidence_snapshot(trial.number, confidence)
# Print confidence report
if self.verbose and trial.number % 5 == 0: # Every 5 trials
self._print_confidence_report(trial.number, confidence)
# Check for phase transition
if confidence['ready_for_exploitation'] and self.phase == "exploration":
self.phase = "exploitation"
# Save transition event
self._save_phase_transition(trial.number, confidence)
if self.verbose:
print(f"\n{'='*60}")
print(f" PHASE TRANSITION: EXPLORATION -> EXPLOITATION")
print(f" Trial #{trial.number}")
print(f" Surrogate confidence: {confidence['overall_confidence']:.1%}")
print(f" Now focusing on refining best regions")
print(f"{'='*60}\n")
# Check for target achievement
if self.target_value is not None and trial.value <= self.tolerance:
self.consecutive_successes += 1
if self.verbose:
print(f" [TARGET] Trial #{trial.number}: {trial.value:.6f}{self.tolerance:.6f}")
print(f" [TARGET] Consecutive successes: {self.consecutive_successes}/3")
# Stop after 3 consecutive successes in exploitation phase
if self.consecutive_successes >= 3 and self.phase == "exploitation":
if self.verbose:
print(f"\n{'='*60}")
print(f" TARGET ACHIEVED WITH HIGH CONFIDENCE")
print(f" Best value: {study.best_value:.6f}")
print(f" Stopping optimization")
print(f"{'='*60}\n")
study.stop()
else:
self.consecutive_successes = 0
def _print_confidence_report(self, trial_number: int, confidence: Dict):
"""Print confidence metrics report."""
print(f"\n [CONFIDENCE REPORT - Trial #{trial_number}]")
print(f" Phase: {self.phase.upper()}")
print(f" Overall Confidence: {confidence['overall_confidence']:.1%}")
print(f" - Convergence: {confidence['convergence_score']:.1%}")
print(f" - Coverage: {confidence['exploration_coverage']:.1%}")
print(f" - Stability: {confidence['prediction_stability']:.1%}")
print(f" {confidence['message']}")
print()
def create_adaptive_sampler(
n_startup_trials: int = 10,
multivariate: bool = True,
confidence_threshold: float = 0.65
) -> optuna.samplers.TPESampler:
"""
Create TPE sampler configured for adaptive exploration-exploitation.
Args:
n_startup_trials: Initial random exploration trials
multivariate: Enable multivariate TPE for correlated parameters
confidence_threshold: Confidence needed before intensive exploitation
Returns:
Configured TPESampler
"""
# Higher n_ei_candidates = more exploitation
# Will be used once confidence threshold is reached
return optuna.samplers.TPESampler(
n_startup_trials=n_startup_trials,
n_ei_candidates=24,
multivariate=multivariate,
warn_independent_sampling=True
)

View File

@@ -0,0 +1,522 @@
"""
Auto-Training Trigger System for AtomizerField
Monitors training data collection and automatically triggers neural network training
when enough data is accumulated. This is the key component to close the neural loop.
Workflow:
1. Monitor training data directory for new trials
2. When min_points threshold is reached, trigger training
3. Validate trained model against FEA
4. Deploy model for neural-accelerated optimization
Usage:
from optimization_engine.processors.surrogates.auto_trainer import AutoTrainer
trainer = AutoTrainer(
study_name="uav_arm_optimization",
min_points=50,
epochs=100
)
# Check if ready to train
if trainer.should_train():
model_path = trainer.train()
trainer.validate_model(model_path)
# Or run continuous monitoring
trainer.watch()
"""
import json
import subprocess
import sys
import time
import logging
from pathlib import Path
from typing import Dict, Any, Optional, Tuple, List
from datetime import datetime
import shutil
logger = logging.getLogger(__name__)
class AutoTrainer:
"""
Automatic neural network training trigger for AtomizerField.
Monitors training data accumulation and triggers training when thresholds are met.
"""
def __init__(
self,
study_name: str,
training_data_dir: Optional[Path] = None,
min_points: int = 50,
epochs: int = 100,
val_split: float = 0.2,
retrain_threshold: int = 50,
atomizer_field_dir: Optional[Path] = None,
output_dir: Optional[Path] = None
):
"""
Initialize the auto trainer.
Args:
study_name: Name of the optimization study
training_data_dir: Directory containing exported training data
min_points: Minimum data points before training (default: 50)
epochs: Training epochs (default: 100)
val_split: Validation split ratio (default: 0.2)
retrain_threshold: New points needed for retraining (default: 50)
atomizer_field_dir: Path to atomizer-field repository
output_dir: Directory for trained models
"""
self.study_name = study_name
self.min_points = min_points
self.epochs = epochs
self.val_split = val_split
self.retrain_threshold = retrain_threshold
# Set up directories
project_root = Path(__file__).parent.parent
self.training_data_dir = training_data_dir or (
project_root / "atomizer_field_training_data" / study_name
)
self.atomizer_field_dir = atomizer_field_dir or (project_root / "atomizer-field")
self.output_dir = output_dir or (
self.atomizer_field_dir / "runs" / f"{study_name}_auto"
)
# Tracking state
self.last_trained_count = 0
self.model_version = 0
self.training_history: List[Dict[str, Any]] = []
# Load state if exists
self._load_state()
logger.info(f"AutoTrainer initialized for {study_name}")
logger.info(f"Training data: {self.training_data_dir}")
logger.info(f"Min points: {min_points}, Retrain threshold: {retrain_threshold}")
def _state_file(self) -> Path:
"""Get path to state file."""
return self.output_dir / "auto_trainer_state.json"
def _load_state(self) -> None:
"""Load trainer state from disk."""
state_file = self._state_file()
if state_file.exists():
with open(state_file, 'r') as f:
state = json.load(f)
self.last_trained_count = state.get("last_trained_count", 0)
self.model_version = state.get("model_version", 0)
self.training_history = state.get("training_history", [])
logger.info(f"Loaded state: {self.last_trained_count} points trained, version {self.model_version}")
def _save_state(self) -> None:
"""Save trainer state to disk."""
self.output_dir.mkdir(parents=True, exist_ok=True)
state_file = self._state_file()
state = {
"study_name": self.study_name,
"last_trained_count": self.last_trained_count,
"model_version": self.model_version,
"training_history": self.training_history,
"last_updated": datetime.now().isoformat()
}
with open(state_file, 'w') as f:
json.dump(state, f, indent=2)
def count_training_points(self) -> int:
"""
Count available training data points.
Returns:
Number of trial directories with valid training data
"""
if not self.training_data_dir.exists():
return 0
count = 0
for trial_dir in self.training_data_dir.glob("trial_*"):
if trial_dir.is_dir():
# Check for required files
has_input = (trial_dir / "input" / "model.bdf").exists()
has_output = (trial_dir / "output" / "model.op2").exists()
has_metadata = (trial_dir / "metadata.json").exists()
if has_input and has_output and has_metadata:
count += 1
return count
def should_train(self) -> bool:
"""
Check if there's enough new data to trigger training.
Returns:
True if training should be triggered
"""
current_count = self.count_training_points()
# First training - check minimum threshold
if self.last_trained_count == 0:
return current_count >= self.min_points
# Retraining - check new points threshold
new_points = current_count - self.last_trained_count
return new_points >= self.retrain_threshold
def get_new_points_count(self) -> int:
"""Get number of new points since last training."""
return self.count_training_points() - self.last_trained_count
def prepare_training_split(self) -> Tuple[Path, Path]:
"""
Prepare train/validation split from collected data.
Returns:
Tuple of (train_dir, val_dir) paths
"""
train_dir = self.training_data_dir.parent / f"{self.study_name}_train"
val_dir = self.training_data_dir.parent / f"{self.study_name}_val"
# Clear existing splits
if train_dir.exists():
shutil.rmtree(train_dir)
if val_dir.exists():
shutil.rmtree(val_dir)
train_dir.mkdir(parents=True)
val_dir.mkdir(parents=True)
# Get all trial directories
trial_dirs = sorted(self.training_data_dir.glob("trial_*"))
n_trials = len(trial_dirs)
n_val = max(1, int(n_trials * self.val_split))
# Split: use latest trials for validation (they're most diverse)
train_trials = trial_dirs[:-n_val] if n_val > 0 else trial_dirs
val_trials = trial_dirs[-n_val:] if n_val > 0 else []
# Copy to split directories
for trial_dir in train_trials:
dest = train_dir / trial_dir.name
shutil.copytree(trial_dir, dest)
for trial_dir in val_trials:
dest = val_dir / trial_dir.name
shutil.copytree(trial_dir, dest)
logger.info(f"Split data: {len(train_trials)} train, {len(val_trials)} validation")
return train_dir, val_dir
def train(self, train_parametric: bool = True) -> Optional[Path]:
"""
Trigger neural network training.
Args:
train_parametric: If True, train parametric predictor (fast).
If False, train field predictor (slower, more detailed).
Returns:
Path to trained model checkpoint, or None if training failed
"""
current_count = self.count_training_points()
if current_count < self.min_points:
logger.warning(
f"Not enough data for training: {current_count} < {self.min_points}"
)
return None
logger.info(f"Starting training with {current_count} data points...")
# Prepare train/val split
train_dir, val_dir = self.prepare_training_split()
# Increment model version
self.model_version += 1
version_output_dir = self.output_dir / f"v{self.model_version}"
version_output_dir.mkdir(parents=True, exist_ok=True)
# Choose training script
if train_parametric:
train_script = self.atomizer_field_dir / "train_parametric.py"
else:
train_script = self.atomizer_field_dir / "train.py"
if not train_script.exists():
logger.error(f"Training script not found: {train_script}")
return None
# Build training command
cmd = [
sys.executable,
str(train_script),
"--train_dir", str(train_dir),
"--val_dir", str(val_dir),
"--epochs", str(self.epochs),
"--output_dir", str(version_output_dir)
]
logger.info(f"Running: {' '.join(cmd)}")
# Run training
start_time = time.time()
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
cwd=str(self.atomizer_field_dir),
timeout=3600 * 4 # 4 hour timeout
)
elapsed = time.time() - start_time
if result.returncode != 0:
logger.error(f"Training failed:\n{result.stderr}")
return None
logger.info(f"Training completed in {elapsed/60:.1f} minutes")
# Find model checkpoint
checkpoints = list(version_output_dir.glob("*.pt")) + list(version_output_dir.glob("*.pth"))
if not checkpoints:
# Check for best model
checkpoints = list(version_output_dir.glob("**/best*.pt")) + \
list(version_output_dir.glob("**/checkpoint*.pt"))
if checkpoints:
model_path = checkpoints[0]
logger.info(f"Model saved: {model_path}")
else:
logger.warning("No checkpoint file found after training")
model_path = version_output_dir
# Update state
self.last_trained_count = current_count
self.training_history.append({
"version": self.model_version,
"timestamp": datetime.now().isoformat(),
"data_points": current_count,
"epochs": self.epochs,
"training_time_seconds": elapsed,
"model_path": str(model_path)
})
self._save_state()
return model_path
except subprocess.TimeoutExpired:
logger.error("Training timed out after 4 hours")
return None
except Exception as e:
logger.error(f"Training error: {e}")
return None
def validate_model(
self,
model_path: Path,
n_validation_trials: int = 5
) -> Dict[str, Any]:
"""
Validate trained model against FEA results.
Args:
model_path: Path to trained model
n_validation_trials: Number of trials to validate
Returns:
Validation metrics dictionary
"""
logger.info(f"Validating model: {model_path}")
# This would integrate with the neural surrogate to compare predictions vs FEA
# For now, return placeholder metrics
validation_results = {
"model_path": str(model_path),
"n_validation_trials": n_validation_trials,
"mean_error_percent": 0.0, # Would be computed
"max_error_percent": 0.0,
"validated_at": datetime.now().isoformat()
}
# TODO: Implement actual validation
# - Load model
# - Run predictions on held-out trials
# - Compare with FEA results
# - Compute error metrics
return validation_results
def get_latest_model(self) -> Optional[Path]:
"""
Get path to latest trained model.
Returns:
Path to latest model checkpoint, or None if no model exists
"""
if self.model_version == 0:
return None
latest_dir = self.output_dir / f"v{self.model_version}"
if not latest_dir.exists():
return None
# Find checkpoint
checkpoints = list(latest_dir.glob("*.pt")) + list(latest_dir.glob("*.pth"))
if checkpoints:
return checkpoints[0]
return latest_dir
def watch(self, check_interval: int = 60) -> None:
"""
Continuously monitor for new data and trigger training.
Args:
check_interval: Seconds between checks (default: 60)
"""
logger.info(f"Starting auto-trainer watch mode for {self.study_name}")
logger.info(f"Check interval: {check_interval}s")
logger.info(f"Min points: {self.min_points}, Retrain threshold: {self.retrain_threshold}")
try:
while True:
current_count = self.count_training_points()
new_points = current_count - self.last_trained_count
status = f"[{datetime.now().strftime('%H:%M:%S')}] "
status += f"Points: {current_count} (new: {new_points})"
if self.should_train():
status += " -> TRAINING"
print(status)
model_path = self.train()
if model_path:
print(f"Training complete: {model_path}")
else:
if self.last_trained_count == 0:
needed = self.min_points - current_count
status += f" (need {needed} more for first training)"
else:
needed = self.retrain_threshold - new_points
status += f" (need {needed} more for retraining)"
print(status)
time.sleep(check_interval)
except KeyboardInterrupt:
logger.info("Watch mode stopped")
def get_status(self) -> Dict[str, Any]:
"""
Get current trainer status.
Returns:
Status dictionary with counts and state
"""
current_count = self.count_training_points()
new_points = current_count - self.last_trained_count
return {
"study_name": self.study_name,
"total_points": current_count,
"new_points_since_training": new_points,
"last_trained_count": self.last_trained_count,
"model_version": self.model_version,
"min_points_threshold": self.min_points,
"retrain_threshold": self.retrain_threshold,
"should_train": self.should_train(),
"latest_model": str(self.get_latest_model()) if self.get_latest_model() else None,
"training_history_count": len(self.training_history)
}
def check_training_status(study_name: str) -> Dict[str, Any]:
"""
Quick check of training data status for a study.
Args:
study_name: Name of the study
Returns:
Status dictionary
"""
trainer = AutoTrainer(study_name=study_name)
return trainer.get_status()
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="AtomizerField Auto-Trainer")
parser.add_argument("study_name", help="Name of the optimization study")
parser.add_argument("--train", action="store_true", help="Trigger training now")
parser.add_argument("--watch", action="store_true", help="Watch mode - continuous monitoring")
parser.add_argument("--status", action="store_true", help="Show status only")
parser.add_argument("--min-points", type=int, default=50, help="Minimum points for training")
parser.add_argument("--epochs", type=int, default=100, help="Training epochs")
parser.add_argument("--interval", type=int, default=60, help="Check interval for watch mode")
args = parser.parse_args()
# Set up logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s [%(levelname)s] %(message)s'
)
trainer = AutoTrainer(
study_name=args.study_name,
min_points=args.min_points,
epochs=args.epochs
)
if args.status:
status = trainer.get_status()
print(f"\nAuto-Trainer Status: {args.study_name}")
print("=" * 50)
for key, value in status.items():
print(f" {key}: {value}")
elif args.train:
if trainer.should_train():
print("Training triggered...")
model_path = trainer.train()
if model_path:
print(f"Success! Model at: {model_path}")
else:
print("Training failed")
else:
print("Not enough data for training")
print(f"Current: {trainer.count_training_points()}, Need: {args.min_points}")
elif args.watch:
trainer.watch(check_interval=args.interval)
else:
# Default: show status and recommendation
status = trainer.get_status()
print(f"\nAuto-Trainer Status: {args.study_name}")
print("=" * 50)
print(f" Data points: {status['total_points']}")
print(f" New since last training: {status['new_points_since_training']}")
print(f" Model version: v{status['model_version']}")
print(f" Should train: {status['should_train']}")
print()
if status['should_train']:
print("Ready to train! Run with --train to start training.")
else:
if status['last_trained_count'] == 0:
needed = status['min_points_threshold'] - status['total_points']
print(f"Need {needed} more points for initial training.")
else:
needed = status['retrain_threshold'] - status['new_points_since_training']
print(f"Need {needed} more new points for retraining.")

View File

@@ -0,0 +1,834 @@
"""
GenericSurrogate - Config-driven neural network surrogate for optimization.
This module eliminates ~2,800 lines of duplicated code across study run_nn_optimization.py files
by providing a fully config-driven neural surrogate system.
Usage:
# In study's run_nn_optimization.py (now ~30 lines instead of ~600):
from optimization_engine.processors.surrogates.generic_surrogate import ConfigDrivenSurrogate
surrogate = ConfigDrivenSurrogate(__file__)
surrogate.run() # Handles --train, --turbo, --all flags automatically
"""
from pathlib import Path
import sys
import json
import argparse
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple
import time
import numpy as np
# Conditional PyTorch import
try:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, TensorDataset
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
import optuna
from optuna.samplers import NSGAIISampler
class MLPSurrogate(nn.Module):
"""
Generic MLP architecture for surrogate modeling.
Architecture: Input -> [Linear -> LayerNorm -> ReLU -> Dropout] * N -> Output
"""
def __init__(self, n_inputs: int, n_outputs: int,
hidden_dims: List[int] = None, dropout: float = 0.1):
super().__init__()
if hidden_dims is None:
# Default architecture scales with problem size
hidden_dims = [64, 128, 128, 64]
layers = []
prev_dim = n_inputs
for hidden_dim in hidden_dims:
layers.extend([
nn.Linear(prev_dim, hidden_dim),
nn.LayerNorm(hidden_dim),
nn.ReLU(),
nn.Dropout(dropout)
])
prev_dim = hidden_dim
layers.append(nn.Linear(prev_dim, n_outputs))
self.network = nn.Sequential(*layers)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
return self.network(x)
class GenericSurrogate:
"""
Config-driven neural surrogate for FEA optimization.
Automatically adapts to any number of design variables and objectives
based on the optimization_config.json file.
"""
def __init__(self, config: Dict, device: str = 'auto'):
"""
Initialize surrogate from config.
Args:
config: Normalized config dictionary
device: 'auto', 'cuda', or 'cpu'
"""
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required for neural surrogate")
self.config = config
self.device = torch.device(
'cuda' if torch.cuda.is_available() and device == 'auto' else 'cpu'
)
# Extract variable and objective info from config
self.design_var_names = [v['name'] for v in config['design_variables']]
self.design_var_bounds = {
v['name']: (v['min'], v['max'])
for v in config['design_variables']
}
self.design_var_types = {
v['name']: v.get('type', 'continuous')
for v in config['design_variables']
}
self.objective_names = [o['name'] for o in config['objectives']]
self.n_inputs = len(self.design_var_names)
self.n_outputs = len(self.objective_names)
self.model = None
self.normalization = None
def _get_hidden_dims(self) -> List[int]:
"""Calculate hidden layer dimensions based on problem size."""
n = self.n_inputs
if n <= 3:
return [32, 64, 32]
elif n <= 6:
return [64, 128, 128, 64]
elif n <= 10:
return [128, 256, 256, 128]
else:
return [256, 512, 512, 256]
def train_from_database(self, db_path: Path, study_name: str,
epochs: int = 300, validation_split: float = 0.2,
batch_size: int = 16, learning_rate: float = 0.001,
save_path: Path = None, verbose: bool = True):
"""
Train surrogate from Optuna database.
Args:
db_path: Path to study.db
study_name: Name of the Optuna study
epochs: Number of training epochs
validation_split: Fraction of data for validation
batch_size: Training batch size
learning_rate: Initial learning rate
save_path: Where to save the trained model
verbose: Print training progress
"""
if verbose:
print(f"\n{'='*60}")
print(f"Training Generic Surrogate ({self.n_inputs} inputs -> {self.n_outputs} outputs)")
print(f"{'='*60}")
print(f"Device: {self.device}")
print(f"Database: {db_path}")
# Load data from Optuna
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
study = optuna.load_study(study_name=study_name, storage=storage)
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if verbose:
print(f"Found {len(completed)} completed trials")
if len(completed) < 10:
raise ValueError(f"Need at least 10 trials for training, got {len(completed)}")
# Extract training data
design_params = []
objectives = []
for trial in completed:
# Skip inf values
if any(v == float('inf') or v != v for v in trial.values): # nan check
continue
params = [trial.params.get(name, 0) for name in self.design_var_names]
objs = list(trial.values)
design_params.append(params)
objectives.append(objs)
design_params = np.array(design_params, dtype=np.float32)
objectives = np.array(objectives, dtype=np.float32)
if verbose:
print(f"Valid samples: {len(design_params)}")
print(f"\nDesign variable ranges:")
for i, name in enumerate(self.design_var_names):
print(f" {name}: {design_params[:, i].min():.2f} - {design_params[:, i].max():.2f}")
print(f"\nObjective ranges:")
for i, name in enumerate(self.objective_names):
print(f" {name}: {objectives[:, i].min():.4f} - {objectives[:, i].max():.4f}")
# Compute normalization parameters
design_mean = design_params.mean(axis=0)
design_std = design_params.std(axis=0) + 1e-8
objective_mean = objectives.mean(axis=0)
objective_std = objectives.std(axis=0) + 1e-8
self.normalization = {
'design_mean': design_mean,
'design_std': design_std,
'objective_mean': objective_mean,
'objective_std': objective_std
}
# Normalize data
X = (design_params - design_mean) / design_std
Y = (objectives - objective_mean) / objective_std
X_tensor = torch.tensor(X, dtype=torch.float32)
Y_tensor = torch.tensor(Y, dtype=torch.float32)
# Create datasets
dataset = TensorDataset(X_tensor, Y_tensor)
n_val = max(1, int(len(dataset) * validation_split))
n_train = len(dataset) - n_val
train_ds, val_ds = random_split(dataset, [n_train, n_val])
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)
if verbose:
print(f"\nTraining: {n_train} samples, Validation: {n_val} samples")
# Build model
hidden_dims = self._get_hidden_dims()
self.model = MLPSurrogate(
n_inputs=self.n_inputs,
n_outputs=self.n_outputs,
hidden_dims=hidden_dims
).to(self.device)
n_params = sum(p.numel() for p in self.model.parameters())
if verbose:
print(f"Model architecture: {self.n_inputs} -> {hidden_dims} -> {self.n_outputs}")
print(f"Total parameters: {n_params:,}")
# Training setup
optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
best_val_loss = float('inf')
best_state = None
if verbose:
print(f"\nTraining for {epochs} epochs...")
for epoch in range(epochs):
# Training
self.model.train()
train_loss = 0.0
for x, y in train_loader:
x, y = x.to(self.device), y.to(self.device)
optimizer.zero_grad()
pred = self.model(x)
loss = F.mse_loss(pred, y)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_loss /= len(train_loader)
# Validation
self.model.eval()
val_loss = 0.0
with torch.no_grad():
for x, y in val_loader:
x, y = x.to(self.device), y.to(self.device)
pred = self.model(x)
val_loss += F.mse_loss(pred, y).item()
val_loss /= len(val_loader)
scheduler.step()
if val_loss < best_val_loss:
best_val_loss = val_loss
best_state = self.model.state_dict().copy()
if verbose and ((epoch + 1) % 50 == 0 or epoch == 0):
print(f" Epoch {epoch+1:3d}: train={train_loss:.6f}, val={val_loss:.6f}")
# Load best model
self.model.load_state_dict(best_state)
if verbose:
print(f"\nBest validation loss: {best_val_loss:.6f}")
# Final evaluation
self._print_validation_metrics(val_loader)
# Save model
if save_path:
self.save(save_path)
return self
def _print_validation_metrics(self, val_loader):
"""Print validation accuracy metrics."""
self.model.eval()
all_preds = []
all_targets = []
with torch.no_grad():
for x, y in val_loader:
x = x.to(self.device)
pred = self.model(x).cpu().numpy()
all_preds.append(pred)
all_targets.append(y.numpy())
all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)
# Denormalize
preds_denorm = all_preds * self.normalization['objective_std'] + self.normalization['objective_mean']
targets_denorm = all_targets * self.normalization['objective_std'] + self.normalization['objective_mean']
print(f"\nValidation accuracy:")
for i, name in enumerate(self.objective_names):
mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) /
(np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
def predict(self, design_params: Dict[str, float]) -> Dict[str, float]:
"""
Predict objectives from design parameters.
Args:
design_params: Dictionary of design variable values
Returns:
Dictionary of predicted objective values
"""
if self.model is None:
raise ValueError("Model not trained. Call train_from_database first.")
# Build input array
x = np.array([design_params.get(name, 0) for name in self.design_var_names], dtype=np.float32)
x_norm = (x - self.normalization['design_mean']) / self.normalization['design_std']
x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
# Predict
self.model.eval()
with torch.no_grad():
y_norm = self.model(x_tensor).cpu().numpy()[0]
# Denormalize
y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
return {name: float(y[i]) for i, name in enumerate(self.objective_names)}
def sample_random_design(self) -> Dict[str, float]:
"""Sample a random point in the design space."""
params = {}
for name in self.design_var_names:
low, high = self.design_var_bounds[name]
if self.design_var_types[name] == 'integer':
params[name] = float(np.random.randint(int(low), int(high) + 1))
else:
params[name] = np.random.uniform(low, high)
return params
def save(self, path: Path):
"""Save model to file."""
path = Path(path)
torch.save({
'model_state_dict': self.model.state_dict(),
'normalization': {
'design_mean': self.normalization['design_mean'].tolist(),
'design_std': self.normalization['design_std'].tolist(),
'objective_mean': self.normalization['objective_mean'].tolist(),
'objective_std': self.normalization['objective_std'].tolist()
},
'design_var_names': self.design_var_names,
'objective_names': self.objective_names,
'n_inputs': self.n_inputs,
'n_outputs': self.n_outputs,
'hidden_dims': self._get_hidden_dims()
}, path)
print(f"Model saved to {path}")
def load(self, path: Path):
"""Load model from file."""
path = Path(path)
checkpoint = torch.load(path, map_location=self.device)
hidden_dims = checkpoint.get('hidden_dims', self._get_hidden_dims())
self.model = MLPSurrogate(
n_inputs=checkpoint['n_inputs'],
n_outputs=checkpoint['n_outputs'],
hidden_dims=hidden_dims
).to(self.device)
self.model.load_state_dict(checkpoint['model_state_dict'])
self.model.eval()
norm = checkpoint['normalization']
self.normalization = {
'design_mean': np.array(norm['design_mean']),
'design_std': np.array(norm['design_std']),
'objective_mean': np.array(norm['objective_mean']),
'objective_std': np.array(norm['objective_std'])
}
self.design_var_names = checkpoint.get('design_var_names', self.design_var_names)
self.objective_names = checkpoint.get('objective_names', self.objective_names)
print(f"Model loaded from {path}")
class ConfigDrivenSurrogate:
"""
Fully config-driven neural surrogate system.
Provides complete --train, --turbo, --all workflow based on optimization_config.json.
Handles FEA validation, surrogate retraining, and result reporting automatically.
"""
def __init__(self, script_path: str, config_path: Optional[str] = None,
element_type: str = 'auto'):
"""
Initialize config-driven surrogate.
Args:
script_path: Path to study's run_nn_optimization.py (__file__)
config_path: Optional explicit path to config
element_type: Element type for stress extraction ('auto' detects from DAT file)
"""
self.study_dir = Path(script_path).parent
self.config_path = Path(config_path) if config_path else self._find_config()
self.model_dir = self.study_dir / "1_setup" / "model"
self.results_dir = self.study_dir / "2_results"
# Load config
with open(self.config_path, 'r') as f:
self.raw_config = json.load(f)
# Normalize config (reuse from base_runner)
self.config = self._normalize_config(self.raw_config)
self.study_name = self.config['study_name']
self.element_type = element_type
self.surrogate = None
self.logger = None
self.nx_solver = None
def _find_config(self) -> Path:
"""Find the optimization config file."""
candidates = [
self.study_dir / "optimization_config.json",
self.study_dir / "1_setup" / "optimization_config.json",
]
for path in candidates:
if path.exists():
return path
raise FileNotFoundError(f"No optimization_config.json found in {self.study_dir}")
def _normalize_config(self, config: Dict) -> Dict:
"""Normalize config format variations."""
# This mirrors ConfigNormalizer from base_runner.py
normalized = {
'study_name': config.get('study_name', 'unnamed_study'),
'description': config.get('description', ''),
'design_variables': [],
'objectives': [],
'constraints': [],
'simulation': {},
'neural_acceleration': config.get('neural_acceleration', {}),
}
# Normalize design variables
for var in config.get('design_variables', []):
normalized['design_variables'].append({
'name': var.get('parameter') or var.get('name'),
'type': var.get('type', 'continuous'),
'min': var.get('bounds', [var.get('min', 0), var.get('max', 1)])[0] if 'bounds' in var else var.get('min', 0),
'max': var.get('bounds', [var.get('min', 0), var.get('max', 1)])[1] if 'bounds' in var else var.get('max', 1),
})
# Normalize objectives
for obj in config.get('objectives', []):
normalized['objectives'].append({
'name': obj.get('name'),
'direction': obj.get('goal') or obj.get('direction', 'minimize'),
})
# Normalize simulation
sim = config.get('simulation', {})
normalized['simulation'] = {
'sim_file': sim.get('sim_file', ''),
'dat_file': sim.get('dat_file', ''),
'solution_name': sim.get('solution_name', 'Solution 1'),
}
return normalized
def _setup(self):
"""Initialize solver and logger."""
project_root = self.study_dir.parents[1]
if str(project_root) not in sys.path:
sys.path.insert(0, str(project_root))
from optimization_engine.nx.solver import NXSolver
from optimization_engine.utils.logger import get_logger
self.results_dir.mkdir(exist_ok=True)
self.logger = get_logger(self.study_name, study_dir=self.results_dir)
self.nx_solver = NXSolver(nastran_version="2506")
def _detect_element_type(self, dat_file: Path) -> str:
"""Auto-detect element type from DAT file."""
if self.element_type != 'auto':
return self.element_type
try:
with open(dat_file, 'r') as f:
content = f.read(50000)
if 'CTETRA' in content:
return 'ctetra'
elif 'CHEXA' in content:
return 'chexa'
elif 'CQUAD4' in content:
return 'cquad4'
else:
return 'ctetra'
except Exception:
return 'ctetra'
def train(self, epochs: int = 300) -> GenericSurrogate:
"""Train surrogate model from FEA database."""
print(f"\n{'='*60}")
print("PHASE: Train Surrogate Model")
print(f"{'='*60}")
self.surrogate = GenericSurrogate(self.config, device='auto')
self.surrogate.train_from_database(
db_path=self.results_dir / "study.db",
study_name=self.study_name,
epochs=epochs,
save_path=self.results_dir / "surrogate_best.pt"
)
return self.surrogate
def turbo(self, total_nn_trials: int = 5000, batch_size: int = 100,
retrain_every: int = 10, epochs: int = 150):
"""
Run TURBO mode: NN exploration + FEA validation + surrogate retraining.
Args:
total_nn_trials: Total NN trials to run
batch_size: NN trials per batch before FEA validation
retrain_every: Retrain surrogate every N FEA validations
epochs: Training epochs for surrogate
"""
from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf
from optimization_engine.extractors.extract_displacement import extract_displacement
from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress
print(f"\n{'#'*60}")
print(f"# TURBO MODE: {self.study_name}")
print(f"{'#'*60}")
print(f"Design variables: {len(self.config['design_variables'])}")
print(f"Objectives: {len(self.config['objectives'])}")
print(f"Total NN budget: {total_nn_trials:,} trials")
print(f"NN batch size: {batch_size}")
print(f"Expected FEA validations: ~{total_nn_trials // batch_size}")
# Initial training
print(f"\n[INIT] Training initial surrogate...")
self.train(epochs=epochs)
sim_file = self.model_dir / self.config['simulation']['sim_file']
dat_file = self.model_dir / self.config['simulation']['dat_file']
element_type = self._detect_element_type(dat_file)
fea_count = 0
nn_count = 0
best_solutions = []
iteration = 0
start_time = time.time()
# Get objective info
obj_names = [o['name'] for o in self.config['objectives']]
obj_directions = [o['direction'] for o in self.config['objectives']]
while nn_count < total_nn_trials:
iteration += 1
batch_trials = min(batch_size, total_nn_trials - nn_count)
print(f"\n{''*50}")
print(f"Iteration {iteration}: NN trials {nn_count+1}-{nn_count+batch_trials}")
# Find best candidate via NN
best_candidate = None
best_score = float('inf')
for _ in range(batch_trials):
params = self.surrogate.sample_random_design()
pred = self.surrogate.predict(params)
# Compute score (simple weighted sum - lower is better)
score = sum(pred[name] if obj_directions[i] == 'minimize' else -pred[name]
for i, name in enumerate(obj_names))
if score < best_score:
best_score = score
best_candidate = {'params': params, 'nn_pred': pred}
nn_count += batch_trials
params = best_candidate['params']
nn_pred = best_candidate['nn_pred']
# Log NN prediction
var_str = ", ".join(f"{k}={v:.2f}" for k, v in list(params.items())[:3])
print(f" Best NN: {var_str}...")
pred_str = ", ".join(f"{k}={v:.2f}" for k, v in nn_pred.items())
print(f" NN pred: {pred_str}")
# Run FEA validation
result = self.nx_solver.run_simulation(
sim_file=sim_file,
working_dir=self.model_dir,
expression_updates=params,
solution_name=self.config['simulation'].get('solution_name'),
cleanup=True
)
if not result['success']:
print(f" FEA FAILED - skipping")
continue
# Extract FEA results
op2_file = result['op2_file']
fea_results = self._extract_fea_results(op2_file, dat_file, element_type,
extract_mass_from_bdf, extract_displacement,
extract_solid_stress)
fea_str = ", ".join(f"{k}={v:.2f}" for k, v in fea_results.items())
print(f" FEA: {fea_str}")
# Compute errors
errors = {}
for name in obj_names:
if name in fea_results and name in nn_pred and fea_results[name] != 0:
errors[name] = abs(fea_results[name] - nn_pred[name]) / abs(fea_results[name]) * 100
if errors:
err_str = ", ".join(f"{k}={v:.1f}%" for k, v in errors.items())
print(f" Error: {err_str}")
fea_count += 1
# Add to main study database
self._add_to_study(params, fea_results, iteration)
best_solutions.append({
'iteration': iteration,
'params': {k: float(v) for k, v in params.items()},
'fea': [fea_results.get(name, 0) for name in obj_names],
'nn_error': [errors.get(name, 0) for name in obj_names[:2]] # First 2 errors
})
# Retrain periodically
if fea_count % retrain_every == 0:
print(f"\n [RETRAIN] Retraining surrogate...")
self.train(epochs=epochs)
# Progress
elapsed = time.time() - start_time
rate = nn_count / elapsed if elapsed > 0 else 0
remaining = (total_nn_trials - nn_count) / rate if rate > 0 else 0
print(f" Progress: {nn_count:,}/{total_nn_trials:,} NN | {fea_count} FEA | {elapsed/60:.1f}min | ~{remaining/60:.1f}min left")
# Final summary
print(f"\n{'#'*60}")
print("# TURBO MODE COMPLETE")
print(f"{'#'*60}")
print(f"NN trials: {nn_count:,}")
print(f"FEA validations: {fea_count}")
print(f"Time: {(time.time() - start_time)/60:.1f} minutes")
# Save report
turbo_report = {
'mode': 'turbo',
'total_nn_trials': nn_count,
'fea_validations': fea_count,
'time_minutes': (time.time() - start_time) / 60,
'best_solutions': best_solutions[-20:]
}
report_path = self.results_dir / "turbo_report.json"
with open(report_path, 'w') as f:
json.dump(turbo_report, f, indent=2)
print(f"\nReport saved to {report_path}")
def _extract_fea_results(self, op2_file: Path, dat_file: Path, element_type: str,
extract_mass_from_bdf, extract_displacement, extract_solid_stress) -> Dict[str, float]:
"""Extract FEA results for all objectives."""
results = {}
for obj in self.config['objectives']:
name = obj['name'].lower()
try:
if 'mass' in name:
results[obj['name']] = extract_mass_from_bdf(str(dat_file))
elif 'stress' in name:
stress_result = extract_solid_stress(op2_file, subcase=1, element_type=element_type)
results[obj['name']] = stress_result.get('max_von_mises', float('inf')) / 1000.0
elif 'displacement' in name:
disp_result = extract_displacement(op2_file, subcase=1)
results[obj['name']] = disp_result['max_displacement']
elif 'stiffness' in name:
disp_result = extract_displacement(op2_file, subcase=1)
max_disp = disp_result['max_displacement']
# Negative for minimization in multi-objective
results[obj['name']] = -1000.0 / max(abs(max_disp), 1e-6)
results['displacement'] = max_disp
except Exception as e:
print(f" Warning: Failed to extract {name}: {e}")
results[obj['name']] = float('inf')
return results
def _add_to_study(self, params: Dict, fea_results: Dict, iteration: int):
"""Add FEA result to main Optuna study."""
try:
storage = f"sqlite:///{self.results_dir / 'study.db'}"
study = optuna.load_study(
study_name=self.study_name,
storage=storage,
sampler=NSGAIISampler(population_size=20, seed=42)
)
trial = study.ask()
for var in self.config['design_variables']:
name = var['name']
value = params[name]
if var['type'] == 'integer':
trial.suggest_int(name, int(value), int(value))
else:
trial.suggest_float(name, value, value)
# Get objective values in order
obj_values = [fea_results.get(o['name'], float('inf')) for o in self.config['objectives']]
study.tell(trial, obj_values)
trial.set_user_attr('source', 'turbo_mode')
trial.set_user_attr('iteration', iteration)
except Exception as e:
print(f" Warning: couldn't add to study: {e}")
def run(self, args=None):
"""
Main entry point with argument parsing.
Handles --train, --turbo, --all flags.
"""
if args is None:
args = self.parse_args()
self._setup()
print(f"\n{'#'*60}")
print(f"# {self.study_name} - Hybrid NN Optimization")
print(f"{'#'*60}")
if args.all or args.train:
self.train(epochs=args.epochs)
if args.all or args.turbo:
self.turbo(
total_nn_trials=args.nn_trials,
batch_size=args.batch_size,
retrain_every=args.retrain_every,
epochs=args.epochs
)
print(f"\n{'#'*60}")
print("# Workflow Complete!")
print(f"{'#'*60}\n")
return 0
def parse_args(self) -> argparse.Namespace:
"""Parse command line arguments."""
parser = argparse.ArgumentParser(description=f'{self.study_name} - Hybrid NN Optimization')
parser.add_argument('--train', action='store_true', help='Train surrogate only')
parser.add_argument('--turbo', action='store_true', help='TURBO mode (recommended)')
parser.add_argument('--all', action='store_true', help='Train then run turbo')
nn_config = self.config.get('neural_acceleration', {})
parser.add_argument('--epochs', type=int, default=nn_config.get('epochs', 200), help='Training epochs')
parser.add_argument('--nn-trials', type=int, default=nn_config.get('nn_trials', 5000), help='Total NN trials')
parser.add_argument('--batch-size', type=int, default=100, help='NN batch size')
parser.add_argument('--retrain-every', type=int, default=10, help='Retrain every N FEA')
args = parser.parse_args()
if not any([args.train, args.turbo, args.all]):
print("No phase specified. Use --train, --turbo, or --all")
print("\nRecommended workflow:")
print(f" python run_nn_optimization.py --turbo --nn-trials {nn_config.get('nn_trials', 5000)}")
sys.exit(1)
return args
def create_surrogate(script_path: str, element_type: str = 'auto') -> ConfigDrivenSurrogate:
"""
Factory function to create a ConfigDrivenSurrogate.
Args:
script_path: Path to study's run_nn_optimization.py (__file__)
element_type: Element type for stress extraction
Returns:
Configured surrogate ready to run
"""
return ConfigDrivenSurrogate(script_path, element_type=element_type)

View File

@@ -0,0 +1,993 @@
"""
Neural network surrogate integration for Atomizer.
This module provides the integration layer between Atomizer optimization framework
and AtomizerField neural network models for fast FEA predictions.
Key Features:
- Load and manage AtomizerField trained models
- Convert design variables to neural field format
- Provide millisecond FEA predictions
- Automatic fallback to FEA when confidence is low
- Performance tracking and statistics
Usage:
from optimization_engine.processors.surrogates.neural_surrogate import NeuralSurrogate, create_surrogate_for_study
# Create surrogate for UAV arm study
surrogate = create_surrogate_for_study(
model_path="atomizer-field/runs/uav_arm_model/checkpoint_best.pt",
training_data_dir="atomizer_field_training_data/uav_arm_train"
)
# Predict for new design
results = surrogate.predict(design_params)
print(f"Max displacement: {results['max_displacement']:.6f} mm")
"""
import sys
import time
import json
import logging
import h5py
from pathlib import Path
from typing import Dict, Any, Optional, Tuple, List
import numpy as np
logger = logging.getLogger(__name__)
# Add atomizer-field to path for imports
_atomizer_field_path = Path(__file__).parent.parent / 'atomizer-field'
if str(_atomizer_field_path) not in sys.path:
sys.path.insert(0, str(_atomizer_field_path))
try:
import torch
from torch_geometric.data import Data
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
logger.warning("PyTorch not installed. Neural surrogate features will be limited.")
# Import AtomizerField model
ATOMIZER_FIELD_AVAILABLE = False
PARAMETRIC_MODEL_AVAILABLE = False
if TORCH_AVAILABLE:
try:
from neural_models.field_predictor import AtomizerFieldModel, create_model
ATOMIZER_FIELD_AVAILABLE = True
except ImportError as e:
logger.warning(f"AtomizerField modules not found: {e}")
try:
from neural_models.parametric_predictor import ParametricFieldPredictor, create_parametric_model
PARAMETRIC_MODEL_AVAILABLE = True
except ImportError as e:
logger.warning(f"Parametric predictor modules not found: {e}")
class NeuralSurrogate:
"""
Neural surrogate for fast FEA predictions using trained AtomizerField model.
This class loads a trained AtomizerField model and provides fast predictions
of displacement fields, which can then be used to compute derived quantities
like max displacement, estimated stress, etc.
"""
def __init__(
self,
model_path: Path,
training_data_dir: Path,
device: str = 'auto'
):
"""
Initialize neural surrogate.
Args:
model_path: Path to trained model checkpoint (.pt file)
training_data_dir: Path to training data (for normalization stats and mesh)
device: Computing device ('cuda', 'cpu', or 'auto')
"""
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required. Install: pip install torch torch-geometric")
if not ATOMIZER_FIELD_AVAILABLE:
raise ImportError("AtomizerField modules not found")
self.model_path = Path(model_path)
self.training_data_dir = Path(training_data_dir)
# Set device
if device == 'auto':
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = torch.device(device)
logger.info(f"Neural Surrogate initializing on {self.device}")
# Load model
self._load_model()
# Load normalization statistics
self._load_normalization_stats()
# Load reference mesh structure
self._load_reference_mesh()
# Performance tracking
self.stats = {
'predictions': 0,
'total_time_ms': 0.0,
'fea_validations': 0
}
logger.info(f"Neural Surrogate ready: {self.num_nodes} nodes, model loaded")
def _load_model(self):
"""Load trained AtomizerField model."""
logger.info(f"Loading model from {self.model_path}")
checkpoint = torch.load(self.model_path, map_location=self.device)
# Create model with saved config
model_config = checkpoint['config']['model']
self.model = AtomizerFieldModel(**model_config)
self.model.load_state_dict(checkpoint['model_state_dict'])
self.model = self.model.to(self.device)
self.model.eval()
self.model_config = checkpoint['config']
self.best_val_loss = checkpoint.get('best_val_loss', None)
n_params = sum(p.numel() for p in self.model.parameters())
logger.info(f"Model loaded: {n_params:,} parameters, val_loss={self.best_val_loss:.4f}")
def _load_normalization_stats(self):
"""Load normalization statistics from training data."""
case_dirs = sorted(self.training_data_dir.glob("trial_*"))
if not case_dirs:
logger.warning("No training cases found - using identity normalization")
self.coord_mean = np.zeros(3)
self.coord_std = np.ones(3)
self.disp_mean = np.zeros(6)
self.disp_std = np.ones(6)
return
# Compute stats from all training data
all_coords = []
all_disp = []
for case_dir in case_dirs:
h5_file = case_dir / "neural_field_data.h5"
if h5_file.exists():
with h5py.File(h5_file, 'r') as f:
all_coords.append(f['mesh/node_coordinates'][:])
all_disp.append(f['results/displacement'][:])
if all_coords:
all_coords = np.concatenate(all_coords, axis=0)
all_disp = np.concatenate(all_disp, axis=0)
self.coord_mean = all_coords.mean(axis=0)
self.coord_std = all_coords.std(axis=0) + 1e-8
self.disp_mean = all_disp.mean(axis=0)
self.disp_std = all_disp.std(axis=0) + 1e-8
logger.info(f"Normalization stats from {len(case_dirs)} cases")
def _load_reference_mesh(self):
"""Load reference mesh structure for building graphs."""
case_dirs = sorted(self.training_data_dir.glob("trial_*"))
if not case_dirs:
raise ValueError(f"No training cases in {self.training_data_dir}")
first_case = case_dirs[0]
json_file = first_case / "neural_field_data.json"
h5_file = first_case / "neural_field_data.h5"
# Load metadata
with open(json_file, 'r') as f:
self.reference_metadata = json.load(f)
# Load mesh
with h5py.File(h5_file, 'r') as f:
self.reference_coords = f['mesh/node_coordinates'][:]
self.num_nodes = self.reference_coords.shape[0]
# Build edge index (constant for parametric optimization)
self._build_graph_structure()
def _build_graph_structure(self):
"""Build graph edge index and attributes from mesh."""
metadata = self.reference_metadata
num_nodes = self.num_nodes
edge_list = []
# Get material properties
mat_props = [0.0] * 5
if 'materials' in metadata:
for mat in metadata['materials']:
if mat['type'] == 'MAT1':
mat_props = [
mat.get('E', 0.0) / 1e6,
mat.get('nu', 0.0),
mat.get('rho', 0.0) * 1e6,
mat.get('G', 0.0) / 1e6 if mat.get('G') else 0.0,
mat.get('alpha', 0.0) * 1e6 if mat.get('alpha') else 0.0
]
break
# Process elements to create edges
if 'mesh' in metadata and 'elements' in metadata['mesh']:
for elem_type in ['solid', 'shell', 'beam']:
if elem_type in metadata['mesh']['elements']:
for elem in metadata['mesh']['elements'][elem_type]:
elem_nodes = elem['nodes']
for i in range(len(elem_nodes)):
for j in range(i + 1, len(elem_nodes)):
node_i = elem_nodes[i] - 1
node_j = elem_nodes[j] - 1
if node_i < num_nodes and node_j < num_nodes:
edge_list.append([node_i, node_j])
edge_list.append([node_j, node_i])
if edge_list:
self.edge_index = torch.tensor(edge_list, dtype=torch.long).t().to(self.device)
num_edges = self.edge_index.shape[1]
self.edge_attr = torch.tensor([mat_props] * num_edges, dtype=torch.float).to(self.device)
else:
self.edge_index = torch.zeros((2, 0), dtype=torch.long).to(self.device)
self.edge_attr = torch.zeros((0, 5), dtype=torch.float).to(self.device)
# Build BC mask and load features (constant for this study)
self._build_bc_and_loads()
def _build_bc_and_loads(self):
"""Build boundary condition mask and load features."""
metadata = self.reference_metadata
num_nodes = self.num_nodes
# BC mask
self.bc_mask = torch.zeros(num_nodes, 6)
if 'boundary_conditions' in metadata and 'spc' in metadata['boundary_conditions']:
for spc in metadata['boundary_conditions']['spc']:
node_id = spc['node']
if node_id <= num_nodes:
dofs = spc['dofs']
for dof_char in str(dofs):
if dof_char.isdigit():
dof_idx = int(dof_char) - 1
if 0 <= dof_idx < 6:
self.bc_mask[node_id - 1, dof_idx] = 1.0
# Load features
self.load_features = torch.zeros(num_nodes, 3)
if 'loads' in metadata and 'point_forces' in metadata['loads']:
for force in metadata['loads']['point_forces']:
node_id = force['node']
if node_id <= num_nodes:
magnitude = force['magnitude']
direction = force['direction']
force_vector = [magnitude * d for d in direction]
self.load_features[node_id - 1] = torch.tensor(force_vector)
self.bc_mask = self.bc_mask.to(self.device)
self.load_features = self.load_features.to(self.device)
def _build_node_features(self) -> torch.Tensor:
"""Build node features tensor for model input."""
# Normalized coordinates
coords = torch.from_numpy(self.reference_coords).float()
coords_norm = (coords - torch.from_numpy(self.coord_mean).float()) / \
torch.from_numpy(self.coord_std).float()
coords_norm = coords_norm.to(self.device)
# Concatenate: [coords(3) + bc_mask(6) + loads(3)] = 12 features
node_features = torch.cat([coords_norm, self.bc_mask, self.load_features], dim=-1)
return node_features
def predict(
self,
design_params: Dict[str, float],
return_fields: bool = False
) -> Dict[str, Any]:
"""
Predict FEA results using neural network.
Args:
design_params: Design parameter values (not used for prediction,
but kept for API compatibility - mesh is constant)
return_fields: If True, return complete displacement field
Returns:
dict with:
- max_displacement: Maximum displacement magnitude (mm)
- max_stress: Estimated maximum stress (approximate)
- inference_time_ms: Prediction time
- fields: Complete displacement field (if return_fields=True)
"""
start_time = time.time()
# Build graph data
node_features = self._build_node_features()
graph_data = Data(
x=node_features,
edge_index=self.edge_index,
edge_attr=self.edge_attr
)
# Predict
with torch.no_grad():
predictions = self.model(graph_data, return_stress=True)
# Denormalize displacement
displacement = predictions['displacement'].cpu().numpy()
displacement = displacement * self.disp_std + self.disp_mean
# Compute max values
disp_magnitude = np.linalg.norm(displacement[:, :3], axis=1)
max_displacement = float(np.max(disp_magnitude))
# Stress (approximate - model trained on displacement only)
max_stress = float(torch.max(predictions['von_mises']).item())
inference_time = (time.time() - start_time) * 1000
results = {
'max_displacement': max_displacement,
'max_stress': max_stress,
'inference_time_ms': inference_time
}
if return_fields:
results['displacement_field'] = displacement
results['von_mises_field'] = predictions['von_mises'].cpu().numpy()
# Update stats
self.stats['predictions'] += 1
self.stats['total_time_ms'] += inference_time
return results
def get_statistics(self) -> Dict[str, Any]:
"""Get prediction statistics."""
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
if self.stats['predictions'] > 0 else 0
return {
'total_predictions': self.stats['predictions'],
'total_time_ms': self.stats['total_time_ms'],
'average_time_ms': avg_time,
'model_path': str(self.model_path),
'best_val_loss': self.best_val_loss,
'device': str(self.device)
}
def needs_fea_validation(self, trial_number: int) -> bool:
"""
Determine if FEA validation is recommended.
Args:
trial_number: Current trial number
Returns:
True if FEA validation is recommended
"""
# Validate periodically
if trial_number < 5:
return True # First few always validate
if trial_number % 20 == 0:
return True # Periodic validation
return False
class ParametricSurrogate:
"""
Parametric neural surrogate that predicts ALL objectives from design parameters.
Unlike NeuralSurrogate which only predicts displacement fields,
ParametricSurrogate directly predicts:
- mass
- frequency
- max_displacement
- max_stress
This is the "future-proof" solution using design-conditioned GNN.
"""
def __init__(
self,
model_path: Path,
training_data_dir: Path = None,
device: str = 'auto',
num_nodes: int = 500
):
"""
Initialize parametric surrogate.
Args:
model_path: Path to trained parametric model checkpoint (.pt file)
training_data_dir: Path to training data (optional - for mesh loading)
device: Computing device ('cuda', 'cpu', or 'auto')
num_nodes: Number of nodes for synthetic reference graph (default: 500)
"""
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required. Install: pip install torch torch-geometric")
if not PARAMETRIC_MODEL_AVAILABLE:
raise ImportError("Parametric predictor modules not found")
self.model_path = Path(model_path)
self.training_data_dir = Path(training_data_dir) if training_data_dir else None
self.num_nodes = num_nodes
# Set device
if device == 'auto':
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = torch.device(device)
logger.info(f"Parametric Surrogate initializing on {self.device}")
# Load model and normalization
self._load_model()
# Create reference graph structure (synthetic - matching training)
self._create_reference_graph()
# Performance tracking
self.stats = {
'predictions': 0,
'total_time_ms': 0.0
}
logger.info(f"Parametric Surrogate ready: {self.num_nodes} nodes, "
f"predicts mass/freq/disp/stress")
def _load_model(self):
"""Load trained parametric model and normalization stats."""
logger.info(f"Loading parametric model from {self.model_path}")
checkpoint = torch.load(self.model_path, map_location=self.device)
# Create model with saved config
model_config = checkpoint['config']
self.model = create_parametric_model(model_config)
self.model.load_state_dict(checkpoint['model_state_dict'])
self.model = self.model.to(self.device)
self.model.eval()
self.model_config = model_config
self.best_val_loss = checkpoint.get('best_val_loss', None)
# Load normalization stats
norm = checkpoint.get('normalization', {})
self.design_var_names = checkpoint.get('design_var_names', [])
self.n_design_vars = len(self.design_var_names)
self.design_mean = torch.tensor(norm.get('design_mean', [0.0] * self.n_design_vars),
dtype=torch.float32, device=self.device)
self.design_std = torch.tensor(norm.get('design_std', [1.0] * self.n_design_vars),
dtype=torch.float32, device=self.device)
self.coord_mean = np.array(norm.get('coord_mean', [0.0, 0.0, 0.0]))
self.coord_std = np.array(norm.get('coord_std', [1.0, 1.0, 1.0]))
self.disp_mean = np.array(norm.get('disp_mean', [0.0] * 6))
self.disp_std = np.array(norm.get('disp_std', [1.0] * 6))
# Scalar normalization stats (for denormalization)
self.mass_mean = norm.get('mass_mean', 3500.0)
self.mass_std = norm.get('mass_std', 700.0)
self.freq_mean = norm.get('freq_mean', 18.0)
self.freq_std = norm.get('freq_std', 2.0)
self.max_disp_mean = norm.get('max_disp_mean', 0.025)
self.max_disp_std = norm.get('max_disp_std', 0.005)
self.max_stress_mean = norm.get('max_stress_mean', 200.0)
self.max_stress_std = norm.get('max_stress_std', 50.0)
n_params = sum(p.numel() for p in self.model.parameters())
logger.info(f"Parametric model loaded: {n_params:,} params, "
f"val_loss={self.best_val_loss:.4f}")
logger.info(f"Design vars: {self.design_var_names}")
def _create_reference_graph(self):
"""
Create a synthetic reference graph structure for the GNN.
This matches the create_reference_graph() function used during training
in train_parametric.py. The model was trained on synthetic graphs,
so we need to use the same structure for inference.
"""
num_nodes = self.num_nodes
# Create simple node features (random, like training)
# [coords(3) + bc_mask(6) + loads(3)] = 12 features
x = torch.randn(num_nodes, 12, device=self.device)
# Create grid-like connectivity (same as training)
edges = []
grid_size = int(np.ceil(np.sqrt(num_nodes)))
for i in range(num_nodes):
row = i // grid_size
col = i % grid_size
# Right neighbor (same row)
right = i + 1
if col < grid_size - 1 and right < num_nodes:
edges.append([i, right])
edges.append([right, i])
# Bottom neighbor (next row)
bottom = i + grid_size
if bottom < num_nodes:
edges.append([i, bottom])
edges.append([bottom, i])
# Ensure we have at least some edges
if len(edges) == 0:
# Fallback: partially connected for very small graphs
for i in range(num_nodes):
for j in range(i + 1, min(i + 5, num_nodes)):
edges.append([i, j])
edges.append([j, i])
edge_index = torch.tensor(edges, dtype=torch.long, device=self.device).t().contiguous()
edge_attr = torch.randn(edge_index.shape[1], 5, device=self.device)
# Create reference graph data object
self.reference_graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
logger.info(f"Created reference graph: {num_nodes} nodes, {edge_index.shape[1]} edges")
def predict(
self,
design_params: Dict[str, float],
return_fields: bool = False
) -> Dict[str, Any]:
"""
Predict all FEA objectives using parametric neural network.
Args:
design_params: Design parameter values (e.g. support_angle, tip_thickness, etc.)
return_fields: If True, return complete displacement field (not supported for synthetic graphs)
Returns:
dict with:
- mass: Predicted mass (g)
- frequency: Predicted fundamental frequency (Hz)
- max_displacement: Maximum displacement magnitude (mm)
- max_stress: Maximum von Mises stress (MPa)
- inference_time_ms: Prediction time
"""
start_time = time.time()
# Build design parameter tensor
param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
design_tensor = torch.tensor(param_values, dtype=torch.float32, device=self.device)
# Normalize design params
design_tensor_norm = (design_tensor - self.design_mean) / self.design_std
# Add batch dimension for design params
design_batch = design_tensor_norm.unsqueeze(0) # [1, n_design_vars]
# Predict using reference graph
with torch.no_grad():
predictions = self.model(self.reference_graph, design_batch)
# Extract scalar predictions and denormalize
# Model outputs normalized values, so we need to convert back to original scale
mass_norm = predictions['mass'].item()
freq_norm = predictions['frequency'].item()
disp_norm = predictions['max_displacement'].item()
stress_norm = predictions['max_stress'].item()
# Denormalize to original scale
mass = mass_norm * self.mass_std + self.mass_mean
frequency = freq_norm * self.freq_std + self.freq_mean
max_displacement = disp_norm * self.max_disp_std + self.max_disp_mean
max_stress = stress_norm * self.max_stress_std + self.max_stress_mean
inference_time = (time.time() - start_time) * 1000
results = {
'mass': mass,
'frequency': frequency,
'max_displacement': max_displacement,
'max_stress': max_stress,
'inference_time_ms': inference_time
}
# Update stats
self.stats['predictions'] += 1
self.stats['total_time_ms'] += inference_time
return results
def get_statistics(self) -> Dict[str, Any]:
"""Get prediction statistics."""
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
if self.stats['predictions'] > 0 else 0
return {
'total_predictions': self.stats['predictions'],
'total_time_ms': self.stats['total_time_ms'],
'average_time_ms': avg_time,
'model_path': str(self.model_path),
'best_val_loss': self.best_val_loss,
'device': str(self.device),
'design_var_names': self.design_var_names,
'n_design_vars': self.n_design_vars
}
class HybridOptimizer:
"""
Intelligent optimizer that combines FEA and neural surrogates.
Phases:
1. Exploration: Use FEA to explore design space
2. Training: Train neural network on FEA data
3. Exploitation: Use NN for fast optimization
4. Validation: Periodically validate with FEA
"""
def __init__(self, config: Dict[str, Any]):
"""
Initialize hybrid optimizer.
Args:
config: Configuration dictionary
"""
self.config = config
self.phase = 'exploration'
self.fea_samples = []
self.nn_surrogate = None
self.trial_count = 0
# Phase transition parameters
self.min_fea_samples = config.get('min_fea_samples', 20)
self.validation_frequency = config.get('validation_frequency', 10)
self.retrain_frequency = config.get('retrain_frequency', 50)
self.confidence_threshold = config.get('confidence_threshold', 0.95)
# Training data export directory
self.training_data_dir = Path(config.get('training_data_dir', 'hybrid_training_data'))
self.training_data_dir.mkdir(parents=True, exist_ok=True)
logger.info("Hybrid optimizer initialized")
def should_use_nn(self, trial_number: int) -> Tuple[bool, str]:
"""
Decide whether to use NN for this trial.
Args:
trial_number: Current trial number
Returns:
Tuple of (use_nn, reason)
"""
self.trial_count = trial_number
if self.phase == 'exploration':
# Initial FEA exploration
if trial_number < self.min_fea_samples:
return False, f"Exploration phase ({trial_number}/{self.min_fea_samples})"
else:
# Transition to training
self.phase = 'training'
self._train_surrogate()
self.phase = 'exploitation'
return True, "Switched to neural surrogate"
elif self.phase == 'exploitation':
# Check if validation needed
if trial_number % self.validation_frequency == 0:
return False, f"Periodic FEA validation (every {self.validation_frequency} trials)"
# Check if retraining needed
if trial_number % self.retrain_frequency == 0:
self._retrain_surrogate()
return True, "Using neural surrogate"
return False, f"Unknown phase: {self.phase}"
def _train_surrogate(self):
"""Train surrogate model on accumulated FEA data."""
logger.info(f"Training surrogate on {len(self.fea_samples)} FEA samples")
# In practice, this would:
# 1. Parse all FEA data using neural_field_parser
# 2. Train AtomizerField model
# 3. Load trained model
# For now, try to load pre-trained model if available
model_path = self.config.get('pretrained_model_path')
if model_path and Path(model_path).exists():
self.nn_surrogate = NeuralSurrogate(
model_path=Path(model_path),
confidence_threshold=self.confidence_threshold
)
logger.info(f"Loaded pre-trained model from {model_path}")
else:
logger.warning("No pre-trained model available, continuing with FEA")
self.phase = 'exploration'
def _retrain_surrogate(self):
"""Retrain surrogate with additional data."""
logger.info(f"Retraining surrogate with {len(self.fea_samples)} total samples")
# Trigger retraining pipeline
# This would integrate with AtomizerField training
def add_fea_sample(self, design: Dict[str, float], results: Dict[str, float]):
"""
Add FEA result to training data.
Args:
design: Design variables
results: FEA results
"""
self.fea_samples.append({
'trial': self.trial_count,
'design': design,
'results': results,
'timestamp': time.time()
})
def get_phase_info(self) -> Dict[str, Any]:
"""Get current phase information."""
return {
'phase': self.phase,
'trial_count': self.trial_count,
'fea_samples': len(self.fea_samples),
'has_surrogate': self.nn_surrogate is not None,
'min_fea_samples': self.min_fea_samples,
'validation_frequency': self.validation_frequency
}
def create_parametric_surrogate_for_study(
model_path: str = None,
training_data_dir: str = None,
project_root: Path = None
) -> Optional[ParametricSurrogate]:
"""
Factory function to create parametric neural surrogate for UAV arm study.
This is the recommended surrogate type - predicts all objectives (mass, freq, etc.)
Args:
model_path: Path to parametric model checkpoint (auto-detect if None)
training_data_dir: Path to training data (auto-detect if None)
project_root: Project root directory for auto-detection
Returns:
ParametricSurrogate instance or None if not available
"""
if not TORCH_AVAILABLE or not PARAMETRIC_MODEL_AVAILABLE:
logger.warning("Parametric surrogate not available: PyTorch or ParametricPredictor missing")
return None
# Auto-detect project root
if project_root is None:
project_root = Path(__file__).parent.parent
# Auto-detect parametric model path
if model_path is None:
default_model = project_root / "atomizer-field" / "runs" / "parametric_uav_arm_v2" / "checkpoint_best.pt"
if not default_model.exists():
# Try older path
default_model = project_root / "atomizer-field" / "runs" / "parametric_uav_arm" / "checkpoint_best.pt"
if default_model.exists():
model_path = str(default_model)
else:
logger.warning(f"No trained parametric model found")
return None
else:
model_path = str(model_path)
# Auto-detect training data
if training_data_dir is None:
default_data = project_root / "atomizer_field_training_data" / "uav_arm_train"
if default_data.exists():
training_data_dir = str(default_data)
else:
logger.warning(f"No training data found at {default_data}")
return None
else:
training_data_dir = str(training_data_dir)
try:
return ParametricSurrogate(
model_path=Path(model_path),
training_data_dir=Path(training_data_dir)
)
except Exception as e:
logger.error(f"Failed to create parametric surrogate: {e}")
import traceback
traceback.print_exc()
return None
def create_surrogate_for_study(
model_path: str = None,
training_data_dir: str = None,
project_root: Path = None,
study_name: str = None
) -> Optional[ParametricSurrogate]:
"""
Factory function to create neural surrogate for any study.
Automatically detects whether to use ParametricSurrogate or NeuralSurrogate
based on available models.
Args:
model_path: Path to model checkpoint (auto-detect if None)
training_data_dir: Path to training data (optional, no longer required)
project_root: Project root directory for auto-detection
study_name: Name of the study (for auto-detection)
Returns:
ParametricSurrogate or NeuralSurrogate instance, or None if not available
"""
if not TORCH_AVAILABLE:
logger.warning("Neural surrogate not available: PyTorch missing")
return None
# Auto-detect project root
if project_root is None:
project_root = Path(__file__).parent.parent
# Try ParametricSurrogate first (more capable)
if PARAMETRIC_MODEL_AVAILABLE:
# Search order for parametric models
model_search_paths = []
if study_name:
# Study-specific paths
model_search_paths.append(project_root / "atomizer-field" / "runs" / study_name / "checkpoint_best.pt")
# Common model names to try
model_search_paths.extend([
project_root / "atomizer-field" / "runs" / "bracket_model" / "checkpoint_best.pt",
project_root / "atomizer-field" / "runs" / "bracket_stiffness_optimization_atomizerfield" / "checkpoint_best.pt",
project_root / "atomizer-field" / "runs" / "parametric_uav_arm_v2" / "checkpoint_best.pt",
project_root / "atomizer-field" / "runs" / "parametric_uav_arm" / "checkpoint_best.pt",
project_root / "atomizer-field" / "runs" / "uav_arm_model" / "checkpoint_best.pt",
])
# Use explicit path if provided
if model_path is not None:
model_search_paths = [Path(model_path)]
# Find first existing model
found_model = None
for mp in model_search_paths:
if mp.exists():
found_model = mp
logger.info(f"Found model at: {found_model}")
break
if found_model:
try:
# ParametricSurrogate no longer requires training_data_dir
# It creates a synthetic reference graph like during training
return ParametricSurrogate(
model_path=found_model,
training_data_dir=None # Not required anymore
)
except Exception as e:
logger.warning(f"Failed to create ParametricSurrogate: {e}")
import traceback
traceback.print_exc()
# Fall through to try NeuralSurrogate
# Fall back to NeuralSurrogate if ParametricSurrogate not available
if ATOMIZER_FIELD_AVAILABLE:
if model_path is None:
default_model = project_root / "atomizer-field" / "runs" / "uav_arm_model" / "checkpoint_best.pt"
if default_model.exists():
model_path = str(default_model)
else:
logger.warning(f"No trained model found")
return None
else:
model_path = str(model_path)
if training_data_dir is None:
default_data = project_root / "atomizer_field_training_data" / "uav_arm_train"
if default_data.exists():
training_data_dir = str(default_data)
else:
logger.warning(f"No training data found (required for NeuralSurrogate)")
return None
else:
training_data_dir = str(training_data_dir)
try:
return NeuralSurrogate(
model_path=Path(model_path),
training_data_dir=Path(training_data_dir)
)
except Exception as e:
logger.error(f"Failed to create neural surrogate: {e}")
return None
logger.warning("No suitable neural model modules available")
return None
def create_surrogate_from_config(config: Dict[str, Any]) -> Optional[NeuralSurrogate]:
"""
Factory function to create neural surrogate from workflow configuration.
Args:
config: Workflow configuration dictionary
Returns:
NeuralSurrogate instance if enabled, None otherwise
"""
if not config.get('neural_surrogate', {}).get('enabled', False):
logger.info("Neural surrogate is disabled")
return None
surrogate_config = config['neural_surrogate']
model_path = surrogate_config.get('model_path')
training_data_dir = surrogate_config.get('training_data_dir')
if not model_path:
logger.error("Neural surrogate enabled but model_path not specified")
return None
if not training_data_dir:
logger.error("Neural surrogate enabled but training_data_dir not specified")
return None
try:
surrogate = NeuralSurrogate(
model_path=Path(model_path),
training_data_dir=Path(training_data_dir),
device=surrogate_config.get('device', 'auto')
)
logger.info("Neural surrogate created successfully")
return surrogate
except Exception as e:
logger.error(f"Failed to create neural surrogate: {e}")
return None
def create_hybrid_optimizer_from_config(config: Dict[str, Any]) -> Optional[HybridOptimizer]:
"""
Factory function to create hybrid optimizer from configuration.
Args:
config: Workflow configuration dictionary
Returns:
HybridOptimizer instance if enabled, None otherwise
"""
if not config.get('hybrid_optimization', {}).get('enabled', False):
logger.info("Hybrid optimization is disabled")
return None
hybrid_config = config.get('hybrid_optimization', {})
try:
optimizer = HybridOptimizer(hybrid_config)
logger.info("Hybrid optimizer created successfully")
return optimizer
except Exception as e:
logger.error(f"Failed to create hybrid optimizer: {e}")
return None

View File

@@ -0,0 +1,648 @@
"""
Simple MLP Surrogate for Fast Optimization
This module provides a lightweight neural network surrogate that:
1. Trains directly from Optuna database (no mesh parsing needed)
2. Uses simple MLP: design_params -> [mass, frequency, max_disp, max_stress]
3. Provides millisecond predictions for optimization
This is much simpler than the GNN-based approach and works well when:
- You have enough FEA data in the database
- You only need scalar objective predictions (no field data)
- You want quick setup without mesh parsing pipeline
Usage:
from optimization_engine.processors.surrogates.simple_mlp_surrogate import SimpleSurrogate, train_from_database
# Train from database
surrogate = train_from_database(
db_path="studies/uav_arm_atomizerfield_test/2_results/study.db",
study_name="uav_arm_atomizerfield_test"
)
# Predict
results = surrogate.predict({
'beam_half_core_thickness': 3.0,
'beam_face_thickness': 1.5,
'holes_diameter': 8.0,
'hole_count': 4
})
"""
import json
import logging
import time
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
import numpy as np
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
logger.warning("PyTorch not installed. SimpleSurrogate will be limited.")
try:
import optuna
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
class MLPModel(nn.Module):
"""Simple MLP for design parameter -> objective prediction."""
def __init__(
self,
n_inputs: int = 4,
n_outputs: int = 4,
hidden_dims: List[int] = [128, 256, 128, 64],
dropout: float = 0.1
):
super().__init__()
layers = []
prev_dim = n_inputs
for hidden_dim in hidden_dims:
layers.extend([
nn.Linear(prev_dim, hidden_dim),
nn.LayerNorm(hidden_dim),
nn.ReLU(),
nn.Dropout(dropout)
])
prev_dim = hidden_dim
layers.append(nn.Linear(prev_dim, n_outputs))
self.network = nn.Sequential(*layers)
# Initialize weights
self._init_weights()
def _init_weights(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
return self.network(x)
class FEADataset(Dataset):
"""Dataset for training from FEA results."""
def __init__(
self,
design_params: np.ndarray,
objectives: np.ndarray
):
self.design_params = torch.tensor(design_params, dtype=torch.float32)
self.objectives = torch.tensor(objectives, dtype=torch.float32)
def __len__(self):
return len(self.design_params)
def __getitem__(self, idx):
return self.design_params[idx], self.objectives[idx]
class SimpleSurrogate:
"""
Simple MLP-based surrogate for FEA prediction.
This is a lightweight alternative to the GNN-based approach that:
- Doesn't require mesh parsing
- Trains directly from database
- Provides fast scalar predictions
"""
def __init__(
self,
model: nn.Module = None,
design_var_names: List[str] = None,
objective_names: List[str] = None,
normalization: Dict[str, Any] = None,
device: str = 'auto'
):
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required. Install: pip install torch")
# Set device
if device == 'auto':
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = torch.device(device)
self.model = model
if model is not None:
self.model = model.to(self.device)
self.model.eval()
self.design_var_names = design_var_names or []
self.objective_names = objective_names or ['mass', 'frequency', 'max_displacement', 'max_stress']
# Normalization stats
self.normalization = normalization or {
'design_mean': np.zeros(len(self.design_var_names)),
'design_std': np.ones(len(self.design_var_names)),
'objective_mean': np.zeros(len(self.objective_names)),
'objective_std': np.ones(len(self.objective_names))
}
# Performance tracking
self.stats = {
'predictions': 0,
'total_time_ms': 0.0
}
logger.info(f"SimpleSurrogate initialized on {self.device}")
def predict(self, design_params: Dict[str, float]) -> Dict[str, Any]:
"""
Predict FEA objectives from design parameters.
Args:
design_params: Dict of design variable values
Returns:
Dict with mass, frequency, max_displacement, max_stress, inference_time_ms
"""
start_time = time.time()
# Build input tensor
param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
x = np.array(param_values, dtype=np.float32)
# Normalize
x_norm = (x - self.normalization['design_mean']) / (self.normalization['design_std'] + 1e-8)
x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
# Predict
with torch.no_grad():
y_norm = self.model(x_tensor).cpu().numpy()[0]
# Denormalize
y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
inference_time = (time.time() - start_time) * 1000
results = {
self.objective_names[i]: float(y[i]) for i in range(len(self.objective_names))
}
results['inference_time_ms'] = inference_time
# Update stats
self.stats['predictions'] += 1
self.stats['total_time_ms'] += inference_time
return results
def get_statistics(self) -> Dict[str, Any]:
"""Get prediction statistics."""
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
if self.stats['predictions'] > 0 else 0
return {
'total_predictions': self.stats['predictions'],
'total_time_ms': self.stats['total_time_ms'],
'average_time_ms': avg_time,
'device': str(self.device),
'design_var_names': self.design_var_names,
'objective_names': self.objective_names
}
def save(self, path: Path):
"""Save surrogate to file."""
path = Path(path)
torch.save({
'model_state_dict': self.model.state_dict(),
'design_var_names': self.design_var_names,
'objective_names': self.objective_names,
'normalization': self.normalization,
'model_config': {
'n_inputs': len(self.design_var_names),
'n_outputs': len(self.objective_names)
}
}, path)
logger.info(f"Surrogate saved to {path}")
@classmethod
def load(cls, path: Path, device: str = 'auto') -> 'SimpleSurrogate':
"""Load surrogate from file."""
path = Path(path)
checkpoint = torch.load(path, map_location='cpu')
# Create model
model_config = checkpoint['model_config']
model = MLPModel(
n_inputs=model_config['n_inputs'],
n_outputs=model_config['n_outputs']
)
model.load_state_dict(checkpoint['model_state_dict'])
return cls(
model=model,
design_var_names=checkpoint['design_var_names'],
objective_names=checkpoint['objective_names'],
normalization=checkpoint['normalization'],
device=device
)
def extract_data_from_database(
db_path: str,
study_name: str
) -> Tuple[np.ndarray, np.ndarray, List[str], List[str]]:
"""
Extract training data from Optuna database.
Args:
db_path: Path to SQLite database
study_name: Name of Optuna study
Returns:
Tuple of (design_params, objectives, design_var_names, objective_names)
"""
if not OPTUNA_AVAILABLE:
raise ImportError("Optuna required. Install: pip install optuna")
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
study = optuna.load_study(study_name=study_name, storage=storage)
# Get completed trials
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed_trials:
raise ValueError(f"No completed trials in study {study_name}")
logger.info(f"Found {len(completed_trials)} completed trials")
# Get design variable names from first trial
design_var_names = list(completed_trials[0].params.keys())
# Determine objective structure
first_values = completed_trials[0].values
if first_values is None:
raise ValueError("Trials have no objective values")
# For multi-objective, values are [mass, frequency, ...]
# We also need user_attrs for constraints
# Collect data - filter out invalid samples
design_params_list = []
objectives_list = []
skipped = 0
for trial in completed_trials:
# Objectives - need mass, frequency, max_disp, max_stress
mass = trial.values[0] if len(trial.values) > 0 else 0.0
frequency = trial.values[1] if len(trial.values) > 1 else 0.0
# Get constraints from user_attrs
max_disp = trial.user_attrs.get('max_displacement', 0.0)
max_stress = trial.user_attrs.get('max_stress', 0.0)
# Note: frequency is stored as -freq for minimization, so convert back
# Also filter out inf values
objectives = [mass, -frequency, max_disp, max_stress]
# Skip invalid samples (inf, nan, or extreme values)
if any(np.isinf(v) or np.isnan(v) or v > 1e10 for v in objectives):
skipped += 1
continue
# Skip if frequency is negative (indicates error)
if -frequency <= 0:
skipped += 1
continue
# Design parameters
params = [trial.params.get(name, 0.0) for name in design_var_names]
design_params_list.append(params)
objectives_list.append(objectives)
if skipped > 0:
logger.info(f"Skipped {skipped} invalid samples")
if not design_params_list:
raise ValueError("No valid samples found after filtering")
design_params = np.array(design_params_list, dtype=np.float32)
objectives = np.array(objectives_list, dtype=np.float32)
objective_names = ['mass', 'frequency', 'max_displacement', 'max_stress']
logger.info(f"Extracted {len(design_params)} valid samples")
logger.info(f"Design vars: {design_var_names}")
logger.info(f"Objectives: {objective_names}")
return design_params, objectives, design_var_names, objective_names
def train_from_database(
db_path: str,
study_name: str,
epochs: int = 200,
batch_size: int = 32,
learning_rate: float = 0.001,
val_split: float = 0.2,
save_path: Optional[str] = None,
device: str = 'auto'
) -> SimpleSurrogate:
"""
Train SimpleSurrogate from Optuna database.
Args:
db_path: Path to SQLite database
study_name: Name of Optuna study
epochs: Training epochs
batch_size: Batch size
learning_rate: Learning rate
val_split: Validation split ratio
save_path: Optional path to save trained model
device: Computing device
Returns:
Trained SimpleSurrogate
"""
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required")
# Set device
if device == 'auto':
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
device = torch.device(device)
print(f"\n{'='*60}")
print("Training Simple MLP Surrogate from Database")
print(f"{'='*60}")
print(f"Device: {device}")
# Extract data
print(f"\n[1] Loading data from {db_path}")
design_params, objectives, design_var_names, objective_names = extract_data_from_database(
db_path, study_name
)
print(f" Samples: {len(design_params)}")
print(f" Design vars: {design_var_names}")
print(f" Objectives: {objective_names}")
# Compute normalization stats
design_mean = design_params.mean(axis=0)
design_std = design_params.std(axis=0)
objective_mean = objectives.mean(axis=0)
objective_std = objectives.std(axis=0)
print(f"\n Objective ranges:")
for i, name in enumerate(objective_names):
print(f" {name}: {objectives[:, i].min():.2f} - {objectives[:, i].max():.2f}")
# Normalize data
design_params_norm = (design_params - design_mean) / (design_std + 1e-8)
objectives_norm = (objectives - objective_mean) / (objective_std + 1e-8)
# Create dataset
dataset = FEADataset(design_params_norm, objectives_norm)
# Split into train/val
n_val = int(len(dataset) * val_split)
n_train = len(dataset) - n_val
train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
print(f"\n[2] Creating model")
print(f" Train samples: {n_train}")
print(f" Val samples: {n_val}")
# Create model
model = MLPModel(
n_inputs=len(design_var_names),
n_outputs=len(objective_names),
hidden_dims=[128, 256, 128, 64]
).to(device)
n_params = sum(p.numel() for p in model.parameters())
print(f" Model params: {n_params:,}")
# Training
print(f"\n[3] Training for {epochs} epochs")
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
best_val_loss = float('inf')
best_state = None
for epoch in range(epochs):
# Train
model.train()
train_loss = 0.0
for x, y in train_loader:
x, y = x.to(device), y.to(device)
optimizer.zero_grad()
pred = model(x)
loss = F.mse_loss(pred, y)
loss.backward()
optimizer.step()
train_loss += loss.item()
train_loss /= len(train_loader)
# Validate
model.eval()
val_loss = 0.0
with torch.no_grad():
for x, y in val_loader:
x, y = x.to(device), y.to(device)
pred = model(x)
val_loss += F.mse_loss(pred, y).item()
val_loss /= len(val_loader)
scheduler.step()
# Track best
if val_loss < best_val_loss:
best_val_loss = val_loss
best_state = model.state_dict().copy()
# Log progress
if (epoch + 1) % 20 == 0 or epoch == 0:
print(f" Epoch {epoch+1:3d}: train_loss={train_loss:.6f}, val_loss={val_loss:.6f}")
# Load best model
model.load_state_dict(best_state)
print(f"\n Best val_loss: {best_val_loss:.6f}")
# Create surrogate
normalization = {
'design_mean': design_mean,
'design_std': design_std,
'objective_mean': objective_mean,
'objective_std': objective_std
}
surrogate = SimpleSurrogate(
model=model,
design_var_names=design_var_names,
objective_names=objective_names,
normalization=normalization,
device=str(device)
)
# Evaluate accuracy
print(f"\n[4] Evaluating accuracy on validation set")
model.eval()
all_preds = []
all_targets = []
with torch.no_grad():
for x, y in val_loader:
x = x.to(device)
pred = model(x).cpu().numpy()
all_preds.append(pred)
all_targets.append(y.numpy())
all_preds = np.concatenate(all_preds)
all_targets = np.concatenate(all_targets)
# Denormalize for error calculation
preds_denorm = all_preds * objective_std + objective_mean
targets_denorm = all_targets * objective_std + objective_mean
for i, name in enumerate(objective_names):
mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) / (np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
# Save if requested
if save_path:
surrogate.save(save_path)
print(f"\n{'='*60}")
print("Training complete!")
print(f"{'='*60}")
return surrogate
def create_simple_surrogate_for_study(
db_path: str = None,
study_name: str = None,
model_path: str = None,
project_root: Path = None
) -> Optional[SimpleSurrogate]:
"""
Factory function to create or load SimpleSurrogate for UAV arm study.
If model_path exists, loads the model. Otherwise trains from database.
Args:
db_path: Path to Optuna database
study_name: Name of study
model_path: Path to saved model (auto-detect if None)
project_root: Project root for auto-detection
Returns:
SimpleSurrogate instance or None
"""
if not TORCH_AVAILABLE:
logger.warning("PyTorch not available")
return None
# Auto-detect paths
if project_root is None:
project_root = Path(__file__).parent.parent
if model_path is None:
model_path = project_root / "simple_mlp_surrogate.pt"
else:
model_path = Path(model_path)
# Try to load existing model
if model_path.exists():
logger.info(f"Loading existing surrogate from {model_path}")
return SimpleSurrogate.load(model_path)
# Otherwise train from database
if db_path is None:
db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
else:
db_path = Path(db_path)
if study_name is None:
study_name = "uav_arm_atomizerfield_test"
if not db_path.exists():
logger.warning(f"Database not found: {db_path}")
return None
logger.info(f"Training surrogate from {db_path}")
return train_from_database(
db_path=str(db_path),
study_name=study_name,
save_path=str(model_path)
)
if __name__ == "__main__":
import sys
# Default paths
project_root = Path(__file__).parent.parent
db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
model_path = project_root / "simple_mlp_surrogate.pt"
print("Simple MLP Surrogate Training")
print("="*60)
if not db_path.exists():
print(f"ERROR: Database not found: {db_path}")
sys.exit(1)
# Train
surrogate = train_from_database(
db_path=str(db_path),
study_name="uav_arm_atomizerfield_test",
epochs=300,
save_path=str(model_path)
)
# Test predictions
print("\n[5] Testing predictions")
test_params = {
'beam_half_core_thickness': 3.0,
'beam_face_thickness': 1.5,
'holes_diameter': 8.0,
'hole_count': 4
}
print(f" Input: {test_params}")
results = surrogate.predict(test_params)
print(f" Mass: {results['mass']:.2f} g")
print(f" Frequency: {results['frequency']:.2f} Hz")
print(f" Max Displacement: {results['max_displacement']:.6f} mm")
print(f" Max Stress: {results['max_stress']:.2f} MPa")
print(f" Inference time: {results['inference_time_ms']:.2f} ms")
# Test variation
print("\n[6] Testing variation with parameters")
for thickness in [1.0, 3.0, 5.0]:
params = {**test_params, 'beam_half_core_thickness': thickness}
r = surrogate.predict(params)
print(f" thickness={thickness}: mass={r['mass']:.0f}g, freq={r['frequency']:.2f}Hz")

View File

@@ -0,0 +1,800 @@
"""
Hyperparameter Tuning for Neural Network Surrogates
This module provides automatic hyperparameter optimization for MLP surrogates
using Optuna, with proper train/validation splits and early stopping.
Key Features:
1. Optuna-based hyperparameter search
2. K-fold cross-validation
3. Early stopping to prevent overfitting
4. Ensemble model support
5. Proper uncertainty quantification
Usage:
from optimization_engine.processors.surrogates.surrogate_tuner import SurrogateHyperparameterTuner
tuner = SurrogateHyperparameterTuner(
input_dim=11,
output_dim=3,
n_trials=50
)
best_config = tuner.tune(X_train, Y_train)
model = tuner.create_tuned_model(best_config)
"""
import logging
import numpy as np
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass, field
from pathlib import Path
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
logger.warning("PyTorch not installed")
try:
import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
logger.warning("Optuna not installed")
@dataclass
class SurrogateConfig:
"""Configuration for a tuned surrogate model."""
hidden_dims: List[int] = field(default_factory=lambda: [128, 256, 128])
dropout: float = 0.1
activation: str = 'relu'
use_batch_norm: bool = True
learning_rate: float = 1e-3
weight_decay: float = 1e-4
batch_size: int = 16
max_epochs: int = 500
early_stopping_patience: int = 30
# Normalization stats (filled during training)
input_mean: Optional[np.ndarray] = None
input_std: Optional[np.ndarray] = None
output_mean: Optional[np.ndarray] = None
output_std: Optional[np.ndarray] = None
# Validation metrics
val_loss: float = float('inf')
val_r2: Dict[str, float] = field(default_factory=dict)
class TunableMLP(nn.Module):
"""Flexible MLP with configurable architecture."""
def __init__(
self,
input_dim: int,
output_dim: int,
hidden_dims: List[int],
dropout: float = 0.1,
activation: str = 'relu',
use_batch_norm: bool = True
):
super().__init__()
self.input_dim = input_dim
self.output_dim = output_dim
# Activation function
activations = {
'relu': nn.ReLU(),
'leaky_relu': nn.LeakyReLU(0.1),
'elu': nn.ELU(),
'selu': nn.SELU(),
'gelu': nn.GELU(),
'swish': nn.SiLU()
}
act_fn = activations.get(activation, nn.ReLU())
# Build layers
layers = []
prev_dim = input_dim
for hidden_dim in hidden_dims:
layers.append(nn.Linear(prev_dim, hidden_dim))
if use_batch_norm:
layers.append(nn.BatchNorm1d(hidden_dim))
layers.append(act_fn)
if dropout > 0:
layers.append(nn.Dropout(dropout))
prev_dim = hidden_dim
layers.append(nn.Linear(prev_dim, output_dim))
self.network = nn.Sequential(*layers)
self._init_weights()
def _init_weights(self):
"""Initialize weights using Kaiming initialization."""
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, x):
return self.network(x)
class EarlyStopping:
"""Early stopping to prevent overfitting."""
def __init__(self, patience: int = 20, min_delta: float = 1e-5):
self.patience = patience
self.min_delta = min_delta
self.counter = 0
self.best_loss = float('inf')
self.best_model_state = None
self.should_stop = False
def __call__(self, val_loss: float, model: nn.Module) -> bool:
if val_loss < self.best_loss - self.min_delta:
self.best_loss = val_loss
self.best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
self.counter = 0
else:
self.counter += 1
if self.counter >= self.patience:
self.should_stop = True
return self.should_stop
def restore_best(self, model: nn.Module):
"""Restore model to best state."""
if self.best_model_state is not None:
model.load_state_dict(self.best_model_state)
class SurrogateHyperparameterTuner:
"""
Automatic hyperparameter tuning for neural network surrogates.
Uses Optuna for Bayesian optimization of:
- Network architecture (layers, widths)
- Regularization (dropout, weight decay)
- Learning rate and batch size
- Activation functions
"""
def __init__(
self,
input_dim: int,
output_dim: int,
n_trials: int = 50,
n_cv_folds: int = 5,
device: str = 'auto',
seed: int = 42,
timeout_seconds: Optional[int] = None
):
"""
Initialize hyperparameter tuner.
Args:
input_dim: Number of input features (design variables)
output_dim: Number of outputs (objectives)
n_trials: Number of Optuna trials for hyperparameter search
n_cv_folds: Number of cross-validation folds
device: Computing device ('cuda', 'cpu', or 'auto')
seed: Random seed for reproducibility
timeout_seconds: Optional timeout for tuning
"""
if not TORCH_AVAILABLE:
raise ImportError("PyTorch required for surrogate tuning")
if not OPTUNA_AVAILABLE:
raise ImportError("Optuna required for hyperparameter tuning")
self.input_dim = input_dim
self.output_dim = output_dim
self.n_trials = n_trials
self.n_cv_folds = n_cv_folds
self.seed = seed
self.timeout = timeout_seconds
if device == 'auto':
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = torch.device(device)
self.best_config: Optional[SurrogateConfig] = None
self.study: Optional[optuna.Study] = None
# Set seeds
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed(seed)
def _suggest_hyperparameters(self, trial: optuna.Trial) -> SurrogateConfig:
"""Suggest hyperparameters for a trial."""
# Architecture
n_layers = trial.suggest_int('n_layers', 2, 5)
hidden_dims = []
for i in range(n_layers):
dim = trial.suggest_int(f'hidden_dim_{i}', 32, 512, step=32)
hidden_dims.append(dim)
# Regularization
dropout = trial.suggest_float('dropout', 0.0, 0.5)
weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
# Training
learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
batch_size = trial.suggest_categorical('batch_size', [8, 16, 32, 64])
# Activation
activation = trial.suggest_categorical('activation',
['relu', 'leaky_relu', 'elu', 'gelu', 'swish'])
# Batch norm
use_batch_norm = trial.suggest_categorical('use_batch_norm', [True, False])
return SurrogateConfig(
hidden_dims=hidden_dims,
dropout=dropout,
activation=activation,
use_batch_norm=use_batch_norm,
learning_rate=learning_rate,
weight_decay=weight_decay,
batch_size=batch_size
)
def _train_fold(
self,
config: SurrogateConfig,
X_train: np.ndarray,
Y_train: np.ndarray,
X_val: np.ndarray,
Y_val: np.ndarray,
trial: Optional[optuna.Trial] = None
) -> Tuple[float, Dict[str, float]]:
"""Train model on one fold and return validation metrics."""
# Create model
model = TunableMLP(
input_dim=self.input_dim,
output_dim=self.output_dim,
hidden_dims=config.hidden_dims,
dropout=config.dropout,
activation=config.activation,
use_batch_norm=config.use_batch_norm
).to(self.device)
# Prepare data
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)
train_dataset = TensorDataset(X_train_t, Y_train_t)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
# Optimizer and scheduler
optimizer = torch.optim.AdamW(
model.parameters(),
lr=config.learning_rate,
weight_decay=config.weight_decay
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
optimizer, T_max=config.max_epochs
)
early_stopping = EarlyStopping(patience=config.early_stopping_patience)
# Training loop
for epoch in range(config.max_epochs):
model.train()
for X_batch, Y_batch in train_loader:
optimizer.zero_grad()
pred = model(X_batch)
loss = nn.functional.mse_loss(pred, Y_batch)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
# Validation
model.eval()
with torch.no_grad():
val_pred = model(X_val_t)
val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()
# Early stopping
if early_stopping(val_loss, model):
break
# Optuna pruning (only report once per epoch across all folds)
if trial is not None and epoch % 10 == 0:
trial.report(val_loss, epoch // 10)
if trial.should_prune():
raise optuna.TrialPruned()
# Restore best model
early_stopping.restore_best(model)
# Final validation metrics
model.eval()
with torch.no_grad():
val_pred = model(X_val_t).cpu().numpy()
Y_val_np = Y_val_t.cpu().numpy()
val_loss = float(np.mean((val_pred - Y_val_np) ** 2))
# R² per output
r2_scores = {}
for i in range(self.output_dim):
ss_res = np.sum((Y_val_np[:, i] - val_pred[:, i]) ** 2)
ss_tot = np.sum((Y_val_np[:, i] - Y_val_np[:, i].mean()) ** 2)
r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0
r2_scores[f'output_{i}'] = r2
return val_loss, r2_scores
def _cross_validate(
self,
config: SurrogateConfig,
X: np.ndarray,
Y: np.ndarray,
trial: Optional[optuna.Trial] = None
) -> Tuple[float, Dict[str, float]]:
"""Perform k-fold cross-validation."""
n_samples = len(X)
indices = np.random.permutation(n_samples)
fold_size = n_samples // self.n_cv_folds
fold_losses = []
fold_r2s = {f'output_{i}': [] for i in range(self.output_dim)}
for fold in range(self.n_cv_folds):
# Split indices
val_start = fold * fold_size
val_end = val_start + fold_size if fold < self.n_cv_folds - 1 else n_samples
val_indices = indices[val_start:val_end]
train_indices = np.concatenate([indices[:val_start], indices[val_end:]])
X_train, Y_train = X[train_indices], Y[train_indices]
X_val, Y_val = X[val_indices], Y[val_indices]
# Skip fold if too few samples
if len(X_train) < 10 or len(X_val) < 2:
continue
val_loss, r2_scores = self._train_fold(
config, X_train, Y_train, X_val, Y_val, trial
)
fold_losses.append(val_loss)
for key, val in r2_scores.items():
fold_r2s[key].append(val)
mean_loss = np.mean(fold_losses)
mean_r2 = {k: np.mean(v) for k, v in fold_r2s.items()}
return mean_loss, mean_r2
def tune(
self,
X: np.ndarray,
Y: np.ndarray,
output_names: Optional[List[str]] = None
) -> SurrogateConfig:
"""
Tune hyperparameters using Optuna.
Args:
X: Input features [n_samples, input_dim]
Y: Outputs [n_samples, output_dim]
output_names: Optional names for outputs (for logging)
Returns:
Best SurrogateConfig found
"""
logger.info(f"Starting hyperparameter tuning with {self.n_trials} trials...")
logger.info(f"Data: {len(X)} samples, {self.n_cv_folds}-fold CV")
# Normalize data
self.input_mean = X.mean(axis=0)
self.input_std = X.std(axis=0) + 1e-8
self.output_mean = Y.mean(axis=0)
self.output_std = Y.std(axis=0) + 1e-8
X_norm = (X - self.input_mean) / self.input_std
Y_norm = (Y - self.output_mean) / self.output_std
def objective(trial: optuna.Trial) -> float:
config = self._suggest_hyperparameters(trial)
val_loss, r2_scores = self._cross_validate(config, X_norm, Y_norm, trial)
# Log R² scores
for key, val in r2_scores.items():
trial.set_user_attr(f'r2_{key}', val)
return val_loss
# Create study
self.study = optuna.create_study(
direction='minimize',
sampler=TPESampler(seed=self.seed, n_startup_trials=10),
pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=20)
)
self.study.optimize(
objective,
n_trials=self.n_trials,
timeout=self.timeout,
show_progress_bar=True,
catch=(RuntimeError,) # Catch GPU OOM errors
)
# Build best config
best_trial = self.study.best_trial
self.best_config = self._suggest_hyperparameters_from_params(best_trial.params)
self.best_config.val_loss = best_trial.value
self.best_config.val_r2 = {
k.replace('r2_', ''): v
for k, v in best_trial.user_attrs.items()
if k.startswith('r2_')
}
# Store normalization
self.best_config.input_mean = self.input_mean
self.best_config.input_std = self.input_std
self.best_config.output_mean = self.output_mean
self.best_config.output_std = self.output_std
# Log results
logger.info(f"\nBest hyperparameters found:")
logger.info(f" Hidden dims: {self.best_config.hidden_dims}")
logger.info(f" Dropout: {self.best_config.dropout:.3f}")
logger.info(f" Activation: {self.best_config.activation}")
logger.info(f" Batch norm: {self.best_config.use_batch_norm}")
logger.info(f" Learning rate: {self.best_config.learning_rate:.2e}")
logger.info(f" Weight decay: {self.best_config.weight_decay:.2e}")
logger.info(f" Batch size: {self.best_config.batch_size}")
logger.info(f" Validation loss: {self.best_config.val_loss:.6f}")
if output_names:
for i, name in enumerate(output_names):
r2 = self.best_config.val_r2.get(f'output_{i}', 0)
logger.info(f" {name} R² (CV): {r2:.4f}")
return self.best_config
def _suggest_hyperparameters_from_params(self, params: Dict[str, Any]) -> SurrogateConfig:
"""Reconstruct config from Optuna params dict."""
n_layers = params['n_layers']
hidden_dims = [params[f'hidden_dim_{i}'] for i in range(n_layers)]
return SurrogateConfig(
hidden_dims=hidden_dims,
dropout=params['dropout'],
activation=params['activation'],
use_batch_norm=params['use_batch_norm'],
learning_rate=params['learning_rate'],
weight_decay=params['weight_decay'],
batch_size=params['batch_size']
)
def create_tuned_model(
self,
config: Optional[SurrogateConfig] = None
) -> TunableMLP:
"""Create a model with tuned hyperparameters."""
if config is None:
config = self.best_config
if config is None:
raise ValueError("No config available. Run tune() first.")
return TunableMLP(
input_dim=self.input_dim,
output_dim=self.output_dim,
hidden_dims=config.hidden_dims,
dropout=config.dropout,
activation=config.activation,
use_batch_norm=config.use_batch_norm
)
class TunedEnsembleSurrogate:
"""
Ensemble of tuned surrogate models for better uncertainty quantification.
Trains multiple models with different random seeds and aggregates predictions.
"""
def __init__(
self,
config: SurrogateConfig,
input_dim: int,
output_dim: int,
n_models: int = 5,
device: str = 'auto'
):
"""
Initialize ensemble surrogate.
Args:
config: Tuned configuration to use for all models
input_dim: Number of input features
output_dim: Number of outputs
n_models: Number of models in ensemble
device: Computing device
"""
self.config = config
self.input_dim = input_dim
self.output_dim = output_dim
self.n_models = n_models
if device == 'auto':
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
self.device = torch.device(device)
self.models: List[TunableMLP] = []
self.trained = False
def train(self, X: np.ndarray, Y: np.ndarray, val_split: float = 0.2):
"""Train all models in the ensemble."""
logger.info(f"Training ensemble of {self.n_models} models...")
# Normalize using config stats
X_norm = (X - self.config.input_mean) / self.config.input_std
Y_norm = (Y - self.config.output_mean) / self.config.output_std
# Split data
n_val = int(len(X) * val_split)
indices = np.random.permutation(len(X))
train_idx, val_idx = indices[n_val:], indices[:n_val]
X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)
train_dataset = TensorDataset(X_train_t, Y_train_t)
self.models = []
for i in range(self.n_models):
torch.manual_seed(42 + i)
model = TunableMLP(
input_dim=self.input_dim,
output_dim=self.output_dim,
hidden_dims=self.config.hidden_dims,
dropout=self.config.dropout,
activation=self.config.activation,
use_batch_norm=self.config.use_batch_norm
).to(self.device)
optimizer = torch.optim.AdamW(
model.parameters(),
lr=self.config.learning_rate,
weight_decay=self.config.weight_decay
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
optimizer, T_max=self.config.max_epochs
)
train_loader = DataLoader(
train_dataset,
batch_size=self.config.batch_size,
shuffle=True
)
early_stopping = EarlyStopping(patience=self.config.early_stopping_patience)
for epoch in range(self.config.max_epochs):
model.train()
for X_batch, Y_batch in train_loader:
optimizer.zero_grad()
pred = model(X_batch)
loss = nn.functional.mse_loss(pred, Y_batch)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
model.eval()
with torch.no_grad():
val_pred = model(X_val_t)
val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()
if early_stopping(val_loss, model):
break
early_stopping.restore_best(model)
model.eval()
self.models.append(model)
logger.info(f" Model {i+1}/{self.n_models}: val_loss = {early_stopping.best_loss:.6f}")
self.trained = True
logger.info("Ensemble training complete")
def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""
Predict with uncertainty estimation.
Args:
X: Input features [n_samples, input_dim]
Returns:
Tuple of (mean_predictions, std_predictions)
"""
if not self.trained:
raise RuntimeError("Ensemble not trained. Call train() first.")
# Normalize input
X_norm = (X - self.config.input_mean) / self.config.input_std
X_t = torch.tensor(X_norm, dtype=torch.float32, device=self.device)
# Collect predictions from all models
predictions = []
for model in self.models:
model.eval()
with torch.no_grad():
pred = model(X_t).cpu().numpy()
# Denormalize
pred = pred * self.config.output_std + self.config.output_mean
predictions.append(pred)
predictions = np.array(predictions) # [n_models, n_samples, output_dim]
mean_pred = predictions.mean(axis=0)
std_pred = predictions.std(axis=0)
return mean_pred, std_pred
def predict_single(self, params: Dict[str, float], var_names: List[str]) -> Tuple[Dict[str, float], float]:
"""
Predict for a single point with uncertainty.
Args:
params: Dictionary of input parameters
var_names: List of variable names in order
Returns:
Tuple of (predictions dict, total uncertainty)
"""
X = np.array([[params[name] for name in var_names]])
mean, std = self.predict(X)
pred_dict = {f'output_{i}': mean[0, i] for i in range(self.output_dim)}
uncertainty = float(np.sum(std[0]))
return pred_dict, uncertainty
def save(self, path: Path):
"""Save ensemble to disk."""
state = {
'config': {
'hidden_dims': self.config.hidden_dims,
'dropout': self.config.dropout,
'activation': self.config.activation,
'use_batch_norm': self.config.use_batch_norm,
'learning_rate': self.config.learning_rate,
'weight_decay': self.config.weight_decay,
'batch_size': self.config.batch_size,
'input_mean': self.config.input_mean.tolist(),
'input_std': self.config.input_std.tolist(),
'output_mean': self.config.output_mean.tolist(),
'output_std': self.config.output_std.tolist(),
},
'n_models': self.n_models,
'model_states': [m.state_dict() for m in self.models]
}
torch.save(state, path)
logger.info(f"Saved ensemble to {path}")
def load(self, path: Path):
"""Load ensemble from disk."""
state = torch.load(path, map_location=self.device)
# Restore config
cfg = state['config']
self.config = SurrogateConfig(
hidden_dims=cfg['hidden_dims'],
dropout=cfg['dropout'],
activation=cfg['activation'],
use_batch_norm=cfg['use_batch_norm'],
learning_rate=cfg['learning_rate'],
weight_decay=cfg['weight_decay'],
batch_size=cfg['batch_size'],
input_mean=np.array(cfg['input_mean']),
input_std=np.array(cfg['input_std']),
output_mean=np.array(cfg['output_mean']),
output_std=np.array(cfg['output_std'])
)
self.n_models = state['n_models']
self.models = []
for model_state in state['model_states']:
model = TunableMLP(
input_dim=self.input_dim,
output_dim=self.output_dim,
hidden_dims=self.config.hidden_dims,
dropout=self.config.dropout,
activation=self.config.activation,
use_batch_norm=self.config.use_batch_norm
).to(self.device)
model.load_state_dict(model_state)
model.eval()
self.models.append(model)
self.trained = True
logger.info(f"Loaded ensemble with {self.n_models} models from {path}")
def tune_surrogate_for_study(
fea_data: List[Dict],
design_var_names: List[str],
objective_names: List[str],
n_tuning_trials: int = 50,
n_ensemble_models: int = 5
) -> TunedEnsembleSurrogate:
"""
Convenience function to tune and create ensemble surrogate.
Args:
fea_data: List of FEA results with 'params' and 'objectives' keys
design_var_names: List of design variable names
objective_names: List of objective names
n_tuning_trials: Number of Optuna trials
n_ensemble_models: Number of models in ensemble
Returns:
Trained TunedEnsembleSurrogate
"""
# Prepare data
X = np.array([[d['params'][name] for name in design_var_names] for d in fea_data])
Y = np.array([[d['objectives'][name] for name in objective_names] for d in fea_data])
logger.info(f"Tuning surrogate on {len(X)} samples...")
logger.info(f"Input: {len(design_var_names)} design variables")
logger.info(f"Output: {len(objective_names)} objectives")
# Tune hyperparameters
tuner = SurrogateHyperparameterTuner(
input_dim=len(design_var_names),
output_dim=len(objective_names),
n_trials=n_tuning_trials,
n_cv_folds=5
)
best_config = tuner.tune(X, Y, output_names=objective_names)
# Create and train ensemble
ensemble = TunedEnsembleSurrogate(
config=best_config,
input_dim=len(design_var_names),
output_dim=len(objective_names),
n_models=n_ensemble_models
)
ensemble.train(X, Y)
return ensemble

View File

@@ -0,0 +1,385 @@
"""
Training Data Exporter for AtomizerField
This module exports training data from Atomizer optimization runs for AtomizerField neural network training.
It saves NX Nastran input (.dat) and output (.op2) files along with metadata for each trial.
Usage:
from optimization_engine.processors.surrogates.training_data_exporter import create_exporter_from_config
exporter = create_exporter_from_config(config)
if exporter:
exporter.export_trial(trial_number, design_vars, results, simulation_files)
exporter.finalize()
"""
import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List
import logging
logger = logging.getLogger(__name__)
class TrainingDataExporter:
"""
Exports training data for AtomizerField neural network training.
After each FEA solve, saves:
- Input: NX Nastran .dat file (BDF format)
- Output: NX Nastran .op2 file (binary results)
- Metadata: JSON with design parameters, objectives, constraints
Directory structure:
{export_dir}/
├── trial_0001/
│ ├── input/
│ │ └── model.bdf
│ ├── output/
│ │ └── model.op2
│ └── metadata.json
├── trial_0002/
│ └── ...
├── study_summary.json
└── README.md
"""
def __init__(
self,
export_dir: Path,
study_name: str,
design_variable_names: List[str],
objective_names: List[str],
constraint_names: Optional[List[str]] = None,
metadata: Optional[Dict[str, Any]] = None
):
"""
Initialize the training data exporter.
Args:
export_dir: Root directory for exported training data
study_name: Name of the optimization study
design_variable_names: List of design variable names
objective_names: List of objective function names
constraint_names: List of constraint names (optional)
metadata: Additional study metadata (optional)
"""
self.export_dir = Path(export_dir)
self.study_name = study_name
self.design_variable_names = design_variable_names
self.objective_names = objective_names
self.constraint_names = constraint_names or []
self.study_metadata = metadata or {}
self.trial_count = 0
self.exported_trials: List[Dict[str, Any]] = []
# Create root export directory
self.export_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"Training data exporter initialized: {self.export_dir}")
# Create README
self._create_readme()
def export_trial(
self,
trial_number: int,
design_variables: Dict[str, float],
results: Dict[str, Any],
simulation_files: Dict[str, Path]
) -> bool:
"""
Export training data for a single trial.
Args:
trial_number: Optuna trial number
design_variables: Dict of design parameter names and values
results: Dict containing objectives, constraints, and other results
simulation_files: Dict with paths to 'dat_file' and 'op2_file'
Returns:
True if export successful, False otherwise
"""
try:
# Create trial directory
trial_dir = self.export_dir / f"trial_{trial_number:04d}"
trial_dir.mkdir(parents=True, exist_ok=True)
input_dir = trial_dir / "input"
output_dir = trial_dir / "output"
input_dir.mkdir(exist_ok=True)
output_dir.mkdir(exist_ok=True)
# Copy .dat file (Nastran input deck)
dat_file = simulation_files.get('dat_file')
if dat_file and Path(dat_file).exists():
shutil.copy2(dat_file, input_dir / "model.bdf")
logger.debug(f"Copied .dat file: {dat_file} -> {input_dir / 'model.bdf'}")
else:
logger.warning(f"Trial {trial_number}: .dat file not found at {dat_file}")
return False
# Copy .op2 file (Nastran binary results)
op2_file = simulation_files.get('op2_file')
if op2_file and Path(op2_file).exists():
shutil.copy2(op2_file, output_dir / "model.op2")
logger.debug(f"Copied .op2 file: {op2_file} -> {output_dir / 'model.op2'}")
else:
logger.warning(f"Trial {trial_number}: .op2 file not found at {op2_file}")
return False
# Create metadata.json
metadata = self._create_trial_metadata(
trial_number,
design_variables,
results
)
metadata_path = trial_dir / "metadata.json"
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
logger.info(f"Exported training data for trial {trial_number}")
self.trial_count += 1
self.exported_trials.append(metadata)
return True
except Exception as e:
logger.error(f"Failed to export trial {trial_number}: {e}", exc_info=True)
return False
def _create_trial_metadata(
self,
trial_number: int,
design_variables: Dict[str, float],
results: Dict[str, Any]
) -> Dict[str, Any]:
"""
Create metadata dictionary for a trial.
Args:
trial_number: Optuna trial number
design_variables: Design parameter values
results: Optimization results
Returns:
Metadata dictionary
"""
metadata = {
"trial_number": trial_number,
"timestamp": datetime.now().isoformat(),
"atomizer_study": self.study_name,
"design_parameters": design_variables,
"results": {}
}
# Extract objectives
if "objectives" in results:
metadata["results"]["objectives"] = results["objectives"]
# Extract constraints
if "constraints" in results:
metadata["results"]["constraints"] = results["constraints"]
# Extract any scalar results (max_stress, max_displacement, etc.)
for key in ["max_stress", "max_displacement", "mass", "volume"]:
if key in results:
metadata["results"][key] = results[key]
# Add any additional result fields
for key, value in results.items():
if key not in ["objectives", "constraints"] and isinstance(value, (int, float, str, bool)):
metadata["results"][key] = value
return metadata
def finalize(self) -> None:
"""
Finalize the training data export.
Creates study_summary.json with metadata about the entire study.
"""
try:
summary = {
"study_name": self.study_name,
"total_trials": self.trial_count,
"design_variables": self.design_variable_names,
"objectives": self.objective_names,
"constraints": self.constraint_names,
"export_timestamp": datetime.now().isoformat(),
"metadata": self.study_metadata
}
summary_path = self.export_dir / "study_summary.json"
with open(summary_path, 'w') as f:
json.dump(summary, f, indent=2)
logger.info(f"Training data export finalized: {self.trial_count} trials exported")
logger.info(f"Summary saved to: {summary_path}")
except Exception as e:
logger.error(f"Failed to finalize training data export: {e}", exc_info=True)
def _create_readme(self) -> None:
"""Create README.md explaining the training data structure."""
readme_content = f"""# AtomizerField Training Data
**Study Name**: {self.study_name}
**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
## Directory Structure
```
{self.export_dir.name}/
├── trial_0001/
│ ├── input/
│ │ └── model.bdf # NX Nastran input deck (BDF format)
│ ├── output/
│ │ └── model.op2 # NX Nastran binary results (OP2 format)
│ └── metadata.json # Design parameters, objectives, constraints
├── trial_0002/
│ └── ...
├── study_summary.json # Overall study metadata
└── README.md # This file
```
## Design Variables
{chr(10).join(f'- {name}' for name in self.design_variable_names)}
## Objectives
{chr(10).join(f'- {name}' for name in self.objective_names)}
## Constraints
{chr(10).join(f'- {name}' for name in self.constraint_names) if self.constraint_names else '- None'}
## Usage with AtomizerField
### 1. Parse Training Data
```bash
cd Atomizer-Field
python batch_parser.py --data-dir "{self.export_dir}"
```
This converts BDF/OP2 files to PyTorch Geometric format.
### 2. Validate Parsed Data
```bash
python validate_parsed_data.py
```
### 3. Train Neural Network
```bash
python train.py --data-dir "training_data/parsed/" --epochs 200
```
### 4. Use Trained Model in Atomizer
```bash
cd ../Atomizer
python run_optimization.py --config studies/{self.study_name}/workflow_config.json --use-neural
```
## File Formats
- **BDF (.bdf)**: Nastran Bulk Data File - contains mesh, materials, loads, BCs
- **OP2 (.op2)**: Nastran Output2 - binary results with displacements, stresses, etc.
- **metadata.json**: Human-readable trial metadata
## AtomizerField Documentation
See `Atomizer-Field/docs/` for complete documentation on:
- Neural network architecture
- Training procedures
- Integration with Atomizer
- Uncertainty quantification
---
*Generated by Atomizer Training Data Exporter*
"""
readme_path = self.export_dir / "README.md"
with open(readme_path, 'w', encoding='utf-8') as f:
f.write(readme_content)
logger.debug(f"Created README: {readme_path}")
def create_exporter_from_config(config: Dict[str, Any]) -> Optional[TrainingDataExporter]:
"""
Factory function to create TrainingDataExporter from workflow configuration.
Args:
config: Workflow configuration dictionary
Returns:
TrainingDataExporter instance if enabled, None otherwise
Example config YAML:
training_data_export:
enabled: true
export_dir: "atomizer_field_training_data/beam_study_001"
"""
if not config.get("training_data_export", {}).get("enabled", False):
logger.info("Training data export is disabled")
return None
export_config = config["training_data_export"]
# Get export directory
export_dir = export_config.get("export_dir")
if not export_dir:
logger.error("Training data export enabled but 'export_dir' not specified")
return None
# Get study name
study_name = config.get("study_name", "unnamed_study")
# Get design variable names
design_vars = config.get("design_variables", [])
design_var_names = [dv.get("name", dv.get("parameter", f"var_{i}"))
for i, dv in enumerate(design_vars)]
# Get objective names
objectives = config.get("objectives", [])
objective_names = [obj.get("name", f"obj_{i}")
for i, obj in enumerate(objectives)]
# Get constraint names
constraints = config.get("constraints", [])
constraint_names = [c.get("name", f"constraint_{i}")
for i, c in enumerate(constraints)]
# Additional metadata
metadata = {
"atomizer_version": config.get("version", "unknown"),
"optimization_algorithm": config.get("optimization", {}).get("algorithm", "unknown"),
"n_trials": config.get("optimization", {}).get("n_trials", 0)
}
try:
exporter = TrainingDataExporter(
export_dir=Path(export_dir),
study_name=study_name,
design_variable_names=design_var_names,
objective_names=objective_names,
constraint_names=constraint_names,
metadata=metadata
)
logger.info("Training data exporter created successfully")
return exporter
except Exception as e:
logger.error(f"Failed to create training data exporter: {e}", exc_info=True)
return None