refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
25
optimization_engine/processors/__init__.py
Normal file
25
optimization_engine/processors/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""
|
||||
Optimization Processors
|
||||
=======================
|
||||
|
||||
Data processing algorithms and ML models.
|
||||
|
||||
Submodules:
|
||||
- surrogates/: Neural network surrogate models
|
||||
- dynamic_response/: Dynamic response processing (random vib, sine sweep)
|
||||
"""
|
||||
|
||||
# Lazy import for surrogates to avoid import errors
|
||||
def __getattr__(name):
|
||||
if name == 'surrogates':
|
||||
from . import surrogates
|
||||
return surrogates
|
||||
elif name == 'AdaptiveCharacterization':
|
||||
from .adaptive_characterization import AdaptiveCharacterization
|
||||
return AdaptiveCharacterization
|
||||
raise AttributeError(f"module 'optimization_engine.processors' has no attribute '{name}'")
|
||||
|
||||
__all__ = [
|
||||
'surrogates',
|
||||
'AdaptiveCharacterization',
|
||||
]
|
||||
415
optimization_engine/processors/adaptive_characterization.py
Normal file
415
optimization_engine/processors/adaptive_characterization.py
Normal file
@@ -0,0 +1,415 @@
|
||||
"""
|
||||
Adaptive Characterization Module - Intelligent stopping for landscape characterization.
|
||||
|
||||
This module implements adaptive stopping criteria for the characterization phase
|
||||
that intelligently determines when enough landscape exploration has been done.
|
||||
|
||||
Simple problems (smooth, unimodal) -> stop early (~10-15 trials)
|
||||
Complex problems (multimodal, rugged) -> continue longer (~20-30 trials)
|
||||
|
||||
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import optuna
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@dataclass
|
||||
class LandscapeMetricSnapshot:
|
||||
"""Snapshot of landscape metrics at a given trial."""
|
||||
trial_number: int
|
||||
smoothness: float
|
||||
multimodal: bool
|
||||
n_modes: int
|
||||
noise_level: float
|
||||
landscape_type: str
|
||||
overall_confidence: float
|
||||
|
||||
|
||||
class CharacterizationStoppingCriterion:
|
||||
"""
|
||||
Intelligently determines when characterization phase has gathered enough information.
|
||||
|
||||
Key Features:
|
||||
1. Progressive landscape analysis (every 5 trials starting at trial 10)
|
||||
2. Metric convergence detection (are metrics stabilizing?)
|
||||
3. Complexity-aware sample adequacy (complex problems need more trials)
|
||||
4. Parameter space coverage assessment
|
||||
5. Confidence scoring (combines all factors)
|
||||
|
||||
Stopping Decision:
|
||||
- Simple problems: Stop at ~10-15 trials when metrics converge
|
||||
- Complex problems: Continue to ~20-30 trials for adequate coverage
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
min_trials: int = 10,
|
||||
max_trials: int = 30,
|
||||
confidence_threshold: float = 0.85,
|
||||
check_interval: int = 5,
|
||||
verbose: bool = True,
|
||||
tracking_dir: Optional[Path] = None
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
min_trials: Minimum trials before considering stopping
|
||||
max_trials: Maximum trials (stop even if not converged)
|
||||
confidence_threshold: Confidence needed to stop (0-1)
|
||||
check_interval: How often to check stopping criteria
|
||||
verbose: Print progress reports
|
||||
tracking_dir: Directory to save characterization tracking
|
||||
"""
|
||||
self.min_trials = min_trials
|
||||
self.max_trials = max_trials
|
||||
self.confidence_threshold = confidence_threshold
|
||||
self.check_interval = check_interval
|
||||
self.verbose = verbose
|
||||
self.tracking_dir = tracking_dir
|
||||
|
||||
# Track metric history across analyses
|
||||
self.metric_history: List[LandscapeMetricSnapshot] = []
|
||||
self.should_stop_flag = False
|
||||
self.stop_reason = ""
|
||||
self.final_confidence = 0.0
|
||||
|
||||
# Initialize tracking
|
||||
if tracking_dir:
|
||||
self.tracking_dir = Path(tracking_dir)
|
||||
self.tracking_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.characterization_log = self.tracking_dir / "characterization_progress.json"
|
||||
|
||||
def update(self, landscape: Dict, trial_number: int):
|
||||
"""
|
||||
Update with latest landscape analysis.
|
||||
|
||||
Args:
|
||||
landscape: Landscape analysis dictionary
|
||||
trial_number: Current trial number
|
||||
"""
|
||||
if not landscape.get('ready', False):
|
||||
return
|
||||
|
||||
# Create snapshot
|
||||
snapshot = LandscapeMetricSnapshot(
|
||||
trial_number=trial_number,
|
||||
smoothness=landscape['smoothness'],
|
||||
multimodal=landscape['multimodal'],
|
||||
n_modes=landscape['n_modes'],
|
||||
noise_level=landscape['noise_level'],
|
||||
landscape_type=landscape['landscape_type'],
|
||||
overall_confidence=0.0 # Will be calculated
|
||||
)
|
||||
|
||||
self.metric_history.append(snapshot)
|
||||
|
||||
# Calculate confidence
|
||||
confidence = self._calculate_confidence(landscape, trial_number)
|
||||
snapshot.overall_confidence = confidence
|
||||
|
||||
# Save progress
|
||||
self._save_progress()
|
||||
|
||||
# Print report
|
||||
if self.verbose:
|
||||
self._print_progress_report(trial_number, landscape, confidence)
|
||||
|
||||
# Check stopping criteria
|
||||
if trial_number >= self.min_trials:
|
||||
self._evaluate_stopping_criteria(landscape, trial_number, confidence)
|
||||
|
||||
def should_stop(self, study: optuna.Study) -> bool:
|
||||
"""
|
||||
Check if characterization should stop.
|
||||
|
||||
Args:
|
||||
study: Optuna study
|
||||
|
||||
Returns:
|
||||
True if should stop characterization
|
||||
"""
|
||||
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
|
||||
n_trials = len(completed_trials)
|
||||
|
||||
# Force stop at max trials
|
||||
if n_trials >= self.max_trials:
|
||||
self.should_stop_flag = True
|
||||
self.stop_reason = f"Maximum characterization trials reached ({self.max_trials})"
|
||||
return True
|
||||
|
||||
return self.should_stop_flag
|
||||
|
||||
def _calculate_confidence(self, landscape: Dict, trial_number: int) -> float:
|
||||
"""
|
||||
Calculate confidence score for stopping decision.
|
||||
|
||||
Confidence Components (weighted sum):
|
||||
1. Metric Stability (40%): Are metrics converging?
|
||||
2. Parameter Coverage (30%): Explored enough space?
|
||||
3. Sample Adequacy (20%): Enough samples for complexity?
|
||||
4. Landscape Clarity (10%): Clear classification?
|
||||
"""
|
||||
if trial_number < self.min_trials:
|
||||
return 0.0
|
||||
|
||||
# 1. Metric Stability Score
|
||||
stability_score = self._compute_metric_stability()
|
||||
|
||||
# 2. Parameter Coverage Score
|
||||
coverage_score = self._compute_parameter_coverage(landscape)
|
||||
|
||||
# 3. Sample Adequacy Score
|
||||
adequacy_score = self._compute_sample_adequacy(landscape, trial_number)
|
||||
|
||||
# 4. Landscape Clarity Score
|
||||
clarity_score = self._compute_landscape_clarity(landscape)
|
||||
|
||||
# Weighted confidence
|
||||
confidence = (
|
||||
0.40 * stability_score +
|
||||
0.30 * coverage_score +
|
||||
0.20 * adequacy_score +
|
||||
0.10 * clarity_score
|
||||
)
|
||||
|
||||
return confidence
|
||||
|
||||
def _compute_metric_stability(self) -> float:
|
||||
"""
|
||||
Compute how stable landscape metrics are.
|
||||
|
||||
High stability = metrics have converged (good for stopping)
|
||||
Low stability = metrics still changing (need more trials)
|
||||
"""
|
||||
if len(self.metric_history) < 3:
|
||||
return 0.0
|
||||
|
||||
# Look at last 3 analyses
|
||||
recent_snapshots = self.metric_history[-3:]
|
||||
|
||||
# Check smoothness stability
|
||||
smoothness_values = [s.smoothness for s in recent_snapshots]
|
||||
smoothness_std = np.std(smoothness_values)
|
||||
smoothness_stable = smoothness_std < 0.05 # Stable if std < 0.05
|
||||
|
||||
# Check noise stability
|
||||
noise_values = [s.noise_level for s in recent_snapshots]
|
||||
noise_std = np.std(noise_values)
|
||||
noise_stable = noise_std < 0.1 # Stable if std < 0.1
|
||||
|
||||
# Check landscape type consistency
|
||||
landscape_types = [s.landscape_type for s in recent_snapshots]
|
||||
type_consistent = len(set(landscape_types)) == 1 # All same type
|
||||
|
||||
# Check n_modes stability
|
||||
n_modes = [s.n_modes for s in recent_snapshots]
|
||||
modes_consistent = len(set(n_modes)) <= 1 # Same or ±1
|
||||
|
||||
# Combine stability indicators
|
||||
stability_indicators = [
|
||||
1.0 if smoothness_stable else 0.0,
|
||||
1.0 if noise_stable else 0.0,
|
||||
1.0 if type_consistent else 0.0,
|
||||
1.0 if modes_consistent else 0.0
|
||||
]
|
||||
|
||||
stability_score = np.mean(stability_indicators)
|
||||
return stability_score
|
||||
|
||||
def _compute_parameter_coverage(self, landscape: Dict) -> float:
|
||||
"""
|
||||
Compute how well parameter space has been explored.
|
||||
|
||||
High coverage = explored wide range of each parameter
|
||||
"""
|
||||
param_ranges = landscape.get('parameter_ranges', {})
|
||||
|
||||
if not param_ranges:
|
||||
return 0.5 # Unknown
|
||||
|
||||
coverage_scores = []
|
||||
for param, ranges in param_ranges.items():
|
||||
coverage = ranges['coverage'] # Already computed in landscape analyzer
|
||||
coverage_scores.append(coverage)
|
||||
|
||||
avg_coverage = np.mean(coverage_scores)
|
||||
|
||||
# Normalize: 50% coverage = 0.5 score, 100% coverage = 1.0 score
|
||||
coverage_score = min(1.0, avg_coverage / 0.5)
|
||||
|
||||
return coverage_score
|
||||
|
||||
def _compute_sample_adequacy(self, landscape: Dict, trial_number: int) -> float:
|
||||
"""
|
||||
Compute if we have enough samples for the detected complexity.
|
||||
|
||||
Simple problems: 10 trials sufficient
|
||||
Complex problems: 20-30 trials needed
|
||||
"""
|
||||
dimensionality = landscape.get('dimensionality', 2)
|
||||
multimodal = landscape.get('multimodal', False)
|
||||
n_modes = landscape.get('n_modes', 1)
|
||||
|
||||
# Calculate required samples based on complexity
|
||||
if multimodal and n_modes > 2:
|
||||
# Complex multimodal: need more samples
|
||||
required_samples = 10 + 5 * n_modes + 2 * dimensionality
|
||||
elif multimodal:
|
||||
# Simple multimodal: moderate samples
|
||||
required_samples = 15 + 2 * dimensionality
|
||||
else:
|
||||
# Unimodal: fewer samples needed
|
||||
required_samples = 10 + dimensionality
|
||||
|
||||
# Cap at max_trials
|
||||
required_samples = min(required_samples, self.max_trials)
|
||||
|
||||
# Score based on how many samples we have vs required
|
||||
adequacy_score = min(1.0, trial_number / required_samples)
|
||||
|
||||
return adequacy_score
|
||||
|
||||
def _compute_landscape_clarity(self, landscape: Dict) -> float:
|
||||
"""
|
||||
Compute how clearly we can classify the landscape.
|
||||
|
||||
Clear classification = high confidence in landscape type
|
||||
"""
|
||||
smoothness = landscape.get('smoothness', 0.5)
|
||||
noise_level = landscape.get('noise_level', 0.5)
|
||||
|
||||
# Clear cases:
|
||||
# - Very smooth (> 0.7) or very rugged (< 0.3)
|
||||
# - Low noise (< 0.3) or high noise (> 0.7)
|
||||
|
||||
smoothness_clarity = max(
|
||||
abs(smoothness - 0.7), # Distance from smooth threshold
|
||||
abs(smoothness - 0.3) # Distance from rugged threshold
|
||||
)
|
||||
|
||||
noise_clarity = max(
|
||||
abs(noise_level - 0.3), # Distance from low noise threshold
|
||||
abs(noise_level - 0.7) # Distance from high noise threshold
|
||||
)
|
||||
|
||||
# Normalize to 0-1
|
||||
clarity_score = min(1.0, (smoothness_clarity + noise_clarity) / 0.8)
|
||||
|
||||
return clarity_score
|
||||
|
||||
def _evaluate_stopping_criteria(self, landscape: Dict, trial_number: int, confidence: float):
|
||||
"""
|
||||
Evaluate if we should stop characterization.
|
||||
|
||||
Stop if:
|
||||
1. Confidence threshold met
|
||||
2. OR maximum trials reached
|
||||
"""
|
||||
if confidence >= self.confidence_threshold:
|
||||
self.should_stop_flag = True
|
||||
self.stop_reason = f"Characterization confidence threshold met ({confidence:.1%})"
|
||||
self.final_confidence = confidence
|
||||
|
||||
if self.verbose:
|
||||
print(f"\n{'='*70}")
|
||||
print(f" CHARACTERIZATION COMPLETE")
|
||||
print(f"{'='*70}")
|
||||
print(f" Trial #{trial_number}")
|
||||
print(f" Confidence: {confidence:.1%}")
|
||||
print(f" Landscape Type: {landscape['landscape_type'].upper()}")
|
||||
print(f" Ready for strategy selection")
|
||||
print(f"{'='*70}\n")
|
||||
|
||||
def _print_progress_report(self, trial_number: int, landscape: Dict, confidence: float):
|
||||
"""Print characterization progress report."""
|
||||
print(f"\n{'='*70}")
|
||||
print(f" CHARACTERIZATION PROGRESS - Trial #{trial_number}")
|
||||
print(f"{'='*70}")
|
||||
print(f" Landscape Type: {landscape['landscape_type']}")
|
||||
print(f" Smoothness: {landscape['smoothness']:.2f}")
|
||||
print(f" Multimodal: {'YES' if landscape['multimodal'] else 'NO'} ({landscape['n_modes']} modes)")
|
||||
print(f" Noise: {landscape['noise_level']:.2f}")
|
||||
print(f" Characterization Confidence: {confidence:.1%}")
|
||||
|
||||
if confidence >= self.confidence_threshold:
|
||||
print(f" Status: READY TO STOP (confidence >= {self.confidence_threshold:.0%})")
|
||||
else:
|
||||
remaining = self.confidence_threshold - confidence
|
||||
print(f" Status: CONTINUE (need +{remaining:.1%} confidence)")
|
||||
|
||||
print(f"{'='*70}\n")
|
||||
|
||||
def _save_progress(self):
|
||||
"""Save characterization progress to JSON."""
|
||||
if not self.tracking_dir:
|
||||
return
|
||||
|
||||
progress_data = {
|
||||
'min_trials': self.min_trials,
|
||||
'max_trials': self.max_trials,
|
||||
'confidence_threshold': self.confidence_threshold,
|
||||
'metric_history': [
|
||||
{
|
||||
'trial_number': s.trial_number,
|
||||
'smoothness': s.smoothness,
|
||||
'multimodal': s.multimodal,
|
||||
'n_modes': s.n_modes,
|
||||
'noise_level': s.noise_level,
|
||||
'landscape_type': s.landscape_type,
|
||||
'confidence': s.overall_confidence
|
||||
}
|
||||
for s in self.metric_history
|
||||
],
|
||||
'should_stop': self.should_stop_flag,
|
||||
'stop_reason': self.stop_reason,
|
||||
'final_confidence': self.final_confidence,
|
||||
'timestamp': datetime.now().isoformat()
|
||||
}
|
||||
|
||||
try:
|
||||
with open(self.characterization_log, 'w') as f:
|
||||
json.dump(progress_data, f, indent=2)
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f" Warning: Failed to save characterization progress: {e}")
|
||||
|
||||
def get_summary_report(self) -> str:
|
||||
"""Generate summary report of characterization phase."""
|
||||
if not self.metric_history:
|
||||
return "No characterization data available"
|
||||
|
||||
final_snapshot = self.metric_history[-1]
|
||||
|
||||
report = "\n" + "="*70 + "\n"
|
||||
report += " CHARACTERIZATION PHASE SUMMARY\n"
|
||||
report += "="*70 + "\n"
|
||||
report += f" Total Trials: {final_snapshot.trial_number}\n"
|
||||
report += f" Final Confidence: {final_snapshot.overall_confidence:.1%}\n"
|
||||
report += f" Stop Reason: {self.stop_reason}\n"
|
||||
report += f"\n FINAL LANDSCAPE CLASSIFICATION:\n"
|
||||
report += f" Type: {final_snapshot.landscape_type.upper()}\n"
|
||||
report += f" Smoothness: {final_snapshot.smoothness:.2f}\n"
|
||||
report += f" Multimodal: {'YES' if final_snapshot.multimodal else 'NO'} ({final_snapshot.n_modes} modes)\n"
|
||||
report += f" Noise Level: {final_snapshot.noise_level:.2f}\n"
|
||||
|
||||
if len(self.metric_history) >= 2:
|
||||
report += f"\n METRIC CONVERGENCE:\n"
|
||||
|
||||
# Show how metrics evolved
|
||||
first = self.metric_history[0]
|
||||
last = self.metric_history[-1]
|
||||
|
||||
smoothness_change = abs(last.smoothness - first.smoothness)
|
||||
report += f" Smoothness stability: {smoothness_change:.3f} (lower = more stable)\n"
|
||||
|
||||
type_changes = len(set(s.landscape_type for s in self.metric_history))
|
||||
report += f" Landscape type changes: {type_changes - 1}\n"
|
||||
|
||||
report += "="*70 + "\n"
|
||||
|
||||
return report
|
||||
79
optimization_engine/processors/surrogates/__init__.py
Normal file
79
optimization_engine/processors/surrogates/__init__.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""
|
||||
Surrogate Models
|
||||
================
|
||||
|
||||
Neural network and ML surrogate models for FEA acceleration.
|
||||
|
||||
Available modules:
|
||||
- neural_surrogate: AtomizerField neural network surrogate
|
||||
- generic_surrogate: Flexible surrogate interface
|
||||
- adaptive_surrogate: Self-improving surrogate
|
||||
- simple_mlp_surrogate: Simple multi-layer perceptron
|
||||
- active_learning_surrogate: Active learning surrogate
|
||||
- surrogate_tuner: Hyperparameter tuning
|
||||
- auto_trainer: Automatic model training
|
||||
- training_data_exporter: Export training data from studies
|
||||
|
||||
Note: Imports are done on-demand to avoid import errors from optional dependencies.
|
||||
"""
|
||||
|
||||
# Lazy imports to avoid circular dependencies and optional dependency issues
|
||||
def __getattr__(name):
|
||||
"""Lazy import mechanism for surrogate modules."""
|
||||
if name == 'NeuralSurrogate':
|
||||
from .neural_surrogate import NeuralSurrogate
|
||||
return NeuralSurrogate
|
||||
elif name == 'create_surrogate_for_study':
|
||||
from .neural_surrogate import create_surrogate_for_study
|
||||
return create_surrogate_for_study
|
||||
elif name == 'GenericSurrogate':
|
||||
from .generic_surrogate import GenericSurrogate
|
||||
return GenericSurrogate
|
||||
elif name == 'ConfigDrivenSurrogate':
|
||||
from .generic_surrogate import ConfigDrivenSurrogate
|
||||
return ConfigDrivenSurrogate
|
||||
elif name == 'create_surrogate':
|
||||
from .generic_surrogate import create_surrogate
|
||||
return create_surrogate
|
||||
elif name == 'AdaptiveSurrogate':
|
||||
from .adaptive_surrogate import AdaptiveSurrogate
|
||||
return AdaptiveSurrogate
|
||||
elif name == 'SimpleSurrogate':
|
||||
from .simple_mlp_surrogate import SimpleSurrogate
|
||||
return SimpleSurrogate
|
||||
elif name == 'ActiveLearningSurrogate':
|
||||
from .active_learning_surrogate import ActiveLearningSurrogate
|
||||
return ActiveLearningSurrogate
|
||||
elif name == 'SurrogateHyperparameterTuner':
|
||||
from .surrogate_tuner import SurrogateHyperparameterTuner
|
||||
return SurrogateHyperparameterTuner
|
||||
elif name == 'tune_surrogate_for_study':
|
||||
from .surrogate_tuner import tune_surrogate_for_study
|
||||
return tune_surrogate_for_study
|
||||
elif name == 'AutoTrainer':
|
||||
from .auto_trainer import AutoTrainer
|
||||
return AutoTrainer
|
||||
elif name == 'TrainingDataExporter':
|
||||
from .training_data_exporter import TrainingDataExporter
|
||||
return TrainingDataExporter
|
||||
elif name == 'create_exporter_from_config':
|
||||
from .training_data_exporter import create_exporter_from_config
|
||||
return create_exporter_from_config
|
||||
|
||||
raise AttributeError(f"module 'optimization_engine.processors.surrogates' has no attribute '{name}'")
|
||||
|
||||
__all__ = [
|
||||
'NeuralSurrogate',
|
||||
'create_surrogate_for_study',
|
||||
'GenericSurrogate',
|
||||
'ConfigDrivenSurrogate',
|
||||
'create_surrogate',
|
||||
'AdaptiveSurrogate',
|
||||
'SimpleSurrogate',
|
||||
'ActiveLearningSurrogate',
|
||||
'SurrogateHyperparameterTuner',
|
||||
'tune_surrogate_for_study',
|
||||
'AutoTrainer',
|
||||
'TrainingDataExporter',
|
||||
'create_exporter_from_config',
|
||||
]
|
||||
@@ -0,0 +1,591 @@
|
||||
"""
|
||||
Active Learning Surrogate with Uncertainty Estimation
|
||||
|
||||
This module implements an ensemble-based neural network surrogate that:
|
||||
1. Provides uncertainty estimates via ensemble disagreement
|
||||
2. Supports active learning for strategic FEA validation
|
||||
3. Tracks confidence and knows when predictions are reliable
|
||||
|
||||
Key Concept:
|
||||
- Train multiple NNs (ensemble) on slightly different data (bootstrap)
|
||||
- Uncertainty = disagreement between ensemble members
|
||||
- High uncertainty regions need FEA validation
|
||||
- Low uncertainty + good accuracy = ready for optimization
|
||||
"""
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnsembleMLP(nn.Module):
|
||||
"""Single MLP member of the ensemble."""
|
||||
|
||||
def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = [64, 64, 32]):
|
||||
super().__init__()
|
||||
|
||||
layers = []
|
||||
prev_dim = input_dim
|
||||
for hidden_dim in hidden_dims:
|
||||
layers.extend([
|
||||
nn.Linear(prev_dim, hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(0.1)
|
||||
])
|
||||
prev_dim = hidden_dim
|
||||
layers.append(nn.Linear(prev_dim, output_dim))
|
||||
|
||||
self.network = nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x):
|
||||
return self.network(x)
|
||||
|
||||
|
||||
class ActiveLearningSurrogate:
|
||||
"""
|
||||
Ensemble-based surrogate with uncertainty estimation for active learning.
|
||||
|
||||
Strategy:
|
||||
1. Use ensemble of 5-10 neural networks
|
||||
2. Each trained on bootstrap sample of data
|
||||
3. Uncertainty = std dev of predictions across ensemble
|
||||
4. Select high-uncertainty designs for FEA validation
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
n_ensemble: int = 5,
|
||||
hidden_dims: List[int] = [64, 64, 32],
|
||||
device: str = 'cpu'
|
||||
):
|
||||
self.n_ensemble = n_ensemble
|
||||
self.hidden_dims = hidden_dims
|
||||
self.device = device
|
||||
|
||||
self.models: List[EnsembleMLP] = []
|
||||
self.design_var_names: List[str] = []
|
||||
self.objective_names: List[str] = ['mass', 'frequency', 'max_displacement', 'max_stress']
|
||||
|
||||
# Normalization parameters
|
||||
self.input_mean = None
|
||||
self.input_std = None
|
||||
self.output_mean = None
|
||||
self.output_std = None
|
||||
|
||||
# Training history for each ensemble member
|
||||
self.training_history = []
|
||||
|
||||
# Confidence tracking
|
||||
self.validation_errors = [] # Track FEA validation errors
|
||||
self.confidence_score = 0.0
|
||||
|
||||
def _normalize_input(self, x: np.ndarray) -> torch.Tensor:
|
||||
"""Normalize input features."""
|
||||
x_norm = (x - self.input_mean) / (self.input_std + 1e-8)
|
||||
return torch.FloatTensor(x_norm).to(self.device)
|
||||
|
||||
def _denormalize_output(self, y: torch.Tensor) -> np.ndarray:
|
||||
"""Denormalize output predictions."""
|
||||
y_np = y.cpu().numpy()
|
||||
return y_np * (self.output_std + 1e-8) + self.output_mean
|
||||
|
||||
def train(
|
||||
self,
|
||||
design_params: np.ndarray,
|
||||
objectives: np.ndarray,
|
||||
design_var_names: List[str],
|
||||
epochs: int = 200,
|
||||
lr: float = 0.001,
|
||||
batch_size: int = 32,
|
||||
val_split: float = 0.2
|
||||
):
|
||||
"""
|
||||
Train ensemble on the data with bootstrap sampling.
|
||||
|
||||
Args:
|
||||
design_params: (N, D) array of design parameters
|
||||
objectives: (N, O) array of objective values
|
||||
design_var_names: Names of design variables
|
||||
epochs: Training epochs per ensemble member
|
||||
lr: Learning rate
|
||||
batch_size: Batch size
|
||||
val_split: Validation split ratio
|
||||
"""
|
||||
self.design_var_names = design_var_names
|
||||
n_samples = len(design_params)
|
||||
input_dim = design_params.shape[1]
|
||||
output_dim = objectives.shape[1]
|
||||
|
||||
# Compute normalization parameters from full dataset
|
||||
self.input_mean = design_params.mean(axis=0)
|
||||
self.input_std = design_params.std(axis=0)
|
||||
self.output_mean = objectives.mean(axis=0)
|
||||
self.output_std = objectives.std(axis=0)
|
||||
|
||||
# Train each ensemble member on bootstrap sample
|
||||
self.models = []
|
||||
self.training_history = []
|
||||
|
||||
for i in range(self.n_ensemble):
|
||||
logger.info(f"Training ensemble member {i+1}/{self.n_ensemble}")
|
||||
|
||||
# Bootstrap sampling (sample with replacement)
|
||||
bootstrap_idx = np.random.choice(n_samples, size=n_samples, replace=True)
|
||||
X_boot = design_params[bootstrap_idx]
|
||||
y_boot = objectives[bootstrap_idx]
|
||||
|
||||
# Split into train/val
|
||||
n_val = int(len(X_boot) * val_split)
|
||||
indices = np.random.permutation(len(X_boot))
|
||||
train_idx, val_idx = indices[n_val:], indices[:n_val]
|
||||
|
||||
X_train = self._normalize_input(X_boot[train_idx])
|
||||
y_train = torch.FloatTensor((y_boot[train_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
|
||||
X_val = self._normalize_input(X_boot[val_idx])
|
||||
y_val = torch.FloatTensor((y_boot[val_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
|
||||
|
||||
# Create and train model
|
||||
model = EnsembleMLP(input_dim, output_dim, self.hidden_dims).to(self.device)
|
||||
optimizer = optim.Adam(model.parameters(), lr=lr)
|
||||
criterion = nn.MSELoss()
|
||||
|
||||
best_val_loss = float('inf')
|
||||
patience_counter = 0
|
||||
best_state = None
|
||||
|
||||
for epoch in range(epochs):
|
||||
model.train()
|
||||
|
||||
# Mini-batch training
|
||||
perm = torch.randperm(len(X_train))
|
||||
epoch_loss = 0.0
|
||||
n_batches = 0
|
||||
|
||||
for j in range(0, len(X_train), batch_size):
|
||||
batch_idx = perm[j:j+batch_size]
|
||||
X_batch = X_train[batch_idx]
|
||||
y_batch = y_train[batch_idx]
|
||||
|
||||
optimizer.zero_grad()
|
||||
pred = model(X_batch)
|
||||
loss = criterion(pred, y_batch)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
epoch_loss += loss.item()
|
||||
n_batches += 1
|
||||
|
||||
# Validation
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
val_pred = model(X_val)
|
||||
val_loss = criterion(val_pred, y_val).item()
|
||||
|
||||
# Early stopping
|
||||
if val_loss < best_val_loss:
|
||||
best_val_loss = val_loss
|
||||
best_state = model.state_dict().copy()
|
||||
patience_counter = 0
|
||||
else:
|
||||
patience_counter += 1
|
||||
if patience_counter >= 20:
|
||||
break
|
||||
|
||||
# Restore best model
|
||||
if best_state is not None:
|
||||
model.load_state_dict(best_state)
|
||||
|
||||
self.models.append(model)
|
||||
self.training_history.append({
|
||||
'member': i,
|
||||
'best_val_loss': best_val_loss,
|
||||
'epochs_trained': epoch + 1
|
||||
})
|
||||
|
||||
logger.info(f" Member {i+1}: val_loss={best_val_loss:.6f}, epochs={epoch+1}")
|
||||
|
||||
def predict(self, params: Dict[str, float]) -> Dict[str, float]:
|
||||
"""
|
||||
Predict objectives for a single design.
|
||||
|
||||
Returns dict with predictions and uncertainty estimates.
|
||||
"""
|
||||
# Convert to array
|
||||
x = np.array([[params.get(name, 0.0) for name in self.design_var_names]], dtype=np.float32)
|
||||
|
||||
# Get predictions from all ensemble members
|
||||
predictions = []
|
||||
for model in self.models:
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
x_norm = self._normalize_input(x)
|
||||
pred_norm = model(x_norm)
|
||||
pred = self._denormalize_output(pred_norm)
|
||||
predictions.append(pred[0])
|
||||
|
||||
predictions = np.array(predictions) # (n_ensemble, n_objectives)
|
||||
|
||||
# Mean prediction and uncertainty (std dev)
|
||||
mean_pred = predictions.mean(axis=0)
|
||||
std_pred = predictions.std(axis=0)
|
||||
|
||||
result = {}
|
||||
for i, name in enumerate(self.objective_names):
|
||||
result[name] = float(mean_pred[i])
|
||||
result[f'{name}_uncertainty'] = float(std_pred[i])
|
||||
|
||||
# Overall uncertainty score (normalized)
|
||||
result['total_uncertainty'] = float(np.mean(std_pred / (self.output_std + 1e-8)))
|
||||
|
||||
return result
|
||||
|
||||
def predict_batch(self, params_list: List[Dict[str, float]]) -> List[Dict[str, float]]:
|
||||
"""Predict for multiple designs efficiently."""
|
||||
return [self.predict(p) for p in params_list]
|
||||
|
||||
def select_designs_for_validation(
|
||||
self,
|
||||
candidate_designs: List[Dict[str, float]],
|
||||
n_select: int = 5,
|
||||
strategy: str = 'uncertainty'
|
||||
) -> List[Tuple[int, Dict[str, float], float]]:
|
||||
"""
|
||||
Select designs that should be validated with FEA.
|
||||
|
||||
Strategies:
|
||||
- 'uncertainty': Select highest uncertainty designs
|
||||
- 'pareto_uncertainty': Select from Pareto front with high uncertainty
|
||||
- 'diverse': Select diverse designs with moderate uncertainty
|
||||
|
||||
Returns: List of (index, params, uncertainty_score)
|
||||
"""
|
||||
# Get predictions with uncertainty
|
||||
predictions = self.predict_batch(candidate_designs)
|
||||
|
||||
# Score each design
|
||||
scored = []
|
||||
for i, (design, pred) in enumerate(zip(candidate_designs, predictions)):
|
||||
uncertainty = pred['total_uncertainty']
|
||||
scored.append((i, design, pred, uncertainty))
|
||||
|
||||
if strategy == 'uncertainty':
|
||||
# Simply select highest uncertainty
|
||||
scored.sort(key=lambda x: x[3], reverse=True)
|
||||
|
||||
elif strategy == 'pareto_uncertainty':
|
||||
# Prefer Pareto-optimal designs with uncertainty
|
||||
# Simple proxy: designs with low mass and high frequency predictions
|
||||
for item in scored:
|
||||
pred = item[2]
|
||||
# Bonus for potentially good designs
|
||||
pareto_score = -pred['mass'] / 1000 + pred['frequency'] / 10
|
||||
# Combined score: uncertainty * pareto_potential
|
||||
item = (item[0], item[1], item[2], item[3] * (1 + 0.5 * pareto_score))
|
||||
scored.sort(key=lambda x: x[3], reverse=True)
|
||||
|
||||
elif strategy == 'diverse':
|
||||
# Select diverse designs using simple greedy selection
|
||||
selected = []
|
||||
remaining = scored.copy()
|
||||
|
||||
# First, select highest uncertainty
|
||||
remaining.sort(key=lambda x: x[3], reverse=True)
|
||||
selected.append(remaining.pop(0))
|
||||
|
||||
while len(selected) < n_select and remaining:
|
||||
# Find design most different from selected ones
|
||||
best_idx = 0
|
||||
best_min_dist = 0
|
||||
|
||||
for i, item in enumerate(remaining):
|
||||
design = item[1]
|
||||
min_dist = float('inf')
|
||||
for sel_item in selected:
|
||||
sel_design = sel_item[1]
|
||||
dist = sum((design.get(k, 0) - sel_design.get(k, 0))**2
|
||||
for k in self.design_var_names)
|
||||
min_dist = min(min_dist, dist)
|
||||
|
||||
# Weight by uncertainty too
|
||||
weighted_dist = min_dist * (1 + item[3])
|
||||
if weighted_dist > best_min_dist:
|
||||
best_min_dist = weighted_dist
|
||||
best_idx = i
|
||||
|
||||
selected.append(remaining.pop(best_idx))
|
||||
|
||||
return [(s[0], s[1], s[3]) for s in selected]
|
||||
|
||||
return [(s[0], s[1], s[3]) for s in scored[:n_select]]
|
||||
|
||||
def update_with_validation(
|
||||
self,
|
||||
validated_designs: List[Dict[str, float]],
|
||||
fea_results: List[Dict[str, float]]
|
||||
):
|
||||
"""
|
||||
Update validation error tracking with new FEA results.
|
||||
|
||||
This doesn't retrain the model, just tracks prediction accuracy.
|
||||
"""
|
||||
for design, fea_result in zip(validated_designs, fea_results):
|
||||
pred = self.predict(design)
|
||||
|
||||
errors = {}
|
||||
for name in ['mass', 'frequency']:
|
||||
if name in fea_result:
|
||||
pred_val = pred[name]
|
||||
actual_val = fea_result[name]
|
||||
error = abs(pred_val - actual_val) / (abs(actual_val) + 1e-8)
|
||||
errors[name] = error
|
||||
|
||||
self.validation_errors.append({
|
||||
'design': design,
|
||||
'predicted': {k: pred[k] for k in self.objective_names},
|
||||
'actual': fea_result,
|
||||
'errors': errors,
|
||||
'uncertainty': pred['total_uncertainty']
|
||||
})
|
||||
|
||||
# Update confidence score
|
||||
self._update_confidence()
|
||||
|
||||
def _update_confidence(self):
|
||||
"""Calculate overall confidence score based on validation history."""
|
||||
if not self.validation_errors:
|
||||
self.confidence_score = 0.0
|
||||
return
|
||||
|
||||
recent_errors = self.validation_errors[-20:] # Last 20 validations
|
||||
|
||||
mass_errors = [e['errors'].get('mass', 1.0) for e in recent_errors]
|
||||
freq_errors = [e['errors'].get('frequency', 1.0) for e in recent_errors]
|
||||
|
||||
# Confidence based on MAPE < 10%
|
||||
mass_conf = sum(1 for e in mass_errors if e < 0.10) / len(mass_errors)
|
||||
freq_conf = sum(1 for e in freq_errors if e < 0.10) / len(freq_errors)
|
||||
|
||||
# Combined confidence (frequency is harder, weight less)
|
||||
self.confidence_score = 0.6 * mass_conf + 0.4 * freq_conf
|
||||
|
||||
def get_confidence_report(self) -> Dict:
|
||||
"""Get detailed confidence metrics."""
|
||||
if not self.validation_errors:
|
||||
return {
|
||||
'confidence_score': 0.0,
|
||||
'n_validations': 0,
|
||||
'status': 'NO_DATA',
|
||||
'recommendation': 'Need FEA validation data'
|
||||
}
|
||||
|
||||
recent = self.validation_errors[-20:]
|
||||
|
||||
mass_mape = np.mean([e['errors'].get('mass', 1.0) for e in recent]) * 100
|
||||
freq_mape = np.mean([e['errors'].get('frequency', 1.0) for e in recent]) * 100
|
||||
|
||||
# Correlation between uncertainty and error
|
||||
uncertainties = [e['uncertainty'] for e in recent]
|
||||
total_errors = [np.mean(list(e['errors'].values())) for e in recent]
|
||||
|
||||
if len(set(uncertainties)) > 1 and len(set(total_errors)) > 1:
|
||||
correlation = np.corrcoef(uncertainties, total_errors)[0, 1]
|
||||
else:
|
||||
correlation = 0.0
|
||||
|
||||
# Determine status
|
||||
if self.confidence_score >= 0.8 and mass_mape < 5 and freq_mape < 15:
|
||||
status = 'HIGH_CONFIDENCE'
|
||||
recommendation = 'NN ready for optimization'
|
||||
elif self.confidence_score >= 0.5:
|
||||
status = 'MEDIUM_CONFIDENCE'
|
||||
recommendation = 'Continue targeted FEA validation in high-uncertainty regions'
|
||||
else:
|
||||
status = 'LOW_CONFIDENCE'
|
||||
recommendation = 'Need more FEA training data, especially in unexplored regions'
|
||||
|
||||
return {
|
||||
'confidence_score': self.confidence_score,
|
||||
'n_validations': len(self.validation_errors),
|
||||
'mass_mape': mass_mape,
|
||||
'freq_mape': freq_mape,
|
||||
'uncertainty_error_correlation': correlation,
|
||||
'status': status,
|
||||
'recommendation': recommendation
|
||||
}
|
||||
|
||||
def is_ready_for_optimization(self, threshold: float = 0.7) -> bool:
|
||||
"""Check if NN is confident enough for optimization."""
|
||||
return self.confidence_score >= threshold
|
||||
|
||||
def save(self, path: str):
|
||||
"""Save the ensemble model."""
|
||||
path = Path(path)
|
||||
|
||||
state = {
|
||||
'n_ensemble': self.n_ensemble,
|
||||
'hidden_dims': self.hidden_dims,
|
||||
'design_var_names': self.design_var_names,
|
||||
'objective_names': self.objective_names,
|
||||
'input_mean': self.input_mean.tolist() if self.input_mean is not None else None,
|
||||
'input_std': self.input_std.tolist() if self.input_std is not None else None,
|
||||
'output_mean': self.output_mean.tolist() if self.output_mean is not None else None,
|
||||
'output_std': self.output_std.tolist() if self.output_std is not None else None,
|
||||
'validation_errors': self.validation_errors,
|
||||
'confidence_score': self.confidence_score,
|
||||
'training_history': self.training_history,
|
||||
'models': [m.state_dict() for m in self.models]
|
||||
}
|
||||
|
||||
torch.save(state, path)
|
||||
logger.info(f"Saved ensemble surrogate to {path}")
|
||||
|
||||
@classmethod
|
||||
def load(cls, path: str) -> 'ActiveLearningSurrogate':
|
||||
"""Load the ensemble model."""
|
||||
path = Path(path)
|
||||
if not path.exists():
|
||||
raise FileNotFoundError(f"Model not found: {path}")
|
||||
|
||||
state = torch.load(path, map_location='cpu')
|
||||
|
||||
surrogate = cls(
|
||||
n_ensemble=state['n_ensemble'],
|
||||
hidden_dims=state['hidden_dims']
|
||||
)
|
||||
surrogate.design_var_names = state['design_var_names']
|
||||
surrogate.objective_names = state['objective_names']
|
||||
surrogate.input_mean = np.array(state['input_mean']) if state['input_mean'] else None
|
||||
surrogate.input_std = np.array(state['input_std']) if state['input_std'] else None
|
||||
surrogate.output_mean = np.array(state['output_mean']) if state['output_mean'] else None
|
||||
surrogate.output_std = np.array(state['output_std']) if state['output_std'] else None
|
||||
surrogate.validation_errors = state.get('validation_errors', [])
|
||||
surrogate.confidence_score = state.get('confidence_score', 0.0)
|
||||
surrogate.training_history = state.get('training_history', [])
|
||||
|
||||
# Reconstruct models
|
||||
input_dim = len(surrogate.design_var_names)
|
||||
output_dim = len(surrogate.objective_names)
|
||||
|
||||
for model_state in state['models']:
|
||||
model = EnsembleMLP(input_dim, output_dim, surrogate.hidden_dims)
|
||||
model.load_state_dict(model_state)
|
||||
surrogate.models.append(model)
|
||||
|
||||
logger.info(f"Loaded ensemble surrogate from {path}")
|
||||
return surrogate
|
||||
|
||||
|
||||
def extract_training_data_from_study(db_path: str, study_name: str):
|
||||
"""Extract training data from Optuna study database."""
|
||||
import optuna
|
||||
|
||||
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
|
||||
study = optuna.load_study(study_name=study_name, storage=storage)
|
||||
|
||||
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
|
||||
|
||||
if not completed_trials:
|
||||
raise ValueError("No completed trials found")
|
||||
|
||||
# Infer design variable names
|
||||
design_var_names = list(completed_trials[0].params.keys())
|
||||
|
||||
design_params_list = []
|
||||
objectives_list = []
|
||||
|
||||
for trial in completed_trials:
|
||||
if len(trial.values) < 2:
|
||||
continue
|
||||
|
||||
mass = trial.values[0]
|
||||
raw_freq = trial.values[1]
|
||||
frequency = -raw_freq if raw_freq < 0 else raw_freq
|
||||
|
||||
max_disp = trial.user_attrs.get('max_displacement', 0.0)
|
||||
max_stress = trial.user_attrs.get('max_stress', 0.0)
|
||||
|
||||
# Skip invalid
|
||||
if any(np.isinf(v) or np.isnan(v) for v in [mass, frequency]):
|
||||
continue
|
||||
if frequency <= 0:
|
||||
continue
|
||||
|
||||
params = [trial.params.get(name, 0.0) for name in design_var_names]
|
||||
design_params_list.append(params)
|
||||
objectives_list.append([mass, frequency, max_disp, max_stress])
|
||||
|
||||
return (
|
||||
np.array(design_params_list, dtype=np.float32),
|
||||
np.array(objectives_list, dtype=np.float32),
|
||||
design_var_names
|
||||
)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
project_root = Path(__file__).parent.parent
|
||||
|
||||
# Find database
|
||||
db_path = project_root / "studies/uav_arm_optimization/2_results/study.db"
|
||||
study_name = "uav_arm_optimization"
|
||||
|
||||
if not db_path.exists():
|
||||
db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db"
|
||||
study_name = "uav_arm_atomizerfield_test"
|
||||
|
||||
print("="*60)
|
||||
print("Training Active Learning Surrogate (Ensemble)")
|
||||
print("="*60)
|
||||
|
||||
# Extract data
|
||||
print(f"\nLoading data from {db_path}")
|
||||
design_params, objectives, design_var_names = extract_training_data_from_study(
|
||||
str(db_path), study_name
|
||||
)
|
||||
print(f"Loaded {len(design_params)} samples")
|
||||
print(f"Design variables: {design_var_names}")
|
||||
|
||||
# Train ensemble
|
||||
print("\nTraining 5-member ensemble...")
|
||||
surrogate = ActiveLearningSurrogate(n_ensemble=5)
|
||||
surrogate.train(design_params, objectives, design_var_names, epochs=200)
|
||||
|
||||
# Test predictions with uncertainty
|
||||
print("\n" + "="*60)
|
||||
print("Testing Predictions with Uncertainty")
|
||||
print("="*60)
|
||||
|
||||
# Test on a few samples
|
||||
test_designs = [
|
||||
{'beam_half_core_thickness': 2.0, 'beam_face_thickness': 1.0, 'holes_diameter': 5.0, 'hole_count': 10},
|
||||
{'beam_half_core_thickness': 5.0, 'beam_face_thickness': 2.0, 'holes_diameter': 20.0, 'hole_count': 8},
|
||||
{'beam_half_core_thickness': 1.0, 'beam_face_thickness': 0.5, 'holes_diameter': 2.0, 'hole_count': 6}, # Low data region
|
||||
]
|
||||
|
||||
for i, design in enumerate(test_designs):
|
||||
pred = surrogate.predict(design)
|
||||
print(f"\nDesign {i+1}: {design}")
|
||||
print(f" Mass: {pred['mass']:.1f}g +/- {pred['mass_uncertainty']:.1f}g")
|
||||
print(f" Freq: {pred['frequency']:.1f}Hz +/- {pred['frequency_uncertainty']:.1f}Hz")
|
||||
print(f" Total Uncertainty: {pred['total_uncertainty']:.3f}")
|
||||
|
||||
# Save model
|
||||
save_path = project_root / "active_learning_surrogate.pt"
|
||||
surrogate.save(str(save_path))
|
||||
print(f"\nSaved to {save_path}")
|
||||
|
||||
# Get confidence report
|
||||
print("\n" + "="*60)
|
||||
print("Confidence Report")
|
||||
print("="*60)
|
||||
report = surrogate.get_confidence_report()
|
||||
for k, v in report.items():
|
||||
print(f" {k}: {v}")
|
||||
393
optimization_engine/processors/surrogates/adaptive_surrogate.py
Normal file
393
optimization_engine/processors/surrogates/adaptive_surrogate.py
Normal file
@@ -0,0 +1,393 @@
|
||||
"""
|
||||
Adaptive surrogate modeling with confidence-based exploration-exploitation transitions.
|
||||
|
||||
This module implements state-of-the-art Bayesian optimization strategies that
|
||||
dynamically adjust exploration vs exploitation based on surrogate model confidence.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Optional, Dict, List
|
||||
import optuna
|
||||
from scipy.stats import variation
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class SurrogateConfidenceMetrics:
|
||||
"""Calculate confidence metrics for surrogate model quality.
|
||||
|
||||
STUDY-AWARE: Uses study.trials directly instead of session-based history
|
||||
to properly track confidence across multiple optimization runs.
|
||||
"""
|
||||
|
||||
def __init__(self, min_trials_for_confidence: int = 15):
|
||||
self.min_trials = min_trials_for_confidence
|
||||
|
||||
def update(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
|
||||
"""Update metrics after each trial (no-op for study-aware implementation)."""
|
||||
pass # Study-aware: we read directly from study.trials
|
||||
|
||||
def calculate_confidence(self, study: optuna.Study) -> Dict[str, float]:
|
||||
"""
|
||||
Calculate comprehensive surrogate confidence metrics.
|
||||
|
||||
STUDY-AWARE: Uses ALL completed trials from the study database,
|
||||
not just trials from the current session.
|
||||
|
||||
PROTOCOL 11: Multi-objective studies are NOT supported by adaptive
|
||||
characterization. Return immediately with max confidence to skip
|
||||
characterization phase.
|
||||
|
||||
Returns:
|
||||
Dict with confidence scores:
|
||||
- 'overall_confidence': 0-1 score, where 1 = high confidence
|
||||
- 'convergence_score': How stable recent improvements are
|
||||
- 'exploration_coverage': How well parameter space is covered
|
||||
- 'prediction_stability': How consistent the model predictions are
|
||||
"""
|
||||
# [Protocol 11] Multi-objective NOT supported by adaptive characterization
|
||||
is_multi_objective = len(study.directions) > 1
|
||||
if is_multi_objective:
|
||||
return {
|
||||
'overall_confidence': 1.0, # Skip characterization
|
||||
'convergence_score': 1.0,
|
||||
'exploration_coverage': 1.0,
|
||||
'prediction_stability': 1.0,
|
||||
'ready_for_exploitation': True, # Go straight to NSGA-II
|
||||
'total_trials': len(study.trials),
|
||||
'message': '[Protocol 11] Multi-objective: skipping adaptive characterization, using NSGA-II directly'
|
||||
}
|
||||
|
||||
# Get ALL completed trials from study (study-aware)
|
||||
all_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
|
||||
|
||||
if len(all_trials) < self.min_trials:
|
||||
return {
|
||||
'overall_confidence': 0.0,
|
||||
'convergence_score': 0.0,
|
||||
'exploration_coverage': 0.0,
|
||||
'prediction_stability': 0.0,
|
||||
'ready_for_exploitation': False,
|
||||
'total_trials': len(all_trials),
|
||||
'message': f'Need {self.min_trials - len(all_trials)} more trials for confidence assessment (currently {len(all_trials)} trials)'
|
||||
}
|
||||
|
||||
# 1. Convergence Score - are we finding consistent improvements?
|
||||
recent_window = 10
|
||||
recent_trials = all_trials[-recent_window:]
|
||||
recent_values = [t.value for t in recent_trials] # Safe: single-objective only
|
||||
|
||||
# Calculate improvement rate
|
||||
improvements = []
|
||||
for i in range(1, len(recent_values)):
|
||||
if recent_values[i] < recent_values[i-1]:
|
||||
improvement = (recent_values[i-1] - recent_values[i]) / abs(recent_values[i-1])
|
||||
improvements.append(improvement)
|
||||
|
||||
# If we're consistently finding improvements, convergence is good
|
||||
if improvements:
|
||||
avg_improvement = np.mean(improvements)
|
||||
improvement_consistency = 1.0 - variation(improvements) if len(improvements) > 1 else 0.5
|
||||
convergence_score = min(1.0, avg_improvement * improvement_consistency * 10)
|
||||
else:
|
||||
convergence_score = 0.0
|
||||
|
||||
# 2. Exploration Coverage - how well have we covered parameter space?
|
||||
# Use coefficient of variation for each parameter
|
||||
param_names = list(all_trials[0].params.keys()) if all_trials else []
|
||||
|
||||
coverage_scores = []
|
||||
for param in param_names:
|
||||
values = [t.params[param] for t in all_trials]
|
||||
|
||||
# Get parameter bounds
|
||||
distribution = all_trials[0].distributions[param]
|
||||
param_range = distribution.high - distribution.low
|
||||
|
||||
# Calculate spread relative to bounds
|
||||
spread = max(values) - min(values)
|
||||
coverage = spread / param_range
|
||||
coverage_scores.append(coverage)
|
||||
|
||||
exploration_coverage = np.mean(coverage_scores)
|
||||
|
||||
# 3. Prediction Stability - are recent trials clustered in good regions?
|
||||
recent_best_values = []
|
||||
current_best = float('inf')
|
||||
for trial in recent_trials:
|
||||
current_best = min(current_best, trial.value)
|
||||
recent_best_values.append(current_best)
|
||||
|
||||
# If best hasn't improved much recently, model is stable
|
||||
if len(recent_best_values) > 1:
|
||||
best_stability = 1.0 - (recent_best_values[0] - recent_best_values[-1]) / (recent_best_values[0] + 1e-10)
|
||||
prediction_stability = max(0.0, min(1.0, best_stability))
|
||||
else:
|
||||
prediction_stability = 0.0
|
||||
|
||||
# 4. Overall Confidence - weighted combination
|
||||
overall_confidence = (
|
||||
0.4 * convergence_score +
|
||||
0.3 * exploration_coverage +
|
||||
0.3 * prediction_stability
|
||||
)
|
||||
|
||||
# Decision: Ready for intensive exploitation?
|
||||
ready_for_exploitation = (
|
||||
overall_confidence >= 0.65 and
|
||||
exploration_coverage >= 0.5 and
|
||||
len(all_trials) >= self.min_trials
|
||||
)
|
||||
|
||||
message = self._get_confidence_message(overall_confidence, ready_for_exploitation)
|
||||
|
||||
return {
|
||||
'overall_confidence': overall_confidence,
|
||||
'convergence_score': convergence_score,
|
||||
'exploration_coverage': exploration_coverage,
|
||||
'prediction_stability': prediction_stability,
|
||||
'ready_for_exploitation': ready_for_exploitation,
|
||||
'total_trials': len(all_trials),
|
||||
'message': message
|
||||
}
|
||||
|
||||
def _get_confidence_message(self, confidence: float, ready: bool) -> str:
|
||||
"""Generate human-readable confidence assessment."""
|
||||
if ready:
|
||||
return f"HIGH CONFIDENCE ({confidence:.1%}) - Transitioning to exploitation phase"
|
||||
elif confidence >= 0.5:
|
||||
return f"MEDIUM CONFIDENCE ({confidence:.1%}) - Continue exploration with some exploitation"
|
||||
elif confidence >= 0.3:
|
||||
return f"LOW CONFIDENCE ({confidence:.1%}) - Focus on exploration"
|
||||
else:
|
||||
return f"VERY LOW CONFIDENCE ({confidence:.1%}) - Need more diverse exploration"
|
||||
|
||||
|
||||
class AdaptiveExploitationCallback:
|
||||
"""
|
||||
Dynamically adjust sampler behavior based on surrogate confidence.
|
||||
|
||||
This callback monitors surrogate model confidence and adapts the optimization
|
||||
strategy from exploration-heavy to exploitation-heavy as confidence increases.
|
||||
|
||||
STUDY-AWARE: Tracks phase transitions across multiple optimization runs
|
||||
and persists confidence history to JSON files.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target_value: Optional[float] = None,
|
||||
tolerance: float = 0.1,
|
||||
min_confidence_for_exploitation: float = 0.65,
|
||||
min_trials: int = 15,
|
||||
verbose: bool = True,
|
||||
tracking_dir: Optional[Path] = None
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
target_value: Target objective value (if known)
|
||||
tolerance: Acceptable error from target
|
||||
min_confidence_for_exploitation: Confidence threshold to enable intensive exploitation
|
||||
min_trials: Minimum trials before confidence assessment
|
||||
verbose: Print confidence updates
|
||||
tracking_dir: Directory to save phase transition tracking files
|
||||
"""
|
||||
self.target_value = target_value
|
||||
self.tolerance = tolerance
|
||||
self.min_confidence = min_confidence_for_exploitation
|
||||
self.verbose = verbose
|
||||
self.tracking_dir = tracking_dir
|
||||
|
||||
self.metrics = SurrogateConfidenceMetrics(min_trials_for_confidence=min_trials)
|
||||
self.consecutive_successes = 0
|
||||
|
||||
# Initialize phase transition tracking
|
||||
self.phase_transition_file = None
|
||||
self.confidence_history_file = None
|
||||
if tracking_dir:
|
||||
self.tracking_dir = Path(tracking_dir)
|
||||
self.tracking_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.phase_transition_file = self.tracking_dir / "phase_transitions.json"
|
||||
self.confidence_history_file = self.tracking_dir / "confidence_history.json"
|
||||
|
||||
# Load existing phase transition data if available
|
||||
self.phase_transitions = self._load_phase_transitions()
|
||||
self.confidence_history = self._load_confidence_history()
|
||||
|
||||
# Determine current phase from history
|
||||
self.phase = self._get_current_phase()
|
||||
|
||||
def _load_phase_transitions(self) -> List[Dict]:
|
||||
"""Load existing phase transition history from JSON."""
|
||||
if self.phase_transition_file and self.phase_transition_file.exists():
|
||||
try:
|
||||
with open(self.phase_transition_file, 'r') as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return []
|
||||
return []
|
||||
|
||||
def _load_confidence_history(self) -> List[Dict]:
|
||||
"""Load existing confidence history from JSON."""
|
||||
if self.confidence_history_file and self.confidence_history_file.exists():
|
||||
try:
|
||||
with open(self.confidence_history_file, 'r') as f:
|
||||
return json.load(f)
|
||||
except Exception:
|
||||
return []
|
||||
return []
|
||||
|
||||
def _get_current_phase(self) -> str:
|
||||
"""Determine current phase from transition history."""
|
||||
if not self.phase_transitions:
|
||||
return "exploration"
|
||||
# If any transition to exploitation exists, we're in exploitation
|
||||
for transition in self.phase_transitions:
|
||||
if transition.get('to_phase') == 'exploitation':
|
||||
return "exploitation"
|
||||
return "exploration"
|
||||
|
||||
def _save_phase_transition(self, trial_number: int, confidence: Dict):
|
||||
"""Save phase transition event to JSON."""
|
||||
if not self.phase_transition_file:
|
||||
return
|
||||
|
||||
transition_event = {
|
||||
'trial_number': trial_number,
|
||||
'from_phase': 'exploration',
|
||||
'to_phase': 'exploitation',
|
||||
'confidence_metrics': {
|
||||
'overall_confidence': confidence['overall_confidence'],
|
||||
'convergence_score': confidence['convergence_score'],
|
||||
'exploration_coverage': confidence['exploration_coverage'],
|
||||
'prediction_stability': confidence['prediction_stability']
|
||||
},
|
||||
'total_trials': confidence.get('total_trials', trial_number + 1)
|
||||
}
|
||||
|
||||
self.phase_transitions.append(transition_event)
|
||||
|
||||
try:
|
||||
with open(self.phase_transition_file, 'w') as f:
|
||||
json.dump(self.phase_transitions, f, indent=2)
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f" Warning: Failed to save phase transition: {e}")
|
||||
|
||||
def _save_confidence_snapshot(self, trial_number: int, confidence: Dict):
|
||||
"""Save confidence metrics snapshot to history."""
|
||||
if not self.confidence_history_file:
|
||||
return
|
||||
|
||||
snapshot = {
|
||||
'trial_number': trial_number,
|
||||
'phase': self.phase,
|
||||
'confidence_metrics': {
|
||||
'overall_confidence': confidence['overall_confidence'],
|
||||
'convergence_score': confidence['convergence_score'],
|
||||
'exploration_coverage': confidence['exploration_coverage'],
|
||||
'prediction_stability': confidence['prediction_stability']
|
||||
},
|
||||
'total_trials': confidence.get('total_trials', trial_number + 1)
|
||||
}
|
||||
|
||||
self.confidence_history.append(snapshot)
|
||||
|
||||
try:
|
||||
with open(self.confidence_history_file, 'w') as f:
|
||||
json.dump(self.confidence_history, f, indent=2)
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f" Warning: Failed to save confidence history: {e}")
|
||||
|
||||
def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
|
||||
"""Called after each trial completes."""
|
||||
# Skip failed trials
|
||||
if trial.state != optuna.trial.TrialState.COMPLETE:
|
||||
return
|
||||
|
||||
# Update metrics (no-op for study-aware implementation)
|
||||
self.metrics.update(study, trial)
|
||||
|
||||
# Calculate confidence
|
||||
confidence = self.metrics.calculate_confidence(study)
|
||||
|
||||
# Save confidence snapshot every 5 trials
|
||||
if trial.number % 5 == 0:
|
||||
self._save_confidence_snapshot(trial.number, confidence)
|
||||
|
||||
# Print confidence report
|
||||
if self.verbose and trial.number % 5 == 0: # Every 5 trials
|
||||
self._print_confidence_report(trial.number, confidence)
|
||||
|
||||
# Check for phase transition
|
||||
if confidence['ready_for_exploitation'] and self.phase == "exploration":
|
||||
self.phase = "exploitation"
|
||||
|
||||
# Save transition event
|
||||
self._save_phase_transition(trial.number, confidence)
|
||||
|
||||
if self.verbose:
|
||||
print(f"\n{'='*60}")
|
||||
print(f" PHASE TRANSITION: EXPLORATION -> EXPLOITATION")
|
||||
print(f" Trial #{trial.number}")
|
||||
print(f" Surrogate confidence: {confidence['overall_confidence']:.1%}")
|
||||
print(f" Now focusing on refining best regions")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
# Check for target achievement
|
||||
if self.target_value is not None and trial.value <= self.tolerance:
|
||||
self.consecutive_successes += 1
|
||||
|
||||
if self.verbose:
|
||||
print(f" [TARGET] Trial #{trial.number}: {trial.value:.6f} ≤ {self.tolerance:.6f}")
|
||||
print(f" [TARGET] Consecutive successes: {self.consecutive_successes}/3")
|
||||
|
||||
# Stop after 3 consecutive successes in exploitation phase
|
||||
if self.consecutive_successes >= 3 and self.phase == "exploitation":
|
||||
if self.verbose:
|
||||
print(f"\n{'='*60}")
|
||||
print(f" TARGET ACHIEVED WITH HIGH CONFIDENCE")
|
||||
print(f" Best value: {study.best_value:.6f}")
|
||||
print(f" Stopping optimization")
|
||||
print(f"{'='*60}\n")
|
||||
study.stop()
|
||||
else:
|
||||
self.consecutive_successes = 0
|
||||
|
||||
def _print_confidence_report(self, trial_number: int, confidence: Dict):
|
||||
"""Print confidence metrics report."""
|
||||
print(f"\n [CONFIDENCE REPORT - Trial #{trial_number}]")
|
||||
print(f" Phase: {self.phase.upper()}")
|
||||
print(f" Overall Confidence: {confidence['overall_confidence']:.1%}")
|
||||
print(f" - Convergence: {confidence['convergence_score']:.1%}")
|
||||
print(f" - Coverage: {confidence['exploration_coverage']:.1%}")
|
||||
print(f" - Stability: {confidence['prediction_stability']:.1%}")
|
||||
print(f" {confidence['message']}")
|
||||
print()
|
||||
|
||||
|
||||
def create_adaptive_sampler(
|
||||
n_startup_trials: int = 10,
|
||||
multivariate: bool = True,
|
||||
confidence_threshold: float = 0.65
|
||||
) -> optuna.samplers.TPESampler:
|
||||
"""
|
||||
Create TPE sampler configured for adaptive exploration-exploitation.
|
||||
|
||||
Args:
|
||||
n_startup_trials: Initial random exploration trials
|
||||
multivariate: Enable multivariate TPE for correlated parameters
|
||||
confidence_threshold: Confidence needed before intensive exploitation
|
||||
|
||||
Returns:
|
||||
Configured TPESampler
|
||||
"""
|
||||
# Higher n_ei_candidates = more exploitation
|
||||
# Will be used once confidence threshold is reached
|
||||
return optuna.samplers.TPESampler(
|
||||
n_startup_trials=n_startup_trials,
|
||||
n_ei_candidates=24,
|
||||
multivariate=multivariate,
|
||||
warn_independent_sampling=True
|
||||
)
|
||||
522
optimization_engine/processors/surrogates/auto_trainer.py
Normal file
522
optimization_engine/processors/surrogates/auto_trainer.py
Normal file
@@ -0,0 +1,522 @@
|
||||
"""
|
||||
Auto-Training Trigger System for AtomizerField
|
||||
|
||||
Monitors training data collection and automatically triggers neural network training
|
||||
when enough data is accumulated. This is the key component to close the neural loop.
|
||||
|
||||
Workflow:
|
||||
1. Monitor training data directory for new trials
|
||||
2. When min_points threshold is reached, trigger training
|
||||
3. Validate trained model against FEA
|
||||
4. Deploy model for neural-accelerated optimization
|
||||
|
||||
Usage:
|
||||
from optimization_engine.processors.surrogates.auto_trainer import AutoTrainer
|
||||
|
||||
trainer = AutoTrainer(
|
||||
study_name="uav_arm_optimization",
|
||||
min_points=50,
|
||||
epochs=100
|
||||
)
|
||||
|
||||
# Check if ready to train
|
||||
if trainer.should_train():
|
||||
model_path = trainer.train()
|
||||
trainer.validate_model(model_path)
|
||||
|
||||
# Or run continuous monitoring
|
||||
trainer.watch()
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from datetime import datetime
|
||||
import shutil
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AutoTrainer:
|
||||
"""
|
||||
Automatic neural network training trigger for AtomizerField.
|
||||
|
||||
Monitors training data accumulation and triggers training when thresholds are met.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
study_name: str,
|
||||
training_data_dir: Optional[Path] = None,
|
||||
min_points: int = 50,
|
||||
epochs: int = 100,
|
||||
val_split: float = 0.2,
|
||||
retrain_threshold: int = 50,
|
||||
atomizer_field_dir: Optional[Path] = None,
|
||||
output_dir: Optional[Path] = None
|
||||
):
|
||||
"""
|
||||
Initialize the auto trainer.
|
||||
|
||||
Args:
|
||||
study_name: Name of the optimization study
|
||||
training_data_dir: Directory containing exported training data
|
||||
min_points: Minimum data points before training (default: 50)
|
||||
epochs: Training epochs (default: 100)
|
||||
val_split: Validation split ratio (default: 0.2)
|
||||
retrain_threshold: New points needed for retraining (default: 50)
|
||||
atomizer_field_dir: Path to atomizer-field repository
|
||||
output_dir: Directory for trained models
|
||||
"""
|
||||
self.study_name = study_name
|
||||
self.min_points = min_points
|
||||
self.epochs = epochs
|
||||
self.val_split = val_split
|
||||
self.retrain_threshold = retrain_threshold
|
||||
|
||||
# Set up directories
|
||||
project_root = Path(__file__).parent.parent
|
||||
self.training_data_dir = training_data_dir or (
|
||||
project_root / "atomizer_field_training_data" / study_name
|
||||
)
|
||||
self.atomizer_field_dir = atomizer_field_dir or (project_root / "atomizer-field")
|
||||
self.output_dir = output_dir or (
|
||||
self.atomizer_field_dir / "runs" / f"{study_name}_auto"
|
||||
)
|
||||
|
||||
# Tracking state
|
||||
self.last_trained_count = 0
|
||||
self.model_version = 0
|
||||
self.training_history: List[Dict[str, Any]] = []
|
||||
|
||||
# Load state if exists
|
||||
self._load_state()
|
||||
|
||||
logger.info(f"AutoTrainer initialized for {study_name}")
|
||||
logger.info(f"Training data: {self.training_data_dir}")
|
||||
logger.info(f"Min points: {min_points}, Retrain threshold: {retrain_threshold}")
|
||||
|
||||
def _state_file(self) -> Path:
|
||||
"""Get path to state file."""
|
||||
return self.output_dir / "auto_trainer_state.json"
|
||||
|
||||
def _load_state(self) -> None:
|
||||
"""Load trainer state from disk."""
|
||||
state_file = self._state_file()
|
||||
if state_file.exists():
|
||||
with open(state_file, 'r') as f:
|
||||
state = json.load(f)
|
||||
self.last_trained_count = state.get("last_trained_count", 0)
|
||||
self.model_version = state.get("model_version", 0)
|
||||
self.training_history = state.get("training_history", [])
|
||||
logger.info(f"Loaded state: {self.last_trained_count} points trained, version {self.model_version}")
|
||||
|
||||
def _save_state(self) -> None:
|
||||
"""Save trainer state to disk."""
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
state_file = self._state_file()
|
||||
state = {
|
||||
"study_name": self.study_name,
|
||||
"last_trained_count": self.last_trained_count,
|
||||
"model_version": self.model_version,
|
||||
"training_history": self.training_history,
|
||||
"last_updated": datetime.now().isoformat()
|
||||
}
|
||||
with open(state_file, 'w') as f:
|
||||
json.dump(state, f, indent=2)
|
||||
|
||||
def count_training_points(self) -> int:
|
||||
"""
|
||||
Count available training data points.
|
||||
|
||||
Returns:
|
||||
Number of trial directories with valid training data
|
||||
"""
|
||||
if not self.training_data_dir.exists():
|
||||
return 0
|
||||
|
||||
count = 0
|
||||
for trial_dir in self.training_data_dir.glob("trial_*"):
|
||||
if trial_dir.is_dir():
|
||||
# Check for required files
|
||||
has_input = (trial_dir / "input" / "model.bdf").exists()
|
||||
has_output = (trial_dir / "output" / "model.op2").exists()
|
||||
has_metadata = (trial_dir / "metadata.json").exists()
|
||||
|
||||
if has_input and has_output and has_metadata:
|
||||
count += 1
|
||||
|
||||
return count
|
||||
|
||||
def should_train(self) -> bool:
|
||||
"""
|
||||
Check if there's enough new data to trigger training.
|
||||
|
||||
Returns:
|
||||
True if training should be triggered
|
||||
"""
|
||||
current_count = self.count_training_points()
|
||||
|
||||
# First training - check minimum threshold
|
||||
if self.last_trained_count == 0:
|
||||
return current_count >= self.min_points
|
||||
|
||||
# Retraining - check new points threshold
|
||||
new_points = current_count - self.last_trained_count
|
||||
return new_points >= self.retrain_threshold
|
||||
|
||||
def get_new_points_count(self) -> int:
|
||||
"""Get number of new points since last training."""
|
||||
return self.count_training_points() - self.last_trained_count
|
||||
|
||||
def prepare_training_split(self) -> Tuple[Path, Path]:
|
||||
"""
|
||||
Prepare train/validation split from collected data.
|
||||
|
||||
Returns:
|
||||
Tuple of (train_dir, val_dir) paths
|
||||
"""
|
||||
train_dir = self.training_data_dir.parent / f"{self.study_name}_train"
|
||||
val_dir = self.training_data_dir.parent / f"{self.study_name}_val"
|
||||
|
||||
# Clear existing splits
|
||||
if train_dir.exists():
|
||||
shutil.rmtree(train_dir)
|
||||
if val_dir.exists():
|
||||
shutil.rmtree(val_dir)
|
||||
|
||||
train_dir.mkdir(parents=True)
|
||||
val_dir.mkdir(parents=True)
|
||||
|
||||
# Get all trial directories
|
||||
trial_dirs = sorted(self.training_data_dir.glob("trial_*"))
|
||||
n_trials = len(trial_dirs)
|
||||
n_val = max(1, int(n_trials * self.val_split))
|
||||
|
||||
# Split: use latest trials for validation (they're most diverse)
|
||||
train_trials = trial_dirs[:-n_val] if n_val > 0 else trial_dirs
|
||||
val_trials = trial_dirs[-n_val:] if n_val > 0 else []
|
||||
|
||||
# Copy to split directories
|
||||
for trial_dir in train_trials:
|
||||
dest = train_dir / trial_dir.name
|
||||
shutil.copytree(trial_dir, dest)
|
||||
|
||||
for trial_dir in val_trials:
|
||||
dest = val_dir / trial_dir.name
|
||||
shutil.copytree(trial_dir, dest)
|
||||
|
||||
logger.info(f"Split data: {len(train_trials)} train, {len(val_trials)} validation")
|
||||
|
||||
return train_dir, val_dir
|
||||
|
||||
def train(self, train_parametric: bool = True) -> Optional[Path]:
|
||||
"""
|
||||
Trigger neural network training.
|
||||
|
||||
Args:
|
||||
train_parametric: If True, train parametric predictor (fast).
|
||||
If False, train field predictor (slower, more detailed).
|
||||
|
||||
Returns:
|
||||
Path to trained model checkpoint, or None if training failed
|
||||
"""
|
||||
current_count = self.count_training_points()
|
||||
|
||||
if current_count < self.min_points:
|
||||
logger.warning(
|
||||
f"Not enough data for training: {current_count} < {self.min_points}"
|
||||
)
|
||||
return None
|
||||
|
||||
logger.info(f"Starting training with {current_count} data points...")
|
||||
|
||||
# Prepare train/val split
|
||||
train_dir, val_dir = self.prepare_training_split()
|
||||
|
||||
# Increment model version
|
||||
self.model_version += 1
|
||||
version_output_dir = self.output_dir / f"v{self.model_version}"
|
||||
version_output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Choose training script
|
||||
if train_parametric:
|
||||
train_script = self.atomizer_field_dir / "train_parametric.py"
|
||||
else:
|
||||
train_script = self.atomizer_field_dir / "train.py"
|
||||
|
||||
if not train_script.exists():
|
||||
logger.error(f"Training script not found: {train_script}")
|
||||
return None
|
||||
|
||||
# Build training command
|
||||
cmd = [
|
||||
sys.executable,
|
||||
str(train_script),
|
||||
"--train_dir", str(train_dir),
|
||||
"--val_dir", str(val_dir),
|
||||
"--epochs", str(self.epochs),
|
||||
"--output_dir", str(version_output_dir)
|
||||
]
|
||||
|
||||
logger.info(f"Running: {' '.join(cmd)}")
|
||||
|
||||
# Run training
|
||||
start_time = time.time()
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=str(self.atomizer_field_dir),
|
||||
timeout=3600 * 4 # 4 hour timeout
|
||||
)
|
||||
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.error(f"Training failed:\n{result.stderr}")
|
||||
return None
|
||||
|
||||
logger.info(f"Training completed in {elapsed/60:.1f} minutes")
|
||||
|
||||
# Find model checkpoint
|
||||
checkpoints = list(version_output_dir.glob("*.pt")) + list(version_output_dir.glob("*.pth"))
|
||||
if not checkpoints:
|
||||
# Check for best model
|
||||
checkpoints = list(version_output_dir.glob("**/best*.pt")) + \
|
||||
list(version_output_dir.glob("**/checkpoint*.pt"))
|
||||
|
||||
if checkpoints:
|
||||
model_path = checkpoints[0]
|
||||
logger.info(f"Model saved: {model_path}")
|
||||
else:
|
||||
logger.warning("No checkpoint file found after training")
|
||||
model_path = version_output_dir
|
||||
|
||||
# Update state
|
||||
self.last_trained_count = current_count
|
||||
self.training_history.append({
|
||||
"version": self.model_version,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"data_points": current_count,
|
||||
"epochs": self.epochs,
|
||||
"training_time_seconds": elapsed,
|
||||
"model_path": str(model_path)
|
||||
})
|
||||
self._save_state()
|
||||
|
||||
return model_path
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error("Training timed out after 4 hours")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Training error: {e}")
|
||||
return None
|
||||
|
||||
def validate_model(
|
||||
self,
|
||||
model_path: Path,
|
||||
n_validation_trials: int = 5
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Validate trained model against FEA results.
|
||||
|
||||
Args:
|
||||
model_path: Path to trained model
|
||||
n_validation_trials: Number of trials to validate
|
||||
|
||||
Returns:
|
||||
Validation metrics dictionary
|
||||
"""
|
||||
logger.info(f"Validating model: {model_path}")
|
||||
|
||||
# This would integrate with the neural surrogate to compare predictions vs FEA
|
||||
# For now, return placeholder metrics
|
||||
validation_results = {
|
||||
"model_path": str(model_path),
|
||||
"n_validation_trials": n_validation_trials,
|
||||
"mean_error_percent": 0.0, # Would be computed
|
||||
"max_error_percent": 0.0,
|
||||
"validated_at": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# TODO: Implement actual validation
|
||||
# - Load model
|
||||
# - Run predictions on held-out trials
|
||||
# - Compare with FEA results
|
||||
# - Compute error metrics
|
||||
|
||||
return validation_results
|
||||
|
||||
def get_latest_model(self) -> Optional[Path]:
|
||||
"""
|
||||
Get path to latest trained model.
|
||||
|
||||
Returns:
|
||||
Path to latest model checkpoint, or None if no model exists
|
||||
"""
|
||||
if self.model_version == 0:
|
||||
return None
|
||||
|
||||
latest_dir = self.output_dir / f"v{self.model_version}"
|
||||
if not latest_dir.exists():
|
||||
return None
|
||||
|
||||
# Find checkpoint
|
||||
checkpoints = list(latest_dir.glob("*.pt")) + list(latest_dir.glob("*.pth"))
|
||||
if checkpoints:
|
||||
return checkpoints[0]
|
||||
|
||||
return latest_dir
|
||||
|
||||
def watch(self, check_interval: int = 60) -> None:
|
||||
"""
|
||||
Continuously monitor for new data and trigger training.
|
||||
|
||||
Args:
|
||||
check_interval: Seconds between checks (default: 60)
|
||||
"""
|
||||
logger.info(f"Starting auto-trainer watch mode for {self.study_name}")
|
||||
logger.info(f"Check interval: {check_interval}s")
|
||||
logger.info(f"Min points: {self.min_points}, Retrain threshold: {self.retrain_threshold}")
|
||||
|
||||
try:
|
||||
while True:
|
||||
current_count = self.count_training_points()
|
||||
new_points = current_count - self.last_trained_count
|
||||
|
||||
status = f"[{datetime.now().strftime('%H:%M:%S')}] "
|
||||
status += f"Points: {current_count} (new: {new_points})"
|
||||
|
||||
if self.should_train():
|
||||
status += " -> TRAINING"
|
||||
print(status)
|
||||
model_path = self.train()
|
||||
if model_path:
|
||||
print(f"Training complete: {model_path}")
|
||||
else:
|
||||
if self.last_trained_count == 0:
|
||||
needed = self.min_points - current_count
|
||||
status += f" (need {needed} more for first training)"
|
||||
else:
|
||||
needed = self.retrain_threshold - new_points
|
||||
status += f" (need {needed} more for retraining)"
|
||||
print(status)
|
||||
|
||||
time.sleep(check_interval)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
logger.info("Watch mode stopped")
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get current trainer status.
|
||||
|
||||
Returns:
|
||||
Status dictionary with counts and state
|
||||
"""
|
||||
current_count = self.count_training_points()
|
||||
new_points = current_count - self.last_trained_count
|
||||
|
||||
return {
|
||||
"study_name": self.study_name,
|
||||
"total_points": current_count,
|
||||
"new_points_since_training": new_points,
|
||||
"last_trained_count": self.last_trained_count,
|
||||
"model_version": self.model_version,
|
||||
"min_points_threshold": self.min_points,
|
||||
"retrain_threshold": self.retrain_threshold,
|
||||
"should_train": self.should_train(),
|
||||
"latest_model": str(self.get_latest_model()) if self.get_latest_model() else None,
|
||||
"training_history_count": len(self.training_history)
|
||||
}
|
||||
|
||||
|
||||
def check_training_status(study_name: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Quick check of training data status for a study.
|
||||
|
||||
Args:
|
||||
study_name: Name of the study
|
||||
|
||||
Returns:
|
||||
Status dictionary
|
||||
"""
|
||||
trainer = AutoTrainer(study_name=study_name)
|
||||
return trainer.get_status()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description="AtomizerField Auto-Trainer")
|
||||
parser.add_argument("study_name", help="Name of the optimization study")
|
||||
parser.add_argument("--train", action="store_true", help="Trigger training now")
|
||||
parser.add_argument("--watch", action="store_true", help="Watch mode - continuous monitoring")
|
||||
parser.add_argument("--status", action="store_true", help="Show status only")
|
||||
parser.add_argument("--min-points", type=int, default=50, help="Minimum points for training")
|
||||
parser.add_argument("--epochs", type=int, default=100, help="Training epochs")
|
||||
parser.add_argument("--interval", type=int, default=60, help="Check interval for watch mode")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s [%(levelname)s] %(message)s'
|
||||
)
|
||||
|
||||
trainer = AutoTrainer(
|
||||
study_name=args.study_name,
|
||||
min_points=args.min_points,
|
||||
epochs=args.epochs
|
||||
)
|
||||
|
||||
if args.status:
|
||||
status = trainer.get_status()
|
||||
print(f"\nAuto-Trainer Status: {args.study_name}")
|
||||
print("=" * 50)
|
||||
for key, value in status.items():
|
||||
print(f" {key}: {value}")
|
||||
|
||||
elif args.train:
|
||||
if trainer.should_train():
|
||||
print("Training triggered...")
|
||||
model_path = trainer.train()
|
||||
if model_path:
|
||||
print(f"Success! Model at: {model_path}")
|
||||
else:
|
||||
print("Training failed")
|
||||
else:
|
||||
print("Not enough data for training")
|
||||
print(f"Current: {trainer.count_training_points()}, Need: {args.min_points}")
|
||||
|
||||
elif args.watch:
|
||||
trainer.watch(check_interval=args.interval)
|
||||
|
||||
else:
|
||||
# Default: show status and recommendation
|
||||
status = trainer.get_status()
|
||||
print(f"\nAuto-Trainer Status: {args.study_name}")
|
||||
print("=" * 50)
|
||||
print(f" Data points: {status['total_points']}")
|
||||
print(f" New since last training: {status['new_points_since_training']}")
|
||||
print(f" Model version: v{status['model_version']}")
|
||||
print(f" Should train: {status['should_train']}")
|
||||
print()
|
||||
|
||||
if status['should_train']:
|
||||
print("Ready to train! Run with --train to start training.")
|
||||
else:
|
||||
if status['last_trained_count'] == 0:
|
||||
needed = status['min_points_threshold'] - status['total_points']
|
||||
print(f"Need {needed} more points for initial training.")
|
||||
else:
|
||||
needed = status['retrain_threshold'] - status['new_points_since_training']
|
||||
print(f"Need {needed} more new points for retraining.")
|
||||
834
optimization_engine/processors/surrogates/generic_surrogate.py
Normal file
834
optimization_engine/processors/surrogates/generic_surrogate.py
Normal file
@@ -0,0 +1,834 @@
|
||||
"""
|
||||
GenericSurrogate - Config-driven neural network surrogate for optimization.
|
||||
|
||||
This module eliminates ~2,800 lines of duplicated code across study run_nn_optimization.py files
|
||||
by providing a fully config-driven neural surrogate system.
|
||||
|
||||
Usage:
|
||||
# In study's run_nn_optimization.py (now ~30 lines instead of ~600):
|
||||
from optimization_engine.processors.surrogates.generic_surrogate import ConfigDrivenSurrogate
|
||||
|
||||
surrogate = ConfigDrivenSurrogate(__file__)
|
||||
surrogate.run() # Handles --train, --turbo, --all flags automatically
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Conditional PyTorch import
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.utils.data import DataLoader, random_split, TensorDataset
|
||||
TORCH_AVAILABLE = True
|
||||
except ImportError:
|
||||
TORCH_AVAILABLE = False
|
||||
|
||||
import optuna
|
||||
from optuna.samplers import NSGAIISampler
|
||||
|
||||
|
||||
class MLPSurrogate(nn.Module):
|
||||
"""
|
||||
Generic MLP architecture for surrogate modeling.
|
||||
|
||||
Architecture: Input -> [Linear -> LayerNorm -> ReLU -> Dropout] * N -> Output
|
||||
"""
|
||||
|
||||
def __init__(self, n_inputs: int, n_outputs: int,
|
||||
hidden_dims: List[int] = None, dropout: float = 0.1):
|
||||
super().__init__()
|
||||
|
||||
if hidden_dims is None:
|
||||
# Default architecture scales with problem size
|
||||
hidden_dims = [64, 128, 128, 64]
|
||||
|
||||
layers = []
|
||||
prev_dim = n_inputs
|
||||
|
||||
for hidden_dim in hidden_dims:
|
||||
layers.extend([
|
||||
nn.Linear(prev_dim, hidden_dim),
|
||||
nn.LayerNorm(hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(dropout)
|
||||
])
|
||||
prev_dim = hidden_dim
|
||||
|
||||
layers.append(nn.Linear(prev_dim, n_outputs))
|
||||
self.network = nn.Sequential(*layers)
|
||||
|
||||
# Initialize weights
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Linear):
|
||||
nn.init.kaiming_normal_(m.weight)
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
return self.network(x)
|
||||
|
||||
|
||||
class GenericSurrogate:
|
||||
"""
|
||||
Config-driven neural surrogate for FEA optimization.
|
||||
|
||||
Automatically adapts to any number of design variables and objectives
|
||||
based on the optimization_config.json file.
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict, device: str = 'auto'):
|
||||
"""
|
||||
Initialize surrogate from config.
|
||||
|
||||
Args:
|
||||
config: Normalized config dictionary
|
||||
device: 'auto', 'cuda', or 'cpu'
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required for neural surrogate")
|
||||
|
||||
self.config = config
|
||||
self.device = torch.device(
|
||||
'cuda' if torch.cuda.is_available() and device == 'auto' else 'cpu'
|
||||
)
|
||||
|
||||
# Extract variable and objective info from config
|
||||
self.design_var_names = [v['name'] for v in config['design_variables']]
|
||||
self.design_var_bounds = {
|
||||
v['name']: (v['min'], v['max'])
|
||||
for v in config['design_variables']
|
||||
}
|
||||
self.design_var_types = {
|
||||
v['name']: v.get('type', 'continuous')
|
||||
for v in config['design_variables']
|
||||
}
|
||||
|
||||
self.objective_names = [o['name'] for o in config['objectives']]
|
||||
self.n_inputs = len(self.design_var_names)
|
||||
self.n_outputs = len(self.objective_names)
|
||||
|
||||
self.model = None
|
||||
self.normalization = None
|
||||
|
||||
def _get_hidden_dims(self) -> List[int]:
|
||||
"""Calculate hidden layer dimensions based on problem size."""
|
||||
n = self.n_inputs
|
||||
|
||||
if n <= 3:
|
||||
return [32, 64, 32]
|
||||
elif n <= 6:
|
||||
return [64, 128, 128, 64]
|
||||
elif n <= 10:
|
||||
return [128, 256, 256, 128]
|
||||
else:
|
||||
return [256, 512, 512, 256]
|
||||
|
||||
def train_from_database(self, db_path: Path, study_name: str,
|
||||
epochs: int = 300, validation_split: float = 0.2,
|
||||
batch_size: int = 16, learning_rate: float = 0.001,
|
||||
save_path: Path = None, verbose: bool = True):
|
||||
"""
|
||||
Train surrogate from Optuna database.
|
||||
|
||||
Args:
|
||||
db_path: Path to study.db
|
||||
study_name: Name of the Optuna study
|
||||
epochs: Number of training epochs
|
||||
validation_split: Fraction of data for validation
|
||||
batch_size: Training batch size
|
||||
learning_rate: Initial learning rate
|
||||
save_path: Where to save the trained model
|
||||
verbose: Print training progress
|
||||
"""
|
||||
if verbose:
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Training Generic Surrogate ({self.n_inputs} inputs -> {self.n_outputs} outputs)")
|
||||
print(f"{'='*60}")
|
||||
print(f"Device: {self.device}")
|
||||
print(f"Database: {db_path}")
|
||||
|
||||
# Load data from Optuna
|
||||
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
|
||||
study = optuna.load_study(study_name=study_name, storage=storage)
|
||||
|
||||
completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
|
||||
|
||||
if verbose:
|
||||
print(f"Found {len(completed)} completed trials")
|
||||
|
||||
if len(completed) < 10:
|
||||
raise ValueError(f"Need at least 10 trials for training, got {len(completed)}")
|
||||
|
||||
# Extract training data
|
||||
design_params = []
|
||||
objectives = []
|
||||
|
||||
for trial in completed:
|
||||
# Skip inf values
|
||||
if any(v == float('inf') or v != v for v in trial.values): # nan check
|
||||
continue
|
||||
|
||||
params = [trial.params.get(name, 0) for name in self.design_var_names]
|
||||
objs = list(trial.values)
|
||||
|
||||
design_params.append(params)
|
||||
objectives.append(objs)
|
||||
|
||||
design_params = np.array(design_params, dtype=np.float32)
|
||||
objectives = np.array(objectives, dtype=np.float32)
|
||||
|
||||
if verbose:
|
||||
print(f"Valid samples: {len(design_params)}")
|
||||
print(f"\nDesign variable ranges:")
|
||||
for i, name in enumerate(self.design_var_names):
|
||||
print(f" {name}: {design_params[:, i].min():.2f} - {design_params[:, i].max():.2f}")
|
||||
print(f"\nObjective ranges:")
|
||||
for i, name in enumerate(self.objective_names):
|
||||
print(f" {name}: {objectives[:, i].min():.4f} - {objectives[:, i].max():.4f}")
|
||||
|
||||
# Compute normalization parameters
|
||||
design_mean = design_params.mean(axis=0)
|
||||
design_std = design_params.std(axis=0) + 1e-8
|
||||
objective_mean = objectives.mean(axis=0)
|
||||
objective_std = objectives.std(axis=0) + 1e-8
|
||||
|
||||
self.normalization = {
|
||||
'design_mean': design_mean,
|
||||
'design_std': design_std,
|
||||
'objective_mean': objective_mean,
|
||||
'objective_std': objective_std
|
||||
}
|
||||
|
||||
# Normalize data
|
||||
X = (design_params - design_mean) / design_std
|
||||
Y = (objectives - objective_mean) / objective_std
|
||||
|
||||
X_tensor = torch.tensor(X, dtype=torch.float32)
|
||||
Y_tensor = torch.tensor(Y, dtype=torch.float32)
|
||||
|
||||
# Create datasets
|
||||
dataset = TensorDataset(X_tensor, Y_tensor)
|
||||
n_val = max(1, int(len(dataset) * validation_split))
|
||||
n_train = len(dataset) - n_val
|
||||
train_ds, val_ds = random_split(dataset, [n_train, n_val])
|
||||
|
||||
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
|
||||
val_loader = DataLoader(val_ds, batch_size=batch_size)
|
||||
|
||||
if verbose:
|
||||
print(f"\nTraining: {n_train} samples, Validation: {n_val} samples")
|
||||
|
||||
# Build model
|
||||
hidden_dims = self._get_hidden_dims()
|
||||
self.model = MLPSurrogate(
|
||||
n_inputs=self.n_inputs,
|
||||
n_outputs=self.n_outputs,
|
||||
hidden_dims=hidden_dims
|
||||
).to(self.device)
|
||||
|
||||
n_params = sum(p.numel() for p in self.model.parameters())
|
||||
if verbose:
|
||||
print(f"Model architecture: {self.n_inputs} -> {hidden_dims} -> {self.n_outputs}")
|
||||
print(f"Total parameters: {n_params:,}")
|
||||
|
||||
# Training setup
|
||||
optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
|
||||
|
||||
best_val_loss = float('inf')
|
||||
best_state = None
|
||||
|
||||
if verbose:
|
||||
print(f"\nTraining for {epochs} epochs...")
|
||||
|
||||
for epoch in range(epochs):
|
||||
# Training
|
||||
self.model.train()
|
||||
train_loss = 0.0
|
||||
for x, y in train_loader:
|
||||
x, y = x.to(self.device), y.to(self.device)
|
||||
optimizer.zero_grad()
|
||||
pred = self.model(x)
|
||||
loss = F.mse_loss(pred, y)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
train_loss += loss.item()
|
||||
train_loss /= len(train_loader)
|
||||
|
||||
# Validation
|
||||
self.model.eval()
|
||||
val_loss = 0.0
|
||||
with torch.no_grad():
|
||||
for x, y in val_loader:
|
||||
x, y = x.to(self.device), y.to(self.device)
|
||||
pred = self.model(x)
|
||||
val_loss += F.mse_loss(pred, y).item()
|
||||
val_loss /= len(val_loader)
|
||||
|
||||
scheduler.step()
|
||||
|
||||
if val_loss < best_val_loss:
|
||||
best_val_loss = val_loss
|
||||
best_state = self.model.state_dict().copy()
|
||||
|
||||
if verbose and ((epoch + 1) % 50 == 0 or epoch == 0):
|
||||
print(f" Epoch {epoch+1:3d}: train={train_loss:.6f}, val={val_loss:.6f}")
|
||||
|
||||
# Load best model
|
||||
self.model.load_state_dict(best_state)
|
||||
|
||||
if verbose:
|
||||
print(f"\nBest validation loss: {best_val_loss:.6f}")
|
||||
|
||||
# Final evaluation
|
||||
self._print_validation_metrics(val_loader)
|
||||
|
||||
# Save model
|
||||
if save_path:
|
||||
self.save(save_path)
|
||||
|
||||
return self
|
||||
|
||||
def _print_validation_metrics(self, val_loader):
|
||||
"""Print validation accuracy metrics."""
|
||||
self.model.eval()
|
||||
all_preds = []
|
||||
all_targets = []
|
||||
|
||||
with torch.no_grad():
|
||||
for x, y in val_loader:
|
||||
x = x.to(self.device)
|
||||
pred = self.model(x).cpu().numpy()
|
||||
all_preds.append(pred)
|
||||
all_targets.append(y.numpy())
|
||||
|
||||
all_preds = np.concatenate(all_preds)
|
||||
all_targets = np.concatenate(all_targets)
|
||||
|
||||
# Denormalize
|
||||
preds_denorm = all_preds * self.normalization['objective_std'] + self.normalization['objective_mean']
|
||||
targets_denorm = all_targets * self.normalization['objective_std'] + self.normalization['objective_mean']
|
||||
|
||||
print(f"\nValidation accuracy:")
|
||||
for i, name in enumerate(self.objective_names):
|
||||
mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
|
||||
mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) /
|
||||
(np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
|
||||
print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
|
||||
|
||||
def predict(self, design_params: Dict[str, float]) -> Dict[str, float]:
|
||||
"""
|
||||
Predict objectives from design parameters.
|
||||
|
||||
Args:
|
||||
design_params: Dictionary of design variable values
|
||||
|
||||
Returns:
|
||||
Dictionary of predicted objective values
|
||||
"""
|
||||
if self.model is None:
|
||||
raise ValueError("Model not trained. Call train_from_database first.")
|
||||
|
||||
# Build input array
|
||||
x = np.array([design_params.get(name, 0) for name in self.design_var_names], dtype=np.float32)
|
||||
x_norm = (x - self.normalization['design_mean']) / self.normalization['design_std']
|
||||
x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
|
||||
|
||||
# Predict
|
||||
self.model.eval()
|
||||
with torch.no_grad():
|
||||
y_norm = self.model(x_tensor).cpu().numpy()[0]
|
||||
|
||||
# Denormalize
|
||||
y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
|
||||
|
||||
return {name: float(y[i]) for i, name in enumerate(self.objective_names)}
|
||||
|
||||
def sample_random_design(self) -> Dict[str, float]:
|
||||
"""Sample a random point in the design space."""
|
||||
params = {}
|
||||
for name in self.design_var_names:
|
||||
low, high = self.design_var_bounds[name]
|
||||
if self.design_var_types[name] == 'integer':
|
||||
params[name] = float(np.random.randint(int(low), int(high) + 1))
|
||||
else:
|
||||
params[name] = np.random.uniform(low, high)
|
||||
return params
|
||||
|
||||
def save(self, path: Path):
|
||||
"""Save model to file."""
|
||||
path = Path(path)
|
||||
torch.save({
|
||||
'model_state_dict': self.model.state_dict(),
|
||||
'normalization': {
|
||||
'design_mean': self.normalization['design_mean'].tolist(),
|
||||
'design_std': self.normalization['design_std'].tolist(),
|
||||
'objective_mean': self.normalization['objective_mean'].tolist(),
|
||||
'objective_std': self.normalization['objective_std'].tolist()
|
||||
},
|
||||
'design_var_names': self.design_var_names,
|
||||
'objective_names': self.objective_names,
|
||||
'n_inputs': self.n_inputs,
|
||||
'n_outputs': self.n_outputs,
|
||||
'hidden_dims': self._get_hidden_dims()
|
||||
}, path)
|
||||
print(f"Model saved to {path}")
|
||||
|
||||
def load(self, path: Path):
|
||||
"""Load model from file."""
|
||||
path = Path(path)
|
||||
checkpoint = torch.load(path, map_location=self.device)
|
||||
|
||||
hidden_dims = checkpoint.get('hidden_dims', self._get_hidden_dims())
|
||||
self.model = MLPSurrogate(
|
||||
n_inputs=checkpoint['n_inputs'],
|
||||
n_outputs=checkpoint['n_outputs'],
|
||||
hidden_dims=hidden_dims
|
||||
).to(self.device)
|
||||
self.model.load_state_dict(checkpoint['model_state_dict'])
|
||||
self.model.eval()
|
||||
|
||||
norm = checkpoint['normalization']
|
||||
self.normalization = {
|
||||
'design_mean': np.array(norm['design_mean']),
|
||||
'design_std': np.array(norm['design_std']),
|
||||
'objective_mean': np.array(norm['objective_mean']),
|
||||
'objective_std': np.array(norm['objective_std'])
|
||||
}
|
||||
|
||||
self.design_var_names = checkpoint.get('design_var_names', self.design_var_names)
|
||||
self.objective_names = checkpoint.get('objective_names', self.objective_names)
|
||||
print(f"Model loaded from {path}")
|
||||
|
||||
|
||||
class ConfigDrivenSurrogate:
|
||||
"""
|
||||
Fully config-driven neural surrogate system.
|
||||
|
||||
Provides complete --train, --turbo, --all workflow based on optimization_config.json.
|
||||
Handles FEA validation, surrogate retraining, and result reporting automatically.
|
||||
"""
|
||||
|
||||
def __init__(self, script_path: str, config_path: Optional[str] = None,
|
||||
element_type: str = 'auto'):
|
||||
"""
|
||||
Initialize config-driven surrogate.
|
||||
|
||||
Args:
|
||||
script_path: Path to study's run_nn_optimization.py (__file__)
|
||||
config_path: Optional explicit path to config
|
||||
element_type: Element type for stress extraction ('auto' detects from DAT file)
|
||||
"""
|
||||
self.study_dir = Path(script_path).parent
|
||||
self.config_path = Path(config_path) if config_path else self._find_config()
|
||||
self.model_dir = self.study_dir / "1_setup" / "model"
|
||||
self.results_dir = self.study_dir / "2_results"
|
||||
|
||||
# Load config
|
||||
with open(self.config_path, 'r') as f:
|
||||
self.raw_config = json.load(f)
|
||||
|
||||
# Normalize config (reuse from base_runner)
|
||||
self.config = self._normalize_config(self.raw_config)
|
||||
|
||||
self.study_name = self.config['study_name']
|
||||
self.element_type = element_type
|
||||
|
||||
self.surrogate = None
|
||||
self.logger = None
|
||||
self.nx_solver = None
|
||||
|
||||
def _find_config(self) -> Path:
|
||||
"""Find the optimization config file."""
|
||||
candidates = [
|
||||
self.study_dir / "optimization_config.json",
|
||||
self.study_dir / "1_setup" / "optimization_config.json",
|
||||
]
|
||||
for path in candidates:
|
||||
if path.exists():
|
||||
return path
|
||||
raise FileNotFoundError(f"No optimization_config.json found in {self.study_dir}")
|
||||
|
||||
def _normalize_config(self, config: Dict) -> Dict:
|
||||
"""Normalize config format variations."""
|
||||
# This mirrors ConfigNormalizer from base_runner.py
|
||||
normalized = {
|
||||
'study_name': config.get('study_name', 'unnamed_study'),
|
||||
'description': config.get('description', ''),
|
||||
'design_variables': [],
|
||||
'objectives': [],
|
||||
'constraints': [],
|
||||
'simulation': {},
|
||||
'neural_acceleration': config.get('neural_acceleration', {}),
|
||||
}
|
||||
|
||||
# Normalize design variables
|
||||
for var in config.get('design_variables', []):
|
||||
normalized['design_variables'].append({
|
||||
'name': var.get('parameter') or var.get('name'),
|
||||
'type': var.get('type', 'continuous'),
|
||||
'min': var.get('bounds', [var.get('min', 0), var.get('max', 1)])[0] if 'bounds' in var else var.get('min', 0),
|
||||
'max': var.get('bounds', [var.get('min', 0), var.get('max', 1)])[1] if 'bounds' in var else var.get('max', 1),
|
||||
})
|
||||
|
||||
# Normalize objectives
|
||||
for obj in config.get('objectives', []):
|
||||
normalized['objectives'].append({
|
||||
'name': obj.get('name'),
|
||||
'direction': obj.get('goal') or obj.get('direction', 'minimize'),
|
||||
})
|
||||
|
||||
# Normalize simulation
|
||||
sim = config.get('simulation', {})
|
||||
normalized['simulation'] = {
|
||||
'sim_file': sim.get('sim_file', ''),
|
||||
'dat_file': sim.get('dat_file', ''),
|
||||
'solution_name': sim.get('solution_name', 'Solution 1'),
|
||||
}
|
||||
|
||||
return normalized
|
||||
|
||||
def _setup(self):
|
||||
"""Initialize solver and logger."""
|
||||
project_root = self.study_dir.parents[1]
|
||||
if str(project_root) not in sys.path:
|
||||
sys.path.insert(0, str(project_root))
|
||||
|
||||
from optimization_engine.nx.solver import NXSolver
|
||||
from optimization_engine.utils.logger import get_logger
|
||||
|
||||
self.results_dir.mkdir(exist_ok=True)
|
||||
self.logger = get_logger(self.study_name, study_dir=self.results_dir)
|
||||
self.nx_solver = NXSolver(nastran_version="2506")
|
||||
|
||||
def _detect_element_type(self, dat_file: Path) -> str:
|
||||
"""Auto-detect element type from DAT file."""
|
||||
if self.element_type != 'auto':
|
||||
return self.element_type
|
||||
|
||||
try:
|
||||
with open(dat_file, 'r') as f:
|
||||
content = f.read(50000)
|
||||
|
||||
if 'CTETRA' in content:
|
||||
return 'ctetra'
|
||||
elif 'CHEXA' in content:
|
||||
return 'chexa'
|
||||
elif 'CQUAD4' in content:
|
||||
return 'cquad4'
|
||||
else:
|
||||
return 'ctetra'
|
||||
except Exception:
|
||||
return 'ctetra'
|
||||
|
||||
def train(self, epochs: int = 300) -> GenericSurrogate:
|
||||
"""Train surrogate model from FEA database."""
|
||||
print(f"\n{'='*60}")
|
||||
print("PHASE: Train Surrogate Model")
|
||||
print(f"{'='*60}")
|
||||
|
||||
self.surrogate = GenericSurrogate(self.config, device='auto')
|
||||
self.surrogate.train_from_database(
|
||||
db_path=self.results_dir / "study.db",
|
||||
study_name=self.study_name,
|
||||
epochs=epochs,
|
||||
save_path=self.results_dir / "surrogate_best.pt"
|
||||
)
|
||||
|
||||
return self.surrogate
|
||||
|
||||
def turbo(self, total_nn_trials: int = 5000, batch_size: int = 100,
|
||||
retrain_every: int = 10, epochs: int = 150):
|
||||
"""
|
||||
Run TURBO mode: NN exploration + FEA validation + surrogate retraining.
|
||||
|
||||
Args:
|
||||
total_nn_trials: Total NN trials to run
|
||||
batch_size: NN trials per batch before FEA validation
|
||||
retrain_every: Retrain surrogate every N FEA validations
|
||||
epochs: Training epochs for surrogate
|
||||
"""
|
||||
from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf
|
||||
from optimization_engine.extractors.extract_displacement import extract_displacement
|
||||
from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress
|
||||
|
||||
print(f"\n{'#'*60}")
|
||||
print(f"# TURBO MODE: {self.study_name}")
|
||||
print(f"{'#'*60}")
|
||||
print(f"Design variables: {len(self.config['design_variables'])}")
|
||||
print(f"Objectives: {len(self.config['objectives'])}")
|
||||
print(f"Total NN budget: {total_nn_trials:,} trials")
|
||||
print(f"NN batch size: {batch_size}")
|
||||
print(f"Expected FEA validations: ~{total_nn_trials // batch_size}")
|
||||
|
||||
# Initial training
|
||||
print(f"\n[INIT] Training initial surrogate...")
|
||||
self.train(epochs=epochs)
|
||||
|
||||
sim_file = self.model_dir / self.config['simulation']['sim_file']
|
||||
dat_file = self.model_dir / self.config['simulation']['dat_file']
|
||||
element_type = self._detect_element_type(dat_file)
|
||||
|
||||
fea_count = 0
|
||||
nn_count = 0
|
||||
best_solutions = []
|
||||
iteration = 0
|
||||
start_time = time.time()
|
||||
|
||||
# Get objective info
|
||||
obj_names = [o['name'] for o in self.config['objectives']]
|
||||
obj_directions = [o['direction'] for o in self.config['objectives']]
|
||||
|
||||
while nn_count < total_nn_trials:
|
||||
iteration += 1
|
||||
batch_trials = min(batch_size, total_nn_trials - nn_count)
|
||||
|
||||
print(f"\n{'─'*50}")
|
||||
print(f"Iteration {iteration}: NN trials {nn_count+1}-{nn_count+batch_trials}")
|
||||
|
||||
# Find best candidate via NN
|
||||
best_candidate = None
|
||||
best_score = float('inf')
|
||||
|
||||
for _ in range(batch_trials):
|
||||
params = self.surrogate.sample_random_design()
|
||||
pred = self.surrogate.predict(params)
|
||||
|
||||
# Compute score (simple weighted sum - lower is better)
|
||||
score = sum(pred[name] if obj_directions[i] == 'minimize' else -pred[name]
|
||||
for i, name in enumerate(obj_names))
|
||||
|
||||
if score < best_score:
|
||||
best_score = score
|
||||
best_candidate = {'params': params, 'nn_pred': pred}
|
||||
|
||||
nn_count += batch_trials
|
||||
|
||||
params = best_candidate['params']
|
||||
nn_pred = best_candidate['nn_pred']
|
||||
|
||||
# Log NN prediction
|
||||
var_str = ", ".join(f"{k}={v:.2f}" for k, v in list(params.items())[:3])
|
||||
print(f" Best NN: {var_str}...")
|
||||
pred_str = ", ".join(f"{k}={v:.2f}" for k, v in nn_pred.items())
|
||||
print(f" NN pred: {pred_str}")
|
||||
|
||||
# Run FEA validation
|
||||
result = self.nx_solver.run_simulation(
|
||||
sim_file=sim_file,
|
||||
working_dir=self.model_dir,
|
||||
expression_updates=params,
|
||||
solution_name=self.config['simulation'].get('solution_name'),
|
||||
cleanup=True
|
||||
)
|
||||
|
||||
if not result['success']:
|
||||
print(f" FEA FAILED - skipping")
|
||||
continue
|
||||
|
||||
# Extract FEA results
|
||||
op2_file = result['op2_file']
|
||||
fea_results = self._extract_fea_results(op2_file, dat_file, element_type,
|
||||
extract_mass_from_bdf, extract_displacement,
|
||||
extract_solid_stress)
|
||||
|
||||
fea_str = ", ".join(f"{k}={v:.2f}" for k, v in fea_results.items())
|
||||
print(f" FEA: {fea_str}")
|
||||
|
||||
# Compute errors
|
||||
errors = {}
|
||||
for name in obj_names:
|
||||
if name in fea_results and name in nn_pred and fea_results[name] != 0:
|
||||
errors[name] = abs(fea_results[name] - nn_pred[name]) / abs(fea_results[name]) * 100
|
||||
|
||||
if errors:
|
||||
err_str = ", ".join(f"{k}={v:.1f}%" for k, v in errors.items())
|
||||
print(f" Error: {err_str}")
|
||||
|
||||
fea_count += 1
|
||||
|
||||
# Add to main study database
|
||||
self._add_to_study(params, fea_results, iteration)
|
||||
|
||||
best_solutions.append({
|
||||
'iteration': iteration,
|
||||
'params': {k: float(v) for k, v in params.items()},
|
||||
'fea': [fea_results.get(name, 0) for name in obj_names],
|
||||
'nn_error': [errors.get(name, 0) for name in obj_names[:2]] # First 2 errors
|
||||
})
|
||||
|
||||
# Retrain periodically
|
||||
if fea_count % retrain_every == 0:
|
||||
print(f"\n [RETRAIN] Retraining surrogate...")
|
||||
self.train(epochs=epochs)
|
||||
|
||||
# Progress
|
||||
elapsed = time.time() - start_time
|
||||
rate = nn_count / elapsed if elapsed > 0 else 0
|
||||
remaining = (total_nn_trials - nn_count) / rate if rate > 0 else 0
|
||||
print(f" Progress: {nn_count:,}/{total_nn_trials:,} NN | {fea_count} FEA | {elapsed/60:.1f}min | ~{remaining/60:.1f}min left")
|
||||
|
||||
# Final summary
|
||||
print(f"\n{'#'*60}")
|
||||
print("# TURBO MODE COMPLETE")
|
||||
print(f"{'#'*60}")
|
||||
print(f"NN trials: {nn_count:,}")
|
||||
print(f"FEA validations: {fea_count}")
|
||||
print(f"Time: {(time.time() - start_time)/60:.1f} minutes")
|
||||
|
||||
# Save report
|
||||
turbo_report = {
|
||||
'mode': 'turbo',
|
||||
'total_nn_trials': nn_count,
|
||||
'fea_validations': fea_count,
|
||||
'time_minutes': (time.time() - start_time) / 60,
|
||||
'best_solutions': best_solutions[-20:]
|
||||
}
|
||||
|
||||
report_path = self.results_dir / "turbo_report.json"
|
||||
with open(report_path, 'w') as f:
|
||||
json.dump(turbo_report, f, indent=2)
|
||||
|
||||
print(f"\nReport saved to {report_path}")
|
||||
|
||||
def _extract_fea_results(self, op2_file: Path, dat_file: Path, element_type: str,
|
||||
extract_mass_from_bdf, extract_displacement, extract_solid_stress) -> Dict[str, float]:
|
||||
"""Extract FEA results for all objectives."""
|
||||
results = {}
|
||||
|
||||
for obj in self.config['objectives']:
|
||||
name = obj['name'].lower()
|
||||
|
||||
try:
|
||||
if 'mass' in name:
|
||||
results[obj['name']] = extract_mass_from_bdf(str(dat_file))
|
||||
|
||||
elif 'stress' in name:
|
||||
stress_result = extract_solid_stress(op2_file, subcase=1, element_type=element_type)
|
||||
results[obj['name']] = stress_result.get('max_von_mises', float('inf')) / 1000.0
|
||||
|
||||
elif 'displacement' in name:
|
||||
disp_result = extract_displacement(op2_file, subcase=1)
|
||||
results[obj['name']] = disp_result['max_displacement']
|
||||
|
||||
elif 'stiffness' in name:
|
||||
disp_result = extract_displacement(op2_file, subcase=1)
|
||||
max_disp = disp_result['max_displacement']
|
||||
# Negative for minimization in multi-objective
|
||||
results[obj['name']] = -1000.0 / max(abs(max_disp), 1e-6)
|
||||
results['displacement'] = max_disp
|
||||
|
||||
except Exception as e:
|
||||
print(f" Warning: Failed to extract {name}: {e}")
|
||||
results[obj['name']] = float('inf')
|
||||
|
||||
return results
|
||||
|
||||
def _add_to_study(self, params: Dict, fea_results: Dict, iteration: int):
|
||||
"""Add FEA result to main Optuna study."""
|
||||
try:
|
||||
storage = f"sqlite:///{self.results_dir / 'study.db'}"
|
||||
study = optuna.load_study(
|
||||
study_name=self.study_name,
|
||||
storage=storage,
|
||||
sampler=NSGAIISampler(population_size=20, seed=42)
|
||||
)
|
||||
|
||||
trial = study.ask()
|
||||
|
||||
for var in self.config['design_variables']:
|
||||
name = var['name']
|
||||
value = params[name]
|
||||
if var['type'] == 'integer':
|
||||
trial.suggest_int(name, int(value), int(value))
|
||||
else:
|
||||
trial.suggest_float(name, value, value)
|
||||
|
||||
# Get objective values in order
|
||||
obj_values = [fea_results.get(o['name'], float('inf')) for o in self.config['objectives']]
|
||||
study.tell(trial, obj_values)
|
||||
|
||||
trial.set_user_attr('source', 'turbo_mode')
|
||||
trial.set_user_attr('iteration', iteration)
|
||||
|
||||
except Exception as e:
|
||||
print(f" Warning: couldn't add to study: {e}")
|
||||
|
||||
def run(self, args=None):
|
||||
"""
|
||||
Main entry point with argument parsing.
|
||||
|
||||
Handles --train, --turbo, --all flags.
|
||||
"""
|
||||
if args is None:
|
||||
args = self.parse_args()
|
||||
|
||||
self._setup()
|
||||
|
||||
print(f"\n{'#'*60}")
|
||||
print(f"# {self.study_name} - Hybrid NN Optimization")
|
||||
print(f"{'#'*60}")
|
||||
|
||||
if args.all or args.train:
|
||||
self.train(epochs=args.epochs)
|
||||
|
||||
if args.all or args.turbo:
|
||||
self.turbo(
|
||||
total_nn_trials=args.nn_trials,
|
||||
batch_size=args.batch_size,
|
||||
retrain_every=args.retrain_every,
|
||||
epochs=args.epochs
|
||||
)
|
||||
|
||||
print(f"\n{'#'*60}")
|
||||
print("# Workflow Complete!")
|
||||
print(f"{'#'*60}\n")
|
||||
|
||||
return 0
|
||||
|
||||
def parse_args(self) -> argparse.Namespace:
|
||||
"""Parse command line arguments."""
|
||||
parser = argparse.ArgumentParser(description=f'{self.study_name} - Hybrid NN Optimization')
|
||||
|
||||
parser.add_argument('--train', action='store_true', help='Train surrogate only')
|
||||
parser.add_argument('--turbo', action='store_true', help='TURBO mode (recommended)')
|
||||
parser.add_argument('--all', action='store_true', help='Train then run turbo')
|
||||
|
||||
nn_config = self.config.get('neural_acceleration', {})
|
||||
parser.add_argument('--epochs', type=int, default=nn_config.get('epochs', 200), help='Training epochs')
|
||||
parser.add_argument('--nn-trials', type=int, default=nn_config.get('nn_trials', 5000), help='Total NN trials')
|
||||
parser.add_argument('--batch-size', type=int, default=100, help='NN batch size')
|
||||
parser.add_argument('--retrain-every', type=int, default=10, help='Retrain every N FEA')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not any([args.train, args.turbo, args.all]):
|
||||
print("No phase specified. Use --train, --turbo, or --all")
|
||||
print("\nRecommended workflow:")
|
||||
print(f" python run_nn_optimization.py --turbo --nn-trials {nn_config.get('nn_trials', 5000)}")
|
||||
sys.exit(1)
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def create_surrogate(script_path: str, element_type: str = 'auto') -> ConfigDrivenSurrogate:
|
||||
"""
|
||||
Factory function to create a ConfigDrivenSurrogate.
|
||||
|
||||
Args:
|
||||
script_path: Path to study's run_nn_optimization.py (__file__)
|
||||
element_type: Element type for stress extraction
|
||||
|
||||
Returns:
|
||||
Configured surrogate ready to run
|
||||
"""
|
||||
return ConfigDrivenSurrogate(script_path, element_type=element_type)
|
||||
993
optimization_engine/processors/surrogates/neural_surrogate.py
Normal file
993
optimization_engine/processors/surrogates/neural_surrogate.py
Normal file
@@ -0,0 +1,993 @@
|
||||
"""
|
||||
Neural network surrogate integration for Atomizer.
|
||||
|
||||
This module provides the integration layer between Atomizer optimization framework
|
||||
and AtomizerField neural network models for fast FEA predictions.
|
||||
|
||||
Key Features:
|
||||
- Load and manage AtomizerField trained models
|
||||
- Convert design variables to neural field format
|
||||
- Provide millisecond FEA predictions
|
||||
- Automatic fallback to FEA when confidence is low
|
||||
- Performance tracking and statistics
|
||||
|
||||
Usage:
|
||||
from optimization_engine.processors.surrogates.neural_surrogate import NeuralSurrogate, create_surrogate_for_study
|
||||
|
||||
# Create surrogate for UAV arm study
|
||||
surrogate = create_surrogate_for_study(
|
||||
model_path="atomizer-field/runs/uav_arm_model/checkpoint_best.pt",
|
||||
training_data_dir="atomizer_field_training_data/uav_arm_train"
|
||||
)
|
||||
|
||||
# Predict for new design
|
||||
results = surrogate.predict(design_params)
|
||||
print(f"Max displacement: {results['max_displacement']:.6f} mm")
|
||||
"""
|
||||
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
import h5py
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Add atomizer-field to path for imports
|
||||
_atomizer_field_path = Path(__file__).parent.parent / 'atomizer-field'
|
||||
if str(_atomizer_field_path) not in sys.path:
|
||||
sys.path.insert(0, str(_atomizer_field_path))
|
||||
|
||||
try:
|
||||
import torch
|
||||
from torch_geometric.data import Data
|
||||
TORCH_AVAILABLE = True
|
||||
except ImportError:
|
||||
TORCH_AVAILABLE = False
|
||||
logger.warning("PyTorch not installed. Neural surrogate features will be limited.")
|
||||
|
||||
# Import AtomizerField model
|
||||
ATOMIZER_FIELD_AVAILABLE = False
|
||||
PARAMETRIC_MODEL_AVAILABLE = False
|
||||
if TORCH_AVAILABLE:
|
||||
try:
|
||||
from neural_models.field_predictor import AtomizerFieldModel, create_model
|
||||
ATOMIZER_FIELD_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
logger.warning(f"AtomizerField modules not found: {e}")
|
||||
|
||||
try:
|
||||
from neural_models.parametric_predictor import ParametricFieldPredictor, create_parametric_model
|
||||
PARAMETRIC_MODEL_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
logger.warning(f"Parametric predictor modules not found: {e}")
|
||||
|
||||
|
||||
class NeuralSurrogate:
|
||||
"""
|
||||
Neural surrogate for fast FEA predictions using trained AtomizerField model.
|
||||
|
||||
This class loads a trained AtomizerField model and provides fast predictions
|
||||
of displacement fields, which can then be used to compute derived quantities
|
||||
like max displacement, estimated stress, etc.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path: Path,
|
||||
training_data_dir: Path,
|
||||
device: str = 'auto'
|
||||
):
|
||||
"""
|
||||
Initialize neural surrogate.
|
||||
|
||||
Args:
|
||||
model_path: Path to trained model checkpoint (.pt file)
|
||||
training_data_dir: Path to training data (for normalization stats and mesh)
|
||||
device: Computing device ('cuda', 'cpu', or 'auto')
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required. Install: pip install torch torch-geometric")
|
||||
|
||||
if not ATOMIZER_FIELD_AVAILABLE:
|
||||
raise ImportError("AtomizerField modules not found")
|
||||
|
||||
self.model_path = Path(model_path)
|
||||
self.training_data_dir = Path(training_data_dir)
|
||||
|
||||
# Set device
|
||||
if device == 'auto':
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
logger.info(f"Neural Surrogate initializing on {self.device}")
|
||||
|
||||
# Load model
|
||||
self._load_model()
|
||||
|
||||
# Load normalization statistics
|
||||
self._load_normalization_stats()
|
||||
|
||||
# Load reference mesh structure
|
||||
self._load_reference_mesh()
|
||||
|
||||
# Performance tracking
|
||||
self.stats = {
|
||||
'predictions': 0,
|
||||
'total_time_ms': 0.0,
|
||||
'fea_validations': 0
|
||||
}
|
||||
|
||||
logger.info(f"Neural Surrogate ready: {self.num_nodes} nodes, model loaded")
|
||||
|
||||
def _load_model(self):
|
||||
"""Load trained AtomizerField model."""
|
||||
logger.info(f"Loading model from {self.model_path}")
|
||||
|
||||
checkpoint = torch.load(self.model_path, map_location=self.device)
|
||||
|
||||
# Create model with saved config
|
||||
model_config = checkpoint['config']['model']
|
||||
self.model = AtomizerFieldModel(**model_config)
|
||||
self.model.load_state_dict(checkpoint['model_state_dict'])
|
||||
self.model = self.model.to(self.device)
|
||||
self.model.eval()
|
||||
|
||||
self.model_config = checkpoint['config']
|
||||
self.best_val_loss = checkpoint.get('best_val_loss', None)
|
||||
|
||||
n_params = sum(p.numel() for p in self.model.parameters())
|
||||
logger.info(f"Model loaded: {n_params:,} parameters, val_loss={self.best_val_loss:.4f}")
|
||||
|
||||
def _load_normalization_stats(self):
|
||||
"""Load normalization statistics from training data."""
|
||||
case_dirs = sorted(self.training_data_dir.glob("trial_*"))
|
||||
|
||||
if not case_dirs:
|
||||
logger.warning("No training cases found - using identity normalization")
|
||||
self.coord_mean = np.zeros(3)
|
||||
self.coord_std = np.ones(3)
|
||||
self.disp_mean = np.zeros(6)
|
||||
self.disp_std = np.ones(6)
|
||||
return
|
||||
|
||||
# Compute stats from all training data
|
||||
all_coords = []
|
||||
all_disp = []
|
||||
|
||||
for case_dir in case_dirs:
|
||||
h5_file = case_dir / "neural_field_data.h5"
|
||||
if h5_file.exists():
|
||||
with h5py.File(h5_file, 'r') as f:
|
||||
all_coords.append(f['mesh/node_coordinates'][:])
|
||||
all_disp.append(f['results/displacement'][:])
|
||||
|
||||
if all_coords:
|
||||
all_coords = np.concatenate(all_coords, axis=0)
|
||||
all_disp = np.concatenate(all_disp, axis=0)
|
||||
|
||||
self.coord_mean = all_coords.mean(axis=0)
|
||||
self.coord_std = all_coords.std(axis=0) + 1e-8
|
||||
self.disp_mean = all_disp.mean(axis=0)
|
||||
self.disp_std = all_disp.std(axis=0) + 1e-8
|
||||
|
||||
logger.info(f"Normalization stats from {len(case_dirs)} cases")
|
||||
|
||||
def _load_reference_mesh(self):
|
||||
"""Load reference mesh structure for building graphs."""
|
||||
case_dirs = sorted(self.training_data_dir.glob("trial_*"))
|
||||
|
||||
if not case_dirs:
|
||||
raise ValueError(f"No training cases in {self.training_data_dir}")
|
||||
|
||||
first_case = case_dirs[0]
|
||||
json_file = first_case / "neural_field_data.json"
|
||||
h5_file = first_case / "neural_field_data.h5"
|
||||
|
||||
# Load metadata
|
||||
with open(json_file, 'r') as f:
|
||||
self.reference_metadata = json.load(f)
|
||||
|
||||
# Load mesh
|
||||
with h5py.File(h5_file, 'r') as f:
|
||||
self.reference_coords = f['mesh/node_coordinates'][:]
|
||||
self.num_nodes = self.reference_coords.shape[0]
|
||||
|
||||
# Build edge index (constant for parametric optimization)
|
||||
self._build_graph_structure()
|
||||
|
||||
def _build_graph_structure(self):
|
||||
"""Build graph edge index and attributes from mesh."""
|
||||
metadata = self.reference_metadata
|
||||
num_nodes = self.num_nodes
|
||||
edge_list = []
|
||||
|
||||
# Get material properties
|
||||
mat_props = [0.0] * 5
|
||||
if 'materials' in metadata:
|
||||
for mat in metadata['materials']:
|
||||
if mat['type'] == 'MAT1':
|
||||
mat_props = [
|
||||
mat.get('E', 0.0) / 1e6,
|
||||
mat.get('nu', 0.0),
|
||||
mat.get('rho', 0.0) * 1e6,
|
||||
mat.get('G', 0.0) / 1e6 if mat.get('G') else 0.0,
|
||||
mat.get('alpha', 0.0) * 1e6 if mat.get('alpha') else 0.0
|
||||
]
|
||||
break
|
||||
|
||||
# Process elements to create edges
|
||||
if 'mesh' in metadata and 'elements' in metadata['mesh']:
|
||||
for elem_type in ['solid', 'shell', 'beam']:
|
||||
if elem_type in metadata['mesh']['elements']:
|
||||
for elem in metadata['mesh']['elements'][elem_type]:
|
||||
elem_nodes = elem['nodes']
|
||||
for i in range(len(elem_nodes)):
|
||||
for j in range(i + 1, len(elem_nodes)):
|
||||
node_i = elem_nodes[i] - 1
|
||||
node_j = elem_nodes[j] - 1
|
||||
if node_i < num_nodes and node_j < num_nodes:
|
||||
edge_list.append([node_i, node_j])
|
||||
edge_list.append([node_j, node_i])
|
||||
|
||||
if edge_list:
|
||||
self.edge_index = torch.tensor(edge_list, dtype=torch.long).t().to(self.device)
|
||||
num_edges = self.edge_index.shape[1]
|
||||
self.edge_attr = torch.tensor([mat_props] * num_edges, dtype=torch.float).to(self.device)
|
||||
else:
|
||||
self.edge_index = torch.zeros((2, 0), dtype=torch.long).to(self.device)
|
||||
self.edge_attr = torch.zeros((0, 5), dtype=torch.float).to(self.device)
|
||||
|
||||
# Build BC mask and load features (constant for this study)
|
||||
self._build_bc_and_loads()
|
||||
|
||||
def _build_bc_and_loads(self):
|
||||
"""Build boundary condition mask and load features."""
|
||||
metadata = self.reference_metadata
|
||||
num_nodes = self.num_nodes
|
||||
|
||||
# BC mask
|
||||
self.bc_mask = torch.zeros(num_nodes, 6)
|
||||
if 'boundary_conditions' in metadata and 'spc' in metadata['boundary_conditions']:
|
||||
for spc in metadata['boundary_conditions']['spc']:
|
||||
node_id = spc['node']
|
||||
if node_id <= num_nodes:
|
||||
dofs = spc['dofs']
|
||||
for dof_char in str(dofs):
|
||||
if dof_char.isdigit():
|
||||
dof_idx = int(dof_char) - 1
|
||||
if 0 <= dof_idx < 6:
|
||||
self.bc_mask[node_id - 1, dof_idx] = 1.0
|
||||
|
||||
# Load features
|
||||
self.load_features = torch.zeros(num_nodes, 3)
|
||||
if 'loads' in metadata and 'point_forces' in metadata['loads']:
|
||||
for force in metadata['loads']['point_forces']:
|
||||
node_id = force['node']
|
||||
if node_id <= num_nodes:
|
||||
magnitude = force['magnitude']
|
||||
direction = force['direction']
|
||||
force_vector = [magnitude * d for d in direction]
|
||||
self.load_features[node_id - 1] = torch.tensor(force_vector)
|
||||
|
||||
self.bc_mask = self.bc_mask.to(self.device)
|
||||
self.load_features = self.load_features.to(self.device)
|
||||
|
||||
def _build_node_features(self) -> torch.Tensor:
|
||||
"""Build node features tensor for model input."""
|
||||
# Normalized coordinates
|
||||
coords = torch.from_numpy(self.reference_coords).float()
|
||||
coords_norm = (coords - torch.from_numpy(self.coord_mean).float()) / \
|
||||
torch.from_numpy(self.coord_std).float()
|
||||
coords_norm = coords_norm.to(self.device)
|
||||
|
||||
# Concatenate: [coords(3) + bc_mask(6) + loads(3)] = 12 features
|
||||
node_features = torch.cat([coords_norm, self.bc_mask, self.load_features], dim=-1)
|
||||
|
||||
return node_features
|
||||
|
||||
def predict(
|
||||
self,
|
||||
design_params: Dict[str, float],
|
||||
return_fields: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Predict FEA results using neural network.
|
||||
|
||||
Args:
|
||||
design_params: Design parameter values (not used for prediction,
|
||||
but kept for API compatibility - mesh is constant)
|
||||
return_fields: If True, return complete displacement field
|
||||
|
||||
Returns:
|
||||
dict with:
|
||||
- max_displacement: Maximum displacement magnitude (mm)
|
||||
- max_stress: Estimated maximum stress (approximate)
|
||||
- inference_time_ms: Prediction time
|
||||
- fields: Complete displacement field (if return_fields=True)
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Build graph data
|
||||
node_features = self._build_node_features()
|
||||
|
||||
graph_data = Data(
|
||||
x=node_features,
|
||||
edge_index=self.edge_index,
|
||||
edge_attr=self.edge_attr
|
||||
)
|
||||
|
||||
# Predict
|
||||
with torch.no_grad():
|
||||
predictions = self.model(graph_data, return_stress=True)
|
||||
|
||||
# Denormalize displacement
|
||||
displacement = predictions['displacement'].cpu().numpy()
|
||||
displacement = displacement * self.disp_std + self.disp_mean
|
||||
|
||||
# Compute max values
|
||||
disp_magnitude = np.linalg.norm(displacement[:, :3], axis=1)
|
||||
max_displacement = float(np.max(disp_magnitude))
|
||||
|
||||
# Stress (approximate - model trained on displacement only)
|
||||
max_stress = float(torch.max(predictions['von_mises']).item())
|
||||
|
||||
inference_time = (time.time() - start_time) * 1000
|
||||
|
||||
results = {
|
||||
'max_displacement': max_displacement,
|
||||
'max_stress': max_stress,
|
||||
'inference_time_ms': inference_time
|
||||
}
|
||||
|
||||
if return_fields:
|
||||
results['displacement_field'] = displacement
|
||||
results['von_mises_field'] = predictions['von_mises'].cpu().numpy()
|
||||
|
||||
# Update stats
|
||||
self.stats['predictions'] += 1
|
||||
self.stats['total_time_ms'] += inference_time
|
||||
|
||||
return results
|
||||
|
||||
def get_statistics(self) -> Dict[str, Any]:
|
||||
"""Get prediction statistics."""
|
||||
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
|
||||
if self.stats['predictions'] > 0 else 0
|
||||
|
||||
return {
|
||||
'total_predictions': self.stats['predictions'],
|
||||
'total_time_ms': self.stats['total_time_ms'],
|
||||
'average_time_ms': avg_time,
|
||||
'model_path': str(self.model_path),
|
||||
'best_val_loss': self.best_val_loss,
|
||||
'device': str(self.device)
|
||||
}
|
||||
|
||||
def needs_fea_validation(self, trial_number: int) -> bool:
|
||||
"""
|
||||
Determine if FEA validation is recommended.
|
||||
|
||||
Args:
|
||||
trial_number: Current trial number
|
||||
|
||||
Returns:
|
||||
True if FEA validation is recommended
|
||||
"""
|
||||
# Validate periodically
|
||||
if trial_number < 5:
|
||||
return True # First few always validate
|
||||
if trial_number % 20 == 0:
|
||||
return True # Periodic validation
|
||||
return False
|
||||
|
||||
|
||||
class ParametricSurrogate:
|
||||
"""
|
||||
Parametric neural surrogate that predicts ALL objectives from design parameters.
|
||||
|
||||
Unlike NeuralSurrogate which only predicts displacement fields,
|
||||
ParametricSurrogate directly predicts:
|
||||
- mass
|
||||
- frequency
|
||||
- max_displacement
|
||||
- max_stress
|
||||
|
||||
This is the "future-proof" solution using design-conditioned GNN.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_path: Path,
|
||||
training_data_dir: Path = None,
|
||||
device: str = 'auto',
|
||||
num_nodes: int = 500
|
||||
):
|
||||
"""
|
||||
Initialize parametric surrogate.
|
||||
|
||||
Args:
|
||||
model_path: Path to trained parametric model checkpoint (.pt file)
|
||||
training_data_dir: Path to training data (optional - for mesh loading)
|
||||
device: Computing device ('cuda', 'cpu', or 'auto')
|
||||
num_nodes: Number of nodes for synthetic reference graph (default: 500)
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required. Install: pip install torch torch-geometric")
|
||||
|
||||
if not PARAMETRIC_MODEL_AVAILABLE:
|
||||
raise ImportError("Parametric predictor modules not found")
|
||||
|
||||
self.model_path = Path(model_path)
|
||||
self.training_data_dir = Path(training_data_dir) if training_data_dir else None
|
||||
self.num_nodes = num_nodes
|
||||
|
||||
# Set device
|
||||
if device == 'auto':
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
logger.info(f"Parametric Surrogate initializing on {self.device}")
|
||||
|
||||
# Load model and normalization
|
||||
self._load_model()
|
||||
|
||||
# Create reference graph structure (synthetic - matching training)
|
||||
self._create_reference_graph()
|
||||
|
||||
# Performance tracking
|
||||
self.stats = {
|
||||
'predictions': 0,
|
||||
'total_time_ms': 0.0
|
||||
}
|
||||
|
||||
logger.info(f"Parametric Surrogate ready: {self.num_nodes} nodes, "
|
||||
f"predicts mass/freq/disp/stress")
|
||||
|
||||
def _load_model(self):
|
||||
"""Load trained parametric model and normalization stats."""
|
||||
logger.info(f"Loading parametric model from {self.model_path}")
|
||||
|
||||
checkpoint = torch.load(self.model_path, map_location=self.device)
|
||||
|
||||
# Create model with saved config
|
||||
model_config = checkpoint['config']
|
||||
self.model = create_parametric_model(model_config)
|
||||
self.model.load_state_dict(checkpoint['model_state_dict'])
|
||||
self.model = self.model.to(self.device)
|
||||
self.model.eval()
|
||||
|
||||
self.model_config = model_config
|
||||
self.best_val_loss = checkpoint.get('best_val_loss', None)
|
||||
|
||||
# Load normalization stats
|
||||
norm = checkpoint.get('normalization', {})
|
||||
self.design_var_names = checkpoint.get('design_var_names', [])
|
||||
self.n_design_vars = len(self.design_var_names)
|
||||
|
||||
self.design_mean = torch.tensor(norm.get('design_mean', [0.0] * self.n_design_vars),
|
||||
dtype=torch.float32, device=self.device)
|
||||
self.design_std = torch.tensor(norm.get('design_std', [1.0] * self.n_design_vars),
|
||||
dtype=torch.float32, device=self.device)
|
||||
|
||||
self.coord_mean = np.array(norm.get('coord_mean', [0.0, 0.0, 0.0]))
|
||||
self.coord_std = np.array(norm.get('coord_std', [1.0, 1.0, 1.0]))
|
||||
self.disp_mean = np.array(norm.get('disp_mean', [0.0] * 6))
|
||||
self.disp_std = np.array(norm.get('disp_std', [1.0] * 6))
|
||||
|
||||
# Scalar normalization stats (for denormalization)
|
||||
self.mass_mean = norm.get('mass_mean', 3500.0)
|
||||
self.mass_std = norm.get('mass_std', 700.0)
|
||||
self.freq_mean = norm.get('freq_mean', 18.0)
|
||||
self.freq_std = norm.get('freq_std', 2.0)
|
||||
self.max_disp_mean = norm.get('max_disp_mean', 0.025)
|
||||
self.max_disp_std = norm.get('max_disp_std', 0.005)
|
||||
self.max_stress_mean = norm.get('max_stress_mean', 200.0)
|
||||
self.max_stress_std = norm.get('max_stress_std', 50.0)
|
||||
|
||||
n_params = sum(p.numel() for p in self.model.parameters())
|
||||
logger.info(f"Parametric model loaded: {n_params:,} params, "
|
||||
f"val_loss={self.best_val_loss:.4f}")
|
||||
logger.info(f"Design vars: {self.design_var_names}")
|
||||
|
||||
def _create_reference_graph(self):
|
||||
"""
|
||||
Create a synthetic reference graph structure for the GNN.
|
||||
|
||||
This matches the create_reference_graph() function used during training
|
||||
in train_parametric.py. The model was trained on synthetic graphs,
|
||||
so we need to use the same structure for inference.
|
||||
"""
|
||||
num_nodes = self.num_nodes
|
||||
|
||||
# Create simple node features (random, like training)
|
||||
# [coords(3) + bc_mask(6) + loads(3)] = 12 features
|
||||
x = torch.randn(num_nodes, 12, device=self.device)
|
||||
|
||||
# Create grid-like connectivity (same as training)
|
||||
edges = []
|
||||
grid_size = int(np.ceil(np.sqrt(num_nodes)))
|
||||
|
||||
for i in range(num_nodes):
|
||||
row = i // grid_size
|
||||
col = i % grid_size
|
||||
|
||||
# Right neighbor (same row)
|
||||
right = i + 1
|
||||
if col < grid_size - 1 and right < num_nodes:
|
||||
edges.append([i, right])
|
||||
edges.append([right, i])
|
||||
|
||||
# Bottom neighbor (next row)
|
||||
bottom = i + grid_size
|
||||
if bottom < num_nodes:
|
||||
edges.append([i, bottom])
|
||||
edges.append([bottom, i])
|
||||
|
||||
# Ensure we have at least some edges
|
||||
if len(edges) == 0:
|
||||
# Fallback: partially connected for very small graphs
|
||||
for i in range(num_nodes):
|
||||
for j in range(i + 1, min(i + 5, num_nodes)):
|
||||
edges.append([i, j])
|
||||
edges.append([j, i])
|
||||
|
||||
edge_index = torch.tensor(edges, dtype=torch.long, device=self.device).t().contiguous()
|
||||
edge_attr = torch.randn(edge_index.shape[1], 5, device=self.device)
|
||||
|
||||
# Create reference graph data object
|
||||
self.reference_graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
|
||||
|
||||
logger.info(f"Created reference graph: {num_nodes} nodes, {edge_index.shape[1]} edges")
|
||||
|
||||
def predict(
|
||||
self,
|
||||
design_params: Dict[str, float],
|
||||
return_fields: bool = False
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Predict all FEA objectives using parametric neural network.
|
||||
|
||||
Args:
|
||||
design_params: Design parameter values (e.g. support_angle, tip_thickness, etc.)
|
||||
return_fields: If True, return complete displacement field (not supported for synthetic graphs)
|
||||
|
||||
Returns:
|
||||
dict with:
|
||||
- mass: Predicted mass (g)
|
||||
- frequency: Predicted fundamental frequency (Hz)
|
||||
- max_displacement: Maximum displacement magnitude (mm)
|
||||
- max_stress: Maximum von Mises stress (MPa)
|
||||
- inference_time_ms: Prediction time
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Build design parameter tensor
|
||||
param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
|
||||
design_tensor = torch.tensor(param_values, dtype=torch.float32, device=self.device)
|
||||
|
||||
# Normalize design params
|
||||
design_tensor_norm = (design_tensor - self.design_mean) / self.design_std
|
||||
|
||||
# Add batch dimension for design params
|
||||
design_batch = design_tensor_norm.unsqueeze(0) # [1, n_design_vars]
|
||||
|
||||
# Predict using reference graph
|
||||
with torch.no_grad():
|
||||
predictions = self.model(self.reference_graph, design_batch)
|
||||
|
||||
# Extract scalar predictions and denormalize
|
||||
# Model outputs normalized values, so we need to convert back to original scale
|
||||
mass_norm = predictions['mass'].item()
|
||||
freq_norm = predictions['frequency'].item()
|
||||
disp_norm = predictions['max_displacement'].item()
|
||||
stress_norm = predictions['max_stress'].item()
|
||||
|
||||
# Denormalize to original scale
|
||||
mass = mass_norm * self.mass_std + self.mass_mean
|
||||
frequency = freq_norm * self.freq_std + self.freq_mean
|
||||
max_displacement = disp_norm * self.max_disp_std + self.max_disp_mean
|
||||
max_stress = stress_norm * self.max_stress_std + self.max_stress_mean
|
||||
|
||||
inference_time = (time.time() - start_time) * 1000
|
||||
|
||||
results = {
|
||||
'mass': mass,
|
||||
'frequency': frequency,
|
||||
'max_displacement': max_displacement,
|
||||
'max_stress': max_stress,
|
||||
'inference_time_ms': inference_time
|
||||
}
|
||||
|
||||
# Update stats
|
||||
self.stats['predictions'] += 1
|
||||
self.stats['total_time_ms'] += inference_time
|
||||
|
||||
return results
|
||||
|
||||
def get_statistics(self) -> Dict[str, Any]:
|
||||
"""Get prediction statistics."""
|
||||
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
|
||||
if self.stats['predictions'] > 0 else 0
|
||||
|
||||
return {
|
||||
'total_predictions': self.stats['predictions'],
|
||||
'total_time_ms': self.stats['total_time_ms'],
|
||||
'average_time_ms': avg_time,
|
||||
'model_path': str(self.model_path),
|
||||
'best_val_loss': self.best_val_loss,
|
||||
'device': str(self.device),
|
||||
'design_var_names': self.design_var_names,
|
||||
'n_design_vars': self.n_design_vars
|
||||
}
|
||||
|
||||
|
||||
class HybridOptimizer:
|
||||
"""
|
||||
Intelligent optimizer that combines FEA and neural surrogates.
|
||||
|
||||
Phases:
|
||||
1. Exploration: Use FEA to explore design space
|
||||
2. Training: Train neural network on FEA data
|
||||
3. Exploitation: Use NN for fast optimization
|
||||
4. Validation: Periodically validate with FEA
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]):
|
||||
"""
|
||||
Initialize hybrid optimizer.
|
||||
|
||||
Args:
|
||||
config: Configuration dictionary
|
||||
"""
|
||||
self.config = config
|
||||
self.phase = 'exploration'
|
||||
self.fea_samples = []
|
||||
self.nn_surrogate = None
|
||||
self.trial_count = 0
|
||||
|
||||
# Phase transition parameters
|
||||
self.min_fea_samples = config.get('min_fea_samples', 20)
|
||||
self.validation_frequency = config.get('validation_frequency', 10)
|
||||
self.retrain_frequency = config.get('retrain_frequency', 50)
|
||||
self.confidence_threshold = config.get('confidence_threshold', 0.95)
|
||||
|
||||
# Training data export directory
|
||||
self.training_data_dir = Path(config.get('training_data_dir', 'hybrid_training_data'))
|
||||
self.training_data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.info("Hybrid optimizer initialized")
|
||||
|
||||
def should_use_nn(self, trial_number: int) -> Tuple[bool, str]:
|
||||
"""
|
||||
Decide whether to use NN for this trial.
|
||||
|
||||
Args:
|
||||
trial_number: Current trial number
|
||||
|
||||
Returns:
|
||||
Tuple of (use_nn, reason)
|
||||
"""
|
||||
self.trial_count = trial_number
|
||||
|
||||
if self.phase == 'exploration':
|
||||
# Initial FEA exploration
|
||||
if trial_number < self.min_fea_samples:
|
||||
return False, f"Exploration phase ({trial_number}/{self.min_fea_samples})"
|
||||
else:
|
||||
# Transition to training
|
||||
self.phase = 'training'
|
||||
self._train_surrogate()
|
||||
self.phase = 'exploitation'
|
||||
return True, "Switched to neural surrogate"
|
||||
|
||||
elif self.phase == 'exploitation':
|
||||
# Check if validation needed
|
||||
if trial_number % self.validation_frequency == 0:
|
||||
return False, f"Periodic FEA validation (every {self.validation_frequency} trials)"
|
||||
|
||||
# Check if retraining needed
|
||||
if trial_number % self.retrain_frequency == 0:
|
||||
self._retrain_surrogate()
|
||||
|
||||
return True, "Using neural surrogate"
|
||||
|
||||
return False, f"Unknown phase: {self.phase}"
|
||||
|
||||
def _train_surrogate(self):
|
||||
"""Train surrogate model on accumulated FEA data."""
|
||||
logger.info(f"Training surrogate on {len(self.fea_samples)} FEA samples")
|
||||
|
||||
# In practice, this would:
|
||||
# 1. Parse all FEA data using neural_field_parser
|
||||
# 2. Train AtomizerField model
|
||||
# 3. Load trained model
|
||||
|
||||
# For now, try to load pre-trained model if available
|
||||
model_path = self.config.get('pretrained_model_path')
|
||||
if model_path and Path(model_path).exists():
|
||||
self.nn_surrogate = NeuralSurrogate(
|
||||
model_path=Path(model_path),
|
||||
confidence_threshold=self.confidence_threshold
|
||||
)
|
||||
logger.info(f"Loaded pre-trained model from {model_path}")
|
||||
else:
|
||||
logger.warning("No pre-trained model available, continuing with FEA")
|
||||
self.phase = 'exploration'
|
||||
|
||||
def _retrain_surrogate(self):
|
||||
"""Retrain surrogate with additional data."""
|
||||
logger.info(f"Retraining surrogate with {len(self.fea_samples)} total samples")
|
||||
# Trigger retraining pipeline
|
||||
# This would integrate with AtomizerField training
|
||||
|
||||
def add_fea_sample(self, design: Dict[str, float], results: Dict[str, float]):
|
||||
"""
|
||||
Add FEA result to training data.
|
||||
|
||||
Args:
|
||||
design: Design variables
|
||||
results: FEA results
|
||||
"""
|
||||
self.fea_samples.append({
|
||||
'trial': self.trial_count,
|
||||
'design': design,
|
||||
'results': results,
|
||||
'timestamp': time.time()
|
||||
})
|
||||
|
||||
def get_phase_info(self) -> Dict[str, Any]:
|
||||
"""Get current phase information."""
|
||||
return {
|
||||
'phase': self.phase,
|
||||
'trial_count': self.trial_count,
|
||||
'fea_samples': len(self.fea_samples),
|
||||
'has_surrogate': self.nn_surrogate is not None,
|
||||
'min_fea_samples': self.min_fea_samples,
|
||||
'validation_frequency': self.validation_frequency
|
||||
}
|
||||
|
||||
|
||||
def create_parametric_surrogate_for_study(
|
||||
model_path: str = None,
|
||||
training_data_dir: str = None,
|
||||
project_root: Path = None
|
||||
) -> Optional[ParametricSurrogate]:
|
||||
"""
|
||||
Factory function to create parametric neural surrogate for UAV arm study.
|
||||
|
||||
This is the recommended surrogate type - predicts all objectives (mass, freq, etc.)
|
||||
|
||||
Args:
|
||||
model_path: Path to parametric model checkpoint (auto-detect if None)
|
||||
training_data_dir: Path to training data (auto-detect if None)
|
||||
project_root: Project root directory for auto-detection
|
||||
|
||||
Returns:
|
||||
ParametricSurrogate instance or None if not available
|
||||
"""
|
||||
if not TORCH_AVAILABLE or not PARAMETRIC_MODEL_AVAILABLE:
|
||||
logger.warning("Parametric surrogate not available: PyTorch or ParametricPredictor missing")
|
||||
return None
|
||||
|
||||
# Auto-detect project root
|
||||
if project_root is None:
|
||||
project_root = Path(__file__).parent.parent
|
||||
|
||||
# Auto-detect parametric model path
|
||||
if model_path is None:
|
||||
default_model = project_root / "atomizer-field" / "runs" / "parametric_uav_arm_v2" / "checkpoint_best.pt"
|
||||
if not default_model.exists():
|
||||
# Try older path
|
||||
default_model = project_root / "atomizer-field" / "runs" / "parametric_uav_arm" / "checkpoint_best.pt"
|
||||
if default_model.exists():
|
||||
model_path = str(default_model)
|
||||
else:
|
||||
logger.warning(f"No trained parametric model found")
|
||||
return None
|
||||
else:
|
||||
model_path = str(model_path)
|
||||
|
||||
# Auto-detect training data
|
||||
if training_data_dir is None:
|
||||
default_data = project_root / "atomizer_field_training_data" / "uav_arm_train"
|
||||
if default_data.exists():
|
||||
training_data_dir = str(default_data)
|
||||
else:
|
||||
logger.warning(f"No training data found at {default_data}")
|
||||
return None
|
||||
else:
|
||||
training_data_dir = str(training_data_dir)
|
||||
|
||||
try:
|
||||
return ParametricSurrogate(
|
||||
model_path=Path(model_path),
|
||||
training_data_dir=Path(training_data_dir)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create parametric surrogate: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return None
|
||||
|
||||
|
||||
def create_surrogate_for_study(
|
||||
model_path: str = None,
|
||||
training_data_dir: str = None,
|
||||
project_root: Path = None,
|
||||
study_name: str = None
|
||||
) -> Optional[ParametricSurrogate]:
|
||||
"""
|
||||
Factory function to create neural surrogate for any study.
|
||||
|
||||
Automatically detects whether to use ParametricSurrogate or NeuralSurrogate
|
||||
based on available models.
|
||||
|
||||
Args:
|
||||
model_path: Path to model checkpoint (auto-detect if None)
|
||||
training_data_dir: Path to training data (optional, no longer required)
|
||||
project_root: Project root directory for auto-detection
|
||||
study_name: Name of the study (for auto-detection)
|
||||
|
||||
Returns:
|
||||
ParametricSurrogate or NeuralSurrogate instance, or None if not available
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
logger.warning("Neural surrogate not available: PyTorch missing")
|
||||
return None
|
||||
|
||||
# Auto-detect project root
|
||||
if project_root is None:
|
||||
project_root = Path(__file__).parent.parent
|
||||
|
||||
# Try ParametricSurrogate first (more capable)
|
||||
if PARAMETRIC_MODEL_AVAILABLE:
|
||||
# Search order for parametric models
|
||||
model_search_paths = []
|
||||
|
||||
if study_name:
|
||||
# Study-specific paths
|
||||
model_search_paths.append(project_root / "atomizer-field" / "runs" / study_name / "checkpoint_best.pt")
|
||||
|
||||
# Common model names to try
|
||||
model_search_paths.extend([
|
||||
project_root / "atomizer-field" / "runs" / "bracket_model" / "checkpoint_best.pt",
|
||||
project_root / "atomizer-field" / "runs" / "bracket_stiffness_optimization_atomizerfield" / "checkpoint_best.pt",
|
||||
project_root / "atomizer-field" / "runs" / "parametric_uav_arm_v2" / "checkpoint_best.pt",
|
||||
project_root / "atomizer-field" / "runs" / "parametric_uav_arm" / "checkpoint_best.pt",
|
||||
project_root / "atomizer-field" / "runs" / "uav_arm_model" / "checkpoint_best.pt",
|
||||
])
|
||||
|
||||
# Use explicit path if provided
|
||||
if model_path is not None:
|
||||
model_search_paths = [Path(model_path)]
|
||||
|
||||
# Find first existing model
|
||||
found_model = None
|
||||
|
||||
for mp in model_search_paths:
|
||||
if mp.exists():
|
||||
found_model = mp
|
||||
logger.info(f"Found model at: {found_model}")
|
||||
break
|
||||
|
||||
if found_model:
|
||||
try:
|
||||
# ParametricSurrogate no longer requires training_data_dir
|
||||
# It creates a synthetic reference graph like during training
|
||||
return ParametricSurrogate(
|
||||
model_path=found_model,
|
||||
training_data_dir=None # Not required anymore
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to create ParametricSurrogate: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
# Fall through to try NeuralSurrogate
|
||||
|
||||
# Fall back to NeuralSurrogate if ParametricSurrogate not available
|
||||
if ATOMIZER_FIELD_AVAILABLE:
|
||||
if model_path is None:
|
||||
default_model = project_root / "atomizer-field" / "runs" / "uav_arm_model" / "checkpoint_best.pt"
|
||||
if default_model.exists():
|
||||
model_path = str(default_model)
|
||||
else:
|
||||
logger.warning(f"No trained model found")
|
||||
return None
|
||||
else:
|
||||
model_path = str(model_path)
|
||||
|
||||
if training_data_dir is None:
|
||||
default_data = project_root / "atomizer_field_training_data" / "uav_arm_train"
|
||||
if default_data.exists():
|
||||
training_data_dir = str(default_data)
|
||||
else:
|
||||
logger.warning(f"No training data found (required for NeuralSurrogate)")
|
||||
return None
|
||||
else:
|
||||
training_data_dir = str(training_data_dir)
|
||||
|
||||
try:
|
||||
return NeuralSurrogate(
|
||||
model_path=Path(model_path),
|
||||
training_data_dir=Path(training_data_dir)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create neural surrogate: {e}")
|
||||
return None
|
||||
|
||||
logger.warning("No suitable neural model modules available")
|
||||
return None
|
||||
|
||||
|
||||
def create_surrogate_from_config(config: Dict[str, Any]) -> Optional[NeuralSurrogate]:
|
||||
"""
|
||||
Factory function to create neural surrogate from workflow configuration.
|
||||
|
||||
Args:
|
||||
config: Workflow configuration dictionary
|
||||
|
||||
Returns:
|
||||
NeuralSurrogate instance if enabled, None otherwise
|
||||
"""
|
||||
if not config.get('neural_surrogate', {}).get('enabled', False):
|
||||
logger.info("Neural surrogate is disabled")
|
||||
return None
|
||||
|
||||
surrogate_config = config['neural_surrogate']
|
||||
|
||||
model_path = surrogate_config.get('model_path')
|
||||
training_data_dir = surrogate_config.get('training_data_dir')
|
||||
|
||||
if not model_path:
|
||||
logger.error("Neural surrogate enabled but model_path not specified")
|
||||
return None
|
||||
|
||||
if not training_data_dir:
|
||||
logger.error("Neural surrogate enabled but training_data_dir not specified")
|
||||
return None
|
||||
|
||||
try:
|
||||
surrogate = NeuralSurrogate(
|
||||
model_path=Path(model_path),
|
||||
training_data_dir=Path(training_data_dir),
|
||||
device=surrogate_config.get('device', 'auto')
|
||||
)
|
||||
|
||||
logger.info("Neural surrogate created successfully")
|
||||
return surrogate
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create neural surrogate: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def create_hybrid_optimizer_from_config(config: Dict[str, Any]) -> Optional[HybridOptimizer]:
|
||||
"""
|
||||
Factory function to create hybrid optimizer from configuration.
|
||||
|
||||
Args:
|
||||
config: Workflow configuration dictionary
|
||||
|
||||
Returns:
|
||||
HybridOptimizer instance if enabled, None otherwise
|
||||
"""
|
||||
if not config.get('hybrid_optimization', {}).get('enabled', False):
|
||||
logger.info("Hybrid optimization is disabled")
|
||||
return None
|
||||
|
||||
hybrid_config = config.get('hybrid_optimization', {})
|
||||
|
||||
try:
|
||||
optimizer = HybridOptimizer(hybrid_config)
|
||||
logger.info("Hybrid optimizer created successfully")
|
||||
return optimizer
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create hybrid optimizer: {e}")
|
||||
return None
|
||||
@@ -0,0 +1,648 @@
|
||||
"""
|
||||
Simple MLP Surrogate for Fast Optimization
|
||||
|
||||
This module provides a lightweight neural network surrogate that:
|
||||
1. Trains directly from Optuna database (no mesh parsing needed)
|
||||
2. Uses simple MLP: design_params -> [mass, frequency, max_disp, max_stress]
|
||||
3. Provides millisecond predictions for optimization
|
||||
|
||||
This is much simpler than the GNN-based approach and works well when:
|
||||
- You have enough FEA data in the database
|
||||
- You only need scalar objective predictions (no field data)
|
||||
- You want quick setup without mesh parsing pipeline
|
||||
|
||||
Usage:
|
||||
from optimization_engine.processors.surrogates.simple_mlp_surrogate import SimpleSurrogate, train_from_database
|
||||
|
||||
# Train from database
|
||||
surrogate = train_from_database(
|
||||
db_path="studies/uav_arm_atomizerfield_test/2_results/study.db",
|
||||
study_name="uav_arm_atomizerfield_test"
|
||||
)
|
||||
|
||||
# Predict
|
||||
results = surrogate.predict({
|
||||
'beam_half_core_thickness': 3.0,
|
||||
'beam_face_thickness': 1.5,
|
||||
'holes_diameter': 8.0,
|
||||
'hole_count': 4
|
||||
})
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Optional, List, Tuple
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.utils.data import Dataset, DataLoader, random_split
|
||||
TORCH_AVAILABLE = True
|
||||
except ImportError:
|
||||
TORCH_AVAILABLE = False
|
||||
logger.warning("PyTorch not installed. SimpleSurrogate will be limited.")
|
||||
|
||||
try:
|
||||
import optuna
|
||||
OPTUNA_AVAILABLE = True
|
||||
except ImportError:
|
||||
OPTUNA_AVAILABLE = False
|
||||
|
||||
|
||||
class MLPModel(nn.Module):
|
||||
"""Simple MLP for design parameter -> objective prediction."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
n_inputs: int = 4,
|
||||
n_outputs: int = 4,
|
||||
hidden_dims: List[int] = [128, 256, 128, 64],
|
||||
dropout: float = 0.1
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
layers = []
|
||||
prev_dim = n_inputs
|
||||
|
||||
for hidden_dim in hidden_dims:
|
||||
layers.extend([
|
||||
nn.Linear(prev_dim, hidden_dim),
|
||||
nn.LayerNorm(hidden_dim),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(dropout)
|
||||
])
|
||||
prev_dim = hidden_dim
|
||||
|
||||
layers.append(nn.Linear(prev_dim, n_outputs))
|
||||
|
||||
self.network = nn.Sequential(*layers)
|
||||
|
||||
# Initialize weights
|
||||
self._init_weights()
|
||||
|
||||
def _init_weights(self):
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Linear):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
return self.network(x)
|
||||
|
||||
|
||||
class FEADataset(Dataset):
|
||||
"""Dataset for training from FEA results."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
design_params: np.ndarray,
|
||||
objectives: np.ndarray
|
||||
):
|
||||
self.design_params = torch.tensor(design_params, dtype=torch.float32)
|
||||
self.objectives = torch.tensor(objectives, dtype=torch.float32)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.design_params)
|
||||
|
||||
def __getitem__(self, idx):
|
||||
return self.design_params[idx], self.objectives[idx]
|
||||
|
||||
|
||||
class SimpleSurrogate:
|
||||
"""
|
||||
Simple MLP-based surrogate for FEA prediction.
|
||||
|
||||
This is a lightweight alternative to the GNN-based approach that:
|
||||
- Doesn't require mesh parsing
|
||||
- Trains directly from database
|
||||
- Provides fast scalar predictions
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model: nn.Module = None,
|
||||
design_var_names: List[str] = None,
|
||||
objective_names: List[str] = None,
|
||||
normalization: Dict[str, Any] = None,
|
||||
device: str = 'auto'
|
||||
):
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required. Install: pip install torch")
|
||||
|
||||
# Set device
|
||||
if device == 'auto':
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
self.model = model
|
||||
if model is not None:
|
||||
self.model = model.to(self.device)
|
||||
self.model.eval()
|
||||
|
||||
self.design_var_names = design_var_names or []
|
||||
self.objective_names = objective_names or ['mass', 'frequency', 'max_displacement', 'max_stress']
|
||||
|
||||
# Normalization stats
|
||||
self.normalization = normalization or {
|
||||
'design_mean': np.zeros(len(self.design_var_names)),
|
||||
'design_std': np.ones(len(self.design_var_names)),
|
||||
'objective_mean': np.zeros(len(self.objective_names)),
|
||||
'objective_std': np.ones(len(self.objective_names))
|
||||
}
|
||||
|
||||
# Performance tracking
|
||||
self.stats = {
|
||||
'predictions': 0,
|
||||
'total_time_ms': 0.0
|
||||
}
|
||||
|
||||
logger.info(f"SimpleSurrogate initialized on {self.device}")
|
||||
|
||||
def predict(self, design_params: Dict[str, float]) -> Dict[str, Any]:
|
||||
"""
|
||||
Predict FEA objectives from design parameters.
|
||||
|
||||
Args:
|
||||
design_params: Dict of design variable values
|
||||
|
||||
Returns:
|
||||
Dict with mass, frequency, max_displacement, max_stress, inference_time_ms
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
# Build input tensor
|
||||
param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
|
||||
x = np.array(param_values, dtype=np.float32)
|
||||
|
||||
# Normalize
|
||||
x_norm = (x - self.normalization['design_mean']) / (self.normalization['design_std'] + 1e-8)
|
||||
x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
|
||||
|
||||
# Predict
|
||||
with torch.no_grad():
|
||||
y_norm = self.model(x_tensor).cpu().numpy()[0]
|
||||
|
||||
# Denormalize
|
||||
y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
|
||||
|
||||
inference_time = (time.time() - start_time) * 1000
|
||||
|
||||
results = {
|
||||
self.objective_names[i]: float(y[i]) for i in range(len(self.objective_names))
|
||||
}
|
||||
results['inference_time_ms'] = inference_time
|
||||
|
||||
# Update stats
|
||||
self.stats['predictions'] += 1
|
||||
self.stats['total_time_ms'] += inference_time
|
||||
|
||||
return results
|
||||
|
||||
def get_statistics(self) -> Dict[str, Any]:
|
||||
"""Get prediction statistics."""
|
||||
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
|
||||
if self.stats['predictions'] > 0 else 0
|
||||
|
||||
return {
|
||||
'total_predictions': self.stats['predictions'],
|
||||
'total_time_ms': self.stats['total_time_ms'],
|
||||
'average_time_ms': avg_time,
|
||||
'device': str(self.device),
|
||||
'design_var_names': self.design_var_names,
|
||||
'objective_names': self.objective_names
|
||||
}
|
||||
|
||||
def save(self, path: Path):
|
||||
"""Save surrogate to file."""
|
||||
path = Path(path)
|
||||
torch.save({
|
||||
'model_state_dict': self.model.state_dict(),
|
||||
'design_var_names': self.design_var_names,
|
||||
'objective_names': self.objective_names,
|
||||
'normalization': self.normalization,
|
||||
'model_config': {
|
||||
'n_inputs': len(self.design_var_names),
|
||||
'n_outputs': len(self.objective_names)
|
||||
}
|
||||
}, path)
|
||||
logger.info(f"Surrogate saved to {path}")
|
||||
|
||||
@classmethod
|
||||
def load(cls, path: Path, device: str = 'auto') -> 'SimpleSurrogate':
|
||||
"""Load surrogate from file."""
|
||||
path = Path(path)
|
||||
checkpoint = torch.load(path, map_location='cpu')
|
||||
|
||||
# Create model
|
||||
model_config = checkpoint['model_config']
|
||||
model = MLPModel(
|
||||
n_inputs=model_config['n_inputs'],
|
||||
n_outputs=model_config['n_outputs']
|
||||
)
|
||||
model.load_state_dict(checkpoint['model_state_dict'])
|
||||
|
||||
return cls(
|
||||
model=model,
|
||||
design_var_names=checkpoint['design_var_names'],
|
||||
objective_names=checkpoint['objective_names'],
|
||||
normalization=checkpoint['normalization'],
|
||||
device=device
|
||||
)
|
||||
|
||||
|
||||
def extract_data_from_database(
|
||||
db_path: str,
|
||||
study_name: str
|
||||
) -> Tuple[np.ndarray, np.ndarray, List[str], List[str]]:
|
||||
"""
|
||||
Extract training data from Optuna database.
|
||||
|
||||
Args:
|
||||
db_path: Path to SQLite database
|
||||
study_name: Name of Optuna study
|
||||
|
||||
Returns:
|
||||
Tuple of (design_params, objectives, design_var_names, objective_names)
|
||||
"""
|
||||
if not OPTUNA_AVAILABLE:
|
||||
raise ImportError("Optuna required. Install: pip install optuna")
|
||||
|
||||
storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
|
||||
study = optuna.load_study(study_name=study_name, storage=storage)
|
||||
|
||||
# Get completed trials
|
||||
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
|
||||
|
||||
if not completed_trials:
|
||||
raise ValueError(f"No completed trials in study {study_name}")
|
||||
|
||||
logger.info(f"Found {len(completed_trials)} completed trials")
|
||||
|
||||
# Get design variable names from first trial
|
||||
design_var_names = list(completed_trials[0].params.keys())
|
||||
|
||||
# Determine objective structure
|
||||
first_values = completed_trials[0].values
|
||||
if first_values is None:
|
||||
raise ValueError("Trials have no objective values")
|
||||
|
||||
# For multi-objective, values are [mass, frequency, ...]
|
||||
# We also need user_attrs for constraints
|
||||
|
||||
# Collect data - filter out invalid samples
|
||||
design_params_list = []
|
||||
objectives_list = []
|
||||
skipped = 0
|
||||
|
||||
for trial in completed_trials:
|
||||
# Objectives - need mass, frequency, max_disp, max_stress
|
||||
mass = trial.values[0] if len(trial.values) > 0 else 0.0
|
||||
frequency = trial.values[1] if len(trial.values) > 1 else 0.0
|
||||
|
||||
# Get constraints from user_attrs
|
||||
max_disp = trial.user_attrs.get('max_displacement', 0.0)
|
||||
max_stress = trial.user_attrs.get('max_stress', 0.0)
|
||||
|
||||
# Note: frequency is stored as -freq for minimization, so convert back
|
||||
# Also filter out inf values
|
||||
objectives = [mass, -frequency, max_disp, max_stress]
|
||||
|
||||
# Skip invalid samples (inf, nan, or extreme values)
|
||||
if any(np.isinf(v) or np.isnan(v) or v > 1e10 for v in objectives):
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# Skip if frequency is negative (indicates error)
|
||||
if -frequency <= 0:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
# Design parameters
|
||||
params = [trial.params.get(name, 0.0) for name in design_var_names]
|
||||
design_params_list.append(params)
|
||||
objectives_list.append(objectives)
|
||||
|
||||
if skipped > 0:
|
||||
logger.info(f"Skipped {skipped} invalid samples")
|
||||
|
||||
if not design_params_list:
|
||||
raise ValueError("No valid samples found after filtering")
|
||||
|
||||
design_params = np.array(design_params_list, dtype=np.float32)
|
||||
objectives = np.array(objectives_list, dtype=np.float32)
|
||||
|
||||
objective_names = ['mass', 'frequency', 'max_displacement', 'max_stress']
|
||||
|
||||
logger.info(f"Extracted {len(design_params)} valid samples")
|
||||
logger.info(f"Design vars: {design_var_names}")
|
||||
logger.info(f"Objectives: {objective_names}")
|
||||
|
||||
return design_params, objectives, design_var_names, objective_names
|
||||
|
||||
|
||||
def train_from_database(
|
||||
db_path: str,
|
||||
study_name: str,
|
||||
epochs: int = 200,
|
||||
batch_size: int = 32,
|
||||
learning_rate: float = 0.001,
|
||||
val_split: float = 0.2,
|
||||
save_path: Optional[str] = None,
|
||||
device: str = 'auto'
|
||||
) -> SimpleSurrogate:
|
||||
"""
|
||||
Train SimpleSurrogate from Optuna database.
|
||||
|
||||
Args:
|
||||
db_path: Path to SQLite database
|
||||
study_name: Name of Optuna study
|
||||
epochs: Training epochs
|
||||
batch_size: Batch size
|
||||
learning_rate: Learning rate
|
||||
val_split: Validation split ratio
|
||||
save_path: Optional path to save trained model
|
||||
device: Computing device
|
||||
|
||||
Returns:
|
||||
Trained SimpleSurrogate
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required")
|
||||
|
||||
# Set device
|
||||
if device == 'auto':
|
||||
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
device = torch.device(device)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print("Training Simple MLP Surrogate from Database")
|
||||
print(f"{'='*60}")
|
||||
print(f"Device: {device}")
|
||||
|
||||
# Extract data
|
||||
print(f"\n[1] Loading data from {db_path}")
|
||||
design_params, objectives, design_var_names, objective_names = extract_data_from_database(
|
||||
db_path, study_name
|
||||
)
|
||||
|
||||
print(f" Samples: {len(design_params)}")
|
||||
print(f" Design vars: {design_var_names}")
|
||||
print(f" Objectives: {objective_names}")
|
||||
|
||||
# Compute normalization stats
|
||||
design_mean = design_params.mean(axis=0)
|
||||
design_std = design_params.std(axis=0)
|
||||
objective_mean = objectives.mean(axis=0)
|
||||
objective_std = objectives.std(axis=0)
|
||||
|
||||
print(f"\n Objective ranges:")
|
||||
for i, name in enumerate(objective_names):
|
||||
print(f" {name}: {objectives[:, i].min():.2f} - {objectives[:, i].max():.2f}")
|
||||
|
||||
# Normalize data
|
||||
design_params_norm = (design_params - design_mean) / (design_std + 1e-8)
|
||||
objectives_norm = (objectives - objective_mean) / (objective_std + 1e-8)
|
||||
|
||||
# Create dataset
|
||||
dataset = FEADataset(design_params_norm, objectives_norm)
|
||||
|
||||
# Split into train/val
|
||||
n_val = int(len(dataset) * val_split)
|
||||
n_train = len(dataset) - n_val
|
||||
train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
|
||||
|
||||
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
|
||||
val_loader = DataLoader(val_dataset, batch_size=batch_size)
|
||||
|
||||
print(f"\n[2] Creating model")
|
||||
print(f" Train samples: {n_train}")
|
||||
print(f" Val samples: {n_val}")
|
||||
|
||||
# Create model
|
||||
model = MLPModel(
|
||||
n_inputs=len(design_var_names),
|
||||
n_outputs=len(objective_names),
|
||||
hidden_dims=[128, 256, 128, 64]
|
||||
).to(device)
|
||||
|
||||
n_params = sum(p.numel() for p in model.parameters())
|
||||
print(f" Model params: {n_params:,}")
|
||||
|
||||
# Training
|
||||
print(f"\n[3] Training for {epochs} epochs")
|
||||
|
||||
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
|
||||
|
||||
best_val_loss = float('inf')
|
||||
best_state = None
|
||||
|
||||
for epoch in range(epochs):
|
||||
# Train
|
||||
model.train()
|
||||
train_loss = 0.0
|
||||
for x, y in train_loader:
|
||||
x, y = x.to(device), y.to(device)
|
||||
|
||||
optimizer.zero_grad()
|
||||
pred = model(x)
|
||||
loss = F.mse_loss(pred, y)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
train_loss += loss.item()
|
||||
|
||||
train_loss /= len(train_loader)
|
||||
|
||||
# Validate
|
||||
model.eval()
|
||||
val_loss = 0.0
|
||||
with torch.no_grad():
|
||||
for x, y in val_loader:
|
||||
x, y = x.to(device), y.to(device)
|
||||
pred = model(x)
|
||||
val_loss += F.mse_loss(pred, y).item()
|
||||
|
||||
val_loss /= len(val_loader)
|
||||
scheduler.step()
|
||||
|
||||
# Track best
|
||||
if val_loss < best_val_loss:
|
||||
best_val_loss = val_loss
|
||||
best_state = model.state_dict().copy()
|
||||
|
||||
# Log progress
|
||||
if (epoch + 1) % 20 == 0 or epoch == 0:
|
||||
print(f" Epoch {epoch+1:3d}: train_loss={train_loss:.6f}, val_loss={val_loss:.6f}")
|
||||
|
||||
# Load best model
|
||||
model.load_state_dict(best_state)
|
||||
print(f"\n Best val_loss: {best_val_loss:.6f}")
|
||||
|
||||
# Create surrogate
|
||||
normalization = {
|
||||
'design_mean': design_mean,
|
||||
'design_std': design_std,
|
||||
'objective_mean': objective_mean,
|
||||
'objective_std': objective_std
|
||||
}
|
||||
|
||||
surrogate = SimpleSurrogate(
|
||||
model=model,
|
||||
design_var_names=design_var_names,
|
||||
objective_names=objective_names,
|
||||
normalization=normalization,
|
||||
device=str(device)
|
||||
)
|
||||
|
||||
# Evaluate accuracy
|
||||
print(f"\n[4] Evaluating accuracy on validation set")
|
||||
model.eval()
|
||||
|
||||
all_preds = []
|
||||
all_targets = []
|
||||
|
||||
with torch.no_grad():
|
||||
for x, y in val_loader:
|
||||
x = x.to(device)
|
||||
pred = model(x).cpu().numpy()
|
||||
all_preds.append(pred)
|
||||
all_targets.append(y.numpy())
|
||||
|
||||
all_preds = np.concatenate(all_preds)
|
||||
all_targets = np.concatenate(all_targets)
|
||||
|
||||
# Denormalize for error calculation
|
||||
preds_denorm = all_preds * objective_std + objective_mean
|
||||
targets_denorm = all_targets * objective_std + objective_mean
|
||||
|
||||
for i, name in enumerate(objective_names):
|
||||
mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
|
||||
mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) / (np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
|
||||
print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
|
||||
|
||||
# Save if requested
|
||||
if save_path:
|
||||
surrogate.save(save_path)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print("Training complete!")
|
||||
print(f"{'='*60}")
|
||||
|
||||
return surrogate
|
||||
|
||||
|
||||
def create_simple_surrogate_for_study(
|
||||
db_path: str = None,
|
||||
study_name: str = None,
|
||||
model_path: str = None,
|
||||
project_root: Path = None
|
||||
) -> Optional[SimpleSurrogate]:
|
||||
"""
|
||||
Factory function to create or load SimpleSurrogate for UAV arm study.
|
||||
|
||||
If model_path exists, loads the model. Otherwise trains from database.
|
||||
|
||||
Args:
|
||||
db_path: Path to Optuna database
|
||||
study_name: Name of study
|
||||
model_path: Path to saved model (auto-detect if None)
|
||||
project_root: Project root for auto-detection
|
||||
|
||||
Returns:
|
||||
SimpleSurrogate instance or None
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
logger.warning("PyTorch not available")
|
||||
return None
|
||||
|
||||
# Auto-detect paths
|
||||
if project_root is None:
|
||||
project_root = Path(__file__).parent.parent
|
||||
|
||||
if model_path is None:
|
||||
model_path = project_root / "simple_mlp_surrogate.pt"
|
||||
else:
|
||||
model_path = Path(model_path)
|
||||
|
||||
# Try to load existing model
|
||||
if model_path.exists():
|
||||
logger.info(f"Loading existing surrogate from {model_path}")
|
||||
return SimpleSurrogate.load(model_path)
|
||||
|
||||
# Otherwise train from database
|
||||
if db_path is None:
|
||||
db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
|
||||
else:
|
||||
db_path = Path(db_path)
|
||||
|
||||
if study_name is None:
|
||||
study_name = "uav_arm_atomizerfield_test"
|
||||
|
||||
if not db_path.exists():
|
||||
logger.warning(f"Database not found: {db_path}")
|
||||
return None
|
||||
|
||||
logger.info(f"Training surrogate from {db_path}")
|
||||
return train_from_database(
|
||||
db_path=str(db_path),
|
||||
study_name=study_name,
|
||||
save_path=str(model_path)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
# Default paths
|
||||
project_root = Path(__file__).parent.parent
|
||||
db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
|
||||
model_path = project_root / "simple_mlp_surrogate.pt"
|
||||
|
||||
print("Simple MLP Surrogate Training")
|
||||
print("="*60)
|
||||
|
||||
if not db_path.exists():
|
||||
print(f"ERROR: Database not found: {db_path}")
|
||||
sys.exit(1)
|
||||
|
||||
# Train
|
||||
surrogate = train_from_database(
|
||||
db_path=str(db_path),
|
||||
study_name="uav_arm_atomizerfield_test",
|
||||
epochs=300,
|
||||
save_path=str(model_path)
|
||||
)
|
||||
|
||||
# Test predictions
|
||||
print("\n[5] Testing predictions")
|
||||
test_params = {
|
||||
'beam_half_core_thickness': 3.0,
|
||||
'beam_face_thickness': 1.5,
|
||||
'holes_diameter': 8.0,
|
||||
'hole_count': 4
|
||||
}
|
||||
|
||||
print(f" Input: {test_params}")
|
||||
results = surrogate.predict(test_params)
|
||||
print(f" Mass: {results['mass']:.2f} g")
|
||||
print(f" Frequency: {results['frequency']:.2f} Hz")
|
||||
print(f" Max Displacement: {results['max_displacement']:.6f} mm")
|
||||
print(f" Max Stress: {results['max_stress']:.2f} MPa")
|
||||
print(f" Inference time: {results['inference_time_ms']:.2f} ms")
|
||||
|
||||
# Test variation
|
||||
print("\n[6] Testing variation with parameters")
|
||||
for thickness in [1.0, 3.0, 5.0]:
|
||||
params = {**test_params, 'beam_half_core_thickness': thickness}
|
||||
r = surrogate.predict(params)
|
||||
print(f" thickness={thickness}: mass={r['mass']:.0f}g, freq={r['frequency']:.2f}Hz")
|
||||
800
optimization_engine/processors/surrogates/surrogate_tuner.py
Normal file
800
optimization_engine/processors/surrogates/surrogate_tuner.py
Normal file
@@ -0,0 +1,800 @@
|
||||
"""
|
||||
Hyperparameter Tuning for Neural Network Surrogates
|
||||
|
||||
This module provides automatic hyperparameter optimization for MLP surrogates
|
||||
using Optuna, with proper train/validation splits and early stopping.
|
||||
|
||||
Key Features:
|
||||
1. Optuna-based hyperparameter search
|
||||
2. K-fold cross-validation
|
||||
3. Early stopping to prevent overfitting
|
||||
4. Ensemble model support
|
||||
5. Proper uncertainty quantification
|
||||
|
||||
Usage:
|
||||
from optimization_engine.processors.surrogates.surrogate_tuner import SurrogateHyperparameterTuner
|
||||
|
||||
tuner = SurrogateHyperparameterTuner(
|
||||
input_dim=11,
|
||||
output_dim=3,
|
||||
n_trials=50
|
||||
)
|
||||
best_config = tuner.tune(X_train, Y_train)
|
||||
model = tuner.create_tuned_model(best_config)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
from typing import Dict, List, Tuple, Optional, Any
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.utils.data import DataLoader, TensorDataset
|
||||
TORCH_AVAILABLE = True
|
||||
except ImportError:
|
||||
TORCH_AVAILABLE = False
|
||||
logger.warning("PyTorch not installed")
|
||||
|
||||
try:
|
||||
import optuna
|
||||
from optuna.samplers import TPESampler
|
||||
from optuna.pruners import MedianPruner
|
||||
OPTUNA_AVAILABLE = True
|
||||
except ImportError:
|
||||
OPTUNA_AVAILABLE = False
|
||||
logger.warning("Optuna not installed")
|
||||
|
||||
|
||||
@dataclass
|
||||
class SurrogateConfig:
|
||||
"""Configuration for a tuned surrogate model."""
|
||||
hidden_dims: List[int] = field(default_factory=lambda: [128, 256, 128])
|
||||
dropout: float = 0.1
|
||||
activation: str = 'relu'
|
||||
use_batch_norm: bool = True
|
||||
learning_rate: float = 1e-3
|
||||
weight_decay: float = 1e-4
|
||||
batch_size: int = 16
|
||||
max_epochs: int = 500
|
||||
early_stopping_patience: int = 30
|
||||
|
||||
# Normalization stats (filled during training)
|
||||
input_mean: Optional[np.ndarray] = None
|
||||
input_std: Optional[np.ndarray] = None
|
||||
output_mean: Optional[np.ndarray] = None
|
||||
output_std: Optional[np.ndarray] = None
|
||||
|
||||
# Validation metrics
|
||||
val_loss: float = float('inf')
|
||||
val_r2: Dict[str, float] = field(default_factory=dict)
|
||||
|
||||
|
||||
class TunableMLP(nn.Module):
|
||||
"""Flexible MLP with configurable architecture."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_dim: int,
|
||||
output_dim: int,
|
||||
hidden_dims: List[int],
|
||||
dropout: float = 0.1,
|
||||
activation: str = 'relu',
|
||||
use_batch_norm: bool = True
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
|
||||
# Activation function
|
||||
activations = {
|
||||
'relu': nn.ReLU(),
|
||||
'leaky_relu': nn.LeakyReLU(0.1),
|
||||
'elu': nn.ELU(),
|
||||
'selu': nn.SELU(),
|
||||
'gelu': nn.GELU(),
|
||||
'swish': nn.SiLU()
|
||||
}
|
||||
act_fn = activations.get(activation, nn.ReLU())
|
||||
|
||||
# Build layers
|
||||
layers = []
|
||||
prev_dim = input_dim
|
||||
|
||||
for hidden_dim in hidden_dims:
|
||||
layers.append(nn.Linear(prev_dim, hidden_dim))
|
||||
if use_batch_norm:
|
||||
layers.append(nn.BatchNorm1d(hidden_dim))
|
||||
layers.append(act_fn)
|
||||
if dropout > 0:
|
||||
layers.append(nn.Dropout(dropout))
|
||||
prev_dim = hidden_dim
|
||||
|
||||
layers.append(nn.Linear(prev_dim, output_dim))
|
||||
self.network = nn.Sequential(*layers)
|
||||
|
||||
self._init_weights()
|
||||
|
||||
def _init_weights(self):
|
||||
"""Initialize weights using Kaiming initialization."""
|
||||
for m in self.modules():
|
||||
if isinstance(m, nn.Linear):
|
||||
nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
|
||||
if m.bias is not None:
|
||||
nn.init.constant_(m.bias, 0)
|
||||
|
||||
def forward(self, x):
|
||||
return self.network(x)
|
||||
|
||||
|
||||
class EarlyStopping:
|
||||
"""Early stopping to prevent overfitting."""
|
||||
|
||||
def __init__(self, patience: int = 20, min_delta: float = 1e-5):
|
||||
self.patience = patience
|
||||
self.min_delta = min_delta
|
||||
self.counter = 0
|
||||
self.best_loss = float('inf')
|
||||
self.best_model_state = None
|
||||
self.should_stop = False
|
||||
|
||||
def __call__(self, val_loss: float, model: nn.Module) -> bool:
|
||||
if val_loss < self.best_loss - self.min_delta:
|
||||
self.best_loss = val_loss
|
||||
self.best_model_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
|
||||
self.counter = 0
|
||||
else:
|
||||
self.counter += 1
|
||||
if self.counter >= self.patience:
|
||||
self.should_stop = True
|
||||
|
||||
return self.should_stop
|
||||
|
||||
def restore_best(self, model: nn.Module):
|
||||
"""Restore model to best state."""
|
||||
if self.best_model_state is not None:
|
||||
model.load_state_dict(self.best_model_state)
|
||||
|
||||
|
||||
class SurrogateHyperparameterTuner:
|
||||
"""
|
||||
Automatic hyperparameter tuning for neural network surrogates.
|
||||
|
||||
Uses Optuna for Bayesian optimization of:
|
||||
- Network architecture (layers, widths)
|
||||
- Regularization (dropout, weight decay)
|
||||
- Learning rate and batch size
|
||||
- Activation functions
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_dim: int,
|
||||
output_dim: int,
|
||||
n_trials: int = 50,
|
||||
n_cv_folds: int = 5,
|
||||
device: str = 'auto',
|
||||
seed: int = 42,
|
||||
timeout_seconds: Optional[int] = None
|
||||
):
|
||||
"""
|
||||
Initialize hyperparameter tuner.
|
||||
|
||||
Args:
|
||||
input_dim: Number of input features (design variables)
|
||||
output_dim: Number of outputs (objectives)
|
||||
n_trials: Number of Optuna trials for hyperparameter search
|
||||
n_cv_folds: Number of cross-validation folds
|
||||
device: Computing device ('cuda', 'cpu', or 'auto')
|
||||
seed: Random seed for reproducibility
|
||||
timeout_seconds: Optional timeout for tuning
|
||||
"""
|
||||
if not TORCH_AVAILABLE:
|
||||
raise ImportError("PyTorch required for surrogate tuning")
|
||||
if not OPTUNA_AVAILABLE:
|
||||
raise ImportError("Optuna required for hyperparameter tuning")
|
||||
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
self.n_trials = n_trials
|
||||
self.n_cv_folds = n_cv_folds
|
||||
self.seed = seed
|
||||
self.timeout = timeout_seconds
|
||||
|
||||
if device == 'auto':
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
self.best_config: Optional[SurrogateConfig] = None
|
||||
self.study: Optional[optuna.Study] = None
|
||||
|
||||
# Set seeds
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.manual_seed(seed)
|
||||
|
||||
def _suggest_hyperparameters(self, trial: optuna.Trial) -> SurrogateConfig:
|
||||
"""Suggest hyperparameters for a trial."""
|
||||
|
||||
# Architecture
|
||||
n_layers = trial.suggest_int('n_layers', 2, 5)
|
||||
hidden_dims = []
|
||||
for i in range(n_layers):
|
||||
dim = trial.suggest_int(f'hidden_dim_{i}', 32, 512, step=32)
|
||||
hidden_dims.append(dim)
|
||||
|
||||
# Regularization
|
||||
dropout = trial.suggest_float('dropout', 0.0, 0.5)
|
||||
weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
|
||||
|
||||
# Training
|
||||
learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
|
||||
batch_size = trial.suggest_categorical('batch_size', [8, 16, 32, 64])
|
||||
|
||||
# Activation
|
||||
activation = trial.suggest_categorical('activation',
|
||||
['relu', 'leaky_relu', 'elu', 'gelu', 'swish'])
|
||||
|
||||
# Batch norm
|
||||
use_batch_norm = trial.suggest_categorical('use_batch_norm', [True, False])
|
||||
|
||||
return SurrogateConfig(
|
||||
hidden_dims=hidden_dims,
|
||||
dropout=dropout,
|
||||
activation=activation,
|
||||
use_batch_norm=use_batch_norm,
|
||||
learning_rate=learning_rate,
|
||||
weight_decay=weight_decay,
|
||||
batch_size=batch_size
|
||||
)
|
||||
|
||||
def _train_fold(
|
||||
self,
|
||||
config: SurrogateConfig,
|
||||
X_train: np.ndarray,
|
||||
Y_train: np.ndarray,
|
||||
X_val: np.ndarray,
|
||||
Y_val: np.ndarray,
|
||||
trial: Optional[optuna.Trial] = None
|
||||
) -> Tuple[float, Dict[str, float]]:
|
||||
"""Train model on one fold and return validation metrics."""
|
||||
|
||||
# Create model
|
||||
model = TunableMLP(
|
||||
input_dim=self.input_dim,
|
||||
output_dim=self.output_dim,
|
||||
hidden_dims=config.hidden_dims,
|
||||
dropout=config.dropout,
|
||||
activation=config.activation,
|
||||
use_batch_norm=config.use_batch_norm
|
||||
).to(self.device)
|
||||
|
||||
# Prepare data
|
||||
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
|
||||
Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
|
||||
X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
|
||||
Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)
|
||||
|
||||
train_dataset = TensorDataset(X_train_t, Y_train_t)
|
||||
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
|
||||
|
||||
# Optimizer and scheduler
|
||||
optimizer = torch.optim.AdamW(
|
||||
model.parameters(),
|
||||
lr=config.learning_rate,
|
||||
weight_decay=config.weight_decay
|
||||
)
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
|
||||
optimizer, T_max=config.max_epochs
|
||||
)
|
||||
|
||||
early_stopping = EarlyStopping(patience=config.early_stopping_patience)
|
||||
|
||||
# Training loop
|
||||
for epoch in range(config.max_epochs):
|
||||
model.train()
|
||||
for X_batch, Y_batch in train_loader:
|
||||
optimizer.zero_grad()
|
||||
pred = model(X_batch)
|
||||
loss = nn.functional.mse_loss(pred, Y_batch)
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
||||
optimizer.step()
|
||||
|
||||
scheduler.step()
|
||||
|
||||
# Validation
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
val_pred = model(X_val_t)
|
||||
val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()
|
||||
|
||||
# Early stopping
|
||||
if early_stopping(val_loss, model):
|
||||
break
|
||||
|
||||
# Optuna pruning (only report once per epoch across all folds)
|
||||
if trial is not None and epoch % 10 == 0:
|
||||
trial.report(val_loss, epoch // 10)
|
||||
if trial.should_prune():
|
||||
raise optuna.TrialPruned()
|
||||
|
||||
# Restore best model
|
||||
early_stopping.restore_best(model)
|
||||
|
||||
# Final validation metrics
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
val_pred = model(X_val_t).cpu().numpy()
|
||||
Y_val_np = Y_val_t.cpu().numpy()
|
||||
|
||||
val_loss = float(np.mean((val_pred - Y_val_np) ** 2))
|
||||
|
||||
# R² per output
|
||||
r2_scores = {}
|
||||
for i in range(self.output_dim):
|
||||
ss_res = np.sum((Y_val_np[:, i] - val_pred[:, i]) ** 2)
|
||||
ss_tot = np.sum((Y_val_np[:, i] - Y_val_np[:, i].mean()) ** 2)
|
||||
r2 = 1 - ss_res / ss_tot if ss_tot > 0 else 0
|
||||
r2_scores[f'output_{i}'] = r2
|
||||
|
||||
return val_loss, r2_scores
|
||||
|
||||
def _cross_validate(
|
||||
self,
|
||||
config: SurrogateConfig,
|
||||
X: np.ndarray,
|
||||
Y: np.ndarray,
|
||||
trial: Optional[optuna.Trial] = None
|
||||
) -> Tuple[float, Dict[str, float]]:
|
||||
"""Perform k-fold cross-validation."""
|
||||
|
||||
n_samples = len(X)
|
||||
indices = np.random.permutation(n_samples)
|
||||
fold_size = n_samples // self.n_cv_folds
|
||||
|
||||
fold_losses = []
|
||||
fold_r2s = {f'output_{i}': [] for i in range(self.output_dim)}
|
||||
|
||||
for fold in range(self.n_cv_folds):
|
||||
# Split indices
|
||||
val_start = fold * fold_size
|
||||
val_end = val_start + fold_size if fold < self.n_cv_folds - 1 else n_samples
|
||||
|
||||
val_indices = indices[val_start:val_end]
|
||||
train_indices = np.concatenate([indices[:val_start], indices[val_end:]])
|
||||
|
||||
X_train, Y_train = X[train_indices], Y[train_indices]
|
||||
X_val, Y_val = X[val_indices], Y[val_indices]
|
||||
|
||||
# Skip fold if too few samples
|
||||
if len(X_train) < 10 or len(X_val) < 2:
|
||||
continue
|
||||
|
||||
val_loss, r2_scores = self._train_fold(
|
||||
config, X_train, Y_train, X_val, Y_val, trial
|
||||
)
|
||||
|
||||
fold_losses.append(val_loss)
|
||||
for key, val in r2_scores.items():
|
||||
fold_r2s[key].append(val)
|
||||
|
||||
mean_loss = np.mean(fold_losses)
|
||||
mean_r2 = {k: np.mean(v) for k, v in fold_r2s.items()}
|
||||
|
||||
return mean_loss, mean_r2
|
||||
|
||||
def tune(
|
||||
self,
|
||||
X: np.ndarray,
|
||||
Y: np.ndarray,
|
||||
output_names: Optional[List[str]] = None
|
||||
) -> SurrogateConfig:
|
||||
"""
|
||||
Tune hyperparameters using Optuna.
|
||||
|
||||
Args:
|
||||
X: Input features [n_samples, input_dim]
|
||||
Y: Outputs [n_samples, output_dim]
|
||||
output_names: Optional names for outputs (for logging)
|
||||
|
||||
Returns:
|
||||
Best SurrogateConfig found
|
||||
"""
|
||||
logger.info(f"Starting hyperparameter tuning with {self.n_trials} trials...")
|
||||
logger.info(f"Data: {len(X)} samples, {self.n_cv_folds}-fold CV")
|
||||
|
||||
# Normalize data
|
||||
self.input_mean = X.mean(axis=0)
|
||||
self.input_std = X.std(axis=0) + 1e-8
|
||||
self.output_mean = Y.mean(axis=0)
|
||||
self.output_std = Y.std(axis=0) + 1e-8
|
||||
|
||||
X_norm = (X - self.input_mean) / self.input_std
|
||||
Y_norm = (Y - self.output_mean) / self.output_std
|
||||
|
||||
def objective(trial: optuna.Trial) -> float:
|
||||
config = self._suggest_hyperparameters(trial)
|
||||
val_loss, r2_scores = self._cross_validate(config, X_norm, Y_norm, trial)
|
||||
|
||||
# Log R² scores
|
||||
for key, val in r2_scores.items():
|
||||
trial.set_user_attr(f'r2_{key}', val)
|
||||
|
||||
return val_loss
|
||||
|
||||
# Create study
|
||||
self.study = optuna.create_study(
|
||||
direction='minimize',
|
||||
sampler=TPESampler(seed=self.seed, n_startup_trials=10),
|
||||
pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=20)
|
||||
)
|
||||
|
||||
self.study.optimize(
|
||||
objective,
|
||||
n_trials=self.n_trials,
|
||||
timeout=self.timeout,
|
||||
show_progress_bar=True,
|
||||
catch=(RuntimeError,) # Catch GPU OOM errors
|
||||
)
|
||||
|
||||
# Build best config
|
||||
best_trial = self.study.best_trial
|
||||
self.best_config = self._suggest_hyperparameters_from_params(best_trial.params)
|
||||
self.best_config.val_loss = best_trial.value
|
||||
self.best_config.val_r2 = {
|
||||
k.replace('r2_', ''): v
|
||||
for k, v in best_trial.user_attrs.items()
|
||||
if k.startswith('r2_')
|
||||
}
|
||||
|
||||
# Store normalization
|
||||
self.best_config.input_mean = self.input_mean
|
||||
self.best_config.input_std = self.input_std
|
||||
self.best_config.output_mean = self.output_mean
|
||||
self.best_config.output_std = self.output_std
|
||||
|
||||
# Log results
|
||||
logger.info(f"\nBest hyperparameters found:")
|
||||
logger.info(f" Hidden dims: {self.best_config.hidden_dims}")
|
||||
logger.info(f" Dropout: {self.best_config.dropout:.3f}")
|
||||
logger.info(f" Activation: {self.best_config.activation}")
|
||||
logger.info(f" Batch norm: {self.best_config.use_batch_norm}")
|
||||
logger.info(f" Learning rate: {self.best_config.learning_rate:.2e}")
|
||||
logger.info(f" Weight decay: {self.best_config.weight_decay:.2e}")
|
||||
logger.info(f" Batch size: {self.best_config.batch_size}")
|
||||
logger.info(f" Validation loss: {self.best_config.val_loss:.6f}")
|
||||
|
||||
if output_names:
|
||||
for i, name in enumerate(output_names):
|
||||
r2 = self.best_config.val_r2.get(f'output_{i}', 0)
|
||||
logger.info(f" {name} R² (CV): {r2:.4f}")
|
||||
|
||||
return self.best_config
|
||||
|
||||
def _suggest_hyperparameters_from_params(self, params: Dict[str, Any]) -> SurrogateConfig:
|
||||
"""Reconstruct config from Optuna params dict."""
|
||||
n_layers = params['n_layers']
|
||||
hidden_dims = [params[f'hidden_dim_{i}'] for i in range(n_layers)]
|
||||
|
||||
return SurrogateConfig(
|
||||
hidden_dims=hidden_dims,
|
||||
dropout=params['dropout'],
|
||||
activation=params['activation'],
|
||||
use_batch_norm=params['use_batch_norm'],
|
||||
learning_rate=params['learning_rate'],
|
||||
weight_decay=params['weight_decay'],
|
||||
batch_size=params['batch_size']
|
||||
)
|
||||
|
||||
def create_tuned_model(
|
||||
self,
|
||||
config: Optional[SurrogateConfig] = None
|
||||
) -> TunableMLP:
|
||||
"""Create a model with tuned hyperparameters."""
|
||||
if config is None:
|
||||
config = self.best_config
|
||||
if config is None:
|
||||
raise ValueError("No config available. Run tune() first.")
|
||||
|
||||
return TunableMLP(
|
||||
input_dim=self.input_dim,
|
||||
output_dim=self.output_dim,
|
||||
hidden_dims=config.hidden_dims,
|
||||
dropout=config.dropout,
|
||||
activation=config.activation,
|
||||
use_batch_norm=config.use_batch_norm
|
||||
)
|
||||
|
||||
|
||||
class TunedEnsembleSurrogate:
|
||||
"""
|
||||
Ensemble of tuned surrogate models for better uncertainty quantification.
|
||||
|
||||
Trains multiple models with different random seeds and aggregates predictions.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: SurrogateConfig,
|
||||
input_dim: int,
|
||||
output_dim: int,
|
||||
n_models: int = 5,
|
||||
device: str = 'auto'
|
||||
):
|
||||
"""
|
||||
Initialize ensemble surrogate.
|
||||
|
||||
Args:
|
||||
config: Tuned configuration to use for all models
|
||||
input_dim: Number of input features
|
||||
output_dim: Number of outputs
|
||||
n_models: Number of models in ensemble
|
||||
device: Computing device
|
||||
"""
|
||||
self.config = config
|
||||
self.input_dim = input_dim
|
||||
self.output_dim = output_dim
|
||||
self.n_models = n_models
|
||||
|
||||
if device == 'auto':
|
||||
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
||||
else:
|
||||
self.device = torch.device(device)
|
||||
|
||||
self.models: List[TunableMLP] = []
|
||||
self.trained = False
|
||||
|
||||
def train(self, X: np.ndarray, Y: np.ndarray, val_split: float = 0.2):
|
||||
"""Train all models in the ensemble."""
|
||||
logger.info(f"Training ensemble of {self.n_models} models...")
|
||||
|
||||
# Normalize using config stats
|
||||
X_norm = (X - self.config.input_mean) / self.config.input_std
|
||||
Y_norm = (Y - self.config.output_mean) / self.config.output_std
|
||||
|
||||
# Split data
|
||||
n_val = int(len(X) * val_split)
|
||||
indices = np.random.permutation(len(X))
|
||||
train_idx, val_idx = indices[n_val:], indices[:n_val]
|
||||
|
||||
X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
|
||||
X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]
|
||||
|
||||
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=self.device)
|
||||
Y_train_t = torch.tensor(Y_train, dtype=torch.float32, device=self.device)
|
||||
X_val_t = torch.tensor(X_val, dtype=torch.float32, device=self.device)
|
||||
Y_val_t = torch.tensor(Y_val, dtype=torch.float32, device=self.device)
|
||||
|
||||
train_dataset = TensorDataset(X_train_t, Y_train_t)
|
||||
|
||||
self.models = []
|
||||
|
||||
for i in range(self.n_models):
|
||||
torch.manual_seed(42 + i)
|
||||
|
||||
model = TunableMLP(
|
||||
input_dim=self.input_dim,
|
||||
output_dim=self.output_dim,
|
||||
hidden_dims=self.config.hidden_dims,
|
||||
dropout=self.config.dropout,
|
||||
activation=self.config.activation,
|
||||
use_batch_norm=self.config.use_batch_norm
|
||||
).to(self.device)
|
||||
|
||||
optimizer = torch.optim.AdamW(
|
||||
model.parameters(),
|
||||
lr=self.config.learning_rate,
|
||||
weight_decay=self.config.weight_decay
|
||||
)
|
||||
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
|
||||
optimizer, T_max=self.config.max_epochs
|
||||
)
|
||||
|
||||
train_loader = DataLoader(
|
||||
train_dataset,
|
||||
batch_size=self.config.batch_size,
|
||||
shuffle=True
|
||||
)
|
||||
early_stopping = EarlyStopping(patience=self.config.early_stopping_patience)
|
||||
|
||||
for epoch in range(self.config.max_epochs):
|
||||
model.train()
|
||||
for X_batch, Y_batch in train_loader:
|
||||
optimizer.zero_grad()
|
||||
pred = model(X_batch)
|
||||
loss = nn.functional.mse_loss(pred, Y_batch)
|
||||
loss.backward()
|
||||
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
|
||||
optimizer.step()
|
||||
|
||||
scheduler.step()
|
||||
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
val_pred = model(X_val_t)
|
||||
val_loss = nn.functional.mse_loss(val_pred, Y_val_t).item()
|
||||
|
||||
if early_stopping(val_loss, model):
|
||||
break
|
||||
|
||||
early_stopping.restore_best(model)
|
||||
model.eval()
|
||||
self.models.append(model)
|
||||
|
||||
logger.info(f" Model {i+1}/{self.n_models}: val_loss = {early_stopping.best_loss:.6f}")
|
||||
|
||||
self.trained = True
|
||||
logger.info("Ensemble training complete")
|
||||
|
||||
def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
|
||||
"""
|
||||
Predict with uncertainty estimation.
|
||||
|
||||
Args:
|
||||
X: Input features [n_samples, input_dim]
|
||||
|
||||
Returns:
|
||||
Tuple of (mean_predictions, std_predictions)
|
||||
"""
|
||||
if not self.trained:
|
||||
raise RuntimeError("Ensemble not trained. Call train() first.")
|
||||
|
||||
# Normalize input
|
||||
X_norm = (X - self.config.input_mean) / self.config.input_std
|
||||
X_t = torch.tensor(X_norm, dtype=torch.float32, device=self.device)
|
||||
|
||||
# Collect predictions from all models
|
||||
predictions = []
|
||||
for model in self.models:
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
pred = model(X_t).cpu().numpy()
|
||||
# Denormalize
|
||||
pred = pred * self.config.output_std + self.config.output_mean
|
||||
predictions.append(pred)
|
||||
|
||||
predictions = np.array(predictions) # [n_models, n_samples, output_dim]
|
||||
|
||||
mean_pred = predictions.mean(axis=0)
|
||||
std_pred = predictions.std(axis=0)
|
||||
|
||||
return mean_pred, std_pred
|
||||
|
||||
def predict_single(self, params: Dict[str, float], var_names: List[str]) -> Tuple[Dict[str, float], float]:
|
||||
"""
|
||||
Predict for a single point with uncertainty.
|
||||
|
||||
Args:
|
||||
params: Dictionary of input parameters
|
||||
var_names: List of variable names in order
|
||||
|
||||
Returns:
|
||||
Tuple of (predictions dict, total uncertainty)
|
||||
"""
|
||||
X = np.array([[params[name] for name in var_names]])
|
||||
mean, std = self.predict(X)
|
||||
|
||||
pred_dict = {f'output_{i}': mean[0, i] for i in range(self.output_dim)}
|
||||
uncertainty = float(np.sum(std[0]))
|
||||
|
||||
return pred_dict, uncertainty
|
||||
|
||||
def save(self, path: Path):
|
||||
"""Save ensemble to disk."""
|
||||
state = {
|
||||
'config': {
|
||||
'hidden_dims': self.config.hidden_dims,
|
||||
'dropout': self.config.dropout,
|
||||
'activation': self.config.activation,
|
||||
'use_batch_norm': self.config.use_batch_norm,
|
||||
'learning_rate': self.config.learning_rate,
|
||||
'weight_decay': self.config.weight_decay,
|
||||
'batch_size': self.config.batch_size,
|
||||
'input_mean': self.config.input_mean.tolist(),
|
||||
'input_std': self.config.input_std.tolist(),
|
||||
'output_mean': self.config.output_mean.tolist(),
|
||||
'output_std': self.config.output_std.tolist(),
|
||||
},
|
||||
'n_models': self.n_models,
|
||||
'model_states': [m.state_dict() for m in self.models]
|
||||
}
|
||||
torch.save(state, path)
|
||||
logger.info(f"Saved ensemble to {path}")
|
||||
|
||||
def load(self, path: Path):
|
||||
"""Load ensemble from disk."""
|
||||
state = torch.load(path, map_location=self.device)
|
||||
|
||||
# Restore config
|
||||
cfg = state['config']
|
||||
self.config = SurrogateConfig(
|
||||
hidden_dims=cfg['hidden_dims'],
|
||||
dropout=cfg['dropout'],
|
||||
activation=cfg['activation'],
|
||||
use_batch_norm=cfg['use_batch_norm'],
|
||||
learning_rate=cfg['learning_rate'],
|
||||
weight_decay=cfg['weight_decay'],
|
||||
batch_size=cfg['batch_size'],
|
||||
input_mean=np.array(cfg['input_mean']),
|
||||
input_std=np.array(cfg['input_std']),
|
||||
output_mean=np.array(cfg['output_mean']),
|
||||
output_std=np.array(cfg['output_std'])
|
||||
)
|
||||
|
||||
self.n_models = state['n_models']
|
||||
self.models = []
|
||||
|
||||
for model_state in state['model_states']:
|
||||
model = TunableMLP(
|
||||
input_dim=self.input_dim,
|
||||
output_dim=self.output_dim,
|
||||
hidden_dims=self.config.hidden_dims,
|
||||
dropout=self.config.dropout,
|
||||
activation=self.config.activation,
|
||||
use_batch_norm=self.config.use_batch_norm
|
||||
).to(self.device)
|
||||
model.load_state_dict(model_state)
|
||||
model.eval()
|
||||
self.models.append(model)
|
||||
|
||||
self.trained = True
|
||||
logger.info(f"Loaded ensemble with {self.n_models} models from {path}")
|
||||
|
||||
|
||||
def tune_surrogate_for_study(
|
||||
fea_data: List[Dict],
|
||||
design_var_names: List[str],
|
||||
objective_names: List[str],
|
||||
n_tuning_trials: int = 50,
|
||||
n_ensemble_models: int = 5
|
||||
) -> TunedEnsembleSurrogate:
|
||||
"""
|
||||
Convenience function to tune and create ensemble surrogate.
|
||||
|
||||
Args:
|
||||
fea_data: List of FEA results with 'params' and 'objectives' keys
|
||||
design_var_names: List of design variable names
|
||||
objective_names: List of objective names
|
||||
n_tuning_trials: Number of Optuna trials
|
||||
n_ensemble_models: Number of models in ensemble
|
||||
|
||||
Returns:
|
||||
Trained TunedEnsembleSurrogate
|
||||
"""
|
||||
# Prepare data
|
||||
X = np.array([[d['params'][name] for name in design_var_names] for d in fea_data])
|
||||
Y = np.array([[d['objectives'][name] for name in objective_names] for d in fea_data])
|
||||
|
||||
logger.info(f"Tuning surrogate on {len(X)} samples...")
|
||||
logger.info(f"Input: {len(design_var_names)} design variables")
|
||||
logger.info(f"Output: {len(objective_names)} objectives")
|
||||
|
||||
# Tune hyperparameters
|
||||
tuner = SurrogateHyperparameterTuner(
|
||||
input_dim=len(design_var_names),
|
||||
output_dim=len(objective_names),
|
||||
n_trials=n_tuning_trials,
|
||||
n_cv_folds=5
|
||||
)
|
||||
|
||||
best_config = tuner.tune(X, Y, output_names=objective_names)
|
||||
|
||||
# Create and train ensemble
|
||||
ensemble = TunedEnsembleSurrogate(
|
||||
config=best_config,
|
||||
input_dim=len(design_var_names),
|
||||
output_dim=len(objective_names),
|
||||
n_models=n_ensemble_models
|
||||
)
|
||||
|
||||
ensemble.train(X, Y)
|
||||
|
||||
return ensemble
|
||||
@@ -0,0 +1,385 @@
|
||||
"""
|
||||
Training Data Exporter for AtomizerField
|
||||
|
||||
This module exports training data from Atomizer optimization runs for AtomizerField neural network training.
|
||||
It saves NX Nastran input (.dat) and output (.op2) files along with metadata for each trial.
|
||||
|
||||
Usage:
|
||||
from optimization_engine.processors.surrogates.training_data_exporter import create_exporter_from_config
|
||||
|
||||
exporter = create_exporter_from_config(config)
|
||||
if exporter:
|
||||
exporter.export_trial(trial_number, design_vars, results, simulation_files)
|
||||
exporter.finalize()
|
||||
"""
|
||||
|
||||
import json
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional, List
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TrainingDataExporter:
|
||||
"""
|
||||
Exports training data for AtomizerField neural network training.
|
||||
|
||||
After each FEA solve, saves:
|
||||
- Input: NX Nastran .dat file (BDF format)
|
||||
- Output: NX Nastran .op2 file (binary results)
|
||||
- Metadata: JSON with design parameters, objectives, constraints
|
||||
|
||||
Directory structure:
|
||||
{export_dir}/
|
||||
├── trial_0001/
|
||||
│ ├── input/
|
||||
│ │ └── model.bdf
|
||||
│ ├── output/
|
||||
│ │ └── model.op2
|
||||
│ └── metadata.json
|
||||
├── trial_0002/
|
||||
│ └── ...
|
||||
├── study_summary.json
|
||||
└── README.md
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
export_dir: Path,
|
||||
study_name: str,
|
||||
design_variable_names: List[str],
|
||||
objective_names: List[str],
|
||||
constraint_names: Optional[List[str]] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
):
|
||||
"""
|
||||
Initialize the training data exporter.
|
||||
|
||||
Args:
|
||||
export_dir: Root directory for exported training data
|
||||
study_name: Name of the optimization study
|
||||
design_variable_names: List of design variable names
|
||||
objective_names: List of objective function names
|
||||
constraint_names: List of constraint names (optional)
|
||||
metadata: Additional study metadata (optional)
|
||||
"""
|
||||
self.export_dir = Path(export_dir)
|
||||
self.study_name = study_name
|
||||
self.design_variable_names = design_variable_names
|
||||
self.objective_names = objective_names
|
||||
self.constraint_names = constraint_names or []
|
||||
self.study_metadata = metadata or {}
|
||||
|
||||
self.trial_count = 0
|
||||
self.exported_trials: List[Dict[str, Any]] = []
|
||||
|
||||
# Create root export directory
|
||||
self.export_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.info(f"Training data exporter initialized: {self.export_dir}")
|
||||
|
||||
# Create README
|
||||
self._create_readme()
|
||||
|
||||
def export_trial(
|
||||
self,
|
||||
trial_number: int,
|
||||
design_variables: Dict[str, float],
|
||||
results: Dict[str, Any],
|
||||
simulation_files: Dict[str, Path]
|
||||
) -> bool:
|
||||
"""
|
||||
Export training data for a single trial.
|
||||
|
||||
Args:
|
||||
trial_number: Optuna trial number
|
||||
design_variables: Dict of design parameter names and values
|
||||
results: Dict containing objectives, constraints, and other results
|
||||
simulation_files: Dict with paths to 'dat_file' and 'op2_file'
|
||||
|
||||
Returns:
|
||||
True if export successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
# Create trial directory
|
||||
trial_dir = self.export_dir / f"trial_{trial_number:04d}"
|
||||
trial_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
input_dir = trial_dir / "input"
|
||||
output_dir = trial_dir / "output"
|
||||
input_dir.mkdir(exist_ok=True)
|
||||
output_dir.mkdir(exist_ok=True)
|
||||
|
||||
# Copy .dat file (Nastran input deck)
|
||||
dat_file = simulation_files.get('dat_file')
|
||||
if dat_file and Path(dat_file).exists():
|
||||
shutil.copy2(dat_file, input_dir / "model.bdf")
|
||||
logger.debug(f"Copied .dat file: {dat_file} -> {input_dir / 'model.bdf'}")
|
||||
else:
|
||||
logger.warning(f"Trial {trial_number}: .dat file not found at {dat_file}")
|
||||
return False
|
||||
|
||||
# Copy .op2 file (Nastran binary results)
|
||||
op2_file = simulation_files.get('op2_file')
|
||||
if op2_file and Path(op2_file).exists():
|
||||
shutil.copy2(op2_file, output_dir / "model.op2")
|
||||
logger.debug(f"Copied .op2 file: {op2_file} -> {output_dir / 'model.op2'}")
|
||||
else:
|
||||
logger.warning(f"Trial {trial_number}: .op2 file not found at {op2_file}")
|
||||
return False
|
||||
|
||||
# Create metadata.json
|
||||
metadata = self._create_trial_metadata(
|
||||
trial_number,
|
||||
design_variables,
|
||||
results
|
||||
)
|
||||
|
||||
metadata_path = trial_dir / "metadata.json"
|
||||
with open(metadata_path, 'w') as f:
|
||||
json.dump(metadata, f, indent=2)
|
||||
|
||||
logger.info(f"Exported training data for trial {trial_number}")
|
||||
|
||||
self.trial_count += 1
|
||||
self.exported_trials.append(metadata)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export trial {trial_number}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _create_trial_metadata(
|
||||
self,
|
||||
trial_number: int,
|
||||
design_variables: Dict[str, float],
|
||||
results: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create metadata dictionary for a trial.
|
||||
|
||||
Args:
|
||||
trial_number: Optuna trial number
|
||||
design_variables: Design parameter values
|
||||
results: Optimization results
|
||||
|
||||
Returns:
|
||||
Metadata dictionary
|
||||
"""
|
||||
metadata = {
|
||||
"trial_number": trial_number,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"atomizer_study": self.study_name,
|
||||
"design_parameters": design_variables,
|
||||
"results": {}
|
||||
}
|
||||
|
||||
# Extract objectives
|
||||
if "objectives" in results:
|
||||
metadata["results"]["objectives"] = results["objectives"]
|
||||
|
||||
# Extract constraints
|
||||
if "constraints" in results:
|
||||
metadata["results"]["constraints"] = results["constraints"]
|
||||
|
||||
# Extract any scalar results (max_stress, max_displacement, etc.)
|
||||
for key in ["max_stress", "max_displacement", "mass", "volume"]:
|
||||
if key in results:
|
||||
metadata["results"][key] = results[key]
|
||||
|
||||
# Add any additional result fields
|
||||
for key, value in results.items():
|
||||
if key not in ["objectives", "constraints"] and isinstance(value, (int, float, str, bool)):
|
||||
metadata["results"][key] = value
|
||||
|
||||
return metadata
|
||||
|
||||
def finalize(self) -> None:
|
||||
"""
|
||||
Finalize the training data export.
|
||||
|
||||
Creates study_summary.json with metadata about the entire study.
|
||||
"""
|
||||
try:
|
||||
summary = {
|
||||
"study_name": self.study_name,
|
||||
"total_trials": self.trial_count,
|
||||
"design_variables": self.design_variable_names,
|
||||
"objectives": self.objective_names,
|
||||
"constraints": self.constraint_names,
|
||||
"export_timestamp": datetime.now().isoformat(),
|
||||
"metadata": self.study_metadata
|
||||
}
|
||||
|
||||
summary_path = self.export_dir / "study_summary.json"
|
||||
with open(summary_path, 'w') as f:
|
||||
json.dump(summary, f, indent=2)
|
||||
|
||||
logger.info(f"Training data export finalized: {self.trial_count} trials exported")
|
||||
logger.info(f"Summary saved to: {summary_path}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to finalize training data export: {e}", exc_info=True)
|
||||
|
||||
def _create_readme(self) -> None:
|
||||
"""Create README.md explaining the training data structure."""
|
||||
readme_content = f"""# AtomizerField Training Data
|
||||
|
||||
**Study Name**: {self.study_name}
|
||||
**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
{self.export_dir.name}/
|
||||
├── trial_0001/
|
||||
│ ├── input/
|
||||
│ │ └── model.bdf # NX Nastran input deck (BDF format)
|
||||
│ ├── output/
|
||||
│ │ └── model.op2 # NX Nastran binary results (OP2 format)
|
||||
│ └── metadata.json # Design parameters, objectives, constraints
|
||||
├── trial_0002/
|
||||
│ └── ...
|
||||
├── study_summary.json # Overall study metadata
|
||||
└── README.md # This file
|
||||
```
|
||||
|
||||
## Design Variables
|
||||
|
||||
{chr(10).join(f'- {name}' for name in self.design_variable_names)}
|
||||
|
||||
## Objectives
|
||||
|
||||
{chr(10).join(f'- {name}' for name in self.objective_names)}
|
||||
|
||||
## Constraints
|
||||
|
||||
{chr(10).join(f'- {name}' for name in self.constraint_names) if self.constraint_names else '- None'}
|
||||
|
||||
## Usage with AtomizerField
|
||||
|
||||
### 1. Parse Training Data
|
||||
|
||||
```bash
|
||||
cd Atomizer-Field
|
||||
python batch_parser.py --data-dir "{self.export_dir}"
|
||||
```
|
||||
|
||||
This converts BDF/OP2 files to PyTorch Geometric format.
|
||||
|
||||
### 2. Validate Parsed Data
|
||||
|
||||
```bash
|
||||
python validate_parsed_data.py
|
||||
```
|
||||
|
||||
### 3. Train Neural Network
|
||||
|
||||
```bash
|
||||
python train.py --data-dir "training_data/parsed/" --epochs 200
|
||||
```
|
||||
|
||||
### 4. Use Trained Model in Atomizer
|
||||
|
||||
```bash
|
||||
cd ../Atomizer
|
||||
python run_optimization.py --config studies/{self.study_name}/workflow_config.json --use-neural
|
||||
```
|
||||
|
||||
## File Formats
|
||||
|
||||
- **BDF (.bdf)**: Nastran Bulk Data File - contains mesh, materials, loads, BCs
|
||||
- **OP2 (.op2)**: Nastran Output2 - binary results with displacements, stresses, etc.
|
||||
- **metadata.json**: Human-readable trial metadata
|
||||
|
||||
## AtomizerField Documentation
|
||||
|
||||
See `Atomizer-Field/docs/` for complete documentation on:
|
||||
- Neural network architecture
|
||||
- Training procedures
|
||||
- Integration with Atomizer
|
||||
- Uncertainty quantification
|
||||
|
||||
---
|
||||
|
||||
*Generated by Atomizer Training Data Exporter*
|
||||
"""
|
||||
|
||||
readme_path = self.export_dir / "README.md"
|
||||
with open(readme_path, 'w', encoding='utf-8') as f:
|
||||
f.write(readme_content)
|
||||
|
||||
logger.debug(f"Created README: {readme_path}")
|
||||
|
||||
|
||||
def create_exporter_from_config(config: Dict[str, Any]) -> Optional[TrainingDataExporter]:
|
||||
"""
|
||||
Factory function to create TrainingDataExporter from workflow configuration.
|
||||
|
||||
Args:
|
||||
config: Workflow configuration dictionary
|
||||
|
||||
Returns:
|
||||
TrainingDataExporter instance if enabled, None otherwise
|
||||
|
||||
Example config YAML:
|
||||
training_data_export:
|
||||
enabled: true
|
||||
export_dir: "atomizer_field_training_data/beam_study_001"
|
||||
"""
|
||||
if not config.get("training_data_export", {}).get("enabled", False):
|
||||
logger.info("Training data export is disabled")
|
||||
return None
|
||||
|
||||
export_config = config["training_data_export"]
|
||||
|
||||
# Get export directory
|
||||
export_dir = export_config.get("export_dir")
|
||||
if not export_dir:
|
||||
logger.error("Training data export enabled but 'export_dir' not specified")
|
||||
return None
|
||||
|
||||
# Get study name
|
||||
study_name = config.get("study_name", "unnamed_study")
|
||||
|
||||
# Get design variable names
|
||||
design_vars = config.get("design_variables", [])
|
||||
design_var_names = [dv.get("name", dv.get("parameter", f"var_{i}"))
|
||||
for i, dv in enumerate(design_vars)]
|
||||
|
||||
# Get objective names
|
||||
objectives = config.get("objectives", [])
|
||||
objective_names = [obj.get("name", f"obj_{i}")
|
||||
for i, obj in enumerate(objectives)]
|
||||
|
||||
# Get constraint names
|
||||
constraints = config.get("constraints", [])
|
||||
constraint_names = [c.get("name", f"constraint_{i}")
|
||||
for i, c in enumerate(constraints)]
|
||||
|
||||
# Additional metadata
|
||||
metadata = {
|
||||
"atomizer_version": config.get("version", "unknown"),
|
||||
"optimization_algorithm": config.get("optimization", {}).get("algorithm", "unknown"),
|
||||
"n_trials": config.get("optimization", {}).get("n_trials", 0)
|
||||
}
|
||||
|
||||
try:
|
||||
exporter = TrainingDataExporter(
|
||||
export_dir=Path(export_dir),
|
||||
study_name=study_name,
|
||||
design_variable_names=design_var_names,
|
||||
objective_names=objective_names,
|
||||
constraint_names=constraint_names,
|
||||
metadata=metadata
|
||||
)
|
||||
logger.info("Training data exporter created successfully")
|
||||
return exporter
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create training data exporter: {e}", exc_info=True)
|
||||
return None
|
||||
Reference in New Issue
Block a user