Files
Atomizer/optimization_engine/processors/surrogates/simple_mlp_surrogate.py
Anto01 eabcc4c3ca refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 12:30:59 -05:00

649 lines
20 KiB
Python

"""
Simple MLP Surrogate for Fast Optimization
This module provides a lightweight neural network surrogate that:
1. Trains directly from Optuna database (no mesh parsing needed)
2. Uses simple MLP: design_params -> [mass, frequency, max_disp, max_stress]
3. Provides millisecond predictions for optimization
This is much simpler than the GNN-based approach and works well when:
- You have enough FEA data in the database
- You only need scalar objective predictions (no field data)
- You want quick setup without mesh parsing pipeline
Usage:
from optimization_engine.processors.surrogates.simple_mlp_surrogate import SimpleSurrogate, train_from_database
# Train from database
surrogate = train_from_database(
db_path="studies/uav_arm_atomizerfield_test/2_results/study.db",
study_name="uav_arm_atomizerfield_test"
)
# Predict
results = surrogate.predict({
'beam_half_core_thickness': 3.0,
'beam_face_thickness': 1.5,
'holes_diameter': 8.0,
'hole_count': 4
})
"""
import json
import logging
import time
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
import numpy as np
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
logger.warning("PyTorch not installed. SimpleSurrogate will be limited.")
try:
import optuna
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
class MLPModel(nn.Module):
    """Simple MLP for design parameter -> objective prediction.

    Architecture: a stack of Linear -> LayerNorm -> ReLU -> Dropout
    blocks followed by a final Linear output layer.
    """

    def __init__(
        self,
        n_inputs: int = 4,
        n_outputs: int = 4,
        hidden_dims: Optional[List[int]] = None,
        dropout: float = 0.1
    ):
        """
        Args:
            n_inputs: Number of design variables (input features).
            n_outputs: Number of predicted objectives.
            hidden_dims: Hidden layer widths; defaults to [128, 256, 128, 64].
            dropout: Dropout probability applied after each hidden layer.
        """
        super().__init__()
        # None sentinel instead of a mutable default argument; the effective
        # default widths are unchanged.
        if hidden_dims is None:
            hidden_dims = [128, 256, 128, 64]
        layers = []
        prev_dim = n_inputs
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.LayerNorm(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, n_outputs))
        self.network = nn.Sequential(*layers)
        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """Kaiming-normal init for Linear weights (ReLU gain), zero biases."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Forward pass: (batch, n_inputs) -> (batch, n_outputs)."""
        return self.network(x)
class FEADataset(Dataset):
    """Torch dataset of (design-parameter vector, objective vector) pairs."""

    def __init__(
        self,
        design_params: np.ndarray,
        objectives: np.ndarray
    ):
        # Convert to float32 tensors once up front so indexing is cheap.
        self.design_params = torch.tensor(design_params, dtype=torch.float32)
        self.objectives = torch.tensor(objectives, dtype=torch.float32)

    def __len__(self):
        return self.design_params.shape[0]

    def __getitem__(self, idx):
        return self.design_params[idx], self.objectives[idx]
class SimpleSurrogate:
    """
    Simple MLP-based surrogate for FEA prediction.

    This is a lightweight alternative to the GNN-based approach that:
    - Doesn't require mesh parsing
    - Trains directly from database
    - Provides fast scalar predictions
    """

    def __init__(
        self,
        model: nn.Module = None,
        design_var_names: List[str] = None,
        objective_names: List[str] = None,
        normalization: Dict[str, Any] = None,
        device: str = 'auto'
    ):
        """
        Args:
            model: Trained MLP. May be None, but must be set before predict().
            design_var_names: Ordered design variable names used to build inputs.
            objective_names: Ordered output names (defaults to the standard four).
            normalization: Dict with 'design_mean'/'design_std'/'objective_mean'/
                'objective_std' arrays; defaults to an identity transform.
            device: 'auto' picks CUDA when available, else CPU.

        Raises:
            ImportError: If PyTorch is not installed.
        """
        if not TORCH_AVAILABLE:
            raise ImportError("PyTorch required. Install: pip install torch")
        # Set device
        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)
        self.model = model
        if model is not None:
            self.model = model.to(self.device)
            self.model.eval()
        self.design_var_names = design_var_names or []
        self.objective_names = objective_names or ['mass', 'frequency', 'max_displacement', 'max_stress']
        # Normalization stats (identity transform when not supplied)
        self.normalization = normalization or {
            'design_mean': np.zeros(len(self.design_var_names)),
            'design_std': np.ones(len(self.design_var_names)),
            'objective_mean': np.zeros(len(self.objective_names)),
            'objective_std': np.ones(len(self.objective_names))
        }
        # Performance tracking
        self.stats = {
            'predictions': 0,
            'total_time_ms': 0.0
        }
        logger.info(f"SimpleSurrogate initialized on {self.device}")

    def predict(self, design_params: Dict[str, float]) -> Dict[str, Any]:
        """
        Predict FEA objectives from design parameters.

        Design variables missing from the input dict default to 0.0.

        Args:
            design_params: Dict of design variable values

        Returns:
            Dict with one entry per objective name plus 'inference_time_ms'

        Raises:
            RuntimeError: If no model has been set (construct with a model,
                or use SimpleSurrogate.load / train_from_database).
        """
        if self.model is None:
            # Fail with a clear message instead of an opaque AttributeError.
            raise RuntimeError("SimpleSurrogate has no model; load or train one first")
        start_time = time.time()
        # Build input tensor in the canonical design-variable order
        param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
        x = np.array(param_values, dtype=np.float32)
        # Normalize (epsilon guards zero-variance features)
        x_norm = (x - self.normalization['design_mean']) / (self.normalization['design_std'] + 1e-8)
        x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
        # Predict
        with torch.no_grad():
            y_norm = self.model(x_tensor).cpu().numpy()[0]
        # Denormalize
        y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
        inference_time = (time.time() - start_time) * 1000
        results = {
            self.objective_names[i]: float(y[i]) for i in range(len(self.objective_names))
        }
        results['inference_time_ms'] = inference_time
        # Update stats
        self.stats['predictions'] += 1
        self.stats['total_time_ms'] += inference_time
        return results

    def get_statistics(self) -> Dict[str, Any]:
        """Get prediction statistics (counts, timing, configuration)."""
        avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
            if self.stats['predictions'] > 0 else 0
        return {
            'total_predictions': self.stats['predictions'],
            'total_time_ms': self.stats['total_time_ms'],
            'average_time_ms': avg_time,
            'device': str(self.device),
            'design_var_names': self.design_var_names,
            'objective_names': self.objective_names
        }

    def save(self, path: Path):
        """Save model weights, variable names, and normalization stats to path."""
        path = Path(path)
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'design_var_names': self.design_var_names,
            'objective_names': self.objective_names,
            'normalization': self.normalization,
            'model_config': {
                'n_inputs': len(self.design_var_names),
                'n_outputs': len(self.objective_names)
            }
        }, path)
        logger.info(f"Surrogate saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'SimpleSurrogate':
        """Load surrogate from file.

        SECURITY NOTE: torch.load unpickles arbitrary Python objects
        (the checkpoint includes numpy arrays, so weights_only cannot be
        used as-is) -- only load checkpoints from trusted sources.
        """
        path = Path(path)
        checkpoint = torch.load(path, map_location='cpu')
        # Recreate the model with the saved input/output sizes.
        # NOTE(review): hidden_dims are not stored in the checkpoint, so this
        # assumes the default architecture was used for training.
        model_config = checkpoint['model_config']
        model = MLPModel(
            n_inputs=model_config['n_inputs'],
            n_outputs=model_config['n_outputs']
        )
        model.load_state_dict(checkpoint['model_state_dict'])
        return cls(
            model=model,
            design_var_names=checkpoint['design_var_names'],
            objective_names=checkpoint['objective_names'],
            normalization=checkpoint['normalization'],
            device=device
        )
def extract_data_from_database(
    db_path: str,
    study_name: str
) -> Tuple[np.ndarray, np.ndarray, List[str], List[str]]:
    """
    Extract training data from Optuna database.

    Only COMPLETE trials are used. Samples containing inf/NaN/extreme
    (>1e10) objective values, or a non-positive frequency, are dropped.

    Args:
        db_path: Path to SQLite database
        study_name: Name of Optuna study

    Returns:
        Tuple of (design_params, objectives, design_var_names, objective_names)
        where design_params is (n_samples, n_vars) float32 and objectives is
        (n_samples, 4) float32 in the order
        [mass, frequency, max_displacement, max_stress].

    Raises:
        ImportError: If optuna is not installed.
        ValueError: If there are no completed trials, no objective values,
            or no valid samples after filtering.
    """
    if not OPTUNA_AVAILABLE:
        raise ImportError("Optuna required. Install: pip install optuna")
    storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
    study = optuna.load_study(study_name=study_name, storage=storage)
    # Get completed trials
    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
    if not completed_trials:
        raise ValueError(f"No completed trials in study {study_name}")
    logger.info(f"Found {len(completed_trials)} completed trials")
    # Get design variable names from first trial
    # (assumes all trials share the same parameter set -- TODO confirm)
    design_var_names = list(completed_trials[0].params.keys())
    # Determine objective structure
    first_values = completed_trials[0].values
    if first_values is None:
        raise ValueError("Trials have no objective values")
    # For multi-objective, values are [mass, frequency, ...]
    # We also need user_attrs for constraints
    # Collect data - filter out invalid samples
    design_params_list = []
    objectives_list = []
    skipped = 0
    for trial in completed_trials:
        # Objectives - need mass, frequency, max_disp, max_stress.
        # Missing values fall back to 0.0.
        mass = trial.values[0] if len(trial.values) > 0 else 0.0
        frequency = trial.values[1] if len(trial.values) > 1 else 0.0
        # Get constraints from user_attrs (0.0 if the trial did not record them)
        max_disp = trial.user_attrs.get('max_displacement', 0.0)
        max_stress = trial.user_attrs.get('max_stress', 0.0)
        # Note: frequency is stored as -freq for minimization, so convert back
        # Also filter out inf values
        objectives = [mass, -frequency, max_disp, max_stress]
        # Skip invalid samples (inf, nan, or extreme values)
        if any(np.isinf(v) or np.isnan(v) or v > 1e10 for v in objectives):
            skipped += 1
            continue
        # Skip if frequency is negative (indicates error)
        # (i.e. the stored negated value was >= 0, meaning true freq <= 0)
        if -frequency <= 0:
            skipped += 1
            continue
        # Design parameters, in the same fixed order for every sample
        params = [trial.params.get(name, 0.0) for name in design_var_names]
        design_params_list.append(params)
        objectives_list.append(objectives)
    if skipped > 0:
        logger.info(f"Skipped {skipped} invalid samples")
    if not design_params_list:
        raise ValueError("No valid samples found after filtering")
    design_params = np.array(design_params_list, dtype=np.float32)
    objectives = np.array(objectives_list, dtype=np.float32)
    # Fixed objective ordering matching SimpleSurrogate's default names.
    objective_names = ['mass', 'frequency', 'max_displacement', 'max_stress']
    logger.info(f"Extracted {len(design_params)} valid samples")
    logger.info(f"Design vars: {design_var_names}")
    logger.info(f"Objectives: {objective_names}")
    return design_params, objectives, design_var_names, objective_names
def train_from_database(
    db_path: str,
    study_name: str,
    epochs: int = 200,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    val_split: float = 0.2,
    save_path: Optional[str] = None,
    device: str = 'auto'
) -> SimpleSurrogate:
    """
    Train SimpleSurrogate from Optuna database.

    Args:
        db_path: Path to SQLite database
        study_name: Name of Optuna study
        epochs: Training epochs
        batch_size: Batch size
        learning_rate: Learning rate (AdamW with cosine-annealing schedule)
        val_split: Validation split ratio; must leave at least one sample
            in both the train and validation sets
        save_path: Optional path to save trained model
        device: Computing device ('auto', 'cpu', 'cuda', ...)

    Returns:
        Trained SimpleSurrogate

    Raises:
        ImportError: If PyTorch is not installed.
        ValueError: If the dataset is too small for the requested split.
    """
    if not TORCH_AVAILABLE:
        raise ImportError("PyTorch required")
    # Set device
    if device == 'auto':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)
    print(f"\n{'='*60}")
    print("Training Simple MLP Surrogate from Database")
    print(f"{'='*60}")
    print(f"Device: {device}")
    # Extract data
    print(f"\n[1] Loading data from {db_path}")
    design_params, objectives, design_var_names, objective_names = extract_data_from_database(
        db_path, study_name
    )
    print(f" Samples: {len(design_params)}")
    print(f" Design vars: {design_var_names}")
    print(f" Objectives: {objective_names}")
    # Compute normalization stats (per-feature mean/std over all samples)
    design_mean = design_params.mean(axis=0)
    design_std = design_params.std(axis=0)
    objective_mean = objectives.mean(axis=0)
    objective_std = objectives.std(axis=0)
    print(f"\n Objective ranges:")
    for i, name in enumerate(objective_names):
        print(f" {name}: {objectives[:, i].min():.2f} - {objectives[:, i].max():.2f}")
    # Normalize data (epsilon guards constant features)
    design_params_norm = (design_params - design_mean) / (design_std + 1e-8)
    objectives_norm = (objectives - objective_mean) / (objective_std + 1e-8)
    # Create dataset
    dataset = FEADataset(design_params_norm, objectives_norm)
    # Split into train/val
    n_val = int(len(dataset) * val_split)
    n_train = len(dataset) - n_val
    if n_val == 0 or n_train == 0:
        # Would otherwise crash later with ZeroDivisionError on an empty loader.
        raise ValueError(
            f"Dataset too small for val_split={val_split}: "
            f"{len(dataset)} samples -> {n_train} train / {n_val} val"
        )
    train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    print(f"\n[2] Creating model")
    print(f" Train samples: {n_train}")
    print(f" Val samples: {n_val}")
    # Create model
    model = MLPModel(
        n_inputs=len(design_var_names),
        n_outputs=len(objective_names),
        hidden_dims=[128, 256, 128, 64]
    ).to(device)
    n_params = sum(p.numel() for p in model.parameters())
    print(f" Model params: {n_params:,}")
    # Training
    print(f"\n[3] Training for {epochs} epochs")
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(epochs):
        # Train
        model.train()
        train_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            pred = model(x)
            loss = F.mse_loss(pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        # Validate
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                pred = model(x)
                val_loss += F.mse_loss(pred, y).item()
        val_loss /= len(val_loader)
        scheduler.step()
        # Track best. state_dict() returns LIVE tensor references, so a
        # shallow dict.copy() would be silently overwritten by subsequent
        # optimizer steps -- clone each tensor to snapshot the weights.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
        # Log progress
        if (epoch + 1) % 20 == 0 or epoch == 0:
            print(f" Epoch {epoch+1:3d}: train_loss={train_loss:.6f}, val_loss={val_loss:.6f}")
    # Load best model (best_state stays None only if every val_loss was NaN)
    if best_state is not None:
        model.load_state_dict(best_state)
    print(f"\n Best val_loss: {best_val_loss:.6f}")
    # Create surrogate
    normalization = {
        'design_mean': design_mean,
        'design_std': design_std,
        'objective_mean': objective_mean,
        'objective_std': objective_std
    }
    surrogate = SimpleSurrogate(
        model=model,
        design_var_names=design_var_names,
        objective_names=objective_names,
        normalization=normalization,
        device=str(device)
    )
    # Evaluate accuracy
    print(f"\n[4] Evaluating accuracy on validation set")
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            pred = model(x).cpu().numpy()
            all_preds.append(pred)
            all_targets.append(y.numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    # Denormalize for error calculation in physical units
    preds_denorm = all_preds * objective_std + objective_mean
    targets_denorm = all_targets * objective_std + objective_mean
    for i, name in enumerate(objective_names):
        mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
        mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) / (np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
        print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
    # Save if requested
    if save_path:
        surrogate.save(save_path)
    print(f"\n{'='*60}")
    print("Training complete!")
    print(f"{'='*60}")
    return surrogate
def create_simple_surrogate_for_study(
    db_path: Optional[str] = None,
    study_name: Optional[str] = None,
    model_path: Optional[str] = None,
    project_root: Optional[Path] = None
) -> Optional[SimpleSurrogate]:
    """
    Factory function to create or load SimpleSurrogate for UAV arm study.

    If model_path exists, loads the model. Otherwise trains from database.

    Args:
        db_path: Path to Optuna database
        study_name: Name of study
        model_path: Path to saved model (auto-detect if None)
        project_root: Project root for auto-detection

    Returns:
        SimpleSurrogate instance, or None when PyTorch is unavailable or
        the database file cannot be found.
    """
    if not TORCH_AVAILABLE:
        logger.warning("PyTorch not available")
        return None
    # Auto-detect paths.
    # NOTE(review): this file lives under processors/surrogates/, so
    # Path(__file__).parent.parent resolves to the processors/ package,
    # not the engine root -- verify this default still points where
    # intended after the module reorganization.
    if project_root is None:
        project_root = Path(__file__).parent.parent
    if model_path is None:
        model_path = project_root / "simple_mlp_surrogate.pt"
    else:
        model_path = Path(model_path)
    # Try to load existing model first (cheap path, no training)
    if model_path.exists():
        logger.info(f"Loading existing surrogate from {model_path}")
        return SimpleSurrogate.load(model_path)
    # Otherwise train from database
    if db_path is None:
        db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
    else:
        db_path = Path(db_path)
    if study_name is None:
        study_name = "uav_arm_atomizerfield_test"
    if not db_path.exists():
        logger.warning(f"Database not found: {db_path}")
        return None
    logger.info(f"Training surrogate from {db_path}")
    return train_from_database(
        db_path=str(db_path),
        study_name=study_name,
        save_path=str(model_path)
    )
if __name__ == "__main__":
import sys
# Default paths
project_root = Path(__file__).parent.parent
db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
model_path = project_root / "simple_mlp_surrogate.pt"
print("Simple MLP Surrogate Training")
print("="*60)
if not db_path.exists():
print(f"ERROR: Database not found: {db_path}")
sys.exit(1)
# Train
surrogate = train_from_database(
db_path=str(db_path),
study_name="uav_arm_atomizerfield_test",
epochs=300,
save_path=str(model_path)
)
# Test predictions
print("\n[5] Testing predictions")
test_params = {
'beam_half_core_thickness': 3.0,
'beam_face_thickness': 1.5,
'holes_diameter': 8.0,
'hole_count': 4
}
print(f" Input: {test_params}")
results = surrogate.predict(test_params)
print(f" Mass: {results['mass']:.2f} g")
print(f" Frequency: {results['frequency']:.2f} Hz")
print(f" Max Displacement: {results['max_displacement']:.6f} mm")
print(f" Max Stress: {results['max_stress']:.2f} MPa")
print(f" Inference time: {results['inference_time_ms']:.2f} ms")
# Test variation
print("\n[6] Testing variation with parameters")
for thickness in [1.0, 3.0, 5.0]:
params = {**test_params, 'beam_half_core_thickness': thickness}
r = surrogate.predict(params)
print(f" thickness={thickness}: mass={r['mass']:.0f}g, freq={r['frequency']:.2f}Hz")