""" Simple MLP Surrogate for Fast Optimization This module provides a lightweight neural network surrogate that: 1. Trains directly from Optuna database (no mesh parsing needed) 2. Uses simple MLP: design_params -> [mass, frequency, max_disp, max_stress] 3. Provides millisecond predictions for optimization This is much simpler than the GNN-based approach and works well when: - You have enough FEA data in the database - You only need scalar objective predictions (no field data) - You want quick setup without mesh parsing pipeline Usage: from optimization_engine.processors.surrogates.simple_mlp_surrogate import SimpleSurrogate, train_from_database # Train from database surrogate = train_from_database( db_path="studies/uav_arm_atomizerfield_test/2_results/study.db", study_name="uav_arm_atomizerfield_test" ) # Predict results = surrogate.predict({ 'beam_half_core_thickness': 3.0, 'beam_face_thickness': 1.5, 'holes_diameter': 8.0, 'hole_count': 4 }) """ import json import logging import time from pathlib import Path from typing import Dict, Any, Optional, List, Tuple import numpy as np logger = logging.getLogger(__name__) try: import torch import torch.nn as nn import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader, random_split TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False logger.warning("PyTorch not installed. SimpleSurrogate will be limited.") try: import optuna OPTUNA_AVAILABLE = True except ImportError: OPTUNA_AVAILABLE = False class MLPModel(nn.Module): """Simple MLP for design parameter -> objective prediction.""" def __init__( self, n_inputs: int = 4, n_outputs: int = 4, hidden_dims: List[int] = [128, 256, 128, 64], dropout: float = 0.1 ): super().__init__() layers = [] prev_dim = n_inputs for hidden_dim in hidden_dims: layers.extend([ nn.Linear(prev_dim, hidden_dim), nn.LayerNorm(hidden_dim), nn.ReLU(), nn.Dropout(dropout) ]) prev_dim = hidden_dim layers.append(nn.Linear(prev_dim, n_outputs)) self.network = nn.Sequential(*layers) # Initialize weights self._init_weights() def _init_weights(self): for m in self.modules(): if isinstance(m, nn.Linear): nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') if m.bias is not None: nn.init.constant_(m.bias, 0) def forward(self, x): return self.network(x) class FEADataset(Dataset): """Dataset for training from FEA results.""" def __init__( self, design_params: np.ndarray, objectives: np.ndarray ): self.design_params = torch.tensor(design_params, dtype=torch.float32) self.objectives = torch.tensor(objectives, dtype=torch.float32) def __len__(self): return len(self.design_params) def __getitem__(self, idx): return self.design_params[idx], self.objectives[idx] class SimpleSurrogate: """ Simple MLP-based surrogate for FEA prediction. This is a lightweight alternative to the GNN-based approach that: - Doesn't require mesh parsing - Trains directly from database - Provides fast scalar predictions """ def __init__( self, model: nn.Module = None, design_var_names: List[str] = None, objective_names: List[str] = None, normalization: Dict[str, Any] = None, device: str = 'auto' ): if not TORCH_AVAILABLE: raise ImportError("PyTorch required. Install: pip install torch") # Set device if device == 'auto': self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') else: self.device = torch.device(device) self.model = model if model is not None: self.model = model.to(self.device) self.model.eval() self.design_var_names = design_var_names or [] self.objective_names = objective_names or ['mass', 'frequency', 'max_displacement', 'max_stress'] # Normalization stats self.normalization = normalization or { 'design_mean': np.zeros(len(self.design_var_names)), 'design_std': np.ones(len(self.design_var_names)), 'objective_mean': np.zeros(len(self.objective_names)), 'objective_std': np.ones(len(self.objective_names)) } # Performance tracking self.stats = { 'predictions': 0, 'total_time_ms': 0.0 } logger.info(f"SimpleSurrogate initialized on {self.device}") def predict(self, design_params: Dict[str, float]) -> Dict[str, Any]: """ Predict FEA objectives from design parameters. Args: design_params: Dict of design variable values Returns: Dict with mass, frequency, max_displacement, max_stress, inference_time_ms """ start_time = time.time() # Build input tensor param_values = [design_params.get(name, 0.0) for name in self.design_var_names] x = np.array(param_values, dtype=np.float32) # Normalize x_norm = (x - self.normalization['design_mean']) / (self.normalization['design_std'] + 1e-8) x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0) # Predict with torch.no_grad(): y_norm = self.model(x_tensor).cpu().numpy()[0] # Denormalize y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean'] inference_time = (time.time() - start_time) * 1000 results = { self.objective_names[i]: float(y[i]) for i in range(len(self.objective_names)) } results['inference_time_ms'] = inference_time # Update stats self.stats['predictions'] += 1 self.stats['total_time_ms'] += inference_time return results def get_statistics(self) -> Dict[str, Any]: """Get prediction statistics.""" avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \ if self.stats['predictions'] > 0 else 0 return { 'total_predictions': self.stats['predictions'], 'total_time_ms': self.stats['total_time_ms'], 'average_time_ms': avg_time, 'device': str(self.device), 'design_var_names': self.design_var_names, 'objective_names': self.objective_names } def save(self, path: Path): """Save surrogate to file.""" path = Path(path) torch.save({ 'model_state_dict': self.model.state_dict(), 'design_var_names': self.design_var_names, 'objective_names': self.objective_names, 'normalization': self.normalization, 'model_config': { 'n_inputs': len(self.design_var_names), 'n_outputs': len(self.objective_names) } }, path) logger.info(f"Surrogate saved to {path}") @classmethod def load(cls, path: Path, device: str = 'auto') -> 'SimpleSurrogate': """Load surrogate from file.""" path = Path(path) checkpoint = torch.load(path, map_location='cpu') # Create model model_config = checkpoint['model_config'] model = MLPModel( n_inputs=model_config['n_inputs'], n_outputs=model_config['n_outputs'] ) model.load_state_dict(checkpoint['model_state_dict']) return cls( model=model, design_var_names=checkpoint['design_var_names'], objective_names=checkpoint['objective_names'], normalization=checkpoint['normalization'], device=device ) def extract_data_from_database( db_path: str, study_name: str ) -> Tuple[np.ndarray, np.ndarray, List[str], List[str]]: """ Extract training data from Optuna database. Args: db_path: Path to SQLite database study_name: Name of Optuna study Returns: Tuple of (design_params, objectives, design_var_names, objective_names) """ if not OPTUNA_AVAILABLE: raise ImportError("Optuna required. Install: pip install optuna") storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}") study = optuna.load_study(study_name=study_name, storage=storage) # Get completed trials completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE] if not completed_trials: raise ValueError(f"No completed trials in study {study_name}") logger.info(f"Found {len(completed_trials)} completed trials") # Get design variable names from first trial design_var_names = list(completed_trials[0].params.keys()) # Determine objective structure first_values = completed_trials[0].values if first_values is None: raise ValueError("Trials have no objective values") # For multi-objective, values are [mass, frequency, ...] # We also need user_attrs for constraints # Collect data - filter out invalid samples design_params_list = [] objectives_list = [] skipped = 0 for trial in completed_trials: # Objectives - need mass, frequency, max_disp, max_stress mass = trial.values[0] if len(trial.values) > 0 else 0.0 frequency = trial.values[1] if len(trial.values) > 1 else 0.0 # Get constraints from user_attrs max_disp = trial.user_attrs.get('max_displacement', 0.0) max_stress = trial.user_attrs.get('max_stress', 0.0) # Note: frequency is stored as -freq for minimization, so convert back # Also filter out inf values objectives = [mass, -frequency, max_disp, max_stress] # Skip invalid samples (inf, nan, or extreme values) if any(np.isinf(v) or np.isnan(v) or v > 1e10 for v in objectives): skipped += 1 continue # Skip if frequency is negative (indicates error) if -frequency <= 0: skipped += 1 continue # Design parameters params = [trial.params.get(name, 0.0) for name in design_var_names] design_params_list.append(params) objectives_list.append(objectives) if skipped > 0: logger.info(f"Skipped {skipped} invalid samples") if not design_params_list: raise ValueError("No valid samples found after filtering") design_params = np.array(design_params_list, dtype=np.float32) objectives = np.array(objectives_list, dtype=np.float32) objective_names = ['mass', 'frequency', 'max_displacement', 'max_stress'] logger.info(f"Extracted {len(design_params)} valid samples") logger.info(f"Design vars: {design_var_names}") logger.info(f"Objectives: {objective_names}") return design_params, objectives, design_var_names, objective_names def train_from_database( db_path: str, study_name: str, epochs: int = 200, batch_size: int = 32, learning_rate: float = 0.001, val_split: float = 0.2, save_path: Optional[str] = None, device: str = 'auto' ) -> SimpleSurrogate: """ Train SimpleSurrogate from Optuna database. Args: db_path: Path to SQLite database study_name: Name of Optuna study epochs: Training epochs batch_size: Batch size learning_rate: Learning rate val_split: Validation split ratio save_path: Optional path to save trained model device: Computing device Returns: Trained SimpleSurrogate """ if not TORCH_AVAILABLE: raise ImportError("PyTorch required") # Set device if device == 'auto': device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') else: device = torch.device(device) print(f"\n{'='*60}") print("Training Simple MLP Surrogate from Database") print(f"{'='*60}") print(f"Device: {device}") # Extract data print(f"\n[1] Loading data from {db_path}") design_params, objectives, design_var_names, objective_names = extract_data_from_database( db_path, study_name ) print(f" Samples: {len(design_params)}") print(f" Design vars: {design_var_names}") print(f" Objectives: {objective_names}") # Compute normalization stats design_mean = design_params.mean(axis=0) design_std = design_params.std(axis=0) objective_mean = objectives.mean(axis=0) objective_std = objectives.std(axis=0) print(f"\n Objective ranges:") for i, name in enumerate(objective_names): print(f" {name}: {objectives[:, i].min():.2f} - {objectives[:, i].max():.2f}") # Normalize data design_params_norm = (design_params - design_mean) / (design_std + 1e-8) objectives_norm = (objectives - objective_mean) / (objective_std + 1e-8) # Create dataset dataset = FEADataset(design_params_norm, objectives_norm) # Split into train/val n_val = int(len(dataset) * val_split) n_train = len(dataset) - n_val train_dataset, val_dataset = random_split(dataset, [n_train, n_val]) train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=batch_size) print(f"\n[2] Creating model") print(f" Train samples: {n_train}") print(f" Val samples: {n_val}") # Create model model = MLPModel( n_inputs=len(design_var_names), n_outputs=len(objective_names), hidden_dims=[128, 256, 128, 64] ).to(device) n_params = sum(p.numel() for p in model.parameters()) print(f" Model params: {n_params:,}") # Training print(f"\n[3] Training for {epochs} epochs") optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs) best_val_loss = float('inf') best_state = None for epoch in range(epochs): # Train model.train() train_loss = 0.0 for x, y in train_loader: x, y = x.to(device), y.to(device) optimizer.zero_grad() pred = model(x) loss = F.mse_loss(pred, y) loss.backward() optimizer.step() train_loss += loss.item() train_loss /= len(train_loader) # Validate model.eval() val_loss = 0.0 with torch.no_grad(): for x, y in val_loader: x, y = x.to(device), y.to(device) pred = model(x) val_loss += F.mse_loss(pred, y).item() val_loss /= len(val_loader) scheduler.step() # Track best if val_loss < best_val_loss: best_val_loss = val_loss best_state = model.state_dict().copy() # Log progress if (epoch + 1) % 20 == 0 or epoch == 0: print(f" Epoch {epoch+1:3d}: train_loss={train_loss:.6f}, val_loss={val_loss:.6f}") # Load best model model.load_state_dict(best_state) print(f"\n Best val_loss: {best_val_loss:.6f}") # Create surrogate normalization = { 'design_mean': design_mean, 'design_std': design_std, 'objective_mean': objective_mean, 'objective_std': objective_std } surrogate = SimpleSurrogate( model=model, design_var_names=design_var_names, objective_names=objective_names, normalization=normalization, device=str(device) ) # Evaluate accuracy print(f"\n[4] Evaluating accuracy on validation set") model.eval() all_preds = [] all_targets = [] with torch.no_grad(): for x, y in val_loader: x = x.to(device) pred = model(x).cpu().numpy() all_preds.append(pred) all_targets.append(y.numpy()) all_preds = np.concatenate(all_preds) all_targets = np.concatenate(all_targets) # Denormalize for error calculation preds_denorm = all_preds * objective_std + objective_mean targets_denorm = all_targets * objective_std + objective_mean for i, name in enumerate(objective_names): mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean() mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) / (np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100 print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%") # Save if requested if save_path: surrogate.save(save_path) print(f"\n{'='*60}") print("Training complete!") print(f"{'='*60}") return surrogate def create_simple_surrogate_for_study( db_path: str = None, study_name: str = None, model_path: str = None, project_root: Path = None ) -> Optional[SimpleSurrogate]: """ Factory function to create or load SimpleSurrogate for UAV arm study. If model_path exists, loads the model. Otherwise trains from database. Args: db_path: Path to Optuna database study_name: Name of study model_path: Path to saved model (auto-detect if None) project_root: Project root for auto-detection Returns: SimpleSurrogate instance or None """ if not TORCH_AVAILABLE: logger.warning("PyTorch not available") return None # Auto-detect paths if project_root is None: project_root = Path(__file__).parent.parent if model_path is None: model_path = project_root / "simple_mlp_surrogate.pt" else: model_path = Path(model_path) # Try to load existing model if model_path.exists(): logger.info(f"Loading existing surrogate from {model_path}") return SimpleSurrogate.load(model_path) # Otherwise train from database if db_path is None: db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db" else: db_path = Path(db_path) if study_name is None: study_name = "uav_arm_atomizerfield_test" if not db_path.exists(): logger.warning(f"Database not found: {db_path}") return None logger.info(f"Training surrogate from {db_path}") return train_from_database( db_path=str(db_path), study_name=study_name, save_path=str(model_path) ) if __name__ == "__main__": import sys # Default paths project_root = Path(__file__).parent.parent db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db" model_path = project_root / "simple_mlp_surrogate.pt" print("Simple MLP Surrogate Training") print("="*60) if not db_path.exists(): print(f"ERROR: Database not found: {db_path}") sys.exit(1) # Train surrogate = train_from_database( db_path=str(db_path), study_name="uav_arm_atomizerfield_test", epochs=300, save_path=str(model_path) ) # Test predictions print("\n[5] Testing predictions") test_params = { 'beam_half_core_thickness': 3.0, 'beam_face_thickness': 1.5, 'holes_diameter': 8.0, 'hole_count': 4 } print(f" Input: {test_params}") results = surrogate.predict(test_params) print(f" Mass: {results['mass']:.2f} g") print(f" Frequency: {results['frequency']:.2f} Hz") print(f" Max Displacement: {results['max_displacement']:.6f} mm") print(f" Max Stress: {results['max_stress']:.2f} MPa") print(f" Inference time: {results['inference_time_ms']:.2f} ms") # Test variation print("\n[6] Testing variation with parameters") for thickness in [1.0, 3.0, 5.0]: params = {**test_params, 'beam_half_core_thickness': thickness} r = surrogate.predict(params) print(f" thickness={thickness}: mass={r['mass']:.0f}g, freq={r['frequency']:.2f}Hz")