""" Real Data Cross-Validation for Surrogate Model This script performs proper k-fold cross-validation using ONLY real FEA data to assess the true prediction accuracy of the neural network surrogate. The key insight: We don't need simulated FEA - we already have real FEA data! We can use cross-validation to estimate out-of-sample performance. """ import sys from pathlib import Path import numpy as np import torch import torch.nn as nn import torch.optim as optim from sklearn.model_selection import KFold import matplotlib matplotlib.use('Agg') import matplotlib.pyplot as plt # Add project paths project_root = Path(__file__).parent sys.path.insert(0, str(project_root)) from optimization_engine.active_learning_surrogate import ( EnsembleMLP, extract_training_data_from_study ) def k_fold_cross_validation( design_params: np.ndarray, objectives: np.ndarray, design_var_names: list, n_folds: int = 5, hidden_dims: list = [128, 64, 32], # Deeper network epochs: int = 300, lr: float = 0.001 ): """ Perform k-fold cross-validation to assess real prediction performance. Returns detailed metrics for each fold. """ kf = KFold(n_splits=n_folds, shuffle=True, random_state=42) fold_results = [] all_predictions = [] all_actuals = [] all_indices = [] for fold, (train_idx, test_idx) in enumerate(kf.split(design_params)): print(f"\n--- Fold {fold+1}/{n_folds} ---") X_train, X_test = design_params[train_idx], design_params[test_idx] y_train, y_test = objectives[train_idx], objectives[test_idx] # Normalization on training data X_mean = X_train.mean(axis=0) X_std = X_train.std(axis=0) + 1e-8 y_mean = y_train.mean(axis=0) y_std = y_train.std(axis=0) + 1e-8 X_train_norm = (X_train - X_mean) / X_std X_test_norm = (X_test - X_mean) / X_std y_train_norm = (y_train - y_mean) / y_std # Convert to tensors X_train_t = torch.FloatTensor(X_train_norm) y_train_t = torch.FloatTensor(y_train_norm) X_test_t = torch.FloatTensor(X_test_norm) # Train model input_dim = X_train.shape[1] output_dim = y_train.shape[1] model = EnsembleMLP(input_dim, output_dim, hidden_dims) optimizer = optim.Adam(model.parameters(), lr=lr) criterion = nn.MSELoss() # Training with early stopping best_loss = float('inf') patience = 30 patience_counter = 0 best_state = None for epoch in range(epochs): model.train() # Mini-batch training batch_size = 32 perm = torch.randperm(len(X_train_t)) epoch_loss = 0 n_batches = 0 for j in range(0, len(X_train_t), batch_size): batch_idx = perm[j:j+batch_size] X_batch = X_train_t[batch_idx] y_batch = y_train_t[batch_idx] optimizer.zero_grad() pred = model(X_batch) loss = criterion(pred, y_batch) loss.backward() optimizer.step() epoch_loss += loss.item() n_batches += 1 avg_loss = epoch_loss / n_batches if avg_loss < best_loss: best_loss = avg_loss best_state = model.state_dict().copy() patience_counter = 0 else: patience_counter += 1 if patience_counter >= patience: break # Restore best model if best_state: model.load_state_dict(best_state) # Evaluate on test set model.eval() with torch.no_grad(): pred_norm = model(X_test_t).numpy() pred = pred_norm * y_std + y_mean # Calculate errors for each objective mass_pred = pred[:, 0] mass_actual = y_test[:, 0] freq_pred = pred[:, 1] freq_actual = y_test[:, 1] mass_errors = np.abs(mass_pred - mass_actual) / (np.abs(mass_actual) + 1e-8) freq_errors = np.abs(freq_pred - freq_actual) / (np.abs(freq_actual) + 1e-8) mass_mape = np.mean(mass_errors) * 100 freq_mape = np.mean(freq_errors) * 100 mass_rmse = np.sqrt(np.mean((mass_pred - mass_actual)**2)) freq_rmse = np.sqrt(np.mean((freq_pred - freq_actual)**2)) fold_results.append({ 'fold': fold + 1, 'n_train': len(train_idx), 'n_test': len(test_idx), 'mass_mape': mass_mape, 'freq_mape': freq_mape, 'mass_rmse': mass_rmse, 'freq_rmse': freq_rmse, 'epochs_trained': epoch + 1 }) # Store for plotting all_predictions.extend(pred.tolist()) all_actuals.extend(y_test.tolist()) all_indices.extend(test_idx.tolist()) print(f" Mass MAPE: {mass_mape:.1f}%, RMSE: {mass_rmse:.1f}g") print(f" Freq MAPE: {freq_mape:.1f}%, RMSE: {freq_rmse:.1f}Hz") return fold_results, np.array(all_predictions), np.array(all_actuals), all_indices def train_final_model_with_cv_uncertainty( design_params: np.ndarray, objectives: np.ndarray, design_var_names: list, cv_results: dict ): """ Train final model on all data and attach CV-based uncertainty estimates. """ print("\n" + "="*60) print("Training Final Model on All Data") print("="*60) # Normalization X_mean = design_params.mean(axis=0) X_std = design_params.std(axis=0) + 1e-8 y_mean = objectives.mean(axis=0) y_std = objectives.std(axis=0) + 1e-8 X_norm = (design_params - X_mean) / X_std y_norm = (objectives - y_mean) / y_std X_t = torch.FloatTensor(X_norm) y_t = torch.FloatTensor(y_norm) # Train on all data input_dim = design_params.shape[1] output_dim = objectives.shape[1] model = EnsembleMLP(input_dim, output_dim, [128, 64, 32]) optimizer = optim.Adam(model.parameters(), lr=0.001) criterion = nn.MSELoss() for epoch in range(500): model.train() optimizer.zero_grad() pred = model(X_t) loss = criterion(pred, y_t) loss.backward() optimizer.step() # Save model with metadata model_state = { 'model': model.state_dict(), 'input_mean': X_mean.tolist(), 'input_std': X_std.tolist(), 'output_mean': y_mean.tolist(), 'output_std': y_std.tolist(), 'design_var_names': design_var_names, 'cv_mass_mape': cv_results['mass_mape'], 'cv_freq_mape': cv_results['freq_mape'], 'cv_mass_std': cv_results['mass_std'], 'cv_freq_std': cv_results['freq_std'], 'n_samples': len(design_params), 'hidden_dims': [128, 64, 32] } save_path = project_root / "cv_validated_surrogate.pt" torch.save(model_state, save_path) print(f"Saved CV-validated model to: {save_path}") return model, model_state def plot_cv_results( all_predictions: np.ndarray, all_actuals: np.ndarray, fold_results: list, save_path: str ): """Generate comprehensive validation plots.""" fig, axes = plt.subplots(2, 3, figsize=(15, 10)) # 1. Mass: Predicted vs Actual ax = axes[0, 0] ax.scatter(all_actuals[:, 0], all_predictions[:, 0], alpha=0.5, s=20) min_val = min(all_actuals[:, 0].min(), all_predictions[:, 0].min()) max_val = max(all_actuals[:, 0].max(), all_predictions[:, 0].max()) ax.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2, label='Perfect') ax.set_xlabel('FEA Mass (g)') ax.set_ylabel('NN Predicted Mass (g)') ax.set_title('Mass: Predicted vs Actual') ax.legend() ax.grid(True, alpha=0.3) # 2. Frequency: Predicted vs Actual ax = axes[0, 1] ax.scatter(all_actuals[:, 1], all_predictions[:, 1], alpha=0.5, s=20) min_val = min(all_actuals[:, 1].min(), all_predictions[:, 1].min()) max_val = max(all_actuals[:, 1].max(), all_predictions[:, 1].max()) ax.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2, label='Perfect') ax.set_xlabel('FEA Frequency (Hz)') ax.set_ylabel('NN Predicted Frequency (Hz)') ax.set_title('Frequency: Predicted vs Actual') ax.legend() ax.grid(True, alpha=0.3) # 3. Mass Errors by Fold ax = axes[0, 2] folds = [r['fold'] for r in fold_results] mass_mapes = [r['mass_mape'] for r in fold_results] bars = ax.bar(folds, mass_mapes, color='steelblue', alpha=0.7) ax.axhline(y=np.mean(mass_mapes), color='red', linestyle='--', label=f'Mean: {np.mean(mass_mapes):.1f}%') ax.axhline(y=10, color='green', linestyle='--', label='Target: 10%') ax.set_xlabel('Fold') ax.set_ylabel('Mass MAPE (%)') ax.set_title('Mass MAPE by Fold') ax.legend() ax.grid(True, alpha=0.3, axis='y') # 4. Frequency Errors by Fold ax = axes[1, 0] freq_mapes = [r['freq_mape'] for r in fold_results] bars = ax.bar(folds, freq_mapes, color='coral', alpha=0.7) ax.axhline(y=np.mean(freq_mapes), color='red', linestyle='--', label=f'Mean: {np.mean(freq_mapes):.1f}%') ax.axhline(y=10, color='green', linestyle='--', label='Target: 10%') ax.set_xlabel('Fold') ax.set_ylabel('Frequency MAPE (%)') ax.set_title('Frequency MAPE by Fold') ax.legend() ax.grid(True, alpha=0.3, axis='y') # 5. Error Distribution (Mass) ax = axes[1, 1] mass_errors = (all_predictions[:, 0] - all_actuals[:, 0]) / all_actuals[:, 0] * 100 ax.hist(mass_errors, bins=30, color='steelblue', alpha=0.7, edgecolor='black') ax.axvline(x=0, color='red', linestyle='--', linewidth=2) ax.set_xlabel('Mass Error (%)') ax.set_ylabel('Count') ax.set_title(f'Mass Error Distribution (Mean={np.mean(mass_errors):.1f}%, Std={np.std(mass_errors):.1f}%)') ax.grid(True, alpha=0.3) # 6. Error Distribution (Frequency) ax = axes[1, 2] freq_errors = (all_predictions[:, 1] - all_actuals[:, 1]) / all_actuals[:, 1] * 100 ax.hist(freq_errors, bins=30, color='coral', alpha=0.7, edgecolor='black') ax.axvline(x=0, color='red', linestyle='--', linewidth=2) ax.set_xlabel('Frequency Error (%)') ax.set_ylabel('Count') ax.set_title(f'Freq Error Distribution (Mean={np.mean(freq_errors):.1f}%, Std={np.std(freq_errors):.1f}%)') ax.grid(True, alpha=0.3) plt.suptitle('Cross-Validation Results on Real FEA Data', fontsize=14, fontweight='bold') plt.tight_layout() plt.savefig(save_path, dpi=150) plt.close() print(f"Saved validation plot: {save_path}") def main(): print("="*70) print("Cross-Validation Assessment on Real FEA Data") print("="*70) # Find database db_path = project_root / "studies/uav_arm_optimization/2_results/study.db" study_name = "uav_arm_optimization" if not db_path.exists(): db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db" study_name = "uav_arm_atomizerfield_test" if not db_path.exists(): print(f"ERROR: No database found") return print(f"\nLoading data from: {db_path}") design_params, objectives, design_var_names = extract_training_data_from_study( str(db_path), study_name ) print(f"Total FEA samples: {len(design_params)}") print(f"Design variables: {design_var_names}") print(f"Objective ranges:") print(f" Mass: {objectives[:, 0].min():.1f} - {objectives[:, 0].max():.1f} g") print(f" Frequency: {objectives[:, 1].min():.1f} - {objectives[:, 1].max():.1f} Hz") # Run k-fold cross-validation print("\n" + "="*70) print("Running 5-Fold Cross-Validation") print("="*70) fold_results, all_predictions, all_actuals, all_indices = k_fold_cross_validation( design_params, objectives, design_var_names, n_folds=5, hidden_dims=[128, 64, 32], # Deeper network epochs=300 ) # Summary statistics print("\n" + "="*70) print("CROSS-VALIDATION SUMMARY") print("="*70) mass_mapes = [r['mass_mape'] for r in fold_results] freq_mapes = [r['freq_mape'] for r in fold_results] cv_summary = { 'mass_mape': np.mean(mass_mapes), 'mass_std': np.std(mass_mapes), 'freq_mape': np.mean(freq_mapes), 'freq_std': np.std(freq_mapes) } print(f"\nMass Prediction:") print(f" MAPE: {cv_summary['mass_mape']:.1f}% +/- {cv_summary['mass_std']:.1f}%") print(f" Status: {'[OK]' if cv_summary['mass_mape'] < 10 else '[NEEDS IMPROVEMENT]'}") print(f"\nFrequency Prediction:") print(f" MAPE: {cv_summary['freq_mape']:.1f}% +/- {cv_summary['freq_std']:.1f}%") print(f" Status: {'[OK]' if cv_summary['freq_mape'] < 10 else '[NEEDS IMPROVEMENT]'}") # Overall confidence mass_ok = cv_summary['mass_mape'] < 10 freq_ok = cv_summary['freq_mape'] < 10 if mass_ok and freq_ok: confidence = "HIGH" recommendation = "NN surrogate is ready for optimization" elif mass_ok: confidence = "MEDIUM" recommendation = "Mass prediction is good, but frequency needs more data or better architecture" else: confidence = "LOW" recommendation = "Need more FEA data or improved NN architecture" print(f"\nOverall Confidence: {confidence}") print(f"Recommendation: {recommendation}") # Generate plots plot_path = project_root / "cv_validation_results.png" plot_cv_results(all_predictions, all_actuals, fold_results, str(plot_path)) # Train and save final model model, model_state = train_final_model_with_cv_uncertainty( design_params, objectives, design_var_names, cv_summary ) print("\n" + "="*70) print("FILES GENERATED") print("="*70) print(f" Validation plot: {plot_path}") print(f" CV-validated model: {project_root / 'cv_validated_surrogate.pt'}") # Final assessment print("\n" + "="*70) print("NEXT STEPS") print("="*70) if confidence == "HIGH": print("1. Use the NN surrogate for fast optimization") print("2. Periodically validate Pareto-optimal designs with FEA") elif confidence == "MEDIUM": print("1. Frequency prediction needs improvement") print("2. Options:") print(" a. Collect more FEA samples in underrepresented regions") print(" b. Try deeper/wider network architecture") print(" c. Add physics-informed features (e.g., I-beam moment of inertia)") print(" d. Use ensemble with uncertainty-weighted Pareto front") else: print("1. Current model not ready for optimization") print("2. Run more FEA trials to expand training data") print("3. Consider data augmentation or transfer learning") if __name__ == '__main__': main()