# Atomizer/archive/scripts/validate_surrogate_real_data.py
"""
Real Data Cross-Validation for Surrogate Model
This script performs proper k-fold cross-validation using ONLY real FEA data
to assess the true prediction accuracy of the neural network surrogate.
The key insight: We don't need simulated FEA - we already have real FEA data!
We can use cross-validation to estimate out-of-sample performance.
"""
import sys
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Add project paths
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
from optimization_engine.active_learning_surrogate import (
EnsembleMLP,
extract_training_data_from_study
)
def k_fold_cross_validation(
    design_params: np.ndarray,
    objectives: np.ndarray,
    design_var_names: list,
    n_folds: int = 5,
    hidden_dims: list = None,
    epochs: int = 300,
    lr: float = 0.001
):
    """
    Perform k-fold cross-validation to assess real prediction performance.

    A fresh EnsembleMLP is trained on each fold's training split (with
    normalization statistics fitted on the training split only, to avoid
    leakage into the held-out set) and scored on the held-out split.

    Args:
        design_params: (n_samples, n_vars) design-variable matrix.
        objectives: (n_samples, 2) objectives; column 0 = mass, column 1 = frequency.
        design_var_names: design-variable names (kept for interface parity).
        n_folds: number of CV folds.
        hidden_dims: hidden layer sizes; defaults to [128, 64, 32].
        epochs: maximum training epochs per fold.
        lr: Adam learning rate.

    Returns:
        Tuple of (fold_results, all_predictions, all_actuals, all_indices):
        per-fold metric dicts, stacked de-normalized predictions and actuals,
        and the original sample indices of each held-out row.
    """
    # Fix the mutable-default-argument pitfall: the old `= [128, 64, 32]`
    # default list object was shared across all calls.
    if hidden_dims is None:
        hidden_dims = [128, 64, 32]

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold_results = []
    all_predictions = []
    all_actuals = []
    all_indices = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(design_params)):
        print(f"\n--- Fold {fold+1}/{n_folds} ---")
        X_train, X_test = design_params[train_idx], design_params[test_idx]
        y_train, y_test = objectives[train_idx], objectives[test_idx]

        # Normalization fitted on training data only; epsilon guards
        # constant columns against division by zero.
        X_mean = X_train.mean(axis=0)
        X_std = X_train.std(axis=0) + 1e-8
        y_mean = y_train.mean(axis=0)
        y_std = y_train.std(axis=0) + 1e-8

        X_train_t = torch.FloatTensor((X_train - X_mean) / X_std)
        X_test_t = torch.FloatTensor((X_test - X_mean) / X_std)
        y_train_t = torch.FloatTensor((y_train - y_mean) / y_std)

        model = EnsembleMLP(X_train.shape[1], y_train.shape[1], hidden_dims)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()

        # Mini-batch training with early stopping on the epoch training loss.
        best_loss = float('inf')
        patience = 30
        patience_counter = 0
        best_state = None
        epochs_trained = 0  # avoids NameError after the loop if epochs == 0
        batch_size = 32

        for epoch in range(epochs):
            epochs_trained = epoch + 1
            model.train()
            perm = torch.randperm(len(X_train_t))
            epoch_loss = 0.0
            n_batches = 0
            for j in range(0, len(X_train_t), batch_size):
                batch_idx = perm[j:j+batch_size]
                optimizer.zero_grad()
                pred = model(X_train_t[batch_idx])
                loss = criterion(pred, y_train_t[batch_idx])
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
                n_batches += 1
            avg_loss = epoch_loss / n_batches

            if avg_loss < best_loss:
                best_loss = avg_loss
                # BUG FIX: state_dict().copy() is a *shallow* copy -- the
                # tensors it holds are live references to the parameters, so
                # the snapshot silently tracked the latest weights and the
                # restore below was a no-op. Clone each tensor to freeze it.
                best_state = {k: v.detach().clone()
                              for k, v in model.state_dict().items()}
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    break

        # Restore the best snapshot before evaluating on the held-out fold.
        if best_state is not None:
            model.load_state_dict(best_state)

        model.eval()
        with torch.no_grad():
            # De-normalize back to physical units (g, Hz).
            pred = model(X_test_t).numpy() * y_std + y_mean

        # Per-objective metrics; column 0 = mass, column 1 = frequency.
        mass_pred, freq_pred = pred[:, 0], pred[:, 1]
        mass_actual, freq_actual = y_test[:, 0], y_test[:, 1]
        mass_errors = np.abs(mass_pred - mass_actual) / (np.abs(mass_actual) + 1e-8)
        freq_errors = np.abs(freq_pred - freq_actual) / (np.abs(freq_actual) + 1e-8)
        mass_mape = np.mean(mass_errors) * 100
        freq_mape = np.mean(freq_errors) * 100
        mass_rmse = np.sqrt(np.mean((mass_pred - mass_actual)**2))
        freq_rmse = np.sqrt(np.mean((freq_pred - freq_actual)**2))

        fold_results.append({
            'fold': fold + 1,
            'n_train': len(train_idx),
            'n_test': len(test_idx),
            'mass_mape': mass_mape,
            'freq_mape': freq_mape,
            'mass_rmse': mass_rmse,
            'freq_rmse': freq_rmse,
            'epochs_trained': epochs_trained
        })

        # Accumulate held-out predictions for the aggregate parity plots.
        all_predictions.extend(pred.tolist())
        all_actuals.extend(y_test.tolist())
        all_indices.extend(test_idx.tolist())
        print(f" Mass MAPE: {mass_mape:.1f}%, RMSE: {mass_rmse:.1f}g")
        print(f" Freq MAPE: {freq_mape:.1f}%, RMSE: {freq_rmse:.1f}Hz")

    return fold_results, np.array(all_predictions), np.array(all_actuals), all_indices
def train_final_model_with_cv_uncertainty(
    design_params: np.ndarray,
    objectives: np.ndarray,
    design_var_names: list,
    cv_results: dict
):
    """
    Train the final surrogate on the complete dataset and serialize it
    together with the cross-validation error statistics, which act as the
    model's uncertainty metadata.

    Args:
        design_params: (n_samples, n_vars) design-variable matrix.
        objectives: (n_samples, n_objectives) objective matrix.
        design_var_names: design-variable names stored in the checkpoint.
        cv_results: summary dict carrying 'mass_mape', 'freq_mape',
            'mass_std' and 'freq_std' from k-fold cross-validation.

    Returns:
        Tuple of (model, model_state): the trained network and the
        checkpoint dict written to disk.
    """
    print("\n" + "="*60)
    print("Training Final Model on All Data")
    print("="*60)

    # Normalization statistics over the full dataset; epsilon protects
    # constant columns from division by zero.
    in_mean = design_params.mean(axis=0)
    in_std = design_params.std(axis=0) + 1e-8
    out_mean = objectives.mean(axis=0)
    out_std = objectives.std(axis=0) + 1e-8

    inputs_t = torch.FloatTensor((design_params - in_mean) / in_std)
    targets_t = torch.FloatTensor((objectives - out_mean) / out_std)

    hidden = [128, 64, 32]
    model = EnsembleMLP(design_params.shape[1], objectives.shape[1], hidden)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()

    # Full-batch training; no early stopping since every sample is used.
    model.train()
    for _ in range(500):
        optimizer.zero_grad()
        loss = loss_fn(model(inputs_t), targets_t)
        loss.backward()
        optimizer.step()

    # Bundle weights with everything needed to reuse the model standalone.
    model_state = {
        'model': model.state_dict(),
        'input_mean': in_mean.tolist(),
        'input_std': in_std.tolist(),
        'output_mean': out_mean.tolist(),
        'output_std': out_std.tolist(),
        'design_var_names': design_var_names,
        'cv_mass_mape': cv_results['mass_mape'],
        'cv_freq_mape': cv_results['freq_mape'],
        'cv_mass_std': cv_results['mass_std'],
        'cv_freq_std': cv_results['freq_std'],
        'n_samples': len(design_params),
        'hidden_dims': hidden
    }
    save_path = project_root / "cv_validated_surrogate.pt"
    torch.save(model_state, save_path)
    print(f"Saved CV-validated model to: {save_path}")
    return model, model_state
def _plot_parity(ax, actual, pred, xlabel, ylabel, title):
    """Scatter predicted vs. actual values with a y = x 'Perfect' reference line."""
    ax.scatter(actual, pred, alpha=0.5, s=20)
    lo = min(actual.min(), pred.min())
    hi = max(actual.max(), pred.max())
    ax.plot([lo, hi], [lo, hi], 'r--', linewidth=2, label='Perfect')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_fold_bars(ax, folds, mapes, color, ylabel, title):
    """Bar chart of per-fold MAPE with mean and 10% target reference lines."""
    ax.bar(folds, mapes, color=color, alpha=0.7)
    ax.axhline(y=np.mean(mapes), color='red', linestyle='--',
               label=f'Mean: {np.mean(mapes):.1f}%')
    ax.axhline(y=10, color='green', linestyle='--', label='Target: 10%')
    ax.set_xlabel('Fold')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')


def _plot_error_hist(ax, errors, color, xlabel, title_prefix):
    """Histogram of signed percentage errors with a zero-error marker line."""
    ax.hist(errors, bins=30, color=color, alpha=0.7, edgecolor='black')
    ax.axvline(x=0, color='red', linestyle='--', linewidth=2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    ax.set_title(f'{title_prefix} (Mean={np.mean(errors):.1f}%, Std={np.std(errors):.1f}%)')
    ax.grid(True, alpha=0.3)


def plot_cv_results(
    all_predictions: np.ndarray,
    all_actuals: np.ndarray,
    fold_results: list,
    save_path: str
):
    """Generate comprehensive validation plots.

    Produces a 2x3 grid -- parity plots for mass and frequency, per-fold
    MAPE bar charts, and signed-error histograms -- and writes the figure
    to `save_path`. Column 0 of the arrays is mass (g), column 1 is
    frequency (Hz).
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1-2. Predicted vs actual parity plots.
    _plot_parity(axes[0, 0], all_actuals[:, 0], all_predictions[:, 0],
                 'FEA Mass (g)', 'NN Predicted Mass (g)',
                 'Mass: Predicted vs Actual')
    _plot_parity(axes[0, 1], all_actuals[:, 1], all_predictions[:, 1],
                 'FEA Frequency (Hz)', 'NN Predicted Frequency (Hz)',
                 'Frequency: Predicted vs Actual')

    # 3-4. Per-fold MAPE bars.
    folds = [r['fold'] for r in fold_results]
    mass_mapes = [r['mass_mape'] for r in fold_results]
    freq_mapes = [r['freq_mape'] for r in fold_results]
    _plot_fold_bars(axes[0, 2], folds, mass_mapes, 'steelblue',
                    'Mass MAPE (%)', 'Mass MAPE by Fold')
    _plot_fold_bars(axes[1, 0], folds, freq_mapes, 'coral',
                    'Frequency MAPE (%)', 'Frequency MAPE by Fold')

    # 5-6. Signed percentage-error distributions. Epsilon guards a zero
    # actual value, matching the guard used in cross-validation scoring.
    mass_errors = (all_predictions[:, 0] - all_actuals[:, 0]) / (all_actuals[:, 0] + 1e-8) * 100
    freq_errors = (all_predictions[:, 1] - all_actuals[:, 1]) / (all_actuals[:, 1] + 1e-8) * 100
    _plot_error_hist(axes[1, 1], mass_errors, 'steelblue', 'Mass Error (%)',
                     'Mass Error Distribution')
    _plot_error_hist(axes[1, 2], freq_errors, 'coral', 'Frequency Error (%)',
                     'Freq Error Distribution')

    plt.suptitle('Cross-Validation Results on Real FEA Data', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig(save_path, dpi=150)
    plt.close()
    print(f"Saved validation plot: {save_path}")
def main():
    """
    Entry point: load real FEA samples from the study database, run 5-fold
    cross-validation of the NN surrogate, summarize and plot the accuracy,
    then train and save a final model on the full dataset.
    """
    print("="*70)
    print("Cross-Validation Assessment on Real FEA Data")
    print("="*70)
    # Find database: prefer the main optimization study, fall back to the
    # AtomizerField test study if the primary database is absent.
    db_path = project_root / "studies/uav_arm_optimization/2_results/study.db"
    study_name = "uav_arm_optimization"
    if not db_path.exists():
        db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db"
        study_name = "uav_arm_atomizerfield_test"
    if not db_path.exists():
        print(f"ERROR: No database found")
        return
    print(f"\nLoading data from: {db_path}")
    design_params, objectives, design_var_names = extract_training_data_from_study(
        str(db_path), study_name
    )
    # Quick data sanity report before training anything.
    print(f"Total FEA samples: {len(design_params)}")
    print(f"Design variables: {design_var_names}")
    print(f"Objective ranges:")
    print(f" Mass: {objectives[:, 0].min():.1f} - {objectives[:, 0].max():.1f} g")
    print(f" Frequency: {objectives[:, 1].min():.1f} - {objectives[:, 1].max():.1f} Hz")
    # Run k-fold cross-validation
    print("\n" + "="*70)
    print("Running 5-Fold Cross-Validation")
    print("="*70)
    fold_results, all_predictions, all_actuals, all_indices = k_fold_cross_validation(
        design_params, objectives, design_var_names,
        n_folds=5,
        hidden_dims=[128, 64, 32],  # Deeper network
        epochs=300
    )
    # Summary statistics: mean and spread of MAPE across folds.
    print("\n" + "="*70)
    print("CROSS-VALIDATION SUMMARY")
    print("="*70)
    mass_mapes = [r['mass_mape'] for r in fold_results]
    freq_mapes = [r['freq_mape'] for r in fold_results]
    cv_summary = {
        'mass_mape': np.mean(mass_mapes),
        'mass_std': np.std(mass_mapes),
        'freq_mape': np.mean(freq_mapes),
        'freq_std': np.std(freq_mapes)
    }
    print(f"\nMass Prediction:")
    print(f" MAPE: {cv_summary['mass_mape']:.1f}% +/- {cv_summary['mass_std']:.1f}%")
    print(f" Status: {'[OK]' if cv_summary['mass_mape'] < 10 else '[NEEDS IMPROVEMENT]'}")
    print(f"\nFrequency Prediction:")
    print(f" MAPE: {cv_summary['freq_mape']:.1f}% +/- {cv_summary['freq_std']:.1f}%")
    print(f" Status: {'[OK]' if cv_summary['freq_mape'] < 10 else '[NEEDS IMPROVEMENT]'}")
    # Overall confidence: both objectives must beat the 10% MAPE target.
    mass_ok = cv_summary['mass_mape'] < 10
    freq_ok = cv_summary['freq_mape'] < 10
    if mass_ok and freq_ok:
        confidence = "HIGH"
        recommendation = "NN surrogate is ready for optimization"
    elif mass_ok:
        confidence = "MEDIUM"
        recommendation = "Mass prediction is good, but frequency needs more data or better architecture"
    else:
        confidence = "LOW"
        recommendation = "Need more FEA data or improved NN architecture"
    print(f"\nOverall Confidence: {confidence}")
    print(f"Recommendation: {recommendation}")
    # Generate plots
    plot_path = project_root / "cv_validation_results.png"
    plot_cv_results(all_predictions, all_actuals, fold_results, str(plot_path))
    # Train and save final model (carries the CV stats as uncertainty metadata).
    model, model_state = train_final_model_with_cv_uncertainty(
        design_params, objectives, design_var_names, cv_summary
    )
    print("\n" + "="*70)
    print("FILES GENERATED")
    print("="*70)
    print(f" Validation plot: {plot_path}")
    print(f" CV-validated model: {project_root / 'cv_validated_surrogate.pt'}")
    # Final assessment: actionable next steps keyed to the confidence level.
    print("\n" + "="*70)
    print("NEXT STEPS")
    print("="*70)
    if confidence == "HIGH":
        print("1. Use the NN surrogate for fast optimization")
        print("2. Periodically validate Pareto-optimal designs with FEA")
    elif confidence == "MEDIUM":
        print("1. Frequency prediction needs improvement")
        print("2. Options:")
        print(" a. Collect more FEA samples in underrepresented regions")
        print(" b. Try deeper/wider network architecture")
        print(" c. Add physics-informed features (e.g., I-beam moment of inertia)")
        print(" d. Use ensemble with uncertainty-weighted Pareto front")
    else:
        print("1. Current model not ready for optimization")
        print("2. Run more FEA trials to expand training data")
        print("3. Consider data augmentation or transfer learning")


if __name__ == '__main__':
    main()