# Atomizer/archive/scripts/validate_surrogate_real_data.py
"""
Real Data Cross-Validation for Surrogate Model
This script performs proper k-fold cross-validation using ONLY real FEA data
to assess the true prediction accuracy of the neural network surrogate.
The key insight: We don't need simulated FEA - we already have real FEA data!
We can use cross-validation to estimate out-of-sample performance.
"""
import sys
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Add project paths
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
from optimization_engine.active_learning_surrogate import (
EnsembleMLP,
extract_training_data_from_study
)
def k_fold_cross_validation(
    design_params: np.ndarray,
    objectives: np.ndarray,
    design_var_names: list,
    n_folds: int = 5,
    hidden_dims: list = None,
    epochs: int = 300,
    lr: float = 0.001
):
    """
    Perform k-fold cross-validation to assess real prediction performance.

    A fresh EnsembleMLP is trained on each fold's training split (with
    normalization statistics fitted on the training split only, to avoid
    leakage into the held-out set) and scored on the held-out split.

    Args:
        design_params: (n_samples, n_vars) design-variable matrix.
        objectives: (n_samples, 2) objectives; column 0 = mass, column 1 = frequency.
        design_var_names: design-variable names (kept for interface parity).
        n_folds: number of CV folds.
        hidden_dims: hidden layer sizes; defaults to [128, 64, 32].
        epochs: maximum training epochs per fold.
        lr: Adam learning rate.

    Returns:
        Tuple of (fold_results, all_predictions, all_actuals, all_indices):
        per-fold metric dicts, stacked de-normalized predictions and actuals,
        and the original sample indices of each held-out row.
    """
    # Fix the mutable-default-argument pitfall: the old `= [128, 64, 32]`
    # default list object was shared across all calls.
    if hidden_dims is None:
        hidden_dims = [128, 64, 32]

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold_results = []
    all_predictions = []
    all_actuals = []
    all_indices = []

    for fold, (train_idx, test_idx) in enumerate(kf.split(design_params)):
        print(f"\n--- Fold {fold+1}/{n_folds} ---")
        X_train, X_test = design_params[train_idx], design_params[test_idx]
        y_train, y_test = objectives[train_idx], objectives[test_idx]

        # Normalization fitted on training data only; epsilon guards
        # constant columns against division by zero.
        X_mean = X_train.mean(axis=0)
        X_std = X_train.std(axis=0) + 1e-8
        y_mean = y_train.mean(axis=0)
        y_std = y_train.std(axis=0) + 1e-8

        X_train_t = torch.FloatTensor((X_train - X_mean) / X_std)
        X_test_t = torch.FloatTensor((X_test - X_mean) / X_std)
        y_train_t = torch.FloatTensor((y_train - y_mean) / y_std)

        model = EnsembleMLP(X_train.shape[1], y_train.shape[1], hidden_dims)
        optimizer = optim.Adam(model.parameters(), lr=lr)
        criterion = nn.MSELoss()

        # Mini-batch training with early stopping on the epoch training loss.
        best_loss = float('inf')
        patience = 30
        patience_counter = 0
        best_state = None
        epochs_trained = 0  # avoids NameError after the loop if epochs == 0
        batch_size = 32

        for epoch in range(epochs):
            epochs_trained = epoch + 1
            model.train()
            perm = torch.randperm(len(X_train_t))
            epoch_loss = 0.0
            n_batches = 0
            for j in range(0, len(X_train_t), batch_size):
                batch_idx = perm[j:j+batch_size]
                optimizer.zero_grad()
                pred = model(X_train_t[batch_idx])
                loss = criterion(pred, y_train_t[batch_idx])
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
                n_batches += 1
            avg_loss = epoch_loss / n_batches

            if avg_loss < best_loss:
                best_loss = avg_loss
                # BUG FIX: state_dict().copy() is a *shallow* copy -- the
                # tensors it holds are live references to the parameters, so
                # the snapshot silently tracked the latest weights and the
                # restore below was a no-op. Clone each tensor to freeze it.
                best_state = {k: v.detach().clone()
                              for k, v in model.state_dict().items()}
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    break

        # Restore the best snapshot before evaluating on the held-out fold.
        if best_state is not None:
            model.load_state_dict(best_state)

        model.eval()
        with torch.no_grad():
            # De-normalize back to physical units (g, Hz).
            pred = model(X_test_t).numpy() * y_std + y_mean

        # Per-objective metrics; column 0 = mass, column 1 = frequency.
        mass_pred, freq_pred = pred[:, 0], pred[:, 1]
        mass_actual, freq_actual = y_test[:, 0], y_test[:, 1]
        mass_errors = np.abs(mass_pred - mass_actual) / (np.abs(mass_actual) + 1e-8)
        freq_errors = np.abs(freq_pred - freq_actual) / (np.abs(freq_actual) + 1e-8)
        mass_mape = np.mean(mass_errors) * 100
        freq_mape = np.mean(freq_errors) * 100
        mass_rmse = np.sqrt(np.mean((mass_pred - mass_actual)**2))
        freq_rmse = np.sqrt(np.mean((freq_pred - freq_actual)**2))

        fold_results.append({
            'fold': fold + 1,
            'n_train': len(train_idx),
            'n_test': len(test_idx),
            'mass_mape': mass_mape,
            'freq_mape': freq_mape,
            'mass_rmse': mass_rmse,
            'freq_rmse': freq_rmse,
            'epochs_trained': epochs_trained
        })

        # Accumulate held-out predictions for the aggregate parity plots.
        all_predictions.extend(pred.tolist())
        all_actuals.extend(y_test.tolist())
        all_indices.extend(test_idx.tolist())
        print(f" Mass MAPE: {mass_mape:.1f}%, RMSE: {mass_rmse:.1f}g")
        print(f" Freq MAPE: {freq_mape:.1f}%, RMSE: {freq_rmse:.1f}Hz")

    return fold_results, np.array(all_predictions), np.array(all_actuals), all_indices
def train_final_model_with_cv_uncertainty(
    design_params: np.ndarray,
    objectives: np.ndarray,
    design_var_names: list,
    cv_results: dict
):
    """
    Train the final surrogate on the complete dataset and serialize it
    together with the cross-validation error statistics, which act as the
    model's uncertainty metadata.

    Args:
        design_params: (n_samples, n_vars) design-variable matrix.
        objectives: (n_samples, n_objectives) objective matrix.
        design_var_names: design-variable names stored in the checkpoint.
        cv_results: summary dict carrying 'mass_mape', 'freq_mape',
            'mass_std' and 'freq_std' from k-fold cross-validation.

    Returns:
        Tuple of (model, model_state): the trained network and the
        checkpoint dict written to disk.
    """
    print("\n" + "="*60)
    print("Training Final Model on All Data")
    print("="*60)

    # Normalization statistics over the full dataset; epsilon protects
    # constant columns from division by zero.
    in_mean = design_params.mean(axis=0)
    in_std = design_params.std(axis=0) + 1e-8
    out_mean = objectives.mean(axis=0)
    out_std = objectives.std(axis=0) + 1e-8

    inputs_t = torch.FloatTensor((design_params - in_mean) / in_std)
    targets_t = torch.FloatTensor((objectives - out_mean) / out_std)

    hidden = [128, 64, 32]
    model = EnsembleMLP(design_params.shape[1], objectives.shape[1], hidden)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()

    # Full-batch training; no early stopping since every sample is used.
    model.train()
    for _ in range(500):
        optimizer.zero_grad()
        loss = loss_fn(model(inputs_t), targets_t)
        loss.backward()
        optimizer.step()

    # Bundle weights with everything needed to reuse the model standalone.
    model_state = {
        'model': model.state_dict(),
        'input_mean': in_mean.tolist(),
        'input_std': in_std.tolist(),
        'output_mean': out_mean.tolist(),
        'output_std': out_std.tolist(),
        'design_var_names': design_var_names,
        'cv_mass_mape': cv_results['mass_mape'],
        'cv_freq_mape': cv_results['freq_mape'],
        'cv_mass_std': cv_results['mass_std'],
        'cv_freq_std': cv_results['freq_std'],
        'n_samples': len(design_params),
        'hidden_dims': hidden
    }
    save_path = project_root / "cv_validated_surrogate.pt"
    torch.save(model_state, save_path)
    print(f"Saved CV-validated model to: {save_path}")
    return model, model_state
def _plot_parity(ax, actual, pred, xlabel, ylabel, title):
    """Scatter predicted vs. actual values with a y = x 'Perfect' reference line."""
    ax.scatter(actual, pred, alpha=0.5, s=20)
    lo = min(actual.min(), pred.min())
    hi = max(actual.max(), pred.max())
    ax.plot([lo, hi], [lo, hi], 'r--', linewidth=2, label='Perfect')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


def _plot_fold_bars(ax, folds, mapes, color, ylabel, title):
    """Bar chart of per-fold MAPE with mean and 10% target reference lines."""
    ax.bar(folds, mapes, color=color, alpha=0.7)
    ax.axhline(y=np.mean(mapes), color='red', linestyle='--',
               label=f'Mean: {np.mean(mapes):.1f}%')
    ax.axhline(y=10, color='green', linestyle='--', label='Target: 10%')
    ax.set_xlabel('Fold')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')


def _plot_error_hist(ax, errors, color, xlabel, title_prefix):
    """Histogram of signed percentage errors with a zero-error marker line."""
    ax.hist(errors, bins=30, color=color, alpha=0.7, edgecolor='black')
    ax.axvline(x=0, color='red', linestyle='--', linewidth=2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    ax.set_title(f'{title_prefix} (Mean={np.mean(errors):.1f}%, Std={np.std(errors):.1f}%)')
    ax.grid(True, alpha=0.3)


def plot_cv_results(
    all_predictions: np.ndarray,
    all_actuals: np.ndarray,
    fold_results: list,
    save_path: str
):
    """Generate comprehensive validation plots.

    Produces a 2x3 grid -- parity plots for mass and frequency, per-fold
    MAPE bar charts, and signed-error histograms -- and writes the figure
    to `save_path`. Column 0 of the arrays is mass (g), column 1 is
    frequency (Hz).
    """
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1-2. Predicted vs actual parity plots.
    _plot_parity(axes[0, 0], all_actuals[:, 0], all_predictions[:, 0],
                 'FEA Mass (g)', 'NN Predicted Mass (g)',
                 'Mass: Predicted vs Actual')
    _plot_parity(axes[0, 1], all_actuals[:, 1], all_predictions[:, 1],
                 'FEA Frequency (Hz)', 'NN Predicted Frequency (Hz)',
                 'Frequency: Predicted vs Actual')

    # 3-4. Per-fold MAPE bars.
    folds = [r['fold'] for r in fold_results]
    mass_mapes = [r['mass_mape'] for r in fold_results]
    freq_mapes = [r['freq_mape'] for r in fold_results]
    _plot_fold_bars(axes[0, 2], folds, mass_mapes, 'steelblue',
                    'Mass MAPE (%)', 'Mass MAPE by Fold')
    _plot_fold_bars(axes[1, 0], folds, freq_mapes, 'coral',
                    'Frequency MAPE (%)', 'Frequency MAPE by Fold')

    # 5-6. Signed percentage-error distributions. Epsilon guards a zero
    # actual value, matching the guard used in cross-validation scoring.
    mass_errors = (all_predictions[:, 0] - all_actuals[:, 0]) / (all_actuals[:, 0] + 1e-8) * 100
    freq_errors = (all_predictions[:, 1] - all_actuals[:, 1]) / (all_actuals[:, 1] + 1e-8) * 100
    _plot_error_hist(axes[1, 1], mass_errors, 'steelblue', 'Mass Error (%)',
                     'Mass Error Distribution')
    _plot_error_hist(axes[1, 2], freq_errors, 'coral', 'Frequency Error (%)',
                     'Freq Error Distribution')

    plt.suptitle('Cross-Validation Results on Real FEA Data', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.savefig(save_path, dpi=150)
    plt.close()
    print(f"Saved validation plot: {save_path}")
def main():
    """
    Entry point: load real FEA samples from the study database, run 5-fold
    cross-validation of the NN surrogate, summarize and plot the accuracy,
    then train and save a final model on the full dataset.
    """
    print("="*70)
    print("Cross-Validation Assessment on Real FEA Data")
    print("="*70)
    # Find database: prefer the main optimization study, fall back to the
    # AtomizerField test study if the primary database is absent.
    db_path = project_root / "studies/uav_arm_optimization/2_results/study.db"
    study_name = "uav_arm_optimization"
    if not db_path.exists():
        db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db"
        study_name = "uav_arm_atomizerfield_test"
    if not db_path.exists():
        print(f"ERROR: No database found")
        return
    print(f"\nLoading data from: {db_path}")
    design_params, objectives, design_var_names = extract_training_data_from_study(
        str(db_path), study_name
    )
    # Quick data sanity report before training anything.
    print(f"Total FEA samples: {len(design_params)}")
    print(f"Design variables: {design_var_names}")
    print(f"Objective ranges:")
    print(f" Mass: {objectives[:, 0].min():.1f} - {objectives[:, 0].max():.1f} g")
    print(f" Frequency: {objectives[:, 1].min():.1f} - {objectives[:, 1].max():.1f} Hz")
    # Run k-fold cross-validation
    print("\n" + "="*70)
    print("Running 5-Fold Cross-Validation")
    print("="*70)
    fold_results, all_predictions, all_actuals, all_indices = k_fold_cross_validation(
        design_params, objectives, design_var_names,
        n_folds=5,
        hidden_dims=[128, 64, 32],  # Deeper network
        epochs=300
    )
    # Summary statistics: mean and spread of MAPE across folds.
    print("\n" + "="*70)
    print("CROSS-VALIDATION SUMMARY")
    print("="*70)
    mass_mapes = [r['mass_mape'] for r in fold_results]
    freq_mapes = [r['freq_mape'] for r in fold_results]
    cv_summary = {
        'mass_mape': np.mean(mass_mapes),
        'mass_std': np.std(mass_mapes),
        'freq_mape': np.mean(freq_mapes),
        'freq_std': np.std(freq_mapes)
    }
    print(f"\nMass Prediction:")
    print(f" MAPE: {cv_summary['mass_mape']:.1f}% +/- {cv_summary['mass_std']:.1f}%")
    print(f" Status: {'[OK]' if cv_summary['mass_mape'] < 10 else '[NEEDS IMPROVEMENT]'}")
    print(f"\nFrequency Prediction:")
    print(f" MAPE: {cv_summary['freq_mape']:.1f}% +/- {cv_summary['freq_std']:.1f}%")
    print(f" Status: {'[OK]' if cv_summary['freq_mape'] < 10 else '[NEEDS IMPROVEMENT]'}")
    # Overall confidence: both objectives must beat the 10% MAPE target.
    mass_ok = cv_summary['mass_mape'] < 10
    freq_ok = cv_summary['freq_mape'] < 10
    if mass_ok and freq_ok:
        confidence = "HIGH"
        recommendation = "NN surrogate is ready for optimization"
    elif mass_ok:
        confidence = "MEDIUM"
        recommendation = "Mass prediction is good, but frequency needs more data or better architecture"
    else:
        confidence = "LOW"
        recommendation = "Need more FEA data or improved NN architecture"
    print(f"\nOverall Confidence: {confidence}")
    print(f"Recommendation: {recommendation}")
    # Generate plots
    plot_path = project_root / "cv_validation_results.png"
    plot_cv_results(all_predictions, all_actuals, fold_results, str(plot_path))
    # Train and save final model (carries the CV stats as uncertainty metadata).
    model, model_state = train_final_model_with_cv_uncertainty(
        design_params, objectives, design_var_names, cv_summary
    )
    print("\n" + "="*70)
    print("FILES GENERATED")
    print("="*70)
    print(f" Validation plot: {plot_path}")
    print(f" CV-validated model: {project_root / 'cv_validated_surrogate.pt'}")
    # Final assessment: actionable next steps keyed to the confidence level.
    print("\n" + "="*70)
    print("NEXT STEPS")
    print("="*70)
    if confidence == "HIGH":
        print("1. Use the NN surrogate for fast optimization")
        print("2. Periodically validate Pareto-optimal designs with FEA")
    elif confidence == "MEDIUM":
        print("1. Frequency prediction needs improvement")
        print("2. Options:")
        print(" a. Collect more FEA samples in underrepresented regions")
        print(" b. Try deeper/wider network architecture")
        print(" c. Add physics-informed features (e.g., I-beam moment of inertia)")
        print(" d. Use ensemble with uncertainty-weighted Pareto front")
    else:
        print("1. Current model not ready for optimization")
        print("2. Run more FEA trials to expand training data")
        print("3. Consider data augmentation or transfer learning")


if __name__ == '__main__':
    main()