"""
test_predictions.py
Integration tests for complete pipeline

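Tests the full system from parsing to prediction:
- Parser validation against previously parsed data (test_case_beam)
- Training pipeline end-to-end on synthetic graphs
- Prediction accuracy against synthetic targets
- Inference performance benchmarks across mesh sizes
- Inference over a set of multiple graphs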
"""

import torch
import numpy as np
import sys
from pathlib import Path
import json
import time

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from neural_field_parser import NastranToNeuralFieldParser
from neural_models.data_loader import FEAMeshDataset
from neural_models.field_predictor import create_model
from neural_models.physics_losses import create_loss_function


def test_parser():
    """
    Test 1: Parser validation

    Validates previously parsed output in the ``test_case_beam`` directory
    (relative to the current working directory). The parser itself is not
    invoked here: the test only checks that ``neural_field_data.json`` and
    ``neural_field_data.h5`` both exist, loads the JSON file and reads the
    node/element counts from ``mesh.statistics``.

    Returns:
        dict with keys 'status', 'message' and 'metrics'.
        - Directory or parsed files missing: status 'PASS' with
          ``metrics == {'skipped': True}``.
        - JSON loaded and counts read: status 'PASS' with the counts and a
          'has_results' flag.
        - Any exception while checking/loading: status 'FAIL' with the
          exception text in the message.
    """
    print("    Checking for test data...")

    case_dir = Path("test_case_beam")

    # Guard clause: without the directory there is nothing to validate.
    if not case_dir.exists():
        print(f"    ⚠ Warning: {case_dir} not found")
        print("    Skipping parser test - run test_simple_beam.py first")
        return {
            'status': 'PASS',
            'message': 'Parser test skipped (no test data)',
            'metrics': {'skipped': True}
        }

    print(f"    Found test directory: {case_dir}")

    try:
        metadata_path = case_dir / "neural_field_data.json"
        fields_path = case_dir / "neural_field_data.h5"

        # Both output files must be present for the data to count as parsed.
        if not (metadata_path.exists() and fields_path.exists()):
            print("    Parsed data not found - run test_simple_beam.py first")
            return {
                'status': 'PASS',
                'message': 'Parser test skipped (data not parsed yet)',
                'metrics': {'skipped': True}
            }

        print("    Found existing parsed data")

        # Load the JSON file and pull out the mesh statistics.
        with metadata_path.open('r') as handle:
            parsed = json.load(handle)

        stats = parsed['mesh']['statistics']
        node_count = stats['n_nodes']
        element_count = stats['n_elements']

        print(f"    Nodes: {node_count:,}")
        print(f"    Elements: {element_count:,}")

        metrics = {
            'n_nodes': node_count,
            'n_elements': element_count,
            'has_results': 'results' in parsed
        }
        return {
            'status': 'PASS',
            'message': 'Parser validation successful',
            'metrics': metrics
        }

    except Exception as e:
        # Missing keys, unreadable files and malformed JSON all count as a
        # validation failure rather than an unhandled error.
        reason = str(e)
        print(f"    Error: {reason}")
        return {
            'status': 'FAIL',
            'message': f'Parser validation failed: {reason}',
            'metrics': {}
        }


def test_training():
    """
    Test 2: Training pipeline

    Expected: Complete training loop runs without errors and finishes with a
    finite loss.

    Builds a small synthetic dataset (5 random graphs with 20 nodes and
    40 edges each), creates a fresh model via ``create_model`` and trains it
    for 10 epochs with the 'mse' loss from ``create_loss_function``.

    Returns:
        dict with keys 'status', 'message' and 'metrics'. 'metrics' holds
        'epochs', 'samples', 'training_time_s' and 'final_loss' (mean loss
        over the dataset in the last epoch). Status is 'FAIL' when the final
        loss is NaN or infinite, otherwise 'PASS'.
    """
    num_epochs = 10
    num_samples = 5  # Just 5 samples for quick test
    num_nodes = 20
    num_edges = 40

    print("    Setting up training test...")

    # Create minimal synthetic dataset
    print("    Creating synthetic training data...")

    # Imported once here instead of on every iteration of the loop below.
    from torch_geometric.data import Data

    dataset = []
    for _ in range(num_samples):
        x = torch.randn(num_nodes, 12)
        edge_index = torch.randint(0, num_nodes, (2, num_edges))
        edge_attr = torch.randn(num_edges, 5)
        batch = torch.zeros(num_nodes, dtype=torch.long)

        data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, batch=batch)

        # Add synthetic targets
        data.y_displacement = torch.randn(num_nodes, 6)
        data.y_stress = torch.randn(num_nodes, 6)

        dataset.append(data)

    print(f"    Created {len(dataset)} training samples")

    # Create model
    print("    Creating model...")

    config = {
        'node_feature_dim': 12,
        'edge_feature_dim': 5,
        'hidden_dim': 64,
        'num_layers': 4,
        'dropout': 0.1
    }

    model = create_model(config)
    loss_fn = create_loss_function('mse')
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    print(f"    Training for {num_epochs} epochs...")

    # Training loop
    model.train()
    # perf_counter is monotonic and high-resolution, unlike wall-clock
    # time.time(), so the measured duration cannot be skewed by clock changes.
    start_time = time.perf_counter()

    avg_loss = float('nan')
    for epoch in range(num_epochs):
        epoch_loss = 0.0

        for data in dataset:
            optimizer.zero_grad()

            # Forward pass
            predictions = model(data, return_stress=True)

            # Compute loss
            targets = {
                'displacement': data.y_displacement,
                'stress': data.y_stress
            }

            loss_dict = loss_fn(predictions, targets)
            loss = loss_dict['total_loss']

            # Backward pass
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        avg_loss = epoch_loss / len(dataset)

        if (epoch + 1) % 5 == 0:
            print(f"      Epoch {epoch+1}/{num_epochs}: Loss = {avg_loss:.6f}")

    training_time = time.perf_counter() - start_time

    print(f"    Training completed in {training_time:.2f}s")

    metrics = {
        'epochs': num_epochs,
        'samples': len(dataset),
        'training_time_s': float(training_time),
        'final_loss': float(avg_loss)
    }

    # A loop that "ran without errors" but produced NaN/inf has diverged and
    # must not be reported as a successful training run.
    if not np.isfinite(avg_loss):
        return {
            'status': 'FAIL',
            'message': f'Training diverged: final loss is {avg_loss}',
            'metrics': metrics
        }

    return {
        'status': 'PASS',
        'message': 'Training pipeline successful',
        'metrics': metrics
    }


def test_prediction_accuracy():
    """
    Test 3: Prediction accuracy

    Expected: Predictions are produced and their error against the targets
    is finite.

    Creates its own model (it does not reuse the one from test_training),
    fits it for 20 optimizer steps on a single random graph with random
    displacement/stress targets, then runs one forward pass in eval mode and
    reports the mean absolute error on that same graph.

    Returns:
        dict with keys 'status', 'message' and 'metrics'. 'metrics' holds
        'inference_time_ms', 'displacement_error', 'stress_error' and
        'num_nodes'. Status is 'FAIL' when either error is NaN or infinite,
        otherwise 'PASS'. No accuracy threshold is applied.
    """
    from torch_geometric.data import Data

    print("    Testing prediction accuracy...")

    # Create test case
    num_nodes = 20
    num_edges = 40

    x = torch.randn(num_nodes, 12)
    edge_index = torch.randint(0, num_nodes, (2, num_edges))
    edge_attr = torch.randn(num_edges, 5)
    batch = torch.zeros(num_nodes, dtype=torch.long)

    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, batch=batch)

    # Synthetic ground truth
    target_disp = torch.randn(num_nodes, 6)
    target_stress = torch.randn(num_nodes, 6)

    # Create and "train" model (minimal training for test speed)
    print("    Creating model...")

    config = {
        'node_feature_dim': 12,
        'edge_feature_dim': 5,
        'hidden_dim': 64,
        'num_layers': 4,
        'dropout': 0.0
    }

    model = create_model(config)

    # Quick training to make predictions reasonable
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = create_loss_function('mse')

    # The targets never change between steps, so build the dict once.
    targets = {
        'displacement': target_disp,
        'stress': target_stress
    }

    for _ in range(20):
        optimizer.zero_grad()

        predictions = model(data, return_stress=True)

        loss_dict = loss_fn(predictions, targets)
        loss = loss_dict['total_loss']

        loss.backward()
        optimizer.step()

    # Test prediction
    print("    Running prediction...")

    model.eval()
    # perf_counter is the high-resolution monotonic clock; time.time() can be
    # too coarse to resolve a single millisecond-scale forward pass.
    start_time = time.perf_counter()

    with torch.no_grad():
        predictions = model(data, return_stress=True)

    inference_time = (time.perf_counter() - start_time) * 1000  # ms

    # Compute errors (mean absolute error over all nodes and components)
    disp_error = torch.mean(torch.abs(predictions['displacement'] - target_disp)).item()
    stress_error = torch.mean(torch.abs(predictions['stress'] - target_stress)).item()

    print(f"    Inference time: {inference_time:.2f} ms")
    print(f"    Displacement error: {disp_error:.6f}")
    print(f"    Stress error: {stress_error:.6f}")

    metrics = {
        'inference_time_ms': float(inference_time),
        'displacement_error': float(disp_error),
        'stress_error': float(stress_error),
        'num_nodes': num_nodes
    }

    # NaN/inf errors mean the model output is unusable; do not report PASS.
    if not (np.isfinite(disp_error) and np.isfinite(stress_error)):
        return {
            'status': 'FAIL',
            'message': 'Prediction errors are not finite',
            'metrics': metrics
        }

    return {
        'status': 'PASS',
        'message': 'Prediction accuracy test completed',
        'metrics': metrics
    }


def test_performance_benchmark():
    """
    Test 4: Performance benchmark

    Expected: Mean inference time < 100 ms for the 100-node mesh

    Times the forward pass of a freshly created (untrained) model in eval
    mode on random graphs of 10, 50, 100 and 500 nodes, each with twice as
    many edges as nodes. Every size gets one warm-up pass followed by
    10 timed passes. No FEA solve is run or timed here.

    Returns:
        dict with keys 'status', 'message' and 'metrics'. 'metrics' holds
        'results' (per-size 'num_nodes', 'avg_time_ms', 'std_time_ms'),
        'time_100_nodes_ms' and 'passes_threshold'. Status is 'PASS' only
        when the 100-node mean is below the threshold.
    """
    from torch_geometric.data import Data

    print("    Running performance benchmark...")

    # Test different mesh sizes
    mesh_sizes = [10, 50, 100, 500]
    threshold_nodes = 100   # mesh size the pass/fail criterion is applied to
    threshold_ms = 100.0    # maximum acceptable mean time for that size
    num_runs = 10
    results = []

    config = {
        'node_feature_dim': 12,
        'edge_feature_dim': 5,
        'hidden_dim': 64,
        'num_layers': 4,
        'dropout': 0.0
    }

    model = create_model(config)
    model.eval()

    print(f"    Testing {len(mesh_sizes)} mesh sizes...")

    for num_nodes in mesh_sizes:
        num_edges = num_nodes * 2

        x = torch.randn(num_nodes, 12)
        edge_index = torch.randint(0, num_nodes, (2, num_edges))
        edge_attr = torch.randn(num_edges, 5)
        batch = torch.zeros(num_nodes, dtype=torch.long)

        data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, batch=batch)

        # Warm-up so one-time setup cost is not counted in the timings
        with torch.no_grad():
            _ = model(data, return_stress=True)

        # Benchmark (average of num_runs runs). perf_counter is used because
        # time.time() can be too coarse to resolve millisecond-scale passes.
        times = []
        with torch.no_grad():
            for _ in range(num_runs):
                start = time.perf_counter()
                _ = model(data, return_stress=True)
                times.append((time.perf_counter() - start) * 1000)

        avg_time = np.mean(times)
        std_time = np.std(times)

        print(f"      {num_nodes:4d} nodes: {avg_time:6.2f} ± {std_time:4.2f} ms")

        results.append({
            'num_nodes': num_nodes,
            'avg_time_ms': float(avg_time),
            'std_time_ms': float(std_time)
        })

    # Check if performance is acceptable (< 100ms for 100 nodes)
    time_100_nodes = next(
        (r['avg_time_ms'] for r in results if r['num_nodes'] == threshold_nodes),
        None
    )

    success = time_100_nodes is not None and time_100_nodes < threshold_ms

    return {
        'status': 'PASS' if success else 'FAIL',
        'message': 'Performance benchmark completed',
        'metrics': {
            'results': results,
            # Compare against None explicitly: a measured 0.0 ms is a valid
            # timing and must not be reported as missing.
            'time_100_nodes_ms': float(time_100_nodes) if time_100_nodes is not None else None,
            'passes_threshold': success
        }
    }


def test_batch_inference():
    """
    Test 5: Batch inference

    Expected: Can process multiple designs in one run

    Important for optimization loops.

    Builds 5 random graphs (20 nodes, 40 edges each) and runs a freshly
    created model in eval mode on each graph in turn, i.e. one forward pass
    per graph rather than a single collated batch. Reports the total and
    per-graph time.

    Returns:
        dict with keys 'status', 'message' and 'metrics' ('batch_size',
        'total_time_ms', 'time_per_graph_ms'). Status is always 'PASS' when
        no exception is raised.
    """
    from torch_geometric.data import Data

    print("    Testing batch inference...")

    batch_size = 5
    num_nodes_per_graph = 20

    config = {
        'node_feature_dim': 12,
        'edge_feature_dim': 5,
        'hidden_dim': 64,
        'num_layers': 4,
        'dropout': 0.0
    }

    model = create_model(config)
    model.eval()

    print(f"    Creating batch of {batch_size} graphs...")

    graphs = []
    for i in range(batch_size):
        num_nodes = num_nodes_per_graph
        num_edges = num_nodes * 2

        x = torch.randn(num_nodes, 12)
        edge_index = torch.randint(0, num_nodes, (2, num_edges))
        edge_attr = torch.randn(num_edges, 5)
        # NOTE(review): each graph carries batch id `i` although it is fed to
        # the model on its own below; confirm whether the model expects ids
        # starting at 0 for a single graph or whether the graphs were meant
        # to be collated into one batch.
        batch = torch.full((num_nodes,), i, dtype=torch.long)

        graphs.append(Data(x=x, edge_index=edge_index, edge_attr=edge_attr, batch=batch))

    # Process batch
    print("    Processing batch...")

    # perf_counter is monotonic and high-resolution; time.time() can be too
    # coarse for a handful of millisecond-scale forward passes.
    start_time = time.perf_counter()

    with torch.no_grad():
        for graph in graphs:
            _ = model(graph, return_stress=True)

    batch_time = (time.perf_counter() - start_time) * 1000

    time_per_graph = batch_time / batch_size

    print(f"    Batch processing time: {batch_time:.2f} ms")
    print(f"    Time per graph: {time_per_graph:.2f} ms")

    return {
        'status': 'PASS',
        'message': 'Batch inference successful',
        'metrics': {
            'batch_size': batch_size,
            'total_time_ms': float(batch_time),
            'time_per_graph_ms': float(time_per_graph)
        }
    }


if __name__ == "__main__":
    # Script entry point: run every integration test in order, print a
    # PASS/FAIL line per test plus a summary, and exit non-zero on failure.
    import traceback

    print("\nRunning integration tests...\n")

    tests = [
        ("Parser Validation", test_parser),
        ("Training Pipeline", test_training),
        ("Prediction Accuracy", test_prediction_accuracy),
        ("Performance Benchmark", test_performance_benchmark),
        ("Batch Inference", test_batch_inference)
    ]

    passed = 0
    failed = 0

    for name, test_func in tests:
        print(f"[TEST] {name}")
        try:
            result = test_func()
            if result['status'] == 'PASS':
                print("  ✓ PASS\n")
                passed += 1
            else:
                print(f"  ✗ FAIL: {result['message']}\n")
                failed += 1
        except Exception as e:
            # A crashing test (or a malformed result dict) counts as a
            # failure but must not stop the remaining tests from running.
            print(f"  ✗ FAIL: {str(e)}\n")
            traceback.print_exc()
            failed += 1

    print(f"\nResults: {passed} passed, {failed} failed")
    print("\nNote: Parser test requires test_case_beam directory.")
    print("Run 'python test_simple_beam.py' first to create test data.")

    # Surface failures through the process exit status so CI jobs and shell
    # callers can detect them; a fully passing run still exits with 0.
    if failed:
        sys.exit(1)