Files
Atomizer/optimization_engine/validation/gate.py
Anto01 a26914bbe8 feat: Add Studio UI, intake system, and extractor improvements
Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 12:02:30 -05:00

509 lines
17 KiB
Python

"""
Validation Gate
===============
The final checkpoint before optimization begins.
1. Validates the study specification
2. Runs 2-3 test trials to verify:
- Parameters actually update the model
- Mesh regenerates correctly
- Extractors work
- Results are different (not stuck)
3. Estimates runtime
4. Gets user approval
This is CRITICAL for catching the "mesh not updating" issue that
wastes hours of optimization time.
"""
from __future__ import annotations
import json
import logging
import random
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any, Callable
import numpy as np
from .checker import SpecChecker, CheckResult, IssueSeverity
logger = logging.getLogger(__name__)
@dataclass
class TestTrialResult:
    """Outcome of a single validation test solve."""

    trial_number: int
    parameters: Dict[str, float]
    objectives: Dict[str, float]
    constraints: Dict[str, float] = field(default_factory=dict)
    solve_time_seconds: float = 0.0
    success: bool = False
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the trial to a plain JSON-compatible dict."""
        keys = (
            "trial_number",
            "parameters",
            "objectives",
            "constraints",
            "solve_time_seconds",
            "success",
            "error",
        )
        return {name: getattr(self, name) for name in keys}
@dataclass
class ValidationResult:
    """Complete validation result.

    Aggregates spec-check output, test-trial results, variance analysis,
    and runtime estimates into a single pass/fail verdict.
    """

    # Overall verdict; flipped to False by add_error().
    passed: bool
    timestamp: datetime = field(default_factory=datetime.now)
    # Spec validation (populated from SpecChecker.check()).
    spec_check: Optional[CheckResult] = None
    # Test trials and variance analysis.
    test_trials: List[TestTrialResult] = field(default_factory=list)
    results_vary: bool = False
    variance_by_objective: Dict[str, float] = field(default_factory=dict)
    # Runtime estimates in seconds; None until test trials have run.
    avg_solve_time: Optional[float] = None
    estimated_total_runtime: Optional[float] = None
    # Accumulated messages.
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def add_error(self, message: str) -> None:
        """Record an error and mark the validation as failed."""
        self.errors.append(message)
        self.passed = False

    def add_warning(self, message: str) -> None:
        """Record a non-fatal warning; does not affect pass/fail."""
        self.warnings.append(message)

    def get_summary(self) -> str:
        """Build a human-readable, multi-line summary of the result."""
        lines = []
        lines.append("VALIDATION PASSED" if self.passed else "VALIDATION FAILED")
        lines.append("\nSpec Validation:")
        if self.spec_check:
            lines.append(f"  Errors: {len(self.spec_check.errors)}")
            lines.append(f"  Warnings: {len(self.spec_check.warnings)}")
        lines.append("\nTest Trials:")
        lines.append(
            f"  Completed: {len([t for t in self.test_trials if t.success])}/{len(self.test_trials)}"
        )
        lines.append(f"  Results Vary: {'Yes' if self.results_vary else 'NO - PROBLEM!'}")
        if self.variance_by_objective:
            lines.append("  Variance by Objective:")
            for obj, var in self.variance_by_objective.items():
                lines.append(f"    {obj}: {var:.6f}")
        # Compare against None explicitly: a legitimate 0.0 average solve
        # time is falsy and would otherwise be silently omitted.
        if self.avg_solve_time is not None:
            lines.append("\nRuntime Estimate:")
            lines.append(f"  Avg solve time: {self.avg_solve_time:.1f}s")
            if self.estimated_total_runtime is not None:
                hours = self.estimated_total_runtime / 3600
                lines.append(f"  Est. total: {hours:.1f} hours")
        return "\n".join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (spec_check reduced to counts)."""
        return {
            "passed": self.passed,
            "timestamp": self.timestamp.isoformat(),
            "spec_errors": len(self.spec_check.errors) if self.spec_check else 0,
            "spec_warnings": len(self.spec_check.warnings) if self.spec_check else 0,
            "test_trials": [t.to_dict() for t in self.test_trials],
            "results_vary": self.results_vary,
            "variance_by_objective": self.variance_by_objective,
            "avg_solve_time": self.avg_solve_time,
            "estimated_total_runtime": self.estimated_total_runtime,
            "errors": self.errors,
            "warnings": self.warnings,
        }
class ValidationGate:
    """
    Validates study setup before optimization.

    This is the critical checkpoint that prevents wasted optimization time
    by catching issues like:
    - Missing files
    - Invalid bounds
    - Mesh not updating (all results identical)
    - Broken extractors
    """

    def __init__(
        self,
        study_dir: Path,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ):
        """
        Initialize the validation gate.

        Loads the study spec (if found) eagerly so that validate() and the
        helper lookups can read it without re-opening the file.

        Args:
            study_dir: Path to the study directory
            progress_callback: Optional callback for progress updates,
                called as callback(message, fraction) with fraction in [0, 1]
        """
        self.study_dir = Path(study_dir)
        # Default to a no-op so callers can always invoke the callback.
        self.progress_callback = progress_callback or (lambda m, p: None)
        # Find spec file
        self.spec_path = self._find_spec_path()
        self.spec: Dict[str, Any] = {}
        if self.spec_path and self.spec_path.exists():
            with open(self.spec_path) as f:
                self.spec = json.load(f)

    def _find_spec_path(self) -> Optional[Path]:
        """Find the specification file.

        Checks known locations in priority order; the v2.0 name
        (atomizer_spec.json) wins over the legacy optimization_config.json.
        Returns None if no candidate exists.
        """
        # Try atomizer_spec.json first (v2.0)
        candidates = [
            self.study_dir / "atomizer_spec.json",
            self.study_dir / "1_setup" / "atomizer_spec.json",
            self.study_dir / "optimization_config.json",
            self.study_dir / "1_setup" / "optimization_config.json",
        ]
        for path in candidates:
            if path.exists():
                return path
        return None

    def validate(
        self,
        run_test_trials: bool = True,
        n_test_trials: int = 3,
        available_expressions: Optional[List[str]] = None,
    ) -> ValidationResult:
        """
        Run full validation.

        Pipeline: spec-file existence -> SpecChecker -> optional test FEA
        solves -> runtime estimates. Returns early (failed) if the spec is
        missing or the spec check reports errors.

        Args:
            run_test_trials: Whether to run test FEA solves
            n_test_trials: Number of test trials (2-3 recommended)
            available_expressions: Expression names from introspection
        Returns:
            ValidationResult with all findings
        """
        result = ValidationResult(passed=True)
        logger.info(f"Validating study: {self.study_dir.name}")
        self._progress("Starting validation...", 0.0)
        # Step 1: Check spec file exists
        if not self.spec_path:
            result.add_error("No specification file found")
            return result
        # Step 2: Validate spec
        self._progress("Validating specification...", 0.1)
        checker = SpecChecker(self.spec_path, available_expressions)
        result.spec_check = checker.check(self.spec)
        # Add spec errors to result
        for issue in result.spec_check.errors:
            result.add_error(str(issue))
        for issue in result.spec_check.warnings:
            result.add_warning(str(issue))
        # Stop if spec has errors (unless they're non-critical)
        if result.spec_check.errors:
            self._progress("Validation failed: spec errors", 1.0)
            return result
        # Step 3: Run test trials
        if run_test_trials:
            self._progress("Running test trials...", 0.2)
            self._run_test_trials(result, n_test_trials)
        # Step 4: Calculate estimates
        self._progress("Calculating estimates...", 0.9)
        self._calculate_estimates(result)
        self._progress("Validation complete", 1.0)
        return result

    def _progress(self, message: str, percent: float):
        """Report progress to the log and to the optional callback.

        `percent` is a fraction in [0, 1]; it is scaled to 0-100 for the
        log line but passed through unscaled to the callback.
        """
        logger.info(f"[{percent * 100:.0f}%] {message}")
        self.progress_callback(message, percent)

    def _run_test_trials(self, result: ValidationResult, n_trials: int) -> None:
        """Run test trials to verify setup.

        Each trial draws random parameter values within the declared bounds
        and runs one FEA solve via the project NXSolver, recording solve
        time, success, and extracted objectives. Results feed the variance
        check that detects a non-updating mesh.

        NOTE(review): random.uniform is unseeded, so trials differ between
        runs — presumably intentional (randomness probes model response),
        but it makes validation non-reproducible.
        """
        try:
            # Project-local solver; hedged — its contract is inferred from
            # the dict keys used below ("success", "op2_file", "error").
            from optimization_engine.nx.solver import NXSolver
        except ImportError:
            # Degrade gracefully: no solver means no trials, not a failure.
            result.add_warning("NXSolver not available - skipping test trials")
            return
        # Get design variables
        design_vars = self.spec.get("design_variables", [])
        if not design_vars:
            result.add_error("No design variables to test")
            return
        # Get model directory
        model_dir = self._find_model_dir()
        if not model_dir:
            result.add_error("Model directory not found")
            return
        # Get sim file
        sim_file = self._find_sim_file(model_dir)
        if not sim_file:
            result.add_error("Simulation file not found")
            return
        solver = NXSolver()
        for i in range(n_trials):
            # Progress spans 0.2 -> 0.8 across the trial loop.
            self._progress(f"Running test trial {i + 1}/{n_trials}...", 0.2 + (0.6 * i / n_trials))
            trial_result = TestTrialResult(trial_number=i + 1, parameters={}, objectives={})
            # Generate random parameters within bounds
            params = {}
            for dv in design_vars:
                # Spec may use "parameter" (preferred) or legacy "name".
                param_name = dv.get("parameter", dv.get("name"))
                # NOTE(review): assumes bounds is [lo, hi] with lo <= hi;
                # a missing bounds entry silently falls back to [0, 1].
                bounds = dv.get("bounds", [0, 1])
                # Use random value within bounds
                value = random.uniform(bounds[0], bounds[1])
                params[param_name] = value
            trial_result.parameters = params
            try:
                start_time = time.time()
                # Run simulation
                solve_result = solver.run_simulation(
                    sim_file=sim_file,
                    working_dir=model_dir,
                    expression_updates=params,
                    cleanup=True,
                )
                trial_result.solve_time_seconds = time.time() - start_time
                if solve_result.get("success"):
                    trial_result.success = True
                    # Extract results
                    op2_file = solve_result.get("op2_file")
                    if op2_file:
                        objectives = self._extract_objectives(Path(op2_file), model_dir)
                        trial_result.objectives = objectives
                else:
                    trial_result.success = False
                    trial_result.error = solve_result.get("error", "Unknown error")
            except Exception as e:
                # A crashed trial is recorded, not fatal to validation.
                trial_result.success = False
                trial_result.error = str(e)
                logger.error(f"Test trial {i + 1} failed: {e}")
            result.test_trials.append(trial_result)
        # Check if results vary
        self._check_results_variance(result)

    def _find_model_dir(self) -> Optional[Path]:
        """Find the model directory.

        A candidate qualifies only if it exists AND contains at least one
        .sim file; the study root itself is the last resort.
        """
        candidates = [
            self.study_dir / "1_model",
            self.study_dir / "1_setup" / "model",
            self.study_dir,
        ]
        for path in candidates:
            if path.exists() and list(path.glob("*.sim")):
                return path
        return None

    def _find_sim_file(self, model_dir: Path) -> Optional[Path]:
        """Find the simulation file.

        Prefers the name declared under spec["simulation"]["sim_file"];
        otherwise falls back to the first *.sim found in model_dir
        (glob order — effectively arbitrary if several exist).
        """
        # From spec
        sim = self.spec.get("simulation", {})
        sim_name = sim.get("sim_file")
        if sim_name:
            sim_path = model_dir / sim_name
            if sim_path.exists():
                return sim_path
        # Search for .sim files
        sim_files = list(model_dir.glob("*.sim"))
        if sim_files:
            return sim_files[0]
        return None

    def _extract_objectives(self, op2_file: Path, model_dir: Path) -> Dict[str, float]:
        """Extract objective values from results.

        Dispatches on a substring match of each objective's
        extraction "action" ("mass" / "displacement" / "stress") to the
        corresponding project extractor. Best-effort: any extractor failure
        is logged at debug level and that objective is simply omitted.

        NOTE(review): extractor return shapes are assumed from the keys
        read here ("max_displacement", "max_von_mises") — confirm against
        the extractor library.
        """
        objectives = {}
        # Extract based on configured objectives
        for obj in self.spec.get("objectives", []):
            name = obj.get("name", "objective")
            extraction = obj.get("extraction", {})
            action = extraction.get("action", "")
            try:
                if "mass" in action.lower():
                    from optimization_engine.extractors.bdf_mass_extractor import (
                        extract_mass_from_bdf,
                    )
                    # Mass comes from the exported .dat deck, not the .op2.
                    dat_files = list(model_dir.glob("*.dat"))
                    if dat_files:
                        objectives[name] = extract_mass_from_bdf(str(dat_files[0]))
                elif "displacement" in action.lower():
                    from optimization_engine.extractors.extract_displacement import (
                        extract_displacement,
                    )
                    result = extract_displacement(op2_file, subcase=1)
                    objectives[name] = result.get("max_displacement", 0)
                elif "stress" in action.lower():
                    from optimization_engine.extractors.extract_von_mises_stress import (
                        extract_solid_stress,
                    )
                    result = extract_solid_stress(op2_file, subcase=1)
                    objectives[name] = result.get("max_von_mises", 0)
            except Exception as e:
                logger.debug(f"Failed to extract {name}: {e}")
        return objectives

    def _check_results_variance(self, result: ValidationResult) -> None:
        """Check if test trial results vary (indicating mesh is updating).

        For each objective seen in the first successful trial, computes the
        variance across successful trials and flags an error when the
        coefficient of variation is below 0.1% — the signature of a mesh
        that is not actually regenerating between parameter updates.

        NOTE(review): result.results_vary is overwritten inside the
        per-objective loop, so the verdict reflects only the LAST objective
        checked; an earlier stuck objective still adds an error but its
        results_vary=False can be clobbered — confirm whether intended.
        """
        successful_trials = [t for t in result.test_trials if t.success]
        if len(successful_trials) < 2:
            result.add_warning("Not enough successful trials to check variance")
            return
        # Check variance for each objective
        for obj_name in successful_trials[0].objectives.keys():
            # Trials missing this objective contribute 0, which can
            # inflate the apparent variance.
            values = [t.objectives.get(obj_name, 0) for t in successful_trials]
            if len(values) > 1:
                variance = np.var(values)
                result.variance_by_objective[obj_name] = variance
                # Check if variance is too low (results are stuck)
                mean_val = np.mean(values)
                if mean_val != 0:
                    cv = np.sqrt(variance) / abs(mean_val)  # Coefficient of variation
                    if cv < 0.001:  # Less than 0.1% variation
                        result.add_error(
                            f"Results for '{obj_name}' are nearly identical (CV={cv:.6f}). "
                            "The mesh may not be updating!"
                        )
                        result.results_vary = False
                    else:
                        result.results_vary = True
                else:
                    # Can't calculate CV if mean is 0
                    if variance < 1e-10:
                        result.add_warning(f"Results for '{obj_name}' show no variation")
                    else:
                        result.results_vary = True
        # Default to True if we couldn't check
        if not result.variance_by_objective:
            result.results_vary = True

    def _calculate_estimates(self, result: ValidationResult) -> None:
        """Calculate runtime estimates.

        avg_solve_time = mean of successful trials' solve times;
        estimated_total_runtime = avg * planned trial count
        (spec optimization_settings.n_trials, default 100). No-op when no
        trial succeeded.
        """
        successful_trials = [t for t in result.test_trials if t.success]
        if successful_trials:
            solve_times = [t.solve_time_seconds for t in successful_trials]
            result.avg_solve_time = np.mean(solve_times)
            # Get total trials from spec
            n_trials = self.spec.get("optimization_settings", {}).get("n_trials", 100)
            result.estimated_total_runtime = result.avg_solve_time * n_trials
    def approve(self) -> bool:
        """
        Mark the study as approved for optimization.

        Creates an approval file to indicate validation passed. The file's
        content is the approval timestamp (ISO format); its existence is
        what is_approved() checks.
        """
        approval_file = self.study_dir / ".validation_approved"
        try:
            approval_file.write_text(datetime.now().isoformat())
            logger.info(f"Study approved: {self.study_dir.name}")
            return True
        except Exception as e:
            # Best-effort: a failed write is reported, not raised.
            logger.error(f"Failed to approve: {e}")
            return False

    def is_approved(self) -> bool:
        """Check if study has been approved (marker file exists)."""
        approval_file = self.study_dir / ".validation_approved"
        return approval_file.exists()

    def save_result(self, result: ValidationResult) -> Path:
        """Save validation result to file.

        Writes validation_result.json in the study directory and returns
        its path. Overwrites any previous result.
        """
        output_path = self.study_dir / "validation_result.json"
        with open(output_path, "w") as f:
            json.dump(result.to_dict(), f, indent=2)
        return output_path
def validate_study(
    study_dir: Path,
    run_test_trials: bool = True,
    n_test_trials: int = 3,
) -> ValidationResult:
    """
    Convenience function to validate a study.

    Builds a ValidationGate for the given directory and runs its full
    validation pass in one call.

    Args:
        study_dir: Path to study directory
        run_test_trials: Whether to run test FEA solves
        n_test_trials: Number of test trials
    Returns:
        ValidationResult
    """
    return ValidationGate(study_dir).validate(
        run_test_trials=run_test_trials,
        n_test_trials=n_test_trials,
    )