feat: Add Studio UI, intake system, and extractor improvements
Dashboard: - Add Studio page with drag-drop model upload and Claude chat - Add intake system for study creation workflow - Improve session manager and context builder - Add intake API routes and frontend components Optimization Engine: - Add CLI module for command-line operations - Add intake module for study preprocessing - Add validation module with gate checks - Improve Zernike extractor documentation - Update spec models with better validation - Enhance solve_simulation robustness Documentation: - Add ATOMIZER_STUDIO.md planning doc - Add ATOMIZER_UX_SYSTEM.md for UX patterns - Update extractor library docs - Add study-readme-generator skill Tools: - Add test scripts for extraction validation - Add Zernike recentering test Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
508
optimization_engine/validation/gate.py
Normal file
508
optimization_engine/validation/gate.py
Normal file
@@ -0,0 +1,508 @@
|
||||
"""
|
||||
Validation Gate
|
||||
===============
|
||||
|
||||
The final checkpoint before optimization begins.
|
||||
|
||||
1. Validates the study specification
|
||||
2. Runs 2-3 test trials to verify:
|
||||
- Parameters actually update the model
|
||||
- Mesh regenerates correctly
|
||||
- Extractors work
|
||||
- Results are different (not stuck)
|
||||
3. Estimates runtime
|
||||
4. Gets user approval
|
||||
|
||||
This is CRITICAL for catching the "mesh not updating" issue that
|
||||
wastes hours of optimization time.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Dict, Any, Callable
|
||||
import numpy as np
|
||||
|
||||
from .checker import SpecChecker, CheckResult, IssueSeverity
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class TestTrialResult:
    """Outcome of one validation test trial (one FEA solve)."""

    trial_number: int
    parameters: Dict[str, float]
    objectives: Dict[str, float]
    constraints: Dict[str, float] = field(default_factory=dict)
    solve_time_seconds: float = 0.0
    success: bool = False
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this trial to a JSON-compatible dict."""
        keys = (
            "trial_number",
            "parameters",
            "objectives",
            "constraints",
            "solve_time_seconds",
            "success",
            "error",
        )
        return {key: getattr(self, key) for key in keys}
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Complete validation result.

    Aggregates the spec check, test-trial outcomes, variance analysis,
    and runtime estimates into a single pass/fail report.
    """

    # Overall verdict; flipped to False by add_error().
    passed: bool
    timestamp: datetime = field(default_factory=datetime.now)

    # Spec validation
    spec_check: Optional[CheckResult] = None

    # Test trials
    test_trials: List[TestTrialResult] = field(default_factory=list)
    results_vary: bool = False
    variance_by_objective: Dict[str, float] = field(default_factory=dict)

    # Runtime estimates (None until test trials have run)
    avg_solve_time: Optional[float] = None
    estimated_total_runtime: Optional[float] = None

    # Summary
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def add_error(self, message: str):
        """Record an error and mark the whole validation as failed."""
        self.errors.append(message)
        self.passed = False

    def add_warning(self, message: str):
        """Record a non-fatal warning; does not affect pass/fail."""
        self.warnings.append(message)

    def get_summary(self) -> str:
        """Get human-readable summary."""
        lines = []

        if self.passed:
            lines.append("VALIDATION PASSED")
        else:
            lines.append("VALIDATION FAILED")

        lines.append("\nSpec Validation:")
        if self.spec_check:
            lines.append(f" Errors: {len(self.spec_check.errors)}")
            lines.append(f" Warnings: {len(self.spec_check.warnings)}")

        lines.append("\nTest Trials:")
        lines.append(
            f" Completed: {len([t for t in self.test_trials if t.success])}/{len(self.test_trials)}"
        )
        lines.append(f" Results Vary: {'Yes' if self.results_vary else 'NO - PROBLEM!'}")

        if self.variance_by_objective:
            lines.append(" Variance by Objective:")
            for obj, var in self.variance_by_objective.items():
                lines.append(f" {obj}: {var:.6f}")

        # `is not None` (not truthiness): a legitimate 0.0 average solve time
        # must still produce the runtime section.
        if self.avg_solve_time is not None:
            lines.append("\nRuntime Estimate:")
            lines.append(f" Avg solve time: {self.avg_solve_time:.1f}s")
            if self.estimated_total_runtime is not None:
                hours = self.estimated_total_runtime / 3600
                lines.append(f" Est. total: {hours:.1f} hours")

        return "\n".join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result (spec_check reduced to issue counts)."""
        return {
            "passed": self.passed,
            "timestamp": self.timestamp.isoformat(),
            "spec_errors": len(self.spec_check.errors) if self.spec_check else 0,
            "spec_warnings": len(self.spec_check.warnings) if self.spec_check else 0,
            "test_trials": [t.to_dict() for t in self.test_trials],
            "results_vary": self.results_vary,
            "variance_by_objective": self.variance_by_objective,
            "avg_solve_time": self.avg_solve_time,
            "estimated_total_runtime": self.estimated_total_runtime,
            "errors": self.errors,
            "warnings": self.warnings,
        }
|
||||
|
||||
|
||||
class ValidationGate:
    """
    Validates study setup before optimization.

    This is the critical checkpoint that prevents wasted optimization time
    by catching issues like:
    - Missing files
    - Invalid bounds
    - Mesh not updating (all results identical)
    - Broken extractors
    """

    def __init__(
        self,
        study_dir: Path,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ):
        """
        Initialize the validation gate.

        Args:
            study_dir: Path to the study directory
            progress_callback: Optional callback for progress updates,
                called as (message, fraction_complete); defaults to a no-op.
        """
        self.study_dir = Path(study_dir)
        # No-op fallback so progress reporting never needs a None check.
        self.progress_callback = progress_callback or (lambda m, p: None)

        # Find spec file
        self.spec_path = self._find_spec_path()
        self.spec: Dict[str, Any] = {}

        # Load the spec eagerly; if no spec file exists, self.spec stays {}
        # and validate() reports the missing-spec error.
        if self.spec_path and self.spec_path.exists():
            with open(self.spec_path) as f:
                self.spec = json.load(f)

    def _find_spec_path(self) -> Optional[Path]:
        """Find the specification file.

        Returns the first existing candidate path, or None if none exist.
        """
        # Try atomizer_spec.json first (v2.0)
        candidates = [
            self.study_dir / "atomizer_spec.json",
            self.study_dir / "1_setup" / "atomizer_spec.json",
            self.study_dir / "optimization_config.json",
            self.study_dir / "1_setup" / "optimization_config.json",
        ]

        for path in candidates:
            if path.exists():
                return path

        return None

    def validate(
        self,
        run_test_trials: bool = True,
        n_test_trials: int = 3,
        available_expressions: Optional[List[str]] = None,
    ) -> ValidationResult:
        """
        Run full validation.

        Args:
            run_test_trials: Whether to run test FEA solves
            n_test_trials: Number of test trials (2-3 recommended)
            available_expressions: Expression names from introspection

        Returns:
            ValidationResult with all findings
        """
        # Start optimistic; add_error() flips passed to False.
        result = ValidationResult(passed=True)

        logger.info(f"Validating study: {self.study_dir.name}")
        self._progress("Starting validation...", 0.0)

        # Step 1: Check spec file exists
        if not self.spec_path:
            result.add_error("No specification file found")
            return result

        # Step 2: Validate spec
        self._progress("Validating specification...", 0.1)
        checker = SpecChecker(self.spec_path, available_expressions)
        result.spec_check = checker.check(self.spec)

        # Add spec errors to result
        for issue in result.spec_check.errors:
            result.add_error(str(issue))
        for issue in result.spec_check.warnings:
            result.add_warning(str(issue))

        # Stop if spec has errors (unless they're non-critical)
        # Early return: no point running FEA trials against a broken spec.
        if result.spec_check.errors:
            self._progress("Validation failed: spec errors", 1.0)
            return result

        # Step 3: Run test trials
        if run_test_trials:
            self._progress("Running test trials...", 0.2)
            self._run_test_trials(result, n_test_trials)

        # Step 4: Calculate estimates
        self._progress("Calculating estimates...", 0.9)
        self._calculate_estimates(result)

        self._progress("Validation complete", 1.0)

        return result

    def _progress(self, message: str, percent: float):
        """Report progress to the log and the configured callback."""
        logger.info(f"[{percent * 100:.0f}%] {message}")
        self.progress_callback(message, percent)

    def _run_test_trials(self, result: ValidationResult, n_trials: int) -> None:
        """Run test trials to verify setup.

        Each trial picks random parameters within bounds, runs one solve,
        and records the outcome; afterwards the objective variance across
        trials is checked to detect a non-updating mesh.
        """

        # Lazy import: the NX solver may not be installed in every
        # environment; degrade to a warning instead of crashing.
        try:
            from optimization_engine.nx.solver import NXSolver
        except ImportError:
            result.add_warning("NXSolver not available - skipping test trials")
            return

        # Get design variables
        design_vars = self.spec.get("design_variables", [])
        if not design_vars:
            result.add_error("No design variables to test")
            return

        # Get model directory
        model_dir = self._find_model_dir()
        if not model_dir:
            result.add_error("Model directory not found")
            return

        # Get sim file
        sim_file = self._find_sim_file(model_dir)
        if not sim_file:
            result.add_error("Simulation file not found")
            return

        solver = NXSolver()

        for i in range(n_trials):
            # Trials span the 20%-80% range of the overall progress bar.
            self._progress(f"Running test trial {i + 1}/{n_trials}...", 0.2 + (0.6 * i / n_trials))

            trial_result = TestTrialResult(trial_number=i + 1, parameters={}, objectives={})

            # Generate random parameters within bounds
            params = {}
            for dv in design_vars:
                # Spec may call the field "parameter" (v2.0) or "name".
                param_name = dv.get("parameter", dv.get("name"))
                bounds = dv.get("bounds", [0, 1])
                # Use random value within bounds
                value = random.uniform(bounds[0], bounds[1])
                params[param_name] = value

            trial_result.parameters = params

            try:
                start_time = time.time()

                # Run simulation
                # NOTE(review): solve_result is assumed to be a dict with
                # "success", "op2_file", and "error" keys - confirm against
                # NXSolver.run_simulation.
                solve_result = solver.run_simulation(
                    sim_file=sim_file,
                    working_dir=model_dir,
                    expression_updates=params,
                    cleanup=True,
                )

                trial_result.solve_time_seconds = time.time() - start_time

                if solve_result.get("success"):
                    trial_result.success = True

                    # Extract results
                    op2_file = solve_result.get("op2_file")
                    if op2_file:
                        objectives = self._extract_objectives(Path(op2_file), model_dir)
                        trial_result.objectives = objectives
                else:
                    trial_result.success = False
                    trial_result.error = solve_result.get("error", "Unknown error")

            except Exception as e:
                # A failed trial is recorded, not raised: remaining trials
                # still run so variance can be checked on the survivors.
                trial_result.success = False
                trial_result.error = str(e)
                logger.error(f"Test trial {i + 1} failed: {e}")

            result.test_trials.append(trial_result)

        # Check if results vary
        self._check_results_variance(result)

    def _find_model_dir(self) -> Optional[Path]:
        """Find the model directory.

        A candidate only counts if it actually contains a .sim file.
        """
        candidates = [
            self.study_dir / "1_model",
            self.study_dir / "1_setup" / "model",
            self.study_dir,
        ]

        for path in candidates:
            if path.exists() and list(path.glob("*.sim")):
                return path

        return None

    def _find_sim_file(self, model_dir: Path) -> Optional[Path]:
        """Find the simulation file.

        Prefers the file named in the spec; falls back to the first
        .sim file found in model_dir.
        """
        # From spec
        sim = self.spec.get("simulation", {})
        sim_name = sim.get("sim_file")

        if sim_name:
            sim_path = model_dir / sim_name
            if sim_path.exists():
                return sim_path

        # Search for .sim files
        sim_files = list(model_dir.glob("*.sim"))
        if sim_files:
            return sim_files[0]

        return None

    def _extract_objectives(self, op2_file: Path, model_dir: Path) -> Dict[str, float]:
        """Extract objective values from results.

        Dispatches on the objective's extraction "action" string; objectives
        whose extraction fails are silently omitted from the returned dict
        (logged at debug level).
        """
        objectives = {}

        # Extract based on configured objectives
        for obj in self.spec.get("objectives", []):
            name = obj.get("name", "objective")
            extraction = obj.get("extraction", {})
            action = extraction.get("action", "")

            try:
                if "mass" in action.lower():
                    # Mass comes from the Nastran input deck, not the OP2.
                    from optimization_engine.extractors.bdf_mass_extractor import (
                        extract_mass_from_bdf,
                    )

                    dat_files = list(model_dir.glob("*.dat"))
                    if dat_files:
                        objectives[name] = extract_mass_from_bdf(str(dat_files[0]))

                elif "displacement" in action.lower():
                    from optimization_engine.extractors.extract_displacement import (
                        extract_displacement,
                    )

                    result = extract_displacement(op2_file, subcase=1)
                    objectives[name] = result.get("max_displacement", 0)

                elif "stress" in action.lower():
                    from optimization_engine.extractors.extract_von_mises_stress import (
                        extract_solid_stress,
                    )

                    result = extract_solid_stress(op2_file, subcase=1)
                    objectives[name] = result.get("max_von_mises", 0)

            except Exception as e:
                # Best-effort extraction: a broken extractor shouldn't abort
                # the trial loop during validation.
                logger.debug(f"Failed to extract {name}: {e}")

        return objectives

    def _check_results_variance(self, result: ValidationResult) -> None:
        """Check if test trial results vary (indicating mesh is updating)."""

        successful_trials = [t for t in result.test_trials if t.success]

        if len(successful_trials) < 2:
            result.add_warning("Not enough successful trials to check variance")
            return

        # Check variance for each objective
        # NOTE(review): results_vary is overwritten per objective, so the
        # last objective iterated wins; a stuck objective still fails the
        # gate via add_error(), but the flag itself may read True - confirm
        # whether per-objective flags were intended.
        for obj_name in successful_trials[0].objectives.keys():
            values = [t.objectives.get(obj_name, 0) for t in successful_trials]

            if len(values) > 1:
                variance = np.var(values)
                result.variance_by_objective[obj_name] = variance

                # Check if variance is too low (results are stuck)
                mean_val = np.mean(values)
                if mean_val != 0:
                    cv = np.sqrt(variance) / abs(mean_val)  # Coefficient of variation

                    if cv < 0.001:  # Less than 0.1% variation
                        result.add_error(
                            f"Results for '{obj_name}' are nearly identical (CV={cv:.6f}). "
                            "The mesh may not be updating!"
                        )
                        result.results_vary = False
                    else:
                        result.results_vary = True
                else:
                    # Can't calculate CV if mean is 0
                    if variance < 1e-10:
                        result.add_warning(f"Results for '{obj_name}' show no variation")
                    else:
                        result.results_vary = True

        # Default to True if we couldn't check
        if not result.variance_by_objective:
            result.results_vary = True

    def _calculate_estimates(self, result: ValidationResult) -> None:
        """Calculate runtime estimates.

        Leaves both estimates as None when no trial succeeded.
        """

        successful_trials = [t for t in result.test_trials if t.success]

        if successful_trials:
            solve_times = [t.solve_time_seconds for t in successful_trials]
            result.avg_solve_time = np.mean(solve_times)

            # Get total trials from spec
            # Total = average solve time x planned trial count (default 100).
            n_trials = self.spec.get("optimization_settings", {}).get("n_trials", 100)
            result.estimated_total_runtime = result.avg_solve_time * n_trials

    def approve(self) -> bool:
        """
        Mark the study as approved for optimization.

        Creates an approval file to indicate validation passed.

        Returns:
            True if the approval marker was written, False on I/O failure.
        """
        approval_file = self.study_dir / ".validation_approved"

        try:
            # The marker's content is the approval timestamp.
            approval_file.write_text(datetime.now().isoformat())
            logger.info(f"Study approved: {self.study_dir.name}")
            return True
        except Exception as e:
            logger.error(f"Failed to approve: {e}")
            return False

    def is_approved(self) -> bool:
        """Check if study has been approved (approval marker exists)."""
        approval_file = self.study_dir / ".validation_approved"
        return approval_file.exists()

    def save_result(self, result: ValidationResult) -> Path:
        """Save validation result to file.

        Returns:
            Path to the written validation_result.json.
        """
        output_path = self.study_dir / "validation_result.json"

        with open(output_path, "w") as f:
            json.dump(result.to_dict(), f, indent=2)

        return output_path
|
||||
|
||||
|
||||
def validate_study(
    study_dir: Path,
    run_test_trials: bool = True,
    n_test_trials: int = 3,
) -> ValidationResult:
    """
    Convenience wrapper: construct a ValidationGate for *study_dir* and
    run its full validation pass.

    Args:
        study_dir: Path to study directory
        run_test_trials: Whether to run test FEA solves
        n_test_trials: Number of test trials

    Returns:
        ValidationResult
    """
    return ValidationGate(study_dir).validate(
        run_test_trials=run_test_trials,
        n_test_trials=n_test_trials,
    )
|
||||
Reference in New Issue
Block a user