Files
Atomizer/optimization_engine/validation/gate.py
Anto01 a26914bbe8 feat: Add Studio UI, intake system, and extractor improvements
Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 12:02:30 -05:00

509 lines
17 KiB
Python

"""
Validation Gate
===============
The final checkpoint before optimization begins.
1. Validates the study specification
2. Runs 2-3 test trials to verify:
- Parameters actually update the model
- Mesh regenerates correctly
- Extractors work
- Results are different (not stuck)
3. Estimates runtime
4. Gets user approval
This is CRITICAL for catching the "mesh not updating" issue that
wastes hours of optimization time.
"""
from __future__ import annotations
import json
import logging
import random
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any, Callable
import numpy as np
from .checker import SpecChecker, CheckResult, IssueSeverity
logger = logging.getLogger(__name__)
@dataclass
class TestTrialResult:
    """Outcome of a single validation test solve."""

    trial_number: int
    parameters: Dict[str, float]
    objectives: Dict[str, float]
    constraints: Dict[str, float] = field(default_factory=dict)
    solve_time_seconds: float = 0.0
    success: bool = False
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the trial to a plain JSON-compatible dict."""
        keys = (
            "trial_number",
            "parameters",
            "objectives",
            "constraints",
            "solve_time_seconds",
            "success",
            "error",
        )
        return {name: getattr(self, name) for name in keys}
@dataclass
class ValidationResult:
    """Complete validation result.

    Aggregates spec-check output, test-trial results, variance analysis,
    and runtime estimates into a single pass/fail verdict.
    """

    # Overall verdict; flipped to False by add_error().
    passed: bool
    timestamp: datetime = field(default_factory=datetime.now)
    # Spec validation (populated from SpecChecker.check()).
    spec_check: Optional[CheckResult] = None
    # Test trials and variance analysis.
    test_trials: List[TestTrialResult] = field(default_factory=list)
    results_vary: bool = False
    variance_by_objective: Dict[str, float] = field(default_factory=dict)
    # Runtime estimates in seconds; None until test trials have run.
    avg_solve_time: Optional[float] = None
    estimated_total_runtime: Optional[float] = None
    # Accumulated messages.
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def add_error(self, message: str) -> None:
        """Record an error and mark the validation as failed."""
        self.errors.append(message)
        self.passed = False

    def add_warning(self, message: str) -> None:
        """Record a non-fatal warning; does not affect pass/fail."""
        self.warnings.append(message)

    def get_summary(self) -> str:
        """Build a human-readable, multi-line summary of the result."""
        lines = []
        lines.append("VALIDATION PASSED" if self.passed else "VALIDATION FAILED")
        lines.append("\nSpec Validation:")
        if self.spec_check:
            lines.append(f"  Errors: {len(self.spec_check.errors)}")
            lines.append(f"  Warnings: {len(self.spec_check.warnings)}")
        lines.append("\nTest Trials:")
        lines.append(
            f"  Completed: {len([t for t in self.test_trials if t.success])}/{len(self.test_trials)}"
        )
        lines.append(f"  Results Vary: {'Yes' if self.results_vary else 'NO - PROBLEM!'}")
        if self.variance_by_objective:
            lines.append("  Variance by Objective:")
            for obj, var in self.variance_by_objective.items():
                lines.append(f"    {obj}: {var:.6f}")
        # Compare against None explicitly: a legitimate 0.0 average solve
        # time is falsy and would otherwise be silently omitted.
        if self.avg_solve_time is not None:
            lines.append("\nRuntime Estimate:")
            lines.append(f"  Avg solve time: {self.avg_solve_time:.1f}s")
            if self.estimated_total_runtime is not None:
                hours = self.estimated_total_runtime / 3600
                lines.append(f"  Est. total: {hours:.1f} hours")
        return "\n".join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (spec_check reduced to counts)."""
        return {
            "passed": self.passed,
            "timestamp": self.timestamp.isoformat(),
            "spec_errors": len(self.spec_check.errors) if self.spec_check else 0,
            "spec_warnings": len(self.spec_check.warnings) if self.spec_check else 0,
            "test_trials": [t.to_dict() for t in self.test_trials],
            "results_vary": self.results_vary,
            "variance_by_objective": self.variance_by_objective,
            "avg_solve_time": self.avg_solve_time,
            "estimated_total_runtime": self.estimated_total_runtime,
            "errors": self.errors,
            "warnings": self.warnings,
        }
class ValidationGate:
    """
    Validates study setup before optimization.

    This is the critical checkpoint that prevents wasted optimization time
    by catching issues like:
    - Missing files
    - Invalid bounds
    - Mesh not updating (all results identical)
    - Broken extractors
    """

    def __init__(
        self,
        study_dir: Path,
        progress_callback: Optional[Callable[[str, float], None]] = None,
    ):
        """
        Initialize the validation gate.

        Loads the study spec (if found) eagerly so that validate() and the
        helper lookups can read it without re-opening the file.

        Args:
            study_dir: Path to the study directory
            progress_callback: Optional callback for progress updates,
                called as callback(message, fraction) with fraction in [0, 1]
        """
        self.study_dir = Path(study_dir)
        # Default to a no-op so callers can always invoke the callback.
        self.progress_callback = progress_callback or (lambda m, p: None)
        # Find spec file
        self.spec_path = self._find_spec_path()
        self.spec: Dict[str, Any] = {}
        if self.spec_path and self.spec_path.exists():
            with open(self.spec_path) as f:
                self.spec = json.load(f)

    def _find_spec_path(self) -> Optional[Path]:
        """Find the specification file.

        Checks known locations in priority order; the v2.0 name
        (atomizer_spec.json) wins over the legacy optimization_config.json.
        Returns None if no candidate exists.
        """
        # Try atomizer_spec.json first (v2.0)
        candidates = [
            self.study_dir / "atomizer_spec.json",
            self.study_dir / "1_setup" / "atomizer_spec.json",
            self.study_dir / "optimization_config.json",
            self.study_dir / "1_setup" / "optimization_config.json",
        ]
        for path in candidates:
            if path.exists():
                return path
        return None

    def validate(
        self,
        run_test_trials: bool = True,
        n_test_trials: int = 3,
        available_expressions: Optional[List[str]] = None,
    ) -> ValidationResult:
        """
        Run full validation.

        Pipeline: spec-file existence -> SpecChecker -> optional test FEA
        solves -> runtime estimates. Returns early (failed) if the spec is
        missing or the spec check reports errors.

        Args:
            run_test_trials: Whether to run test FEA solves
            n_test_trials: Number of test trials (2-3 recommended)
            available_expressions: Expression names from introspection
        Returns:
            ValidationResult with all findings
        """
        result = ValidationResult(passed=True)
        logger.info(f"Validating study: {self.study_dir.name}")
        self._progress("Starting validation...", 0.0)
        # Step 1: Check spec file exists
        if not self.spec_path:
            result.add_error("No specification file found")
            return result
        # Step 2: Validate spec
        self._progress("Validating specification...", 0.1)
        checker = SpecChecker(self.spec_path, available_expressions)
        result.spec_check = checker.check(self.spec)
        # Add spec errors to result
        for issue in result.spec_check.errors:
            result.add_error(str(issue))
        for issue in result.spec_check.warnings:
            result.add_warning(str(issue))
        # Stop if spec has errors (unless they're non-critical)
        if result.spec_check.errors:
            self._progress("Validation failed: spec errors", 1.0)
            return result
        # Step 3: Run test trials
        if run_test_trials:
            self._progress("Running test trials...", 0.2)
            self._run_test_trials(result, n_test_trials)
        # Step 4: Calculate estimates
        self._progress("Calculating estimates...", 0.9)
        self._calculate_estimates(result)
        self._progress("Validation complete", 1.0)
        return result

    def _progress(self, message: str, percent: float):
        """Report progress to the log and to the optional callback.

        `percent` is a fraction in [0, 1]; it is scaled to 0-100 for the
        log line but passed through unscaled to the callback.
        """
        logger.info(f"[{percent * 100:.0f}%] {message}")
        self.progress_callback(message, percent)

    def _run_test_trials(self, result: ValidationResult, n_trials: int) -> None:
        """Run test trials to verify setup.

        Each trial draws random parameter values within the declared bounds
        and runs one FEA solve via the project NXSolver, recording solve
        time, success, and extracted objectives. Results feed the variance
        check that detects a non-updating mesh.

        NOTE(review): random.uniform is unseeded, so trials differ between
        runs — presumably intentional (randomness probes model response),
        but it makes validation non-reproducible.
        """
        try:
            # Project-local solver; hedged — its contract is inferred from
            # the dict keys used below ("success", "op2_file", "error").
            from optimization_engine.nx.solver import NXSolver
        except ImportError:
            # Degrade gracefully: no solver means no trials, not a failure.
            result.add_warning("NXSolver not available - skipping test trials")
            return
        # Get design variables
        design_vars = self.spec.get("design_variables", [])
        if not design_vars:
            result.add_error("No design variables to test")
            return
        # Get model directory
        model_dir = self._find_model_dir()
        if not model_dir:
            result.add_error("Model directory not found")
            return
        # Get sim file
        sim_file = self._find_sim_file(model_dir)
        if not sim_file:
            result.add_error("Simulation file not found")
            return
        solver = NXSolver()
        for i in range(n_trials):
            # Progress spans 0.2 -> 0.8 across the trial loop.
            self._progress(f"Running test trial {i + 1}/{n_trials}...", 0.2 + (0.6 * i / n_trials))
            trial_result = TestTrialResult(trial_number=i + 1, parameters={}, objectives={})
            # Generate random parameters within bounds
            params = {}
            for dv in design_vars:
                # Spec may use "parameter" (preferred) or legacy "name".
                param_name = dv.get("parameter", dv.get("name"))
                # NOTE(review): assumes bounds is [lo, hi] with lo <= hi;
                # a missing bounds entry silently falls back to [0, 1].
                bounds = dv.get("bounds", [0, 1])
                # Use random value within bounds
                value = random.uniform(bounds[0], bounds[1])
                params[param_name] = value
            trial_result.parameters = params
            try:
                start_time = time.time()
                # Run simulation
                solve_result = solver.run_simulation(
                    sim_file=sim_file,
                    working_dir=model_dir,
                    expression_updates=params,
                    cleanup=True,
                )
                trial_result.solve_time_seconds = time.time() - start_time
                if solve_result.get("success"):
                    trial_result.success = True
                    # Extract results
                    op2_file = solve_result.get("op2_file")
                    if op2_file:
                        objectives = self._extract_objectives(Path(op2_file), model_dir)
                        trial_result.objectives = objectives
                else:
                    trial_result.success = False
                    trial_result.error = solve_result.get("error", "Unknown error")
            except Exception as e:
                # A crashed trial is recorded, not fatal to validation.
                trial_result.success = False
                trial_result.error = str(e)
                logger.error(f"Test trial {i + 1} failed: {e}")
            result.test_trials.append(trial_result)
        # Check if results vary
        self._check_results_variance(result)

    def _find_model_dir(self) -> Optional[Path]:
        """Find the model directory.

        A candidate qualifies only if it exists AND contains at least one
        .sim file; the study root itself is the last resort.
        """
        candidates = [
            self.study_dir / "1_model",
            self.study_dir / "1_setup" / "model",
            self.study_dir,
        ]
        for path in candidates:
            if path.exists() and list(path.glob("*.sim")):
                return path
        return None

    def _find_sim_file(self, model_dir: Path) -> Optional[Path]:
        """Find the simulation file.

        Prefers the name declared under spec["simulation"]["sim_file"];
        otherwise falls back to the first *.sim found in model_dir
        (glob order — effectively arbitrary if several exist).
        """
        # From spec
        sim = self.spec.get("simulation", {})
        sim_name = sim.get("sim_file")
        if sim_name:
            sim_path = model_dir / sim_name
            if sim_path.exists():
                return sim_path
        # Search for .sim files
        sim_files = list(model_dir.glob("*.sim"))
        if sim_files:
            return sim_files[0]
        return None

    def _extract_objectives(self, op2_file: Path, model_dir: Path) -> Dict[str, float]:
        """Extract objective values from results.

        Dispatches on a substring match of each objective's
        extraction "action" ("mass" / "displacement" / "stress") to the
        corresponding project extractor. Best-effort: any extractor failure
        is logged at debug level and that objective is simply omitted.

        NOTE(review): extractor return shapes are assumed from the keys
        read here ("max_displacement", "max_von_mises") — confirm against
        the extractor library.
        """
        objectives = {}
        # Extract based on configured objectives
        for obj in self.spec.get("objectives", []):
            name = obj.get("name", "objective")
            extraction = obj.get("extraction", {})
            action = extraction.get("action", "")
            try:
                if "mass" in action.lower():
                    from optimization_engine.extractors.bdf_mass_extractor import (
                        extract_mass_from_bdf,
                    )
                    # Mass comes from the exported .dat deck, not the .op2.
                    dat_files = list(model_dir.glob("*.dat"))
                    if dat_files:
                        objectives[name] = extract_mass_from_bdf(str(dat_files[0]))
                elif "displacement" in action.lower():
                    from optimization_engine.extractors.extract_displacement import (
                        extract_displacement,
                    )
                    result = extract_displacement(op2_file, subcase=1)
                    objectives[name] = result.get("max_displacement", 0)
                elif "stress" in action.lower():
                    from optimization_engine.extractors.extract_von_mises_stress import (
                        extract_solid_stress,
                    )
                    result = extract_solid_stress(op2_file, subcase=1)
                    objectives[name] = result.get("max_von_mises", 0)
            except Exception as e:
                logger.debug(f"Failed to extract {name}: {e}")
        return objectives

    def _check_results_variance(self, result: ValidationResult) -> None:
        """Check if test trial results vary (indicating mesh is updating).

        For each objective seen in the first successful trial, computes the
        variance across successful trials and flags an error when the
        coefficient of variation is below 0.1% — the signature of a mesh
        that is not actually regenerating between parameter updates.

        NOTE(review): result.results_vary is overwritten inside the
        per-objective loop, so the verdict reflects only the LAST objective
        checked; an earlier stuck objective still adds an error but its
        results_vary=False can be clobbered — confirm whether intended.
        """
        successful_trials = [t for t in result.test_trials if t.success]
        if len(successful_trials) < 2:
            result.add_warning("Not enough successful trials to check variance")
            return
        # Check variance for each objective
        for obj_name in successful_trials[0].objectives.keys():
            # Trials missing this objective contribute 0, which can
            # inflate the apparent variance.
            values = [t.objectives.get(obj_name, 0) for t in successful_trials]
            if len(values) > 1:
                variance = np.var(values)
                result.variance_by_objective[obj_name] = variance
                # Check if variance is too low (results are stuck)
                mean_val = np.mean(values)
                if mean_val != 0:
                    cv = np.sqrt(variance) / abs(mean_val)  # Coefficient of variation
                    if cv < 0.001:  # Less than 0.1% variation
                        result.add_error(
                            f"Results for '{obj_name}' are nearly identical (CV={cv:.6f}). "
                            "The mesh may not be updating!"
                        )
                        result.results_vary = False
                    else:
                        result.results_vary = True
                else:
                    # Can't calculate CV if mean is 0
                    if variance < 1e-10:
                        result.add_warning(f"Results for '{obj_name}' show no variation")
                    else:
                        result.results_vary = True
        # Default to True if we couldn't check
        if not result.variance_by_objective:
            result.results_vary = True

    def _calculate_estimates(self, result: ValidationResult) -> None:
        """Calculate runtime estimates.

        avg_solve_time = mean of successful trials' solve times;
        estimated_total_runtime = avg * planned trial count
        (spec optimization_settings.n_trials, default 100). No-op when no
        trial succeeded.
        """
        successful_trials = [t for t in result.test_trials if t.success]
        if successful_trials:
            solve_times = [t.solve_time_seconds for t in successful_trials]
            result.avg_solve_time = np.mean(solve_times)
            # Get total trials from spec
            n_trials = self.spec.get("optimization_settings", {}).get("n_trials", 100)
            result.estimated_total_runtime = result.avg_solve_time * n_trials
    def approve(self) -> bool:
        """
        Mark the study as approved for optimization.

        Creates an approval file to indicate validation passed. The file's
        content is the approval timestamp (ISO format); its existence is
        what is_approved() checks.
        """
        approval_file = self.study_dir / ".validation_approved"
        try:
            approval_file.write_text(datetime.now().isoformat())
            logger.info(f"Study approved: {self.study_dir.name}")
            return True
        except Exception as e:
            # Best-effort: a failed write is reported, not raised.
            logger.error(f"Failed to approve: {e}")
            return False

    def is_approved(self) -> bool:
        """Check if study has been approved (marker file exists)."""
        approval_file = self.study_dir / ".validation_approved"
        return approval_file.exists()

    def save_result(self, result: ValidationResult) -> Path:
        """Save validation result to file.

        Writes validation_result.json in the study directory and returns
        its path. Overwrites any previous result.
        """
        output_path = self.study_dir / "validation_result.json"
        with open(output_path, "w") as f:
            json.dump(result.to_dict(), f, indent=2)
        return output_path
def validate_study(
    study_dir: Path,
    run_test_trials: bool = True,
    n_test_trials: int = 3,
) -> ValidationResult:
    """
    Convenience function to validate a study.

    Builds a ValidationGate for the given directory and runs its full
    validation pass in one call.

    Args:
        study_dir: Path to study directory
        run_test_trials: Whether to run test FEA solves
        n_test_trials: Number of test trials
    Returns:
        ValidationResult
    """
    return ValidationGate(study_dir).validate(
        run_test_trials=run_test_trials,
        n_test_trials=n_test_trials,
    )