Files
Atomizer/optimization_engine/intake/processor.py
Anto01 a26914bbe8 feat: Add Studio UI, intake system, and extractor improvements
Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 12:02:30 -05:00

790 lines
29 KiB
Python

"""
Intake Processor
================
Processes intake folders to create study context:
1. Validates folder structure
2. Copies model files to study directory
3. Parses intake.yaml pre-configuration
4. Extracts text from context files (goals.md, PDFs)
5. Runs model introspection
6. Optionally runs baseline solve
7. Assembles complete StudyContext
Usage:
from optimization_engine.intake import IntakeProcessor
processor = IntakeProcessor(Path("studies/_inbox/my_project"))
context = processor.process(run_baseline=True)
"""
from __future__ import annotations
import logging
import shutil
import re
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Callable, Dict, Any
from .config import IntakeConfig, DesignVariableConfig
from .context import (
StudyContext,
IntrospectionData,
ExpressionInfo,
SolutionInfo,
BaselineResult,
DVSuggestion,
ObjectiveSuggestion,
ConstraintSuggestion,
ConfidenceLevel,
)
logger = logging.getLogger(__name__)
class IntakeError(Exception):
    """Signals a failure while processing an intake folder."""
class IntakeProcessor:
"""
Processes an intake folder to create a complete StudyContext.
The processor handles:
- File discovery and validation
- Model file copying
- Configuration parsing
- Context file extraction
- Model introspection (via NX journals)
- Baseline solve (optional)
- Suggestion generation
"""
def __init__(
    self,
    inbox_folder: Path,
    studies_dir: Optional[Path] = None,
    progress_callback: Optional[Callable[[str, float], None]] = None,
):
    """
    Set up a processor for one intake folder.

    Args:
        inbox_folder: Intake folder to process; it must already exist.
        studies_dir: Directory under which the study folder is placed. When
            omitted, the ancestors of this module's directory are searched
            for a ``CLAUDE.md`` marker and ``<marker dir>/studies`` is used;
            if no marker is found, ``<cwd>/studies`` is used instead.
        progress_callback: Optional callback receiving (message, percent).
            A no-op is installed when none is supplied.

    Raises:
        IntakeError: If ``inbox_folder`` does not exist.
    """
    self.inbox_folder = Path(inbox_folder)
    self.progress_callback = progress_callback or (lambda m, p: None)

    if not self.inbox_folder.exists():
        raise IntakeError(f"Inbox folder not found: {self.inbox_folder}")

    # The study is named after the folder; a leading underscore
    # (used for examples) is dropped.
    folder_name = self.inbox_folder.name
    self.study_name = folder_name[1:] if folder_name.startswith("_") else folder_name

    if studies_dir is None:
        start = Path(__file__).parent
        # Same candidates as walking upwards one parent at a time: the last
        # entry of the chain (the top-most ancestor) is never inspected.
        ancestors = [start, *start.parents][:-1]
        project_root = next(
            (folder for folder in ancestors if (folder / "CLAUDE.md").exists()),
            None,
        )
        if project_root is not None:
            studies_dir = project_root / "studies"
        else:
            studies_dir = Path.cwd() / "studies"

    self.studies_dir = Path(studies_dir)
    self.study_dir = self.studies_dir / self.study_name

    # Fresh context that the processing steps fill in.
    self.context = StudyContext(
        study_name=self.study_name,
        source_folder=self.inbox_folder,
    )
def process(
    self,
    run_baseline: bool = True,
    copy_files: bool = True,
    run_introspection: bool = True,
) -> StudyContext:
    """
    Run the intake pipeline and return the populated StudyContext.

    Args:
        run_baseline: Run the baseline solve step (only attempted when a
            sim file is known at that point of the pipeline).
        copy_files: Copy model files to the study directory.
        run_introspection: Run the model introspection step.

    Returns:
        The context held by this processor, after all steps have run.

    Raises:
        Exception: Any error raised by a step is recorded in
            ``context.errors``, logged, and re-raised unchanged.
    """
    logger.info(f"Processing intake: {self.inbox_folder}")
    try:
        # (message, progress, step, enabled). ``enabled`` is a callable so
        # that each condition is evaluated only when its turn comes, after
        # the earlier steps have updated the context.
        pipeline = [
            ("Discovering files...", 0.0, self._discover_files, lambda: True),
            ("Parsing configuration...", 0.1, self._parse_config, lambda: True),
            ("Extracting context...", 0.2, self._extract_context_files, lambda: True),
            ("Copying model files...", 0.3, self._copy_model_files, lambda: copy_files),
            ("Introspecting model...", 0.4, self._run_introspection, lambda: run_introspection),
            (
                "Running baseline solve...",
                0.6,
                self._run_baseline_solve,
                lambda: run_baseline and self.context.sim_file,
            ),
            ("Generating suggestions...", 0.8, self._generate_suggestions, lambda: True),
            ("Saving context...", 0.9, self._save_context, lambda: True),
        ]
        for message, fraction, step, enabled in pipeline:
            if enabled():
                self._progress(message, fraction)
                step()
        self._progress("Complete!", 1.0)
    except Exception as exc:
        self.context.errors.append(str(exc))
        logger.error(f"Intake processing failed: {exc}")
        raise
    return self.context
def _progress(self, message: str, percent: float) -> None:
    """Log a progress line and forward the update to the progress callback."""
    scaled = percent * 100
    logger.info(f"[{scaled:.0f}%] {message}")
    self.progress_callback(message, percent)
def _discover_files(self) -> None:
"""Discover model and context files in the inbox folder."""
# Look for model files
models_dir = self.inbox_folder / "models"
if models_dir.exists():
search_dir = models_dir
else:
# Fall back to root folder
search_dir = self.inbox_folder
# Find simulation file (required)
sim_files = list(search_dir.glob("*.sim"))
if sim_files:
self.context.sim_file = sim_files[0]
logger.info(f"Found sim file: {self.context.sim_file.name}")
else:
self.context.warnings.append("No .sim file found in models/")
# Find FEM file
fem_files = list(search_dir.glob("*.fem"))
if fem_files:
self.context.fem_file = fem_files[0]
logger.info(f"Found fem file: {self.context.fem_file.name}")
# Find part file
prt_files = [f for f in search_dir.glob("*.prt") if "_i.prt" not in f.name.lower()]
if prt_files:
self.context.prt_file = prt_files[0]
logger.info(f"Found prt file: {self.context.prt_file.name}")
# Find idealized part (CRITICAL!)
idealized_files = list(search_dir.glob("*_i.prt")) + list(search_dir.glob("*_I.prt"))
if idealized_files:
self.context.idealized_prt_file = idealized_files[0]
logger.info(f"Found idealized prt: {self.context.idealized_prt_file.name}")
else:
self.context.warnings.append(
"No idealized part (*_i.prt) found - mesh may not update during optimization!"
)
def _parse_config(self) -> None:
    """
    Load ``intake.yaml`` from the intake folder when it is present.

    On success the parsed configuration is stored as ``context.preconfig``
    and, if it names a study, the study name and study directory are updated
    to match. Any failure is downgraded to a warning on the context.
    """
    config_path = self.inbox_folder / "intake.yaml"
    if not config_path.exists():
        logger.info("No intake.yaml found, will use interview mode")
        return

    try:
        self.context.preconfig = IntakeConfig.from_yaml(config_path)
        logger.info("Loaded intake.yaml configuration")

        # A study name given in the configuration overrides the folder name.
        study_section = self.context.preconfig.study
        if study_section and study_section.name:
            self.context.study_name = study_section.name
            self.study_name = self.context.study_name
            self.study_dir = self.studies_dir / self.study_name
    except Exception as exc:
        self.context.warnings.append(f"Failed to parse intake.yaml: {exc}")
        logger.warning(f"Failed to parse intake.yaml: {exc}")
def _extract_context_files(self) -> None:
"""Extract text from context files."""
context_dir = self.inbox_folder / "context"
# Read goals.md
goals_path = context_dir / "goals.md"
if goals_path.exists():
self.context.goals_text = goals_path.read_text(encoding="utf-8")
logger.info("Loaded goals.md")
# Read constraints.txt
constraints_path = context_dir / "constraints.txt"
if constraints_path.exists():
self.context.constraints_text = constraints_path.read_text(encoding="utf-8")
logger.info("Loaded constraints.txt")
# Read any .txt or .md files in context/
if context_dir.exists():
for txt_file in context_dir.glob("*.txt"):
if txt_file.name != "constraints.txt":
content = txt_file.read_text(encoding="utf-8")
if self.context.notes_text:
self.context.notes_text += f"\n\n--- {txt_file.name} ---\n{content}"
else:
self.context.notes_text = content
# Extract PDF text (basic implementation)
# TODO: Add PyMuPDF and Claude Vision integration
for pdf_path in context_dir.glob("*.pdf") if context_dir.exists() else []:
try:
text = self._extract_pdf_text(pdf_path)
if text:
self.context.requirements_text = text
logger.info(f"Extracted text from {pdf_path.name}")
except Exception as e:
self.context.warnings.append(f"Failed to extract PDF {pdf_path.name}: {e}")
def _extract_pdf_text(self, pdf_path: Path) -> Optional[str]:
    """
    Extract the text of a PDF with PyMuPDF, if that package is installed.

    Args:
        pdf_path: PDF file to read.

    Returns:
        The text of all pages joined with newlines, or None when PyMuPDF is
        not importable or the extraction fails (both cases are logged as
        warnings rather than raised).
    """
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(pdf_path)
        try:
            return "\n".join(page.get_text() for page in doc)
        finally:
            # Release the document even when a page fails to extract.
            doc.close()
    except ImportError:
        logger.warning("PyMuPDF not installed, skipping PDF extraction")
        return None
    except Exception as e:
        logger.warning(f"PDF extraction failed: {e}")
        return None
def _copy_model_files(self) -> None:
"""Copy model files to study directory."""
# Create study directory structure
model_dir = self.study_dir / "1_model"
model_dir.mkdir(parents=True, exist_ok=True)
(self.study_dir / "2_iterations").mkdir(exist_ok=True)
(self.study_dir / "3_results").mkdir(exist_ok=True)
# Copy files
files_to_copy = [
self.context.sim_file,
self.context.fem_file,
self.context.prt_file,
self.context.idealized_prt_file,
]
for src in files_to_copy:
if src and src.exists():
dst = model_dir / src.name
if not dst.exists():
shutil.copy2(src, dst)
logger.info(f"Copied: {src.name}")
else:
logger.info(f"Already exists: {src.name}")
# Update paths to point to copied files
if self.context.sim_file:
self.context.sim_file = model_dir / self.context.sim_file.name
if self.context.fem_file:
self.context.fem_file = model_dir / self.context.fem_file.name
if self.context.prt_file:
self.context.prt_file = model_dir / self.context.prt_file.name
if self.context.idealized_prt_file:
self.context.idealized_prt_file = model_dir / self.context.idealized_prt_file.name
def _run_introspection(self) -> None:
"""
Collect the expressions of the part file and store them on the context.

Requires an existing sim file; without one a warning is recorded and the
method returns without touching ``context.introspection``. Otherwise an
IntrospectionData object is built and always assigned to
``context.introspection``. Each item returned by
``introspect_part_expressions`` becomes an ExpressionInfo and is flagged
via ``_is_design_candidate``. A failed import of the extractor module, or
any other error, is not raised: it is logged and recorded in the
``success`` / ``error`` fields of the result.
"""
# Introspection is only attempted when a sim file is known and present.
if not self.context.sim_file or not self.context.sim_file.exists():
self.context.warnings.append("Cannot introspect - no sim file")
return
introspection = IntrospectionData(timestamp=datetime.now())
try:
# Try to use existing introspection modules
# (imported lazily so that a missing module is handled by the ImportError branch below)
from optimization_engine.extractors.introspect_part import introspect_part_expressions
# Introspect part for expressions
if self.context.prt_file and self.context.prt_file.exists():
expressions = introspect_part_expressions(str(self.context.prt_file))
# Each item is read like a dict: "name" is required; "value", "units",
# "formula" and "type" are optional ("type" falls back to "Number").
for expr in expressions:
is_candidate = self._is_design_candidate(expr["name"], expr.get("value"))
introspection.expressions.append(
ExpressionInfo(
name=expr["name"],
value=expr.get("value"),
units=expr.get("units"),
formula=expr.get("formula"),
type=expr.get("type", "Number"),
is_design_candidate=is_candidate,
# Candidates are reported with HIGH confidence, everything else with MEDIUM.
confidence=ConfidenceLevel.HIGH
if is_candidate
else ConfidenceLevel.MEDIUM,
)
)
# NOTE(review): indentation was lost in this copy of the file, so it is unclear
# whether the next two lines run only when a part file was introspected or
# whenever the import succeeded - confirm against the original source.
introspection.success = True
logger.info(f"Introspected {len(introspection.expressions)} expressions")
except ImportError:
# NOTE(review): the message mentions a fallback, but none is visible in this method.
logger.warning("Introspection module not available, using fallback")
introspection.success = False
introspection.error = "Introspection module not available"
except Exception as e:
logger.error(f"Introspection failed: {e}")
introspection.success = False
introspection.error = str(e)
# The result is stored whether or not the introspection succeeded.
self.context.introspection = introspection
def _is_design_candidate(self, name: str, value: Optional[float]) -> bool:
"""Check if an expression looks like a design variable candidate."""
# Skip if no value or non-numeric
if value is None:
return False
# Skip system/reference expressions
if name.startswith("p") and name[1:].isdigit():
return False
# Skip mass-related outputs (not inputs)
if "mass" in name.lower() and "input" not in name.lower():
return False
# Look for typical design parameter names
design_keywords = [
"thickness",
"width",
"height",
"length",
"radius",
"diameter",
"angle",
"offset",
"depth",
"size",
"span",
"pitch",
"gap",
"rib",
"flange",
"web",
"wall",
"fillet",
"chamfer",
]
name_lower = name.lower()
return any(kw in name_lower for kw in design_keywords)
def _run_baseline_solve(self) -> None:
    """
    Solve the unmodified model once and attach the outcome to the context.

    The solver is run on ``context.sim_file`` in that file's own directory
    with no expression updates. On success the solve time is recorded and,
    when the reported OP2 file exists, results are extracted from it. Solver
    failures, a missing solver module and unexpected errors are not raised:
    they are logged and stored in the ``success`` / ``error`` fields of the
    BaselineResult, which is always saved as
    ``context.introspection.baseline``.
    """
    # The baseline hangs off the introspection data; create it if needed.
    if not self.context.introspection:
        self.context.introspection = IntrospectionData(timestamp=datetime.now())

    outcome = BaselineResult()
    try:
        from optimization_engine.nx.solver import NXSolver

        solver = NXSolver()
        work_dir = self.context.sim_file.parent
        result = solver.run_simulation(
            sim_file=self.context.sim_file,
            working_dir=work_dir,
            expression_updates={},  # No updates for baseline
            cleanup=True,
        )

        if not result["success"]:
            outcome.success = False
            outcome.error = result.get("error", "Unknown error")
            logger.warning(f"Baseline solve failed: {outcome.error}")
        else:
            outcome.success = True
            outcome.solve_time_seconds = result.get("solve_time", 0)

            # Pull result quantities from the OP2 file when one was produced.
            op2_file = result.get("op2_file")
            if op2_file and Path(op2_file).exists():
                self._extract_baseline_results(outcome, Path(op2_file), work_dir)
            logger.info(f"Baseline solve complete: {outcome.get_summary()}")
    except ImportError:
        logger.warning("NXSolver not available, skipping baseline")
        outcome.success = False
        outcome.error = "NXSolver not available"
    except Exception as exc:
        logger.error(f"Baseline solve failed: {exc}")
        outcome.success = False
        outcome.error = str(exc)

    self.context.introspection.baseline = outcome
def _extract_baseline_results(
    self, baseline: BaselineResult, op2_file: Path, model_dir: Path
) -> None:
    """
    Fill a BaselineResult with displacement, stress and mass values.

    The three extractions are independent and best-effort: a failure in one
    (including a missing extractor module) is logged at debug level and
    leaves the corresponding field untouched.

    Args:
        baseline: Result object to update in place.
        op2_file: OP2 file passed to the displacement and stress extractors.
        model_dir: Directory searched for a ``*.dat`` file for the mass.
    """
    # Displacement (subcase 1)
    try:
        from optimization_engine.extractors.extract_displacement import extract_displacement

        displacement = extract_displacement(op2_file, subcase=1)
        baseline.max_displacement_mm = displacement.get("max_displacement")
    except Exception as exc:
        logger.debug(f"Displacement extraction failed: {exc}")

    # Stress (subcase 1)
    try:
        from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress

        stress = extract_solid_stress(op2_file, subcase=1)
        baseline.max_stress_mpa = stress.get("max_von_mises")
    except Exception as exc:
        logger.debug(f"Stress extraction failed: {exc}")

    # Mass, read from the first .dat file found next to the model
    try:
        from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf

        decks = list(model_dir.glob("*.dat"))
        if len(decks) > 0:
            first_deck = decks[0]
            baseline.mass_kg = extract_mass_from_bdf(str(first_deck))
    except Exception as exc:
        logger.debug(f"Mass extraction failed: {exc}")
def _generate_suggestions(self) -> None:
"""Generate intelligent suggestions based on all context."""
self._generate_dv_suggestions()
self._generate_objective_suggestions()
self._generate_constraint_suggestions()
self._query_lac()
def _generate_dv_suggestions(self) -> None:
    """
    Build the list of suggested design variables.

    Every design candidate from the introspection data that has an int or
    float value yields a suggestion with bounds at 50% and 150% of that
    value (ordered so the smaller number comes first for positive values
    and the larger multiple first otherwise). Design variables listed in the
    pre-configuration then override the bounds of matching suggestions or
    are added as new, high-confidence suggestions. The result is stored in
    ``context.suggested_dvs``.
    """
    by_name: Dict[str, DVSuggestion] = {}

    # Suggestions derived from the introspected expressions
    introspection = self.context.introspection
    if introspection:
        for expr in introspection.get_design_candidates():
            if expr.value is None or not isinstance(expr.value, (int, float)):
                continue
            half, one_and_half = expr.value * 0.5, expr.value * 1.5
            bounds = (half, one_and_half) if expr.value > 0 else (one_and_half, half)
            by_name[expr.name] = DVSuggestion(
                name=expr.name,
                current_value=expr.value,
                suggested_bounds=bounds,
                units=expr.units,
                confidence=expr.confidence,
                reason=f"Numeric expression with value {expr.value}",
                source="introspection",
            )

    # Pre-configured design variables take precedence over the guesses above
    preconfig = self.context.preconfig
    if preconfig and preconfig.design_variables:
        for dv in preconfig.design_variables:
            if dv.name not in by_name:
                by_name[dv.name] = DVSuggestion(
                    name=dv.name,
                    suggested_bounds=dv.bounds,
                    units=dv.units,
                    confidence=ConfidenceLevel.HIGH,
                    reason="Specified in intake.yaml",
                    source="preconfig",
                )
                continue
            existing = by_name[dv.name]
            existing.suggested_bounds = dv.bounds
            # Keep the introspected units when the configuration gives none.
            existing.units = dv.units or existing.units
            existing.source = "preconfig"
            existing.confidence = ConfidenceLevel.HIGH

    self.context.suggested_dvs = list(by_name.values())
    logger.info(f"Generated {len(self.context.suggested_dvs)} DV suggestions")
def _generate_objective_suggestions(self) -> None:
"""Generate objective suggestions from context."""
suggestions = []
# From preconfig
if self.context.preconfig and self.context.preconfig.objectives:
obj = self.context.preconfig.objectives.primary
extractor = self._get_extractor_for_target(obj.target)
suggestions.append(
ObjectiveSuggestion(
name=obj.target,
goal=obj.goal,
extractor=extractor,
confidence=ConfidenceLevel.HIGH,
reason="Specified in intake.yaml",
source="preconfig",
)
)
# From goals text (simple keyword matching)
elif self.context.goals_text:
goals_lower = self.context.goals_text.lower()
if "minimize" in goals_lower and "mass" in goals_lower:
suggestions.append(
ObjectiveSuggestion(
name="mass",
goal="minimize",
extractor="extract_mass_from_bdf",
confidence=ConfidenceLevel.MEDIUM,
reason="Found 'minimize mass' in goals",
source="goals",
)
)
elif "minimize" in goals_lower and "weight" in goals_lower:
suggestions.append(
ObjectiveSuggestion(
name="mass",
goal="minimize",
extractor="extract_mass_from_bdf",
confidence=ConfidenceLevel.MEDIUM,
reason="Found 'minimize weight' in goals",
source="goals",
)
)
if "maximize" in goals_lower and "stiffness" in goals_lower:
suggestions.append(
ObjectiveSuggestion(
name="stiffness",
goal="maximize",
extractor="extract_displacement", # Inverse of displacement
confidence=ConfidenceLevel.MEDIUM,
reason="Found 'maximize stiffness' in goals",
source="goals",
)
)
self.context.suggested_objectives = suggestions
def _generate_constraint_suggestions(self) -> None:
"""Generate constraint suggestions from context."""
suggestions = []
# From preconfig
if self.context.preconfig and self.context.preconfig.constraints:
for const in self.context.preconfig.constraints:
suggestions.append(
ConstraintSuggestion(
name=const.type,
type="less_than" if "max" in const.type else "greater_than",
suggested_threshold=const.threshold,
units=const.units,
confidence=ConfidenceLevel.HIGH,
reason="Specified in intake.yaml",
source="preconfig",
)
)
# From requirements text
if self.context.requirements_text:
# Simple pattern matching for constraints
text = self.context.requirements_text
# Look for stress limits
stress_pattern = r"(?:max(?:imum)?|stress)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:MPa|mpa)"
matches = re.findall(stress_pattern, text, re.IGNORECASE)
if matches:
suggestions.append(
ConstraintSuggestion(
name="max_stress",
type="less_than",
suggested_threshold=float(matches[0]),
units="MPa",
confidence=ConfidenceLevel.MEDIUM,
reason=f"Found stress limit in requirements: {matches[0]} MPa",
source="requirements",
)
)
# Look for displacement limits
disp_pattern = (
r"(?:max(?:imum)?|displacement|deflection)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:mm|MM)"
)
matches = re.findall(disp_pattern, text, re.IGNORECASE)
if matches:
suggestions.append(
ConstraintSuggestion(
name="max_displacement",
type="less_than",
suggested_threshold=float(matches[0]),
units="mm",
confidence=ConfidenceLevel.MEDIUM,
reason=f"Found displacement limit in requirements: {matches[0]} mm",
source="requirements",
)
)
self.context.suggested_constraints = suggestions
def _get_extractor_for_target(self, target: str) -> str:
"""Map optimization target to extractor function."""
extractors = {
"mass": "extract_mass_from_bdf",
"displacement": "extract_displacement",
"stress": "extract_solid_stress",
"frequency": "extract_frequency",
"stiffness": "extract_displacement", # Inverse
"strain_energy": "extract_strain_energy",
}
return extractors.get(target.lower(), f"extract_{target}")
def _query_lac(self) -> None:
    """
    Ask the Learning Atomizer Core (LAC) for similar studies and a method.

    The query is the study name, followed by the first 200 characters of the
    goals text when there is one. If the LAC returns a recommendation, its
    "method" entry is stored as ``context.recommended_method``. The whole
    step is optional: a missing LAC module or any error is only logged at
    debug level.
    """
    try:
        from knowledge_base.lac import get_lac

        lac = get_lac()

        # Query text: study name plus the beginning of the goals, if any
        goals = self.context.goals_text
        fragments = [self.study_name] + ([goals[:200]] if goals else [])
        query = " ".join(fragments)

        similar = lac.query_similar_optimizations(query)

        # The number of objectives comes from the pre-configuration, else 1
        preconfig = self.context.preconfig
        if preconfig and preconfig.objectives:
            n_objectives = len(preconfig.objectives.all_objectives)
        else:
            n_objectives = 1

        recommendation = lac.get_best_method_for(
            geometry_type="unknown", n_objectives=n_objectives
        )
        if recommendation:
            self.context.recommended_method = recommendation.get("method")
        logger.info(f"LAC query complete: {len(similar)} similar studies found")
    except ImportError:
        logger.debug("LAC not available")
    except Exception as exc:
        logger.debug(f"LAC query failed: {exc}")
def _save_context(self) -> None:
    """
    Write the assembled context into ``<study_dir>/0_intake``.

    Saved there: ``study_context.json`` (via ``context.save``),
    ``introspection.json`` when introspection data exists, a copy of every
    file from the intake ``context/`` folder under ``original_context/``,
    and a copy of ``intake.yaml`` when the intake folder has one.
    """
    self.study_dir.mkdir(parents=True, exist_ok=True)
    intake_root = self.study_dir / "0_intake"
    intake_root.mkdir(exist_ok=True)

    # Context JSON
    self.context.save(intake_root / "study_context.json")

    # Introspection report
    if self.context.introspection:
        import json

        with open(intake_root / "introspection.json", "w") as handle:
            json.dump(self.context.introspection.to_dict(), handle, indent=2)

    # Original context files (plain files only, sub-folders are skipped)
    originals_dir = intake_root / "original_context"
    originals_dir.mkdir(parents=True, exist_ok=True)
    context_source = self.inbox_folder / "context"
    if context_source.exists():
        for entry in context_source.iterdir():
            if entry.is_file():
                shutil.copy2(entry, originals_dir / entry.name)

    # intake.yaml
    intake_yaml = self.inbox_folder / "intake.yaml"
    if intake_yaml.exists():
        shutil.copy2(intake_yaml, intake_root / "intake.yaml")

    logger.info(f"Saved context to {self.study_dir / '0_intake'}")
def process_intake(
    inbox_folder: Path,
    run_baseline: bool = True,
    progress_callback: Optional[Callable[[str, float], None]] = None,
) -> StudyContext:
    """
    Process an intake folder in one call.

    Builds an IntakeProcessor for the folder and runs it with its default
    settings, apart from the baseline switch.

    Args:
        inbox_folder: Path to inbox folder
        run_baseline: Run baseline solve
        progress_callback: Optional progress callback

    Returns:
        The StudyContext produced by the processor
    """
    return IntakeProcessor(
        inbox_folder, progress_callback=progress_callback
    ).process(run_baseline=run_baseline)