feat: Add Studio UI, intake system, and extractor improvements

Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-27 12:02:30 -05:00
parent 3193831340
commit a26914bbe8
56 changed files with 14173 additions and 646 deletions

View File

@@ -0,0 +1,46 @@
"""
Atomizer Intake System
======================
Provides structured intake processing for optimization studies.
Components:
- IntakeConfig: Pydantic schema for intake.yaml
- StudyContext: Complete assembled context for study creation
- IntakeProcessor: File handling and processing
- ContextAssembler: Combines all context sources
Usage:
from optimization_engine.intake import IntakeProcessor, IntakeConfig
processor = IntakeProcessor(inbox_folder)
context = processor.process()
"""
from .config import (
IntakeConfig,
StudyConfig,
ObjectiveConfig,
ConstraintConfig,
DesignVariableConfig,
BudgetConfig,
AlgorithmConfig,
MaterialConfig,
)
from .context import StudyContext, IntrospectionData, BaselineResult
from .processor import IntakeProcessor
__all__ = [
"IntakeConfig",
"StudyConfig",
"ObjectiveConfig",
"ConstraintConfig",
"DesignVariableConfig",
"BudgetConfig",
"AlgorithmConfig",
"MaterialConfig",
"StudyContext",
"IntrospectionData",
"BaselineResult",
"IntakeProcessor",
]

View File

@@ -0,0 +1,371 @@
"""
Intake Configuration Schema
===========================
Pydantic models for intake.yaml configuration files.
These models define the structure of pre-configuration that users can
provide to skip interview questions and speed up study setup.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional, List, Literal, Union, Any, Dict
from pydantic import BaseModel, Field, field_validator, model_validator
import yaml
class ObjectiveConfig(BaseModel):
    """Configuration for an optimization objective.

    ``target`` accepts a canonical name (mass, displacement, stress,
    frequency, stiffness, strain_energy, volume), a known alias
    (weight -> mass, deflection -> displacement), or any custom name
    understood by a custom extractor.
    """
    goal: Literal["minimize", "maximize"]
    target: str = Field(
        description="What to optimize: mass, displacement, stress, frequency, stiffness, or custom name"
    )
    weight: float = Field(default=1.0, ge=0.0, le=10.0)
    extractor: Optional[str] = Field(
        default=None, description="Custom extractor function name (auto-detected if not specified)"
    )

    @field_validator("target")
    @classmethod
    def validate_target(cls, v: str) -> str:
        """Normalize target names.

        Lowercases and strips the value, then maps common aliases onto
        their canonical names. Unknown names pass through unchanged so
        custom extractor targets stay usable.
        """
        normalized = v.lower().strip()
        # Map common aliases onto canonical names; anything else is kept
        # as-is (it may name a custom extractor). The previous revision
        # also built a set of known targets that was never read.
        aliases = {
            "weight": "mass",
            "deflection": "displacement",
        }
        return aliases.get(normalized, normalized)
class ConstraintConfig(BaseModel):
    """Configuration for an optimization constraint.

    The ``type`` field is canonicalized to lower_snake_case on validation,
    so "Max Stress" and "max_stress" are treated identically.
    """
    type: str = Field(
        description="Constraint type: max_stress, max_displacement, min_frequency, etc."
    )
    threshold: float
    units: str = ""
    description: Optional[str] = None

    @field_validator("type")
    @classmethod
    def normalize_type(cls, v: str) -> str:
        """Canonicalize the constraint type to lower_snake_case."""
        cleaned = v.lower().strip()
        return cleaned.replace(" ", "_")
class DesignVariableConfig(BaseModel):
    """Configuration for a design variable.

    A design variable maps an NX expression name onto a continuous (or,
    with ``step``, discrete) search interval given by ``bounds``.
    """
    name: str = Field(description="NX expression name")
    bounds: tuple[float, float] = Field(description="(min, max) bounds")
    units: Optional[str] = None
    description: Optional[str] = None
    step: Optional[float] = Field(default=None, description="Step size for discrete variables")

    @field_validator("bounds")
    @classmethod
    def validate_bounds(cls, v: tuple[float, float]) -> tuple[float, float]:
        """Reject bounds that are not a strictly increasing (min, max) pair."""
        if len(v) != 2:
            raise ValueError("Bounds must be a tuple of (min, max)")
        lower, upper = v
        if lower >= upper:
            raise ValueError(f"Lower bound ({lower}) must be less than upper bound ({upper})")
        return v

    @property
    def range(self) -> float:
        """Width of the allowed interval (upper - lower)."""
        lower, upper = self.bounds
        return upper - lower

    @property
    def range_ratio(self) -> float:
        """Ratio of upper to lower bound; infinite when the lower bound is zero."""
        lower, upper = self.bounds
        if lower == 0:
            return float("inf")
        return upper / lower
class BudgetConfig(BaseModel):
    """Configuration for optimization budget."""
    max_trials: int = Field(default=100, ge=1, le=10000)
    timeout_per_trial: int = Field(default=300, ge=10, le=7200, description="Seconds per FEA solve")
    target_runtime: Optional[str] = Field(
        default=None, description="Target total runtime (e.g., '2h', '30m')"
    )

    def get_target_runtime_seconds(self) -> Optional[int]:
        """Parse target_runtime ('2h', '30m', '45s', or a bare number of
        seconds) into whole seconds; None when no target is set."""
        if not self.target_runtime:
            return None
        runtime = self.target_runtime.lower().strip()
        # Suffix -> multiplier; the first matching suffix wins.
        for suffix, multiplier in (("h", 3600), ("m", 60), ("s", 1)):
            if runtime.endswith(suffix):
                return int(float(runtime[:-1]) * multiplier)
        # No recognized suffix: interpret the value as seconds.
        return int(float(runtime))
class AlgorithmConfig(BaseModel):
    """Configuration for optimization algorithm.

    "auto" defers the choice of method to the engine; the other values
    select a specific sampler by name.
    """
    method: Literal["auto", "TPE", "CMA-ES", "NSGA-II", "random"] = "auto"
    neural_acceleration: bool = Field(
        default=False, description="Enable surrogate model for speedup"
    )
    # Trade-off hint for the engine; "balanced" is the default middle ground.
    priority: Literal["speed", "accuracy", "balanced"] = "balanced"
    seed: Optional[int] = Field(default=None, description="Random seed for reproducibility")
class MaterialConfig(BaseModel):
    """Configuration for material properties.

    Units (per the field descriptions below): stresses in MPa, density in
    kg/m3, Young's modulus in GPa; Poisson's ratio is dimensionless and
    bounded to the physically meaningful [0, 0.5] interval.
    """
    name: str
    yield_stress: Optional[float] = Field(default=None, ge=0, description="Yield stress in MPa")
    ultimate_stress: Optional[float] = Field(
        default=None, ge=0, description="Ultimate stress in MPa"
    )
    density: Optional[float] = Field(default=None, ge=0, description="Density in kg/m3")
    youngs_modulus: Optional[float] = Field(
        default=None, ge=0, description="Young's modulus in GPa"
    )
    poissons_ratio: Optional[float] = Field(
        default=None, ge=0, le=0.5, description="Poisson's ratio"
    )
class ObjectivesConfig(BaseModel):
    """Configuration for all objectives.

    Holds one mandatory primary objective plus an optional list of
    secondary objectives for multi-objective studies.
    """
    primary: ObjectiveConfig
    secondary: Optional[List[ObjectiveConfig]] = None

    @property
    def is_multi_objective(self) -> bool:
        """True when at least one secondary objective is configured."""
        return bool(self.secondary)

    @property
    def all_objectives(self) -> List[ObjectiveConfig]:
        """Primary objective followed by any secondary objectives."""
        return [self.primary, *(self.secondary or [])]
class StudyConfig(BaseModel):
    """Configuration for study metadata."""
    # Auto-generated from the intake folder name when omitted.
    name: Optional[str] = Field(
        default=None, description="Study name (auto-generated from folder if omitted)"
    )
    # IntakeConfig.validate_consistency upgrades this to "multi_objective"
    # when secondary objectives are configured.
    type: Literal["single_objective", "multi_objective"] = "single_objective"
    description: Optional[str] = None
    tags: Optional[List[str]] = None
class IntakeConfig(BaseModel):
    """
    Complete intake.yaml configuration schema.

    All fields are optional - anything not specified will be asked
    in the interview or auto-detected from introspection.
    """
    study: Optional[StudyConfig] = None
    objectives: Optional[ObjectivesConfig] = None
    constraints: Optional[List[ConstraintConfig]] = None
    design_variables: Optional[List[DesignVariableConfig]] = None
    budget: Optional[BudgetConfig] = None
    algorithm: Optional[AlgorithmConfig] = None
    material: Optional[MaterialConfig] = None
    notes: Optional[str] = None
    @classmethod
    def from_yaml(cls, yaml_path: Union[str, Path]) -> "IntakeConfig":
        """Load configuration from a YAML file.

        Raises:
            FileNotFoundError: If yaml_path does not exist.
            ValidationError: If the YAML content does not match this schema.
        """
        yaml_path = Path(yaml_path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"Intake config not found: {yaml_path}")
        with open(yaml_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
        # An empty YAML file parses to None; treat it as an empty config.
        if data is None:
            return cls()
        return cls.model_validate(data)
    @classmethod
    def from_yaml_safe(cls, yaml_path: Union[str, Path]) -> Optional["IntakeConfig"]:
        """Load configuration from YAML, returning None if file doesn't exist.

        Deliberately best-effort: any parse or validation error also yields
        None instead of propagating, so callers can fall back to interview
        mode without handling exceptions.
        """
        yaml_path = Path(yaml_path)
        if not yaml_path.exists():
            return None
        try:
            return cls.from_yaml(yaml_path)
        except Exception:
            return None
    def to_yaml(self, yaml_path: Union[str, Path]) -> None:
        """Save configuration to a YAML file.

        Fields that are None are omitted, and the declared field order is
        preserved (sort_keys=False).
        """
        yaml_path = Path(yaml_path)
        data = self.model_dump(exclude_none=True)
        with open(yaml_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False)
    def get_value(self, key: str) -> Optional[Any]:
        """
        Get a configuration value by dot-notation key.

        Returns None when any segment of the path is missing or None.

        Examples:
            config.get_value("study.name")
            config.get_value("budget.max_trials")
            config.get_value("objectives.primary.goal")
        """
        parts = key.split(".")
        value: Any = self
        for part in parts:
            if value is None:
                return None
            # Attribute access covers models; dict lookup covers nested dicts.
            if hasattr(value, part):
                value = getattr(value, part)
            elif isinstance(value, dict):
                value = value.get(part)
            else:
                return None
        return value
    def is_complete(self) -> bool:
        """Check if all required configuration is provided.

        Only objectives and a non-empty design_variables list are required;
        everything else has defaults or is gathered later.
        """
        return (
            self.objectives is not None
            and self.design_variables is not None
            and len(self.design_variables) > 0
        )
    def get_missing_fields(self) -> List[str]:
        """Get list of fields that still need to be configured.

        Constraints are flagged as "(recommended)" rather than required.
        """
        missing = []
        if self.objectives is None:
            missing.append("objectives")
        if self.design_variables is None or len(self.design_variables) == 0:
            missing.append("design_variables")
        if self.constraints is None:
            missing.append("constraints (recommended)")
        if self.budget is None:
            missing.append("budget")
        return missing
    @model_validator(mode="after")
    def validate_consistency(self) -> "IntakeConfig":
        """Validate consistency between configuration sections.

        Upgrades study.type to "multi_objective" when secondary objectives
        are present. NOTE(review): the reverse case (declared multi with no
        secondary objectives) is intentionally left unchanged — confirm.
        """
        # Check study type matches objectives
        if self.study and self.objectives:
            is_multi = self.objectives.is_multi_objective
            declared_multi = self.study.type == "multi_objective"
            if is_multi and not declared_multi:
                # Auto-correct study type
                self.study.type = "multi_objective"
        return self
# Common material presets.
# Units follow the MaterialConfig field descriptions: stresses in MPa,
# density in kg/m3, Young's modulus in GPa. Keys are the normalized form
# that get_material_preset() matches against.
MATERIAL_PRESETS: Dict[str, MaterialConfig] = {
    "aluminum_6061_t6": MaterialConfig(
        name="Aluminum 6061-T6",
        yield_stress=276,
        ultimate_stress=310,
        density=2700,
        youngs_modulus=68.9,
        poissons_ratio=0.33,
    ),
    "aluminum_7075_t6": MaterialConfig(
        name="Aluminum 7075-T6",
        yield_stress=503,
        ultimate_stress=572,
        density=2810,
        youngs_modulus=71.7,
        poissons_ratio=0.33,
    ),
    "steel_a36": MaterialConfig(
        name="Steel A36",
        yield_stress=250,
        ultimate_stress=400,
        density=7850,
        youngs_modulus=200,
        poissons_ratio=0.26,
    ),
    "stainless_304": MaterialConfig(
        name="Stainless Steel 304",
        yield_stress=215,
        ultimate_stress=505,
        density=8000,
        youngs_modulus=193,
        poissons_ratio=0.29,
    ),
    "titanium_6al4v": MaterialConfig(
        name="Titanium Ti-6Al-4V",
        yield_stress=880,
        ultimate_stress=950,
        density=4430,
        youngs_modulus=113.8,
        poissons_ratio=0.342,
    ),
}
def get_material_preset(name: str) -> Optional[MaterialConfig]:
    """
    Look up a material preset by name with fuzzy matching.

    Matching order: exact key match first, then the first preset whose key
    or display name contains the normalized query as a substring.

    Examples:
        get_material_preset("6061")   # Returns aluminum_6061_t6
        get_material_preset("steel")  # Returns steel_a36
    """
    query = name.lower().replace("-", "_").replace(" ", "_")
    exact = MATERIAL_PRESETS.get(query)
    if exact is not None:
        return exact
    # Fall back to substring matching in declaration order.
    return next(
        (
            material
            for preset_key, material in MATERIAL_PRESETS.items()
            if query in preset_key or query in material.name.lower()
        ),
        None,
    )

View File

@@ -0,0 +1,540 @@
"""
Study Context
=============
Complete assembled context for study creation, combining:
- Model introspection results
- Context files (goals.md, PDFs, images)
- Pre-configuration (intake.yaml)
- LAC memory (similar studies, recommendations)
This context object is used by both Interview Mode and Canvas Mode
to provide intelligent suggestions and pre-filled values.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Dict, Any
from enum import Enum
import json
class ConfidenceLevel(str, Enum):
    """Confidence level for suggestions.

    Subclasses str so members compare equal to their string values and
    serialize via ``.value`` in the to_dict() methods below.
    """
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
@dataclass
class ExpressionInfo:
    """Information about an NX expression.

    ``is_design_candidate`` marks expressions that look usable as design
    variables; ``confidence``/``reason`` qualify that judgement.
    """
    name: str
    value: Optional[float] = None
    units: Optional[str] = None
    formula: Optional[str] = None
    type: str = "Number"
    is_design_candidate: bool = False
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    reason: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (enum flattened to its value)."""
        plain_fields = ("name", "value", "units", "formula", "type", "is_design_candidate")
        payload: Dict[str, Any] = {key: getattr(self, key) for key in plain_fields}
        payload["confidence"] = self.confidence.value
        payload["reason"] = self.reason
        return payload
@dataclass
class SolutionInfo:
    """Information about an NX solution."""
    name: str
    type: str  # Solver solution label, e.g. "SOL 101", "SOL 103"
    description: Optional[str] = None
@dataclass
class BoundaryConditionInfo:
    """Information about a boundary condition."""
    name: str
    type: str  # Constraint kind, e.g. Fixed, Pinned
    location: Optional[str] = None  # free-text location, when reported
@dataclass
class LoadInfo:
    """Information about a load."""
    name: str
    type: str  # Load kind, e.g. Force, Pressure
    magnitude: Optional[float] = None  # in the units given below, if any
    units: Optional[str] = None
    location: Optional[str] = None  # free-text location, when reported
@dataclass
class MaterialInfo:
    """Information about a material in the model.

    NOTE(review): units are not stated here; MaterialConfig in config.py
    uses MPa / kg/m3 / GPa — confirm the introspection journal matches.
    """
    name: str
    yield_stress: Optional[float] = None
    density: Optional[float] = None
    youngs_modulus: Optional[float] = None
@dataclass
class MeshInfo:
    """Information about the mesh."""
    element_count: int = 0
    node_count: int = 0
    element_types: List[str] = field(default_factory=list)
    # Metric name -> value; the exact metrics depend on the introspection
    # journal that fills this in (not visible here).
    quality_metrics: Dict[str, float] = field(default_factory=dict)
@dataclass
class BaselineResult:
    """Results from baseline solve.

    All value fields stay None when the solve did not produce them;
    ``success``/``error`` describe the solve outcome itself.
    """
    mass_kg: Optional[float] = None
    max_displacement_mm: Optional[float] = None
    max_stress_mpa: Optional[float] = None
    max_strain: Optional[float] = None
    first_frequency_hz: Optional[float] = None
    strain_energy_j: Optional[float] = None
    solve_time_seconds: Optional[float] = None
    success: bool = False
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every field to a JSON-friendly dict."""
        field_names = (
            "mass_kg",
            "max_displacement_mm",
            "max_stress_mpa",
            "max_strain",
            "first_frequency_hz",
            "strain_energy_j",
            "solve_time_seconds",
            "success",
            "error",
        )
        return {name: getattr(self, name) for name in field_names}

    def get_summary(self) -> str:
        """Get a human-readable one-line summary of baseline results."""
        if not self.success:
            return f"Baseline solve failed: {self.error or 'Unknown error'}"
        # (label, value, format) triples for the headline quantities.
        labelled = (
            ("mass", self.mass_kg, "{:.2f}kg"),
            ("disp", self.max_displacement_mm, "{:.3f}mm"),
            ("stress", self.max_stress_mpa, "{:.1f}MPa"),
            ("freq", self.first_frequency_hz, "{:.1f}Hz"),
        )
        parts = [
            f"{label}={fmt.format(value)}"
            for label, value, fmt in labelled
            if value is not None
        ]
        return ", ".join(parts) if parts else "No results"
@dataclass
class IntrospectionData:
    """Complete introspection results from NX model.

    to_dict()/from_dict() are designed to round-trip: every key emitted by
    to_dict() is restored by from_dict(). (Previously the expression
    ``reason``, the extra baseline fields, and the serialized boundary
    conditions / loads / materials were silently dropped on load.)
    """
    success: bool = False
    timestamp: Optional[datetime] = None
    error: Optional[str] = None
    # Part information
    expressions: List[ExpressionInfo] = field(default_factory=list)
    bodies: List[Dict[str, Any]] = field(default_factory=list)
    # Simulation information
    solutions: List[SolutionInfo] = field(default_factory=list)
    boundary_conditions: List[BoundaryConditionInfo] = field(default_factory=list)
    loads: List[LoadInfo] = field(default_factory=list)
    materials: List[MaterialInfo] = field(default_factory=list)
    mesh_info: Optional[MeshInfo] = None
    # Available result types (from OP2)
    available_results: Dict[str, bool] = field(default_factory=dict)
    subcases: List[int] = field(default_factory=list)
    # Baseline solve
    baseline: Optional[BaselineResult] = None

    def get_expression_names(self) -> List[str]:
        """Get list of all expression names."""
        return [e.name for e in self.expressions]

    def get_design_candidates(self) -> List[ExpressionInfo]:
        """Get expressions that look like design variables."""
        return [e for e in self.expressions if e.is_design_candidate]

    def get_expression(self, name: str) -> Optional[ExpressionInfo]:
        """Get expression by exact name, or None when absent."""
        for expr in self.expressions:
            if expr.name == name:
                return expr
        return None

    def get_solver_type(self) -> Optional[str]:
        """Get the primary solver type (the first solution's type), if any."""
        if self.solutions:
            return self.solutions[0].type
        return None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Note: ``bodies`` and ``mesh_info`` are intentionally not serialized.
        """
        return {
            "success": self.success,
            "timestamp": self.timestamp.isoformat() if self.timestamp else None,
            "error": self.error,
            "expressions": [e.to_dict() for e in self.expressions],
            "solutions": [{"name": s.name, "type": s.type} for s in self.solutions],
            "boundary_conditions": [
                {"name": bc.name, "type": bc.type} for bc in self.boundary_conditions
            ],
            "loads": [
                {"name": l.name, "type": l.type, "magnitude": l.magnitude} for l in self.loads
            ],
            "materials": [{"name": m.name, "yield_stress": m.yield_stress} for m in self.materials],
            "available_results": self.available_results,
            "subcases": self.subcases,
            "baseline": self.baseline.to_dict() if self.baseline else None,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "IntrospectionData":
        """Create from a dictionary produced by to_dict()."""
        introspection = cls(
            success=data.get("success", False),
            error=data.get("error"),
        )
        if data.get("timestamp"):
            introspection.timestamp = datetime.fromisoformat(data["timestamp"])
        # Parse expressions (including the optional reason text).
        for expr_data in data.get("expressions", []):
            introspection.expressions.append(
                ExpressionInfo(
                    name=expr_data["name"],
                    value=expr_data.get("value"),
                    units=expr_data.get("units"),
                    formula=expr_data.get("formula"),
                    type=expr_data.get("type", "Number"),
                    is_design_candidate=expr_data.get("is_design_candidate", False),
                    confidence=ConfidenceLevel(expr_data.get("confidence", "medium")),
                    reason=expr_data.get("reason"),
                )
            )
        # Parse solutions
        for sol_data in data.get("solutions", []):
            introspection.solutions.append(
                SolutionInfo(
                    name=sol_data["name"],
                    type=sol_data["type"],
                )
            )
        # Parse boundary conditions, loads, and materials (the subset of
        # fields that to_dict() serializes).
        for bc_data in data.get("boundary_conditions", []):
            introspection.boundary_conditions.append(
                BoundaryConditionInfo(name=bc_data["name"], type=bc_data["type"])
            )
        for load_data in data.get("loads", []):
            introspection.loads.append(
                LoadInfo(
                    name=load_data["name"],
                    type=load_data["type"],
                    magnitude=load_data.get("magnitude"),
                )
            )
        for mat_data in data.get("materials", []):
            introspection.materials.append(
                MaterialInfo(name=mat_data["name"], yield_stress=mat_data.get("yield_stress"))
            )
        introspection.available_results = data.get("available_results", {})
        introspection.subcases = data.get("subcases", [])
        # Parse baseline (all fields emitted by BaselineResult.to_dict()).
        if data.get("baseline"):
            baseline_data = data["baseline"]
            introspection.baseline = BaselineResult(
                mass_kg=baseline_data.get("mass_kg"),
                max_displacement_mm=baseline_data.get("max_displacement_mm"),
                max_stress_mpa=baseline_data.get("max_stress_mpa"),
                max_strain=baseline_data.get("max_strain"),
                first_frequency_hz=baseline_data.get("first_frequency_hz"),
                strain_energy_j=baseline_data.get("strain_energy_j"),
                solve_time_seconds=baseline_data.get("solve_time_seconds"),
                success=baseline_data.get("success", False),
                error=baseline_data.get("error"),
            )
        return introspection
@dataclass
class DVSuggestion:
    """Suggested design variable.

    ``source`` records where the suggestion came from; ``lac_insight``
    carries any supporting note from the LAC memory.
    """
    name: str
    current_value: Optional[float] = None
    suggested_bounds: Optional[tuple[float, float]] = None
    units: Optional[str] = None
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    reason: str = ""
    source: str = "introspection"  # introspection, preconfig, lac
    lac_insight: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-friendly dict (bounds as list, enum as value)."""
        bounds = self.suggested_bounds
        return {
            "name": self.name,
            "current_value": self.current_value,
            "suggested_bounds": list(bounds) if bounds else None,
            "units": self.units,
            "confidence": self.confidence.value,
            "reason": self.reason,
            "source": self.source,
            "lac_insight": self.lac_insight,
        }
@dataclass
class ObjectiveSuggestion:
    """Suggested optimization objective."""
    name: str
    goal: str  # "minimize" or "maximize"
    extractor: str  # name of the extractor that would measure this objective
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    reason: str = ""
    source: str = "goals"  # which input the suggestion was derived from
@dataclass
class ConstraintSuggestion:
    """Suggested optimization constraint."""
    name: str
    type: str  # comparison direction: less_than or greater_than
    suggested_threshold: Optional[float] = None
    units: Optional[str] = None
    confidence: ConfidenceLevel = ConfidenceLevel.MEDIUM
    reason: str = ""
    source: str = "requirements"  # which input the suggestion was derived from
@dataclass
class ImageAnalysis:
    """Analysis result from Claude Vision for an image.

    List fields hold free-text findings extracted from the image;
    ``raw_analysis`` keeps the unprocessed model response.
    """
    image_path: Path
    component_type: Optional[str] = None
    dimensions: List[str] = field(default_factory=list)
    load_conditions: List[str] = field(default_factory=list)
    annotations: List[str] = field(default_factory=list)
    suggestions: List[str] = field(default_factory=list)
    raw_analysis: Optional[str] = None
@dataclass
class LACInsight:
    """Insight from Learning Atomizer Core."""
    study_name: str
    similarity_score: float  # NOTE(review): scale/range not defined here — confirm
    geometry_type: str
    method_used: str
    objectives: List[str]
    trials_to_convergence: Optional[int] = None
    success: bool = True
    lesson: Optional[str] = None  # free-text takeaway from the prior study
@dataclass
class StudyContext:
    """
    Complete context for study creation.

    This is the central data structure that combines all information
    gathered during intake processing, ready for use by Interview Mode
    or Canvas Mode.

    save()/load() persist a JSON subset of the context. The persisted
    fields now round-trip: ``idealized_prt_file`` is saved and restored,
    and ``suggested_dvs`` (previously saved but discarded on load) are
    reconstructed. Fields not listed in save() are intentionally not
    persisted.
    """
    # === Identity ===
    study_name: str
    source_folder: Path
    created_at: datetime = field(default_factory=datetime.now)
    # === Model Files ===
    sim_file: Optional[Path] = None
    fem_file: Optional[Path] = None
    prt_file: Optional[Path] = None
    idealized_prt_file: Optional[Path] = None
    # === From Introspection ===
    introspection: Optional[IntrospectionData] = None
    # === From Context Files ===
    goals_text: Optional[str] = None
    requirements_text: Optional[str] = None
    constraints_text: Optional[str] = None
    notes_text: Optional[str] = None
    image_analyses: List[ImageAnalysis] = field(default_factory=list)
    # === From intake.yaml ===
    preconfig: Optional[Any] = None  # IntakeConfig, imported dynamically to avoid circular import
    # === From LAC ===
    similar_studies: List[LACInsight] = field(default_factory=list)
    recommended_method: Optional[str] = None
    known_issues: List[str] = field(default_factory=list)
    user_preferences: Dict[str, Any] = field(default_factory=dict)
    # === Derived Suggestions ===
    suggested_dvs: List[DVSuggestion] = field(default_factory=list)
    suggested_objectives: List[ObjectiveSuggestion] = field(default_factory=list)
    suggested_constraints: List[ConstraintSuggestion] = field(default_factory=list)
    # === Status ===
    warnings: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)

    @property
    def has_introspection(self) -> bool:
        """Check if successful introspection data is available."""
        return self.introspection is not None and self.introspection.success

    @property
    def has_baseline(self) -> bool:
        """Check if successful baseline results are available."""
        return (
            self.introspection is not None
            and self.introspection.baseline is not None
            and self.introspection.baseline.success
        )

    @property
    def has_preconfig(self) -> bool:
        """Check if pre-configuration (intake.yaml) is available."""
        return self.preconfig is not None

    @property
    def ready_for_interview(self) -> bool:
        """Check if context is ready for interview mode (introspected, no errors)."""
        return self.has_introspection and len(self.errors) == 0

    @property
    def ready_for_canvas(self) -> bool:
        """Check if context is ready for canvas mode (introspected + sim file)."""
        return self.has_introspection and self.sim_file is not None

    def get_baseline_summary(self) -> str:
        """Get human-readable baseline summary, or a placeholder when absent."""
        if self.introspection is None or self.introspection.baseline is None:
            return "No baseline data"
        return self.introspection.baseline.get_summary()

    def get_missing_required(self) -> List[str]:
        """Get list of missing required items (sim file and introspection)."""
        missing = []
        if self.sim_file is None:
            missing.append("Simulation file (.sim)")
        if not self.has_introspection:
            missing.append("Model introspection")
        return missing

    def get_context_summary(self) -> Dict[str, Any]:
        """Get a summary of loaded context for display."""
        return {
            "study_name": self.study_name,
            "has_model": self.sim_file is not None,
            "has_introspection": self.has_introspection,
            "has_baseline": self.has_baseline,
            "has_goals": self.goals_text is not None,
            "has_requirements": self.requirements_text is not None,
            "has_preconfig": self.has_preconfig,
            "num_expressions": len(self.introspection.expressions) if self.introspection else 0,
            "num_dv_candidates": len(self.introspection.get_design_candidates())
            if self.introspection
            else 0,
            "num_similar_studies": len(self.similar_studies),
            "warnings": self.warnings,
            "errors": self.errors,
        }

    def to_interview_context(self) -> Dict[str, Any]:
        """Get context formatted for interview mode."""
        return {
            "study_name": self.study_name,
            "baseline": (
                self.introspection.baseline.to_dict()
                if self.introspection is not None and self.introspection.baseline is not None
                else None
            ),
            "expressions": [e.to_dict() for e in self.introspection.expressions]
            if self.introspection
            else [],
            "design_candidates": [e.to_dict() for e in self.introspection.get_design_candidates()]
            if self.introspection
            else [],
            "solver_type": self.introspection.get_solver_type() if self.introspection else None,
            "goals_text": self.goals_text,
            "requirements_text": self.requirements_text,
            "preconfig": self.preconfig.model_dump() if self.preconfig else None,
            "suggested_dvs": [dv.to_dict() for dv in self.suggested_dvs],
            "similar_studies": [
                {"name": s.study_name, "method": s.method_used, "similarity": s.similarity_score}
                for s in self.similar_studies
            ],
            "recommended_method": self.recommended_method,
        }

    def save(self, output_path: Path) -> None:
        """Save the persistable subset of the context to a JSON file."""
        data = {
            "study_name": self.study_name,
            "source_folder": str(self.source_folder),
            "created_at": self.created_at.isoformat(),
            "sim_file": str(self.sim_file) if self.sim_file else None,
            "fem_file": str(self.fem_file) if self.fem_file else None,
            "prt_file": str(self.prt_file) if self.prt_file else None,
            "idealized_prt_file": str(self.idealized_prt_file) if self.idealized_prt_file else None,
            "introspection": self.introspection.to_dict() if self.introspection else None,
            "goals_text": self.goals_text,
            "requirements_text": self.requirements_text,
            "suggested_dvs": [dv.to_dict() for dv in self.suggested_dvs],
            "warnings": self.warnings,
            "errors": self.errors,
        }
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load(cls, input_path: Path) -> "StudyContext":
        """Load context from a JSON file written by save().

        Missing keys are tolerated, so files written by older versions
        (without idealized_prt_file) still load.
        """
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        context = cls(
            study_name=data["study_name"],
            source_folder=Path(data["source_folder"]),
            created_at=datetime.fromisoformat(data["created_at"]),
        )
        for attr in ("sim_file", "fem_file", "prt_file", "idealized_prt_file"):
            if data.get(attr):
                setattr(context, attr, Path(data[attr]))
        if data.get("introspection"):
            context.introspection = IntrospectionData.from_dict(data["introspection"])
        # Restore suggested design variables (previously saved but dropped).
        for dv_data in data.get("suggested_dvs") or []:
            bounds = dv_data.get("suggested_bounds")
            context.suggested_dvs.append(
                DVSuggestion(
                    name=dv_data["name"],
                    current_value=dv_data.get("current_value"),
                    suggested_bounds=tuple(bounds) if bounds else None,
                    units=dv_data.get("units"),
                    confidence=ConfidenceLevel(dv_data.get("confidence", "medium")),
                    reason=dv_data.get("reason", ""),
                    source=dv_data.get("source", "introspection"),
                    lac_insight=dv_data.get("lac_insight"),
                )
            )
        context.goals_text = data.get("goals_text")
        context.requirements_text = data.get("requirements_text")
        context.warnings = data.get("warnings", [])
        context.errors = data.get("errors", [])
        return context

View File

@@ -0,0 +1,789 @@
"""
Intake Processor
================
Processes intake folders to create study context:
1. Validates folder structure
2. Copies model files to study directory
3. Parses intake.yaml pre-configuration
4. Extracts text from context files (goals.md, PDFs)
5. Runs model introspection
6. Optionally runs baseline solve
7. Assembles complete StudyContext
Usage:
from optimization_engine.intake import IntakeProcessor
processor = IntakeProcessor(Path("studies/_inbox/my_project"))
context = processor.process(run_baseline=True)
"""
from __future__ import annotations
import logging
import shutil
import re
from datetime import datetime
from pathlib import Path
from typing import Optional, List, Callable, Dict, Any
from .config import IntakeConfig, DesignVariableConfig
from .context import (
StudyContext,
IntrospectionData,
ExpressionInfo,
SolutionInfo,
BaselineResult,
DVSuggestion,
ObjectiveSuggestion,
ConstraintSuggestion,
ConfidenceLevel,
)
logger = logging.getLogger(__name__)
class IntakeError(Exception):
    """Error during intake processing.

    Raised for unrecoverable setup problems, e.g. a missing inbox folder.
    """
    pass
class IntakeProcessor:
"""
Processes an intake folder to create a complete StudyContext.
The processor handles:
- File discovery and validation
- Model file copying
- Configuration parsing
- Context file extraction
- Model introspection (via NX journals)
- Baseline solve (optional)
- Suggestion generation
"""
def __init__(
self,
inbox_folder: Path,
studies_dir: Optional[Path] = None,
progress_callback: Optional[Callable[[str, float], None]] = None,
):
"""
Initialize the intake processor.
Args:
inbox_folder: Path to the intake folder (in _inbox/)
studies_dir: Base studies directory (default: auto-detect)
progress_callback: Optional callback for progress updates (message, percent)
"""
self.inbox_folder = Path(inbox_folder)
self.progress_callback = progress_callback or (lambda m, p: None)
# Validate inbox folder exists
if not self.inbox_folder.exists():
raise IntakeError(f"Inbox folder not found: {self.inbox_folder}")
# Determine study name from folder name
self.study_name = self.inbox_folder.name
if self.study_name.startswith("_"):
# Strip leading underscore (used for examples)
self.study_name = self.study_name[1:]
# Set studies directory
if studies_dir is None:
# Find project root
current = Path(__file__).parent
while current != current.parent:
if (current / "CLAUDE.md").exists():
studies_dir = current / "studies"
break
current = current.parent
else:
studies_dir = Path.cwd() / "studies"
self.studies_dir = Path(studies_dir)
self.study_dir = self.studies_dir / self.study_name
# Initialize context
self.context = StudyContext(
study_name=self.study_name,
source_folder=self.inbox_folder,
)
    def process(
        self,
        run_baseline: bool = True,
        copy_files: bool = True,
        run_introspection: bool = True,
    ) -> StudyContext:
        """
        Process the intake folder and create StudyContext.

        Runs the pipeline steps in order, reporting progress after each.
        Any exception is appended to ``context.errors`` and then re-raised,
        so callers see both the failure and the partially-built context.

        Args:
            run_baseline: Run a baseline FEA solve to get actual values
                (skipped automatically when no .sim file was discovered)
            copy_files: Copy model files to study directory
            run_introspection: Run NX model introspection

        Returns:
            Complete StudyContext ready for interview or canvas
        """
        logger.info(f"Processing intake: {self.inbox_folder}")
        try:
            # Step 1: Discover files
            self._progress("Discovering files...", 0.0)
            self._discover_files()
            # Step 2: Parse intake.yaml
            self._progress("Parsing configuration...", 0.1)
            self._parse_config()
            # Step 3: Extract context files
            self._progress("Extracting context...", 0.2)
            self._extract_context_files()
            # Step 4: Copy model files
            if copy_files:
                self._progress("Copying model files...", 0.3)
                self._copy_model_files()
            # Step 5: Run introspection
            if run_introspection:
                self._progress("Introspecting model...", 0.4)
                self._run_introspection()
            # Step 6: Run baseline solve (requires a discovered sim file)
            if run_baseline and self.context.sim_file:
                self._progress("Running baseline solve...", 0.6)
                self._run_baseline_solve()
            # Step 7: Generate suggestions
            self._progress("Generating suggestions...", 0.8)
            self._generate_suggestions()
            # Step 8: Save context
            self._progress("Saving context...", 0.9)
            self._save_context()
            self._progress("Complete!", 1.0)
        except Exception as e:
            # Record the failure on the context before propagating it.
            self.context.errors.append(str(e))
            logger.error(f"Intake processing failed: {e}")
            raise
        return self.context
def _progress(self, message: str, percent: float) -> None:
"""Report progress."""
logger.info(f"[{percent * 100:.0f}%] {message}")
self.progress_callback(message, percent)
    def _discover_files(self) -> None:
        """Discover model and context files in the inbox folder.

        Searches models/ when present, otherwise the inbox root. When
        multiple files of a kind exist, the first glob match is used.
        Missing .sim or *_i.prt files produce warnings, not errors.
        """
        # Look for model files
        models_dir = self.inbox_folder / "models"
        if models_dir.exists():
            search_dir = models_dir
        else:
            # Fall back to root folder
            search_dir = self.inbox_folder
        # Find simulation file (required)
        sim_files = list(search_dir.glob("*.sim"))
        if sim_files:
            self.context.sim_file = sim_files[0]
            logger.info(f"Found sim file: {self.context.sim_file.name}")
        else:
            self.context.warnings.append("No .sim file found in models/")
        # Find FEM file
        fem_files = list(search_dir.glob("*.fem"))
        if fem_files:
            self.context.fem_file = fem_files[0]
            logger.info(f"Found fem file: {self.context.fem_file.name}")
        # Find part file (excluding idealized parts, matched case-insensitively)
        prt_files = [f for f in search_dir.glob("*.prt") if "_i.prt" not in f.name.lower()]
        if prt_files:
            self.context.prt_file = prt_files[0]
            logger.info(f"Found prt file: {self.context.prt_file.name}")
        # Find idealized part (CRITICAL!)
        idealized_files = list(search_dir.glob("*_i.prt")) + list(search_dir.glob("*_I.prt"))
        if idealized_files:
            self.context.idealized_prt_file = idealized_files[0]
            logger.info(f"Found idealized prt: {self.context.idealized_prt_file.name}")
        else:
            self.context.warnings.append(
                "No idealized part (*_i.prt) found - mesh may not update during optimization!"
            )
def _parse_config(self) -> None:
    """Load intake.yaml when present; otherwise intake falls back to interview mode.

    A study name given in the config overrides the folder-derived default,
    which also re-targets the study directory. Parse failures become
    warnings on the context rather than exceptions.
    """
    config_path = self.inbox_folder / "intake.yaml"
    if not config_path.exists():
        logger.info("No intake.yaml found, will use interview mode")
        return
    try:
        self.context.preconfig = IntakeConfig.from_yaml(config_path)
        logger.info("Loaded intake.yaml configuration")
        study_cfg = self.context.preconfig.study
        if study_cfg and study_cfg.name:
            # Re-point the study name and directory at the configured name.
            self.context.study_name = study_cfg.name
            self.study_name = self.context.study_name
            self.study_dir = self.studies_dir / self.study_name
    except Exception as e:
        self.context.warnings.append(f"Failed to parse intake.yaml: {e}")
        logger.warning(f"Failed to parse intake.yaml: {e}")
def _extract_context_files(self) -> None:
    """Read textual context (goals, constraints, notes, PDFs) from context/.

    Populates goals_text, constraints_text, notes_text and requirements_text
    on the study context. All reads are best-effort: missing files are
    skipped and PDF extraction failures become warnings.
    """
    context_dir = self.inbox_folder / "context"

    # Read goals.md
    goals_path = context_dir / "goals.md"
    if goals_path.exists():
        self.context.goals_text = goals_path.read_text(encoding="utf-8")
        logger.info("Loaded goals.md")

    # Read constraints.txt
    constraints_path = context_dir / "constraints.txt"
    if constraints_path.exists():
        self.context.constraints_text = constraints_path.read_text(encoding="utf-8")
        logger.info("Loaded constraints.txt")

    if not context_dir.exists():
        return

    # Read any other .txt or .md files in context/ into notes_text.
    # (Previously .md files were skipped despite the documented intent.)
    note_files = [f for f in context_dir.glob("*.txt") if f.name != "constraints.txt"]
    note_files += [f for f in context_dir.glob("*.md") if f.name != "goals.md"]
    for note_file in note_files:
        content = note_file.read_text(encoding="utf-8")
        if self.context.notes_text:
            self.context.notes_text += f"\n\n--- {note_file.name} ---\n{content}"
        else:
            self.context.notes_text = content

    # Extract PDF text (basic implementation)
    # TODO: Add PyMuPDF and Claude Vision integration
    for pdf_path in context_dir.glob("*.pdf"):
        try:
            text = self._extract_pdf_text(pdf_path)
        except Exception as e:
            self.context.warnings.append(f"Failed to extract PDF {pdf_path.name}: {e}")
            continue
        if not text:
            continue
        # Accumulate rather than overwrite, so every PDF contributes
        # (previously only the last PDF's text survived).
        if self.context.requirements_text:
            self.context.requirements_text += f"\n\n--- {pdf_path.name} ---\n{text}"
        else:
            self.context.requirements_text = text
        logger.info(f"Extracted text from {pdf_path.name}")
def _extract_pdf_text(self, pdf_path: Path) -> Optional[str]:
    """Return the plain text of *pdf_path*, or None when extraction fails.

    Uses PyMuPDF (fitz) when installed; a missing dependency or any
    extraction error is logged and swallowed so intake can continue
    without the PDF contents.
    """
    try:
        import fitz  # PyMuPDF
    except ImportError:
        logger.warning("PyMuPDF not installed, skipping PDF extraction")
        return None
    try:
        doc = fitz.open(pdf_path)
        pages = [page.get_text() for page in doc]
        doc.close()
        return "\n".join(pages)
    except Exception as e:
        logger.warning(f"PDF extraction failed: {e}")
        return None
def _copy_model_files(self) -> None:
    """Copy discovered model files into <study_dir>/1_model and re-point
    the context's file paths at the copies.

    Also creates the 2_iterations/ and 3_results/ skeleton directories.
    Files already present in 1_model/ are left untouched.
    """
    model_dir = self.study_dir / "1_model"
    model_dir.mkdir(parents=True, exist_ok=True)
    (self.study_dir / "2_iterations").mkdir(exist_ok=True)
    (self.study_dir / "3_results").mkdir(exist_ok=True)

    model_attrs = ("sim_file", "fem_file", "prt_file", "idealized_prt_file")

    # Copy each discovered file into the study tree (skip existing copies).
    for attr in model_attrs:
        src = getattr(self.context, attr)
        if not (src and src.exists()):
            continue
        dst = model_dir / src.name
        if dst.exists():
            logger.info(f"Already exists: {src.name}")
        else:
            shutil.copy2(src, dst)
            logger.info(f"Copied: {src.name}")

    # Re-point context paths at the in-study copies.
    for attr in model_attrs:
        src = getattr(self.context, attr)
        if src:
            setattr(self.context, attr, model_dir / src.name)
def _run_introspection(self) -> None:
    """Introspect the NX part for expressions and flag design-variable candidates.

    Stores an IntrospectionData record on the context. An unavailable
    introspection module or any failure is captured in the record's
    error field instead of propagating.
    """
    if not self.context.sim_file or not self.context.sim_file.exists():
        self.context.warnings.append("Cannot introspect - no sim file")
        return
    data = IntrospectionData(timestamp=datetime.now())
    try:
        from optimization_engine.extractors.introspect_part import introspect_part_expressions

        part = self.context.prt_file
        if part and part.exists():
            for expr in introspect_part_expressions(str(part)):
                candidate = self._is_design_candidate(expr["name"], expr.get("value"))
                # Candidates get higher confidence than plain expressions.
                level = ConfidenceLevel.HIGH if candidate else ConfidenceLevel.MEDIUM
                data.expressions.append(
                    ExpressionInfo(
                        name=expr["name"],
                        value=expr.get("value"),
                        units=expr.get("units"),
                        formula=expr.get("formula"),
                        type=expr.get("type", "Number"),
                        is_design_candidate=candidate,
                        confidence=level,
                    )
                )
        data.success = True
        logger.info(f"Introspected {len(data.expressions)} expressions")
    except ImportError:
        logger.warning("Introspection module not available, using fallback")
        data.success = False
        data.error = "Introspection module not available"
    except Exception as e:
        logger.error(f"Introspection failed: {e}")
        data.success = False
        data.error = str(e)
    self.context.introspection = data
def _is_design_candidate(self, name: str, value: Optional[float]) -> bool:
"""Check if an expression looks like a design variable candidate."""
# Skip if no value or non-numeric
if value is None:
return False
# Skip system/reference expressions
if name.startswith("p") and name[1:].isdigit():
return False
# Skip mass-related outputs (not inputs)
if "mass" in name.lower() and "input" not in name.lower():
return False
# Look for typical design parameter names
design_keywords = [
"thickness",
"width",
"height",
"length",
"radius",
"diameter",
"angle",
"offset",
"depth",
"size",
"span",
"pitch",
"gap",
"rib",
"flange",
"web",
"wall",
"fillet",
"chamfer",
]
name_lower = name.lower()
return any(kw in name_lower for kw in design_keywords)
def _run_baseline_solve(self) -> None:
    """Solve the unmodified model once to capture baseline metrics.

    Stores a BaselineResult on the context's introspection record; solver
    unavailability or failure is captured in the result's error field.
    """
    if not self.context.introspection:
        self.context.introspection = IntrospectionData(timestamp=datetime.now())
    baseline = BaselineResult()
    try:
        from optimization_engine.nx.solver import NXSolver

        model_dir = self.context.sim_file.parent
        result = NXSolver().run_simulation(
            sim_file=self.context.sim_file,
            working_dir=model_dir,
            expression_updates={},  # baseline means no expression changes
            cleanup=True,
        )
        if not result["success"]:
            baseline.success = False
            baseline.error = result.get("error", "Unknown error")
            logger.warning(f"Baseline solve failed: {baseline.error}")
        else:
            baseline.success = True
            baseline.solve_time_seconds = result.get("solve_time", 0)
            # Pull displacement/stress/mass out of the solver outputs.
            op2_file = result.get("op2_file")
            if op2_file and Path(op2_file).exists():
                self._extract_baseline_results(baseline, Path(op2_file), model_dir)
            logger.info(f"Baseline solve complete: {baseline.get_summary()}")
    except ImportError:
        logger.warning("NXSolver not available, skipping baseline")
        baseline.success = False
        baseline.error = "NXSolver not available"
    except Exception as e:
        logger.error(f"Baseline solve failed: {e}")
        baseline.success = False
        baseline.error = str(e)
    self.context.introspection.baseline = baseline
def _extract_baseline_results(
    self, baseline: BaselineResult, op2_file: Path, model_dir: Path
) -> None:
    """Best-effort extraction of displacement, stress and mass into *baseline*.

    Each extractor runs independently: failures are logged at debug level
    and leave the corresponding field unset.
    """
    # Displacement from the OP2 results file.
    try:
        from optimization_engine.extractors.extract_displacement import extract_displacement

        baseline.max_displacement_mm = extract_displacement(op2_file, subcase=1).get(
            "max_displacement"
        )
    except Exception as e:
        logger.debug(f"Displacement extraction failed: {e}")

    # Von Mises stress from the OP2 results file.
    try:
        from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress

        baseline.max_stress_mpa = extract_solid_stress(op2_file, subcase=1).get("max_von_mises")
    except Exception as e:
        logger.debug(f"Stress extraction failed: {e}")

    # Mass from the first .dat (BDF) file in the model directory.
    try:
        from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf

        dat_files = list(model_dir.glob("*.dat"))
        if dat_files:
            baseline.mass_kg = extract_mass_from_bdf(str(dat_files[0]))
    except Exception as e:
        logger.debug(f"Mass extraction failed: {e}")
def _generate_suggestions(self) -> None:
    """Run every suggestion pass: DVs, objectives, constraints, then LAC lookup."""
    passes = (
        self._generate_dv_suggestions,
        self._generate_objective_suggestions,
        self._generate_constraint_suggestions,
        self._query_lac,
    )
    for suggestion_pass in passes:
        suggestion_pass()
def _generate_dv_suggestions(self) -> None:
    """Build design-variable suggestions from introspection, letting
    intake.yaml entries override or extend the heuristic ones."""
    suggestions: Dict[str, DVSuggestion] = {}

    # Pass 1: numeric expressions flagged by introspection.
    introspection = self.context.introspection
    if introspection:
        for expr in introspection.get_design_candidates():
            value = expr.value
            if value is None or not isinstance(value, (int, float)):
                continue
            # Default search window: 50%-150% of the current value, ordered
            # low-to-high. NOTE(review): a zero value yields (0.0, 0.0) —
            # degenerate bounds; confirm intended handling.
            lo, hi = value * 0.5, value * 1.5
            bounds = (lo, hi) if value > 0 else (hi, lo)
            suggestions[expr.name] = DVSuggestion(
                name=expr.name,
                current_value=value,
                suggested_bounds=bounds,
                units=expr.units,
                confidence=expr.confidence,
                reason=f"Numeric expression with value {value}",
                source="introspection",
            )

    # Pass 2: explicit intake.yaml entries win over heuristics.
    preconfig = self.context.preconfig
    if preconfig and preconfig.design_variables:
        for dv in preconfig.design_variables:
            existing = suggestions.get(dv.name)
            if existing is not None:
                existing.suggested_bounds = dv.bounds
                existing.units = dv.units or existing.units
                existing.source = "preconfig"
                existing.confidence = ConfidenceLevel.HIGH
            else:
                suggestions[dv.name] = DVSuggestion(
                    name=dv.name,
                    suggested_bounds=dv.bounds,
                    units=dv.units,
                    confidence=ConfidenceLevel.HIGH,
                    reason="Specified in intake.yaml",
                    source="preconfig",
                )

    self.context.suggested_dvs = list(suggestions.values())
    logger.info(f"Generated {len(self.context.suggested_dvs)} DV suggestions")
def _generate_objective_suggestions(self) -> None:
    """Suggest optimization objectives from intake.yaml or goal-text keywords.

    An explicit objective in intake.yaml wins outright; otherwise the
    free-text goals are scanned with naive keyword matching.
    """
    suggestions = []
    preconfig = self.context.preconfig

    if preconfig and preconfig.objectives:
        primary = preconfig.objectives.primary
        suggestions.append(
            ObjectiveSuggestion(
                name=primary.target,
                goal=primary.goal,
                extractor=self._get_extractor_for_target(primary.target),
                confidence=ConfidenceLevel.HIGH,
                reason="Specified in intake.yaml",
                source="preconfig",
            )
        )
    elif self.context.goals_text:
        goals_lower = self.context.goals_text.lower()
        # "minimize mass" / "minimize weight" -> mass objective (first match wins).
        if "minimize" in goals_lower:
            for word in ("mass", "weight"):
                if word in goals_lower:
                    suggestions.append(
                        ObjectiveSuggestion(
                            name="mass",
                            goal="minimize",
                            extractor="extract_mass_from_bdf",
                            confidence=ConfidenceLevel.MEDIUM,
                            reason=f"Found 'minimize {word}' in goals",
                            source="goals",
                        )
                    )
                    break
        # "maximize stiffness" -> stiffness objective via displacement proxy.
        if "maximize" in goals_lower and "stiffness" in goals_lower:
            suggestions.append(
                ObjectiveSuggestion(
                    name="stiffness",
                    goal="maximize",
                    extractor="extract_displacement",  # Inverse of displacement
                    confidence=ConfidenceLevel.MEDIUM,
                    reason="Found 'maximize stiffness' in goals",
                    source="goals",
                )
            )
    self.context.suggested_objectives = suggestions
def _generate_constraint_suggestions(self) -> None:
    """Suggest constraints from intake.yaml plus limits mined from the
    requirements text via simple regex patterns."""
    suggestions = []

    # Explicit constraints from intake.yaml.
    preconfig = self.context.preconfig
    if preconfig and preconfig.constraints:
        for const in preconfig.constraints:
            direction = "less_than" if "max" in const.type else "greater_than"
            suggestions.append(
                ConstraintSuggestion(
                    name=const.type,
                    type=direction,
                    suggested_threshold=const.threshold,
                    units=const.units,
                    confidence=ConfidenceLevel.HIGH,
                    reason="Specified in intake.yaml",
                    source="preconfig",
                )
            )

    # Limits mined from free-text requirements: (name, units, pattern, reason).
    text = self.context.requirements_text
    if text:
        mined_limits = (
            (
                "max_stress",
                "MPa",
                r"(?:max(?:imum)?|stress)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:MPa|mpa)",
                "Found stress limit in requirements: {} MPa",
            ),
            (
                "max_displacement",
                "mm",
                r"(?:max(?:imum)?|displacement|deflection)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:mm|MM)",
                "Found displacement limit in requirements: {} mm",
            ),
        )
        for limit_name, units, pattern, reason_fmt in mined_limits:
            matches = re.findall(pattern, text, re.IGNORECASE)
            if matches:
                # First match wins; mined limits are always upper bounds.
                suggestions.append(
                    ConstraintSuggestion(
                        name=limit_name,
                        type="less_than",
                        suggested_threshold=float(matches[0]),
                        units=units,
                        confidence=ConfidenceLevel.MEDIUM,
                        reason=reason_fmt.format(matches[0]),
                        source="requirements",
                    )
                )

    self.context.suggested_constraints = suggestions
def _get_extractor_for_target(self, target: str) -> str:
"""Map optimization target to extractor function."""
extractors = {
"mass": "extract_mass_from_bdf",
"displacement": "extract_displacement",
"stress": "extract_solid_stress",
"frequency": "extract_frequency",
"stiffness": "extract_displacement", # Inverse
"strain_energy": "extract_strain_energy",
}
return extractors.get(target.lower(), f"extract_{target}")
def _query_lac(self) -> None:
    """Ask the Learning Atomizer Core for similar studies and a method hint.

    Entirely best-effort: an absent or failing LAC is logged at debug
    level and leaves the context unchanged.
    """
    try:
        from knowledge_base.lac import get_lac

        lac = get_lac()
        # Build a compact query: study name plus the first 200 chars of goals.
        query_terms = [self.study_name]
        if self.context.goals_text:
            query_terms.append(self.context.goals_text[:200])
        similar = lac.query_similar_optimizations(" ".join(query_terms))

        # Ask for a method recommendation sized to the objective count.
        preconfig = self.context.preconfig
        n_objectives = (
            len(preconfig.objectives.all_objectives)
            if preconfig and preconfig.objectives
            else 1
        )
        recommendation = lac.get_best_method_for(
            geometry_type="unknown", n_objectives=n_objectives
        )
        if recommendation:
            self.context.recommended_method = recommendation.get("method")
        logger.info(f"LAC query complete: {len(similar)} similar studies found")
    except ImportError:
        logger.debug("LAC not available")
    except Exception as e:
        logger.debug(f"LAC query failed: {e}")
def _save_context(self) -> None:
    """Persist the assembled context and original intake inputs under
    <study_dir>/0_intake."""
    intake_root = self.study_dir / "0_intake"
    intake_root.mkdir(parents=True, exist_ok=True)

    # Machine-readable study context.
    self.context.save(intake_root / "study_context.json")

    # Introspection report, when introspection ran.
    if self.context.introspection:
        import json

        with open(intake_root / "introspection.json", "w") as f:
            json.dump(self.context.introspection.to_dict(), f, indent=2)

    # Preserve the user's original context files verbatim.
    original_dir = intake_root / "original_context"
    original_dir.mkdir(parents=True, exist_ok=True)
    context_source = self.inbox_folder / "context"
    if context_source.exists():
        for item in context_source.iterdir():
            if item.is_file():
                shutil.copy2(item, original_dir / item.name)

    # And the intake.yaml itself.
    intake_yaml = self.inbox_folder / "intake.yaml"
    if intake_yaml.exists():
        shutil.copy2(intake_yaml, intake_root / "intake.yaml")

    logger.info(f"Saved context to {self.study_dir / '0_intake'}")
def process_intake(
    inbox_folder: Path,
    run_baseline: bool = True,
    progress_callback: Optional[Callable[[str, float], None]] = None,
) -> StudyContext:
    """Process an intake folder in one call.

    Thin convenience wrapper around IntakeProcessor.

    Args:
        inbox_folder: Path to the inbox folder to process.
        run_baseline: Whether to run the baseline FEA solve.
        progress_callback: Optional (message, percent) progress callback.

    Returns:
        The fully assembled StudyContext.
    """
    return IntakeProcessor(inbox_folder, progress_callback=progress_callback).process(
        run_baseline=run_baseline
    )