""" Intake Processor ================ Processes intake folders to create study context: 1. Validates folder structure 2. Copies model files to study directory 3. Parses intake.yaml pre-configuration 4. Extracts text from context files (goals.md, PDFs) 5. Runs model introspection 6. Optionally runs baseline solve 7. Assembles complete StudyContext Usage: from optimization_engine.intake import IntakeProcessor processor = IntakeProcessor(Path("studies/_inbox/my_project")) context = processor.process(run_baseline=True) """ from __future__ import annotations import logging import shutil import re from datetime import datetime from pathlib import Path from typing import Optional, List, Callable, Dict, Any from .config import IntakeConfig, DesignVariableConfig from .context import ( StudyContext, IntrospectionData, ExpressionInfo, SolutionInfo, BaselineResult, DVSuggestion, ObjectiveSuggestion, ConstraintSuggestion, ConfidenceLevel, ) logger = logging.getLogger(__name__) class IntakeError(Exception): """Error during intake processing.""" pass class IntakeProcessor: """ Processes an intake folder to create a complete StudyContext. The processor handles: - File discovery and validation - Model file copying - Configuration parsing - Context file extraction - Model introspection (via NX journals) - Baseline solve (optional) - Suggestion generation """ def __init__( self, inbox_folder: Path, studies_dir: Optional[Path] = None, progress_callback: Optional[Callable[[str, float], None]] = None, ): """ Initialize the intake processor. Args: inbox_folder: Path to the intake folder (in _inbox/) studies_dir: Base studies directory (default: auto-detect) progress_callback: Optional callback for progress updates (message, percent) """ self.inbox_folder = Path(inbox_folder) self.progress_callback = progress_callback or (lambda m, p: None) # Validate inbox folder exists if not self.inbox_folder.exists(): raise IntakeError(f"Inbox folder not found: {self.inbox_folder}") # Determine study name from folder name self.study_name = self.inbox_folder.name if self.study_name.startswith("_"): # Strip leading underscore (used for examples) self.study_name = self.study_name[1:] # Set studies directory if studies_dir is None: # Find project root current = Path(__file__).parent while current != current.parent: if (current / "CLAUDE.md").exists(): studies_dir = current / "studies" break current = current.parent else: studies_dir = Path.cwd() / "studies" self.studies_dir = Path(studies_dir) self.study_dir = self.studies_dir / self.study_name # Initialize context self.context = StudyContext( study_name=self.study_name, source_folder=self.inbox_folder, ) def process( self, run_baseline: bool = True, copy_files: bool = True, run_introspection: bool = True, ) -> StudyContext: """ Process the intake folder and create StudyContext. Args: run_baseline: Run a baseline FEA solve to get actual values copy_files: Copy model files to study directory run_introspection: Run NX model introspection Returns: Complete StudyContext ready for interview or canvas """ logger.info(f"Processing intake: {self.inbox_folder}") try: # Step 1: Discover files self._progress("Discovering files...", 0.0) self._discover_files() # Step 2: Parse intake.yaml self._progress("Parsing configuration...", 0.1) self._parse_config() # Step 3: Extract context files self._progress("Extracting context...", 0.2) self._extract_context_files() # Step 4: Copy model files if copy_files: self._progress("Copying model files...", 0.3) self._copy_model_files() # Step 5: Run introspection if run_introspection: self._progress("Introspecting model...", 0.4) self._run_introspection() # Step 6: Run baseline solve if run_baseline and self.context.sim_file: self._progress("Running baseline solve...", 0.6) self._run_baseline_solve() # Step 7: Generate suggestions self._progress("Generating suggestions...", 0.8) self._generate_suggestions() # Step 8: Save context self._progress("Saving context...", 0.9) self._save_context() self._progress("Complete!", 1.0) except Exception as e: self.context.errors.append(str(e)) logger.error(f"Intake processing failed: {e}") raise return self.context def _progress(self, message: str, percent: float) -> None: """Report progress.""" logger.info(f"[{percent * 100:.0f}%] {message}") self.progress_callback(message, percent) def _discover_files(self) -> None: """Discover model and context files in the inbox folder.""" # Look for model files models_dir = self.inbox_folder / "models" if models_dir.exists(): search_dir = models_dir else: # Fall back to root folder search_dir = self.inbox_folder # Find simulation file (required) sim_files = list(search_dir.glob("*.sim")) if sim_files: self.context.sim_file = sim_files[0] logger.info(f"Found sim file: {self.context.sim_file.name}") else: self.context.warnings.append("No .sim file found in models/") # Find FEM file fem_files = list(search_dir.glob("*.fem")) if fem_files: self.context.fem_file = fem_files[0] logger.info(f"Found fem file: {self.context.fem_file.name}") # Find part file prt_files = [f for f in search_dir.glob("*.prt") if "_i.prt" not in f.name.lower()] if prt_files: self.context.prt_file = prt_files[0] logger.info(f"Found prt file: {self.context.prt_file.name}") # Find idealized part (CRITICAL!) idealized_files = list(search_dir.glob("*_i.prt")) + list(search_dir.glob("*_I.prt")) if idealized_files: self.context.idealized_prt_file = idealized_files[0] logger.info(f"Found idealized prt: {self.context.idealized_prt_file.name}") else: self.context.warnings.append( "No idealized part (*_i.prt) found - mesh may not update during optimization!" ) def _parse_config(self) -> None: """Parse intake.yaml if present.""" config_path = self.inbox_folder / "intake.yaml" if config_path.exists(): try: self.context.preconfig = IntakeConfig.from_yaml(config_path) logger.info("Loaded intake.yaml configuration") # Update study name if specified if self.context.preconfig.study and self.context.preconfig.study.name: self.context.study_name = self.context.preconfig.study.name self.study_name = self.context.study_name self.study_dir = self.studies_dir / self.study_name except Exception as e: self.context.warnings.append(f"Failed to parse intake.yaml: {e}") logger.warning(f"Failed to parse intake.yaml: {e}") else: logger.info("No intake.yaml found, will use interview mode") def _extract_context_files(self) -> None: """Extract text from context files.""" context_dir = self.inbox_folder / "context" # Read goals.md goals_path = context_dir / "goals.md" if goals_path.exists(): self.context.goals_text = goals_path.read_text(encoding="utf-8") logger.info("Loaded goals.md") # Read constraints.txt constraints_path = context_dir / "constraints.txt" if constraints_path.exists(): self.context.constraints_text = constraints_path.read_text(encoding="utf-8") logger.info("Loaded constraints.txt") # Read any .txt or .md files in context/ if context_dir.exists(): for txt_file in context_dir.glob("*.txt"): if txt_file.name != "constraints.txt": content = txt_file.read_text(encoding="utf-8") if self.context.notes_text: self.context.notes_text += f"\n\n--- {txt_file.name} ---\n{content}" else: self.context.notes_text = content # Extract PDF text (basic implementation) # TODO: Add PyMuPDF and Claude Vision integration for pdf_path in context_dir.glob("*.pdf") if context_dir.exists() else []: try: text = self._extract_pdf_text(pdf_path) if text: self.context.requirements_text = text logger.info(f"Extracted text from {pdf_path.name}") except Exception as e: self.context.warnings.append(f"Failed to extract PDF {pdf_path.name}: {e}") def _extract_pdf_text(self, pdf_path: Path) -> Optional[str]: """Extract text from PDF using PyMuPDF if available.""" try: import fitz # PyMuPDF doc = fitz.open(pdf_path) text_parts = [] for page in doc: text_parts.append(page.get_text()) doc.close() return "\n".join(text_parts) except ImportError: logger.warning("PyMuPDF not installed, skipping PDF extraction") return None except Exception as e: logger.warning(f"PDF extraction failed: {e}") return None def _copy_model_files(self) -> None: """Copy model files to study directory.""" # Create study directory structure model_dir = self.study_dir / "1_model" model_dir.mkdir(parents=True, exist_ok=True) (self.study_dir / "2_iterations").mkdir(exist_ok=True) (self.study_dir / "3_results").mkdir(exist_ok=True) # Copy files files_to_copy = [ self.context.sim_file, self.context.fem_file, self.context.prt_file, self.context.idealized_prt_file, ] for src in files_to_copy: if src and src.exists(): dst = model_dir / src.name if not dst.exists(): shutil.copy2(src, dst) logger.info(f"Copied: {src.name}") else: logger.info(f"Already exists: {src.name}") # Update paths to point to copied files if self.context.sim_file: self.context.sim_file = model_dir / self.context.sim_file.name if self.context.fem_file: self.context.fem_file = model_dir / self.context.fem_file.name if self.context.prt_file: self.context.prt_file = model_dir / self.context.prt_file.name if self.context.idealized_prt_file: self.context.idealized_prt_file = model_dir / self.context.idealized_prt_file.name def _run_introspection(self) -> None: """Run NX model introspection.""" if not self.context.sim_file or not self.context.sim_file.exists(): self.context.warnings.append("Cannot introspect - no sim file") return introspection = IntrospectionData(timestamp=datetime.now()) try: # Try to use existing introspection modules from optimization_engine.extractors.introspect_part import introspect_part_expressions # Introspect part for expressions if self.context.prt_file and self.context.prt_file.exists(): expressions = introspect_part_expressions(str(self.context.prt_file)) for expr in expressions: is_candidate = self._is_design_candidate(expr["name"], expr.get("value")) introspection.expressions.append( ExpressionInfo( name=expr["name"], value=expr.get("value"), units=expr.get("units"), formula=expr.get("formula"), type=expr.get("type", "Number"), is_design_candidate=is_candidate, confidence=ConfidenceLevel.HIGH if is_candidate else ConfidenceLevel.MEDIUM, ) ) introspection.success = True logger.info(f"Introspected {len(introspection.expressions)} expressions") except ImportError: logger.warning("Introspection module not available, using fallback") introspection.success = False introspection.error = "Introspection module not available" except Exception as e: logger.error(f"Introspection failed: {e}") introspection.success = False introspection.error = str(e) self.context.introspection = introspection def _is_design_candidate(self, name: str, value: Optional[float]) -> bool: """Check if an expression looks like a design variable candidate.""" # Skip if no value or non-numeric if value is None: return False # Skip system/reference expressions if name.startswith("p") and name[1:].isdigit(): return False # Skip mass-related outputs (not inputs) if "mass" in name.lower() and "input" not in name.lower(): return False # Look for typical design parameter names design_keywords = [ "thickness", "width", "height", "length", "radius", "diameter", "angle", "offset", "depth", "size", "span", "pitch", "gap", "rib", "flange", "web", "wall", "fillet", "chamfer", ] name_lower = name.lower() return any(kw in name_lower for kw in design_keywords) def _run_baseline_solve(self) -> None: """Run baseline FEA solve to get actual values.""" if not self.context.introspection: self.context.introspection = IntrospectionData(timestamp=datetime.now()) baseline = BaselineResult() try: from optimization_engine.nx.solver import NXSolver solver = NXSolver() model_dir = self.context.sim_file.parent result = solver.run_simulation( sim_file=self.context.sim_file, working_dir=model_dir, expression_updates={}, # No updates for baseline cleanup=True, ) if result["success"]: baseline.success = True baseline.solve_time_seconds = result.get("solve_time", 0) # Extract results from OP2 op2_file = result.get("op2_file") if op2_file and Path(op2_file).exists(): self._extract_baseline_results(baseline, Path(op2_file), model_dir) logger.info(f"Baseline solve complete: {baseline.get_summary()}") else: baseline.success = False baseline.error = result.get("error", "Unknown error") logger.warning(f"Baseline solve failed: {baseline.error}") except ImportError: logger.warning("NXSolver not available, skipping baseline") baseline.success = False baseline.error = "NXSolver not available" except Exception as e: logger.error(f"Baseline solve failed: {e}") baseline.success = False baseline.error = str(e) self.context.introspection.baseline = baseline def _extract_baseline_results( self, baseline: BaselineResult, op2_file: Path, model_dir: Path ) -> None: """Extract results from OP2 file.""" try: # Try to extract displacement from optimization_engine.extractors.extract_displacement import extract_displacement disp_result = extract_displacement(op2_file, subcase=1) baseline.max_displacement_mm = disp_result.get("max_displacement") except Exception as e: logger.debug(f"Displacement extraction failed: {e}") try: # Try to extract stress from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress stress_result = extract_solid_stress(op2_file, subcase=1) baseline.max_stress_mpa = stress_result.get("max_von_mises") except Exception as e: logger.debug(f"Stress extraction failed: {e}") try: # Try to extract mass from BDF from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf dat_files = list(model_dir.glob("*.dat")) if dat_files: baseline.mass_kg = extract_mass_from_bdf(str(dat_files[0])) except Exception as e: logger.debug(f"Mass extraction failed: {e}") def _generate_suggestions(self) -> None: """Generate intelligent suggestions based on all context.""" self._generate_dv_suggestions() self._generate_objective_suggestions() self._generate_constraint_suggestions() self._query_lac() def _generate_dv_suggestions(self) -> None: """Generate design variable suggestions.""" suggestions: Dict[str, DVSuggestion] = {} # From introspection if self.context.introspection: for expr in self.context.introspection.get_design_candidates(): if expr.value is not None and isinstance(expr.value, (int, float)): # Calculate suggested bounds (50% to 150% of current value) if expr.value > 0: bounds = (expr.value * 0.5, expr.value * 1.5) else: bounds = (expr.value * 1.5, expr.value * 0.5) suggestions[expr.name] = DVSuggestion( name=expr.name, current_value=expr.value, suggested_bounds=bounds, units=expr.units, confidence=expr.confidence, reason=f"Numeric expression with value {expr.value}", source="introspection", ) # Override/add from preconfig if self.context.preconfig and self.context.preconfig.design_variables: for dv in self.context.preconfig.design_variables: if dv.name in suggestions: # Update existing suggestion suggestions[dv.name].suggested_bounds = dv.bounds suggestions[dv.name].units = dv.units or suggestions[dv.name].units suggestions[dv.name].source = "preconfig" suggestions[dv.name].confidence = ConfidenceLevel.HIGH else: # Add new suggestion suggestions[dv.name] = DVSuggestion( name=dv.name, suggested_bounds=dv.bounds, units=dv.units, confidence=ConfidenceLevel.HIGH, reason="Specified in intake.yaml", source="preconfig", ) self.context.suggested_dvs = list(suggestions.values()) logger.info(f"Generated {len(self.context.suggested_dvs)} DV suggestions") def _generate_objective_suggestions(self) -> None: """Generate objective suggestions from context.""" suggestions = [] # From preconfig if self.context.preconfig and self.context.preconfig.objectives: obj = self.context.preconfig.objectives.primary extractor = self._get_extractor_for_target(obj.target) suggestions.append( ObjectiveSuggestion( name=obj.target, goal=obj.goal, extractor=extractor, confidence=ConfidenceLevel.HIGH, reason="Specified in intake.yaml", source="preconfig", ) ) # From goals text (simple keyword matching) elif self.context.goals_text: goals_lower = self.context.goals_text.lower() if "minimize" in goals_lower and "mass" in goals_lower: suggestions.append( ObjectiveSuggestion( name="mass", goal="minimize", extractor="extract_mass_from_bdf", confidence=ConfidenceLevel.MEDIUM, reason="Found 'minimize mass' in goals", source="goals", ) ) elif "minimize" in goals_lower and "weight" in goals_lower: suggestions.append( ObjectiveSuggestion( name="mass", goal="minimize", extractor="extract_mass_from_bdf", confidence=ConfidenceLevel.MEDIUM, reason="Found 'minimize weight' in goals", source="goals", ) ) if "maximize" in goals_lower and "stiffness" in goals_lower: suggestions.append( ObjectiveSuggestion( name="stiffness", goal="maximize", extractor="extract_displacement", # Inverse of displacement confidence=ConfidenceLevel.MEDIUM, reason="Found 'maximize stiffness' in goals", source="goals", ) ) self.context.suggested_objectives = suggestions def _generate_constraint_suggestions(self) -> None: """Generate constraint suggestions from context.""" suggestions = [] # From preconfig if self.context.preconfig and self.context.preconfig.constraints: for const in self.context.preconfig.constraints: suggestions.append( ConstraintSuggestion( name=const.type, type="less_than" if "max" in const.type else "greater_than", suggested_threshold=const.threshold, units=const.units, confidence=ConfidenceLevel.HIGH, reason="Specified in intake.yaml", source="preconfig", ) ) # From requirements text if self.context.requirements_text: # Simple pattern matching for constraints text = self.context.requirements_text # Look for stress limits stress_pattern = r"(?:max(?:imum)?|stress)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:MPa|mpa)" matches = re.findall(stress_pattern, text, re.IGNORECASE) if matches: suggestions.append( ConstraintSuggestion( name="max_stress", type="less_than", suggested_threshold=float(matches[0]), units="MPa", confidence=ConfidenceLevel.MEDIUM, reason=f"Found stress limit in requirements: {matches[0]} MPa", source="requirements", ) ) # Look for displacement limits disp_pattern = ( r"(?:max(?:imum)?|displacement|deflection)\s*[:<]?\s*(\d+(?:\.\d+)?)\s*(?:mm|MM)" ) matches = re.findall(disp_pattern, text, re.IGNORECASE) if matches: suggestions.append( ConstraintSuggestion( name="max_displacement", type="less_than", suggested_threshold=float(matches[0]), units="mm", confidence=ConfidenceLevel.MEDIUM, reason=f"Found displacement limit in requirements: {matches[0]} mm", source="requirements", ) ) self.context.suggested_constraints = suggestions def _get_extractor_for_target(self, target: str) -> str: """Map optimization target to extractor function.""" extractors = { "mass": "extract_mass_from_bdf", "displacement": "extract_displacement", "stress": "extract_solid_stress", "frequency": "extract_frequency", "stiffness": "extract_displacement", # Inverse "strain_energy": "extract_strain_energy", } return extractors.get(target.lower(), f"extract_{target}") def _query_lac(self) -> None: """Query Learning Atomizer Core for similar studies.""" try: from knowledge_base.lac import get_lac lac = get_lac() # Build query from context query_parts = [self.study_name] if self.context.goals_text: query_parts.append(self.context.goals_text[:200]) query = " ".join(query_parts) # Get similar studies similar = lac.query_similar_optimizations(query) # Get method recommendation n_objectives = 1 if self.context.preconfig and self.context.preconfig.objectives: n_objectives = len(self.context.preconfig.objectives.all_objectives) recommendation = lac.get_best_method_for( geometry_type="unknown", n_objectives=n_objectives ) if recommendation: self.context.recommended_method = recommendation.get("method") logger.info(f"LAC query complete: {len(similar)} similar studies found") except ImportError: logger.debug("LAC not available") except Exception as e: logger.debug(f"LAC query failed: {e}") def _save_context(self) -> None: """Save assembled context to study directory.""" # Ensure study directory exists self.study_dir.mkdir(parents=True, exist_ok=True) # Save context JSON context_path = self.study_dir / "0_intake" / "study_context.json" context_path.parent.mkdir(exist_ok=True) self.context.save(context_path) # Save introspection report if self.context.introspection: introspection_path = self.study_dir / "0_intake" / "introspection.json" import json with open(introspection_path, "w") as f: json.dump(self.context.introspection.to_dict(), f, indent=2) # Copy original context files intake_dir = self.study_dir / "0_intake" / "original_context" intake_dir.mkdir(parents=True, exist_ok=True) context_source = self.inbox_folder / "context" if context_source.exists(): for f in context_source.iterdir(): if f.is_file(): shutil.copy2(f, intake_dir / f.name) # Copy intake.yaml intake_yaml = self.inbox_folder / "intake.yaml" if intake_yaml.exists(): shutil.copy2(intake_yaml, self.study_dir / "0_intake" / "intake.yaml") logger.info(f"Saved context to {self.study_dir / '0_intake'}") def process_intake( inbox_folder: Path, run_baseline: bool = True, progress_callback: Optional[Callable[[str, float], None]] = None, ) -> StudyContext: """ Convenience function to process an intake folder. Args: inbox_folder: Path to inbox folder run_baseline: Run baseline solve progress_callback: Optional progress callback Returns: Complete StudyContext """ processor = IntakeProcessor(inbox_folder, progress_callback=progress_callback) return processor.process(run_baseline=run_baseline)