""" Research Agent for Autonomous Learning and Feature Generation This module enables Atomizer to autonomously research unknown domains, learn patterns from examples and documentation, and generate new features. Philosophy: ----------- When encountering a request for functionality that doesn't exist: 1. Detect the knowledge gap by searching the feature registry 2. Plan research strategy: User examples → NX MCP → Web docs 3. Execute interactive research (ask user for examples first) 4. Learn patterns and schemas from gathered information 5. Generate new features following learned patterns 6. Test and validate with user confirmation 7. Document and integrate into knowledge base This creates a self-extending system that grows more capable over time. Example Workflow: ----------------- User: "Create NX material XML for titanium Ti-6Al-4V" ResearchAgent: 1. identify_knowledge_gap() → No 'material_generator' feature found 2. create_research_plan() → Ask user for example XML first 3. execute_interactive_research() → User provides steel_material.xml 4. synthesize_knowledge() → Extract XML schema, material properties 5. design_feature() → Generate nx_material_generator.py 6. validate_with_user() → User confirms generated XML works 7. document_session() → Save to knowledge_base/research_sessions/ Author: Atomizer Development Team Version: 0.1.0 (Phase 2) Last Updated: 2025-01-16 """ import json import os from datetime import datetime from pathlib import Path from typing import Dict, List, Optional, Any import xml.etree.ElementTree as ET class KnowledgeGap: """Represents a detected gap in Atomizer's current capabilities.""" def __init__( self, missing_features: List[str], missing_knowledge: List[str], user_request: str, confidence: float ): """ Initialize knowledge gap. 
Args: missing_features: Feature IDs that don't exist in registry missing_knowledge: Domains we don't have knowledge about user_request: Original user request that triggered detection confidence: How confident we are this is a true gap (0.0-1.0) """ self.missing_features = missing_features self.missing_knowledge = missing_knowledge self.user_request = user_request self.confidence = confidence self.research_needed = confidence < 0.8 def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" return { 'missing_features': self.missing_features, 'missing_knowledge': self.missing_knowledge, 'user_request': self.user_request, 'confidence': self.confidence, 'research_needed': self.research_needed } class ResearchPlan: """A step-by-step plan for researching a knowledge gap.""" def __init__(self, steps: List[Dict[str, Any]]): """ Initialize research plan. Args: steps: List of research steps, each with: - step: Step number (1, 2, 3...) - action: Type of action ('ask_user', 'query_mcp', 'web_search') - priority: Priority level ('high', 'medium', 'low') - details: Action-specific details (query string, etc.) """ self.steps = steps def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" return {'steps': self.steps} class ResearchFindings: """Results gathered from executing a research plan.""" def __init__( self, sources: Dict[str, Any], raw_data: Dict[str, Any], confidence_scores: Dict[str, float] ): """ Initialize research findings. 
Args: sources: Dictionary mapping source type to source details raw_data: Raw data gathered from each source confidence_scores: Confidence score for each source (0.0-1.0) """ self.sources = sources self.raw_data = raw_data self.confidence_scores = confidence_scores def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" return { 'sources': self.sources, 'raw_data': self.raw_data, 'confidence_scores': self.confidence_scores } class SynthesizedKnowledge: """Knowledge synthesized from multiple research sources.""" def __init__( self, schema: Optional[Dict[str, Any]], patterns: List[Dict[str, Any]], examples: List[Dict[str, Any]], confidence: float, synthesis_notes: str ): """ Initialize synthesized knowledge. Args: schema: Extracted schema (e.g., XML structure, API signatures) patterns: Identified reusable patterns examples: Concrete examples demonstrating usage confidence: Overall confidence in synthesized knowledge (0.0-1.0) synthesis_notes: Explanation of synthesis process """ self.schema = schema self.patterns = patterns self.examples = examples self.confidence = confidence self.synthesis_notes = synthesis_notes def to_dict(self) -> Dict[str, Any]: """Convert to dictionary for JSON serialization.""" return { 'schema': self.schema, 'patterns': self.patterns, 'examples': self.examples, 'confidence': self.confidence, 'synthesis_notes': self.synthesis_notes } class ResearchAgent: """ Autonomous research system for learning new capabilities. 
The ResearchAgent enables Atomizer to: - Detect when it lacks knowledge to fulfill a user request - Plan and execute multi-source research - Learn patterns and schemas from examples and documentation - Generate new features based on learned knowledge - Persist knowledge for future use Attributes: feature_registry_path: Path to feature_registry.json knowledge_base_path: Path to knowledge_base/ directory min_confidence_threshold: Minimum confidence to generate code (default: 0.70) """ def __init__( self, feature_registry_path: Optional[Path] = None, knowledge_base_path: Optional[Path] = None, min_confidence_threshold: float = 0.70 ): """ Initialize ResearchAgent. Args: feature_registry_path: Path to feature registry JSON knowledge_base_path: Path to knowledge base directory min_confidence_threshold: Min confidence to generate code (0.0-1.0) """ # Determine paths if feature_registry_path is None: atomizer_root = Path(__file__).parent.parent feature_registry_path = atomizer_root / "optimization_engine" / "feature_registry.json" if knowledge_base_path is None: atomizer_root = Path(__file__).parent.parent knowledge_base_path = atomizer_root / "knowledge_base" self.feature_registry_path = Path(feature_registry_path) self.knowledge_base_path = Path(knowledge_base_path) self.min_confidence_threshold = min_confidence_threshold # Load feature registry self.feature_registry = self._load_feature_registry() def _load_feature_registry(self) -> Dict[str, Any]: """Load feature registry from JSON file.""" if not self.feature_registry_path.exists(): return {'feature_registry': {'version': '0.2.0', 'categories': {}}} with open(self.feature_registry_path, 'r') as f: return json.load(f) def identify_knowledge_gap(self, user_request: str) -> KnowledgeGap: """ Analyze user request and identify what we don't know. This method searches the feature registry to determine if we have the necessary features to fulfill the user's request. 
If not, it identifies what's missing and returns a KnowledgeGap. Args: user_request: The user's natural language request Returns: KnowledgeGap object containing: - missing_features: List of feature IDs we don't have - missing_knowledge: List of domains we lack knowledge in - research_needed: Whether research is required - confidence: How confident we are in this assessment Example: >>> agent = ResearchAgent() >>> gap = agent.identify_knowledge_gap( ... "Create NX material XML for titanium" ... ) >>> gap.missing_features ['material_xml_generator'] >>> gap.research_needed True """ # Convert request to lowercase for case-insensitive matching request_lower = user_request.lower() # Define keywords that indicate different domains domain_keywords = { 'material': ['material', 'material xml', 'physical material', 'alloy', 'steel', 'titanium', 'aluminum'], 'geometry': ['geometry', 'fillet', 'chamfer', 'thickness', 'dimension', 'sketch', 'feature'], 'loads_bc': ['load', 'boundary condition', 'constraint', 'force', 'pressure', 'fixed', 'displacement'], 'mesh': ['mesh', 'element', 'refinement', 'element size', 'mesh quality'], 'analysis': ['analysis', 'modal', 'thermal', 'fatigue', 'buckling', 'nonlinear'], 'reporting': ['report', 'visualization', 'plot', 'chart', 'summary', 'dashboard'], 'optimization': ['optimize', 'minimize', 'maximize', 'pareto', 'multi-objective'] } # Search feature registry for matching features matched_features = [] registry = self.feature_registry.get('feature_registry', {}) categories = registry.get('categories', {}) for category_name, category_data in categories.items(): subcategories = category_data.get('subcategories', {}) for subcat_name, subcat_data in subcategories.items(): for feature_id, feature_data in subcat_data.items(): if isinstance(feature_data, dict): # Check natural language mappings usage_examples = feature_data.get('usage_examples', []) for example in usage_examples: natural_lang = example.get('natural_language', []) for phrase in 
natural_lang: if phrase.lower() in request_lower: matched_features.append(feature_id) break # Identify missing domains missing_domains = [] for domain, keywords in domain_keywords.items(): for keyword in keywords: if keyword in request_lower: # Check if we have features for this domain domain_covered = False for category_name, category_data in categories.items(): subcategories = category_data.get('subcategories', {}) for subcat_name in subcategories.keys(): if domain in subcat_name or subcat_name in domain: domain_covered = True break if domain_covered: break if not domain_covered: missing_domains.append(domain) break # Check knowledge base for existing knowledge existing_knowledge = self.search_knowledge_base(request_lower) # Determine confidence based on matches if matched_features and not missing_domains: # We have features and domain knowledge confidence = 0.9 missing_features = [] elif matched_features and missing_domains: # We have some features but missing domain knowledge confidence = 0.6 missing_features = [] elif not matched_features and not missing_domains: # No matches but domain seems covered (might need new feature) confidence = 0.4 missing_features = ['unknown_feature_needed'] else: # No matches and missing domain knowledge confidence = 0.2 missing_features = ['new_feature_required'] # Adjust confidence if we have existing knowledge if existing_knowledge and existing_knowledge.get('confidence', 0) > 0.7: confidence = max(confidence, 0.8) return KnowledgeGap( missing_features=missing_features if not matched_features else [], missing_knowledge=list(set(missing_domains)), user_request=user_request, confidence=confidence ) def create_research_plan(self, knowledge_gap: KnowledgeGap) -> ResearchPlan: """ Create step-by-step research plan to fill knowledge gap. Prioritizes research sources: 1. User examples (highest confidence) 2. NX MCP / official documentation (high confidence) 3. 
Web search / community docs (medium confidence) Args: knowledge_gap: The detected knowledge gap Returns: ResearchPlan with ordered steps Example: >>> gap = KnowledgeGap( ... missing_features=['material_generator'], ... missing_knowledge=['NX material XML format'], ... user_request="Create material XML", ... confidence=0.2 ... ) >>> plan = agent.create_research_plan(gap) >>> plan.steps[0]['action'] 'ask_user_for_example' """ steps = [] # Determine what topics we need to research topics = knowledge_gap.missing_knowledge if knowledge_gap.missing_knowledge else ['general approach'] primary_topic = topics[0] # Step 1: ALWAYS ask user for examples first (highest confidence source) steps.append({ 'step': 1, 'action': 'ask_user_for_example', 'priority': 'high', 'source_type': 'user_validated', 'expected_confidence': CONFIDENCE_LEVELS['user_validated'], 'details': { 'prompt': self._generate_user_prompt(knowledge_gap), 'topic': primary_topic, 'file_types': self._infer_file_types(primary_topic) } }) # Step 2: Search existing knowledge base steps.append({ 'step': 2, 'action': 'search_knowledge_base', 'priority': 'high', 'source_type': 'internal', 'expected_confidence': 0.8, 'details': { 'query': primary_topic, 'search_path': self.knowledge_base_path / 'research_sessions' } }) # Step 3: Query NX MCP if available (for NX-specific topics) if any(kw in primary_topic.lower() for kw in ['nx', 'nastran', 'material', 'geometry', 'load', 'mesh']): steps.append({ 'step': 3, 'action': 'query_nx_mcp', 'priority': 'medium', 'source_type': 'nx_mcp_official', 'expected_confidence': CONFIDENCE_LEVELS['nx_mcp_official'], 'details': { 'query': f"NX {primary_topic} API documentation", 'fallback': True # Skip if MCP not available } }) # Step 4: Web search for documentation and examples steps.append({ 'step': 4, 'action': 'web_search', 'priority': 'low', 'source_type': 'web_generic', 'expected_confidence': CONFIDENCE_LEVELS['web_generic'], 'details': { 'query': f"Siemens NX {primary_topic} 
documentation examples", 'fallback_queries': [ f"NXOpen {primary_topic} API", f"{primary_topic} NX automation" ] } }) # Step 5: Search NXOpenTSE (community examples) steps.append({ 'step': 5, 'action': 'search_nxopen_tse', 'priority': 'low', 'source_type': 'nxopen_tse', 'expected_confidence': CONFIDENCE_LEVELS['nxopen_tse'], 'details': { 'query': f"{primary_topic} example code", 'site': 'nxopen.tse.de' } }) return ResearchPlan(steps) def execute_interactive_research( self, plan: ResearchPlan, user_responses: Optional[Dict[int, Any]] = None ) -> ResearchFindings: """ Execute research plan, gathering information from multiple sources. This method executes each step in the research plan, starting with asking the user for examples. It collects data from all sources and assigns confidence scores based on source reliability. Args: plan: The research plan to execute user_responses: Optional dict mapping step number to user response Returns: ResearchFindings with gathered data and confidence scores Example: >>> plan = agent.create_research_plan(gap) >>> findings = agent.execute_interactive_research( ... plan, ... user_responses={1: 'steel_material.xml'} ... 
) >>> findings.sources {'user_example': 'steel_material.xml', ...} """ sources = {} raw_data = {} confidence_scores = {} user_responses = user_responses or {} # Execute each step in the plan for step in plan.steps: step_num = step['step'] action = step['action'] source_type = step.get('source_type', 'unknown') expected_confidence = step.get('expected_confidence', 0.5) # Step 1: Ask user for example if action == 'ask_user_for_example': if step_num in user_responses: user_input = user_responses[step_num] # Handle file path if isinstance(user_input, (str, Path)): file_path = Path(user_input) if file_path.exists(): file_content = file_path.read_text(encoding='utf-8') sources['user_example'] = str(file_path) raw_data['user_example'] = file_content confidence_scores['user_example'] = CONFIDENCE_LEVELS['user_validated'] else: # User provided content directly as string sources['user_example'] = 'user_provided_content' raw_data['user_example'] = user_input confidence_scores['user_example'] = CONFIDENCE_LEVELS['user_validated'] # Handle dict/object elif isinstance(user_input, dict): sources['user_example'] = 'user_provided_data' raw_data['user_example'] = user_input confidence_scores['user_example'] = CONFIDENCE_LEVELS['user_validated'] # Step 2: Search knowledge base elif action == 'search_knowledge_base': existing_knowledge = self.search_knowledge_base(step['details']['query']) if existing_knowledge: sources['knowledge_base'] = f"research_sessions/{existing_knowledge.get('session_id')}" raw_data['knowledge_base'] = existing_knowledge confidence_scores['knowledge_base'] = existing_knowledge.get('confidence', 0.8) # Step 3: Query NX MCP (placeholder for future implementation) elif action == 'query_nx_mcp': # TODO: Implement NX MCP query when MCP server is available # For now, skip this step pass # Step 4: Web search elif action == 'web_search': # Perform web search for NXOpen documentation query = step['details']['query'] try: # In a real LLM integration, this would call 
WebSearch tool # For now, we'll mark that web search would happen here # and store placeholder data sources['web_search'] = f"Web search: {query}" raw_data['web_search'] = { 'query': query, 'note': 'Web search integration requires LLM tool access', 'implementation_status': 'placeholder' } confidence_scores['web_search'] = CONFIDENCE_LEVELS['web_generic'] except Exception as e: # Silently skip if web search fails pass # Step 5: Search NXOpenTSE elif action == 'search_nxopen_tse': # Search NXOpenTSE community examples query = step['details']['query'] try: # In a real implementation, this would scrape/search nxopen.tse.de # For now, mark as placeholder sources['nxopen_tse'] = f"NXOpenTSE: {query}" raw_data['nxopen_tse'] = { 'query': query, 'site': 'nxopen.tse.de', 'note': 'NXOpenTSE search integration requires web scraping', 'implementation_status': 'placeholder' } confidence_scores['nxopen_tse'] = CONFIDENCE_LEVELS['nxopen_tse'] except Exception: # Silently skip if search fails pass return ResearchFindings( sources=sources, raw_data=raw_data, confidence_scores=confidence_scores ) def synthesize_knowledge( self, findings: ResearchFindings ) -> SynthesizedKnowledge: """ Combine findings from multiple sources into actionable knowledge. This method analyzes raw data from research findings, extracts patterns and schemas, and creates a coherent knowledge representation that can be used for feature generation. 
        Args:
            findings: Research findings from multiple sources

        Returns:
            SynthesizedKnowledge with:
            - schema: Extracted structure/format
            - patterns: Reusable patterns identified
            - examples: Concrete usage examples
            - confidence: Overall confidence score

        Example:
            >>> knowledge = agent.synthesize_knowledge(findings)
            >>> knowledge.schema['root_element']
            'PhysicalMaterial'
            >>> knowledge.confidence
            0.85
        """
        # Initialize synthesis structures
        schema = {}
        patterns = []
        examples = []
        synthesis_notes = []

        # Calculate weighted confidence from sources
        # NOTE(review): confirm CONFIDENCE_LEVELS is defined at module scope;
        # it is not visible in this portion of the file.
        total_confidence = 0.0
        total_weight = 0
        for source_type, confidence in findings.confidence_scores.items():
            # Weight based on source type
            weight = CONFIDENCE_LEVELS.get(source_type, 0.5)
            total_confidence += confidence * weight
            total_weight += weight
        # Guard against a zero-weight (empty findings) division.
        overall_confidence = total_confidence / total_weight if total_weight > 0 else 0.5

        # Process each source's raw data
        for source_type, raw_data in findings.raw_data.items():
            synthesis_notes.append(f"Processing {source_type}...")
            # Handle XML data (e.g., NX material files)
            # NOTE(review): the source file appears truncated at this point —
            # the remainder of synthesize_knowledge() (XML/code handling and
            # its return statement) and the `def _extract_xml_schema(...)`
            # header are missing from this view. The fragment below is
            # preserved exactly as found; restore it from version control.
            if isinstance(raw_data, str) and raw_data.strip().startswith(' Optional[Dict[str, Any]]:
        """
        Extract schema information from XML content.
Args: xml_content: XML string content Returns: Dictionary with root_element, required_fields, optional_fields, attributes """ try: root = ET.fromstring(xml_content) # Extract root element info schema = { 'root_element': root.tag, 'attributes': dict(root.attrib), 'required_fields': [], 'optional_fields': [], 'structure': {} } # Analyze child elements for child in root: field_info = { 'name': child.tag, 'attributes': dict(child.attrib), 'text_content': child.text.strip() if child.text else None } # Determine if field is likely required (has content) if child.text and child.text.strip(): schema['required_fields'].append(child.tag) else: schema['optional_fields'].append(child.tag) schema['structure'][child.tag] = field_info return schema except ET.ParseError: return None def _extract_code_patterns(self, code_content: str) -> List[Dict[str, Any]]: """ Extract reusable patterns from Python code. Args: code_content: Python code string Returns: List of identified patterns (functions, classes, imports) """ patterns = [] # Extract function definitions import re func_pattern = r'def\s+(\w+)\s*\((.*?)\):' for match in re.finditer(func_pattern, code_content): func_name = match.group(1) params = match.group(2) patterns.append({ 'type': 'function', 'name': func_name, 'parameters': params, 'reusable': True }) # Extract class definitions class_pattern = r'class\s+(\w+)(?:\((.*?)\))?:' for match in re.finditer(class_pattern, code_content): class_name = match.group(1) base_classes = match.group(2) if match.group(2) else None patterns.append({ 'type': 'class', 'name': class_name, 'base_classes': base_classes, 'reusable': True }) # Extract import statements import_pattern = r'(?:from\s+([\w.]+)\s+)?import\s+([\w\s,*]+)' for match in re.finditer(import_pattern, code_content): module = match.group(1) if match.group(1) else None imports = match.group(2) patterns.append({ 'type': 'import', 'module': module, 'items': imports, 'reusable': True }) return patterns def design_feature( self, 
synthesized_knowledge: SynthesizedKnowledge, feature_name: str ) -> Dict[str, Any]: """ Create feature specification from synthesized knowledge. This method takes learned knowledge and designs a new feature that follows Atomizer's feature registry schema. Args: synthesized_knowledge: Knowledge learned from research feature_name: Name for the new feature Returns: Feature specification dict following feature_registry.json schema Example: >>> feature_spec = agent.design_feature( ... knowledge, ... 'nx_material_generator' ... ) >>> feature_spec['feature_id'] 'nx_material_generator' """ # Extract category from feature name or patterns category = self._infer_category(feature_name, synthesized_knowledge) subcategory = self._infer_subcategory(feature_name, synthesized_knowledge) # Create base feature specification feature_spec = { 'feature_id': feature_name, 'name': feature_name.replace('_', ' ').title(), 'description': f'Auto-generated feature for {feature_name.replace("_", " ")}', 'category': category, 'subcategory': subcategory, 'lifecycle_stage': self._infer_lifecycle_stage(feature_name), 'abstraction_level': 'primitive', # Start as primitive, can be composed later 'implementation': { 'file_path': f'optimization_engine/custom_functions/{feature_name}.py', 'function_name': feature_name, 'entry_point': f'from optimization_engine.custom_functions.{feature_name} import {feature_name}' }, 'interface': { 'inputs': self._extract_inputs_from_knowledge(synthesized_knowledge), 'outputs': self._extract_outputs_from_knowledge(synthesized_knowledge) }, 'dependencies': { 'features': [], 'libraries': self._extract_libraries_from_knowledge(synthesized_knowledge), 'nx_version': '2412' # Default to current version }, 'usage_examples': [{ 'description': f'Use {feature_name} for automated task', 'natural_language': [ feature_name.replace('_', ' '), f'generate {feature_name.split("_")[0]}' ] }], 'metadata': { 'author': 'Research Agent (Auto-generated)', 'created': 
datetime.now().strftime('%Y-%m-%d'), 'status': 'experimental', 'tested': False, 'confidence': synthesized_knowledge.confidence } } # Add schema information if available if synthesized_knowledge.schema: feature_spec['learned_schema'] = synthesized_knowledge.schema # Add patterns if available if synthesized_knowledge.patterns: feature_spec['learned_patterns'] = synthesized_knowledge.patterns return feature_spec def _infer_category(self, feature_name: str, knowledge: SynthesizedKnowledge) -> str: """Infer feature category from name and knowledge.""" name_lower = feature_name.lower() if any(kw in name_lower for kw in ['extract', 'stress', 'displacement', 'metric']): return 'engineering' elif any(kw in name_lower for kw in ['optimize', 'solver', 'runner']): return 'software' elif any(kw in name_lower for kw in ['chart', 'dashboard', 'visualize']): return 'ui' else: return 'engineering' # Default def _infer_subcategory(self, feature_name: str, knowledge: SynthesizedKnowledge) -> str: """Infer feature subcategory from name and knowledge.""" name_lower = feature_name.lower() if 'extractor' in name_lower: return 'extractors' elif 'generator' in name_lower or 'material' in name_lower: return 'generators' elif 'solver' in name_lower or 'runner' in name_lower: return 'optimization' else: return 'custom' def _infer_lifecycle_stage(self, feature_name: str) -> str: """Infer lifecycle stage from feature name.""" name_lower = feature_name.lower() if 'extract' in name_lower: return 'post_extraction' elif 'solver' in name_lower or 'run' in name_lower: return 'solve' elif 'update' in name_lower or 'prepare' in name_lower: return 'pre_solve' else: return 'all' def _extract_inputs_from_knowledge(self, knowledge: SynthesizedKnowledge) -> List[Dict]: """Extract input parameters from synthesized knowledge.""" inputs = [] # Check if XML schema exists if knowledge.schema and 'xml_structure' in knowledge.schema: xml_schema = knowledge.schema['xml_structure'] for field in 
xml_schema.get('required_fields', []): inputs.append({ 'name': field.lower(), 'type': 'float', # Assume numeric for now 'required': True, 'description': f'{field} parameter from learned schema' }) # If no inputs found, add generic parameter if not inputs: inputs.append({ 'name': 'parameters', 'type': 'dict', 'required': True, 'description': 'Feature parameters' }) return inputs def _extract_outputs_from_knowledge(self, knowledge: SynthesizedKnowledge) -> List[Dict]: """Extract output parameters from synthesized knowledge.""" # Default output structure return [{ 'name': 'result', 'type': 'dict', 'description': 'Generated result from feature' }] def _extract_libraries_from_knowledge(self, knowledge: SynthesizedKnowledge) -> List[str]: """Extract required libraries from code patterns.""" libraries = [] for pattern in knowledge.patterns: if pattern['type'] == 'import': module = pattern.get('module') if module: libraries.append(module) return list(set(libraries)) # Remove duplicates def validate_with_user(self, feature_spec: Dict[str, Any]) -> bool: """ Confirm feature specification with user before implementation. Args: feature_spec: The designed feature specification Returns: True if user approves, False otherwise """ # TODO: Implement user validation workflow # This will be interactive in actual implementation return True def generate_feature_code( self, feature_spec: Dict[str, Any], synthesized_knowledge: SynthesizedKnowledge ) -> str: """ Generate Python code for a feature from learned templates and patterns. 
        Args:
            feature_spec: Feature specification from design_feature()
            synthesized_knowledge: Knowledge synthesized from research

        Returns:
            Generated Python code as string

        Example:
            >>> code = agent.generate_feature_code(feature_spec, knowledge)
            >>> # code contains working Python implementation
        """
        feature_name = feature_spec['feature_id']
        feature_description = feature_spec['description']

        # Start building the code
        code_lines: List[str] = []

        # Add header (module docstring of the generated file)
        code_lines.append('"""')
        code_lines.append(f'{feature_name}')
        code_lines.append('')
        code_lines.append(f'{feature_description}')
        code_lines.append('')
        code_lines.append('Auto-generated by Research Agent')
        code_lines.append(f'Created: {datetime.now().strftime("%Y-%m-%d")}')
        code_lines.append(f'Confidence: {synthesized_knowledge.confidence:.2f}')
        code_lines.append('"""')
        code_lines.append('')

        # Add imports
        code_lines.append('from pathlib import Path')
        code_lines.append('from typing import Dict, Any, Optional')
        code_lines.append('')

        # Add imports from learned patterns
        for pattern in synthesized_knowledge.patterns:
            if pattern['type'] == 'import':
                module = pattern.get('module')
                items = pattern.get('items', '')
                if module:
                    code_lines.append(f'from {module} import {items}')
                else:
                    code_lines.append(f'import {items}')
        if any(p['type'] == 'import' for p in synthesized_knowledge.patterns):
            code_lines.append('')

        # Add XML ElementTree if we have XML schema
        if synthesized_knowledge.schema and 'xml_structure' in synthesized_knowledge.schema:
            code_lines.append('import xml.etree.ElementTree as ET')
            code_lines.append('')

        # Generate main function
        code_lines.append(f'def {feature_name}(')

        # Add function parameters from feature spec
        inputs = feature_spec['interface']['inputs']
        for i, input_param in enumerate(inputs):
            param_name = input_param['name']
            param_type = input_param.get('type', 'Any')
            required = input_param.get('required', True)

            # Map types to Python type hints
            type_map = {
                'str': 'str',
                'int': 'int',
                'float': 'float',
                'bool': 'bool',
                'dict': 'Dict[str, Any]',
                'list': 'list',
                'Path': 'Path'
            }
            py_type = type_map.get(param_type, 'Any')

            # Optional parameters get an Optional[...] hint and a None default.
            if not required:
                py_type = f'Optional[{py_type}]'
                default = ' = None'
            else:
                default = ''

            # Trailing comma on all but the last parameter.
            comma = ',' if i < len(inputs) - 1 else ''
            code_lines.append(f'    {param_name}: {py_type}{default}{comma}')

        code_lines.append(') -> Dict[str, Any]:')
        code_lines.append('    """')
        code_lines.append(f'    {feature_description}')
        code_lines.append('')
        code_lines.append('    Args:')
        for input_param in inputs:
            code_lines.append(f'        {input_param["name"]}: {input_param.get("description", "")}')
        code_lines.append('')
        code_lines.append('    Returns:')
        code_lines.append('        Dictionary with generated results')
        code_lines.append('    """')
        code_lines.append('')

        # Generate function body based on learned patterns
        if synthesized_knowledge.schema and 'xml_structure' in synthesized_knowledge.schema:
            # XML generation code
            xml_schema = synthesized_knowledge.schema['xml_structure']
            root_element = xml_schema['root_element']
            code_lines.append('    # Generate XML from learned schema')
            code_lines.append(f'    root = ET.Element("{root_element}")')
            code_lines.append('')
            code_lines.append('    # Add attributes if any')
            if xml_schema.get('attributes'):
                for attr_name, attr_value in xml_schema['attributes'].items():
                    code_lines.append(f'    root.set("{attr_name}", "{attr_value}")')
            code_lines.append('')
            code_lines.append('    # Add child elements from parameters')
            # Emit a guarded SubElement per required field; the generated
            # parameter name is the lowercased field name (matches
            # _extract_inputs_from_knowledge).
            for field in xml_schema.get('required_fields', []):
                field_lower = field.lower()
                code_lines.append(f'    if {field_lower} is not None:')
                code_lines.append(f'        elem = ET.SubElement(root, "{field}")')
                code_lines.append(f'        elem.text = str({field_lower})')
            code_lines.append('')
            code_lines.append('    # Convert to string')
            code_lines.append('    xml_str = ET.tostring(root, encoding="unicode")')
            code_lines.append('')
            code_lines.append('    return {')
            code_lines.append('        "xml_content": xml_str,')
            code_lines.append('        "root_element": root.tag,')
            code_lines.append('        "success": True')
            code_lines.append('    }')
        else:
            # Generic implementation
            code_lines.append('    # TODO: Implement feature logic')
            code_lines.append('    # This is a placeholder implementation')
            code_lines.append('    result = {')
            code_lines.append('        "status": "generated",')
            code_lines.append(f'        "feature": "{feature_name}",')
            code_lines.append('        "note": "This is an auto-generated placeholder"')
            code_lines.append('    }')
            code_lines.append('')
            code_lines.append('    return result')

        code_lines.append('')
        code_lines.append('')
        code_lines.append('# Example usage')
        code_lines.append('if __name__ == "__main__":')
        code_lines.append(f'    result = {feature_name}(')

        # Add example parameter values
        for input_param in inputs:
            param_name = input_param['name']
            code_lines.append(f'        {param_name}=None,  # TODO: Provide example value')

        code_lines.append('    )')
        code_lines.append('    print(result)')
        code_lines.append('')

        return '\n'.join(code_lines)

    def document_session(
        self,
        topic: str,
        knowledge_gap: KnowledgeGap,
        findings: ResearchFindings,
        knowledge: SynthesizedKnowledge,
        generated_files: List[str]
    ) -> Path:
        """
        Save research session to knowledge base for future reference.

        Creates a dated folder in knowledge_base/research_sessions/ with:
        - user_question.txt: Original user request
        - sources_consulted.txt: List of sources with confidence scores
        - findings.md: What was learned from each source
        - decision_rationale.md: Why this approach was chosen

        Args:
            topic: Short topic name (e.g., 'nx_materials')
            knowledge_gap: The original knowledge gap
            findings: Research findings gathered
            knowledge: Synthesized knowledge
            generated_files: List of files generated from this research

        Returns:
            Path to created session folder

        Example:
            >>> session_path = agent.document_session(
            ...     'nx_materials',
            ...     gap, findings, knowledge,
            ...     ['nx_material_generator.py']
            ...
) >>> session_path PosixPath('knowledge_base/research_sessions/2025-01-16_nx_materials') """ # Create session folder date_str = datetime.now().strftime('%Y-%m-%d') session_name = f"{date_str}_{topic}" session_path = self.knowledge_base_path / "research_sessions" / session_name session_path.mkdir(parents=True, exist_ok=True) # Save user question with open(session_path / "user_question.txt", 'w', encoding='utf-8') as f: f.write(knowledge_gap.user_request) # Save sources consulted with open(session_path / "sources_consulted.txt", 'w', encoding='utf-8') as f: f.write("Sources Consulted\n") f.write("=" * 50 + "\n\n") for source, score in findings.confidence_scores.items(): f.write(f"- {source}: {findings.sources.get(source, 'N/A')} " f"(confidence: {score:.2f})\n") # Save findings with open(session_path / "findings.md", 'w', encoding='utf-8') as f: f.write(f"# Research Findings: {topic}\n\n") f.write(f"**Date**: {date_str}\n\n") f.write("## Knowledge Synthesized\n\n") f.write(knowledge.synthesis_notes + "\n\n") f.write(f"**Overall Confidence**: {knowledge.confidence:.2f}\n\n") f.write("## Generated Files\n\n") for file_path in generated_files: f.write(f"- `{file_path}`\n") # Save decision rationale with open(session_path / "decision_rationale.md", 'w', encoding='utf-8') as f: f.write(f"# Decision Rationale: {topic}\n\n") f.write(f"**Confidence Score**: {knowledge.confidence:.2f}\n\n") f.write("## Why This Approach\n\n") f.write(knowledge.synthesis_notes + "\n\n") f.write("## Alternative Approaches Considered\n\n") f.write("(To be filled by implementation)\n") return session_path def search_knowledge_base(self, query: str) -> Optional[Dict[str, Any]]: """ Search existing knowledge base for relevant information. Before starting new research, check if we already have knowledge about this topic from past research sessions. 
Args: query: Search query (topic or keywords) Returns: Dict with existing knowledge if found, None otherwise Example: >>> existing = agent.search_knowledge_base("material XML") >>> if existing and existing['confidence'] > 0.8: ... # Use existing knowledge ... template = load_template(existing['template_path']) """ query_lower = query.lower() research_sessions_path = self.knowledge_base_path / "research_sessions" if not research_sessions_path.exists(): return None # Search through all research sessions best_match = None best_score = 0.0 for session_dir in research_sessions_path.iterdir(): if not session_dir.is_dir(): continue # Calculate relevance score based on folder name and contents folder_name = session_dir.name.lower() relevance_score = 0.0 # Check folder name for keywords query_words = query_lower.split() for word in query_words: # Special handling for important short words (NX, AI, ML, etc.) min_length = 1 if word in ['nx', 'ai', 'ml', 'ui'] else 2 if len(word) > min_length and word in folder_name: relevance_score += 0.3 # Check user_question.txt user_question_file = session_dir / "user_question.txt" if user_question_file.exists(): try: question_content = user_question_file.read_text(encoding='utf-8').lower() for word in query_words: min_length = 1 if word in ['nx', 'ai', 'ml', 'ui'] else 2 if len(word) > min_length and word in question_content: relevance_score += 0.2 except Exception: pass # Check findings.md for relevant content findings_file = session_dir / "findings.md" if findings_file.exists(): try: findings_content = findings_file.read_text(encoding='utf-8').lower() for word in query_words: min_length = 1 if word in ['nx', 'ai', 'ml', 'ui'] else 2 if len(word) > min_length and word in findings_content: relevance_score += 0.1 except Exception: pass # Update best match if this session is more relevant if relevance_score > best_score and relevance_score > 0.5: # Threshold best_score = relevance_score best_match = { 'session_id': session_dir.name, 
'session_path': session_dir, 'relevance_score': relevance_score, 'confidence': min(0.9, relevance_score) # Cap at 0.9 } # Try to extract confidence from findings if findings_file.exists(): try: findings_content = findings_file.read_text(encoding='utf-8') # Look for confidence score in findings import re conf_match = re.search(r'confidence[:\s]+([0-9.]+)', findings_content.lower()) if conf_match: extracted_conf = float(conf_match.group(1)) best_match['confidence'] = extracted_conf except Exception: pass # Load schema if available (from findings or decision_rationale) try: if findings_file.exists(): findings_content = findings_file.read_text(encoding='utf-8') # Try to extract schema information if 'schema' in findings_content.lower() or 'xml' in findings_content.lower(): best_match['has_schema'] = True except Exception: pass return best_match def _generate_user_prompt(self, knowledge_gap: KnowledgeGap) -> str: """ Generate user-friendly prompt asking for examples. Args: knowledge_gap: The detected knowledge gap Returns: Formatted prompt string """ topic = knowledge_gap.missing_knowledge[0] if knowledge_gap.missing_knowledge else "this feature" file_types = self._infer_file_types(topic) prompt = f"I don't currently have knowledge about {topic}.\n\n" prompt += f"To help me learn, could you provide an example file?\n" prompt += f"Suggested file types: {', '.join(file_types)}\n\n" prompt += f"Once you provide an example, I'll:\n" prompt += f"1. Analyze its structure and patterns\n" prompt += f"2. Extract reusable templates\n" prompt += f"3. Generate the feature you requested\n" prompt += f"4. Save the knowledge for future use" return prompt def _infer_file_types(self, topic: str) -> List[str]: """ Infer expected file types based on topic. 
Args: topic: The topic or domain Returns: List of suggested file extensions """ topic_lower = topic.lower() # Material-related topics if any(kw in topic_lower for kw in ['material', 'physical property', 'alloy']): return ['.xml', '.mat', '.txt'] # Geometry-related topics elif any(kw in topic_lower for kw in ['geometry', 'fillet', 'chamfer', 'sketch']): return ['.prt', '.py', '.txt'] # Load/BC-related topics elif any(kw in topic_lower for kw in ['load', 'boundary condition', 'constraint', 'force']): return ['.py', '.txt', '.sim'] # Python/code-related topics elif any(kw in topic_lower for kw in ['function', 'script', 'automation', 'journal']): return ['.py', '.txt'] # XML/data-related topics elif any(kw in topic_lower for kw in ['xml', 'config', 'settings']): return ['.xml', '.json', '.txt'] # Default: accept common file types else: return ['.xml', '.py', '.txt', '.json'] # Confidence score reference CONFIDENCE_LEVELS = { 'user_validated': 0.95, # User confirmed it works 'nx_mcp_official': 0.85, # Official NX documentation 'nxopen_tse': 0.70, # Community-verified (NXOpenTSE) 'web_generic': 0.50 # Generic web search results } def get_confidence_description(score: float) -> str: """ Get human-readable confidence description. Args: score: Confidence score (0.0-1.0) Returns: Description like "HIGH", "MEDIUM", "LOW" """ if score >= 0.8: return "HIGH" elif score >= 0.6: return "MEDIUM" elif score >= 0.4: return "LOW" else: return "VERY LOW"