# tests/demo_research_agent.py

"""
Quick Interactive Demo of Research Agent

This demo shows the Research Agent learning from a material XML example
and documenting the research session.

Run this to see Phase 2 in action!
"""

import sys
from pathlib import Path

def _utf8_stream(stream):
    """Return ``stream`` set up to emit UTF-8, replacing unencodable characters.

    A stream exposing ``reconfigure`` (``io.TextIOWrapper`` on Python 3.7+)
    is switched in place and returned as-is. Otherwise, a stream exposing a
    binary ``buffer`` is wrapped in a UTF-8 ``codecs`` writer. Anything else
    (``None``, a replacement object with neither attribute) is returned
    untouched rather than raising ``AttributeError``.
    """
    if hasattr(stream, 'reconfigure'):
        stream.reconfigure(encoding='utf-8', errors='replace')
        return stream
    if hasattr(stream, 'buffer'):
        # Fallback for streams without ``reconfigure``.
        import codecs
        return codecs.getwriter('utf-8')(stream.buffer, errors='replace')
    return stream


# Set UTF-8 encoding for Windows console
if sys.platform == 'win32':
    sys.stdout = _utf8_stream(sys.stdout)
    sys.stderr = _utf8_stream(sys.stderr)

# Add project root to path.
# Resolve first: if ``__file__`` is a relative path, ``.parent.parent`` would
# collapse to '.' and the sys.path entry would depend on the current working
# directory instead of pointing at the directory above this file's folder.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))

from optimization_engine.future.research_agent import (
    ResearchAgent,
    ResearchFindings,
    KnowledgeGap,
    CONFIDENCE_LEVELS
)


def _print_banner(title, rule="-"):
    """Print a blank line, then ``title`` framed by two 70-character ``rule`` lines."""
    line = rule * 70
    print("\n" + line)
    print(title)
    print(line)


def _print_learned_schema(knowledge):
    """Print the root element and up to three required fields of the learned XML schema.

    Prints nothing when ``knowledge.schema`` is falsy or has no
    ``'xml_structure'`` entry.
    """
    if not (knowledge.schema and 'xml_structure' in knowledge.schema):
        return

    xml_schema = knowledge.schema['xml_structure']
    print("\n  Learned Schema:")
    print(f"    Root element: {xml_schema['root_element']}")
    required_fields = xml_schema['required_fields']
    print(f"    Required fields: {len(required_fields)}")
    for field in required_fields[:3]:
        print(f"      - {field}")
    if len(required_fields) > 3:
        print(f"      ... and {len(required_fields) - 3} more")


def _print_session_files(session_path):
    """Report which expected session files exist and preview ``findings.md``.

    ``session_path`` is joined with ``/`` and queried with ``exists()``, so it
    must behave like a ``pathlib.Path``.
    """
    print("\n  Files created:")
    expected = ('user_question.txt', 'sources_consulted.txt', 'findings.md', 'decision_rationale.md')
    for name in expected:
        status = "OK" if (session_path / name).exists() else "MISSING"
        print(f"    [{status}] {name}")

    # Show content of findings
    print("\n  Preview of findings.md:")
    findings_path = session_path / 'findings.md'
    if findings_path.exists():
        content = findings_path.read_text(encoding='utf-8')
        # Only the first 12 lines are shown; the trailing "..." is always printed.
        for line in content.split('\n')[:12]:
            print(f"    {line}")
        print("    ...")


def main():
    """Run the Research Agent demo, printing every step to stdout.

    Steps, in order: create a ``ResearchAgent``, ask it to identify the
    knowledge gap for a sample request, build ``ResearchFindings`` from an
    inline material XML example, synthesize knowledge from those findings,
    document the session via ``agent.document_session`` and report on the
    files found under the returned session path.
    """
    _print_banner("  RESEARCH AGENT DEMO - Phase 2 Self-Learning System", rule="=")

    # Initialize agent
    agent = ResearchAgent()
    print("\n[1] Research Agent initialized")
    print(f"    Feature registry loaded: {agent.feature_registry_path}")
    print(f"    Knowledge base: {agent.knowledge_base_path}")

    # Test 1: Detect knowledge gap
    _print_banner("[2] Testing Knowledge Gap Detection")

    request = "Create NX material XML for titanium Ti-6Al-4V"
    print(f"\nUser request: \"{request}\"")

    gap = agent.identify_knowledge_gap(request)
    print("\n  Analysis:")
    print(f"    Missing features: {gap.missing_features}")
    print(f"    Missing knowledge: {gap.missing_knowledge}")
    print(f"    Confidence: {gap.confidence:.2f}")
    print(f"    Research needed: {gap.research_needed}")

    # Test 2: Learn from example
    _print_banner("[3] Learning from User Example")

    # Simulated user provides this example
    example_xml = """<?xml version="1.0" encoding="UTF-8"?>
<PhysicalMaterial name="Steel_AISI_1020" version="1.0">
    <Density units="kg/m3">7850</Density>
    <YoungModulus units="GPa">200</YoungModulus>
    <PoissonRatio>0.29</PoissonRatio>
    <ThermalExpansion units="1/K">1.17e-05</ThermalExpansion>
    <YieldStrength units="MPa">295</YieldStrength>
    <UltimateTensileStrength units="MPa">420</UltimateTensileStrength>
</PhysicalMaterial>"""

    print("\nUser provides example: steel_material.xml")
    print("  (Simulating user uploading a file)")

    # Create research findings
    findings = ResearchFindings(
        sources={'user_example': 'steel_material.xml'},
        raw_data={'user_example': example_xml},
        confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}
    )

    print("\n  Source: user_example")
    print(f"  Confidence: {CONFIDENCE_LEVELS['user_validated']:.2f} (user-validated)")

    # Test 3: Synthesize knowledge
    _print_banner("[4] Synthesizing Knowledge")

    knowledge = agent.synthesize_knowledge(findings)

    print(f"\n  {knowledge.synthesis_notes}")
    _print_learned_schema(knowledge)

    # Test 4: Document session
    _print_banner("[5] Documenting Research Session")

    session_path = agent.document_session(
        topic='nx_materials_demo',
        knowledge_gap=gap,
        findings=findings,
        knowledge=knowledge,
        generated_files=[
            'optimization_engine/custom_functions/nx_material_generator.py',
            'knowledge_base/templates/material_xml_template.py'
        ]
    )

    print("\n  Session saved to:")
    print(f"    {session_path}")

    _print_session_files(session_path)

    # Test 5: Now agent can generate materials
    _print_banner("[6] Agent is Now Ready to Generate Materials!")

    print("\n  Next time you request a material XML, the agent will:")
    print("    1. Search knowledge base and find this research session")
    print("    2. Retrieve the learned schema")
    print("    3. Generate new material XML following the pattern")
    print("    4. Confidence: HIGH (based on user-validated example)")

    print("\n  Example usage:")
    print('    User: "Create aluminum alloy 6061-T6 material XML"')
    print('    Agent: "I know how to do this! Using learned schema..."')
    print('           [Generates XML with Al 6061-T6 properties]')

    # Summary
    _print_banner("  DEMO COMPLETE - Research Agent Successfully Learned!", rule="=")

    print("\n  What was accomplished:")
    print("    [OK] Detected knowledge gap (material XML generation)")
    print("    [OK] Learned XML schema from user example")
    print("    [OK] Extracted reusable patterns")
    print("    [OK] Documented research session for future reference")
    print("    [OK] Ready to generate similar features autonomously")

    print("\n  Knowledge persisted in:")
    print(f"    {session_path}")

    print("\n  This demonstrates Phase 2: Self-Extending Research System")
    print("  The agent can now learn ANY new capability from examples!\n")


if __name__ == '__main__':
    # Script entry point: any failure in the demo is reported with its
    # traceback and turned into exit status 1.
    try:
        main()
    except Exception as exc:
        import traceback

        print(f"\n[ERROR] {exc}")
        traceback.print_exc()
        sys.exit(1)
feat: Complete Phase 2.5-2.7 - Intelligent LLM-Powered Workflow Analysis This commit implements three major architectural improvements to transform Atomizer from static pattern matching to intelligent AI-powered analysis. ## Phase 2.5: Intelligent Codebase-Aware Gap Detection ✅ Created intelligent system that understands existing capabilities before requesting examples: New Files: - optimization_engine/codebase_analyzer.py (379 lines) Scans Atomizer codebase for existing FEA/CAE capabilities - optimization_engine/workflow_decomposer.py (507 lines, v0.2.0) Breaks user requests into atomic workflow steps Complete rewrite with multi-objective, constraints, subcase targeting - optimization_engine/capability_matcher.py (312 lines) Matches workflow steps to existing code implementations - optimization_engine/targeted_research_planner.py (259 lines) Creates focused research plans for only missing capabilities Results: - 80-90% coverage on complex optimization requests - 87-93% confidence in capability matching - Fixed expression reading misclassification (geometry vs result_extraction) ## Phase 2.6: Intelligent Step Classification ✅ Distinguishes engineering features from simple math operations: New Files: - optimization_engine/step_classifier.py (335 lines) Classification Types: 1. Engineering Features - Complex FEA/CAE needing research 2. Inline Calculations - Simple math to auto-generate 3. Post-Processing Hooks - Middleware between FEA steps ## Phase 2.7: LLM-Powered Workflow Intelligence ✅ Replaces static regex patterns with Claude AI analysis: New Files: - optimization_engine/llm_workflow_analyzer.py (395 lines) Uses Claude API for intelligent request analysis Supports both Claude Code (dev) and API (production) modes - .claude/skills/analyze-workflow.md Skill template for LLM workflow analysis integration Key Breakthrough: - Detects ALL intermediate steps (avg, min, normalization, etc.) 
- Understands engineering context (CBUSH vs CBAR, directions, metrics) - Distinguishes OP2 extraction from part expression reading - Expected 95%+ accuracy with full nuance detection ## Test Coverage New Test Files: - tests/test_phase_2_5_intelligent_gap_detection.py (335 lines) - tests/test_complex_multiobj_request.py (130 lines) - tests/test_cbush_optimization.py (130 lines) - tests/test_cbar_genetic_algorithm.py (150 lines) - tests/test_step_classifier.py (140 lines) - tests/test_llm_complex_request.py (387 lines) All tests include: - UTF-8 encoding for Windows console - atomizer environment (not test_env) - Comprehensive validation checks ## Documentation New Documentation: - docs/PHASE_2_5_INTELLIGENT_GAP_DETECTION.md (254 lines) - docs/PHASE_2_7_LLM_INTEGRATION.md (227 lines) - docs/SESSION_SUMMARY_PHASE_2_5_TO_2_7.md (252 lines) Updated: - README.md - Added Phase 2.5-2.7 completion status - DEVELOPMENT_ROADMAP.md - Updated phase progress ## Critical Fixes 1. Expression Reading Misclassification (lines cited in session summary) - Updated codebase_analyzer.py pattern detection - Fixed workflow_decomposer.py domain classification - Added capability_matcher.py read_expression mapping 2. Environment Standardization - All code now uses 'atomizer' conda environment - Removed test_env references throughout 3. 
Multi-Objective Support - WorkflowDecomposer v0.2.0 handles multiple objectives - Constraint extraction and validation - Subcase and direction targeting ## Architecture Evolution Before (Static & Dumb): User Request → Regex Patterns → Hardcoded Rules → Missed Steps ❌ After (LLM-Powered & Intelligent): User Request → Claude AI Analysis → Structured JSON → ├─ Engineering (research needed) ├─ Inline (auto-generate Python) ├─ Hooks (middleware scripts) └─ Optimization (config) ✅ ## LLM Integration Strategy Development Mode (Current): - Use Claude Code directly for interactive analysis - No API consumption or costs - Perfect for iterative development Production Mode (Future): - Optional Anthropic API integration - Falls back to heuristics if no API key - For standalone batch processing ## Next Steps - Phase 2.8: Inline Code Generation - Phase 2.9: Post-Processing Hook Generation - Phase 3: MCP Integration for automated documentation research 🚀 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-16 13:35:41 -05:00			`"""`
			`Quick Interactive Demo of Research Agent`

			`This demo shows the Research Agent learning from a material XML example`
			`and documenting the research session.`

			`Run this to see Phase 2 in action!`
			`"""`

			`import sys`
			`from pathlib import Path`

			`# Set UTF-8 encoding for Windows console`
			`if sys.platform == 'win32':`
			`import codecs`
			`sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, errors='replace')`
			`sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, errors='replace')`

			`# Add project root to path`
			`project_root = Path(__file__).parent.parent`
			`sys.path.insert(0, str(project_root))`

refactor: Major reorganization of optimization_engine module structure BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2025-12-29 12:30:59 -05:00			`from optimization_engine.future.research_agent import (`
feat: Complete Phase 2.5-2.7 - Intelligent LLM-Powered Workflow Analysis This commit implements three major architectural improvements to transform Atomizer from static pattern matching to intelligent AI-powered analysis. ## Phase 2.5: Intelligent Codebase-Aware Gap Detection ✅ Created intelligent system that understands existing capabilities before requesting examples: New Files: - optimization_engine/codebase_analyzer.py (379 lines) Scans Atomizer codebase for existing FEA/CAE capabilities - optimization_engine/workflow_decomposer.py (507 lines, v0.2.0) Breaks user requests into atomic workflow steps Complete rewrite with multi-objective, constraints, subcase targeting - optimization_engine/capability_matcher.py (312 lines) Matches workflow steps to existing code implementations - optimization_engine/targeted_research_planner.py (259 lines) Creates focused research plans for only missing capabilities Results: - 80-90% coverage on complex optimization requests - 87-93% confidence in capability matching - Fixed expression reading misclassification (geometry vs result_extraction) ## Phase 2.6: Intelligent Step Classification ✅ Distinguishes engineering features from simple math operations: New Files: - optimization_engine/step_classifier.py (335 lines) Classification Types: 1. Engineering Features - Complex FEA/CAE needing research 2. Inline Calculations - Simple math to auto-generate 3. Post-Processing Hooks - Middleware between FEA steps ## Phase 2.7: LLM-Powered Workflow Intelligence ✅ Replaces static regex patterns with Claude AI analysis: New Files: - optimization_engine/llm_workflow_analyzer.py (395 lines) Uses Claude API for intelligent request analysis Supports both Claude Code (dev) and API (production) modes - .claude/skills/analyze-workflow.md Skill template for LLM workflow analysis integration Key Breakthrough: - Detects ALL intermediate steps (avg, min, normalization, etc.) 
- Understands engineering context (CBUSH vs CBAR, directions, metrics) - Distinguishes OP2 extraction from part expression reading - Expected 95%+ accuracy with full nuance detection ## Test Coverage New Test Files: - tests/test_phase_2_5_intelligent_gap_detection.py (335 lines) - tests/test_complex_multiobj_request.py (130 lines) - tests/test_cbush_optimization.py (130 lines) - tests/test_cbar_genetic_algorithm.py (150 lines) - tests/test_step_classifier.py (140 lines) - tests/test_llm_complex_request.py (387 lines) All tests include: - UTF-8 encoding for Windows console - atomizer environment (not test_env) - Comprehensive validation checks ## Documentation New Documentation: - docs/PHASE_2_5_INTELLIGENT_GAP_DETECTION.md (254 lines) - docs/PHASE_2_7_LLM_INTEGRATION.md (227 lines) - docs/SESSION_SUMMARY_PHASE_2_5_TO_2_7.md (252 lines) Updated: - README.md - Added Phase 2.5-2.7 completion status - DEVELOPMENT_ROADMAP.md - Updated phase progress ## Critical Fixes 1. Expression Reading Misclassification (lines cited in session summary) - Updated codebase_analyzer.py pattern detection - Fixed workflow_decomposer.py domain classification - Added capability_matcher.py read_expression mapping 2. Environment Standardization - All code now uses 'atomizer' conda environment - Removed test_env references throughout 3. 
Multi-Objective Support - WorkflowDecomposer v0.2.0 handles multiple objectives - Constraint extraction and validation - Subcase and direction targeting ## Architecture Evolution Before (Static & Dumb): User Request → Regex Patterns → Hardcoded Rules → Missed Steps ❌ After (LLM-Powered & Intelligent): User Request → Claude AI Analysis → Structured JSON → ├─ Engineering (research needed) ├─ Inline (auto-generate Python) ├─ Hooks (middleware scripts) └─ Optimization (config) ✅ ## LLM Integration Strategy Development Mode (Current): - Use Claude Code directly for interactive analysis - No API consumption or costs - Perfect for iterative development Production Mode (Future): - Optional Anthropic API integration - Falls back to heuristics if no API key - For standalone batch processing ## Next Steps - Phase 2.8: Inline Code Generation - Phase 2.9: Post-Processing Hook Generation - Phase 3: MCP Integration for automated documentation research 🚀 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-16 13:35:41 -05:00			`ResearchAgent,`
			`ResearchFindings,`
			`KnowledgeGap,`
			`CONFIDENCE_LEVELS`
			`)`


			`def main():`
			`print("\n" + "="*70)`
			`print(" RESEARCH AGENT DEMO - Phase 2 Self-Learning System")`
			`print("="*70)`

			`# Initialize agent`
			`agent = ResearchAgent()`
			`print("\n[1] Research Agent initialized")`
			`print(f" Feature registry loaded: {agent.feature_registry_path}")`
			`print(f" Knowledge base: {agent.knowledge_base_path}")`

			`# Test 1: Detect knowledge gap`
			`print("\n" + "-"*70)`
			`print("[2] Testing Knowledge Gap Detection")`
			`print("-"*70)`

			`request = "Create NX material XML for titanium Ti-6Al-4V"`
			`print(f"\nUser request: \"{request}\"")`

			`gap = agent.identify_knowledge_gap(request)`
			`print(f"\n Analysis:")`
			`print(f" Missing features: {gap.missing_features}")`
			`print(f" Missing knowledge: {gap.missing_knowledge}")`
			`print(f" Confidence: {gap.confidence:.2f}")`
			`print(f" Research needed: {gap.research_needed}")`

			`# Test 2: Learn from example`
			`print("\n" + "-"*70)`
			`print("[3] Learning from User Example")`
			`print("-"*70)`

			`# Simulated user provides this example`
			`example_xml = """<?xml version="1.0" encoding="UTF-8"?>`
			`<PhysicalMaterial name="Steel_AISI_1020" version="1.0">`
			`<Density units="kg/m3">7850</Density>`
			`<YoungModulus units="GPa">200</YoungModulus>`
			`<PoissonRatio>0.29</PoissonRatio>`
			`<ThermalExpansion units="1/K">1.17e-05</ThermalExpansion>`
			`<YieldStrength units="MPa">295</YieldStrength>`
			`<UltimateTensileStrength units="MPa">420</UltimateTensileStrength>`
			`</PhysicalMaterial>"""`

			`print("\nUser provides example: steel_material.xml")`
			`print(" (Simulating user uploading a file)")`

			`# Create research findings`
			`findings = ResearchFindings(`
			`sources={'user_example': 'steel_material.xml'},`
			`raw_data={'user_example': example_xml},`
			`confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}`
			`)`

			`print(f"\n Source: user_example")`
			`print(f" Confidence: {CONFIDENCE_LEVELS['user_validated']:.2f} (user-validated)")`

			`# Test 3: Synthesize knowledge`
			`print("\n" + "-"*70)`
			`print("[4] Synthesizing Knowledge")`
			`print("-"*70)`

			`knowledge = agent.synthesize_knowledge(findings)`

			`print(f"\n {knowledge.synthesis_notes}")`

			`if knowledge.schema and 'xml_structure' in knowledge.schema:`
			`xml_schema = knowledge.schema['xml_structure']`
			`print(f"\n Learned Schema:")`
			`print(f" Root element: {xml_schema['root_element']}")`
			`print(f" Required fields: {len(xml_schema['required_fields'])}")`
			`for field in xml_schema['required_fields'][:3]:`
			`print(f" - {field}")`
			`if len(xml_schema['required_fields']) > 3:`
			`print(f" ... and {len(xml_schema['required_fields']) - 3} more")`

			`# Test 4: Document session`
			`print("\n" + "-"*70)`
			`print("[5] Documenting Research Session")`
			`print("-"*70)`

			`session_path = agent.document_session(`
			`topic='nx_materials_demo',`
			`knowledge_gap=gap,`
			`findings=findings,`
			`knowledge=knowledge,`
			`generated_files=[`
			`'optimization_engine/custom_functions/nx_material_generator.py',`
			`'knowledge_base/templates/material_xml_template.py'`
			`]`
			`)`

			`print(f"\n Session saved to:")`
			`print(f" {session_path}")`

			`print(f"\n Files created:")`
			`for file in ['user_question.txt', 'sources_consulted.txt', 'findings.md', 'decision_rationale.md']:`
			`file_path = session_path / file`
			`if file_path.exists():`
			`print(f" [OK] {file}")`
			`else:`
			`print(f" [MISSING] {file}")`

			`# Show content of findings`
			`print("\n Preview of findings.md:")`
			`findings_path = session_path / 'findings.md'`
			`if findings_path.exists():`
			`content = findings_path.read_text(encoding='utf-8')`
			`for i, line in enumerate(content.split('\n')[:12]):`
			`print(f" {line}")`
			`print(" ...")`

			`# Test 5: Now agent can generate materials`
			`print("\n" + "-"*70)`
			`print("[6] Agent is Now Ready to Generate Materials!")`
			`print("-"*70)`

			`print("\n Next time you request a material XML, the agent will:")`
			`print(" 1. Search knowledge base and find this research session")`
			`print(" 2. Retrieve the learned schema")`
			`print(" 3. Generate new material XML following the pattern")`
			`print(" 4. Confidence: HIGH (based on user-validated example)")`

			`print("\n Example usage:")`
			`print(' User: "Create aluminum alloy 6061-T6 material XML"')`
			`print(' Agent: "I know how to do this! Using learned schema..."')`
			`print(' [Generates XML with Al 6061-T6 properties]')`

			`# Summary`
			`print("\n" + "="*70)`
			`print(" DEMO COMPLETE - Research Agent Successfully Learned!")`
			`print("="*70)`

			`print("\n What was accomplished:")`
			`print(" [OK] Detected knowledge gap (material XML generation)")`
			`print(" [OK] Learned XML schema from user example")`
			`print(" [OK] Extracted reusable patterns")`
			`print(" [OK] Documented research session for future reference")`
			`print(" [OK] Ready to generate similar features autonomously")`

			`print("\n Knowledge persisted in:")`
			`print(f" {session_path}")`

			`print("\n This demonstrates Phase 2: Self-Extending Research System")`
			`print(" The agent can now learn ANY new capability from examples!\n")`


			`if __name__ == '__main__':`
			`try:`
			`main()`
			`except Exception as e:`
			`print(f"\n[ERROR] {e}")`
			`import traceback`
			`traceback.print_exc()`
			`sys.exit(1)`