# tests/test_knowledge_base_search.py

"""
Test Knowledge Base Search and Retrieval

This test demonstrates the Research Agent's ability to:
1. Search through past research sessions
2. Find relevant knowledge based on keywords
3. Retrieve session information with confidence scores
4. Avoid re-learning what it already knows

Author: Atomizer Development Team
Version: 0.1.0 (Phase 2 Week 2)
Last Updated: 2025-01-16
"""

import sys
from pathlib import Path

# Force UTF-8 output on the Windows console so the check/cross marks printed
# by this test do not raise UnicodeEncodeError on legacy code pages.
if sys.platform == 'win32':
    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Re-encode the existing stream in place: the stream object is
            # kept (so anything holding a reference to it keeps working) and
            # running this setup more than once in a process is harmless.
            _stream.reconfigure(encoding='utf-8', errors='replace')
        else:
            # Stream without reconfigure(): fall back to wrapping its
            # underlying binary buffer in a UTF-8 writer.
            import codecs
            setattr(
                sys,
                _stream_name,
                codecs.getwriter('utf-8')(_stream.buffer, errors='replace'),
            )

# Add project root to path.
# resolve() first: with a relative __file__ (e.g. the script started from
# inside tests/), .parent.parent would collapse to "." instead of the
# directory that contains tests/.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))

from optimization_engine.future.research_agent import (
    ResearchAgent,
    ResearchFindings,
    KnowledgeGap,
    CONFIDENCE_LEVELS
)


def test_knowledge_base_search():
    """Exercise the Research Agent's knowledge-base search end to end.

    The test documents one research session built from a user-supplied
    material XML example and then:

    * searches for "material XML" and requires a hit with relevance score
      above 0.5 and confidence above 0.7;
    * searches for "NX materials" and requires the same session id;
    * searches for an unrelated topic and only reports the outcome
      (informational, nothing is asserted);
    * prints which path the reuse decision would take for a new material
      request (informational, nothing is asserted).

    Returns:
        True when every check passed. The ``__main__`` runner of this file
        converts that value into the process exit code, so it is kept even
        though pytest itself ignores test return values.

    Raises:
        AssertionError: if an expected session is not found, or its
            relevance/confidence is below the required threshold.
    """
    rule = "=" * 70

    def section(title):
        """Print a dashed step header."""
        print("\n" + "-" * 70)
        print(title)
        print("-" * 70)

    print("\n" + rule)
    print("KNOWLEDGE BASE SEARCH TEST")
    print(rule)

    agent = ResearchAgent()

    # Step 1: Create a research session (if not exists)
    section("[Step 1] Creating Test Research Session")

    gap = KnowledgeGap(
        missing_features=['material_xml_generator'],
        missing_knowledge=['NX material XML format'],
        user_request="Create NX material XML for titanium Ti-6Al-4V",
        confidence=0.2
    )

    # Simulate findings from user example
    example_xml = """<?xml version="1.0" encoding="UTF-8"?>
<PhysicalMaterial name="Steel_AISI_1020" version="1.0">
    <Density units="kg/m3">7850</Density>
    <YoungModulus units="GPa">200</YoungModulus>
    <PoissonRatio>0.29</PoissonRatio>
</PhysicalMaterial>"""

    findings = ResearchFindings(
        sources={'user_example': 'steel_material.xml'},
        raw_data={'user_example': example_xml},
        confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}
    )

    knowledge = agent.synthesize_knowledge(findings)

    # Document session
    session_path = agent.document_session(
        topic='nx_materials_search_test',
        knowledge_gap=gap,
        findings=findings,
        knowledge=knowledge,
        generated_files=[]
    )

    print(f"\n  Session created: {session_path.name}")
    print(f"  Confidence: {knowledge.confidence:.2f}")

    # Step 2: Search for material-related knowledge
    section("[Step 2] Searching for 'material XML' Knowledge")

    result = agent.search_knowledge_base("material XML")

    if not result:
        print("\n  ✗ No matching session found")
        # Explicit raise instead of `assert False`: the failure must survive
        # `python -O`, otherwise Step 3 would subscript a falsy result.
        raise AssertionError("Should find the material XML session")

    print("\n  ✓ Found relevant session!")
    print(f"    Session ID: {result['session_id']}")
    print(f"    Relevance score: {result['relevance_score']:.2f}")
    print(f"    Confidence: {result['confidence']:.2f}")
    print(f"    Has schema: {result.get('has_schema', False)}")
    assert result['relevance_score'] > 0.5, "Should have good relevance score"
    assert result['confidence'] > 0.7, "Should have high confidence"

    # Step 3: Search for similar query
    section("[Step 3] Searching for 'NX materials' Knowledge")

    result2 = agent.search_knowledge_base("NX materials")

    if not result2:
        print("\n  ✗ No matching session found")
        raise AssertionError("Should find the materials session")

    print("\n  ✓ Found relevant session!")
    print(f"    Session ID: {result2['session_id']}")
    print(f"    Relevance score: {result2['relevance_score']:.2f}")
    print(f"    Confidence: {result2['confidence']:.2f}")
    assert result2['session_id'] == result['session_id'], "Should find same session"

    # Step 4: Search for non-existent knowledge
    # The header is built from the query so the log shows what was searched.
    unrelated_query = "thermal analysis buckling"
    section(f"[Step 4] Searching for '{unrelated_query}' Knowledge")

    result3 = agent.search_knowledge_base(unrelated_query)

    # Informational only: a low-relevance hit is tolerated, so no assertion.
    if result3:
        print(f"\n  Found session (unexpected): {result3['session_id']}")
        print(f"    Relevance score: {result3['relevance_score']:.2f}")
        print("  (This might be OK if relevance is low)")
    else:
        print("\n  ✓ No matching session found (as expected)")
        print("    Agent correctly identified this as new knowledge")

    # Step 5: Demonstrate how this prevents re-learning
    section("[Step 5] Demonstrating Knowledge Reuse")

    # Simulate user asking for another material
    new_request = "Create aluminum alloy 6061-T6 material XML"
    print(f"\n  User request: '{new_request}'")

    # First, identify knowledge gap
    gap2 = agent.identify_knowledge_gap(new_request)
    print("\n  Knowledge gap detected:")
    print(f"    Missing features: {gap2.missing_features}")
    print(f"    Missing knowledge: {gap2.missing_knowledge}")
    print(f"    Confidence: {gap2.confidence:.2f}")

    # Then search knowledge base
    existing = agent.search_knowledge_base("material XML")

    # Informational only: prints which path the reuse decision would take.
    if existing and existing['confidence'] > 0.8:
        print("\n  ✓ Found existing knowledge! No need to ask user again")
        print(f"    Can reuse learned schema from: {existing['session_id']}")
        print(f"    Confidence: {existing['confidence']:.2f}")
        print("\n  Workflow:")
        print("    1. Retrieve learned XML schema from session")
        print("    2. Apply aluminum 6061-T6 properties")
        print("    3. Generate XML using template")
        print("    4. Return result instantly (no user interaction needed!)")
    else:
        print("\n  ✗ No reliable existing knowledge, would ask user for example")

    # Summary
    print("\n" + rule)
    print("TEST SUMMARY")
    print(rule)

    print("\n  Knowledge Base Search Performance:")
    print("    ✓ Created research session and documented knowledge")
    print("    ✓ Successfully searched and found relevant sessions")
    print("    ✓ Correctly matched similar queries to same session")
    print("    ✓ Returned confidence scores for decision-making")
    print("    ✓ Demonstrated knowledge reuse (avoid re-learning)")

    print("\n  Benefits:")
    print("    - Second material request doesn't ask user for example")
    print("    - Instant generation using learned template")
    print("    - Knowledge accumulates over time")
    print("    - Agent becomes smarter with each research session")

    print("\n" + rule)
    print("Knowledge Base Search: WORKING! ✓")
    print(rule + "\n")

    # Kept for the script runner below, which maps this value to an exit code.
    return True


if __name__ == '__main__':
    # Script entry point: run the test once and translate its outcome into a
    # process exit code (0 = passed, 1 = failed or raised).
    try:
        exit_code = 0 if test_knowledge_base_search() else 1
    except Exception as e:
        # Report the failure with its traceback and signal it to the caller.
        print(f"\n[ERROR] {e}")
        import traceback
        traceback.print_exc()
        exit_code = 1
    sys.exit(exit_code)
feat: Complete Phase 2.5-2.7 - Intelligent LLM-Powered Workflow Analysis This commit implements three major architectural improvements to transform Atomizer from static pattern matching to intelligent AI-powered analysis. ## Phase 2.5: Intelligent Codebase-Aware Gap Detection ✅ Created intelligent system that understands existing capabilities before requesting examples: New Files: - optimization_engine/codebase_analyzer.py (379 lines) Scans Atomizer codebase for existing FEA/CAE capabilities - optimization_engine/workflow_decomposer.py (507 lines, v0.2.0) Breaks user requests into atomic workflow steps Complete rewrite with multi-objective, constraints, subcase targeting - optimization_engine/capability_matcher.py (312 lines) Matches workflow steps to existing code implementations - optimization_engine/targeted_research_planner.py (259 lines) Creates focused research plans for only missing capabilities Results: - 80-90% coverage on complex optimization requests - 87-93% confidence in capability matching - Fixed expression reading misclassification (geometry vs result_extraction) ## Phase 2.6: Intelligent Step Classification ✅ Distinguishes engineering features from simple math operations: New Files: - optimization_engine/step_classifier.py (335 lines) Classification Types: 1. Engineering Features - Complex FEA/CAE needing research 2. Inline Calculations - Simple math to auto-generate 3. Post-Processing Hooks - Middleware between FEA steps ## Phase 2.7: LLM-Powered Workflow Intelligence ✅ Replaces static regex patterns with Claude AI analysis: New Files: - optimization_engine/llm_workflow_analyzer.py (395 lines) Uses Claude API for intelligent request analysis Supports both Claude Code (dev) and API (production) modes - .claude/skills/analyze-workflow.md Skill template for LLM workflow analysis integration Key Breakthrough: - Detects ALL intermediate steps (avg, min, normalization, etc.) 
- Understands engineering context (CBUSH vs CBAR, directions, metrics) - Distinguishes OP2 extraction from part expression reading - Expected 95%+ accuracy with full nuance detection ## Test Coverage New Test Files: - tests/test_phase_2_5_intelligent_gap_detection.py (335 lines) - tests/test_complex_multiobj_request.py (130 lines) - tests/test_cbush_optimization.py (130 lines) - tests/test_cbar_genetic_algorithm.py (150 lines) - tests/test_step_classifier.py (140 lines) - tests/test_llm_complex_request.py (387 lines) All tests include: - UTF-8 encoding for Windows console - atomizer environment (not test_env) - Comprehensive validation checks ## Documentation New Documentation: - docs/PHASE_2_5_INTELLIGENT_GAP_DETECTION.md (254 lines) - docs/PHASE_2_7_LLM_INTEGRATION.md (227 lines) - docs/SESSION_SUMMARY_PHASE_2_5_TO_2_7.md (252 lines) Updated: - README.md - Added Phase 2.5-2.7 completion status - DEVELOPMENT_ROADMAP.md - Updated phase progress ## Critical Fixes 1. Expression Reading Misclassification (lines cited in session summary) - Updated codebase_analyzer.py pattern detection - Fixed workflow_decomposer.py domain classification - Added capability_matcher.py read_expression mapping 2. Environment Standardization - All code now uses 'atomizer' conda environment - Removed test_env references throughout 3. 
Multi-Objective Support - WorkflowDecomposer v0.2.0 handles multiple objectives - Constraint extraction and validation - Subcase and direction targeting ## Architecture Evolution Before (Static & Dumb): User Request → Regex Patterns → Hardcoded Rules → Missed Steps ❌ After (LLM-Powered & Intelligent): User Request → Claude AI Analysis → Structured JSON → ├─ Engineering (research needed) ├─ Inline (auto-generate Python) ├─ Hooks (middleware scripts) └─ Optimization (config) ✅ ## LLM Integration Strategy Development Mode (Current): - Use Claude Code directly for interactive analysis - No API consumption or costs - Perfect for iterative development Production Mode (Future): - Optional Anthropic API integration - Falls back to heuristics if no API key - For standalone batch processing ## Next Steps - Phase 2.8: Inline Code Generation - Phase 2.9: Post-Processing Hook Generation - Phase 3: MCP Integration for automated documentation research 🚀 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-16 13:35:41 -05:00			`"""`
			`Test Knowledge Base Search and Retrieval`

			`This test demonstrates the Research Agent's ability to:`
			`1. Search through past research sessions`
			`2. Find relevant knowledge based on keywords`
			`3. Retrieve session information with confidence scores`
			`4. Avoid re-learning what it already knows`

			`Author: Atomizer Development Team`
			`Version: 0.1.0 (Phase 2 Week 2)`
			`Last Updated: 2025-01-16`
			`"""`

			`import sys`
			`from pathlib import Path`

			`# Set UTF-8 encoding for Windows console`
			`if sys.platform == 'win32':`
			`import codecs`
			`sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, errors='replace')`
			`sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, errors='replace')`

			`# Add project root to path`
			`project_root = Path(__file__).parent.parent`
			`sys.path.insert(0, str(project_root))`

refactor: Major reorganization of optimization_engine module structure BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2025-12-29 12:30:59 -05:00			`from optimization_engine.future.research_agent import (`
feat: Complete Phase 2.5-2.7 - Intelligent LLM-Powered Workflow Analysis This commit implements three major architectural improvements to transform Atomizer from static pattern matching to intelligent AI-powered analysis. ## Phase 2.5: Intelligent Codebase-Aware Gap Detection ✅ Created intelligent system that understands existing capabilities before requesting examples: New Files: - optimization_engine/codebase_analyzer.py (379 lines) Scans Atomizer codebase for existing FEA/CAE capabilities - optimization_engine/workflow_decomposer.py (507 lines, v0.2.0) Breaks user requests into atomic workflow steps Complete rewrite with multi-objective, constraints, subcase targeting - optimization_engine/capability_matcher.py (312 lines) Matches workflow steps to existing code implementations - optimization_engine/targeted_research_planner.py (259 lines) Creates focused research plans for only missing capabilities Results: - 80-90% coverage on complex optimization requests - 87-93% confidence in capability matching - Fixed expression reading misclassification (geometry vs result_extraction) ## Phase 2.6: Intelligent Step Classification ✅ Distinguishes engineering features from simple math operations: New Files: - optimization_engine/step_classifier.py (335 lines) Classification Types: 1. Engineering Features - Complex FEA/CAE needing research 2. Inline Calculations - Simple math to auto-generate 3. Post-Processing Hooks - Middleware between FEA steps ## Phase 2.7: LLM-Powered Workflow Intelligence ✅ Replaces static regex patterns with Claude AI analysis: New Files: - optimization_engine/llm_workflow_analyzer.py (395 lines) Uses Claude API for intelligent request analysis Supports both Claude Code (dev) and API (production) modes - .claude/skills/analyze-workflow.md Skill template for LLM workflow analysis integration Key Breakthrough: - Detects ALL intermediate steps (avg, min, normalization, etc.) 
- Understands engineering context (CBUSH vs CBAR, directions, metrics) - Distinguishes OP2 extraction from part expression reading - Expected 95%+ accuracy with full nuance detection ## Test Coverage New Test Files: - tests/test_phase_2_5_intelligent_gap_detection.py (335 lines) - tests/test_complex_multiobj_request.py (130 lines) - tests/test_cbush_optimization.py (130 lines) - tests/test_cbar_genetic_algorithm.py (150 lines) - tests/test_step_classifier.py (140 lines) - tests/test_llm_complex_request.py (387 lines) All tests include: - UTF-8 encoding for Windows console - atomizer environment (not test_env) - Comprehensive validation checks ## Documentation New Documentation: - docs/PHASE_2_5_INTELLIGENT_GAP_DETECTION.md (254 lines) - docs/PHASE_2_7_LLM_INTEGRATION.md (227 lines) - docs/SESSION_SUMMARY_PHASE_2_5_TO_2_7.md (252 lines) Updated: - README.md - Added Phase 2.5-2.7 completion status - DEVELOPMENT_ROADMAP.md - Updated phase progress ## Critical Fixes 1. Expression Reading Misclassification (lines cited in session summary) - Updated codebase_analyzer.py pattern detection - Fixed workflow_decomposer.py domain classification - Added capability_matcher.py read_expression mapping 2. Environment Standardization - All code now uses 'atomizer' conda environment - Removed test_env references throughout 3. 
Multi-Objective Support - WorkflowDecomposer v0.2.0 handles multiple objectives - Constraint extraction and validation - Subcase and direction targeting ## Architecture Evolution Before (Static & Dumb): User Request → Regex Patterns → Hardcoded Rules → Missed Steps ❌ After (LLM-Powered & Intelligent): User Request → Claude AI Analysis → Structured JSON → ├─ Engineering (research needed) ├─ Inline (auto-generate Python) ├─ Hooks (middleware scripts) └─ Optimization (config) ✅ ## LLM Integration Strategy Development Mode (Current): - Use Claude Code directly for interactive analysis - No API consumption or costs - Perfect for iterative development Production Mode (Future): - Optional Anthropic API integration - Falls back to heuristics if no API key - For standalone batch processing ## Next Steps - Phase 2.8: Inline Code Generation - Phase 2.9: Post-Processing Hook Generation - Phase 3: MCP Integration for automated documentation research 🚀 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-16 13:35:41 -05:00			`ResearchAgent,`
			`ResearchFindings,`
			`KnowledgeGap,`
			`CONFIDENCE_LEVELS`
			`)`


			`def test_knowledge_base_search():`
			`"""Test that the agent can find and retrieve past research sessions."""`
			`print("\n" + "="*70)`
			`print("KNOWLEDGE BASE SEARCH TEST")`
			`print("="*70)`

			`agent = ResearchAgent()`

			`# Step 1: Create a research session (if not exists)`
			`print("\n" + "-"*70)`
			`print("[Step 1] Creating Test Research Session")`
			`print("-"*70)`

			`gap = KnowledgeGap(`
			`missing_features=['material_xml_generator'],`
			`missing_knowledge=['NX material XML format'],`
			`user_request="Create NX material XML for titanium Ti-6Al-4V",`
			`confidence=0.2`
			`)`

			`# Simulate findings from user example`
			`example_xml = """<?xml version="1.0" encoding="UTF-8"?>`
			`<PhysicalMaterial name="Steel_AISI_1020" version="1.0">`
			`<Density units="kg/m3">7850</Density>`
			`<YoungModulus units="GPa">200</YoungModulus>`
			`<PoissonRatio>0.29</PoissonRatio>`
			`</PhysicalMaterial>"""`

			`findings = ResearchFindings(`
			`sources={'user_example': 'steel_material.xml'},`
			`raw_data={'user_example': example_xml},`
			`confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}`
			`)`

			`knowledge = agent.synthesize_knowledge(findings)`

			`# Document session`
			`session_path = agent.document_session(`
			`topic='nx_materials_search_test',`
			`knowledge_gap=gap,`
			`findings=findings,`
			`knowledge=knowledge,`
			`generated_files=[]`
			`)`

			`print(f"\n Session created: {session_path.name}")`
			`print(f" Confidence: {knowledge.confidence:.2f}")`

			`# Step 2: Search for material-related knowledge`
			`print("\n" + "-"*70)`
			`print("[Step 2] Searching for 'material XML' Knowledge")`
			`print("-"*70)`

			`result = agent.search_knowledge_base("material XML")`

			`if result:`
			`print(f"\n ✓ Found relevant session!")`
			`print(f" Session ID: {result['session_id']}")`
			`print(f" Relevance score: {result['relevance_score']:.2f}")`
			`print(f" Confidence: {result['confidence']:.2f}")`
			`print(f" Has schema: {result.get('has_schema', False)}")`
			`assert result['relevance_score'] > 0.5, "Should have good relevance score"`
			`assert result['confidence'] > 0.7, "Should have high confidence"`
			`else:`
			`print("\n ✗ No matching session found")`
			`assert False, "Should find the material XML session"`

			`# Step 3: Search for similar query`
			`print("\n" + "-"*70)`
			`print("[Step 3] Searching for 'NX materials' Knowledge")`
			`print("-"*70)`

			`result2 = agent.search_knowledge_base("NX materials")`

			`if result2:`
			`print(f"\n ✓ Found relevant session!")`
			`print(f" Session ID: {result2['session_id']}")`
			`print(f" Relevance score: {result2['relevance_score']:.2f}")`
			`print(f" Confidence: {result2['confidence']:.2f}")`
			`assert result2['session_id'] == result['session_id'], "Should find same session"`
			`else:`
			`print("\n ✗ No matching session found")`
			`assert False, "Should find the materials session"`

			`# Step 4: Search for non-existent knowledge`
			`print("\n" + "-"*70)`
			`print("[Step 4] Searching for 'thermal analysis' Knowledge")`
			`print("-"*70)`

			`result3 = agent.search_knowledge_base("thermal analysis buckling")`

			`if result3:`
			`print(f"\n Found session (unexpected): {result3['session_id']}")`
			`print(f" Relevance score: {result3['relevance_score']:.2f}")`
			`print(" (This might be OK if relevance is low)")`
			`else:`
			`print("\n ✓ No matching session found (as expected)")`
			`print(" Agent correctly identified this as new knowledge")`

			`# Step 5: Demonstrate how this prevents re-learning`
			`print("\n" + "-"*70)`
			`print("[Step 5] Demonstrating Knowledge Reuse")`
			`print("-"*70)`

			`# Simulate user asking for another material`
			`new_request = "Create aluminum alloy 6061-T6 material XML"`
			`print(f"\n User request: '{new_request}'")`

			`# First, identify knowledge gap`
			`gap2 = agent.identify_knowledge_gap(new_request)`
			`print(f"\n Knowledge gap detected:")`
			`print(f" Missing features: {gap2.missing_features}")`
			`print(f" Missing knowledge: {gap2.missing_knowledge}")`
			`print(f" Confidence: {gap2.confidence:.2f}")`

			`# Then search knowledge base`
			`existing = agent.search_knowledge_base("material XML")`

			`if existing and existing['confidence'] > 0.8:`
			`print(f"\n ✓ Found existing knowledge! No need to ask user again")`
			`print(f" Can reuse learned schema from: {existing['session_id']}")`
			`print(f" Confidence: {existing['confidence']:.2f}")`
			`print("\n Workflow:")`
			`print(" 1. Retrieve learned XML schema from session")`
			`print(" 2. Apply aluminum 6061-T6 properties")`
			`print(" 3. Generate XML using template")`
			`print(" 4. Return result instantly (no user interaction needed!)")`
			`else:`
			`print(f"\n ✗ No reliable existing knowledge, would ask user for example")`

			`# Summary`
			`print("\n" + "="*70)`
			`print("TEST SUMMARY")`
			`print("="*70)`

			`print("\n Knowledge Base Search Performance:")`
			`print(" ✓ Created research session and documented knowledge")`
			`print(" ✓ Successfully searched and found relevant sessions")`
			`print(" ✓ Correctly matched similar queries to same session")`
			`print(" ✓ Returned confidence scores for decision-making")`
			`print(" ✓ Demonstrated knowledge reuse (avoid re-learning)")`

			`print("\n Benefits:")`
			`print(" - Second material request doesn't ask user for example")`
			`print(" - Instant generation using learned template")`
			`print(" - Knowledge accumulates over time")`
			`print(" - Agent becomes smarter with each research session")`

			`print("\n" + "="*70)`
			`print("Knowledge Base Search: WORKING! ✓")`
			`print("="*70 + "\n")`

			`return True`


			`if __name__ == '__main__':`
			`try:`
			`success = test_knowledge_base_search()`
			`sys.exit(0 if success else 1)`
			`except Exception as e:`
			`print(f"\n[ERROR] {e}")`
			`import traceback`
			`traceback.print_exc()`
			`sys.exit(1)`