# Atomizer/tests/test_knowledge_base_search.py

"""
Test Knowledge Base Search and Retrieval

This test demonstrates the Research Agent's ability to:
1. Search through past research sessions
2. Find relevant knowledge based on keywords
3. Retrieve session information with confidence scores
4. Avoid re-learning what it already knows

Author: Atomizer Development Team
Version: 0.1.0 (Phase 2 Week 2)
Last Updated: 2025-01-16
"""

import sys
from pathlib import Path

# Set UTF-8 encoding for Windows console
if sys.platform == 'win32':
    # Reconfigure the existing streams in place instead of replacing them with
    # codecs writers: the stream objects keep their identity, and nothing here
    # depends on a ``.buffer`` attribute being present.
    for _stream in (sys.stdout, sys.stderr):
        # A replaced stream (IDE console, capture plugin) may not offer
        # reconfigure(); leave such a stream untouched rather than crash
        # at import time.
        if hasattr(_stream, 'reconfigure'):
            _stream.reconfigure(encoding='utf-8', errors='replace')

# Add project root to path
# (two levels up from this file, so ``optimization_engine`` can be imported)
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from optimization_engine.research_agent import (
    ResearchAgent,
    ResearchFindings,
    KnowledgeGap,
    CONFIDENCE_LEVELS
)


def _print_section(title, rule="-", width=70):
    """Print *title* preceded by a blank line and framed by two rules."""
    bar = rule * width
    print("\n" + bar)
    print(title)
    print(bar)


def _print_match(match):
    """Print the session id, relevance score and confidence of a search hit."""
    print("\n  ✓ Found relevant session!")
    print(f"    Session ID: {match['session_id']}")
    print(f"    Relevance score: {match['relevance_score']:.2f}")
    print(f"    Confidence: {match['confidence']:.2f}")


def _require_match(match, failure_message):
    """Return *match*; report and fail with *failure_message* if it is falsy.

    Raises:
        AssertionError: when the search produced no result. Raised explicitly
            (not via ``assert False``) so the check survives ``python -O``.
    """
    if not match:
        print("\n  ✗ No matching session found")
        raise AssertionError(failure_message)
    return match


def test_knowledge_base_search():
    """Test that the agent can find and retrieve past research sessions.

    The scenario runs in five steps:

    1. Document a research session synthesized from a user-supplied
       material XML example.
    2. Search for "material XML" and require a hit with relevance > 0.5
       and confidence > 0.7.
    3. Search for "NX materials" and require the same session as step 2.
    4. Search for an unrelated topic; the outcome is only reported, since a
       low-relevance hit is tolerated.
    5. Show that a hit with confidence > 0.8 would let a new material
       request reuse the learned knowledge instead of asking the user.

    Returns:
        bool: ``True`` once every step has completed; the ``__main__``
        runner uses it to pick the exit code.

    Raises:
        AssertionError: if a required search finds nothing or a score
            threshold is not met.
    """
    _print_section("KNOWLEDGE BASE SEARCH TEST", rule="=")

    agent = ResearchAgent()

    # Step 1: Create a research session (if not exists)
    _print_section("[Step 1] Creating Test Research Session")

    gap = KnowledgeGap(
        missing_features=['material_xml_generator'],
        missing_knowledge=['NX material XML format'],
        user_request="Create NX material XML for titanium Ti-6Al-4V",
        confidence=0.2
    )

    # Simulate findings from user example
    example_xml = """<?xml version="1.0" encoding="UTF-8"?>
<PhysicalMaterial name="Steel_AISI_1020" version="1.0">
    <Density units="kg/m3">7850</Density>
    <YoungModulus units="GPa">200</YoungModulus>
    <PoissonRatio>0.29</PoissonRatio>
</PhysicalMaterial>"""

    findings = ResearchFindings(
        sources={'user_example': 'steel_material.xml'},
        raw_data={'user_example': example_xml},
        confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}
    )

    knowledge = agent.synthesize_knowledge(findings)

    # Document session
    session_path = agent.document_session(
        topic='nx_materials_search_test',
        knowledge_gap=gap,
        findings=findings,
        knowledge=knowledge,
        generated_files=[]
    )

    print(f"\n  Session created: {session_path.name}")
    print(f"  Confidence: {knowledge.confidence:.2f}")

    # Step 2: Search for material-related knowledge
    material_query = "material XML"
    _print_section(f"[Step 2] Searching for '{material_query}' Knowledge")

    result = _require_match(
        agent.search_knowledge_base(material_query),
        "Should find the material XML session",
    )
    _print_match(result)
    print(f"    Has schema: {result.get('has_schema', False)}")
    assert result['relevance_score'] > 0.5, "Should have good relevance score"
    assert result['confidence'] > 0.7, "Should have high confidence"

    # Step 3: Search for similar query
    similar_query = "NX materials"
    _print_section(f"[Step 3] Searching for '{similar_query}' Knowledge")

    result2 = _require_match(
        agent.search_knowledge_base(similar_query),
        "Should find the materials session",
    )
    _print_match(result2)
    assert result2['session_id'] == result['session_id'], "Should find same session"

    # Step 4: Search for non-existent knowledge
    # The header is built from the query itself so the two cannot drift apart.
    unrelated_query = "thermal analysis buckling"
    _print_section(f"[Step 4] Searching for '{unrelated_query}' Knowledge")

    result3 = agent.search_knowledge_base(unrelated_query)

    # No assertion here on purpose: a low-relevance hit is acceptable.
    if result3:
        print(f"\n  Found session (unexpected): {result3['session_id']}")
        print(f"    Relevance score: {result3['relevance_score']:.2f}")
        print("  (This might be OK if relevance is low)")
    else:
        print("\n  ✓ No matching session found (as expected)")
        print("    Agent correctly identified this as new knowledge")

    # Step 5: Demonstrate how this prevents re-learning
    _print_section("[Step 5] Demonstrating Knowledge Reuse")

    # Simulate user asking for another material
    new_request = "Create aluminum alloy 6061-T6 material XML"
    print(f"\n  User request: '{new_request}'")

    # First, identify knowledge gap
    gap2 = agent.identify_knowledge_gap(new_request)
    print("\n  Knowledge gap detected:")
    print(f"    Missing features: {gap2.missing_features}")
    print(f"    Missing knowledge: {gap2.missing_knowledge}")
    print(f"    Confidence: {gap2.confidence:.2f}")

    # Then search knowledge base
    existing = agent.search_knowledge_base(material_query)

    # Remember the outcome so the summary below reports what really happened.
    reuse_demonstrated = bool(existing and existing['confidence'] > 0.8)

    if reuse_demonstrated:
        print("\n  ✓ Found existing knowledge! No need to ask user again")
        print(f"    Can reuse learned schema from: {existing['session_id']}")
        print(f"    Confidence: {existing['confidence']:.2f}")
        print("\n  Workflow:")
        print("    1. Retrieve learned XML schema from session")
        print("    2. Apply aluminum 6061-T6 properties")
        print("    3. Generate XML using template")
        print("    4. Return result instantly (no user interaction needed!)")
    else:
        print("\n  ✗ No reliable existing knowledge, would ask user for example")

    # Summary
    _print_section("TEST SUMMARY", rule="=")

    print("\n  Knowledge Base Search Performance:")
    print("    ✓ Created research session and documented knowledge")
    print("    ✓ Successfully searched and found relevant sessions")
    print("    ✓ Correctly matched similar queries to same session")
    print("    ✓ Returned confidence scores for decision-making")
    if reuse_demonstrated:
        print("    ✓ Demonstrated knowledge reuse (avoid re-learning)")
    else:
        print("    ✗ Knowledge reuse not demonstrated (no reliable existing knowledge)")

    print("\n  Benefits:")
    print("    - Second material request doesn't ask user for example")
    print("    - Instant generation using learned template")
    print("    - Knowledge accumulates over time")
    print("    - Agent becomes smarter with each research session")

    print("\n" + "="*70)
    print("Knowledge Base Search: WORKING! ✓")
    print("="*70 + "\n")

    return True


if __name__ == '__main__':
    # Standalone entry point: run the scenario and translate its outcome
    # into a process exit status (0 = passed, 1 = failed or errored).
    try:
        passed = test_knowledge_base_search()
    except Exception as exc:
        # AssertionError is an Exception subclass, so failed checks land here
        # too; report the message, then the full traceback.
        print(f"\n[ERROR] {exc}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    else:
        exit_code = 0 if passed else 1
        sys.exit(exit_code)