tests/test_research_agent.py

"""
Test Research Agent Functionality

This test demonstrates the Research Agent's ability to:
1. Detect knowledge gaps by searching the feature registry
2. Learn patterns from example files (XML, Python, etc.)
3. Synthesize knowledge from multiple sources
4. Document research sessions

Example workflow:
- User requests: "Create NX material XML for titanium"
- Agent detects: No 'material_generator' feature exists
- Agent plans: Ask user for example → Learn schema → Generate feature
- Agent learns: From user-provided steel_material.xml
- Agent generates: New material XML following learned schema

Author: Atomizer Development Team
Version: 0.1.0 (Phase 2)
Last Updated: 2025-01-16
"""

import sys
import os
from pathlib import Path

# Set UTF-8 encoding for Windows console
if sys.platform == 'win32':
    import codecs
    sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, errors='replace')
    sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, errors='replace')

# Add project root to path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

from optimization_engine.future.research_agent import (
    ResearchAgent,
    ResearchFindings,
    CONFIDENCE_LEVELS
)


def test_knowledge_gap_detection():
    """Test that the agent can detect when it lacks knowledge."""
    print("\n" + "="*60)
    print("TEST 1: Knowledge Gap Detection")
    print("="*60)

    agent = ResearchAgent()

    # Test 1: Known feature (minimize stress)
    print("\n[Test 1a] Request: 'Minimize stress in my bracket'")
    gap = agent.identify_knowledge_gap("Minimize stress in my bracket")
    print(f"  Missing features: {gap.missing_features}")
    print(f"  Missing knowledge: {gap.missing_knowledge}")
    print(f"  Confidence: {gap.confidence:.2f}")
    print(f"  Research needed: {gap.research_needed}")

    assert gap.confidence > 0.5, "Should have high confidence for known features"
    print("  [PASS] Correctly identified existing feature")

    # Test 2: Unknown feature (material XML)
    print("\n[Test 1b] Request: 'Create NX material XML for titanium'")
    gap = agent.identify_knowledge_gap("Create NX material XML for titanium")
    print(f"  Missing features: {gap.missing_features}")
    print(f"  Missing knowledge: {gap.missing_knowledge}")
    print(f"  Confidence: {gap.confidence:.2f}")
    print(f"  Research needed: {gap.research_needed}")

    assert gap.research_needed, "Should need research for unknown domain"
    assert 'material' in gap.missing_knowledge, "Should identify material domain gap"
    print("  [PASS] Correctly detected knowledge gap")


def test_xml_schema_learning():
    """Test that the agent can learn XML schemas from examples."""
    print("\n" + "="*60)
    print("TEST 2: XML Schema Learning")
    print("="*60)

    agent = ResearchAgent()

    # Create example NX material XML
    example_xml = """<?xml version="1.0" encoding="UTF-8"?>
<PhysicalMaterial name="Steel_AISI_1020" version="1.0">
    <Density units="kg/m3">7850</Density>
    <YoungModulus units="GPa">200</YoungModulus>
    <PoissonRatio>0.29</PoissonRatio>
    <ThermalExpansion units="1/K">1.17e-05</ThermalExpansion>
    <YieldStrength units="MPa">295</YieldStrength>
    <UltimateTensileStrength units="MPa">420</UltimateTensileStrength>
</PhysicalMaterial>"""

    print("\n[Test 2a] Learning from steel material XML...")
    print("  Example XML:")
    print("  " + "\n  ".join(example_xml.split('\n')[:3]))
    print("  ...")

    # Create research findings with XML data
    findings = ResearchFindings(
        sources={'user_example': 'steel_material.xml'},
        raw_data={'user_example': example_xml},
        confidence_scores={'user_example': CONFIDENCE_LEVELS['user_validated']}
    )

    # Synthesize knowledge from findings
    knowledge = agent.synthesize_knowledge(findings)

    print(f"\n  Synthesis notes:")
    for line in knowledge.synthesis_notes.split('\n'):
        print(f"    {line}")

    # Verify schema was extracted
    assert knowledge.schema is not None, "Should extract schema from XML"
    assert 'xml_structure' in knowledge.schema, "Should have XML structure"
    assert knowledge.schema['xml_structure']['root_element'] == 'PhysicalMaterial', "Should identify root element"

    print(f"\n  Root element: {knowledge.schema['xml_structure']['root_element']}")
    print(f"  Required fields: {knowledge.schema['xml_structure']['required_fields']}")
    print(f"  Confidence: {knowledge.confidence:.2f}")

    assert knowledge.confidence > 0.8, "User-validated example should have high confidence"
    print("\n  ✓ PASSED: Successfully learned XML schema")


def test_python_code_pattern_extraction():
    """Test that the agent can extract reusable patterns from Python code."""
    print("\n" + "="*60)
    print("TEST 3: Python Code Pattern Extraction")
    print("="*60)

    agent = ResearchAgent()

    # Example Python code
    example_code = """
import numpy as np
from pathlib import Path

class MaterialGenerator:
    def __init__(self, template_path):
        self.template_path = template_path

    def generate_material_xml(self, name, density, youngs_modulus):
        # Generate XML from template
        xml_content = f'''<?xml version="1.0"?>
<PhysicalMaterial name="{name}">
    <Density>{density}</Density>
    <YoungModulus>{youngs_modulus}</YoungModulus>
</PhysicalMaterial>'''
        return xml_content
"""

    print("\n[Test 3a] Extracting patterns from Python code...")
    print("  Code sample:")
    print("  " + "\n  ".join(example_code.split('\n')[:5]))
    print("  ...")

    findings = ResearchFindings(
        sources={'code_example': 'material_generator.py'},
        raw_data={'code_example': example_code},
        confidence_scores={'code_example': 0.8}
    )

    knowledge = agent.synthesize_knowledge(findings)

    print(f"\n  Patterns extracted: {len(knowledge.patterns)}")
    for pattern in knowledge.patterns:
        if pattern['type'] == 'class':
            print(f"    - Class: {pattern['name']}")
        elif pattern['type'] == 'function':
            print(f"    - Function: {pattern['name']}({pattern['parameters']})")
        elif pattern['type'] == 'import':
            module = pattern['module'] or ''
            print(f"    - Import: {module} {pattern['items']}")

    # Verify patterns were extracted
    class_patterns = [p for p in knowledge.patterns if p['type'] == 'class']
    func_patterns = [p for p in knowledge.patterns if p['type'] == 'function']
    import_patterns = [p for p in knowledge.patterns if p['type'] == 'import']

    assert len(class_patterns) > 0, "Should extract class definitions"
    assert len(func_patterns) > 0, "Should extract function definitions"
    assert len(import_patterns) > 0, "Should extract import statements"

    print("\n  ✓ PASSED: Successfully extracted code patterns")


def test_research_session_documentation():
    """Test that research sessions are properly documented."""
    print("\n" + "="*60)
    print("TEST 4: Research Session Documentation")
    print("="*60)

    agent = ResearchAgent()

    # Simulate a complete research session
    from optimization_engine.future.research_agent import KnowledgeGap, SynthesizedKnowledge

    gap = KnowledgeGap(
        missing_features=['material_xml_generator'],
        missing_knowledge=['NX material XML format'],
        user_request="Create NX material XML for titanium Ti-6Al-4V",
        confidence=0.2
    )

    findings = ResearchFindings(
        sources={'user_example': 'steel_material.xml'},
        raw_data={'user_example': '<?xml version="1.0"?><PhysicalMaterial></PhysicalMaterial>'},
        confidence_scores={'user_example': 0.95}
    )

    knowledge = agent.synthesize_knowledge(findings)

    generated_files = [
        'optimization_engine/custom_functions/nx_material_generator.py',
        'knowledge_base/templates/xml_generation_template.py'
    ]

    print("\n[Test 4a] Documenting research session...")
    session_path = agent.document_session(
        topic='nx_materials',
        knowledge_gap=gap,
        findings=findings,
        knowledge=knowledge,
        generated_files=generated_files
    )

    print(f"\n  Session path: {session_path}")
    print(f"  Session exists: {session_path.exists()}")

    # Verify session files were created
    assert session_path.exists(), "Session folder should be created"
    assert (session_path / 'user_question.txt').exists(), "Should save user question"
    assert (session_path / 'sources_consulted.txt').exists(), "Should save sources"
    assert (session_path / 'findings.md').exists(), "Should save findings"
    assert (session_path / 'decision_rationale.md').exists(), "Should save rationale"

    # Read and display user question
    user_question = (session_path / 'user_question.txt').read_text()
    print(f"\n  User question saved: {user_question}")

    # Read and display findings
    findings_content = (session_path / 'findings.md').read_text()
    print(f"\n  Findings preview:")
    for line in findings_content.split('\n')[:10]:
        print(f"    {line}")

    print("\n  ✓ PASSED: Successfully documented research session")


def test_multi_source_synthesis():
    """Test combining knowledge from multiple sources."""
    print("\n" + "="*60)
    print("TEST 5: Multi-Source Knowledge Synthesis")
    print("="*60)

    agent = ResearchAgent()

    # Simulate findings from multiple sources
    xml_example = """<?xml version="1.0"?>
<Material>
    <Density>8000</Density>
    <Modulus>110</Modulus>
</Material>"""

    code_example = """
def create_material(density, modulus):
    return {'density': density, 'modulus': modulus}
"""

    findings = ResearchFindings(
        sources={
            'user_example': 'material.xml',
            'web_docs': 'documentation.html',
            'code_sample': 'generator.py'
        },
        raw_data={
            'user_example': xml_example,
            'web_docs': {'schema': 'Material schema from official docs'},
            'code_sample': code_example
        },
        confidence_scores={
            'user_example': CONFIDENCE_LEVELS['user_validated'],  # 0.95
            'web_docs': CONFIDENCE_LEVELS['web_generic'],          # 0.50
            'code_sample': CONFIDENCE_LEVELS['nxopen_tse']         # 0.70
        }
    )

    print("\n[Test 5a] Synthesizing from 3 sources...")
    print(f"  Sources: {list(findings.sources.keys())}")
    print(f"  Confidence scores:")
    for source, score in findings.confidence_scores.items():
        print(f"    - {source}: {score:.2f}")

    knowledge = agent.synthesize_knowledge(findings)

    print(f"\n  Overall confidence: {knowledge.confidence:.2f}")
    print(f"  Total patterns: {len(knowledge.patterns)}")
    print(f"  Schema elements: {len(knowledge.schema) if knowledge.schema else 0}")

    # Weighted confidence should be dominated by high-confidence user example
    assert knowledge.confidence > 0.7, "Should have high confidence with user-validated source"
    assert knowledge.schema is not None, "Should extract schema from XML"
    assert len(knowledge.patterns) > 0, "Should extract patterns from code"

    print("\n  ✓ PASSED: Successfully synthesized multi-source knowledge")


def run_all_tests():
    """Run all Research Agent tests."""
    print("\n" + "="*60)
    print("=" + " "*58 + "=")
    print("=" + "  RESEARCH AGENT TEST SUITE - Phase 2".center(58) + "=")
    print("=" + " "*58 + "=")
    print("="*60)

    try:
        test_knowledge_gap_detection()
        test_xml_schema_learning()
        test_python_code_pattern_extraction()
        test_research_session_documentation()
        test_multi_source_synthesis()

        print("\n" + "="*60)
        print("ALL TESTS PASSED! ✓")
        print("="*60)
        print("\nResearch Agent is functional and ready for use.")
        print("\nNext steps:")
        print("  1. Integrate with LLM interface for interactive research")
        print("  2. Add web search capability (Phase 2 Week 2)")
        print("  3. Implement feature generation from learned templates")
        print("  4. Build knowledge retrieval system")
        print()

        return True

    except AssertionError as e:
        print(f"\n✗ TEST FAILED: {e}")
        import traceback
        traceback.print_exc()
        return False

    except Exception as e:
        print(f"\n✗ UNEXPECTED ERROR: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == '__main__':
    success = run_all_tests()
    sys.exit(0 if success else 1)