# tests/test_context_builder.py

"""Tests for the context builder."""

import json

import atocore.config as config
from atocore.context.builder import build_context, get_last_context_pack
from atocore.context.project_state import init_project_state_schema, set_state
from atocore.ingestion.pipeline import ingest_file
from atocore.models.database import init_db


def test_build_context_returns_pack(tmp_data_dir, sample_markdown):
    """Building a context after ingesting the sample yields a populated pack.

    The pack must report a positive character count, at least one chunk,
    a non-negative remaining budget, and formatted text that contains the
    closing context marker.
    """
    # Prepare storage, then ingest the sample document.
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    prompt = "What is AtoCore?"
    end_marker = "--- End Context ---"

    result = build_context(prompt)

    assert result.total_chars > 0
    assert len(result.chunks_used) > 0
    assert result.budget_remaining >= 0
    assert end_marker in result.formatted_context


def test_context_respects_budget(tmp_data_dir, sample_markdown):
    """A pack built with an explicit budget never exceeds that budget."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    char_budget = 500
    limited = build_context("What is AtoCore?", budget=char_budget)

    # Check both the reported size and the length of the rendered text.
    assert limited.total_chars <= char_budget
    assert len(limited.formatted_context) <= char_budget


def test_context_with_project_hint(tmp_data_dir, sample_markdown):
    """Supplying a project hint still produces a non-empty pack.

    Only the presence of chunks and characters is asserted here; the
    ranking effect of the hint is not measured by this test.
    """
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    hinted = build_context("What is the architecture?", project_hint="atocore")

    assert len(hinted.chunks_used) > 0
    assert hinted.total_chars > 0


def test_context_builder_passes_project_hint_to_retrieval(tmp_data_dir, monkeypatch):
    """The builder forwards the query and project hint to ``retrieve``.

    ``atocore.context.builder.retrieve`` is replaced with a stub that
    records the ``query`` and ``project_hint`` it receives and returns an
    empty list. The test then asserts the stub was called exactly once
    with the values given to ``build_context``.

    ``tmp_data_dir`` is requested, as in every other test of this module,
    so that ``init_db()`` presumably runs against the temporary data
    directory rather than the default one (NOTE(review): confirm against
    the fixture definition in conftest).
    """
    init_db()
    init_project_state_schema()

    recorded_calls = []

    def fake_retrieve(query, top_k=None, filter_tags=None, project_hint=None):
        # Record only the two arguments under test.
        recorded_calls.append((query, project_hint))
        return []

    # Patch the name as looked up from the builder module.
    monkeypatch.setattr("atocore.context.builder.retrieve", fake_retrieve)

    build_context("architecture", project_hint="p05-interferometer", budget=300)

    assert recorded_calls == [("architecture", "p05-interferometer")]


def test_last_context_pack_stored(tmp_data_dir, sample_markdown):
    """The most recently built pack can be fetched back afterwards."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    prompt = "test prompt"
    build_context(prompt)

    # The return value above is deliberately ignored; the pack is read
    # back through the accessor instead.
    remembered = get_last_context_pack()
    assert remembered is not None
    assert remembered.query == prompt


def test_full_prompt_structure(tmp_data_dir, sample_markdown):
    """The full prompt mentions the knowledge base and echoes the question."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    question = "What are memory types?"
    built = build_context(question)

    # The phrase check is case-insensitive; the question must appear verbatim.
    assert "knowledge base" in built.full_prompt.lower()
    assert question in built.full_prompt


def test_project_state_included_in_context(tmp_data_dir, sample_markdown):
    """State stored for a project appears in the pack when that project is hinted."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # Store two values under the "atocore" project.
    phase_value = "Phase 0.5 complete"
    database_value = "SQLite for structured data"
    set_state("atocore", "status", "phase", phase_value)
    set_state("atocore", "decision", "database", database_value)

    built = build_context("What is AtoCore?", project_hint="atocore")

    # Both values and the section header must be present in the rendered
    # text, and the pack must account for the characters they used.
    assert "--- Trusted Project State ---" in built.formatted_context
    assert phase_value in built.formatted_context
    assert database_value in built.formatted_context
    assert built.project_state_chars > 0


def test_trusted_state_precedence_is_restated_in_retrieved_context(tmp_data_dir, sample_markdown):
    """With both project state and ingested content, the context spells out precedence.

    The formatted context must contain the sentence that tells the reader
    what to do when retrieved context conflicts with the trusted state.
    """
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)
    set_state("atocore", "status", "phase", "Phase 2")

    precedence_notice = "If retrieved context conflicts with Trusted Project State above"
    built = build_context("What is AtoCore?", project_hint="atocore")

    assert precedence_notice in built.formatted_context


def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown):
    """Project state is still present when the budget is small."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    phase_value = "Phase 1 in progress"
    set_state("atocore", "status", "phase", phase_value)

    # A 500-character budget is small, yet the state value must survive.
    tight = build_context("status?", project_hint="atocore", budget=500)

    assert phase_value in tight.formatted_context


def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown):
    """Oversized project state must not push the pack past the total budget."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # Each stored value alone (400 characters) exceeds the budget used below.
    set_state("atocore", "status", "notes", "x" * 400)
    set_state("atocore", "decision", "details", "y" * 400)

    total_budget = 120
    squeezed = build_context("status?", project_hint="atocore", budget=total_budget)

    assert squeezed.total_chars <= total_budget
    assert squeezed.budget_remaining >= 0
    assert len(squeezed.formatted_context) <= total_budget


def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
    """State stored as "AtoCore" is found with the lowercase hint "atocore"."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    stored_value = "Phase 2"
    # The stored project name and the hint below differ only in casing.
    set_state("AtoCore", "status", "phase", stored_value)

    built = build_context("status?", project_hint="atocore")

    assert stored_value in built.formatted_context


def test_no_project_state_without_hint(tmp_data_dir, sample_markdown):
    """Without a project hint, stored project state stays out of the pack."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # State exists in storage, but the build below passes no project_hint.
    set_state("atocore", "status", "phase", "Phase 1")

    unhinted = build_context("What is AtoCore?")

    assert unhinted.project_state_chars == 0
    assert "--- Trusted Project State ---" not in unhinted.formatted_context


def test_alias_hint_resolves_through_registry(tmp_data_dir, sample_markdown, monkeypatch):
    """An alias hint like 'p05' should find project state stored under 'p05-interferometer'.

    This is the regression test for the P1 finding from codex's review:
    /context/build was previously doing an exact-name lookup that
    silently dropped trusted project state when the caller passed an
    alias instead of the canonical project id.

    The test writes a one-project registry file, points the settings at
    it, stores state under the canonical id, and then builds a pack with
    the first alias, the canonical id, and the second alias in turn.
    """
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # Stand up a minimal project registry that knows the aliases.
    # The registry lives in a JSON file pointed to by
    # ATOCORE_PROJECT_REGISTRY_PATH; the dataclass-driven loader picks
    # it up on every call (no in-process cache to invalidate).
    registry_path = tmp_data_dir / "project-registry.json"
    registry_path.write_text(
        json.dumps(
            {
                "projects": [
                    {
                        "id": "p05-interferometer",
                        "aliases": ["p05", "interferometer"],
                        "description": "P05 alias-resolution regression test",
                        "ingest_roots": [
                            {"source": "vault", "subpath": "incoming/projects/p05"}
                        ],
                    }
                ]
            }
        ),
        encoding="utf-8",
    )
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
    # Rebuild settings so the new env var is read. Going through
    # monkeypatch (instead of assigning config.settings directly) means
    # the previous settings object is restored at teardown, so the
    # temporary registry path cannot leak into later tests.
    monkeypatch.setattr(config, "settings", config.Settings())

    # Trusted state is stored under the canonical id (the way the
    # /project/state endpoint always writes it).
    set_state(
        "p05-interferometer",
        "status",
        "next_focus",
        "Wave 2 trusted-operational ingestion",
    )

    # The bug: pack with alias hint used to silently miss the state.
    pack_with_alias = build_context("status?", project_hint="p05", budget=2000)
    assert "Wave 2 trusted-operational ingestion" in pack_with_alias.formatted_context
    assert pack_with_alias.project_state_chars > 0

    # The canonical id should still work the same way.
    pack_with_canonical = build_context(
        "status?", project_hint="p05-interferometer", budget=2000
    )
    assert "Wave 2 trusted-operational ingestion" in pack_with_canonical.formatted_context

    # A second alias should also resolve.
    pack_with_other_alias = build_context(
        "status?", project_hint="interferometer", budget=2000
    )
    assert "Wave 2 trusted-operational ingestion" in pack_with_other_alias.formatted_context


def test_unknown_hint_falls_back_to_raw_lookup(tmp_data_dir, sample_markdown, monkeypatch):
    """A hint that isn't in the registry should still try the raw name.

    This preserves backwards compatibility with hand-curated
    project_state entries that predate the project registry.

    The registry file written here lists no projects, state is stored
    under "orphan-project", and the pack built with that same name as the
    hint must contain the stored value.
    """
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # Empty registry — the hint won't resolve through it.
    registry_path = tmp_data_dir / "project-registry.json"
    registry_path.write_text('{"projects": []}', encoding="utf-8")
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
    # Rebuild settings so the new env var is read. Using monkeypatch
    # (rather than a bare assignment to config.settings) restores the
    # previous settings object when the test finishes.
    monkeypatch.setattr(config, "settings", config.Settings())

    set_state("orphan-project", "status", "phase", "Solo run")

    pack = build_context("status?", project_hint="orphan-project", budget=2000)
    assert "Solo run" in pack.formatted_context


def test_project_memories_included_in_pack(tmp_data_dir, sample_markdown):
    """Memories of the hinted project appear under '--- Project Memories ---'.

    Two memories are created for two different projects. The pack built
    with the first project's hint must contain that project's memory,
    must not contain the other project's memory, and must report a
    positive project-memory character count.
    """
    from atocore.memory.service import create_memory

    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    hinted_project = "p04-gigabit"

    target_memory = create_memory(
        memory_type="project",
        content="the mirror architecture is Option B conical back for p04-gigabit",
        project=hinted_project,
        confidence=0.9,
    )
    # This one belongs to another project and must stay out of the pack.
    create_memory(
        memory_type="project",
        content="polisher suite splits into sim, post, control, contracts",
        project="p06-polisher",
        confidence=0.9,
    )

    built = build_context(
        "remind me about the mirror architecture",
        project_hint=hinted_project,
        budget=3000,
    )

    assert "--- Project Memories ---" in built.formatted_context
    assert "Option B conical back" in built.formatted_context
    assert "polisher suite splits" not in built.formatted_context
    assert built.project_memory_chars > 0
    # The created memory object carries the project it was created with.
    assert target_memory.project == hinted_project


def test_project_memories_absent_without_project_hint(tmp_data_dir, sample_markdown):
    """No project hint means no project-memory section in the pack.

    A project-scoped memory exists, but the build passes no hint, so the
    section header must be missing and the project-memory character count
    must be zero.
    """
    from atocore.memory.service import create_memory

    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    create_memory(
        memory_type="project",
        content="scoped project knowledge that should not leak globally",
        project="p04-gigabit",
        confidence=0.9,
    )

    unhinted = build_context("tell me something", budget=3000)

    assert "--- Project Memories ---" not in unhinted.formatted_context
    assert unhinted.project_memory_chars == 0


def test_project_memories_query_relevance_ordering(tmp_data_dir, sample_markdown):
    """The memory matching the query is kept even when its confidence is lower.

    Two memories are created for the same project: one with confidence
    0.97 about the optical layout, and one with confidence 0.85 about the
    vendor signal. The query asks about the vendor signal, and the pack
    must contain the vendor memory.

    Regression for the 2026-04-11 p05-vendor-signal harness failure:
    memory selection was fixed-order by confidence, so a lower-ranked
    vendor memory got starved out of the budget when a query was
    specifically about vendors.
    """
    from atocore.memory.service import create_memory

    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    project_id = "p05-interferometer"

    # Higher confidence, but not what the query is about.
    create_memory(
        memory_type="project",
        content="the folded-beam interferometer uses a CGH stage and fold mirror",
        project=project_id,
        confidence=0.97,
    )
    # Lower confidence, but directly about the vendor signal.
    create_memory(
        memory_type="knowledge",
        content="vendor signal: Zygo Verifire SV is the strongest value path for the interferometer",
        project=project_id,
        confidence=0.85,
    )

    # Budget chosen tight enough that only one project memory fits.
    built = build_context(
        "what is the current vendor signal for the interferometer",
        project_hint=project_id,
        budget=1200,
    )

    assert "Zygo Verifire SV" in built.formatted_context
    assert built.project_memory_chars > 0