"""Tests for the context builder.""" import json import atocore.config as config from atocore.context.builder import build_context, get_last_context_pack from atocore.context.project_state import init_project_state_schema, set_state from atocore.ingestion.pipeline import ingest_file from atocore.models.database import init_db def test_build_context_returns_pack(tmp_data_dir, sample_markdown): """Test that context builder returns a valid pack.""" init_db() init_project_state_schema() ingest_file(sample_markdown) pack = build_context("What is AtoCore?") assert pack.total_chars > 0 assert len(pack.chunks_used) > 0 assert pack.budget_remaining >= 0 assert "--- End Context ---" in pack.formatted_context def test_context_respects_budget(tmp_data_dir, sample_markdown): """Test that context builder respects character budget.""" init_db() init_project_state_schema() ingest_file(sample_markdown) pack = build_context("What is AtoCore?", budget=500) assert pack.total_chars <= 500 assert len(pack.formatted_context) <= 500 def test_context_with_project_hint(tmp_data_dir, sample_markdown): """Test that project hint boosts relevant chunks.""" init_db() init_project_state_schema() ingest_file(sample_markdown) pack = build_context("What is the architecture?", project_hint="atocore") assert len(pack.chunks_used) > 0 assert pack.total_chars > 0 def test_context_builder_passes_project_hint_to_retrieval(monkeypatch): init_db() init_project_state_schema() calls = [] def fake_retrieve(query, top_k=None, filter_tags=None, project_hint=None): calls.append((query, project_hint)) return [] monkeypatch.setattr("atocore.context.builder.retrieve", fake_retrieve) build_context("architecture", project_hint="p05-interferometer", budget=300) assert calls == [("architecture", "p05-interferometer")] def test_last_context_pack_stored(tmp_data_dir, sample_markdown): """Test that last context pack is stored for debug.""" init_db() init_project_state_schema() ingest_file(sample_markdown) build_context("test prompt") last = get_last_context_pack() assert last is not None assert last.query == "test prompt" def test_full_prompt_structure(tmp_data_dir, sample_markdown): """Test that the full prompt has correct structure.""" init_db() init_project_state_schema() ingest_file(sample_markdown) pack = build_context("What are memory types?") assert "knowledge base" in pack.full_prompt.lower() assert "What are memory types?" in pack.full_prompt def test_project_state_included_in_context(tmp_data_dir, sample_markdown): """Test that trusted project state is injected into context.""" init_db() init_project_state_schema() ingest_file(sample_markdown) # Set some project state set_state("atocore", "status", "phase", "Phase 0.5 complete") set_state("atocore", "decision", "database", "SQLite for structured data") pack = build_context("What is AtoCore?", project_hint="atocore") # Project state should appear in context assert "--- Trusted Project State ---" in pack.formatted_context assert "Phase 0.5 complete" in pack.formatted_context assert "SQLite for structured data" in pack.formatted_context assert pack.project_state_chars > 0 def test_trusted_state_precedence_is_restated_in_retrieved_context(tmp_data_dir, sample_markdown): """When trusted state and retrieval coexist, the context should restate precedence explicitly.""" init_db() init_project_state_schema() ingest_file(sample_markdown) set_state("atocore", "status", "phase", "Phase 2") pack = build_context("What is AtoCore?", project_hint="atocore") assert "If retrieved context conflicts with Trusted Project State above" in pack.formatted_context def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown): """Test that project state is included even with tight budget.""" init_db() init_project_state_schema() ingest_file(sample_markdown) set_state("atocore", "status", "phase", "Phase 1 in progress") # Small budget — project state should still be included pack = build_context("status?", project_hint="atocore", budget=500) assert "Phase 1 in progress" in pack.formatted_context def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown): """Trusted state should still fit within the total context budget.""" init_db() init_project_state_schema() ingest_file(sample_markdown) set_state("atocore", "status", "notes", "x" * 400) set_state("atocore", "decision", "details", "y" * 400) pack = build_context("status?", project_hint="atocore", budget=120) assert pack.total_chars <= 120 assert pack.budget_remaining >= 0 assert len(pack.formatted_context) <= 120 def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown): """Project state lookup should not depend on exact casing.""" init_db() init_project_state_schema() ingest_file(sample_markdown) set_state("AtoCore", "status", "phase", "Phase 2") pack = build_context("status?", project_hint="atocore") assert "Phase 2" in pack.formatted_context def test_no_project_state_without_hint(tmp_data_dir, sample_markdown): """Test that project state is not included without project hint.""" init_db() init_project_state_schema() ingest_file(sample_markdown) set_state("atocore", "status", "phase", "Phase 1") pack = build_context("What is AtoCore?") assert pack.project_state_chars == 0 assert "--- Trusted Project State ---" not in pack.formatted_context def test_alias_hint_resolves_through_registry(tmp_data_dir, sample_markdown, monkeypatch): """An alias hint like 'p05' should find project state stored under 'p05-interferometer'. This is the regression test for the P1 finding from codex's review: /context/build was previously doing an exact-name lookup that silently dropped trusted project state when the caller passed an alias instead of the canonical project id. """ init_db() init_project_state_schema() ingest_file(sample_markdown) # Stand up a minimal project registry that knows the aliases. # The registry lives in a JSON file pointed to by # ATOCORE_PROJECT_REGISTRY_PATH; the dataclass-driven loader picks # it up on every call (no in-process cache to invalidate). registry_path = tmp_data_dir / "project-registry.json" registry_path.write_text( json.dumps( { "projects": [ { "id": "p05-interferometer", "aliases": ["p05", "interferometer"], "description": "P05 alias-resolution regression test", "ingest_roots": [ {"source": "vault", "subpath": "incoming/projects/p05"} ], } ] } ), encoding="utf-8", ) monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) config.settings = config.Settings() # Trusted state is stored under the canonical id (the way the # /project/state endpoint always writes it). set_state( "p05-interferometer", "status", "next_focus", "Wave 2 trusted-operational ingestion", ) # The bug: pack with alias hint used to silently miss the state. pack_with_alias = build_context("status?", project_hint="p05", budget=2000) assert "Wave 2 trusted-operational ingestion" in pack_with_alias.formatted_context assert pack_with_alias.project_state_chars > 0 # The canonical id should still work the same way. pack_with_canonical = build_context( "status?", project_hint="p05-interferometer", budget=2000 ) assert "Wave 2 trusted-operational ingestion" in pack_with_canonical.formatted_context # A second alias should also resolve. pack_with_other_alias = build_context( "status?", project_hint="interferometer", budget=2000 ) assert "Wave 2 trusted-operational ingestion" in pack_with_other_alias.formatted_context def test_unknown_hint_falls_back_to_raw_lookup(tmp_data_dir, sample_markdown, monkeypatch): """A hint that isn't in the registry should still try the raw name. This preserves backwards compatibility with hand-curated project_state entries that predate the project registry. """ init_db() init_project_state_schema() ingest_file(sample_markdown) # Empty registry — the hint won't resolve through it. registry_path = tmp_data_dir / "project-registry.json" registry_path.write_text('{"projects": []}', encoding="utf-8") monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) config.settings = config.Settings() set_state("orphan-project", "status", "phase", "Solo run") pack = build_context("status?", project_hint="orphan-project", budget=2000) assert "Solo run" in pack.formatted_context