feat: Phase 1 ingestion hardening + Phase 5 Trusted Project State

Phase 1 - Ingestion hardening:
- Encoding fallback (UTF-8/UTF-8-sig/Latin-1/CP1252)
- Delete detection: purge DB/vector entries for removed files
- Ingestion stats endpoint (GET /stats)

Phase 5 - Trusted Project State:
- project_state table with categories (status, decision, requirement, contact, milestone, fact, config)
- CRUD API: POST/GET/DELETE /project/state
- Upsert semantics, invalidation (supersede) support
- Context builder integrates project state at highest trust precedence
- Project state gets 20% budget allocation, appears first in context
- Trust precedence: Project State > Retrieved Chunks (per Master Plan)

33/33 tests passing. Validated end-to-end with GigaBIT M1 project data.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 09:41:59 -04:00
parent 6081462058
commit 531c560db7
7 changed files with 671 additions and 35 deletions
--- a/tests/test_context_builder.py
+++ b/tests/test_context_builder.py
@@ -1,6 +1,7 @@
 """Tests for the context builder."""

 from atocore.context.builder import build_context, get_last_context_pack
+from atocore.context.project_state import init_project_state_schema, set_state
 from atocore.ingestion.pipeline import ingest_file
 from atocore.models.database import init_db

@@ -8,19 +9,20 @@ from atocore.models.database import init_db
 def test_build_context_returns_pack(tmp_data_dir, sample_markdown):
    """Test that context builder returns a valid pack."""
    init_db()
+    init_project_state_schema()
    ingest_file(sample_markdown)

    pack = build_context("What is AtoCore?")
    assert pack.total_chars > 0
    assert len(pack.chunks_used) > 0
    assert pack.budget_remaining >= 0
-    assert "--- AtoCore Context ---" in pack.formatted_context
    assert "--- End Context ---" in pack.formatted_context


 def test_context_respects_budget(tmp_data_dir, sample_markdown):
    """Test that context builder respects character budget."""
    init_db()
+    init_project_state_schema()
    ingest_file(sample_markdown)

    pack = build_context("What is AtoCore?", budget=500)
@@ -30,17 +32,18 @@ def test_context_respects_budget(tmp_data_dir, sample_markdown):
 def test_context_with_project_hint(tmp_data_dir, sample_markdown):
    """Test that project hint boosts relevant chunks."""
    init_db()
+    init_project_state_schema()
    ingest_file(sample_markdown)

    pack = build_context("What is the architecture?", project_hint="atocore")
    assert len(pack.chunks_used) > 0
-    # With project hint, we should still get results
    assert pack.total_chars > 0


 def test_last_context_pack_stored(tmp_data_dir, sample_markdown):
    """Test that last context pack is stored for debug."""
    init_db()
+    init_project_state_schema()
    ingest_file(sample_markdown)

    build_context("test prompt")
@@ -52,9 +55,54 @@ def test_last_context_pack_stored(tmp_data_dir, sample_markdown):
 def test_full_prompt_structure(tmp_data_dir, sample_markdown):
    """Test that the full prompt has correct structure."""
    init_db()
+    init_project_state_schema()
    ingest_file(sample_markdown)

    pack = build_context("What are memory types?")
    assert "knowledge base" in pack.full_prompt.lower()
-    assert "--- AtoCore Context ---" in pack.full_prompt
    assert "What are memory types?" in pack.full_prompt
+
+
+def test_project_state_included_in_context(tmp_data_dir, sample_markdown):
+    """Test that trusted project state is injected into context."""
+    init_db()
+    init_project_state_schema()
+    ingest_file(sample_markdown)
+
+    # Set some project state
+    set_state("atocore", "status", "phase", "Phase 0.5 complete")
+    set_state("atocore", "decision", "database", "SQLite for structured data")
+
+    pack = build_context("What is AtoCore?", project_hint="atocore")
+
+    # Project state should appear in context
+    assert "--- Trusted Project State ---" in pack.formatted_context
+    assert "Phase 0.5 complete" in pack.formatted_context
+    assert "SQLite for structured data" in pack.formatted_context
+    assert pack.project_state_chars > 0
+
+
+def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown):
+    """Test that project state is included even with tight budget."""
+    init_db()
+    init_project_state_schema()
+    ingest_file(sample_markdown)
+
+    set_state("atocore", "status", "phase", "Phase 1 in progress")
+
+    # Small budget — project state should still be included
+    pack = build_context("status?", project_hint="atocore", budget=500)
+    assert "Phase 1 in progress" in pack.formatted_context
+
+
+def test_no_project_state_without_hint(tmp_data_dir, sample_markdown):
+    """Test that project state is not included without project hint."""
+    init_db()
+    init_project_state_schema()
+    ingest_file(sample_markdown)
+
+    set_state("atocore", "status", "phase", "Phase 1")
+
+    pack = build_context("What is AtoCore?")
+    assert pack.project_state_chars == 0
+    assert "--- Trusted Project State ---" not in pack.formatted_context
--- a/tests/test_project_state.py
+++ b/tests/test_project_state.py
@@ -0,0 +1,127 @@
+"""Tests for Trusted Project State."""
+
+import pytest
+
+from atocore.context.project_state import (
+    CATEGORIES,
+    ensure_project,
+    format_project_state,
+    get_state,
+    init_project_state_schema,
+    invalidate_state,
+    set_state,
+)
+from atocore.models.database import init_db
+
+
+@pytest.fixture(autouse=True)
+def setup_db(tmp_data_dir):
+    """Initialize DB and project state schema for every test."""
+    init_db()
+    init_project_state_schema()
+
+
+def test_ensure_project_creates():
+    """Test creating a new project."""
+    pid = ensure_project("test-project", "A test project")
+    assert pid
+    # Second call returns same ID
+    pid2 = ensure_project("test-project")
+    assert pid == pid2
+
+
+def test_set_state_creates_entry():
+    """Test creating a project state entry."""
+    entry = set_state("myproject", "status", "phase", "Phase 0.5 — PoC complete")
+    assert entry.category == "status"
+    assert entry.key == "phase"
+    assert entry.value == "Phase 0.5 — PoC complete"
+    assert entry.status == "active"
+
+
+def test_set_state_upserts():
+    """Test that setting same key updates the value."""
+    set_state("myproject", "status", "phase", "Phase 0")
+    entry = set_state("myproject", "status", "phase", "Phase 1")
+    assert entry.value == "Phase 1"
+
+    # Only one entry should exist
+    entries = get_state("myproject", category="status")
+    assert len(entries) == 1
+    assert entries[0].value == "Phase 1"
+
+
+def test_set_state_invalid_category():
+    """Test that invalid category raises ValueError."""
+    with pytest.raises(ValueError, match="Invalid category"):
+        set_state("myproject", "invalid_category", "key", "value")
+
+
+def test_get_state_all():
+    """Test getting all state entries for a project."""
+    set_state("proj", "status", "phase", "Phase 1")
+    set_state("proj", "decision", "database", "SQLite for v1")
+    set_state("proj", "requirement", "latency", "<2 seconds")
+
+    entries = get_state("proj")
+    assert len(entries) == 3
+    categories = {e.category for e in entries}
+    assert categories == {"status", "decision", "requirement"}
+
+
+def test_get_state_by_category():
+    """Test filtering by category."""
+    set_state("proj", "status", "phase", "Phase 1")
+    set_state("proj", "decision", "database", "SQLite")
+    set_state("proj", "decision", "vectordb", "ChromaDB")
+
+    entries = get_state("proj", category="decision")
+    assert len(entries) == 2
+    assert all(e.category == "decision" for e in entries)
+
+
+def test_get_state_nonexistent_project():
+    """Test getting state for a project that doesn't exist."""
+    entries = get_state("nonexistent")
+    assert entries == []
+
+
+def test_invalidate_state():
+    """Test marking a state entry as superseded."""
+    set_state("invalidate-test", "decision", "approach", "monolith")
+    success = invalidate_state("invalidate-test", "decision", "approach")
+    assert success
+
+    # Active entries should be empty
+    entries = get_state("invalidate-test", active_only=True)
+    assert len(entries) == 0
+
+    # But entry still exists if we include inactive
+    entries = get_state("invalidate-test", active_only=False)
+    assert len(entries) == 1
+    assert entries[0].status == "superseded"
+
+
+def test_invalidate_nonexistent():
+    """Test invalidating a nonexistent entry."""
+    success = invalidate_state("proj", "decision", "nonexistent")
+    assert not success
+
+
+def test_format_project_state():
+    """Test formatting state entries for context injection."""
+    set_state("proj", "status", "phase", "Phase 1")
+    set_state("proj", "decision", "database", "SQLite", source="Build Spec V1")
+    entries = get_state("proj")
+
+    formatted = format_project_state(entries)
+    assert "--- Trusted Project State ---" in formatted
+    assert "--- End Project State ---" in formatted
+    assert "phase: Phase 1" in formatted
+    assert "database: SQLite" in formatted
+    assert "(source: Build Spec V1)" in formatted
+
+
+def test_format_empty():
+    """Test formatting empty state."""
+    assert format_project_state([]) == ""