feat: Phase 1 ingestion hardening + Phase 5 Trusted Project State
Phase 1 - Ingestion hardening:
- Encoding fallback (UTF-8/UTF-8-sig/Latin-1/CP1252)
- Delete detection: purge DB/vector entries for removed files
- Ingestion stats endpoint (GET /stats)

Phase 5 - Trusted Project State:
- project_state table with categories (status, decision, requirement, contact, milestone, fact, config)
- CRUD API: POST/GET/DELETE /project/state
- Upsert semantics, invalidation (supersede) support
- Context builder integrates project state at highest trust precedence
- Project state gets 20% budget allocation, appears first in context
- Trust precedence: Project State > Retrieved Chunks (per Master Plan)

33/33 tests passing. Validated end-to-end with GigaBIT M1 project data.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
"""Tests for the context builder."""
|
||||
|
||||
from atocore.context.builder import build_context, get_last_context_pack
|
||||
from atocore.context.project_state import init_project_state_schema, set_state
|
||||
from atocore.ingestion.pipeline import ingest_file
|
||||
from atocore.models.database import init_db
|
||||
|
||||
@@ -8,19 +9,20 @@ from atocore.models.database import init_db
|
||||
def test_build_context_returns_pack(tmp_data_dir, sample_markdown):
    """A pack built after ingestion is non-empty and wrapped in context markers."""
    # Prepare the database and project-state schema, then ingest the sample doc.
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    result = build_context("What is AtoCore?")

    # Size and budget bookkeeping.
    assert result.total_chars > 0
    assert len(result.chunks_used) > 0
    assert result.budget_remaining >= 0

    # The formatted context must carry both the opening and closing markers.
    rendered = result.formatted_context
    for marker in ("--- AtoCore Context ---", "--- End Context ---"):
        assert marker in rendered
|
||||
|
||||
|
||||
def test_context_respects_budget(tmp_data_dir, sample_markdown):
|
||||
"""Test that context builder respects character budget."""
|
||||
init_db()
|
||||
init_project_state_schema()
|
||||
ingest_file(sample_markdown)
|
||||
|
||||
pack = build_context("What is AtoCore?", budget=500)
|
||||
@@ -30,17 +32,18 @@ def test_context_respects_budget(tmp_data_dir, sample_markdown):
|
||||
def test_context_with_project_hint(tmp_data_dir, sample_markdown):
    """Passing a project hint still yields chunks and non-empty content."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    hinted_pack = build_context(
        "What is the architecture?",
        project_hint="atocore",
    )

    # This only checks that results survive the hint; ranking is not inspected.
    assert len(hinted_pack.chunks_used) > 0
    assert hinted_pack.total_chars > 0
|
||||
|
||||
|
||||
def test_last_context_pack_stored(tmp_data_dir, sample_markdown):
|
||||
"""Test that last context pack is stored for debug."""
|
||||
init_db()
|
||||
init_project_state_schema()
|
||||
ingest_file(sample_markdown)
|
||||
|
||||
build_context("test prompt")
|
||||
@@ -52,9 +55,54 @@ def test_last_context_pack_stored(tmp_data_dir, sample_markdown):
|
||||
def test_full_prompt_structure(tmp_data_dir, sample_markdown):
    """The full prompt mentions the knowledge base, the context marker, and the question."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    prompt = build_context("What are memory types?").full_prompt

    # The "knowledge base" wording is matched case-insensitively.
    assert "knowledge base" in prompt.lower()
    assert "--- AtoCore Context ---" in prompt
    # The user's question must be echoed verbatim.
    assert "What are memory types?" in prompt
|
||||
|
||||
|
||||
def test_project_state_included_in_context(tmp_data_dir, sample_markdown):
    """State stored for a project appears in context built with that project hint."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # Record two entries under the project that is used as the hint below.
    set_state("atocore", "status", "phase", "Phase 0.5 complete")
    set_state("atocore", "decision", "database", "SQLite for structured data")

    pack = build_context("What is AtoCore?", project_hint="atocore")

    # The section header and both stored values must be rendered.
    rendered = pack.formatted_context
    expected_fragments = (
        "--- Trusted Project State ---",
        "Phase 0.5 complete",
        "SQLite for structured data",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
    assert pack.project_state_chars > 0
|
||||
|
||||
|
||||
def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown):
    """Project state is still rendered when the character budget is small."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    set_state("atocore", "status", "phase", "Phase 1 in progress")

    # A 500-character budget must not squeeze out the stored state value.
    tight_pack = build_context(
        "status?",
        project_hint="atocore",
        budget=500,
    )
    assert "Phase 1 in progress" in tight_pack.formatted_context
|
||||
|
||||
|
||||
def test_no_project_state_without_hint(tmp_data_dir, sample_markdown):
    """Without a project hint, stored project state is left out of the context."""
    init_db()
    init_project_state_schema()
    ingest_file(sample_markdown)

    # State exists for "atocore", but the build below does not name a project.
    set_state("atocore", "status", "phase", "Phase 1")

    unhinted_pack = build_context("What is AtoCore?")

    assert unhinted_pack.project_state_chars == 0
    assert "--- Trusted Project State ---" not in unhinted_pack.formatted_context
|
||||
|
||||
127
tests/test_project_state.py
Normal file
127
tests/test_project_state.py
Normal file
@@ -0,0 +1,127 @@
|
||||
"""Tests for Trusted Project State."""
|
||||
|
||||
import pytest
|
||||
|
||||
from atocore.context.project_state import (
|
||||
CATEGORIES,
|
||||
ensure_project,
|
||||
format_project_state,
|
||||
get_state,
|
||||
init_project_state_schema,
|
||||
invalidate_state,
|
||||
set_state,
|
||||
)
|
||||
from atocore.models.database import init_db
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def setup_db(tmp_data_dir):
    """Autouse fixture: initialize the DB, then the project-state schema."""
    # Order is preserved: the database first, the project-state schema second.
    for initialize in (init_db, init_project_state_schema):
        initialize()
|
||||
|
||||
|
||||
def test_ensure_project_creates():
    """ensure_project returns a truthy ID and the same ID on a repeat call."""
    first_id = ensure_project("test-project", "A test project")
    assert first_id

    # A repeat call with the same name (no description) must return the same ID.
    repeat_id = ensure_project("test-project")
    assert first_id == repeat_id
|
||||
|
||||
|
||||
def test_set_state_creates_entry():
    """set_state returns an active entry echoing the category, key, and value."""
    created = set_state("myproject", "status", "phase", "Phase 0.5 — PoC complete")

    observed = (created.category, created.key, created.value, created.status)
    assert observed == ("status", "phase", "Phase 0.5 — PoC complete", "active")
|
||||
|
||||
|
||||
def test_set_state_upserts():
    """Writing the same project/category/key twice replaces the value."""
    set_state("myproject", "status", "phase", "Phase 0")
    updated = set_state("myproject", "status", "phase", "Phase 1")
    assert updated.value == "Phase 1"

    # The second write must not have produced a second row.
    status_entries = get_state("myproject", category="status")
    assert len(status_entries) == 1
    sole_entry = status_entries[0]
    assert sole_entry.value == "Phase 1"
|
||||
|
||||
|
||||
def test_set_state_invalid_category():
    """An unknown category makes set_state raise ValueError("Invalid category...")."""
    expect_invalid = pytest.raises(ValueError, match="Invalid category")
    with expect_invalid:
        set_state("myproject", "invalid_category", "key", "value")
|
||||
|
||||
|
||||
def test_get_state_all():
    """get_state without a filter returns every entry stored for the project."""
    seeded = (
        ("status", "phase", "Phase 1"),
        ("decision", "database", "SQLite for v1"),
        ("requirement", "latency", "<2 seconds"),
    )
    for category, key, value in seeded:
        set_state("proj", category, key, value)

    found = get_state("proj")

    assert len(found) == 3
    seen_categories = {entry.category for entry in found}
    assert seen_categories == {"status", "decision", "requirement"}
|
||||
|
||||
|
||||
def test_get_state_by_category():
    """The category argument limits get_state to entries of that category."""
    # One status entry plus two decisions; only the decisions should come back.
    set_state("proj", "status", "phase", "Phase 1")
    set_state("proj", "decision", "database", "SQLite")
    set_state("proj", "decision", "vectordb", "ChromaDB")

    decisions = get_state("proj", category="decision")

    assert len(decisions) == 2
    for entry in decisions:
        assert entry.category == "decision"
|
||||
|
||||
|
||||
def test_get_state_nonexistent_project():
    """Querying a project that was never created yields an empty list."""
    assert get_state("nonexistent") == []
|
||||
|
||||
|
||||
def test_invalidate_state():
    """Invalidation hides an entry from active queries but keeps it as superseded."""
    project = "invalidate-test"
    set_state(project, "decision", "approach", "monolith")

    invalidated = invalidate_state(project, "decision", "approach")
    assert invalidated

    # Nothing remains when only active entries are requested.
    active_entries = get_state(project, active_only=True)
    assert len(active_entries) == 0

    # Including inactive entries shows the row, now marked "superseded".
    all_entries = get_state(project, active_only=False)
    assert len(all_entries) == 1
    assert all_entries[0].status == "superseded"
|
||||
|
||||
|
||||
def test_invalidate_nonexistent():
    """Invalidating an entry that was never stored reports a falsy result."""
    assert not invalidate_state("proj", "decision", "nonexistent")
|
||||
|
||||
|
||||
def test_format_project_state():
    """Formatted state has header/footer markers, key-value lines, and the source."""
    set_state("proj", "status", "phase", "Phase 1")
    set_state("proj", "decision", "database", "SQLite", source="Build Spec V1")

    rendered = format_project_state(get_state("proj"))

    expected_fragments = (
        "--- Trusted Project State ---",
        "--- End Project State ---",
        "phase: Phase 1",
        "database: SQLite",
        "(source: Build Spec V1)",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_format_empty():
    """Formatting an empty entry list produces an empty string."""
    rendered = format_project_state([])
    assert rendered == ""
|
||||
Reference in New Issue
Block a user