Stabilize core correctness and sync project plan state

2026-04-05 17:53:23 -04:00
parent b48f0c95ab
commit b0889b3925
20 changed files with 551 additions and 168 deletions
--- a/src/atocore/context/builder.py
+++ b/src/atocore/context/builder.py
@@ -10,7 +10,7 @@ import time
 from dataclasses import dataclass, field
 from pathlib import Path

-from atocore.config import settings
+import atocore.config as _config
 from atocore.context.project_state import format_project_state, get_state
 from atocore.memory.service import get_memories_for_context
 from atocore.observability.logger import get_logger
@@ -74,20 +74,27 @@ def build_context(
    """
    global _last_context_pack
    start = time.time()
-    budget = budget or settings.context_budget
+    budget = _config.settings.context_budget if budget is None else max(budget, 0)

    # 1. Get Trusted Project State (highest precedence)
    project_state_text = ""
    project_state_chars = 0
+    project_state_budget = min(
+        budget,
+        max(0, int(budget * PROJECT_STATE_BUDGET_RATIO)),
+    )

    if project_hint:
        state_entries = get_state(project_hint)
        if state_entries:
            project_state_text = format_project_state(state_entries)
-            project_state_chars = len(project_state_text)
+            project_state_text, project_state_chars = _truncate_text_block(
+                project_state_text,
+                project_state_budget or budget,
+            )

    # 2. Get identity + preference memories (second precedence)
-    memory_budget = int(budget * MEMORY_BUDGET_RATIO)
+    memory_budget = min(int(budget * MEMORY_BUDGET_RATIO), max(budget - project_state_chars, 0))
    memory_text, memory_chars = get_memories_for_context(
        memory_types=["identity", "preference"],
        budget=memory_budget,
@@ -97,7 +104,7 @@ def build_context(
    retrieval_budget = budget - project_state_chars - memory_chars

    # 4. Retrieve candidates
-    candidates = retrieve(user_prompt, top_k=settings.context_top_k)
+    candidates = retrieve(user_prompt, top_k=_config.settings.context_top_k) if retrieval_budget > 0 else []

    # 5. Score and rank
    scored = _rank_chunks(candidates, project_hint)
@@ -107,12 +114,21 @@ def build_context(

    # 7. Format full context
    formatted = _format_full_context(project_state_text, memory_text, selected)
+    if len(formatted) > budget:
+        formatted, selected = _trim_context_to_budget(
+            project_state_text,
+            memory_text,
+            selected,
+            budget,
+        )

    # 8. Build full prompt
    full_prompt = f"{SYSTEM_PREFIX}\n\n{formatted}\n\n{user_prompt}"

+    project_state_chars = len(project_state_text)
+    memory_chars = len(memory_text)
    retrieval_chars = sum(c.char_count for c in selected)
-    total_chars = project_state_chars + memory_chars + retrieval_chars
+    total_chars = len(formatted)
    duration_ms = int((time.time() - start) * 1000)

    pack = ContextPack(
@@ -235,6 +251,8 @@ def _format_full_context(
    # 3. Retrieved chunks (lowest trust)
    if chunks:
        parts.append("--- AtoCore Retrieved Context ---")
+        if project_state_text:
+            parts.append("If retrieved context conflicts with Trusted Project State above, trust the Trusted Project State.")
        for chunk in chunks:
            parts.append(
                f"[Source: {chunk.source_file} | Section: {chunk.heading_path} | Score: {chunk.score:.2f}]"
@@ -282,3 +300,44 @@ def _pack_to_dict(pack: ContextPack) -> dict:
            for c in pack.chunks_used
        ],
    }
+
+
+def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
+    """Trim a formatted text block so trusted tiers cannot exceed the total budget."""
+    if budget <= 0 or not text:  # no room, or nothing to trim: empty block, zero chars
+        return "", 0
+    if len(text) <= budget:  # already fits: returned unchanged together with its length
+        return text, len(text)
+    if budget <= 3:  # no room for any text ahead of a 3-char "..." marker: hard cut, no marker
+        trimmed = text[:budget]
+    else:
+        trimmed = f"{text[: budget - 3].rstrip()}..."  # reserve 3 chars for "..."; rstrip can make the result shorter than budget
+    return trimmed, len(trimmed)  # second element is the length of the text actually returned
+
+
+def _trim_context_to_budget(
+    project_state_text: str,
+    memory_text: str,
+    chunks: list[ContextChunk],
+    budget: int,
+) -> tuple[str, list[ContextChunk]]:  # returns (formatted context, chunks kept); the trimmed section texts themselves are not returned
+    """Trim retrieval first, then memory, then project state until formatted context fits."""
+    kept_chunks = list(chunks)  # work on a copy so pop() below never mutates the caller's list
+    formatted = _format_full_context(project_state_text, memory_text, kept_chunks)
+    while len(formatted) > budget and kept_chunks:  # stage 1: drop chunks from the end, one at a time
+        kept_chunks.pop()  # presumably the last chunk is the lowest ranked; confirm against the caller's ordering
+        formatted = _format_full_context(project_state_text, memory_text, kept_chunks)
+
+    if len(formatted) <= budget:
+        return formatted, kept_chunks
+
+    memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))  # stage 2 (kept_chunks is empty here); NOTE(review): allowance ignores any text the formatter adds around sections
+    formatted = _format_full_context(project_state_text, memory_text, kept_chunks)
+    if len(formatted) <= budget:
+        return formatted, kept_chunks
+
+    project_state_text, _ = _truncate_text_block(project_state_text, budget)  # stage 3: project state alone may use the whole budget
+    formatted = _format_full_context(project_state_text, "", [])  # memory and chunks are dropped entirely in this fallback
+    if len(formatted) > budget:  # formatting pushed it over again: hard-trim the final string
+        formatted, _ = _truncate_text_block(formatted, budget)
+    return formatted, []  # NOTE(review): a caller that measures its own untrimmed section texts will report pre-trim lengths
--- a/src/atocore/context/project_state.py
+++ b/src/atocore/context/project_state.py
@@ -12,10 +12,8 @@ Project state is manually curated or explicitly confirmed facts about a project.
 It always wins over retrieval-based context when there's a conflict.
 """

-import json
-import time
 import uuid
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime, timezone

 from atocore.models.database import get_connection
@@ -81,7 +79,7 @@ def ensure_project(name: str, description: str = "") -> str:
    """Get or create a project by name. Returns project_id."""
    with get_connection() as conn:
        row = conn.execute(
-            "SELECT id FROM projects WHERE name = ?", (name,)
+            "SELECT id FROM projects WHERE lower(name) = lower(?)", (name,)
        ).fetchone()
        if row:
            return row["id"]
@@ -106,6 +104,7 @@ def set_state(
    """Set or update a project state entry. Upsert semantics."""
    if category not in CATEGORIES:
        raise ValueError(f"Invalid category '{category}'. Must be one of: {CATEGORIES}")
+    _validate_confidence(confidence)

    project_id = ensure_project(project_name)
    entry_id = str(uuid.uuid4())
@@ -157,7 +156,7 @@ def get_state(
    """Get project state entries, optionally filtered by category."""
    with get_connection() as conn:
        project = conn.execute(
-            "SELECT id FROM projects WHERE name = ?", (project_name,)
+            "SELECT id FROM projects WHERE lower(name) = lower(?)", (project_name,)
        ).fetchone()
        if not project:
            return []
@@ -195,7 +194,7 @@ def invalidate_state(project_name: str, category: str, key: str) -> bool:
    """Mark a project state entry as superseded."""
    with get_connection() as conn:
        project = conn.execute(
-            "SELECT id FROM projects WHERE name = ?", (project_name,)
+            "SELECT id FROM projects WHERE lower(name) = lower(?)", (project_name,)
        ).fetchone()
        if not project:
            return False
@@ -229,3 +228,8 @@ def format_project_state(entries: list[ProjectStateEntry]) -> str:

    lines.append("\n--- End Project State ---")
    return "\n".join(lines)
+
+
+def _validate_confidence(confidence: float) -> None:  # raises ValueError unless 0.0 <= confidence <= 1.0 (both bounds allowed)
+    if not 0.0 <= confidence <= 1.0:  # the chained comparison is False for NaN, so NaN is rejected as well
+        raise ValueError("Confidence must be between 0.0 and 1.0")