diff --git a/src/atocore/context/builder.py b/src/atocore/context/builder.py index 3e4ab7d..2d4ad84 100644 --- a/src/atocore/context/builder.py +++ b/src/atocore/context/builder.py @@ -30,6 +30,12 @@ SYSTEM_PREFIX = ( # identity: 5%, preferences: 5%, project state: 20%, retrieval: 60%+ PROJECT_STATE_BUDGET_RATIO = 0.20 MEMORY_BUDGET_RATIO = 0.10 # 5% identity + 5% preference +# Project-scoped memories (project/knowledge/episodic) are the outlet +# for the Phase 9 reflection loop on the retrieval side. Budget sits +# between identity/preference and retrieved chunks so a reinforced +# memory can actually reach the model. +PROJECT_MEMORY_BUDGET_RATIO = 0.15 +PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"] # Last built context pack for debug inspection _last_context_pack: "ContextPack | None" = None @@ -51,6 +57,8 @@ class ContextPack: project_state_chars: int = 0 memory_text: str = "" memory_chars: int = 0 + project_memory_text: str = "" + project_memory_chars: int = 0 total_chars: int = 0 budget: int = 0 budget_remaining: int = 0 @@ -109,8 +117,28 @@ def build_context( budget=memory_budget, ) + # 2b. Get project-scoped memories (third precedence). Only + # populated when a canonical project is in scope — cross-project + # memory bleed would rot the pack. Active-only filtering is + # handled by the shared min_confidence=0.5 gate inside + # get_memories_for_context. + project_memory_text = "" + project_memory_chars = 0 + if canonical_project: + project_memory_budget = min( + int(budget * PROJECT_MEMORY_BUDGET_RATIO), + max(budget - project_state_chars - memory_chars, 0), + ) + project_memory_text, project_memory_chars = get_memories_for_context( + memory_types=PROJECT_MEMORY_TYPES, + project=canonical_project, + budget=project_memory_budget, + header="--- Project Memories ---", + footer="--- End Project Memories ---", + ) + # 3. 
Calculate remaining budget for retrieval - retrieval_budget = budget - project_state_chars - memory_chars + retrieval_budget = budget - project_state_chars - memory_chars - project_memory_chars # 4. Retrieve candidates candidates = ( @@ -130,11 +158,14 @@ def build_context( selected = _select_within_budget(scored, max(retrieval_budget, 0)) # 7. Format full context - formatted = _format_full_context(project_state_text, memory_text, selected) + formatted = _format_full_context( + project_state_text, memory_text, project_memory_text, selected + ) if len(formatted) > budget: formatted, selected = _trim_context_to_budget( project_state_text, memory_text, + project_memory_text, selected, budget, ) @@ -144,6 +175,7 @@ def build_context( project_state_chars = len(project_state_text) memory_chars = len(memory_text) + project_memory_chars = len(project_memory_text) retrieval_chars = sum(c.char_count for c in selected) total_chars = len(formatted) duration_ms = int((time.time() - start) * 1000) @@ -154,6 +186,8 @@ def build_context( project_state_chars=project_state_chars, memory_text=memory_text, memory_chars=memory_chars, + project_memory_text=project_memory_text, + project_memory_chars=project_memory_chars, total_chars=total_chars, budget=budget, budget_remaining=budget - total_chars, @@ -171,6 +205,7 @@ def build_context( chunks_used=len(selected), project_state_chars=project_state_chars, memory_chars=memory_chars, + project_memory_chars=project_memory_chars, retrieval_chars=retrieval_chars, total_chars=total_chars, budget_remaining=budget - total_chars, @@ -250,6 +285,7 @@ def _select_within_budget( def _format_full_context( project_state_text: str, memory_text: str, + project_memory_text: str, chunks: list[ContextChunk], ) -> str: """Format project state + memories + retrieved chunks into full context block.""" @@ -265,7 +301,12 @@ def _format_full_context( parts.append(memory_text) parts.append("") - # 3. Retrieved chunks (lowest trust) + # 3. 
Project-scoped memories (third trust level) + if project_memory_text: + parts.append(project_memory_text) + parts.append("") + + # 4. Retrieved chunks (lowest trust) if chunks: parts.append("--- AtoCore Retrieved Context ---") if project_state_text: @@ -277,7 +318,7 @@ def _format_full_context( parts.append(chunk.content) parts.append("") parts.append("--- End Context ---") - elif not project_state_text and not memory_text: + elif not project_state_text and not memory_text and not project_memory_text: parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---") return "\n".join(parts) @@ -299,6 +340,7 @@ def _pack_to_dict(pack: ContextPack) -> dict: "project_hint": pack.project_hint, "project_state_chars": pack.project_state_chars, "memory_chars": pack.memory_chars, + "project_memory_chars": pack.project_memory_chars, "chunks_used": len(pack.chunks_used), "total_chars": pack.total_chars, "budget": pack.budget, @@ -306,6 +348,7 @@ def _pack_to_dict(pack: ContextPack) -> dict: "duration_ms": pack.duration_ms, "has_project_state": bool(pack.project_state_text), "has_memories": bool(pack.memory_text), + "has_project_memories": bool(pack.project_memory_text), "chunks": [ { "source_file": c.source_file, @@ -335,26 +378,45 @@ def _truncate_text_block(text: str, budget: int) -> tuple[str, int]: def _trim_context_to_budget( project_state_text: str, memory_text: str, + project_memory_text: str, chunks: list[ContextChunk], budget: int, ) -> tuple[str, list[ContextChunk]]: - """Trim retrieval first, then memory, then project state until formatted context fits.""" + """Trim retrieval → project memories → identity/preference → project state.""" kept_chunks = list(chunks) - formatted = _format_full_context(project_state_text, memory_text, kept_chunks) + formatted = _format_full_context( + project_state_text, memory_text, project_memory_text, kept_chunks + ) while len(formatted) > budget and kept_chunks: kept_chunks.pop() - formatted = 
_format_full_context(project_state_text, memory_text, kept_chunks) + formatted = _format_full_context( + project_state_text, memory_text, project_memory_text, kept_chunks + ) if len(formatted) <= budget: return formatted, kept_chunks + # Truncate project memories next (they were the most recently added + # tier and carry less trust than identity/preference). + project_memory_text, _ = _truncate_text_block( + project_memory_text, + max(budget - len(project_state_text) - len(memory_text), 0), + ) + formatted = _format_full_context( + project_state_text, memory_text, project_memory_text, kept_chunks + ) + if len(formatted) <= budget: + return formatted, kept_chunks + memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0)) - formatted = _format_full_context(project_state_text, memory_text, kept_chunks) + formatted = _format_full_context( + project_state_text, memory_text, project_memory_text, kept_chunks + ) if len(formatted) <= budget: return formatted, kept_chunks project_state_text, _ = _truncate_text_block(project_state_text, budget) - formatted = _format_full_context(project_state_text, "", []) + formatted = _format_full_context(project_state_text, "", "", []) if len(formatted) > budget: formatted, _ = _truncate_text_block(formatted, budget) return formatted, [] diff --git a/src/atocore/memory/service.py b/src/atocore/memory/service.py index 79ea29b..3f8e24c 100644 --- a/src/atocore/memory/service.py +++ b/src/atocore/memory/service.py @@ -344,6 +344,8 @@ def get_memories_for_context( memory_types: list[str] | None = None, project: str | None = None, budget: int = 500, + header: str = "--- AtoCore Memory ---", + footer: str = "--- End Memory ---", ) -> tuple[str, int]: """Get formatted memories for context injection. 
@@ -351,15 +353,16 @@ def get_memories_for_context( Budget allocation per Master Plan section 9: identity: 5%, preference: 5%, rest from retrieval budget + + The caller can override ``header`` / ``footer`` to distinguish + multiple memory blocks in the same pack (e.g. identity/preference + vs project/knowledge memories). """ if memory_types is None: memory_types = ["identity", "preference"] if budget <= 0: return "", 0 - - header = "--- AtoCore Memory ---" - footer = "--- End Memory ---" wrapper_chars = len(header) + len(footer) + 2 if budget <= wrapper_chars: return "", 0 diff --git a/tests/test_context_builder.py b/tests/test_context_builder.py index 85c59ec..ede536a 100644 --- a/tests/test_context_builder.py +++ b/tests/test_context_builder.py @@ -251,3 +251,60 @@ def test_unknown_hint_falls_back_to_raw_lookup(tmp_data_dir, sample_markdown, mo pack = build_context("status?", project_hint="orphan-project", budget=2000) assert "Solo run" in pack.formatted_context + + +def test_project_memories_included_in_pack(tmp_data_dir, sample_markdown): + """Active project-scoped memories for the target project should + land in a dedicated '--- Project Memories ---' band so the + Phase 9 reflection loop has a retrieval outlet.""" + from atocore.memory.service import create_memory + + init_db() + init_project_state_schema() + ingest_file(sample_markdown) + + mem = create_memory( + memory_type="project", + content="the mirror architecture is Option B conical back for p04-gigabit", + project="p04-gigabit", + confidence=0.9, + ) + # A sibling memory for a different project must NOT leak into the pack. 
+ create_memory( + memory_type="project", + content="polisher suite splits into sim, post, control, contracts", + project="p06-polisher", + confidence=0.9, + ) + + pack = build_context( + "remind me about the mirror architecture", + project_hint="p04-gigabit", + budget=3000, + ) + assert "--- Project Memories ---" in pack.formatted_context + assert "Option B conical back" in pack.formatted_context + assert "polisher suite splits" not in pack.formatted_context + assert pack.project_memory_chars > 0 + assert mem.project == "p04-gigabit" + + +def test_project_memories_absent_without_project_hint(tmp_data_dir, sample_markdown): + """Without a project hint, project memories stay out of the pack — + cross-project bleed would rot the signal.""" + from atocore.memory.service import create_memory + + init_db() + init_project_state_schema() + ingest_file(sample_markdown) + + create_memory( + memory_type="project", + content="scoped project knowledge that should not leak globally", + project="p04-gigabit", + confidence=0.9, + ) + + pack = build_context("tell me something", budget=3000) + assert "--- Project Memories ---" not in pack.formatted_context + assert pack.project_memory_chars == 0