feat: engineering-aware context assembly

When a query matches a known engineering entity by name or description, the context pack now includes a structured '--- Engineering Context ---' band showing the entity's type, description, and its relationships to other entities (subsystems, materials, requirements, decisions).

Six-tier context assembly:
1. Trusted Project State
2. Identity / Preferences
3. Project Memories
4. Domain Knowledge
5. Engineering Context (NEW)
6. Retrieved Chunks

The engineering band uses the same token-overlap scoring as memory ranking: query tokens are matched against entity names + descriptions. The top match gets its full relationship context included. The band is allocated 10% of the budget and trims before domain knowledge (it is the lowest priority of the structured tiers, since the same info may appear in chunks).

Example: query 'lateral support design' against p04-gigabit surfaces the Lateral Support subsystem entity with its relationships to GF-PTFE material, M1 Mirror Assembly parent system, and related components.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-13 11:17:01 -04:00
parent 3e0a357441
commit ccc49d3a8f
1 changed files with 131 additions and 11 deletions
--- a/src/atocore/context/builder.py
+++ b/src/atocore/context/builder.py
@@ -14,6 +14,7 @@ import atocore.config as _config
 from atocore.context.project_state import format_project_state, get_state
 from atocore.memory.service import get_memories_for_context
 from atocore.observability.logger import get_logger
+from atocore.engineering.service import get_entities, get_entity_with_context
 from atocore.projects.registry import resolve_project_name
 from atocore.retrieval.retriever import ChunkResult, retrieve

@@ -42,6 +43,7 @@ PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"]
 # model breaks down below 5N because the contact assumption fails").
 DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10
 DOMAIN_KNOWLEDGE_TYPES = ["knowledge"]
+ENGINEERING_CONTEXT_BUDGET_RATIO = 0.10

 # Last built context pack for debug inspection
 _last_context_pack: "ContextPack | None" = None
@@ -67,6 +69,8 @@ class ContextPack:
    project_memory_chars: int = 0
    domain_knowledge_text: str = ""
    domain_knowledge_chars: int = 0
+    engineering_context_text: str = ""
+    engineering_context_chars: int = 0
    total_chars: int = 0
    budget: int = 0
    budget_remaining: int = 0
@@ -165,10 +169,27 @@ def build_context(
            query=user_prompt,
        )

+    # 2d. Engineering context — structured entity/relationship data
+    # when the query matches a known entity name.
+    engineering_context_text = ""
+    engineering_context_chars = 0
+    if canonical_project:
+        eng_budget = min(
+            int(budget * ENGINEERING_CONTEXT_BUDGET_RATIO),
+            max(budget - project_state_chars - memory_chars
+                - project_memory_chars - domain_knowledge_chars, 0),
+        )
+        if eng_budget > 0:
+            engineering_context_text = _build_engineering_context(
+                user_prompt, canonical_project, eng_budget,
+            )
+            engineering_context_chars = len(engineering_context_text)
+
    # 3. Calculate remaining budget for retrieval
    retrieval_budget = (
        budget - project_state_chars - memory_chars
        - project_memory_chars - domain_knowledge_chars
+        - engineering_context_chars
    )

    # 4. Retrieve candidates
@@ -191,7 +212,7 @@ def build_context(
    # 7. Format full context
    formatted = _format_full_context(
        project_state_text, memory_text, project_memory_text,
-        domain_knowledge_text, selected,
+        domain_knowledge_text, engineering_context_text, selected,
    )
    if len(formatted) > budget:
        formatted, selected = _trim_context_to_budget(
@@ -199,6 +220,7 @@ def build_context(
            memory_text,
            project_memory_text,
            domain_knowledge_text,
+            engineering_context_text,
            selected,
            budget,
        )
@@ -210,6 +232,7 @@ def build_context(
    memory_chars = len(memory_text)
    project_memory_chars = len(project_memory_text)
    domain_knowledge_chars = len(domain_knowledge_text)
+    engineering_context_chars = len(engineering_context_text)
    retrieval_chars = sum(c.char_count for c in selected)
    total_chars = len(formatted)
    duration_ms = int((time.time() - start) * 1000)
@@ -224,6 +247,8 @@ def build_context(
        project_memory_chars=project_memory_chars,
        domain_knowledge_text=domain_knowledge_text,
        domain_knowledge_chars=domain_knowledge_chars,
+        engineering_context_text=engineering_context_text,
+        engineering_context_chars=engineering_context_chars,
        total_chars=total_chars,
        budget=budget,
        budget_remaining=budget - total_chars,
@@ -243,6 +268,7 @@ def build_context(
        memory_chars=memory_chars,
        project_memory_chars=project_memory_chars,
        domain_knowledge_chars=domain_knowledge_chars,
+        engineering_context_chars=engineering_context_chars,
        retrieval_chars=retrieval_chars,
        total_chars=total_chars,
        budget_remaining=budget - total_chars,
@@ -324,7 +350,8 @@ def _format_full_context(
    memory_text: str,
    project_memory_text: str,
    domain_knowledge_text: str,
-    chunks: list[ContextChunk],
+    engineering_context_text: str = "",
+    chunks: list[ContextChunk] | None = None,
 ) -> str:
    """Format project state + memories + retrieved chunks into full context block."""
    parts = []
@@ -349,7 +376,12 @@ def _format_full_context(
        parts.append(domain_knowledge_text)
        parts.append("")

-    # 5. Retrieved chunks (lowest trust)
+    # 5. Engineering context (structured entity/relationship data)
+    if engineering_context_text:
+        parts.append(engineering_context_text)
+        parts.append("")
+
+    # 6. Retrieved chunks (lowest trust)
    if chunks:
        parts.append("--- AtoCore Retrieved Context ---")
        if project_state_text:
@@ -361,7 +393,7 @@ def _format_full_context(
            parts.append(chunk.content)
            parts.append("")
        parts.append("--- End Context ---")
-    elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text:
+    elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text and not engineering_context_text:
        parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---")

    return "\n".join(parts)
@@ -394,6 +426,7 @@ def _pack_to_dict(pack: ContextPack) -> dict:
        "has_memories": bool(pack.memory_text),
        "has_project_memories": bool(pack.project_memory_text),
        "has_domain_knowledge": bool(pack.domain_knowledge_text),
+        "has_engineering_context": bool(pack.engineering_context_text),
        "chunks": [
            {
                "source_file": c.source_file,
@@ -407,6 +440,83 @@ def _pack_to_dict(pack: ContextPack) -> dict:
    }


+def _build_engineering_context(
+    query: str,
+    project: str,
+    budget: int,
+) -> str:
+    """Format the entity that best matches the query as a compact text band.
+
+    Scores entities by query-token overlap with their name and description,
+    then renders the top scorer with up to 8 of its relationships. Returns an
+    empty string when budget < 100, nothing matches, or a service call raises.
+    """
+    if budget < 100:  # budgets under 100 chars produce no band at all
+        return ""
+
+    from atocore.memory.reinforcement import _normalize, _tokenize  # function-scope import
+
+    query_tokens = _tokenize(_normalize(query))
+    if not query_tokens:
+        return ""
+
+    try:
+        entities = get_entities(project=project, limit=100)  # presumably caps the scan at 100 entities; verify
+    except Exception:  # best-effort: failure drops the band silently (nothing is logged)
+        return ""
+
+    if not entities:
+        return ""
+
+    scored: list[tuple[int, "Entity"]] = []  # NOTE(review): no "Entity" import is visible in this diff; confirm
+    for ent in entities:
+        name_tokens = _tokenize(_normalize(ent.name))
+        desc_tokens = _tokenize(_normalize(ent.description))  # NOTE(review): outside any try; assumes description is never None; confirm
+        overlap = len(query_tokens & (name_tokens | desc_tokens))  # query tokens found in name or description
+        if overlap > 0:
+            scored.append((overlap, ent))
+
+    if not scored:
+        return ""
+
+    scored.sort(key=lambda t: t[0], reverse=True)  # stable sort: ties keep their get_entities order
+    best_entity = scored[0][1]
+
+    try:
+        ctx = get_entity_with_context(best_entity.id)
+    except Exception:  # same best-effort policy as the entity lookup above
+        return ""
+
+    if ctx is None:
+        return ""
+
+    lines = ["--- Engineering Context ---"]
+    lines.append(f"[{best_entity.entity_type}] {best_entity.name}")
+    if best_entity.description:
+        lines.append(f"  {best_entity.description[:150]}")  # description capped at 150 chars
+
+    for rel in ctx["relationships"][:8]:  # cap is applied before the lookup below, so fewer than 8 may print
+        other_id = (
+            rel.target_entity_id
+            if rel.source_entity_id == best_entity.id
+            else rel.source_entity_id
+        )
+        other = ctx["related_entities"].get(other_id)
+        if other:  # relationships whose other endpoint is missing are skipped
+            direction = "->" if rel.source_entity_id == best_entity.id else "<-"  # "->" outgoing, "<-" incoming
+            lines.append(
+                f"  {direction} {rel.relationship_type} [{other.entity_type}] {other.name}"
+            )
+
+    lines.append("--- End Engineering Context ---")
+    text = "\n".join(lines)
+
+    if len(text) > budget:  # hard character cut; this can remove the End marker
+        text = text[:budget - 3].rstrip() + "..."
+
+    return text
+
+
 def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
    """Trim a formatted text block so trusted tiers cannot exceed the total budget."""
    if budget <= 0 or not text:
@@ -425,30 +535,40 @@ def _trim_context_to_budget(
    memory_text: str,
    project_memory_text: str,
    domain_knowledge_text: str,
+    engineering_context_text: str,
    chunks: list[ContextChunk],
    budget: int,
 ) -> tuple[str, list[ContextChunk]]:
-    """Trim retrieval -> domain knowledge -> project memories -> identity/preference -> project state."""
+    """Trim retrieval -> engineering -> domain -> project memories -> identity -> state."""
    kept_chunks = list(chunks)
    formatted = _format_full_context(
        project_state_text, memory_text, project_memory_text,
-        domain_knowledge_text, kept_chunks,
+        domain_knowledge_text, engineering_context_text, kept_chunks,
    )
    while len(formatted) > budget and kept_chunks:
        kept_chunks.pop()
        formatted = _format_full_context(
            project_state_text, memory_text, project_memory_text,
-            domain_knowledge_text, kept_chunks,
+            domain_knowledge_text, engineering_context_text, kept_chunks,
        )

    if len(formatted) <= budget:
        return formatted, kept_chunks

-    # Drop domain knowledge first (lowest trust of the memory tiers).
+    # Drop engineering context first.
+    engineering_context_text = ""
+    formatted = _format_full_context(
+        project_state_text, memory_text, project_memory_text,
+        domain_knowledge_text, engineering_context_text, kept_chunks,
+    )
+    if len(formatted) <= budget:
+        return formatted, kept_chunks
+
+    # Drop domain knowledge next.
    domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0)
    formatted = _format_full_context(
        project_state_text, memory_text, project_memory_text,
-        domain_knowledge_text, kept_chunks,
+        domain_knowledge_text, engineering_context_text, kept_chunks,
    )
    if len(formatted) <= budget:
        return formatted, kept_chunks
@@ -459,7 +579,7 @@ def _trim_context_to_budget(
    )
    formatted = _format_full_context(
        project_state_text, memory_text, project_memory_text,
-        domain_knowledge_text, kept_chunks,
+        domain_knowledge_text, engineering_context_text, kept_chunks,
    )
    if len(formatted) <= budget:
        return formatted, kept_chunks
@@ -467,7 +587,7 @@ def _trim_context_to_budget(
    memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))
    formatted = _format_full_context(
        project_state_text, memory_text, project_memory_text,
-        domain_knowledge_text, kept_chunks,
+        domain_knowledge_text, engineering_context_text, kept_chunks,
    )
    if len(formatted) <= budget:
        return formatted, kept_chunks