feat: engineering-aware context assembly

When a query's tokens overlap the name or description of a known
engineering entity, the context pack now includes a structured
'--- Engineering Context ---' band showing the entity's type,
description, and its relationships to other entities (subsystems,
materials, requirements, decisions).

Six-tier context assembly:
  1. Trusted Project State
  2. Identity / Preferences
  3. Project Memories
  4. Domain Knowledge
  5. Engineering Context (NEW)
  6. Retrieved Chunks

The engineering band uses the same token-overlap scoring as memory
ranking: query tokens are matched against entity names + descriptions.
The top match gets its full relationship context included.

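The band is allocated 10% of the total budget. When the pack is over
budget, the band is dropped right after retrieved chunks and before
domain knowledge: it is the lowest priority of the structured tiers,
since the same information may also appear in chunks.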

Example: query 'lateral support design' against p04-gigabit
surfaces the Lateral Support subsystem entity with its relationships
to GF-PTFE material, M1 Mirror Assembly parent system, and related
components.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 11:17:01 -04:00
parent 3e0a357441
commit ccc49d3a8f

View File

@@ -14,6 +14,7 @@ import atocore.config as _config
from atocore.context.project_state import format_project_state, get_state from atocore.context.project_state import format_project_state, get_state
from atocore.memory.service import get_memories_for_context from atocore.memory.service import get_memories_for_context
from atocore.observability.logger import get_logger from atocore.observability.logger import get_logger
from atocore.engineering.service import get_entities, get_entity_with_context
from atocore.projects.registry import resolve_project_name from atocore.projects.registry import resolve_project_name
from atocore.retrieval.retriever import ChunkResult, retrieve from atocore.retrieval.retriever import ChunkResult, retrieve
@@ -42,6 +43,7 @@ PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"]
# model breaks down below 5N because the contact assumption fails"). # model breaks down below 5N because the contact assumption fails").
DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10 DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10
DOMAIN_KNOWLEDGE_TYPES = ["knowledge"] DOMAIN_KNOWLEDGE_TYPES = ["knowledge"]
ENGINEERING_CONTEXT_BUDGET_RATIO = 0.10
# Last built context pack for debug inspection # Last built context pack for debug inspection
_last_context_pack: "ContextPack | None" = None _last_context_pack: "ContextPack | None" = None
@@ -67,6 +69,8 @@ class ContextPack:
project_memory_chars: int = 0 project_memory_chars: int = 0
domain_knowledge_text: str = "" domain_knowledge_text: str = ""
domain_knowledge_chars: int = 0 domain_knowledge_chars: int = 0
engineering_context_text: str = ""
engineering_context_chars: int = 0
total_chars: int = 0 total_chars: int = 0
budget: int = 0 budget: int = 0
budget_remaining: int = 0 budget_remaining: int = 0
@@ -165,10 +169,27 @@ def build_context(
query=user_prompt, query=user_prompt,
) )
# 2d. Engineering context — structured entity/relationship data
# when the query matches a known entity name.
engineering_context_text = ""
engineering_context_chars = 0
if canonical_project:
eng_budget = min(
int(budget * ENGINEERING_CONTEXT_BUDGET_RATIO),
max(budget - project_state_chars - memory_chars
- project_memory_chars - domain_knowledge_chars, 0),
)
if eng_budget > 0:
engineering_context_text = _build_engineering_context(
user_prompt, canonical_project, eng_budget,
)
engineering_context_chars = len(engineering_context_text)
# 3. Calculate remaining budget for retrieval # 3. Calculate remaining budget for retrieval
retrieval_budget = ( retrieval_budget = (
budget - project_state_chars - memory_chars budget - project_state_chars - memory_chars
- project_memory_chars - domain_knowledge_chars - project_memory_chars - domain_knowledge_chars
- engineering_context_chars
) )
# 4. Retrieve candidates # 4. Retrieve candidates
@@ -191,7 +212,7 @@ def build_context(
# 7. Format full context # 7. Format full context
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, selected, domain_knowledge_text, engineering_context_text, selected,
) )
if len(formatted) > budget: if len(formatted) > budget:
formatted, selected = _trim_context_to_budget( formatted, selected = _trim_context_to_budget(
@@ -199,6 +220,7 @@ def build_context(
memory_text, memory_text,
project_memory_text, project_memory_text,
domain_knowledge_text, domain_knowledge_text,
engineering_context_text,
selected, selected,
budget, budget,
) )
@@ -210,6 +232,7 @@ def build_context(
memory_chars = len(memory_text) memory_chars = len(memory_text)
project_memory_chars = len(project_memory_text) project_memory_chars = len(project_memory_text)
domain_knowledge_chars = len(domain_knowledge_text) domain_knowledge_chars = len(domain_knowledge_text)
engineering_context_chars = len(engineering_context_text)
retrieval_chars = sum(c.char_count for c in selected) retrieval_chars = sum(c.char_count for c in selected)
total_chars = len(formatted) total_chars = len(formatted)
duration_ms = int((time.time() - start) * 1000) duration_ms = int((time.time() - start) * 1000)
@@ -224,6 +247,8 @@ def build_context(
project_memory_chars=project_memory_chars, project_memory_chars=project_memory_chars,
domain_knowledge_text=domain_knowledge_text, domain_knowledge_text=domain_knowledge_text,
domain_knowledge_chars=domain_knowledge_chars, domain_knowledge_chars=domain_knowledge_chars,
engineering_context_text=engineering_context_text,
engineering_context_chars=engineering_context_chars,
total_chars=total_chars, total_chars=total_chars,
budget=budget, budget=budget,
budget_remaining=budget - total_chars, budget_remaining=budget - total_chars,
@@ -243,6 +268,7 @@ def build_context(
memory_chars=memory_chars, memory_chars=memory_chars,
project_memory_chars=project_memory_chars, project_memory_chars=project_memory_chars,
domain_knowledge_chars=domain_knowledge_chars, domain_knowledge_chars=domain_knowledge_chars,
engineering_context_chars=engineering_context_chars,
retrieval_chars=retrieval_chars, retrieval_chars=retrieval_chars,
total_chars=total_chars, total_chars=total_chars,
budget_remaining=budget - total_chars, budget_remaining=budget - total_chars,
@@ -324,7 +350,8 @@ def _format_full_context(
memory_text: str, memory_text: str,
project_memory_text: str, project_memory_text: str,
domain_knowledge_text: str, domain_knowledge_text: str,
chunks: list[ContextChunk], engineering_context_text: str = "",
chunks: list[ContextChunk] | None = None,
) -> str: ) -> str:
"""Format project state + memories + retrieved chunks into full context block.""" """Format project state + memories + retrieved chunks into full context block."""
parts = [] parts = []
@@ -349,7 +376,12 @@ def _format_full_context(
parts.append(domain_knowledge_text) parts.append(domain_knowledge_text)
parts.append("") parts.append("")
# 5. Retrieved chunks (lowest trust) # 5. Engineering context (structured entity/relationship data)
if engineering_context_text:
parts.append(engineering_context_text)
parts.append("")
# 6. Retrieved chunks (lowest trust)
if chunks: if chunks:
parts.append("--- AtoCore Retrieved Context ---") parts.append("--- AtoCore Retrieved Context ---")
if project_state_text: if project_state_text:
@@ -361,7 +393,7 @@ def _format_full_context(
parts.append(chunk.content) parts.append(chunk.content)
parts.append("") parts.append("")
parts.append("--- End Context ---") parts.append("--- End Context ---")
elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text: elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text and not engineering_context_text:
parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---") parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---")
return "\n".join(parts) return "\n".join(parts)
@@ -394,6 +426,7 @@ def _pack_to_dict(pack: ContextPack) -> dict:
"has_memories": bool(pack.memory_text), "has_memories": bool(pack.memory_text),
"has_project_memories": bool(pack.project_memory_text), "has_project_memories": bool(pack.project_memory_text),
"has_domain_knowledge": bool(pack.domain_knowledge_text), "has_domain_knowledge": bool(pack.domain_knowledge_text),
"has_engineering_context": bool(pack.engineering_context_text),
"chunks": [ "chunks": [
{ {
"source_file": c.source_file, "source_file": c.source_file,
@@ -407,6 +440,83 @@ def _pack_to_dict(pack: ContextPack) -> dict:
} }
def _build_engineering_context(
    query: str,
    project: str,
    budget: int,
) -> str:
    """Build the '--- Engineering Context ---' band for the best-matching entity.

    Query tokens are compared against the tokens of each entity's name and
    description. The entity with the largest overlap (the first one returned
    by ``get_entities`` on a tie) is rendered with its type, a shortened
    description and up to eight relationships.

    Args:
        query: Raw user prompt used for token-overlap matching.
        project: Project name passed to ``get_entities``.
        budget: Maximum number of characters the band may occupy.

    Returns:
        The formatted band, never longer than ``budget`` characters, or an
        empty string when the budget is below 100, the query yields no
        tokens, no entity overlaps the query, or an entity lookup fails.
    """
    if budget < 100:
        return ""

    from atocore.memory.reinforcement import _normalize, _tokenize

    query_tokens = _tokenize(_normalize(query))
    if not query_tokens:
        return ""

    # Deliberately best-effort: a failing entity lookup must not break
    # context assembly, so the band is simply omitted.
    try:
        entities = get_entities(project=project, limit=100)
    except Exception:
        return ""
    if not entities:
        return ""

    # Single pass instead of a full sort. The strict ">" keeps the first
    # entity among equal scores, which is what a stable descending sort
    # followed by taking element 0 selects.
    best_entity = None
    best_overlap = 0
    for ent in entities:
        # The description is treated as optional further down, so guard
        # against an empty/None value before normalizing it.
        ent_tokens = _tokenize(_normalize(ent.name)) | _tokenize(
            _normalize(ent.description or "")
        )
        overlap = len(query_tokens & ent_tokens)
        if overlap > best_overlap:
            best_entity, best_overlap = ent, overlap
    if best_entity is None:
        return ""

    try:
        ctx = get_entity_with_context(best_entity.id)
    except Exception:
        return ""
    if ctx is None:
        return ""

    footer = "--- End Engineering Context ---"
    lines = ["--- Engineering Context ---"]
    lines.append(f"[{best_entity.entity_type}] {best_entity.name}")
    if best_entity.description:
        lines.append(f" {best_entity.description[:150]}")
    # Lines up to here describe the entity itself; everything appended after
    # this point is a relationship line that may be dropped to fit the budget.
    fixed_count = len(lines)

    for rel in ctx["relationships"][:8]:
        outgoing = rel.source_entity_id == best_entity.id
        other_id = rel.target_entity_id if outgoing else rel.source_entity_id
        other = ctx["related_entities"].get(other_id)
        if other:
            direction = "->" if outgoing else "<-"
            lines.append(
                f" {direction} {rel.relationship_type} [{other.entity_type}] {other.name}"
            )

    text = "\n".join([*lines, footer])
    # Over budget: drop whole relationship lines from the end first, so the
    # band never ends in a half-written relationship and keeps its footer.
    while len(text) > budget and len(lines) > fixed_count:
        lines.pop()
        text = "\n".join([*lines, footer])

    if len(text) > budget:
        # Even the entity header alone is too long: hard-truncate it but
        # still close the band with its delimiter. budget >= 100 guarantees
        # that ``room`` is positive.
        room = budget - len(footer) - len("...\n")
        head = "\n".join(lines)[:room].rstrip()
        text = f"{head}...\n{footer}"
    return text
def _truncate_text_block(text: str, budget: int) -> tuple[str, int]: def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
"""Trim a formatted text block so trusted tiers cannot exceed the total budget.""" """Trim a formatted text block so trusted tiers cannot exceed the total budget."""
if budget <= 0 or not text: if budget <= 0 or not text:
@@ -425,30 +535,40 @@ def _trim_context_to_budget(
memory_text: str, memory_text: str,
project_memory_text: str, project_memory_text: str,
domain_knowledge_text: str, domain_knowledge_text: str,
engineering_context_text: str,
chunks: list[ContextChunk], chunks: list[ContextChunk],
budget: int, budget: int,
) -> tuple[str, list[ContextChunk]]: ) -> tuple[str, list[ContextChunk]]:
"""Trim retrieval -> domain knowledge -> project memories -> identity/preference -> project state.""" """Trim retrieval -> engineering -> domain -> project memories -> identity -> state."""
kept_chunks = list(chunks) kept_chunks = list(chunks)
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, kept_chunks, domain_knowledge_text, engineering_context_text, kept_chunks,
) )
while len(formatted) > budget and kept_chunks: while len(formatted) > budget and kept_chunks:
kept_chunks.pop() kept_chunks.pop()
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, kept_chunks, domain_knowledge_text, engineering_context_text, kept_chunks,
) )
if len(formatted) <= budget: if len(formatted) <= budget:
return formatted, kept_chunks return formatted, kept_chunks
# Drop domain knowledge first (lowest trust of the memory tiers). # Drop engineering context first.
engineering_context_text = ""
formatted = _format_full_context(
project_state_text, memory_text, project_memory_text,
domain_knowledge_text, engineering_context_text, kept_chunks,
)
if len(formatted) <= budget:
return formatted, kept_chunks
# Drop domain knowledge next.
domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0) domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0)
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, kept_chunks, domain_knowledge_text, engineering_context_text, kept_chunks,
) )
if len(formatted) <= budget: if len(formatted) <= budget:
return formatted, kept_chunks return formatted, kept_chunks
@@ -459,7 +579,7 @@ def _trim_context_to_budget(
) )
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, kept_chunks, domain_knowledge_text, engineering_context_text, kept_chunks,
) )
if len(formatted) <= budget: if len(formatted) <= budget:
return formatted, kept_chunks return formatted, kept_chunks
@@ -467,7 +587,7 @@ def _trim_context_to_budget(
memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0)) memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))
formatted = _format_full_context( formatted = _format_full_context(
project_state_text, memory_text, project_memory_text, project_state_text, memory_text, project_memory_text,
domain_knowledge_text, kept_chunks, domain_knowledge_text, engineering_context_text, kept_chunks,
) )
if len(formatted) <= budget: if len(formatted) <= budget:
return formatted, kept_chunks return formatted, kept_chunks