feat: engineering-aware context assembly
When a query matches a known engineering entity by name, the context pack now includes a structured '--- Engineering Context ---' band showing the entity's type, description, and its relationships to other entities (subsystems, materials, requirements, decisions). Six-tier context assembly: 1. Trusted Project State 2. Identity / Preferences 3. Project Memories 4. Domain Knowledge 5. Engineering Context (NEW) 6. Retrieved Chunks The engineering band uses the same token-overlap scoring as memory ranking: query tokens are matched against entity names + descriptions. The top match gets its full relationship context included. 10% budget allocation. Trims before domain knowledge (lowest priority of the structured tiers since the same info may appear in chunks). Example: query 'lateral support design' against p04-gigabit surfaces the Lateral Support subsystem entity with its relationships to GF-PTFE material, M1 Mirror Assembly parent system, and related components. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ import atocore.config as _config
|
||||
from atocore.context.project_state import format_project_state, get_state
|
||||
from atocore.memory.service import get_memories_for_context
|
||||
from atocore.observability.logger import get_logger
|
||||
from atocore.engineering.service import get_entities, get_entity_with_context
|
||||
from atocore.projects.registry import resolve_project_name
|
||||
from atocore.retrieval.retriever import ChunkResult, retrieve
|
||||
|
||||
@@ -42,6 +43,7 @@ PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"]
|
||||
# model breaks down below 5N because the contact assumption fails").
|
||||
DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10
|
||||
DOMAIN_KNOWLEDGE_TYPES = ["knowledge"]
|
||||
ENGINEERING_CONTEXT_BUDGET_RATIO = 0.10
|
||||
|
||||
# Last built context pack for debug inspection
|
||||
_last_context_pack: "ContextPack | None" = None
|
||||
@@ -67,6 +69,8 @@ class ContextPack:
|
||||
project_memory_chars: int = 0
|
||||
domain_knowledge_text: str = ""
|
||||
domain_knowledge_chars: int = 0
|
||||
engineering_context_text: str = ""
|
||||
engineering_context_chars: int = 0
|
||||
total_chars: int = 0
|
||||
budget: int = 0
|
||||
budget_remaining: int = 0
|
||||
@@ -165,10 +169,27 @@ def build_context(
|
||||
query=user_prompt,
|
||||
)
|
||||
|
||||
# 2d. Engineering context — structured entity/relationship data
|
||||
# when the query matches a known entity name.
|
||||
engineering_context_text = ""
|
||||
engineering_context_chars = 0
|
||||
if canonical_project:
|
||||
eng_budget = min(
|
||||
int(budget * ENGINEERING_CONTEXT_BUDGET_RATIO),
|
||||
max(budget - project_state_chars - memory_chars
|
||||
- project_memory_chars - domain_knowledge_chars, 0),
|
||||
)
|
||||
if eng_budget > 0:
|
||||
engineering_context_text = _build_engineering_context(
|
||||
user_prompt, canonical_project, eng_budget,
|
||||
)
|
||||
engineering_context_chars = len(engineering_context_text)
|
||||
|
||||
# 3. Calculate remaining budget for retrieval
|
||||
retrieval_budget = (
|
||||
budget - project_state_chars - memory_chars
|
||||
- project_memory_chars - domain_knowledge_chars
|
||||
- engineering_context_chars
|
||||
)
|
||||
|
||||
# 4. Retrieve candidates
|
||||
@@ -191,7 +212,7 @@ def build_context(
|
||||
# 7. Format full context
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, selected,
|
||||
domain_knowledge_text, engineering_context_text, selected,
|
||||
)
|
||||
if len(formatted) > budget:
|
||||
formatted, selected = _trim_context_to_budget(
|
||||
@@ -199,6 +220,7 @@ def build_context(
|
||||
memory_text,
|
||||
project_memory_text,
|
||||
domain_knowledge_text,
|
||||
engineering_context_text,
|
||||
selected,
|
||||
budget,
|
||||
)
|
||||
@@ -210,6 +232,7 @@ def build_context(
|
||||
memory_chars = len(memory_text)
|
||||
project_memory_chars = len(project_memory_text)
|
||||
domain_knowledge_chars = len(domain_knowledge_text)
|
||||
engineering_context_chars = len(engineering_context_text)
|
||||
retrieval_chars = sum(c.char_count for c in selected)
|
||||
total_chars = len(formatted)
|
||||
duration_ms = int((time.time() - start) * 1000)
|
||||
@@ -224,6 +247,8 @@ def build_context(
|
||||
project_memory_chars=project_memory_chars,
|
||||
domain_knowledge_text=domain_knowledge_text,
|
||||
domain_knowledge_chars=domain_knowledge_chars,
|
||||
engineering_context_text=engineering_context_text,
|
||||
engineering_context_chars=engineering_context_chars,
|
||||
total_chars=total_chars,
|
||||
budget=budget,
|
||||
budget_remaining=budget - total_chars,
|
||||
@@ -243,6 +268,7 @@ def build_context(
|
||||
memory_chars=memory_chars,
|
||||
project_memory_chars=project_memory_chars,
|
||||
domain_knowledge_chars=domain_knowledge_chars,
|
||||
engineering_context_chars=engineering_context_chars,
|
||||
retrieval_chars=retrieval_chars,
|
||||
total_chars=total_chars,
|
||||
budget_remaining=budget - total_chars,
|
||||
@@ -324,7 +350,8 @@ def _format_full_context(
|
||||
memory_text: str,
|
||||
project_memory_text: str,
|
||||
domain_knowledge_text: str,
|
||||
chunks: list[ContextChunk],
|
||||
engineering_context_text: str = "",
|
||||
chunks: list[ContextChunk] | None = None,
|
||||
) -> str:
|
||||
"""Format project state + memories + retrieved chunks into full context block."""
|
||||
parts = []
|
||||
@@ -349,7 +376,12 @@ def _format_full_context(
|
||||
parts.append(domain_knowledge_text)
|
||||
parts.append("")
|
||||
|
||||
# 5. Retrieved chunks (lowest trust)
|
||||
# 5. Engineering context (structured entity/relationship data)
|
||||
if engineering_context_text:
|
||||
parts.append(engineering_context_text)
|
||||
parts.append("")
|
||||
|
||||
# 6. Retrieved chunks (lowest trust)
|
||||
if chunks:
|
||||
parts.append("--- AtoCore Retrieved Context ---")
|
||||
if project_state_text:
|
||||
@@ -361,7 +393,7 @@ def _format_full_context(
|
||||
parts.append(chunk.content)
|
||||
parts.append("")
|
||||
parts.append("--- End Context ---")
|
||||
elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text:
|
||||
elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text and not engineering_context_text:
|
||||
parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---")
|
||||
|
||||
return "\n".join(parts)
|
||||
@@ -394,6 +426,7 @@ def _pack_to_dict(pack: ContextPack) -> dict:
|
||||
"has_memories": bool(pack.memory_text),
|
||||
"has_project_memories": bool(pack.project_memory_text),
|
||||
"has_domain_knowledge": bool(pack.domain_knowledge_text),
|
||||
"has_engineering_context": bool(pack.engineering_context_text),
|
||||
"chunks": [
|
||||
{
|
||||
"source_file": c.source_file,
|
||||
@@ -407,6 +440,83 @@ def _pack_to_dict(pack: ContextPack) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _build_engineering_context(
|
||||
query: str,
|
||||
project: str,
|
||||
budget: int,
|
||||
) -> str:
|
||||
"""Find entities matching the query and format their context.
|
||||
|
||||
Uses simple word-overlap matching between query tokens and entity
|
||||
names to find relevant entities, then formats the top match with
|
||||
its relationships as a compact text band.
|
||||
"""
|
||||
if budget < 100:
|
||||
return ""
|
||||
|
||||
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||
|
||||
query_tokens = _tokenize(_normalize(query))
|
||||
if not query_tokens:
|
||||
return ""
|
||||
|
||||
try:
|
||||
entities = get_entities(project=project, limit=100)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
if not entities:
|
||||
return ""
|
||||
|
||||
scored: list[tuple[int, "Entity"]] = []
|
||||
for ent in entities:
|
||||
name_tokens = _tokenize(_normalize(ent.name))
|
||||
desc_tokens = _tokenize(_normalize(ent.description))
|
||||
overlap = len(query_tokens & (name_tokens | desc_tokens))
|
||||
if overlap > 0:
|
||||
scored.append((overlap, ent))
|
||||
|
||||
if not scored:
|
||||
return ""
|
||||
|
||||
scored.sort(key=lambda t: t[0], reverse=True)
|
||||
best_entity = scored[0][1]
|
||||
|
||||
try:
|
||||
ctx = get_entity_with_context(best_entity.id)
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
if ctx is None:
|
||||
return ""
|
||||
|
||||
lines = ["--- Engineering Context ---"]
|
||||
lines.append(f"[{best_entity.entity_type}] {best_entity.name}")
|
||||
if best_entity.description:
|
||||
lines.append(f" {best_entity.description[:150]}")
|
||||
|
||||
for rel in ctx["relationships"][:8]:
|
||||
other_id = (
|
||||
rel.target_entity_id
|
||||
if rel.source_entity_id == best_entity.id
|
||||
else rel.source_entity_id
|
||||
)
|
||||
other = ctx["related_entities"].get(other_id)
|
||||
if other:
|
||||
direction = "->" if rel.source_entity_id == best_entity.id else "<-"
|
||||
lines.append(
|
||||
f" {direction} {rel.relationship_type} [{other.entity_type}] {other.name}"
|
||||
)
|
||||
|
||||
lines.append("--- End Engineering Context ---")
|
||||
text = "\n".join(lines)
|
||||
|
||||
if len(text) > budget:
|
||||
text = text[:budget - 3].rstrip() + "..."
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
|
||||
"""Trim a formatted text block so trusted tiers cannot exceed the total budget."""
|
||||
if budget <= 0 or not text:
|
||||
@@ -425,30 +535,40 @@ def _trim_context_to_budget(
|
||||
memory_text: str,
|
||||
project_memory_text: str,
|
||||
domain_knowledge_text: str,
|
||||
engineering_context_text: str,
|
||||
chunks: list[ContextChunk],
|
||||
budget: int,
|
||||
) -> tuple[str, list[ContextChunk]]:
|
||||
"""Trim retrieval -> domain knowledge -> project memories -> identity/preference -> project state."""
|
||||
"""Trim retrieval -> engineering -> domain -> project memories -> identity -> state."""
|
||||
kept_chunks = list(chunks)
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, kept_chunks,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
while len(formatted) > budget and kept_chunks:
|
||||
kept_chunks.pop()
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, kept_chunks,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
|
||||
if len(formatted) <= budget:
|
||||
return formatted, kept_chunks
|
||||
|
||||
# Drop domain knowledge first (lowest trust of the memory tiers).
|
||||
# Drop engineering context first.
|
||||
engineering_context_text = ""
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
if len(formatted) <= budget:
|
||||
return formatted, kept_chunks
|
||||
|
||||
# Drop domain knowledge next.
|
||||
domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0)
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, kept_chunks,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
if len(formatted) <= budget:
|
||||
return formatted, kept_chunks
|
||||
@@ -459,7 +579,7 @@ def _trim_context_to_budget(
|
||||
)
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, kept_chunks,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
if len(formatted) <= budget:
|
||||
return formatted, kept_chunks
|
||||
@@ -467,7 +587,7 @@ def _trim_context_to_budget(
|
||||
memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))
|
||||
formatted = _format_full_context(
|
||||
project_state_text, memory_text, project_memory_text,
|
||||
domain_knowledge_text, kept_chunks,
|
||||
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||
)
|
||||
if len(formatted) <= budget:
|
||||
return formatted, kept_chunks
|
||||
|
||||
Reference in New Issue
Block a user