feat: engineering-aware context assembly
When a query matches a known engineering entity (by token overlap with its name or description), the context pack now includes a structured '--- Engineering Context ---' band showing the entity's type, description, and its relationships to other entities (subsystems, materials, requirements, decisions).

Six-tier context assembly:
1. Trusted Project State
2. Identity / Preferences
3. Project Memories
4. Domain Knowledge
5. Engineering Context (NEW)
6. Retrieved Chunks

The engineering band uses the same token-overlap scoring as memory ranking: query tokens are matched against entity names + descriptions. The top match gets its full relationship context included.

10% budget allocation. When the pack is over budget, the engineering band is trimmed before domain knowledge (it is the lowest priority of the structured tiers, since the same info may appear in chunks).

Example: query 'lateral support design' against p04-gigabit surfaces the Lateral Support subsystem entity with its relationships to GF-PTFE material, M1 Mirror Assembly parent system, and related components.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,6 +14,7 @@ import atocore.config as _config
|
|||||||
from atocore.context.project_state import format_project_state, get_state
|
from atocore.context.project_state import format_project_state, get_state
|
||||||
from atocore.memory.service import get_memories_for_context
|
from atocore.memory.service import get_memories_for_context
|
||||||
from atocore.observability.logger import get_logger
|
from atocore.observability.logger import get_logger
|
||||||
|
from atocore.engineering.service import get_entities, get_entity_with_context
|
||||||
from atocore.projects.registry import resolve_project_name
|
from atocore.projects.registry import resolve_project_name
|
||||||
from atocore.retrieval.retriever import ChunkResult, retrieve
|
from atocore.retrieval.retriever import ChunkResult, retrieve
|
||||||
|
|
||||||
@@ -42,6 +43,7 @@ PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"]
|
|||||||
# model breaks down below 5N because the contact assumption fails").
|
# model breaks down below 5N because the contact assumption fails").
|
||||||
DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10
|
DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10
|
||||||
DOMAIN_KNOWLEDGE_TYPES = ["knowledge"]
|
DOMAIN_KNOWLEDGE_TYPES = ["knowledge"]
|
||||||
|
ENGINEERING_CONTEXT_BUDGET_RATIO = 0.10
|
||||||
|
|
||||||
# Last built context pack for debug inspection
|
# Last built context pack for debug inspection
|
||||||
_last_context_pack: "ContextPack | None" = None
|
_last_context_pack: "ContextPack | None" = None
|
||||||
@@ -67,6 +69,8 @@ class ContextPack:
|
|||||||
project_memory_chars: int = 0
|
project_memory_chars: int = 0
|
||||||
domain_knowledge_text: str = ""
|
domain_knowledge_text: str = ""
|
||||||
domain_knowledge_chars: int = 0
|
domain_knowledge_chars: int = 0
|
||||||
|
engineering_context_text: str = ""
|
||||||
|
engineering_context_chars: int = 0
|
||||||
total_chars: int = 0
|
total_chars: int = 0
|
||||||
budget: int = 0
|
budget: int = 0
|
||||||
budget_remaining: int = 0
|
budget_remaining: int = 0
|
||||||
@@ -165,10 +169,27 @@ def build_context(
|
|||||||
query=user_prompt,
|
query=user_prompt,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 2d. Engineering context — structured entity/relationship data
|
||||||
|
# when the query matches a known entity name.
|
||||||
|
engineering_context_text = ""
|
||||||
|
engineering_context_chars = 0
|
||||||
|
if canonical_project:
|
||||||
|
eng_budget = min(
|
||||||
|
int(budget * ENGINEERING_CONTEXT_BUDGET_RATIO),
|
||||||
|
max(budget - project_state_chars - memory_chars
|
||||||
|
- project_memory_chars - domain_knowledge_chars, 0),
|
||||||
|
)
|
||||||
|
if eng_budget > 0:
|
||||||
|
engineering_context_text = _build_engineering_context(
|
||||||
|
user_prompt, canonical_project, eng_budget,
|
||||||
|
)
|
||||||
|
engineering_context_chars = len(engineering_context_text)
|
||||||
|
|
||||||
# 3. Calculate remaining budget for retrieval
|
# 3. Calculate remaining budget for retrieval
|
||||||
retrieval_budget = (
|
retrieval_budget = (
|
||||||
budget - project_state_chars - memory_chars
|
budget - project_state_chars - memory_chars
|
||||||
- project_memory_chars - domain_knowledge_chars
|
- project_memory_chars - domain_knowledge_chars
|
||||||
|
- engineering_context_chars
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Retrieve candidates
|
# 4. Retrieve candidates
|
||||||
@@ -191,7 +212,7 @@ def build_context(
|
|||||||
# 7. Format full context
|
# 7. Format full context
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, selected,
|
domain_knowledge_text, engineering_context_text, selected,
|
||||||
)
|
)
|
||||||
if len(formatted) > budget:
|
if len(formatted) > budget:
|
||||||
formatted, selected = _trim_context_to_budget(
|
formatted, selected = _trim_context_to_budget(
|
||||||
@@ -199,6 +220,7 @@ def build_context(
|
|||||||
memory_text,
|
memory_text,
|
||||||
project_memory_text,
|
project_memory_text,
|
||||||
domain_knowledge_text,
|
domain_knowledge_text,
|
||||||
|
engineering_context_text,
|
||||||
selected,
|
selected,
|
||||||
budget,
|
budget,
|
||||||
)
|
)
|
||||||
@@ -210,6 +232,7 @@ def build_context(
|
|||||||
memory_chars = len(memory_text)
|
memory_chars = len(memory_text)
|
||||||
project_memory_chars = len(project_memory_text)
|
project_memory_chars = len(project_memory_text)
|
||||||
domain_knowledge_chars = len(domain_knowledge_text)
|
domain_knowledge_chars = len(domain_knowledge_text)
|
||||||
|
engineering_context_chars = len(engineering_context_text)
|
||||||
retrieval_chars = sum(c.char_count for c in selected)
|
retrieval_chars = sum(c.char_count for c in selected)
|
||||||
total_chars = len(formatted)
|
total_chars = len(formatted)
|
||||||
duration_ms = int((time.time() - start) * 1000)
|
duration_ms = int((time.time() - start) * 1000)
|
||||||
@@ -224,6 +247,8 @@ def build_context(
|
|||||||
project_memory_chars=project_memory_chars,
|
project_memory_chars=project_memory_chars,
|
||||||
domain_knowledge_text=domain_knowledge_text,
|
domain_knowledge_text=domain_knowledge_text,
|
||||||
domain_knowledge_chars=domain_knowledge_chars,
|
domain_knowledge_chars=domain_knowledge_chars,
|
||||||
|
engineering_context_text=engineering_context_text,
|
||||||
|
engineering_context_chars=engineering_context_chars,
|
||||||
total_chars=total_chars,
|
total_chars=total_chars,
|
||||||
budget=budget,
|
budget=budget,
|
||||||
budget_remaining=budget - total_chars,
|
budget_remaining=budget - total_chars,
|
||||||
@@ -243,6 +268,7 @@ def build_context(
|
|||||||
memory_chars=memory_chars,
|
memory_chars=memory_chars,
|
||||||
project_memory_chars=project_memory_chars,
|
project_memory_chars=project_memory_chars,
|
||||||
domain_knowledge_chars=domain_knowledge_chars,
|
domain_knowledge_chars=domain_knowledge_chars,
|
||||||
|
engineering_context_chars=engineering_context_chars,
|
||||||
retrieval_chars=retrieval_chars,
|
retrieval_chars=retrieval_chars,
|
||||||
total_chars=total_chars,
|
total_chars=total_chars,
|
||||||
budget_remaining=budget - total_chars,
|
budget_remaining=budget - total_chars,
|
||||||
@@ -324,7 +350,8 @@ def _format_full_context(
|
|||||||
memory_text: str,
|
memory_text: str,
|
||||||
project_memory_text: str,
|
project_memory_text: str,
|
||||||
domain_knowledge_text: str,
|
domain_knowledge_text: str,
|
||||||
chunks: list[ContextChunk],
|
engineering_context_text: str = "",
|
||||||
|
chunks: list[ContextChunk] | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Format project state + memories + retrieved chunks into full context block."""
|
"""Format project state + memories + retrieved chunks into full context block."""
|
||||||
parts = []
|
parts = []
|
||||||
@@ -349,7 +376,12 @@ def _format_full_context(
|
|||||||
parts.append(domain_knowledge_text)
|
parts.append(domain_knowledge_text)
|
||||||
parts.append("")
|
parts.append("")
|
||||||
|
|
||||||
# 5. Retrieved chunks (lowest trust)
|
# 5. Engineering context (structured entity/relationship data)
|
||||||
|
if engineering_context_text:
|
||||||
|
parts.append(engineering_context_text)
|
||||||
|
parts.append("")
|
||||||
|
|
||||||
|
# 6. Retrieved chunks (lowest trust)
|
||||||
if chunks:
|
if chunks:
|
||||||
parts.append("--- AtoCore Retrieved Context ---")
|
parts.append("--- AtoCore Retrieved Context ---")
|
||||||
if project_state_text:
|
if project_state_text:
|
||||||
@@ -361,7 +393,7 @@ def _format_full_context(
|
|||||||
parts.append(chunk.content)
|
parts.append(chunk.content)
|
||||||
parts.append("")
|
parts.append("")
|
||||||
parts.append("--- End Context ---")
|
parts.append("--- End Context ---")
|
||||||
elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text:
|
elif not project_state_text and not memory_text and not project_memory_text and not domain_knowledge_text and not engineering_context_text:
|
||||||
parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---")
|
parts.append("--- AtoCore Context ---\nNo relevant context found.\n--- End Context ---")
|
||||||
|
|
||||||
return "\n".join(parts)
|
return "\n".join(parts)
|
||||||
@@ -394,6 +426,7 @@ def _pack_to_dict(pack: ContextPack) -> dict:
|
|||||||
"has_memories": bool(pack.memory_text),
|
"has_memories": bool(pack.memory_text),
|
||||||
"has_project_memories": bool(pack.project_memory_text),
|
"has_project_memories": bool(pack.project_memory_text),
|
||||||
"has_domain_knowledge": bool(pack.domain_knowledge_text),
|
"has_domain_knowledge": bool(pack.domain_knowledge_text),
|
||||||
|
"has_engineering_context": bool(pack.engineering_context_text),
|
||||||
"chunks": [
|
"chunks": [
|
||||||
{
|
{
|
||||||
"source_file": c.source_file,
|
"source_file": c.source_file,
|
||||||
@@ -407,6 +440,83 @@ def _pack_to_dict(pack: ContextPack) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_engineering_context(
|
||||||
|
query: str,
|
||||||
|
project: str,
|
||||||
|
budget: int,
|
||||||
|
) -> str:
|
||||||
|
"""Find entities matching the query and format their context.
|
||||||
|
|
||||||
|
Uses simple word-overlap matching between query tokens and entity
|
||||||
|
names to find relevant entities, then formats the top match with
|
||||||
|
its relationships as a compact text band.
|
||||||
|
"""
|
||||||
|
if budget < 100:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
from atocore.memory.reinforcement import _normalize, _tokenize
|
||||||
|
|
||||||
|
query_tokens = _tokenize(_normalize(query))
|
||||||
|
if not query_tokens:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
try:
|
||||||
|
entities = get_entities(project=project, limit=100)
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
if not entities:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
scored: list[tuple[int, "Entity"]] = []
|
||||||
|
for ent in entities:
|
||||||
|
name_tokens = _tokenize(_normalize(ent.name))
|
||||||
|
desc_tokens = _tokenize(_normalize(ent.description))
|
||||||
|
overlap = len(query_tokens & (name_tokens | desc_tokens))
|
||||||
|
if overlap > 0:
|
||||||
|
scored.append((overlap, ent))
|
||||||
|
|
||||||
|
if not scored:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
scored.sort(key=lambda t: t[0], reverse=True)
|
||||||
|
best_entity = scored[0][1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
ctx = get_entity_with_context(best_entity.id)
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
if ctx is None:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
lines = ["--- Engineering Context ---"]
|
||||||
|
lines.append(f"[{best_entity.entity_type}] {best_entity.name}")
|
||||||
|
if best_entity.description:
|
||||||
|
lines.append(f" {best_entity.description[:150]}")
|
||||||
|
|
||||||
|
for rel in ctx["relationships"][:8]:
|
||||||
|
other_id = (
|
||||||
|
rel.target_entity_id
|
||||||
|
if rel.source_entity_id == best_entity.id
|
||||||
|
else rel.source_entity_id
|
||||||
|
)
|
||||||
|
other = ctx["related_entities"].get(other_id)
|
||||||
|
if other:
|
||||||
|
direction = "->" if rel.source_entity_id == best_entity.id else "<-"
|
||||||
|
lines.append(
|
||||||
|
f" {direction} {rel.relationship_type} [{other.entity_type}] {other.name}"
|
||||||
|
)
|
||||||
|
|
||||||
|
lines.append("--- End Engineering Context ---")
|
||||||
|
text = "\n".join(lines)
|
||||||
|
|
||||||
|
if len(text) > budget:
|
||||||
|
text = text[:budget - 3].rstrip() + "..."
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
|
def _truncate_text_block(text: str, budget: int) -> tuple[str, int]:
|
||||||
"""Trim a formatted text block so trusted tiers cannot exceed the total budget."""
|
"""Trim a formatted text block so trusted tiers cannot exceed the total budget."""
|
||||||
if budget <= 0 or not text:
|
if budget <= 0 or not text:
|
||||||
@@ -425,30 +535,40 @@ def _trim_context_to_budget(
|
|||||||
memory_text: str,
|
memory_text: str,
|
||||||
project_memory_text: str,
|
project_memory_text: str,
|
||||||
domain_knowledge_text: str,
|
domain_knowledge_text: str,
|
||||||
|
engineering_context_text: str,
|
||||||
chunks: list[ContextChunk],
|
chunks: list[ContextChunk],
|
||||||
budget: int,
|
budget: int,
|
||||||
) -> tuple[str, list[ContextChunk]]:
|
) -> tuple[str, list[ContextChunk]]:
|
||||||
"""Trim retrieval -> domain knowledge -> project memories -> identity/preference -> project state."""
|
"""Trim retrieval -> engineering -> domain -> project memories -> identity -> state."""
|
||||||
kept_chunks = list(chunks)
|
kept_chunks = list(chunks)
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, kept_chunks,
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
)
|
)
|
||||||
while len(formatted) > budget and kept_chunks:
|
while len(formatted) > budget and kept_chunks:
|
||||||
kept_chunks.pop()
|
kept_chunks.pop()
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, kept_chunks,
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(formatted) <= budget:
|
if len(formatted) <= budget:
|
||||||
return formatted, kept_chunks
|
return formatted, kept_chunks
|
||||||
|
|
||||||
# Drop domain knowledge first (lowest trust of the memory tiers).
|
# Drop engineering context first.
|
||||||
|
engineering_context_text = ""
|
||||||
|
formatted = _format_full_context(
|
||||||
|
project_state_text, memory_text, project_memory_text,
|
||||||
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
|
)
|
||||||
|
if len(formatted) <= budget:
|
||||||
|
return formatted, kept_chunks
|
||||||
|
|
||||||
|
# Drop domain knowledge next.
|
||||||
domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0)
|
domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0)
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, kept_chunks,
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
)
|
)
|
||||||
if len(formatted) <= budget:
|
if len(formatted) <= budget:
|
||||||
return formatted, kept_chunks
|
return formatted, kept_chunks
|
||||||
@@ -459,7 +579,7 @@ def _trim_context_to_budget(
|
|||||||
)
|
)
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, kept_chunks,
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
)
|
)
|
||||||
if len(formatted) <= budget:
|
if len(formatted) <= budget:
|
||||||
return formatted, kept_chunks
|
return formatted, kept_chunks
|
||||||
@@ -467,7 +587,7 @@ def _trim_context_to_budget(
|
|||||||
memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))
|
memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0))
|
||||||
formatted = _format_full_context(
|
formatted = _format_full_context(
|
||||||
project_state_text, memory_text, project_memory_text,
|
project_state_text, memory_text, project_memory_text,
|
||||||
domain_knowledge_text, kept_chunks,
|
domain_knowledge_text, engineering_context_text, kept_chunks,
|
||||||
)
|
)
|
||||||
if len(formatted) <= budget:
|
if len(formatted) <= budget:
|
||||||
return formatted, kept_chunks
|
return formatted, kept_chunks
|
||||||
|
|||||||
Reference in New Issue
Block a user