diff --git a/src/atocore/context/builder.py b/src/atocore/context/builder.py index 91665cf..3ce00b9 100644 --- a/src/atocore/context/builder.py +++ b/src/atocore/context/builder.py @@ -14,6 +14,7 @@ import atocore.config as _config from atocore.context.project_state import format_project_state, get_state from atocore.memory.service import get_memories_for_context from atocore.observability.logger import get_logger +from atocore.engineering.service import get_entities, get_entity_with_context from atocore.projects.registry import resolve_project_name from atocore.retrieval.retriever import ChunkResult, retrieve @@ -42,6 +43,7 @@ PROJECT_MEMORY_TYPES = ["project", "knowledge", "episodic"] # model breaks down below 5N because the contact assumption fails"). DOMAIN_KNOWLEDGE_BUDGET_RATIO = 0.10 DOMAIN_KNOWLEDGE_TYPES = ["knowledge"] +ENGINEERING_CONTEXT_BUDGET_RATIO = 0.10 # Last built context pack for debug inspection _last_context_pack: "ContextPack | None" = None @@ -67,6 +69,8 @@ class ContextPack: project_memory_chars: int = 0 domain_knowledge_text: str = "" domain_knowledge_chars: int = 0 + engineering_context_text: str = "" + engineering_context_chars: int = 0 total_chars: int = 0 budget: int = 0 budget_remaining: int = 0 @@ -165,10 +169,27 @@ def build_context( query=user_prompt, ) + # 2d. Engineering context — structured entity/relationship data + # when the query matches a known entity name. + engineering_context_text = "" + engineering_context_chars = 0 + if canonical_project: + eng_budget = min( + int(budget * ENGINEERING_CONTEXT_BUDGET_RATIO), + max(budget - project_state_chars - memory_chars + - project_memory_chars - domain_knowledge_chars, 0), + ) + if eng_budget > 0: + engineering_context_text = _build_engineering_context( + user_prompt, canonical_project, eng_budget, + ) + engineering_context_chars = len(engineering_context_text) + # 3. 
Calculate remaining budget for retrieval retrieval_budget = ( budget - project_state_chars - memory_chars - project_memory_chars - domain_knowledge_chars + - engineering_context_chars ) # 4. Retrieve candidates @@ -191,7 +212,7 @@ def build_context( # 7. Format full context formatted = _format_full_context( project_state_text, memory_text, project_memory_text, - domain_knowledge_text, selected, + domain_knowledge_text, engineering_context_text, selected, ) if len(formatted) > budget: formatted, selected = _trim_context_to_budget( @@ -199,6 +220,7 @@ def build_context( memory_text, project_memory_text, domain_knowledge_text, + engineering_context_text, selected, budget, ) @@ -210,6 +232,7 @@ def build_context( memory_chars = len(memory_text) project_memory_chars = len(project_memory_text) domain_knowledge_chars = len(domain_knowledge_text) + engineering_context_chars = len(engineering_context_text) retrieval_chars = sum(c.char_count for c in selected) total_chars = len(formatted) duration_ms = int((time.time() - start) * 1000) @@ -224,6 +247,8 @@ def build_context( project_memory_chars=project_memory_chars, domain_knowledge_text=domain_knowledge_text, domain_knowledge_chars=domain_knowledge_chars, + engineering_context_text=engineering_context_text, + engineering_context_chars=engineering_context_chars, total_chars=total_chars, budget=budget, budget_remaining=budget - total_chars, @@ -243,6 +268,7 @@ def build_context( memory_chars=memory_chars, project_memory_chars=project_memory_chars, domain_knowledge_chars=domain_knowledge_chars, + engineering_context_chars=engineering_context_chars, retrieval_chars=retrieval_chars, total_chars=total_chars, budget_remaining=budget - total_chars, @@ -324,7 +350,8 @@ def _format_full_context( memory_text: str, project_memory_text: str, domain_knowledge_text: str, - chunks: list[ContextChunk], + engineering_context_text: str = "", + chunks: list[ContextChunk] | None = None, ) -> str: """Format project state + memories + retrieved 
def _build_engineering_context(
    query: str,
    project: str,
    budget: int,
) -> str:
    """Format the best-matching engineering entity as a compact text band.

    Entities of ``project`` are scored by word overlap between the query
    tokens and the tokens of each entity's name and description. The top
    scorer (the first one wins ties) is rendered together with up to eight
    of its relationships between ``--- Engineering Context ---`` markers.

    Args:
        query: User prompt to match against entity names and descriptions.
        project: Project whose entities are searched.
        budget: Maximum number of characters the band may occupy.

    Returns:
        The formatted band, never longer than ``budget`` and always closed
        by its end marker; ``""`` when the budget is below 100 characters,
        the query yields no tokens, nothing matches, or the entity lookup
        fails or returns nothing.
    """
    if budget < 100:
        return ""

    import logging

    from atocore.memory.reinforcement import _normalize, _tokenize

    log = logging.getLogger(__name__)

    max_entities = 100
    max_relationships = 8
    max_description_chars = 150

    query_tokens = _tokenize(_normalize(query))
    if not query_tokens:
        return ""

    # Engineering context is an optional band: a failing lookup must not
    # break context building, but it should leave a trace.
    try:
        entities = get_entities(project=project, limit=max_entities)
    except Exception:
        log.warning(
            "engineering context: listing entities failed for project %s",
            project,
            exc_info=True,
        )
        return ""

    if not entities:
        return ""

    # Track the single best match; the strict ">" keeps the earliest entity
    # on ties, which is what a stable descending sort would put first.
    best_entity = None
    best_overlap = 0
    for ent in entities:
        ent_tokens = _tokenize(_normalize(ent.name)) | _tokenize(
            _normalize(ent.description or "")
        )
        overlap = len(query_tokens & ent_tokens)
        if overlap > best_overlap:
            best_entity, best_overlap = ent, overlap

    if best_entity is None:
        return ""

    try:
        ctx = get_entity_with_context(best_entity.id)
    except Exception:
        log.warning(
            "engineering context: loading entity %s failed",
            best_entity.id,
            exc_info=True,
        )
        return ""

    if ctx is None:
        return ""

    header = "--- Engineering Context ---"
    footer = "--- End Engineering Context ---"

    body = [f"[{best_entity.entity_type}] {best_entity.name}"]
    if best_entity.description:
        body.append(f" {best_entity.description[:max_description_chars]}")

    for rel in ctx["relationships"][:max_relationships]:
        outgoing = rel.source_entity_id == best_entity.id
        other_id = rel.target_entity_id if outgoing else rel.source_entity_id
        other = ctx["related_entities"].get(other_id)
        if other:
            direction = "->" if outgoing else "<-"
            body.append(
                f" {direction} {rel.relationship_type} [{other.entity_type}] {other.name}"
            )

    def _render() -> str:
        return "\n".join([header, *body, footer])

    # Shed whole trailing lines (relationships first, then the description)
    # instead of slicing the joined text, so the end marker always survives.
    text = _render()
    while len(text) > budget and len(body) > 1:
        body.pop()
        text = _render()

    if len(text) > budget:
        # Only the entity line is left and it is still too long: shorten it.
        # budget >= 100 guarantees room after the two markers and newlines.
        room = budget - len(header) - len(footer) - 2
        body[0] = body[0][: max(room - 3, 0)].rstrip() + "..."
        text = _render()

    return text
domain_knowledge_text, _ = _truncate_text_block(domain_knowledge_text, 0) formatted = _format_full_context( project_state_text, memory_text, project_memory_text, - domain_knowledge_text, kept_chunks, + domain_knowledge_text, engineering_context_text, kept_chunks, ) if len(formatted) <= budget: return formatted, kept_chunks @@ -459,7 +579,7 @@ def _trim_context_to_budget( ) formatted = _format_full_context( project_state_text, memory_text, project_memory_text, - domain_knowledge_text, kept_chunks, + domain_knowledge_text, engineering_context_text, kept_chunks, ) if len(formatted) <= budget: return formatted, kept_chunks @@ -467,7 +587,7 @@ def _trim_context_to_budget( memory_text, _ = _truncate_text_block(memory_text, max(budget - len(project_state_text), 0)) formatted = _format_full_context( project_state_text, memory_text, project_memory_text, - domain_knowledge_text, kept_chunks, + domain_knowledge_text, engineering_context_text, kept_chunks, ) if len(formatted) <= budget: return formatted, kept_chunks