feat: Implement ACE Context Engineering framework (SYS_17)

Complete implementation of Agentic Context Engineering (ACE) framework:

Core modules (optimization_engine/context/):
- playbook.py: AtomizerPlaybook with helpful/harmful scoring
- reflector.py: AtomizerReflector for insight extraction
- session_state.py: Context isolation (exposed/isolated state)
- feedback_loop.py: Automated learning from trial results
- compaction.py: Long-session context management
- cache_monitor.py: KV-cache optimization tracking
- runner_integration.py: OptimizationRunner integration

Dashboard integration:
- context.py: 12 REST API endpoints for playbook management

Tests:
- test_context_engineering.py: 44 unit tests
- test_context_integration.py: 16 integration tests

Documentation:
- CONTEXT_ENGINEERING_REPORT.md: Comprehensive implementation report
- CONTEXT_ENGINEERING_API.md: Complete API reference
- SYS_17_CONTEXT_ENGINEERING.md: System protocol
- Updated cheatsheet with SYS_17 quick reference
- Enhanced bootstrap (00_BOOTSTRAP_V2.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 20:21:20 -05:00
parent 0110d80401
commit 773f8ff8af
19 changed files with 8184 additions and 2 deletions

View File

@@ -0,0 +1,432 @@
"""
Atomizer Playbook - Structured Knowledge Store
Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
Based on ACE framework principles:
- Incremental delta updates (never rewrite wholesale)
- Helpful/harmful tracking for each insight
- Semantic deduplication
- Category-based organization
This module provides the core data structures for accumulating optimization
knowledge across sessions.
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
from enum import Enum
import json
from pathlib import Path
from datetime import datetime
import hashlib
class InsightCategory(Enum):
    """Closed set of categories a playbook insight can belong to.

    Each member's value is a short code. That code is what
    ``PlaybookItem.to_dict`` writes out for the category and what
    ``AtomizerPlaybook._generate_id`` uses as the prefix of an item ID.
    """

    # Optimization strategies
    STRATEGY = "str"

    # Formulas and calculations
    CALCULATION = "cal"

    # Common mistakes to avoid
    MISTAKE = "mis"

    # Tool usage patterns
    TOOL = "tool"

    # Domain-specific knowledge (FEA, NX)
    DOMAIN = "dom"

    # Workflow patterns
    WORKFLOW = "wf"
@dataclass
class PlaybookItem:
    """
    Single insight in the playbook with helpful/harmful tracking.

    Each item accumulates feedback over time:
    - helpful_count: Times this insight led to success
    - harmful_count: Times this insight led to failure
    - net_score: helpful - harmful (used for ranking)
    - confidence: helpful / (helpful + harmful)
    """

    id: str
    category: InsightCategory
    content: str
    helpful_count: int = 0
    harmful_count: int = 0
    # ISO-8601 timestamp taken when the item object is created.
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    # ISO-8601 timestamp of the last recorded outcome; None until one is recorded.
    last_used: Optional[str] = None
    source_trials: List[int] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)

    @property
    def net_score(self) -> int:
        """Net helpfulness score (helpful - harmful)."""
        return self.helpful_count - self.harmful_count

    @property
    def confidence(self) -> float:
        """Confidence score (0.0-1.0) based on outcome ratio."""
        total = self.helpful_count + self.harmful_count
        if total == 0:
            return 0.5  # Neutral confidence for untested items
        return self.helpful_count / total

    def to_context_string(self) -> str:
        """Format for injection into LLM context."""
        return (
            f"[{self.id}] helpful={self.helpful_count} "
            f"harmful={self.harmful_count} :: {self.content}"
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for serialization.

        The category is stored as its short code (``category.value``).
        ``source_trials`` and ``tags`` are copied so that mutating the
        returned dictionary cannot alter this item.

        Returns:
            JSON-serializable dictionary describing the item
        """
        return {
            "id": self.id,
            "category": self.category.value,
            "content": self.content,
            "helpful_count": self.helpful_count,
            "harmful_count": self.harmful_count,
            "created_at": self.created_at,
            "last_used": self.last_used,
            # Copies, not the live lists: callers may post-process the dict.
            "source_trials": list(self.source_trials),
            "tags": list(self.tags),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlaybookItem":
        """
        Create from dictionary.

        Args:
            data: Dictionary as produced by ``to_dict``. ``id``, ``category``
                and ``content`` are required; the other keys are optional.

        Returns:
            New PlaybookItem owning its own ``source_trials`` and ``tags`` lists

        Raises:
            KeyError: If a required key is missing
            ValueError: If ``category`` is not a known category code
        """
        return cls(
            id=data["id"],
            category=InsightCategory(data["category"]),
            content=data["content"],
            helpful_count=data.get("helpful_count", 0),
            harmful_count=data.get("harmful_count", 0),
            created_at=data.get("created_at", ""),
            last_used=data.get("last_used"),
            # Copy the lists (and treat a missing/null value as empty) so the
            # item never shares mutable state with the input dictionary.
            source_trials=list(data.get("source_trials") or []),
            tags=list(data.get("tags") or []),
        )
@dataclass
class AtomizerPlaybook:
    """
    Evolving playbook that accumulates optimization knowledge.

    Based on ACE framework principles:
    - Incremental delta updates (never rewrite wholesale)
    - Helpful/harmful tracking for each insight
    - Semantic deduplication
    - Category-based organization

    Usage:
        playbook = AtomizerPlaybook.load(path)
        item = playbook.add_insight(InsightCategory.STRATEGY, "Use shell elements for thin walls")
        playbook.record_outcome(item.id, helpful=True)
        playbook.save(path)
    """

    # Items keyed by their ID ("<category code>-<5-digit number>").
    items: Dict[str, PlaybookItem] = field(default_factory=dict)
    # Incremented whenever an item is added or items are pruned.
    version: int = 1
    # ISO-8601 timestamp of the last mutation.
    last_updated: str = field(default_factory=lambda: datetime.now().isoformat())

    def _generate_id(self, category: InsightCategory) -> str:
        """
        Generate unique ID for new item.

        The number is one more than the highest number currently in use for
        the category. Counting the existing items instead would hand out an
        ID that is still taken once a lower-numbered item has been pruned.

        Args:
            category: Category whose short code prefixes the ID

        Returns:
            ID of the form "<code>-NNNNN" not present in ``self.items``
        """
        prefix = f"{category.value}-"
        highest = 0
        for key in self.items:
            if not key.startswith(prefix):
                continue
            suffix = key[len(prefix):]
            # Ignore keys that do not follow the numeric scheme.
            if suffix.isdecimal():
                highest = max(highest, int(suffix))
        return f"{prefix}{highest + 1:05d}"

    def _content_hash(self, content: str) -> str:
        """Generate hash for content deduplication (case/edge-whitespace insensitive)."""
        normalized = content.lower().strip()
        return hashlib.md5(normalized.encode()).hexdigest()[:12]

    def add_insight(
        self,
        category: InsightCategory,
        content: str,
        source_trial: Optional[int] = None,
        tags: Optional[List[str]] = None
    ) -> PlaybookItem:
        """
        Add new insight with delta update (ACE principle).

        Checks for duplicates (same text ignoring case and surrounding
        whitespace, in any category) before adding. If a duplicate is found,
        its helpful_count is incremented and the trial/tags are merged into
        it instead of creating a new item.

        Args:
            category: Type of insight
            content: The insight text
            source_trial: Trial number that generated this insight
                (0 is a valid trial number)
            tags: Optional tags for filtering

        Returns:
            The created or updated PlaybookItem
        """
        content_hash = self._content_hash(content)

        # Check for near-duplicates
        for item in self.items.values():
            if self._content_hash(item.content) != content_hash:
                continue
            # Update existing instead of adding duplicate
            item.helpful_count += 1
            # Compare against None: trial number 0 must be recorded too.
            if source_trial is not None and source_trial not in item.source_trials:
                item.source_trials.append(source_trial)
            if tags:
                # Order-preserving de-duplication keeps saved files stable.
                item.tags = list(dict.fromkeys(item.tags + tags))
            self.last_updated = datetime.now().isoformat()
            return item

        # Create new item
        item_id = self._generate_id(category)
        item = PlaybookItem(
            id=item_id,
            category=category,
            content=content,
            source_trials=[source_trial] if source_trial is not None else [],
            tags=list(tags) if tags else []
        )
        self.items[item_id] = item
        self.last_updated = datetime.now().isoformat()
        self.version += 1
        return item

    def record_outcome(self, item_id: str, helpful: bool) -> bool:
        """
        Record whether using this insight was helpful or harmful.

        Args:
            item_id: The playbook item ID
            helpful: True if outcome was positive, False if negative

        Returns:
            True if item was found and updated, False otherwise
        """
        item = self.items.get(item_id)
        if item is None:
            return False

        if helpful:
            item.helpful_count += 1
        else:
            item.harmful_count += 1
        item.last_used = datetime.now().isoformat()
        self.last_updated = datetime.now().isoformat()
        return True

    def get_context_for_task(
        self,
        task_type: str,
        max_items: int = 20,
        min_confidence: float = 0.5,
        tags: Optional[List[str]] = None
    ) -> str:
        """
        Generate context string for LLM consumption.

        Filters by confidence (and tags, if given), sorts by net score and
        groups the top ``max_items`` items by category.

        Args:
            task_type: Type of task. Accepted for interface stability but
                not currently used for filtering.
            max_items: Maximum items to include
            min_confidence: Minimum confidence threshold
            tags: Optional tags to filter by (item must carry at least one)

        Returns:
            Formatted context string for LLM; only the header when no item
            qualifies
        """
        relevant_items = [
            item for item in self.items.values()
            if item.confidence >= min_confidence
        ]

        # Filter by tags if provided
        if tags:
            relevant_items = [
                item for item in relevant_items
                if any(tag in item.tags for tag in tags)
            ]

        # Sort by net score (most helpful first)
        relevant_items.sort(key=lambda x: x.net_score, reverse=True)

        # Group by category, in order of each category's best-scoring item
        sections: Dict[str, List[str]] = {}
        for item in relevant_items[:max_items]:
            sections.setdefault(item.category.name, []).append(
                item.to_context_string()
            )

        # Build context string
        lines = ["## Atomizer Knowledge Playbook", ""]
        for cat_name, entries in sections.items():
            lines.append(f"### {cat_name}")
            lines.extend(entries)
            lines.append("")
        return "\n".join(lines)

    def search_by_content(
        self,
        query: str,
        category: Optional[InsightCategory] = None,
        limit: int = 5
    ) -> List[PlaybookItem]:
        """
        Search playbook items by content similarity.

        Simple keyword matching - could be enhanced with embeddings. The
        score is the number of shared whitespace-separated words, plus one
        when the whole query occurs as a substring of the content.

        Args:
            query: Search query
            category: Optional category filter
            limit: Maximum results

        Returns:
            List of matching items sorted by relevance, then by net score
        """
        query_lower = query.lower()
        query_words = set(query_lower.split())

        scored_items = []
        for item in self.items.values():
            if category and item.category != category:
                continue
            content_lower = item.content.lower()
            content_words = set(content_lower.split())

            # Simple word overlap scoring
            overlap = len(query_words & content_words)
            is_substring = query_lower in content_lower
            if overlap > 0 or is_substring:
                score = overlap + (1 if is_substring else 0)
                scored_items.append((score, item))

        scored_items.sort(key=lambda x: (-x[0], -x[1].net_score))
        return [item for _, item in scored_items[:limit]]

    def get_by_category(
        self,
        category: InsightCategory,
        min_score: int = 0
    ) -> List[PlaybookItem]:
        """Get all items in a category with minimum net score."""
        return [
            item for item in self.items.values()
            if item.category == category and item.net_score >= min_score
        ]

    def prune_harmful(self, threshold: int = -3) -> int:
        """
        Remove items that have proven consistently harmful.

        Args:
            threshold: Net score threshold (items at or below are removed)

        Returns:
            Number of items removed
        """
        # Collect first: a dict must not change size while being iterated.
        to_remove = [
            item_id for item_id, item in self.items.items()
            if item.net_score <= threshold
        ]
        for item_id in to_remove:
            del self.items[item_id]

        if to_remove:
            self.last_updated = datetime.now().isoformat()
            self.version += 1
        return len(to_remove)

    def get_stats(self) -> Dict[str, Any]:
        """Get playbook statistics (counts per category and score summary)."""
        by_category: Dict[str, int] = {}
        for item in self.items.values():
            cat = item.category.name
            by_category[cat] = by_category.get(cat, 0) + 1

        scores = [item.net_score for item in self.items.values()]
        return {
            "total_items": len(self.items),
            "by_category": by_category,
            "version": self.version,
            "last_updated": self.last_updated,
            "avg_score": sum(scores) / len(scores) if scores else 0,
            "max_score": max(scores) if scores else 0,
            "min_score": min(scores) if scores else 0
        }

    def save(self, path: Path) -> None:
        """
        Persist playbook to JSON.

        Parent directories are created if missing.

        Args:
            path: File path to save to
        """
        data = {
            "version": self.version,
            "last_updated": self.last_updated,
            "items": {k: v.to_dict() for k, v in self.items.items()}
        }
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load(cls, path: Path) -> "AtomizerPlaybook":
        """
        Load playbook from JSON.

        Args:
            path: File path to load from

        Returns:
            Loaded playbook (or new empty playbook if file doesn't exist)
        """
        if not path.exists():
            return cls()

        with open(path, encoding='utf-8') as f:
            data = json.load(f)

        playbook = cls(
            version=data.get("version", 1),
            last_updated=data.get("last_updated", datetime.now().isoformat())
        )
        # Re-key by the item's own ID rather than trusting the stored key.
        for item_data in data.get("items", {}).values():
            item = PlaybookItem.from_dict(item_data)
            playbook.items[item.id] = item
        return playbook
# Convenience function for global playbook access
# Module-level cache used by get_playbook() and save_playbook():
# the playbook loaded most recently (None until get_playbook() is first called)...
_global_playbook: Optional[AtomizerPlaybook] = None
# ...and the path it was loaded from, which save_playbook() writes back to.
_global_playbook_path: Optional[Path] = None
def get_playbook(path: Optional[Path] = None) -> AtomizerPlaybook:
    """
    Return the module-wide playbook, loading it on demand.

    The cached instance is reused while the requested path matches the one
    it was loaded from. Asking for a different path replaces the cached
    instance with a playbook loaded from that path.

    Args:
        path: Optional path to load from (uses default if not provided)

    Returns:
        The global AtomizerPlaybook instance
    """
    global _global_playbook, _global_playbook_path

    # Default path: <two directories above this file's directory>/knowledge_base/playbook.json
    target = path if path is not None else (
        Path(__file__).parents[2] / "knowledge_base" / "playbook.json"
    )

    cache_is_stale = _global_playbook is None or _global_playbook_path != target
    if cache_is_stale:
        _global_playbook = AtomizerPlaybook.load(target)
        _global_playbook_path = target

    return _global_playbook
def save_playbook() -> None:
    """Write the global playbook back to the path it was loaded from.

    Does nothing when no playbook has been loaded through get_playbook() yet.
    """
    # The module-level names are only read here, so no `global` statement is needed.
    if _global_playbook is None or _global_playbook_path is None:
        return
    _global_playbook.save(_global_playbook_path)