feat: Implement ACE Context Engineering framework (SYS_17)

Complete implementation of Agentic Context Engineering (ACE) framework:

Core modules (optimization_engine/context/):
- playbook.py: AtomizerPlaybook with helpful/harmful scoring
- reflector.py: AtomizerReflector for insight extraction
- session_state.py: Context isolation (exposed/isolated state)
- feedback_loop.py: Automated learning from trial results
- compaction.py: Long-session context management
- cache_monitor.py: KV-cache optimization tracking
- runner_integration.py: OptimizationRunner integration

Dashboard integration:
- context.py: 12 REST API endpoints for playbook management

Tests:
- test_context_engineering.py: 44 unit tests
- test_context_integration.py: 16 integration tests

Documentation:
- CONTEXT_ENGINEERING_REPORT.md: Comprehensive implementation report
- CONTEXT_ENGINEERING_API.md: Complete API reference
- SYS_17_CONTEXT_ENGINEERING.md: System protocol
- Updated cheatsheet with SYS_17 quick reference
- Enhanced bootstrap (00_BOOTSTRAP_V2.md)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 20:21:20 -05:00
parent 0110d80401
commit 773f8ff8af
19 changed files with 8184 additions and 2 deletions
--- a/optimization_engine/context/playbook.py
+++ b/optimization_engine/context/playbook.py
@@ -0,0 +1,432 @@
+"""
+Atomizer Playbook - Structured Knowledge Store
+
+Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
+Based on ACE framework principles:
+- Incremental delta updates (never rewrite wholesale)
+- Helpful/harmful tracking for each insight
+- Content deduplication (exact match after case/whitespace normalization)
+- Category-based organization
+
+This module provides the core data structures for accumulating optimization
+knowledge across sessions.
+"""
+
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, Any
+from enum import Enum
+import json
+from pathlib import Path
+from datetime import datetime
+import hashlib
+
+
+class InsightCategory(Enum):
+    """
+    Closed set of categories a playbook insight can belong to.
+
+    Each member's value is a short code. That code is what
+    PlaybookItem.to_dict() writes as the serialized category and what
+    AtomizerPlaybook uses as the prefix of generated item IDs.
+    """
+
+    # Optimization strategies
+    STRATEGY = "str"
+    # Formulas and calculations
+    CALCULATION = "cal"
+    # Common mistakes to avoid
+    MISTAKE = "mis"
+    # Tool usage patterns
+    TOOL = "tool"
+    # Domain-specific knowledge (FEA, NX)
+    DOMAIN = "dom"
+    # Workflow patterns
+    WORKFLOW = "wf"
+
+
+@dataclass
+class PlaybookItem:
+    """
+    Single insight in the playbook with helpful/harmful tracking.
+
+    Each item accumulates feedback over time:
+    - helpful_count: Times this insight led to success
+    - harmful_count: Times this insight led to failure
+    - net_score: helpful - harmful (used for ranking)
+    - confidence: helpful / (helpful + harmful)
+
+    Other fields:
+    - id: Identifier of the item inside the playbook
+    - category: InsightCategory the insight belongs to
+    - content: The insight text
+    - created_at: ISO-8601 timestamp taken when the item is constructed
+    - last_used: ISO-8601 timestamp, or None if never set
+    - source_trials: Trial numbers associated with this insight
+    - tags: Free-form tags used for filtering
+    """
+    id: str
+    category: InsightCategory
+    content: str
+    helpful_count: int = 0
+    harmful_count: int = 0
+    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
+    last_used: Optional[str] = None
+    source_trials: List[int] = field(default_factory=list)
+    tags: List[str] = field(default_factory=list)
+
+    @property
+    def net_score(self) -> int:
+        """Net helpfulness score (helpful - harmful)."""
+        return self.helpful_count - self.harmful_count
+
+    @property
+    def confidence(self) -> float:
+        """Confidence score (0.0-1.0) based on outcome ratio."""
+        total = self.helpful_count + self.harmful_count
+        if total == 0:
+            return 0.5  # Neutral confidence for untested items
+        return self.helpful_count / total
+
+    def to_context_string(self) -> str:
+        """Format for injection into LLM context."""
+        return f"[{self.id}] helpful={self.helpful_count} harmful={self.harmful_count} :: {self.content}"
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert to a plain dictionary for serialization.
+
+        The category is stored as its enum value. The list fields are
+        copied so that mutating the returned dictionary cannot change
+        this item.
+        """
+        return {
+            "id": self.id,
+            "category": self.category.value,
+            "content": self.content,
+            "helpful_count": self.helpful_count,
+            "harmful_count": self.harmful_count,
+            "created_at": self.created_at,
+            "last_used": self.last_used,
+            "source_trials": list(self.source_trials),
+            "tags": list(self.tags)
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "PlaybookItem":
+        """
+        Create an item from a dictionary produced by to_dict().
+
+        Args:
+            data: Mapping with at least "id", "category" and "content";
+                every other key is optional
+
+        Returns:
+            A new PlaybookItem
+
+        Raises:
+            KeyError: If a required key is missing
+            ValueError: If "category" is not a valid InsightCategory value
+        """
+        return cls(
+            id=data["id"],
+            category=InsightCategory(data["category"]),
+            content=data["content"],
+            helpful_count=data.get("helpful_count", 0),
+            harmful_count=data.get("harmful_count", 0),
+            created_at=data.get("created_at", ""),
+            last_used=data.get("last_used"),
+            # Copy the lists (and treat a stored null as empty) so the item
+            # never shares mutable state with the input dictionary.
+            source_trials=list(data.get("source_trials") or []),
+            tags=list(data.get("tags") or [])
+        )
+
+
+@dataclass
+class AtomizerPlaybook:
+    """
+    Evolving playbook that accumulates optimization knowledge.
+
+    Based on ACE framework principles:
+    - Incremental delta updates (never rewrite wholesale)
+    - Helpful/harmful tracking for each insight
+    - Content deduplication (exact match after lower-casing and stripping)
+    - Category-based organization
+
+    Usage:
+        playbook = AtomizerPlaybook.load(path)
+        item = playbook.add_insight(InsightCategory.STRATEGY, "Use shell elements for thin walls")
+        playbook.record_outcome(item.id, helpful=True)
+        playbook.save(path)
+    """
+    items: Dict[str, PlaybookItem] = field(default_factory=dict)
+    version: int = 1
+    last_updated: str = field(default_factory=lambda: datetime.now().isoformat())
+
+    def _generate_id(self, category: InsightCategory) -> str:
+        """
+        Generate an unused ID of the form "<category value>-NNNNN".
+
+        The number is one more than the highest number currently in use for
+        the category. Counting the existing items instead would hand out an
+        ID that is still in use once prune_harmful() has removed a
+        lower-numbered item, and the new item would overwrite the old one.
+        """
+        prefix = f"{category.value}-"
+        highest = 0
+        for key in self.items:
+            if not key.startswith(prefix):
+                continue
+            suffix = key[len(prefix):]
+            # Ignore keys whose suffix is not a plain number.
+            if suffix.isdecimal():
+                highest = max(highest, int(suffix))
+        return f"{prefix}{highest + 1:05d}"
+
+    def _content_hash(self, content: str) -> str:
+        """Hash of the lower-cased, stripped content, used for deduplication."""
+        normalized = content.lower().strip()
+        return hashlib.md5(normalized.encode()).hexdigest()[:12]
+
+    def add_insight(
+        self,
+        category: InsightCategory,
+        content: str,
+        source_trial: Optional[int] = None,
+        tags: Optional[List[str]] = None
+    ) -> PlaybookItem:
+        """
+        Add new insight with delta update (ACE principle).
+
+        An existing item whose content matches after lower-casing and
+        stripping (in any category) is treated as a duplicate: its
+        helpful_count is incremented and the trial/tags are merged into it
+        instead of creating a new item.
+
+        Args:
+            category: Type of insight
+            content: The insight text
+            source_trial: Trial number that generated this insight
+                (0 is a valid trial number)
+            tags: Optional tags for filtering
+
+        Returns:
+            The created or updated PlaybookItem
+        """
+        content_hash = self._content_hash(content)
+
+        for item in self.items.values():
+            if self._content_hash(item.content) != content_hash:
+                continue
+
+            # Duplicate: reinforce the existing item instead of adding one.
+            item.helpful_count += 1
+            # Compare against None so that trial number 0 is not dropped.
+            if source_trial is not None and source_trial not in item.source_trials:
+                item.source_trials.append(source_trial)
+            if tags:
+                # dict.fromkeys de-duplicates while keeping first-seen order,
+                # so the saved JSON does not reshuffle between runs.
+                item.tags = list(dict.fromkeys(item.tags + list(tags)))
+            self.last_updated = datetime.now().isoformat()
+            return item
+
+        # Create new item
+        item_id = self._generate_id(category)
+        item = PlaybookItem(
+            id=item_id,
+            category=category,
+            content=content,
+            source_trials=[source_trial] if source_trial is not None else [],
+            # Copy so later edits to the caller's list do not leak in.
+            tags=list(tags) if tags else []
+        )
+        self.items[item_id] = item
+        self.last_updated = datetime.now().isoformat()
+        self.version += 1
+        return item
+
+    def record_outcome(self, item_id: str, helpful: bool) -> bool:
+        """
+        Record whether using this insight was helpful or harmful.
+
+        Args:
+            item_id: The playbook item ID
+            helpful: True if outcome was positive, False if negative
+
+        Returns:
+            True if item was found and updated, False otherwise
+        """
+        item = self.items.get(item_id)
+        if item is None:
+            return False
+
+        if helpful:
+            item.helpful_count += 1
+        else:
+            item.harmful_count += 1
+
+        # One timestamp for both fields so they agree exactly.
+        now = datetime.now().isoformat()
+        item.last_used = now
+        self.last_updated = now
+        return True
+
+    def get_context_for_task(
+        self,
+        task_type: str,
+        max_items: int = 20,
+        min_confidence: float = 0.5,
+        tags: Optional[List[str]] = None
+    ) -> str:
+        """
+        Generate context string for LLM consumption.
+
+        Filters by confidence and tags, sorts by net score, keeps the top
+        max_items and groups them under one heading per category.
+
+        Args:
+            task_type: Type of task. Accepted for interface compatibility;
+                it is not currently used to filter items.
+            max_items: Maximum items to include (values below 0 count as 0)
+            min_confidence: Minimum confidence threshold
+            tags: Optional tags; an item is kept if it has any of them
+
+        Returns:
+            Formatted context string for LLM
+        """
+        relevant_items = [
+            item for item in self.items.values()
+            if item.confidence >= min_confidence
+        ]
+
+        # Filter by tags if provided
+        if tags:
+            relevant_items = [
+                item for item in relevant_items
+                if any(tag in item.tags for tag in tags)
+            ]
+
+        # Sort by net score (most helpful first)
+        relevant_items.sort(key=lambda x: x.net_score, reverse=True)
+
+        # Group by category. A negative max_items would otherwise slice
+        # from the end and drop only the lowest-ranked items.
+        sections: Dict[str, List[str]] = {}
+        for item in relevant_items[:max(max_items, 0)]:
+            sections.setdefault(item.category.name, []).append(
+                item.to_context_string()
+            )
+
+        # Build context string
+        lines = ["## Atomizer Knowledge Playbook", ""]
+        for cat_name, entries in sections.items():
+            lines.append(f"### {cat_name}")
+            lines.extend(entries)
+            lines.append("")
+
+        return "\n".join(lines)
+
+    def search_by_content(
+        self,
+        query: str,
+        category: Optional[InsightCategory] = None,
+        limit: int = 5
+    ) -> List[PlaybookItem]:
+        """
+        Search playbook items by content similarity.
+
+        Simple keyword matching - could be enhanced with embeddings.
+        The score is the number of whitespace-separated words shared by the
+        query and the content, plus 1 when the whole query is a substring
+        of the content. Both sides are lower-cased first. An empty query is
+        a substring of every content string, so it matches every item with
+        score 1.
+
+        Args:
+            query: Search query
+            category: Optional category filter
+            limit: Maximum results (values below 0 count as 0)
+
+        Returns:
+            List of matching items sorted by score, then by net score
+        """
+        query_lower = query.lower()
+        query_words = set(query_lower.split())
+
+        scored_items = []
+        for item in self.items.values():
+            if category and item.category != category:
+                continue
+
+            content_lower = item.content.lower()
+            overlap = len(query_words & set(content_lower.split()))
+            is_substring = query_lower in content_lower
+
+            if overlap > 0 or is_substring:
+                scored_items.append((overlap + (1 if is_substring else 0), item))
+
+        scored_items.sort(key=lambda x: (-x[0], -x[1].net_score))
+        return [item for _, item in scored_items[:max(limit, 0)]]
+
+    def get_by_category(
+        self,
+        category: InsightCategory,
+        min_score: int = 0
+    ) -> List[PlaybookItem]:
+        """Get all items in a category with minimum net score."""
+        return [
+            item for item in self.items.values()
+            if item.category == category and item.net_score >= min_score
+        ]
+
+    def prune_harmful(self, threshold: int = -3) -> int:
+        """
+        Remove items that have proven consistently harmful.
+
+        Args:
+            threshold: Net score threshold (items at or below are removed)
+
+        Returns:
+            Number of items removed
+        """
+        # Collect first: a dict cannot be resized while it is iterated.
+        to_remove = [
+            item_id for item_id, item in self.items.items()
+            if item.net_score <= threshold
+        ]
+        for item_id in to_remove:
+            del self.items[item_id]
+
+        if to_remove:
+            self.last_updated = datetime.now().isoformat()
+            self.version += 1
+
+        return len(to_remove)
+
+    def get_stats(self) -> Dict[str, Any]:
+        """
+        Get playbook statistics.
+
+        Returns:
+            Dictionary with total_items, by_category (item count per
+            category name), version, last_updated, and avg/max/min net
+            score (all 0 for an empty playbook)
+        """
+        by_category: Dict[str, int] = {}
+        for item in self.items.values():
+            cat = item.category.name
+            by_category[cat] = by_category.get(cat, 0) + 1
+
+        scores = [item.net_score for item in self.items.values()]
+
+        return {
+            "total_items": len(self.items),
+            "by_category": by_category,
+            "version": self.version,
+            "last_updated": self.last_updated,
+            "avg_score": sum(scores) / len(scores) if scores else 0,
+            "max_score": max(scores) if scores else 0,
+            "min_score": min(scores) if scores else 0
+        }
+
+    def save(self, path: Path) -> None:
+        """
+        Persist playbook to JSON.
+
+        The data is written to a sibling "<name>.tmp" file and then moved
+        over the target, so an interrupted write cannot leave a truncated
+        playbook behind. Missing parent directories are created.
+
+        Args:
+            path: File path to save to (a str is accepted as well)
+        """
+        path = Path(path)
+        data = {
+            "version": self.version,
+            "last_updated": self.last_updated,
+            "items": {k: v.to_dict() for k, v in self.items.items()}
+        }
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        tmp_path = path.with_name(path.name + ".tmp")
+        with open(tmp_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2)
+        # Same-directory rename replaces any existing file in one step.
+        tmp_path.replace(path)
+
+    @classmethod
+    def load(cls, path: Path) -> "AtomizerPlaybook":
+        """
+        Load playbook from JSON.
+
+        Args:
+            path: File path to load from (a str is accepted as well)
+
+        Returns:
+            Loaded playbook (or new empty playbook if file doesn't exist)
+        """
+        path = Path(path)
+        if not path.exists():
+            return cls()
+
+        with open(path, encoding='utf-8') as f:
+            data = json.load(f)
+
+        playbook = cls(
+            version=data.get("version", 1),
+            last_updated=data.get("last_updated", datetime.now().isoformat())
+        )
+
+        # Items are re-keyed by their own id rather than by the JSON key.
+        for item_data in (data.get("items") or {}).values():
+            item = PlaybookItem.from_dict(item_data)
+            playbook.items[item.id] = item
+
+        return playbook
+
+
+# Module-level cache behind get_playbook() / save_playbook(): the playbook
+# most recently loaded by get_playbook() and the path it was loaded from.
+# Both stay None until get_playbook() is called for the first time.
+_global_playbook: Optional[AtomizerPlaybook] = None
+_global_playbook_path: Optional[Path] = None
+
+
+def get_playbook(path: Optional[Path] = None) -> AtomizerPlaybook:
+    """
+    Get the global playbook instance.
+
+    The playbook is loaded on the first call and cached. Later calls with
+    the same path return the cached instance. A call with a different path
+    loads that file and replaces the cached instance, without saving the
+    previous one.
+
+    Args:
+        path: Optional path to load from (a str is accepted as well).
+            Defaults to knowledge_base/playbook.json, resolved two
+            directory levels above the directory containing this module.
+
+    Returns:
+        The global AtomizerPlaybook instance
+    """
+    global _global_playbook, _global_playbook_path
+
+    if path is None:
+        # Default path
+        path = Path(__file__).parents[2] / "knowledge_base" / "playbook.json"
+    else:
+        # A str never compares equal to the cached Path, which would force
+        # a reload (and drop unsaved in-memory changes) on every call.
+        path = Path(path)
+
+    if _global_playbook is None or _global_playbook_path != path:
+        _global_playbook = AtomizerPlaybook.load(path)
+        _global_playbook_path = path
+
+    return _global_playbook
+
+
+def save_playbook() -> None:
+    """
+    Write the global playbook back to the path it was loaded from.
+
+    Does nothing when no global playbook has been loaded yet.
+    """
+    playbook, target = _global_playbook, _global_playbook_path
+    if playbook is None or target is None:
+        return
+    playbook.save(target)