feat: Implement ACE Context Engineering framework (SYS_17)
Complete implementation of Agentic Context Engineering (ACE) framework: Core modules (optimization_engine/context/): - playbook.py: AtomizerPlaybook with helpful/harmful scoring - reflector.py: AtomizerReflector for insight extraction - session_state.py: Context isolation (exposed/isolated state) - feedback_loop.py: Automated learning from trial results - compaction.py: Long-session context management - cache_monitor.py: KV-cache optimization tracking - runner_integration.py: OptimizationRunner integration Dashboard integration: - context.py: 12 REST API endpoints for playbook management Tests: - test_context_engineering.py: 44 unit tests - test_context_integration.py: 16 integration tests Documentation: - CONTEXT_ENGINEERING_REPORT.md: Comprehensive implementation report - CONTEXT_ENGINEERING_API.md: Complete API reference - SYS_17_CONTEXT_ENGINEERING.md: System protocol - Updated cheatsheet with SYS_17 quick reference - Enhanced bootstrap (00_BOOTSTRAP_V2.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
390
optimization_engine/context/cache_monitor.py
Normal file
390
optimization_engine/context/cache_monitor.py
Normal file
@@ -0,0 +1,390 @@
|
||||
"""
|
||||
Atomizer Cache Monitor - KV-Cache Optimization
|
||||
|
||||
Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
|
||||
|
||||
Monitors and optimizes KV-cache hit rates for cost reduction.
|
||||
Based on the principle that cached tokens cost ~10x less than uncached.
|
||||
|
||||
The cache monitor tracks:
|
||||
- Stable prefix length (should stay constant for cache hits)
|
||||
- Cache hit rate across requests
|
||||
- Estimated cost savings
|
||||
|
||||
Structure for KV-cache optimization:
|
||||
1. STABLE PREFIX - Never changes (identity, tools, routing)
|
||||
2. SEMI-STABLE - Changes per session type (protocols, playbook)
|
||||
3. DYNAMIC - Changes every turn (state, user message)
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass
class CacheStats:
    """Counters describing KV-cache efficiency across a run of requests."""

    total_requests: int = 0        # number of contexts prepared so far
    cache_hits: int = 0            # requests whose stable prefix was unchanged
    cache_misses: int = 0          # requests whose stable prefix changed
    prefix_length_chars: int = 0   # size of the current stable prefix, in characters
    prefix_length_tokens: int = 0  # rough token estimate of the stable prefix

    @property
    def hit_rate(self) -> float:
        """Fraction of requests that were cache hits (0.0 when no requests yet)."""
        if not self.total_requests:
            return 0.0
        return self.cache_hits / self.total_requests

    @property
    def estimated_savings_percent(self) -> float:
        """Estimated percent cost saved by caching.

        Assumes cached tokens cost ~10% of uncached tokens, so each hit
        saves ~90% on its prefix: savings = hit_rate * 90.
        """
        return self.hit_rate * 90.0 if self.total_requests else 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the raw counters plus the derived metrics."""
        return {
            "total_requests": self.total_requests,
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "hit_rate": self.hit_rate,
            "prefix_length_chars": self.prefix_length_chars,
            "prefix_length_tokens": self.prefix_length_tokens,
            "estimated_savings_percent": self.estimated_savings_percent,
        }
|
||||
|
||||
|
||||
@dataclass
class ContextSection:
    """A named chunk of context tagged with how frequently it changes."""

    name: str
    content: str
    stability: str   # one of "stable", "semi_stable", "dynamic"
    last_hash: str = ""  # digest observed by the previous has_changed() call

    def compute_hash(self) -> str:
        """Return the MD5 hex digest of the current content."""
        return hashlib.md5(self.content.encode()).hexdigest()

    def has_changed(self) -> bool:
        """Report whether the content differs from the last check.

        Note: this mutates ``last_hash``, so an unchanged section reports
        True at most once (the first call after construction, while
        ``last_hash`` is still empty).
        """
        fresh = self.compute_hash()
        try:
            return fresh != self.last_hash
        finally:
            # Remember what we saw so the next call compares against it.
            self.last_hash = fresh
|
||||
|
||||
|
||||
class ContextCacheOptimizer:
    """
    Tracks and optimizes context assembly for KV-cache efficiency.

    Implements the three-tier context structure:
    1. Stable prefix (cached across all requests)
    2. Semi-stable section (cached per session type)
    3. Dynamic section (changes every turn)

    Usage:
        optimizer = ContextCacheOptimizer()

        # Build context with cache optimization
        context = optimizer.prepare_context(
            stable_prefix=identity_and_tools,
            semi_stable=protocols_and_playbook,
            dynamic=state_and_message
        )

        # Check efficiency
        print(optimizer.get_report())
    """

    # Approximate characters per token, used only for rough estimates.
    CHARS_PER_TOKEN = 4

    # Cap on in-memory request records (oldest entries are dropped).
    _HISTORY_LIMIT = 100

    def __init__(self):
        self.stats = CacheStats()
        self._sections: Dict[str, ContextSection] = {}
        self._last_stable_hash: Optional[str] = None
        self._last_semi_stable_hash: Optional[str] = None
        self._request_history: List[Dict[str, Any]] = []

    def prepare_context(
        self,
        stable_prefix: str,
        semi_stable: str,
        dynamic: str
    ) -> str:
        """
        Assemble context optimized for caching.

        A request counts as a cache hit when the stable prefix is
        byte-identical to the previous request's (same MD5); the very
        first request is always a miss.

        Args:
            stable_prefix: Content that never changes (tools, identity)
            semi_stable: Content that changes per session type
            dynamic: Content that changes every turn

        Returns:
            Assembled context string with clear section boundaries
        """
        # Hash the stable prefix for change detection (not security).
        stable_hash = hashlib.md5(stable_prefix.encode()).hexdigest()

        self.stats.total_requests += 1

        # BUG FIX: decide hit/miss BEFORE overwriting _last_stable_hash.
        # Previously the history record below compared against the
        # already-updated hash, so every entry logged "cache_hit": True.
        cache_hit = stable_hash == self._last_stable_hash
        if cache_hit:
            self.stats.cache_hits += 1
        else:
            self.stats.cache_misses += 1

        self._last_stable_hash = stable_hash
        self.stats.prefix_length_chars = len(stable_prefix)
        self.stats.prefix_length_tokens = len(stable_prefix) // self.CHARS_PER_TOKEN

        # Record request for history
        self._request_history.append({
            "timestamp": datetime.now().isoformat(),
            "cache_hit": cache_hit,
            "stable_length": len(stable_prefix),
            "semi_stable_length": len(semi_stable),
            "dynamic_length": len(dynamic)
        })

        # Keep history bounded to avoid unbounded memory growth.
        if len(self._request_history) > self._HISTORY_LIMIT:
            self._request_history = self._request_history[-self._HISTORY_LIMIT:]

        # Assemble with clear boundaries, using markdown horizontal
        # rules as section separators.
        return f"""{stable_prefix}

---

{semi_stable}

---

{dynamic}"""

    def register_section(
        self,
        name: str,
        content: str,
        stability: str = "dynamic"
    ) -> None:
        """
        Register a context section for change tracking.

        Args:
            name: Section identifier
            content: Section content
            stability: One of "stable", "semi_stable", "dynamic"
        """
        section = ContextSection(
            name=name,
            content=content,
            stability=stability
        )
        # Seed the hash so the first check_section_changes() call
        # reports False unless the content is actually modified.
        section.last_hash = section.compute_hash()
        self._sections[name] = section

    def check_section_changes(self) -> Dict[str, bool]:
        """
        Check which registered sections have changed since the last check.

        Returns:
            Dictionary mapping section names to change status
        """
        return {
            name: section.has_changed()
            for name, section in self._sections.items()
        }

    def get_stable_sections(self) -> List[str]:
        """Get names of registered sections marked as stable."""
        return [
            name for name, section in self._sections.items()
            if section.stability == "stable"
        ]

    def get_report(self) -> str:
        """Generate human-readable cache efficiency report."""
        return f"""
Cache Efficiency Report
=======================
Requests: {self.stats.total_requests}
Cache Hits: {self.stats.cache_hits}
Cache Misses: {self.stats.cache_misses}
Hit Rate: {self.stats.hit_rate:.1%}

Stable Prefix:
- Characters: {self.stats.prefix_length_chars:,}
- Estimated Tokens: {self.stats.prefix_length_tokens:,}

Cost Impact:
- Estimated Savings: {self.stats.estimated_savings_percent:.0f}%
- (Based on 10x cost difference for cached tokens)

Recommendations:
{self._get_recommendations()}
"""

    def _get_recommendations(self) -> str:
        """Generate optimization recommendations from current stats."""
        recommendations = []

        # Only flag a low hit rate once there is enough data to judge.
        if self.stats.hit_rate < 0.5 and self.stats.total_requests > 5:
            recommendations.append(
                "- Low cache hit rate: Check if stable prefix is actually stable"
            )

        if self.stats.prefix_length_tokens > 5000:
            recommendations.append(
                "- Large stable prefix: Consider moving less-stable content to semi-stable"
            )

        if self.stats.prefix_length_tokens < 1000:
            recommendations.append(
                "- Small stable prefix: Consider moving more content to stable section"
            )

        if not recommendations:
            recommendations.append("- Cache performance looks good!")

        return "\n".join(recommendations)

    def get_stats_dict(self) -> Dict[str, Any]:
        """Get statistics as dictionary."""
        return self.stats.to_dict()

    def reset_stats(self) -> None:
        """Reset all statistics and discard request history.

        Note: registered sections and the last-seen prefix hash are kept.
        """
        self.stats = CacheStats()
        self._request_history = []

    def save_stats(self, path: Path) -> None:
        """Save statistics, recent history, and section metadata to a JSON file.

        Creates parent directories as needed. Section *content* is not
        persisted — only its stability class and length.
        """
        data = {
            "stats": self.stats.to_dict(),
            "request_history": self._request_history[-50:],  # Last 50
            "sections": {
                name: {
                    "stability": s.stability,
                    "content_length": len(s.content)
                }
                for name, s in self._sections.items()
            }
        }
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load_stats(cls, path: Path) -> "ContextCacheOptimizer":
        """Load statistics from a JSON file written by save_stats().

        Returns a fresh optimizer when the file does not exist. Registered
        sections are not restored (their content was never persisted).
        """
        optimizer = cls()

        if not path.exists():
            return optimizer

        with open(path, encoding='utf-8') as f:
            data = json.load(f)

        stats = data.get("stats", {})
        optimizer.stats.total_requests = stats.get("total_requests", 0)
        optimizer.stats.cache_hits = stats.get("cache_hits", 0)
        optimizer.stats.cache_misses = stats.get("cache_misses", 0)
        optimizer.stats.prefix_length_chars = stats.get("prefix_length_chars", 0)
        optimizer.stats.prefix_length_tokens = stats.get("prefix_length_tokens", 0)

        optimizer._request_history = data.get("request_history", [])

        return optimizer
|
||||
|
||||
|
||||
class StablePrefixBuilder:
    """
    Assembles stable-prefix content from named sections.

    Keeps ordering and formatting deterministic so the assembled prefix
    is byte-identical across requests, maximizing cache hits.
    """

    def __init__(self):
        # (order, name, content) triples; sorted by order at build() time.
        self._sections: List[tuple] = []

    def add_section(self, name: str, content: str, order: int = 50) -> "StablePrefixBuilder":
        """
        Queue a section for the stable prefix.

        Args:
            name: Section name (for documentation)
            content: Section content
            order: Sort order (lower = earlier)

        Returns:
            Self for chaining
        """
        self._sections.append((order, name, content))
        return self

    def add_identity(self, identity: str) -> "StablePrefixBuilder":
        """Queue the identity section (order 10)."""
        return self.add_section("identity", identity, order=10)

    def add_capabilities(self, capabilities: str) -> "StablePrefixBuilder":
        """Queue the capabilities section (order 20)."""
        return self.add_section("capabilities", capabilities, order=20)

    def add_tools(self, tools: str) -> "StablePrefixBuilder":
        """Queue the tools section (order 30)."""
        return self.add_section("tools", tools, order=30)

    def add_routing(self, routing: str) -> "StablePrefixBuilder":
        """Queue the routing section (order 40)."""
        return self.add_section("routing", routing, order=40)

    def build(self) -> str:
        """
        Assemble the stable prefix string.

        Sections are sorted by order (stable sort, so ties keep insertion
        order); each gets an HTML-comment header and a trailing blank line.

        Returns:
            Assembled stable prefix
        """
        parts: List[str] = []
        for _, title, body in sorted(self._sections, key=lambda triple: triple[0]):
            parts.extend((f"<!-- {title} -->", body.strip(), ""))
        return "\n".join(parts)
|
||||
|
||||
|
||||
# Process-wide singleton, created lazily by get_cache_optimizer().
_global_optimizer: Optional[ContextCacheOptimizer] = None


def get_cache_optimizer() -> ContextCacheOptimizer:
    """Return the shared ContextCacheOptimizer, creating it on first call."""
    global _global_optimizer
    optimizer = _global_optimizer
    if optimizer is None:
        optimizer = ContextCacheOptimizer()
        _global_optimizer = optimizer
    return optimizer
|
||||
Reference in New Issue
Block a user