# optimization_engine/context/cache_monitor.py

"""
Atomizer Cache Monitor - KV-Cache Optimization

Part of the ACE (Agentic Context Engineering) implementation for Atomizer.

Monitors and optimizes KV-cache hit rates for cost reduction.
Based on the principle that cached tokens cost ~10x less than uncached.

The cache monitor tracks:
- Stable prefix length (should stay constant for cache hits)
- Cache hit rate across requests
- Estimated cost savings

Structure for KV-cache optimization:
1. STABLE PREFIX - Never changes (identity, tools, routing)
2. SEMI-STABLE - Changes per session type (protocols, playbook)
3. DYNAMIC - Changes every turn (state, user message)
"""

from dataclasses import dataclass, field
from typing import Optional, List, Dict, Any
from datetime import datetime
import hashlib
import json
from pathlib import Path


@dataclass
class CacheStats:
    """Counters and prefix-size figures used to judge cache effectiveness."""
    total_requests: int = 0
    cache_hits: int = 0
    cache_misses: int = 0
    prefix_length_chars: int = 0
    prefix_length_tokens: int = 0  # Estimated, not measured

    @property
    def hit_rate(self) -> float:
        """Return cache_hits / total_requests, or 0.0 before any request."""
        requests = self.total_requests
        # Guard the division: with no requests there is no rate to report.
        return 0.0 if requests == 0 else self.cache_hits / requests

    @property
    def estimated_savings_percent(self) -> float:
        """
        Return the estimated cost saving (in percent) attributable to hits.

        Assumes cached tokens cost roughly 10% of uncached ones, so each
        hit saves about 90%; the estimate is therefore hit_rate * 90.
        """
        return 0.0 if self.total_requests == 0 else 90.0 * self.hit_rate

    def to_dict(self) -> Dict[str, Any]:
        """Return the raw counters plus the derived rates as a plain dict."""
        exported = (
            "total_requests",
            "cache_hits",
            "cache_misses",
            "hit_rate",
            "prefix_length_chars",
            "prefix_length_tokens",
            "estimated_savings_percent",
        )
        # Key order follows the tuple above; properties are read via getattr.
        return {key: getattr(self, key) for key in exported}


@dataclass
class ContextSection:
    """One named piece of context together with its stability tier."""
    name: str
    content: str
    stability: str  # "stable", "semi_stable", "dynamic"
    last_hash: str = ""

    def compute_hash(self) -> str:
        """Return the MD5 hex digest of the current content."""
        digest = hashlib.md5()
        digest.update(self.content.encode())
        return digest.hexdigest()

    def has_changed(self) -> bool:
        """
        Report whether content differs from the last recorded hash.

        The recorded hash is refreshed on every call, so a second call
        with the same content returns False.
        """
        previous, self.last_hash = self.last_hash, self.compute_hash()
        return previous != self.last_hash


class ContextCacheOptimizer:
    """
    Tracks and optimizes context for cache efficiency.

    Implements the three-tier context structure:
    1. Stable prefix (cached across all requests)
    2. Semi-stable section (cached per session type)
    3. Dynamic section (changes every turn)

    A request is counted as a cache hit when the MD5 hash of its stable
    prefix equals the hash remembered from the previous request. The first
    request of a fresh instance (including one returned by load_stats) has
    no remembered hash and is therefore recorded as a miss.

    Usage:
        optimizer = ContextCacheOptimizer()

        # Build context with cache optimization
        context = optimizer.prepare_context(
            stable_prefix=identity_and_tools,
            semi_stable=protocols_and_playbook,
            dynamic=state_and_message
        )

        # Check efficiency
        print(optimizer.get_report())
    """

    # Approximate characters per token, used to estimate token counts
    CHARS_PER_TOKEN = 4

    def __init__(self):
        """Start with empty statistics, no sections and no remembered hash."""
        self.stats = CacheStats()
        self._sections: Dict[str, ContextSection] = {}
        self._last_stable_hash: Optional[str] = None
        self._last_semi_stable_hash: Optional[str] = None
        self._request_history: List[Dict[str, Any]] = []

    def prepare_context(
        self,
        stable_prefix: str,
        semi_stable: str,
        dynamic: str
    ) -> str:
        """
        Assemble context optimized for caching.

        Tracks whether prefix changed (cache miss), updates the running
        statistics and appends one entry to the bounded request history.

        Args:
            stable_prefix: Content that never changes (tools, identity)
            semi_stable: Content that changes per session type
            dynamic: Content that changes every turn

        Returns:
            Assembled context string with clear section boundaries
        """
        # Hash the stable prefix
        stable_hash = hashlib.md5(stable_prefix.encode()).hexdigest()

        # Decide hit/miss exactly once, BEFORE the remembered hash is
        # overwritten below; comparing afterwards would always yield True.
        cache_hit = stable_hash == self._last_stable_hash

        self.stats.total_requests += 1
        if cache_hit:
            self.stats.cache_hits += 1
        else:
            self.stats.cache_misses += 1

        self._last_stable_hash = stable_hash
        self.stats.prefix_length_chars = len(stable_prefix)
        self.stats.prefix_length_tokens = len(stable_prefix) // self.CHARS_PER_TOKEN

        # Record request for history
        self._request_history.append({
            "timestamp": datetime.now().isoformat(),
            "cache_hit": cache_hit,
            "stable_length": len(stable_prefix),
            "semi_stable_length": len(semi_stable),
            "dynamic_length": len(dynamic)
        })

        # Keep history bounded
        if len(self._request_history) > 100:
            self._request_history = self._request_history[-100:]

        # Assemble with clear boundaries
        # Using markdown horizontal rules as section separators
        return f"""{stable_prefix}

---

{semi_stable}

---

{dynamic}"""

    def register_section(
        self,
        name: str,
        content: str,
        stability: str = "dynamic"
    ) -> None:
        """
        Register a context section for change tracking.

        The section's hash is recorded immediately, so it is reported as
        unchanged until its content is modified. Registering an existing
        name replaces the previous section.

        Args:
            name: Section identifier
            content: Section content
            stability: One of "stable", "semi_stable", "dynamic"
        """
        section = ContextSection(
            name=name,
            content=content,
            stability=stability
        )
        section.last_hash = section.compute_hash()
        self._sections[name] = section

    def check_section_changes(self) -> Dict[str, bool]:
        """
        Check which sections have changed.

        Each section's recorded hash is refreshed as a side effect, so a
        change is reported only once.

        Returns:
            Dictionary mapping section names to change status
        """
        changes = {}
        for name, section in self._sections.items():
            changes[name] = section.has_changed()
        return changes

    def get_stable_sections(self) -> List[str]:
        """Get names of sections marked as stable."""
        return [
            name for name, section in self._sections.items()
            if section.stability == "stable"
        ]

    def get_report(self) -> str:
        """Generate human-readable cache efficiency report."""
        return f"""
Cache Efficiency Report
=======================
Requests: {self.stats.total_requests}
Cache Hits: {self.stats.cache_hits}
Cache Misses: {self.stats.cache_misses}
Hit Rate: {self.stats.hit_rate:.1%}

Stable Prefix:
- Characters: {self.stats.prefix_length_chars:,}
- Estimated Tokens: {self.stats.prefix_length_tokens:,}

Cost Impact:
- Estimated Savings: {self.stats.estimated_savings_percent:.0f}%
- (Based on 10x cost difference for cached tokens)

Recommendations:
{self._get_recommendations()}
"""

    def _get_recommendations(self) -> str:
        """Generate optimization recommendations, one "- " bullet per line."""
        recommendations = []

        # Only flag a low hit rate once there are enough requests to judge.
        if self.stats.hit_rate < 0.5 and self.stats.total_requests > 5:
            recommendations.append(
                "- Low cache hit rate: Check if stable prefix is actually stable"
            )

        if self.stats.prefix_length_tokens > 5000:
            recommendations.append(
                "- Large stable prefix: Consider moving less-stable content to semi-stable"
            )

        if self.stats.prefix_length_tokens < 1000:
            recommendations.append(
                "- Small stable prefix: Consider moving more content to stable section"
            )

        if not recommendations:
            recommendations.append("- Cache performance looks good!")

        return "\n".join(recommendations)

    def get_stats_dict(self) -> Dict[str, Any]:
        """Get statistics as dictionary."""
        return self.stats.to_dict()

    def reset_stats(self) -> None:
        """Reset counters and request history; sections and the remembered hash are kept."""
        self.stats = CacheStats()
        self._request_history = []

    def save_stats(self, path: Path) -> None:
        """
        Save statistics to JSON file.

        Writes the stats dictionary, the last 50 history entries and, per
        registered section, its stability and content length (not the
        content itself). Parent directories are created if missing.

        Args:
            path: Destination file; overwritten if it exists
        """
        data = {
            "stats": self.stats.to_dict(),
            "request_history": self._request_history[-50:],  # Last 50
            "sections": {
                name: {
                    "stability": s.stability,
                    "content_length": len(s.content)
                }
                for name, s in self._sections.items()
            }
        }
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    @classmethod
    def load_stats(cls, path: Path) -> "ContextCacheOptimizer":
        """
        Load statistics from JSON file.

        Only the counters, prefix lengths and request history are
        restored; sections are not, because their content is never saved.

        Args:
            path: File previously written by save_stats

        Returns:
            A new optimizer; a fresh one with empty stats if path does
            not exist
        """
        optimizer = cls()

        if not path.exists():
            return optimizer

        with open(path, encoding='utf-8') as f:
            data = json.load(f)

        stats = data.get("stats", {})
        optimizer.stats.total_requests = stats.get("total_requests", 0)
        optimizer.stats.cache_hits = stats.get("cache_hits", 0)
        optimizer.stats.cache_misses = stats.get("cache_misses", 0)
        optimizer.stats.prefix_length_chars = stats.get("prefix_length_chars", 0)
        optimizer.stats.prefix_length_tokens = stats.get("prefix_length_tokens", 0)

        optimizer._request_history = data.get("request_history", [])

        return optimizer


class StablePrefixBuilder:
    """
    Fluent helper that assembles the stable prefix from named sections.

    Sections are emitted in ascending ``order`` so the resulting text does
    not depend on the sequence in which they were added.
    """

    def __init__(self):
        # Each entry is an (order, name, content) tuple.
        self._sections: List[tuple] = []

    def add_section(self, name: str, content: str, order: int = 50) -> "StablePrefixBuilder":
        """
        Queue a section for inclusion in the prefix.

        Args:
            name: Label written into the section's marker comment
            content: Text of the section
            order: Sort position; smaller values come first

        Returns:
            This builder, so calls can be chained
        """
        entry = (order, name, content)
        self._sections.append(entry)
        return self

    def add_identity(self, identity: str) -> "StablePrefixBuilder":
        """Queue the identity section at order 10."""
        return self.add_section("identity", identity, 10)

    def add_capabilities(self, capabilities: str) -> "StablePrefixBuilder":
        """Queue the capabilities section at order 20."""
        return self.add_section("capabilities", capabilities, 20)

    def add_tools(self, tools: str) -> "StablePrefixBuilder":
        """Queue the tools section at order 30."""
        return self.add_section("tools", tools, 30)

    def add_routing(self, routing: str) -> "StablePrefixBuilder":
        """Queue the routing section at order 40."""
        return self.add_section("routing", routing, 40)

    def build(self) -> str:
        """
        Render all queued sections into a single string.

        Every section contributes a ``<!-- name -->`` marker line, its
        stripped content, and a blank line. Sections sharing the same
        order keep the sequence in which they were added.

        Returns:
            The assembled prefix (empty string when nothing was added)
        """
        ordered = list(self._sections)
        ordered.sort(key=lambda entry: entry[0])

        return "\n".join(
            piece
            for _, label, body in ordered
            for piece in (f"<!-- {label} -->", body.strip(), "")
        )


# Module-wide optimizer, created lazily on first access
_global_optimizer: Optional[ContextCacheOptimizer] = None


def get_cache_optimizer() -> ContextCacheOptimizer:
    """
    Return the shared module-level ContextCacheOptimizer.

    The instance is created on the first call and reused afterwards; no
    locking is performed here.
    """
    global _global_optimizer
    if _global_optimizer is not None:
        return _global_optimizer
    _global_optimizer = ContextCacheOptimizer()
    return _global_optimizer
feat: Implement ACE Context Engineering framework (SYS_17) Complete implementation of Agentic Context Engineering (ACE) framework: Core modules (optimization_engine/context/): - playbook.py: AtomizerPlaybook with helpful/harmful scoring - reflector.py: AtomizerReflector for insight extraction - session_state.py: Context isolation (exposed/isolated state) - feedback_loop.py: Automated learning from trial results - compaction.py: Long-session context management - cache_monitor.py: KV-cache optimization tracking - runner_integration.py: OptimizationRunner integration Dashboard integration: - context.py: 12 REST API endpoints for playbook management Tests: - test_context_engineering.py: 44 unit tests - test_context_integration.py: 16 integration tests Documentation: - CONTEXT_ENGINEERING_REPORT.md: Comprehensive implementation report - CONTEXT_ENGINEERING_API.md: Complete API reference - SYS_17_CONTEXT_ENGINEERING.md: System protocol - Updated cheatsheet with SYS_17 quick reference - Enhanced bootstrap (00_BOOTSTRAP_V2.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2025-12-29 20:21:20 -05:00			`"""`
			`Atomizer Cache Monitor - KV-Cache Optimization`

			`Part of the ACE (Agentic Context Engineering) implementation for Atomizer.`

			`Monitors and optimizes KV-cache hit rates for cost reduction.`
			`Based on the principle that cached tokens cost ~10x less than uncached.`

			`The cache monitor tracks:`
			`- Stable prefix length (should stay constant for cache hits)`
			`- Cache hit rate across requests`
			`- Estimated cost savings`

			`Structure for KV-cache optimization:`
			`1. STABLE PREFIX - Never changes (identity, tools, routing)`
			`2. SEMI-STABLE - Changes per session type (protocols, playbook)`
			`3. DYNAMIC - Changes every turn (state, user message)`
			`"""`

			`from dataclasses import dataclass, field`
			`from typing import Optional, List, Dict, Any`
			`from datetime import datetime`
			`import hashlib`
			`import json`
			`from pathlib import Path`


			`@dataclass`
			`class CacheStats:`
			`"""Statistics for cache efficiency tracking."""`
			`total_requests: int = 0`
			`cache_hits: int = 0`
			`cache_misses: int = 0`
			`prefix_length_chars: int = 0`
			`prefix_length_tokens: int = 0 # Estimated`

			`@property`
			`def hit_rate(self) -> float:`
			`"""Calculate cache hit rate (0.0-1.0)."""`
			`if self.total_requests == 0:`
			`return 0.0`
			`return self.cache_hits / self.total_requests`

			`@property`
			`def estimated_savings_percent(self) -> float:`
			`"""`
			`Estimate cost savings from cache hits.`

			`Based on ~10x cost difference between cached/uncached tokens.`
			`"""`
			`if self.total_requests == 0:`
			`return 0.0`
			`# Cached tokens cost ~10% of uncached`
			`# So savings = hit_rate * 90%`
			`return self.hit_rate * 90.0`

			`def to_dict(self) -> Dict[str, Any]:`
			`"""Convert to dictionary."""`
			`return {`
			`"total_requests": self.total_requests,`
			`"cache_hits": self.cache_hits,`
			`"cache_misses": self.cache_misses,`
			`"hit_rate": self.hit_rate,`
			`"prefix_length_chars": self.prefix_length_chars,`
			`"prefix_length_tokens": self.prefix_length_tokens,`
			`"estimated_savings_percent": self.estimated_savings_percent`
			`}`


			`@dataclass`
			`class ContextSection:`
			`"""A section of context with stability classification."""`
			`name: str`
			`content: str`
			`stability: str # "stable", "semi_stable", "dynamic"`
			`last_hash: str = ""`

			`def compute_hash(self) -> str:`
			`"""Compute content hash for change detection."""`
			`return hashlib.md5(self.content.encode()).hexdigest()`

			`def has_changed(self) -> bool:`
			`"""Check if content has changed since last hash."""`
			`current_hash = self.compute_hash()`
			`changed = current_hash != self.last_hash`
			`self.last_hash = current_hash`
			`return changed`


			`class ContextCacheOptimizer:`
			`"""`
			`Tracks and optimizes context for cache efficiency.`

			`Implements the three-tier context structure:`
			`1. Stable prefix (cached across all requests)`
			`2. Semi-stable section (cached per session type)`
			`3. Dynamic section (changes every turn)`

			`Usage:`
			`optimizer = ContextCacheOptimizer()`

			`# Build context with cache optimization`
			`context = optimizer.prepare_context(`
			`stable_prefix=identity_and_tools,`
			`semi_stable=protocols_and_playbook,`
			`dynamic=state_and_message`
			`)`

			`# Check efficiency`
			`print(optimizer.get_report())`
			`"""`

			`# Approximate tokens per character for estimation`
			`CHARS_PER_TOKEN = 4`

			`def __init__(self):`
			`self.stats = CacheStats()`
			`self._sections: Dict[str, ContextSection] = {}`
			`self._last_stable_hash: Optional[str] = None`
			`self._last_semi_stable_hash: Optional[str] = None`
			`self._request_history: List[Dict[str, Any]] = []`

			`def prepare_context(`
			`self,`
			`stable_prefix: str,`
			`semi_stable: str,`
			`dynamic: str`
			`) -> str:`
			`"""`
			`Assemble context optimized for caching.`

			`Tracks whether prefix changed (cache miss).`

			`Args:`
			`stable_prefix: Content that never changes (tools, identity)`
			`semi_stable: Content that changes per session type`
			`dynamic: Content that changes every turn`

			`Returns:`
			`Assembled context string with clear section boundaries`
			`"""`
			`# Hash the stable prefix`
			`stable_hash = hashlib.md5(stable_prefix.encode()).hexdigest()`

			`self.stats.total_requests += 1`

			`# Check for cache hit (stable prefix unchanged)`
			`if stable_hash == self._last_stable_hash:`
			`self.stats.cache_hits += 1`
			`else:`
			`self.stats.cache_misses += 1`

			`self._last_stable_hash = stable_hash`
			`self.stats.prefix_length_chars = len(stable_prefix)`
			`self.stats.prefix_length_tokens = len(stable_prefix) // self.CHARS_PER_TOKEN`

			`# Record request for history`
			`self._request_history.append({`
			`"timestamp": datetime.now().isoformat(),`
			`"cache_hit": stable_hash == self._last_stable_hash,`
			`"stable_length": len(stable_prefix),`
			`"semi_stable_length": len(semi_stable),`
			`"dynamic_length": len(dynamic)`
			`})`

			`# Keep history bounded`
			`if len(self._request_history) > 100:`
			`self._request_history = self._request_history[-100:]`

			`# Assemble with clear boundaries`
			`# Using markdown horizontal rules as section separators`
			`return f"""{stable_prefix}`

			`---`

			`{semi_stable}`

			`---`

			`{dynamic}"""`

			`def register_section(`
			`self,`
			`name: str,`
			`content: str,`
			`stability: str = "dynamic"`
			`) -> None:`
			`"""`
			`Register a context section for change tracking.`

			`Args:`
			`name: Section identifier`
			`content: Section content`
			`stability: One of "stable", "semi_stable", "dynamic"`
			`"""`
			`section = ContextSection(`
			`name=name,`
			`content=content,`
			`stability=stability`
			`)`
			`section.last_hash = section.compute_hash()`
			`self._sections[name] = section`

			`def check_section_changes(self) -> Dict[str, bool]:`
			`"""`
			`Check which sections have changed.`

			`Returns:`
			`Dictionary mapping section names to change status`
			`"""`
			`changes = {}`
			`for name, section in self._sections.items():`
			`changes[name] = section.has_changed()`
			`return changes`

			`def get_stable_sections(self) -> List[str]:`
			`"""Get names of sections marked as stable."""`
			`return [`
			`name for name, section in self._sections.items()`
			`if section.stability == "stable"`
			`]`

			`def get_report(self) -> str:`
			`"""Generate human-readable cache efficiency report."""`
			`return f"""`
			`Cache Efficiency Report`
			`=======================`
			`Requests: {self.stats.total_requests}`
			`Cache Hits: {self.stats.cache_hits}`
			`Cache Misses: {self.stats.cache_misses}`
			`Hit Rate: {self.stats.hit_rate:.1%}`

			`Stable Prefix:`
			`- Characters: {self.stats.prefix_length_chars:,}`
			`- Estimated Tokens: {self.stats.prefix_length_tokens:,}`

			`Cost Impact:`
			`- Estimated Savings: {self.stats.estimated_savings_percent:.0f}%`
			`- (Based on 10x cost difference for cached tokens)`

			`Recommendations:`
			`{self._get_recommendations()}`
			`"""`

			`def _get_recommendations(self) -> str:`
			`"""Generate optimization recommendations."""`
			`recommendations = []`

			`if self.stats.hit_rate < 0.5 and self.stats.total_requests > 5:`
			`recommendations.append(`
			`"- Low cache hit rate: Check if stable prefix is actually stable"`
			`)`

			`if self.stats.prefix_length_tokens > 5000:`
			`recommendations.append(`
			`"- Large stable prefix: Consider moving less-stable content to semi-stable"`
			`)`

			`if self.stats.prefix_length_tokens < 1000:`
			`recommendations.append(`
			`"- Small stable prefix: Consider moving more content to stable section"`
			`)`

			`if not recommendations:`
			`recommendations.append("- Cache performance looks good!")`

			`return "\n".join(recommendations)`

			`def get_stats_dict(self) -> Dict[str, Any]:`
			`"""Get statistics as dictionary."""`
			`return self.stats.to_dict()`

			`def reset_stats(self) -> None:`
			`"""Reset all statistics."""`
			`self.stats = CacheStats()`
			`self._request_history = []`

			`def save_stats(self, path: Path) -> None:`
			`"""Save statistics to JSON file."""`
			`data = {`
			`"stats": self.stats.to_dict(),`
			`"request_history": self._request_history[-50:], # Last 50`
			`"sections": {`
			`name: {`
			`"stability": s.stability,`
			`"content_length": len(s.content)`
			`}`
			`for name, s in self._sections.items()`
			`}`
			`}`
			`path.parent.mkdir(parents=True, exist_ok=True)`
			`with open(path, 'w', encoding='utf-8') as f:`
			`json.dump(data, f, indent=2)`

			`@classmethod`
			`def load_stats(cls, path: Path) -> "ContextCacheOptimizer":`
			`"""Load statistics from JSON file."""`
			`optimizer = cls()`

			`if not path.exists():`
			`return optimizer`

			`with open(path, encoding='utf-8') as f:`
			`data = json.load(f)`

			`stats = data.get("stats", {})`
			`optimizer.stats.total_requests = stats.get("total_requests", 0)`
			`optimizer.stats.cache_hits = stats.get("cache_hits", 0)`
			`optimizer.stats.cache_misses = stats.get("cache_misses", 0)`
			`optimizer.stats.prefix_length_chars = stats.get("prefix_length_chars", 0)`
			`optimizer.stats.prefix_length_tokens = stats.get("prefix_length_tokens", 0)`

			`optimizer._request_history = data.get("request_history", [])`

			`return optimizer`


			`class StablePrefixBuilder:`
			`"""`
			`Helper for building stable prefix content.`

			`Ensures consistent ordering and formatting of stable content`
			`to maximize cache hits.`
			`"""`

			`def __init__(self):`
			`self._sections: List[tuple] = [] # (order, name, content)`

			`def add_section(self, name: str, content: str, order: int = 50) -> "StablePrefixBuilder":`
			`"""`
			`Add a section to the stable prefix.`

			`Args:`
			`name: Section name (for documentation)`
			`content: Section content`
			`order: Sort order (lower = earlier)`

			`Returns:`
			`Self for chaining`
			`"""`
			`self._sections.append((order, name, content))`
			`return self`

			`def add_identity(self, identity: str) -> "StablePrefixBuilder":`
			`"""Add identity section (order 10)."""`
			`return self.add_section("identity", identity, order=10)`

			`def add_capabilities(self, capabilities: str) -> "StablePrefixBuilder":`
			`"""Add capabilities section (order 20)."""`
			`return self.add_section("capabilities", capabilities, order=20)`

			`def add_tools(self, tools: str) -> "StablePrefixBuilder":`
			`"""Add tools section (order 30)."""`
			`return self.add_section("tools", tools, order=30)`

			`def add_routing(self, routing: str) -> "StablePrefixBuilder":`
			`"""Add routing section (order 40)."""`
			`return self.add_section("routing", routing, order=40)`

			`def build(self) -> str:`
			`"""`
			`Build the stable prefix string.`

			`Sections are sorted by order to ensure consistency.`

			`Returns:`
			`Assembled stable prefix`
			`"""`
			`# Sort by order`
			`sorted_sections = sorted(self._sections, key=lambda x: x[0])`

			`lines = []`
			`for _, name, content in sorted_sections:`
			`lines.append(f"<!-- {name} -->")`
			`lines.append(content.strip())`
			`lines.append("")`

			`return "\n".join(lines)`


			`# Global cache optimizer instance`
			`_global_optimizer: Optional[ContextCacheOptimizer] = None`


			`def get_cache_optimizer() -> ContextCacheOptimizer:`
			`"""Get the global cache optimizer instance."""`
			`global _global_optimizer`
			`if _global_optimizer is None:`
			`_global_optimizer = ContextCacheOptimizer()`
			`return _global_optimizer`