feat: Implement ACE Context Engineering framework (SYS_17)
Complete implementation of Agentic Context Engineering (ACE) framework: Core modules (optimization_engine/context/): - playbook.py: AtomizerPlaybook with helpful/harmful scoring - reflector.py: AtomizerReflector for insight extraction - session_state.py: Context isolation (exposed/isolated state) - feedback_loop.py: Automated learning from trial results - compaction.py: Long-session context management - cache_monitor.py: KV-cache optimization tracking - runner_integration.py: OptimizationRunner integration Dashboard integration: - context.py: 12 REST API endpoints for playbook management Tests: - test_context_engineering.py: 44 unit tests - test_context_integration.py: 16 integration tests Documentation: - CONTEXT_ENGINEERING_REPORT.md: Comprehensive implementation report - CONTEXT_ENGINEERING_API.md: Complete API reference - SYS_17_CONTEXT_ENGINEERING.md: System protocol - Updated cheatsheet with SYS_17 quick reference - Enhanced bootstrap (00_BOOTSTRAP_V2.md) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -12,7 +12,7 @@ import sys
|
||||
# Add parent directory to path to import optimization_engine
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent.parent))
|
||||
|
||||
from api.routes import optimization, claude, terminal, insights
|
||||
from api.routes import optimization, claude, terminal, insights, context
|
||||
from api.websocket import optimization_stream
|
||||
|
||||
# Create FastAPI app
|
||||
@@ -37,6 +37,7 @@ app.include_router(optimization_stream.router, prefix="/api/ws", tags=["websocke
|
||||
app.include_router(claude.router, prefix="/api/claude", tags=["claude"])
|
||||
app.include_router(terminal.router, prefix="/api/terminal", tags=["terminal"])
|
||||
app.include_router(insights.router, prefix="/api/insights", tags=["insights"])
|
||||
app.include_router(context.router, prefix="/api/context", tags=["context"])
|
||||
|
||||
@app.get("/")
|
||||
async def root():
|
||||
|
||||
450
atomizer-dashboard/backend/api/routes/context.py
Normal file
450
atomizer-dashboard/backend/api/routes/context.py
Normal file
@@ -0,0 +1,450 @@
|
||||
"""
|
||||
Context Engineering API Routes
|
||||
|
||||
Provides endpoints for:
|
||||
- Viewing playbook contents
|
||||
- Managing session state
|
||||
- Recording feedback on playbook items
|
||||
- Triggering compaction
|
||||
- Monitoring cache efficiency
|
||||
- Exporting learning reports
|
||||
|
||||
Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
import sys
|
||||
|
||||
# Add parent paths for imports
|
||||
sys.path.append(str(Path(__file__).parent.parent.parent.parent.parent))
|
||||
|
||||
# Router for the context-engineering endpoints; the application mounts it
# under the "/api/context" prefix.
router = APIRouter()

# Paths
# This module lives at atomizer-dashboard/backend/api/routes/context.py, so
# parents[4] is the directory that contains "atomizer-dashboard" - the same
# directory that is appended to sys.path above.
ATOMIZER_ROOT = Path(__file__).parents[4]
# JSON file that every playbook endpoint in this module loads from and saves to.
PLAYBOOK_PATH = ATOMIZER_ROOT / "knowledge_base" / "playbook.json"
|
||||
|
||||
|
||||
# Pydantic models for request/response
|
||||
class PlaybookItemResponse(BaseModel):
    """Response schema for a single playbook item.

    Used as the response model of ``GET /playbook/items`` (as a list) and
    ``GET /playbook/items/{item_id}``.
    """

    id: str
    category: str  # the item's category enum ``.value`` (str, mis, tool, cal, dom, wf)
    content: str
    helpful_count: int
    harmful_count: int
    net_score: int  # presumably helpful_count - harmful_count; computed by the playbook item - TODO confirm
    confidence: float  # the min_confidence query parameter documents a 0.0-1.0 range
    tags: List[str]
    created_at: str
    last_used: Optional[str]
|
||||
|
||||
|
||||
class PlaybookSummary(BaseModel):
    """Response schema for ``GET /playbook``: aggregate playbook statistics.

    Every field is copied from the dict returned by ``playbook.get_stats()``.
    """

    total_items: int
    by_category: dict  # presumably item counts per category; key format is defined by get_stats() - TODO confirm
    version: int
    last_updated: str
    avg_score: float
    top_score: int  # filled from stats["max_score"]
    lowest_score: int  # filled from stats["min_score"]
|
||||
|
||||
|
||||
class FeedbackRequest(BaseModel):
    """Request body for ``POST /playbook/feedback``."""

    item_id: str  # must be an existing key of playbook.items, otherwise the endpoint answers 404
    helpful: bool  # forwarded as ``helpful=`` to playbook.record_outcome()
|
||||
|
||||
|
||||
class InsightRequest(BaseModel):
    """Request body for ``POST /playbook/insights``."""

    category: str  # must be a valid InsightCategory value (str, mis, tool, cal, dom, wf), otherwise 400
    content: str
    tags: Optional[List[str]] = None
    source_trial: Optional[int] = None  # presumably the trial the insight originated from - TODO confirm
|
||||
|
||||
|
||||
class SessionStateResponse(BaseModel):
    """Response schema for ``GET /session``.

    Every field except ``session_id`` is read from ``session.exposed``.
    """

    session_id: str
    task_type: Optional[str]  # ``task_type.value``, or None when no task type is set
    study_name: Optional[str]
    study_status: str
    trials_completed: int
    trials_total: int
    best_value: Optional[float]
    recent_actions: List[str]  # at most the 10 most recent entries
    recent_errors: List[str]  # at most the 5 most recent entries
|
||||
|
||||
|
||||
# Helper function to get playbook
|
||||
def get_playbook():
    """Load the playbook stored at ``PLAYBOOK_PATH``.

    The playbook class is imported lazily so that this routes module can be
    imported even when ``optimization_engine`` is not importable; in that
    case the failure surfaces as an HTTP 500 on the first request instead.

    Raises:
        HTTPException: status 500 when an ImportError occurs while importing
            or loading the playbook.
    """
    try:
        from optimization_engine.context.playbook import AtomizerPlaybook

        loaded = AtomizerPlaybook.load(PLAYBOOK_PATH)
        return loaded
    except ImportError as exc:
        message = f"Context engineering module not available: {exc}"
        raise HTTPException(status_code=500, detail=message)
|
||||
|
||||
|
||||
# Playbook endpoints
|
||||
@router.get("/playbook", response_model=PlaybookSummary)
async def get_playbook_summary():
    """Return aggregate statistics for the playbook."""
    stats = get_playbook().get_stats()

    # Response field -> key in the stats dict. The two score bounds are
    # exposed under different names than get_stats() uses.
    field_to_stat = {
        "total_items": "total_items",
        "by_category": "by_category",
        "version": "version",
        "last_updated": "last_updated",
        "avg_score": "avg_score",
        "top_score": "max_score",
        "lowest_score": "min_score",
    }
    summary_fields = {field: stats[key] for field, key in field_to_stat.items()}
    return PlaybookSummary(**summary_fields)
|
||||
|
||||
|
||||
@router.get("/playbook/items", response_model=List[PlaybookItemResponse])
async def get_playbook_items(
    category: Optional[str] = Query(None, description="Filter by category (str, mis, tool, etc.)"),
    min_score: int = Query(0, description="Minimum net score"),
    min_confidence: float = Query(0.0, description="Minimum confidence (0.0-1.0)"),
    # ge=0 makes FastAPI reject negative values with a 422. Without it a
    # negative limit/offset reaches the slice below and is interpreted as a
    # from-the-end index, silently returning the wrong page.
    limit: int = Query(50, ge=0, description="Maximum items to return"),
    offset: int = Query(0, ge=0, description="Pagination offset")
):
    """
    Get playbook items with optional filtering.

    Items are filtered by category, net score and confidence, sorted by net
    score (highest first) and then paginated with offset/limit.

    Categories:
    - str: Strategy
    - mis: Mistake
    - tool: Tool usage
    - cal: Calculation
    - dom: Domain knowledge
    - wf: Workflow
    """
    playbook = get_playbook()

    items = list(playbook.items.values())

    # Filter by category (an empty string is treated as "no filter")
    if category:
        try:
            from optimization_engine.context.playbook import InsightCategory
            cat = InsightCategory(category)
        except ValueError:
            raise HTTPException(400, f"Invalid category: {category}. Valid: str, mis, tool, cal, dom, wf")
        items = [i for i in items if i.category == cat]

    # Filter by score and confidence
    items = [
        i for i in items
        if i.net_score >= min_score and i.confidence >= min_confidence
    ]

    # Sort by score, best first
    items.sort(key=lambda x: x.net_score, reverse=True)

    # Paginate (offset and limit are guaranteed non-negative by validation)
    items = items[offset:offset + limit]

    return [
        PlaybookItemResponse(
            id=item.id,
            category=item.category.value,
            content=item.content,
            helpful_count=item.helpful_count,
            harmful_count=item.harmful_count,
            net_score=item.net_score,
            confidence=item.confidence,
            tags=item.tags,
            created_at=item.created_at,
            last_used=item.last_used
        )
        for item in items
    ]
|
||||
|
||||
|
||||
@router.get("/playbook/items/{item_id}", response_model=PlaybookItemResponse)
async def get_playbook_item(item_id: str):
    """Return one playbook item, looked up by its ID (404 when unknown)."""
    entries = get_playbook().items

    if item_id not in entries:
        raise HTTPException(404, f"Item not found: {item_id}")

    found = entries[item_id]
    payload = {
        "id": found.id,
        # The response carries the enum's value, not the enum member itself.
        "category": found.category.value,
        "content": found.content,
        "helpful_count": found.helpful_count,
        "harmful_count": found.harmful_count,
        "net_score": found.net_score,
        "confidence": found.confidence,
        "tags": found.tags,
        "created_at": found.created_at,
        "last_used": found.last_used,
    }
    return PlaybookItemResponse(**payload)
|
||||
|
||||
|
||||
@router.post("/playbook/feedback")
async def record_feedback(request: FeedbackRequest):
    """
    Record helpful/harmful feedback for a playbook item.

    This is how the system learns:
    - helpful=true increases the item's score
    - helpful=false decreases the item's score

    Responds with 404 when the item does not exist; otherwise the updated
    playbook is saved and the item's new counters are returned.
    """
    target_id = request.item_id
    playbook = get_playbook()

    if target_id not in playbook.items:
        raise HTTPException(404, f"Item not found: {target_id}")

    playbook.record_outcome(target_id, helpful=request.helpful)
    playbook.save(PLAYBOOK_PATH)

    # Read the item back only after the outcome was recorded so the response
    # reflects the updated counters.
    updated = playbook.items[target_id]
    return dict(
        item_id=target_id,
        new_score=updated.net_score,
        new_confidence=updated.confidence,
        helpful_count=updated.helpful_count,
        harmful_count=updated.harmful_count,
    )
|
||||
|
||||
|
||||
@router.post("/playbook/insights")
async def add_insight(request: InsightRequest):
    """
    Add a new insight to the playbook and save it.

    Categories:
    - str: Strategy - Optimization strategies that work
    - mis: Mistake - Common mistakes to avoid
    - tool: Tool - Tool usage patterns
    - cal: Calculation - Formulas and calculations
    - dom: Domain - Domain-specific knowledge (FEA, NX)
    - wf: Workflow - Workflow patterns

    Responds with 400 for an unknown category and 500 when the context
    module cannot be imported.
    """
    try:
        from optimization_engine.context.playbook import InsightCategory
    except ImportError as exc:
        raise HTTPException(500, f"Context module not available: {exc}")

    # Reject an unknown category before the playbook is loaded.
    try:
        parsed_category = InsightCategory(request.category)
    except ValueError:
        raise HTTPException(400, f"Invalid category: {request.category}")

    playbook = get_playbook()
    created = playbook.add_insight(
        category=parsed_category,
        content=request.content,
        source_trial=request.source_trial,
        tags=request.tags,
    )
    playbook.save(PLAYBOOK_PATH)

    response = {
        "item_id": created.id,
        "category": created.category.value,
        "content": created.content,
    }
    response["message"] = "Insight added successfully"
    return response
|
||||
|
||||
|
||||
@router.delete("/playbook/items/{item_id}")
async def delete_playbook_item(item_id: str):
    """Delete a playbook item and save the playbook (404 when unknown)."""
    playbook = get_playbook()
    entries = playbook.items

    if item_id not in entries:
        raise HTTPException(404, f"Item not found: {item_id}")

    # Capture a short preview before the item is removed so the caller can
    # see what was deleted.
    doomed = entries[item_id]
    preview = doomed.content[:50]
    del entries[item_id]
    playbook.save(PLAYBOOK_PATH)

    return dict(deleted=item_id, content_preview=preview)
|
||||
|
||||
|
||||
@router.post("/playbook/prune")
async def prune_playbook(threshold: int = Query(-3, description="Net score threshold for pruning")):
    """
    Prune harmful items from the playbook and save the result.

    Items with net_score <= threshold will be removed.
    """
    playbook = get_playbook()

    pruned = playbook.prune_harmful(threshold=threshold)
    playbook.save(PLAYBOOK_PATH)

    # The remaining count is taken after pruning, from the in-memory playbook.
    remaining = len(playbook.items)
    return dict(
        items_pruned=pruned,
        threshold_used=threshold,
        remaining_items=remaining,
    )
|
||||
|
||||
|
||||
@router.get("/playbook/context")
async def get_playbook_context(
    task_type: str = Query("optimization", description="Task type for context filtering"),
    max_items: int = Query(15, description="Maximum items to include"),
    min_confidence: float = Query(0.5, description="Minimum confidence threshold")
):
    """
    Get the playbook rendered as a context string for LLM consumption.

    This is what gets injected into the LLM context window.
    """
    playbook = get_playbook()

    selection = dict(
        task_type=task_type,
        max_items=max_items,
        min_confidence=min_confidence,
    )
    rendered = playbook.get_context_for_task(**selection)

    # NOTE(review): items_included is an upper bound derived from max_items
    # and the playbook size, not a count of what get_context_for_task()
    # actually rendered - presumably fewer items pass the filters; confirm.
    upper_bound = min(max_items, len(playbook.items))

    return dict(
        context=rendered,
        items_included=upper_bound,
        task_type=task_type,
    )
|
||||
|
||||
|
||||
# Session state endpoints
|
||||
@router.get("/session", response_model=SessionStateResponse)
async def get_session_state():
    """Return a snapshot of the current session's exposed state."""
    try:
        from optimization_engine.context.session_state import get_session

        current = get_session()
        exposed = current.exposed

        # task_type is an enum member or falsy; only its value is returned.
        task_type = exposed.task_type.value if exposed.task_type else None

        snapshot = dict(
            session_id=current.session_id,
            task_type=task_type,
            study_name=exposed.study_name,
            study_status=exposed.study_status,
            trials_completed=exposed.trials_completed,
            trials_total=exposed.trials_total,
            best_value=exposed.best_value,
            # Only the tail of each history list is returned.
            recent_actions=exposed.recent_actions[-10:],
            recent_errors=exposed.recent_errors[-5:],
        )
        return SessionStateResponse(**snapshot)
    except ImportError:
        raise HTTPException(500, "Session state module not available")
|
||||
|
||||
|
||||
@router.get("/session/context")
async def get_session_context():
    """Return the current session rendered as an LLM context string."""
    try:
        from optimization_engine.context.session_state import get_session

        current = get_session()
        llm_context = current.get_llm_context()
        return dict(
            context=llm_context,
            session_id=current.session_id,
            last_updated=current.last_updated,
        )
    except ImportError:
        raise HTTPException(500, "Session state module not available")
|
||||
|
||||
|
||||
# Cache monitoring endpoints
|
||||
@router.get("/cache/stats")
async def get_cache_stats():
    """Return KV-cache efficiency statistics together with a report.

    Unlike the session endpoints, an ImportError is not turned into an error
    response here: a "not active" payload with ``stats`` set to None is
    returned instead.
    """
    try:
        from optimization_engine.context.cache_monitor import get_cache_optimizer

        monitor = get_cache_optimizer()
        payload = {"stats": monitor.get_stats_dict()}
        payload["report"] = monitor.get_report()
    except ImportError:
        payload = {"message": "Cache monitoring not active", "stats": None}
    return payload
|
||||
|
||||
|
||||
# Learning report endpoints
|
||||
@router.get("/learning/report")
async def get_learning_report():
    """Build a learning report: stats, best/worst items and recommendations."""
    playbook = get_playbook()
    stats = playbook.get_stats()

    def summarize(entry):
        # Compact view of an item; content is cut to its first 100 characters.
        return {"id": entry.id, "content": entry.content[:100], "score": entry.net_score}

    def score_of(entry):
        return entry.net_score

    all_items = list(playbook.items.values())

    # Both orderings come from stable sorts of the same list, so items with
    # equal scores keep their playbook order in each of them.
    best_first = sorted(all_items, key=score_of, reverse=True)
    worst_first = sorted(all_items, key=score_of)

    top_performers = [summarize(entry) for entry in best_first[:10]]
    # Only negatively scored items among the five lowest count as "worst".
    worst_performers = [
        summarize(entry) for entry in worst_first[:5] if entry.net_score < 0
    ]

    report = {"generated_at": datetime.now().isoformat()}
    report["playbook_stats"] = stats
    report["top_performers"] = top_performers
    report["worst_performers"] = worst_performers
    report["recommendations"] = _generate_recommendations(playbook)
    return report
|
||||
|
||||
|
||||
def _generate_recommendations(playbook) -> List[str]:
    """Generate recommendations based on playbook state.

    Args:
        playbook: Object exposing ``items`` (a mapping whose values carry
            ``net_score``, ``helpful_count`` and ``harmful_count``) and
            ``get_stats()`` returning a dict with a ``"by_category"`` mapping.

    Returns:
        A list of human-readable recommendation strings. When none of the
        checks fires, the list holds a single "good health" message.
    """
    # Same cut-off as the default of the /playbook/prune endpoint, which is
    # documented to remove items with net_score <= threshold. Using "<=" here
    # keeps the reported count in line with what a default prune removes.
    prune_threshold = -3
    # More than this many items without any feedback triggers a hint.
    max_untested = 10
    # Fewer recorded mistakes than this triggers a hint.
    min_mistakes = 5

    entries = list(playbook.items.values())
    recommendations = []

    # Check for harmful items
    harmful_total = sum(1 for entry in entries if entry.net_score <= prune_threshold)
    if harmful_total:
        recommendations.append(
            f"Consider pruning {harmful_total} harmful items (net_score <= {prune_threshold})"
        )

    # Check for untested items (no feedback of either kind recorded yet)
    untested_total = sum(
        1 for entry in entries
        if entry.helpful_count + entry.harmful_count == 0
    )
    if untested_total > max_untested:
        recommendations.append(
            f"{untested_total} items have no feedback - consider testing them"
        )

    # Check category balance
    # NOTE(review): assumes get_stats() keys "by_category" by the enum member
    # name ("MISTAKE") rather than its value ("mis") - confirm against
    # AtomizerPlaybook.get_stats(), otherwise this hint is always emitted.
    stats = playbook.get_stats()
    if stats["by_category"].get("MISTAKE", 0) < min_mistakes:
        recommendations.append(
            "Low mistake count - actively record errors when they occur"
        )

    if not recommendations:
        recommendations.append("Playbook is in good health!")

    return recommendations
|
||||
Reference in New Issue
Block a user