"""
Context Engineering API Routes

Provides endpoints for:
- Viewing playbook contents
- Managing session state
- Recording feedback on playbook items
- Triggering compaction
- Monitoring cache efficiency
- Exporting learning reports

Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
"""
|
||
|
|
|
||
|
|
from fastapi import APIRouter, HTTPException, Query
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional, List
|
||
|
|
from pydantic import BaseModel
|
||
|
|
from datetime import datetime
|
||
|
|
import sys
|
||
|
|
|
||
|
|
# Add parent paths for imports
# Climbs five directories up from this file so top-level packages
# (e.g. optimization_engine) resolve at runtime.
sys.path.append(str(Path(__file__).parent.parent.parent.parent.parent))

# Router instance the main app mounts; every endpoint below attaches here.
router = APIRouter()

# Paths
# Repository root: parents[4] is five levels up, matching the sys.path
# entry above.
ATOMIZER_ROOT = Path(__file__).parents[4]
# Persistent JSON store for the ACE playbook; mutating endpoints save
# back to this file after each change.
PLAYBOOK_PATH = ATOMIZER_ROOT / "knowledge_base" / "playbook.json"
|
||
|
|
|
||
|
|
|
||
|
|
# Pydantic models for request/response
|
||
|
|
class PlaybookItemResponse(BaseModel):
    """API representation of a single playbook insight item."""

    id: str  # unique item identifier
    category: str  # short category code, e.g. "str", "mis", "tool"
    content: str  # the insight text itself
    helpful_count: int  # times feedback marked this item helpful
    harmful_count: int  # times feedback marked this item harmful
    net_score: int  # presumably helpful_count - harmful_count — confirm in playbook module
    confidence: float  # confidence score, 0.0-1.0 per the /playbook/items filter
    tags: List[str]  # free-form tags attached to the item
    created_at: str  # creation timestamp (string-typed)
    last_used: Optional[str]  # last-use timestamp, or None if never used
|
||
|
|
|
||
|
|
|
||
|
|
class PlaybookSummary(BaseModel):
    """Aggregate playbook statistics returned by GET /playbook."""

    total_items: int  # number of items currently in the playbook
    by_category: dict  # item counts keyed by category
    version: int  # playbook version counter
    last_updated: str  # timestamp of the last playbook change (string-typed)
    avg_score: float  # mean net score across items
    top_score: int  # highest net score (mapped from stats["max_score"])
    lowest_score: int  # lowest net score (mapped from stats["min_score"])
|
||
|
|
|
||
|
|
|
||
|
|
class FeedbackRequest(BaseModel):
    """Request body for POST /playbook/feedback."""

    item_id: str  # ID of the playbook item the feedback applies to
    helpful: bool  # True raises the item's score, False lowers it
|
||
|
|
|
||
|
|
|
||
|
|
class InsightRequest(BaseModel):
    """Request body for POST /playbook/insights (add a new insight)."""

    category: str  # category code: str, mis, tool, cal, dom, or wf
    content: str  # the insight text to store
    tags: Optional[List[str]] = None  # optional free-form tags
    source_trial: Optional[int] = None  # trial number the insight came from, if any
|
||
|
|
|
||
|
|
|
||
|
|
class SessionStateResponse(BaseModel):
    """Snapshot of the current optimization session for GET /session."""

    session_id: str  # unique session identifier
    task_type: Optional[str]  # current task type value, or None if unset
    study_name: Optional[str]  # active study name, if any
    study_status: str  # current study status string
    trials_completed: int  # trials finished so far
    trials_total: int  # total trials planned
    best_value: Optional[float]  # best objective value observed, if any
    recent_actions: List[str]  # most recent actions (endpoint returns last 10)
    recent_errors: List[str]  # most recent errors (endpoint returns last 5)
|
||
|
|
|
||
|
|
|
||
|
|
# Helper function to get playbook
def get_playbook():
    """Load the playbook from PLAYBOOK_PATH.

    Returns:
        AtomizerPlaybook: the loaded playbook instance.

    Raises:
        HTTPException: 500 when the context engineering module cannot be
            imported (e.g. the optimization_engine package is missing).
    """
    try:
        # Imported lazily so the rest of the API keeps working when the
        # context engineering package is absent.
        from optimization_engine.context.playbook import AtomizerPlaybook
        return AtomizerPlaybook.load(PLAYBOOK_PATH)
    except ImportError as e:
        # Chain the original ImportError so tracebacks show the root cause
        # instead of discarding it (ruff B904).
        raise HTTPException(
            status_code=500,
            detail=f"Context engineering module not available: {str(e)}"
        ) from e
|
||
|
|
|
||
|
|
|
||
|
|
# Playbook endpoints
|
||
|
|
@router.get("/playbook", response_model=PlaybookSummary)
async def get_playbook_summary():
    """Get playbook summary statistics."""
    stats = get_playbook().get_stats()

    # Response field names differ slightly from the stats keys
    # (top_score <- max_score, lowest_score <- min_score).
    field_map = {
        "total_items": "total_items",
        "by_category": "by_category",
        "version": "version",
        "last_updated": "last_updated",
        "avg_score": "avg_score",
        "top_score": "max_score",
        "lowest_score": "min_score",
    }
    return PlaybookSummary(**{field: stats[key] for field, key in field_map.items()})
|
||
|
|
|
||
|
|
|
||
|
|
@router.get("/playbook/items", response_model=List[PlaybookItemResponse])
async def get_playbook_items(
    category: Optional[str] = Query(None, description="Filter by category (str, mis, tool, etc.)"),
    min_score: int = Query(0, description="Minimum net score"),
    min_confidence: float = Query(0.0, description="Minimum confidence (0.0-1.0)"),
    limit: int = Query(50, description="Maximum items to return"),
    offset: int = Query(0, description="Pagination offset")
):
    """
    Get playbook items with optional filtering.

    Categories:
    - str: Strategy
    - mis: Mistake
    - tool: Tool usage
    - cal: Calculation
    - dom: Domain knowledge
    - wf: Workflow
    """
    playbook = get_playbook()

    # Resolve the optional category code into its enum member up front.
    wanted_category = None
    if category:
        try:
            from optimization_engine.context.playbook import InsightCategory
            wanted_category = InsightCategory(category)
        except ValueError:
            raise HTTPException(400, f"Invalid category: {category}. Valid: str, mis, tool, cal, dom, wf")

    # Apply category, score and confidence filters in a single pass.
    selected = [
        entry for entry in playbook.items.values()
        if (wanted_category is None or entry.category == wanted_category)
        and entry.net_score >= min_score
        and entry.confidence >= min_confidence
    ]

    # Highest-scoring items first, then the requested pagination window.
    selected.sort(key=lambda entry: entry.net_score, reverse=True)
    page = selected[offset:offset + limit]

    def to_response(entry):
        # Flatten an internal playbook item into the API response shape.
        return PlaybookItemResponse(
            id=entry.id,
            category=entry.category.value,
            content=entry.content,
            helpful_count=entry.helpful_count,
            harmful_count=entry.harmful_count,
            net_score=entry.net_score,
            confidence=entry.confidence,
            tags=entry.tags,
            created_at=entry.created_at,
            last_used=entry.last_used,
        )

    return [to_response(entry) for entry in page]
|
||
|
|
|
||
|
|
|
||
|
|
@router.get("/playbook/items/{item_id}", response_model=PlaybookItemResponse)
async def get_playbook_item(item_id: str):
    """Get a specific playbook item by ID."""
    playbook = get_playbook()

    entry = playbook.items.get(item_id)
    if entry is None:
        raise HTTPException(404, f"Item not found: {item_id}")

    # Assemble the response field-by-field from the stored item.
    fields = {
        "id": entry.id,
        "category": entry.category.value,
        "content": entry.content,
        "helpful_count": entry.helpful_count,
        "harmful_count": entry.harmful_count,
        "net_score": entry.net_score,
        "confidence": entry.confidence,
        "tags": entry.tags,
        "created_at": entry.created_at,
        "last_used": entry.last_used,
    }
    return PlaybookItemResponse(**fields)
|
||
|
|
|
||
|
|
|
||
|
|
@router.post("/playbook/feedback")
async def record_feedback(request: FeedbackRequest):
    """
    Record feedback on a playbook item.

    This is how the system learns:
    - helpful=true increases the item's score
    - helpful=false decreases the item's score
    """
    playbook = get_playbook()
    target_id = request.item_id

    if target_id not in playbook.items:
        raise HTTPException(404, f"Item not found: {target_id}")

    # Apply the feedback, then persist the updated playbook immediately.
    playbook.record_outcome(target_id, helpful=request.helpful)
    playbook.save(PLAYBOOK_PATH)

    # Report the item's post-feedback counters back to the caller.
    updated = playbook.items[target_id]
    return {
        "item_id": target_id,
        "new_score": updated.net_score,
        "new_confidence": updated.confidence,
        "helpful_count": updated.helpful_count,
        "harmful_count": updated.harmful_count,
    }
|
||
|
|
|
||
|
|
|
||
|
|
@router.post("/playbook/insights")
async def add_insight(request: InsightRequest):
    """
    Add a new insight to the playbook.

    Categories:
    - str: Strategy - Optimization strategies that work
    - mis: Mistake - Common mistakes to avoid
    - tool: Tool - Tool usage patterns
    - cal: Calculation - Formulas and calculations
    - dom: Domain - Domain-specific knowledge (FEA, NX)
    - wf: Workflow - Workflow patterns
    """
    # The enum lives in the optional context package; surface a 500 if absent.
    try:
        from optimization_engine.context.playbook import InsightCategory
    except ImportError as e:
        raise HTTPException(500, f"Context module not available: {e}")

    # Validate category
    try:
        resolved_category = InsightCategory(request.category)
    except ValueError:
        raise HTTPException(400, f"Invalid category: {request.category}")

    playbook = get_playbook()

    created = playbook.add_insight(
        category=resolved_category,
        content=request.content,
        source_trial=request.source_trial,
        tags=request.tags,
    )
    # Persist the new item right away.
    playbook.save(PLAYBOOK_PATH)

    return {
        "item_id": created.id,
        "category": created.category.value,
        "content": created.content,
        "message": "Insight added successfully",
    }
|
||
|
|
|
||
|
|
|
||
|
|
@router.delete("/playbook/items/{item_id}")
async def delete_playbook_item(item_id: str):
    """Delete a playbook item."""
    playbook = get_playbook()

    # EAFP: pop removes and returns the item, raising KeyError if absent.
    try:
        removed = playbook.items.pop(item_id)
    except KeyError:
        raise HTTPException(404, f"Item not found: {item_id}")

    # Keep a short preview of what was deleted for the response.
    preview = removed.content[:50]
    playbook.save(PLAYBOOK_PATH)

    return {
        "deleted": item_id,
        "content_preview": preview,
    }
|
||
|
|
|
||
|
|
|
||
|
|
@router.post("/playbook/prune")
async def prune_playbook(threshold: int = Query(-3, description="Net score threshold for pruning")):
    """
    Prune harmful items from the playbook.

    Items with net_score <= threshold will be removed.
    """
    playbook = get_playbook()

    # Delegate the pruning to the playbook, then persist the result.
    pruned = playbook.prune_harmful(threshold=threshold)
    playbook.save(PLAYBOOK_PATH)

    return {
        "items_pruned": pruned,
        "threshold_used": threshold,
        "remaining_items": len(playbook.items),
    }
|
||
|
|
|
||
|
|
|
||
|
|
@router.get("/playbook/context")
async def get_playbook_context(
    task_type: str = Query("optimization", description="Task type for context filtering"),
    max_items: int = Query(15, description="Maximum items to include"),
    min_confidence: float = Query(0.5, description="Minimum confidence threshold")
):
    """
    Get playbook context string formatted for LLM consumption.

    This is what gets injected into the LLM context window.
    """
    playbook = get_playbook()

    rendered = playbook.get_context_for_task(
        task_type=task_type,
        max_items=max_items,
        min_confidence=min_confidence,
    )

    # NOTE(review): items_included is only an upper bound — it ignores the
    # confidence/task filtering done inside get_context_for_task; confirm
    # whether the exact count should come from that call instead.
    included_bound = len(playbook.items)
    if included_bound > max_items:
        included_bound = max_items

    return {
        "context": rendered,
        "items_included": included_bound,
        "task_type": task_type,
    }
|
||
|
|
|
||
|
|
|
||
|
|
# Session state endpoints
|
||
|
|
@router.get("/session", response_model=SessionStateResponse)
async def get_session_state():
    """Get current session state."""
    # The whole body stays inside the try so any ImportError raised while
    # resolving the optional session-state module maps to a 500.
    try:
        from optimization_engine.context.session_state import get_session

        active = get_session()
        exposed = active.exposed
        current_task = exposed.task_type

        return SessionStateResponse(
            session_id=active.session_id,
            task_type=current_task.value if current_task else None,
            study_name=exposed.study_name,
            study_status=exposed.study_status,
            trials_completed=exposed.trials_completed,
            trials_total=exposed.trials_total,
            best_value=exposed.best_value,
            # Trim histories so the response stays small.
            recent_actions=exposed.recent_actions[-10:],
            recent_errors=exposed.recent_errors[-5:],
        )
    except ImportError:
        raise HTTPException(500, "Session state module not available")
|
||
|
|
|
||
|
|
|
||
|
|
@router.get("/session/context")
async def get_session_context():
    """Get session context string for LLM consumption."""
    try:
        from optimization_engine.context.session_state import get_session

        active = get_session()
        # Rendered context plus identifying metadata for the caller.
        return {
            "context": active.get_llm_context(),
            "session_id": active.session_id,
            "last_updated": active.last_updated,
        }
    except ImportError:
        raise HTTPException(500, "Session state module not available")
|
||
|
|
|
||
|
|
|
||
|
|
# Cache monitoring endpoints
|
||
|
|
@router.get("/cache/stats")
async def get_cache_stats():
    """Get KV-cache efficiency statistics."""
    try:
        from optimization_engine.context.cache_monitor import get_cache_optimizer

        monitor = get_cache_optimizer()
        payload = {"stats": monitor.get_stats_dict()}
        payload["report"] = monitor.get_report()
        return payload
    except ImportError:
        # Cache monitoring is optional; report its absence instead of failing.
        return {
            "message": "Cache monitoring not active",
            "stats": None,
        }
|
||
|
|
|
||
|
|
|
||
|
|
# Learning report endpoints
|
||
|
|
@router.get("/learning/report")
async def get_learning_report():
    """Get a comprehensive learning report."""
    playbook = get_playbook()
    stats = playbook.get_stats()

    def brief(entry):
        # Compact summary of one item for the report lists.
        return {"id": entry.id, "content": entry.content[:100], "score": entry.net_score}

    # Rank items best-first for the top performers.
    by_score_desc = sorted(playbook.items.values(), key=lambda e: e.net_score, reverse=True)
    top_performers = [brief(e) for e in by_score_desc[:10]]

    # Re-sort ascending (stable) and keep only genuinely negative items.
    by_score_asc = sorted(by_score_desc, key=lambda e: e.net_score)
    worst_performers = [brief(e) for e in by_score_asc[:5] if e.net_score < 0]

    return {
        "generated_at": datetime.now().isoformat(),
        "playbook_stats": stats,
        "top_performers": top_performers,
        "worst_performers": worst_performers,
        "recommendations": _generate_recommendations(playbook),
    }
|
||
|
|
|
||
|
|
|
||
|
|
def _generate_recommendations(playbook) -> List[str]:
|
||
|
|
"""Generate recommendations based on playbook state."""
|
||
|
|
recommendations = []
|
||
|
|
|
||
|
|
# Check for harmful items
|
||
|
|
harmful = [i for i in playbook.items.values() if i.net_score < -3]
|
||
|
|
if harmful:
|
||
|
|
recommendations.append(
|
||
|
|
f"Consider pruning {len(harmful)} harmful items (net_score < -3)"
|
||
|
|
)
|
||
|
|
|
||
|
|
# Check for untested items
|
||
|
|
untested = [
|
||
|
|
i for i in playbook.items.values()
|
||
|
|
if i.helpful_count + i.harmful_count == 0
|
||
|
|
]
|
||
|
|
if len(untested) > 10:
|
||
|
|
recommendations.append(
|
||
|
|
f"{len(untested)} items have no feedback - consider testing them"
|
||
|
|
)
|
||
|
|
|
||
|
|
# Check category balance
|
||
|
|
stats = playbook.get_stats()
|
||
|
|
if stats["by_category"].get("MISTAKE", 0) < 5:
|
||
|
|
recommendations.append(
|
||
|
|
"Low mistake count - actively record errors when they occur"
|
||
|
|
)
|
||
|
|
|
||
|
|
if not recommendations:
|
||
|
|
recommendations.append("Playbook is in good health!")
|
||
|
|
|
||
|
|
return recommendations
|