feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System

- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:

    python tools/devloop_cli.py start 'Create new feature'
    python tools/devloop_cli.py plan 'Fix bug in X'
    python tools/devloop_cli.py test --study support_arm
    python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)

- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests

- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates

- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
561
optimization_engine/devloop/orchestrator.py
Normal file
561
optimization_engine/devloop/orchestrator.py
Normal file
@@ -0,0 +1,561 @@
|
||||
"""
|
||||
DevLoop Orchestrator - Master controller for closed-loop development.
|
||||
|
||||
Coordinates:
|
||||
- Gemini Pro: Strategic planning, analysis, test design
|
||||
- Claude Code: Implementation, code changes, fixes
|
||||
- Dashboard: Automated testing, verification
|
||||
- LAC: Learning capture and retrieval
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LoopPhase(Enum):
    """Phases of the closed-loop development cycle.

    Each member's value is the wire-format string used when the state is
    serialized (see ``DevLoopOrchestrator.get_state``).
    """

    # Loop is not running.
    IDLE = "idle"
    # Gemini is producing an implementation plan.
    PLANNING = "planning"
    # Claude Code is applying the plan.
    IMPLEMENTING = "implementing"
    # Dashboard test suite is executing.
    TESTING = "testing"
    # Gemini is analyzing the test results.
    ANALYZING = "analyzing"
    # Claude Code is implementing fixes for found issues.
    FIXING = "fixing"
    # Dashboard is re-checking that the fixes hold.
    VERIFYING = "verifying"
|
||||
|
||||
|
||||
@dataclass
class LoopState:
    """Mutable snapshot of where the development loop currently stands.

    A single instance is owned by the orchestrator and pushed to
    subscribers on every state change.
    """

    # Phase the loop is executing right now.
    phase: LoopPhase = LoopPhase.IDLE
    # Count of iterations started so far (incremented after each one).
    iteration: int = 0
    # Human-readable description of the task in progress, if any.
    current_task: Optional[str] = None
    # Raw results from the most recent testing phase.
    test_results: Optional[Dict] = None
    # Gemini's analysis of the most recent test results.
    analysis: Optional[Dict] = None
    # ISO-8601 timestamp of the last state mutation.
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())
|
||||
|
||||
|
||||
@dataclass
class IterationResult:
    """Outcome of one pass through plan → implement → test → analyze (→ fix → verify)."""

    # Zero-based index of this iteration within the cycle.
    iteration: int
    # Plan produced by the planning phase.
    plan: Optional[Dict] = None
    # Summary returned by the implementation phase.
    implementation: Optional[Dict] = None
    # Raw results from the testing phase.
    test_results: Optional[Dict] = None
    # Analysis of the test results.
    analysis: Optional[Dict] = None
    # One entry per attempted fix, when the analysis found issues.
    fixes: Optional[List[Dict]] = None
    # Verification summary for the applied fixes.
    verification: Optional[Dict] = None
    # True when the iteration ended cleanly (tests passed or fixes verified).
    success: bool = False
    # Wall-clock duration of the iteration.
    duration_seconds: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class CycleReport:
    """Aggregated record of a full development cycle for one objective."""

    # The objective this cycle is trying to achieve.
    objective: str
    # ISO-8601 timestamp captured when the report is created.
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())
    # ISO-8601 timestamp set when the cycle finishes.
    end_time: Optional[str] = None
    # Results of every iteration executed in this cycle, in order.
    iterations: List[IterationResult] = field(default_factory=list)
    # "in_progress" | "completed" | "max_iterations_reached" | "error: ..."
    status: str = "in_progress"
    # Total wall-clock duration of the cycle.
    total_duration_seconds: float = 0.0
|
||||
|
||||
|
||||
class DevLoopOrchestrator:
    """
    Autonomous development loop orchestrator.

    Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
    in a continuous improvement cycle.

    Flow:
    1. Gemini: Plan features/fixes
    2. Claude Code: Implement
    3. Dashboard: Test
    4. Gemini: Analyze results
    5. Claude Code: Fix issues
    6. Dashboard: Verify
    7. Loop back with learnings
    """

    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.

        Args:
            config: Configuration dict with API keys and settings
            gemini_client: Pre-configured Gemini client (optional)
            claude_bridge: Pre-configured Claude Code bridge (optional)
            dashboard_runner: Pre-configured Dashboard test runner (optional)
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked with the LoopState on every state change.
        self.subscribers: List[Callable] = []

        # Components are resolved lazily via properties so importing this
        # module does not require the planner/bridge/runner packages.
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None

        # Completed cycle reports, kept for learning and export.
        self.cycle_history: List[CycleReport] = []

    def _default_config(self) -> Dict:
        """Return the default configuration used when none is supplied."""
        return {
            "max_iterations": 10,
            "auto_fix_threshold": "high",  # Only auto-fix high+ severity
            "learning_enabled": True,
            "dashboard_url": "http://localhost:3000",
            "websocket_url": "ws://localhost:8000",
            "test_timeout_ms": 30000,
        }

    @property
    def gemini(self):
        """Lazy-load the Gemini planner on first access."""
        if self._gemini is None:
            from .planning import GeminiPlanner

            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini

    @property
    def claude_bridge(self):
        """Lazy-load the Claude Code bridge on first access."""
        if self._claude_bridge is None:
            from .claude_bridge import ClaudeCodeBridge

            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge

    @property
    def dashboard(self):
        """Lazy-load the Dashboard test runner on first access."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner

            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard

    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled or the package is missing.
        NOTE(review): an ImportError is retried (and re-logged) on every
        access rather than cached — confirm this is intentional.
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac

                self._lac = get_lac()
            except ImportError:
                logger.warning("LAC not available, learning disabled")
        return self._lac

    def subscribe(self, callback: Callable[[LoopState], None]):
        """Register a callback to be invoked on every state update."""
        self.subscribers.append(callback)

    def unsubscribe(self, callback: Callable):
        """Remove a previously registered callback (no-op if absent)."""
        if callback in self.subscribers:
            self.subscribers.remove(callback)

    def _notify_subscribers(self):
        """Stamp the state and invoke all subscribers; one failing subscriber
        must not prevent the others from being notified."""
        self.state.last_update = datetime.now().isoformat()
        for callback in self.subscribers:
            try:
                callback(self.state)
            except Exception as e:
                logger.error(f"Subscriber error: {e}")

    def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None):
        """Update the provided state fields and notify subscribers.

        Fix: compare against None explicitly — the original truthiness
        check (`if task:`) silently dropped an empty-string task.
        """
        if phase is not None:
            self.state.phase = phase
        if task is not None:
            self.state.current_task = task
        self._notify_subscribers()

    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.

        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations

        Returns:
            CycleReport with all iteration results
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)

        report = CycleReport(objective=objective)
        start_time = datetime.now()

        logger.info(f"Starting development cycle: {objective}")

        try:
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)

                # Record learning from successful patterns
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)

            # Fix: only flag max-iterations when the objective is still
            # unmet, so a success on the final allowed iteration is
            # reported as "completed" rather than "max_iterations_reached".
            if not self._is_objective_complete(report) and len(report.iterations) >= max_iter:
                report.status = "max_iterations_reached"
                logger.warning(f"Max iterations ({max_iter}) reached")

        except Exception as e:
            # Best-effort: the partial report is still finalized and returned.
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")

        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()

        if report.status == "in_progress":
            report.status = "completed"

        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)

        return report

    def _is_objective_complete(self, report: CycleReport) -> bool:
        """Return True when the last iteration succeeded with zero failed tests."""
        if not report.iterations:
            return False

        last_iter = report.iterations[-1]

        # Success if last iteration passed all tests
        if last_iter.success and last_iter.test_results:
            tests = last_iter.test_results
            if tests.get("summary", {}).get("failed", 0) == 0:
                return True

        return False

    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Any phase exception marks the iteration unsuccessful; the duration
        and iteration counter are updated regardless.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)

        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)

            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)

            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            self.state.test_results = result.test_results

            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis

            # Phases 5-6: Fix & Verify if needed
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)

                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                result.success = True

        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False

        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        self.state.iteration += 1

        return result

    async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
        """Gemini creates an implementation plan.

        Returns a dict with at least "tasks" and "test_scenarios" keys; on
        failure an "error" key is set and both lists are empty.
        """
        # Gather context
        historical_learnings = []
        if self.lac:
            historical_learnings = self.lac.get_relevant_insights(objective)

        plan_request = {
            "objective": objective,
            "context": context or {},
            "previous_results": self.state.test_results,
            "historical_learnings": historical_learnings,
        }

        try:
            plan = await self.gemini.create_plan(plan_request)
            logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
            return plan
        except Exception as e:
            logger.error(f"Planning phase failed: {e}")
            return {"error": str(e), "tasks": [], "test_scenarios": []}

    async def _implementation_phase(self, plan: Dict) -> Dict:
        """Claude Code implements the plan; skipped when the plan is missing
        or carries an error from the planning phase."""
        if not plan or plan.get("error"):
            return {"status": "skipped", "reason": "No valid plan"}

        try:
            result = await self.claude_bridge.execute_plan(plan)
            return {
                "status": result.get("status", "unknown"),
                "files_modified": result.get("files", []),
                "warnings": result.get("warnings", []),
            }
        except Exception as e:
            logger.error(f"Implementation phase failed: {e}")
            return {"status": "error", "error": str(e)}

    async def _testing_phase(self, plan: Dict) -> Dict:
        """Dashboard runs automated tests for the plan's scenarios.

        Falls back to generated default scenarios when the plan supplies
        none; a runner failure is reported as one failed test.
        """
        test_scenarios = plan.get("test_scenarios", [])

        if not test_scenarios:
            # Generate default tests based on objective
            test_scenarios = self._generate_default_tests(plan)

        try:
            results = await self.dashboard.run_test_suite(test_scenarios)
            return results
        except Exception as e:
            logger.error(f"Testing phase failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "summary": {"passed": 0, "failed": 1, "total": 1},
            }

    def _generate_default_tests(self, plan: Dict) -> List[Dict]:
        """Generate default test scenarios keyed off words in the objective.

        Matching is a simple substring check ("study"/"create" → study
        creation tests, "optimi" → optimization tests); an unmatched
        objective yields an empty list.
        """
        objective = plan.get("objective", "")

        tests = []

        # Study creation tests
        if "study" in objective.lower() or "create" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_study_exists",
                        "name": "Study directory exists",
                        "type": "filesystem",
                        "check": "directory_exists",
                    },
                    {
                        "id": "test_spec_valid",
                        "name": "AtomizerSpec is valid",
                        "type": "api",
                        "endpoint": "/api/studies/{study_id}/spec/validate",
                    },
                    {
                        "id": "test_dashboard_loads",
                        "name": "Dashboard loads study",
                        "type": "browser",
                        "action": "load_study",
                    },
                ]
            )

        # Optimization tests ("optimi" also matches "optimise"/"optimize")
        if "optimi" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_run_trial",
                        "name": "Single trial executes",
                        "type": "cli",
                        "command": "python run_optimization.py --test",
                    },
                ]
            )

        return tests

    async def _analysis_phase(self, test_results: Dict) -> Dict:
        """Gemini analyzes test results.

        On analyzer failure the error itself is surfaced as a high-severity
        issue so the loop proceeds into the fixing phase.
        """
        try:
            from .analyzer import ProblemAnalyzer

            analyzer = ProblemAnalyzer(self.gemini)
            return await analyzer.analyze_test_results(test_results)
        except Exception as e:
            logger.error(f"Analysis phase failed: {e}")
            return {
                "issues_found": True,
                "issues": [{"description": str(e), "severity": "high"}],
                "fix_plans": {},
            }

    async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
        """Claude Code implements one fix per analyzed issue.

        Issues without a matching entry in "fix_plans" are skipped; a
        failed fix is recorded with status "error" instead of raising.
        """
        fixes = []

        for issue in analysis.get("issues", []):
            fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))

            if fix_plan:
                try:
                    result = await self.claude_bridge.execute_fix(fix_plan)
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": result.get("status"),
                            "files_modified": result.get("files", []),
                        }
                    )
                except Exception as e:
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": "error",
                            "error": str(e),
                        }
                    )

        return fixes

    async def _verification_phase(self, fixes: List[Dict]) -> Dict:
        """Dashboard verifies each applied fix.

        Fixes that already failed are marked not-passed without re-testing;
        "all_passed" is False if any verification fails.
        """
        all_passed = True
        verification_results = []

        for fix in fixes:
            if fix.get("status") == "error":
                all_passed = False
                verification_results.append(
                    {
                        "issue_id": fix.get("issue_id"),
                        "passed": False,
                        "reason": fix.get("error"),
                    }
                )
            else:
                # Run targeted test
                result = await self.dashboard.verify_fix(fix)
                verification_results.append(result)
                if not result.get("passed", False):
                    all_passed = False

        return {
            "all_passed": all_passed,
            "results": verification_results,
        }

    async def _record_learning(self, iteration: IterationResult):
        """Store a successful pattern in LAC for future reference (best-effort)."""
        if not self.lac:
            return

        # Fix: iteration.plan may be None (e.g. planning failed but fixes
        # verified); guard before .get() instead of relying on the broad
        # except below to swallow the AttributeError.
        plan = iteration.plan or {}

        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")

    # ========================================================================
    # Single-step operations (for manual control)
    # ========================================================================

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase, returning the plan."""
        self._update_state(LoopPhase.PLANNING, objective)
        plan = await self._planning_phase(objective, context)
        self._update_state(LoopPhase.IDLE)
        return plan

    async def step_implement(self, plan: Dict) -> Dict:
        """Execute only the implementation phase for a given plan."""
        self._update_state(LoopPhase.IMPLEMENTING)
        result = await self._implementation_phase(plan)
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_test(self, scenarios: List[Dict]) -> Dict:
        """Execute only the testing phase for the given scenarios."""
        self._update_state(LoopPhase.TESTING)
        result = await self._testing_phase({"test_scenarios": scenarios})
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase for the given test results."""
        self._update_state(LoopPhase.ANALYZING)
        result = await self._analysis_phase(test_results)
        self._update_state(LoopPhase.IDLE)
        return result

    def get_state(self) -> Dict:
        """Return the current loop state as a JSON-serializable dict."""
        return {
            "phase": self.state.phase.value,
            "iteration": self.state.iteration,
            "current_task": self.state.current_task,
            "test_results": self.state.test_results,
            "last_update": self.state.last_update,
        }

    def export_history(self, filepath: Optional[Path] = None) -> Dict:
        """Export a summary of cycle history, optionally writing it as JSON.

        Args:
            filepath: When given, the summary is also dumped to this path.

        Returns:
            The summary dict (always returned, whether or not it was written).
        """
        history = {
            "exported_at": datetime.now().isoformat(),
            "total_cycles": len(self.cycle_history),
            "cycles": [
                {
                    "objective": c.objective,
                    "status": c.status,
                    "iterations": len(c.iterations),
                    "duration_seconds": c.total_duration_seconds,
                }
                for c in self.cycle_history
            ],
        }

        if filepath:
            # Fix: pin the encoding so the export is byte-identical across
            # platforms instead of depending on the locale default.
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(history, f, indent=2)

        return history
|
||||
Reference in New Issue
Block a user