Files
Atomizer/optimization_engine/devloop/orchestrator.py
Anto01 3193831340 feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
2026-01-24 21:18:18 -05:00

562 lines
19 KiB
Python

"""
DevLoop Orchestrator - Master controller for closed-loop development.
Coordinates:
- Gemini Pro: Strategic planning, analysis, test design
- Claude Code: Implementation, code changes, fixes
- Dashboard: Automated testing, verification
- LAC: Learning capture and retrieval
"""
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Callable
import logging
logger = logging.getLogger(__name__)
class LoopPhase(Enum):
    """Current phase in the development loop.

    Values are stable lowercase strings because ``phase.value`` is
    serialized directly (see ``DevLoopOrchestrator.get_state``).
    """

    IDLE = "idle"                  # no cycle in progress
    PLANNING = "planning"          # Gemini drafting an implementation plan
    IMPLEMENTING = "implementing"  # Claude Code applying the plan
    TESTING = "testing"            # Dashboard running the test suite
    ANALYZING = "analyzing"        # Gemini analyzing the test results
    FIXING = "fixing"              # Claude Code implementing fixes
    VERIFYING = "verifying"        # Dashboard re-checking the fixes
@dataclass
class LoopState:
    """Current state of the development loop, broadcast to subscribers."""

    phase: LoopPhase = LoopPhase.IDLE    # which step of the cycle is active
    iteration: int = 0                   # count of iterations started so far
    current_task: Optional[str] = None   # short description of the active task
    test_results: Optional[Dict] = None  # most recent test-suite results
    analysis: Optional[Dict] = None      # most recent analysis of those results
    # ISO-8601 timestamp of the last state change (naive local time).
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())
@dataclass
class IterationResult:
    """Result of a single development iteration (one pass through the phases)."""

    iteration: int                         # index copied from LoopState.iteration
    plan: Optional[Dict] = None            # output of the planning phase
    implementation: Optional[Dict] = None  # output of the implementation phase
    test_results: Optional[Dict] = None    # output of the testing phase
    analysis: Optional[Dict] = None        # output of the analysis phase
    fixes: Optional[List[Dict]] = None     # per-issue fix attempts, when issues were found
    verification: Optional[Dict] = None    # verification summary for those fixes
    success: bool = False                  # True when the iteration completed without failures
    duration_seconds: float = 0.0          # wall-clock duration of the iteration
@dataclass
class CycleReport:
    """Complete report for a development cycle (one or more iterations)."""

    objective: str  # what the cycle was asked to achieve
    # ISO-8601 timestamps (naive local time).
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())
    end_time: Optional[str] = None
    iterations: List[IterationResult] = field(default_factory=list)
    # One of: "in_progress", "completed", "max_iterations_reached", "error: ...".
    status: str = "in_progress"
    total_duration_seconds: float = 0.0
class DevLoopOrchestrator:
    """
    Autonomous development loop orchestrator.

    Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
    in a continuous improvement cycle.

    Flow:
        1. Gemini: Plan features/fixes
        2. Claude Code: Implement
        3. Dashboard: Test
        4. Gemini: Analyze results
        5. Claude Code: Fix issues
        6. Dashboard: Verify
        7. Loop back with learnings

    All external components (Gemini, Claude bridge, Dashboard runner, LAC)
    are created lazily via properties, so they can be injected for testing.
    """

    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.

        Args:
            config: Configuration dict with API keys and settings; falls back
                to ``_default_config()`` when omitted.
            gemini_client: Pre-configured Gemini client (optional).
            claude_bridge: Pre-configured Claude Code bridge (optional).
            dashboard_runner: Pre-configured Dashboard test runner (optional).
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked synchronously on every state change.
        self.subscribers: List[Callable] = []
        # Initialize components lazily -- None means "create on first use"
        # via the corresponding property below.
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None
        # History for learning: one CycleReport per completed cycle.
        self.cycle_history: List[CycleReport] = []

    def _default_config(self) -> Dict:
        """Default configuration used when no config dict is supplied."""
        return {
            "max_iterations": 10,
            "auto_fix_threshold": "high",  # Only auto-fix high+ severity
            "learning_enabled": True,
            "dashboard_url": "http://localhost:3000",
            "websocket_url": "ws://localhost:8000",
            "test_timeout_ms": 30000,
        }

    @property
    def gemini(self):
        """Lazy-load Gemini planner (built from config["gemini"] on first use)."""
        if self._gemini is None:
            from .planning import GeminiPlanner

            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini

    @property
    def claude_bridge(self):
        """Lazy-load Claude Code bridge (built from config["claude"] on first use)."""
        if self._claude_bridge is None:
            from .claude_bridge import ClaudeCodeBridge

            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge

    @property
    def dashboard(self):
        """Lazy-load Dashboard test runner (receives the full config dict)."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner

            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard

    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled in config or when the
        knowledge_base package is not importable; callers treat a falsy
        value as "learning unavailable".
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac

                self._lac = get_lac()
            except ImportError:
                logger.warning("LAC not available, learning disabled")
        return self._lac

    def subscribe(self, callback: Callable[[LoopState], None]) -> None:
        """Subscribe to state updates; callback receives the shared LoopState."""
        self.subscribers.append(callback)

    def unsubscribe(self, callback: Callable) -> None:
        """Unsubscribe from state updates (no-op if not subscribed)."""
        if callback in self.subscribers:
            self.subscribers.remove(callback)

    def _notify_subscribers(self) -> None:
        """Notify all subscribers of a state change.

        Subscriber exceptions are logged and swallowed deliberately so a
        faulty callback cannot break the development loop.
        """
        self.state.last_update = datetime.now().isoformat()
        for callback in self.subscribers:
            try:
                callback(self.state)
            except Exception as e:
                logger.error(f"Subscriber error: {e}")

    def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None) -> None:
        """Update phase and/or current task, then notify subscribers.

        Note: falsy arguments (None, empty string) leave the existing
        value untouched.
        """
        if phase:
            self.state.phase = phase
        if task:
            self.state.current_task = task
        self._notify_subscribers()

    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.

        Iterates plan -> implement -> test -> analyze (-> fix -> verify) until
        the objective is met or the iteration budget is exhausted.

        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations

        Returns:
            CycleReport with all iteration results; the report is also
            appended to ``self.cycle_history``.
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)
        report = CycleReport(objective=objective)
        start_time = datetime.now()
        logger.info(f"Starting development cycle: {objective}")
        try:
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)
                # Record learning from successful patterns
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)
                # Check for max iterations (sets an explicit status that the
                # while-condition alone would not record).
                if len(report.iterations) >= max_iter:
                    report.status = "max_iterations_reached"
                    logger.warning(f"Max iterations ({max_iter}) reached")
                    break
        except Exception as e:
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")
        # Finalize the report regardless of how the loop ended.
        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()
        if report.status == "in_progress":
            report.status = "completed"
        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)
        return report

    def _is_objective_complete(self, report: CycleReport) -> bool:
        """Check if the objective has been achieved.

        True only when the most recent iteration succeeded and its test
        summary reports zero failures.
        """
        if not report.iterations:
            return False
        last_iter = report.iterations[-1]
        # Success if last iteration passed all tests
        if last_iter.success and last_iter.test_results:
            tests = last_iter.test_results
            if tests.get("summary", {}).get("failed", 0) == 0:
                return True
        return False

    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Exceptions from any phase are caught here: the iteration is marked
        unsuccessful instead of propagating, so the outer cycle can retry.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)
        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)
            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)
            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            self.state.test_results = result.test_results
            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis
            # Phases 5-6: Fix & Verify if needed.  When the analysis reports
            # issues, success is decided by the verification outcome;
            # otherwise the iteration is considered successful as-is.
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)
                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                result.success = True
        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False
        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        self.state.iteration += 1
        return result

    async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
        """Gemini creates an implementation plan.

        On failure, returns an error dict with empty "tasks" and
        "test_scenarios" so downstream phases can degrade gracefully.
        """
        # Gather context: prior test results plus any relevant LAC insights.
        historical_learnings = []
        if self.lac:
            historical_learnings = self.lac.get_relevant_insights(objective)
        plan_request = {
            "objective": objective,
            "context": context or {},
            "previous_results": self.state.test_results,
            "historical_learnings": historical_learnings,
        }
        try:
            plan = await self.gemini.create_plan(plan_request)
            logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
            return plan
        except Exception as e:
            logger.error(f"Planning phase failed: {e}")
            return {"error": str(e), "tasks": [], "test_scenarios": []}

    async def _implementation_phase(self, plan: Dict) -> Dict:
        """Claude Code implements the plan.

        Skips execution when the plan is missing or carries an "error" key;
        bridge failures are reported as a status dict, not raised.
        """
        if not plan or plan.get("error"):
            return {"status": "skipped", "reason": "No valid plan"}
        try:
            result = await self.claude_bridge.execute_plan(plan)
            return {
                "status": result.get("status", "unknown"),
                "files_modified": result.get("files", []),
                "warnings": result.get("warnings", []),
            }
        except Exception as e:
            logger.error(f"Implementation phase failed: {e}")
            return {"status": "error", "error": str(e)}

    async def _testing_phase(self, plan: Dict) -> Dict:
        """Dashboard runs automated tests.

        Uses the plan's "test_scenarios" if present, otherwise generates
        defaults.  A runner failure is reported as one failed test so the
        analysis phase still sees a meaningful summary.
        """
        test_scenarios = plan.get("test_scenarios", [])
        if not test_scenarios:
            # Generate default tests based on objective
            test_scenarios = self._generate_default_tests(plan)
        try:
            results = await self.dashboard.run_test_suite(test_scenarios)
            return results
        except Exception as e:
            logger.error(f"Testing phase failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "summary": {"passed": 0, "failed": 1, "total": 1},
            }

    def _generate_default_tests(self, plan: Dict) -> List[Dict]:
        """Generate default test scenarios based on the plan.

        Heuristic keyword match on the plan's objective: "study"/"create"
        adds filesystem/API/browser checks, "optimi" (optimize/optimization)
        adds a CLI trial run.  May return an empty list when no keyword
        matches.
        """
        objective = plan.get("objective", "")
        tests = []
        # Study creation tests
        if "study" in objective.lower() or "create" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_study_exists",
                        "name": "Study directory exists",
                        "type": "filesystem",
                        "check": "directory_exists",
                    },
                    {
                        "id": "test_spec_valid",
                        "name": "AtomizerSpec is valid",
                        "type": "api",
                        "endpoint": "/api/studies/{study_id}/spec/validate",
                    },
                    {
                        "id": "test_dashboard_loads",
                        "name": "Dashboard loads study",
                        "type": "browser",
                        "action": "load_study",
                    },
                ]
            )
        # Optimization tests
        if "optimi" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_run_trial",
                        "name": "Single trial executes",
                        "type": "cli",
                        "command": "python run_optimization.py --test",
                    },
                ]
            )
        return tests

    async def _analysis_phase(self, test_results: Dict) -> Dict:
        """Gemini analyzes test results.

        On failure, deliberately reports issues_found=True (with the error
        as a high-severity issue) so the loop attempts a fix pass instead
        of declaring success.
        """
        try:
            from .analyzer import ProblemAnalyzer

            analyzer = ProblemAnalyzer(self.gemini)
            return await analyzer.analyze_test_results(test_results)
        except Exception as e:
            logger.error(f"Analysis phase failed: {e}")
            return {
                "issues_found": True,
                "issues": [{"description": str(e), "severity": "high"}],
                "fix_plans": {},
            }

    async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
        """Claude Code implements fixes.

        Issues without a matching entry in analysis["fix_plans"] are
        skipped silently; per-fix errors are recorded rather than raised.
        """
        fixes = []
        for issue in analysis.get("issues", []):
            fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))
            if fix_plan:
                try:
                    result = await self.claude_bridge.execute_fix(fix_plan)
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": result.get("status"),
                            "files_modified": result.get("files", []),
                        }
                    )
                except Exception as e:
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": "error",
                            "error": str(e),
                        }
                    )
        return fixes

    async def _verification_phase(self, fixes: List[Dict]) -> Dict:
        """Dashboard verifies fixes.

        NOTE(review): an empty ``fixes`` list yields all_passed=True --
        presumably intentional (nothing to verify), but confirm this is
        the desired semantics when issues had no fix plans.
        """
        # Re-run tests for each fix
        all_passed = True
        verification_results = []
        for fix in fixes:
            if fix.get("status") == "error":
                # A fix that failed to apply counts as a failed verification.
                all_passed = False
                verification_results.append(
                    {
                        "issue_id": fix.get("issue_id"),
                        "passed": False,
                        "reason": fix.get("error"),
                    }
                )
            else:
                # Run targeted test
                result = await self.dashboard.verify_fix(fix)
                verification_results.append(result)
                if not result.get("passed", False):
                    all_passed = False
        return {
            "all_passed": all_passed,
            "results": verification_results,
        }

    async def _record_learning(self, iteration: IterationResult) -> None:
        """Store successful patterns for future reference.

        Failures are logged and swallowed -- learning capture must never
        break the loop.  NOTE(review): record_insight is called
        synchronously inside this coroutine; presumably it is fast and
        non-blocking -- confirm against the LAC implementation.
        """
        if not self.lac:
            return
        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {iteration.plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")

    # ========================================================================
    # Single-step operations (for manual control)
    # ========================================================================

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase, returning the plan dict."""
        self._update_state(LoopPhase.PLANNING, objective)
        plan = await self._planning_phase(objective, context)
        self._update_state(LoopPhase.IDLE)
        return plan

    async def step_implement(self, plan: Dict) -> Dict:
        """Execute only the implementation phase for a given plan."""
        self._update_state(LoopPhase.IMPLEMENTING)
        result = await self._implementation_phase(plan)
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_test(self, scenarios: List[Dict]) -> Dict:
        """Execute only the testing phase against explicit scenarios."""
        self._update_state(LoopPhase.TESTING)
        result = await self._testing_phase({"test_scenarios": scenarios})
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase for given test results."""
        self._update_state(LoopPhase.ANALYZING)
        result = await self._analysis_phase(test_results)
        self._update_state(LoopPhase.IDLE)
        return result

    def get_state(self) -> Dict:
        """Get current state as a JSON-serializable dict."""
        return {
            "phase": self.state.phase.value,
            "iteration": self.state.iteration,
            "current_task": self.state.current_task,
            "test_results": self.state.test_results,
            "last_update": self.state.last_update,
        }

    def export_history(self, filepath: Optional[Path] = None) -> Dict:
        """Export a summary of cycle history for analysis.

        Args:
            filepath: When given, also write the summary as indented JSON
                to this path.

        Returns:
            Dict with export timestamp, cycle count, and per-cycle summaries.
        """
        history = {
            "exported_at": datetime.now().isoformat(),
            "total_cycles": len(self.cycle_history),
            "cycles": [
                {
                    "objective": c.objective,
                    "status": c.status,
                    "iterations": len(c.iterations),
                    "duration_seconds": c.total_duration_seconds,
                }
                for c in self.cycle_history
            ],
        }
        if filepath:
            with open(filepath, "w") as f:
                json.dump(history, f, indent=2)
        return history