feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System

- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:

    python tools/devloop_cli.py start 'Create new feature'
    python tools/devloop_cli.py plan 'Fix bug in X'
    python tools/devloop_cli.py test --study support_arm
    python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)

- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests

- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates

- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
561
optimization_engine/devloop/orchestrator.py
Normal file
561
optimization_engine/devloop/orchestrator.py
Normal file
@@ -0,0 +1,561 @@
|
||||
"""
|
||||
DevLoop Orchestrator - Master controller for closed-loop development.
|
||||
|
||||
Coordinates:
|
||||
- Gemini Pro: Strategic planning, analysis, test design
|
||||
- Claude Code: Implementation, code changes, fixes
|
||||
- Dashboard: Automated testing, verification
|
||||
- LAC: Learning capture and retrieval
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LoopPhase(Enum):
    """Phases of the closed-loop development cycle.

    Each member's value is the wire-format string used when the state is
    serialized (see ``DevLoopOrchestrator.get_state``).
    """

    # Loop is not running.
    IDLE = "idle"
    # Gemini is producing an implementation plan.
    PLANNING = "planning"
    # Claude Code is applying the plan.
    IMPLEMENTING = "implementing"
    # Dashboard test suite is executing.
    TESTING = "testing"
    # Gemini is analyzing the test results.
    ANALYZING = "analyzing"
    # Claude Code is implementing fixes for found issues.
    FIXING = "fixing"
    # Dashboard is re-checking that the fixes hold.
    VERIFYING = "verifying"
|
||||
|
||||
|
||||
@dataclass
class LoopState:
    """Mutable snapshot of where the development loop currently stands.

    A single instance is owned by the orchestrator and pushed to
    subscribers on every state change.
    """

    # Phase the loop is executing right now.
    phase: LoopPhase = LoopPhase.IDLE
    # Count of iterations started so far (incremented after each one).
    iteration: int = 0
    # Human-readable description of the task in progress, if any.
    current_task: Optional[str] = None
    # Raw results from the most recent testing phase.
    test_results: Optional[Dict] = None
    # Gemini's analysis of the most recent test results.
    analysis: Optional[Dict] = None
    # ISO-8601 timestamp of the last state mutation.
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())
|
||||
|
||||
|
||||
@dataclass
class IterationResult:
    """Outcome of one pass through plan → implement → test → analyze (→ fix → verify)."""

    # Zero-based index of this iteration within the cycle.
    iteration: int
    # Plan produced by the planning phase.
    plan: Optional[Dict] = None
    # Summary returned by the implementation phase.
    implementation: Optional[Dict] = None
    # Raw results from the testing phase.
    test_results: Optional[Dict] = None
    # Analysis of the test results.
    analysis: Optional[Dict] = None
    # One entry per attempted fix, when the analysis found issues.
    fixes: Optional[List[Dict]] = None
    # Verification summary for the applied fixes.
    verification: Optional[Dict] = None
    # True when the iteration ended cleanly (tests passed or fixes verified).
    success: bool = False
    # Wall-clock duration of the iteration.
    duration_seconds: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class CycleReport:
    """Aggregated record of a full development cycle for one objective."""

    # The objective this cycle is trying to achieve.
    objective: str
    # ISO-8601 timestamp captured when the report is created.
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())
    # ISO-8601 timestamp set when the cycle finishes.
    end_time: Optional[str] = None
    # Results of every iteration executed in this cycle, in order.
    iterations: List[IterationResult] = field(default_factory=list)
    # "in_progress" | "completed" | "max_iterations_reached" | "error: ..."
    status: str = "in_progress"
    # Total wall-clock duration of the cycle.
    total_duration_seconds: float = 0.0
|
||||
|
||||
|
||||
class DevLoopOrchestrator:
    """
    Autonomous development loop orchestrator.

    Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
    in a continuous improvement cycle.

    Flow:
    1. Gemini: Plan features/fixes
    2. Claude Code: Implement
    3. Dashboard: Test
    4. Gemini: Analyze results
    5. Claude Code: Fix issues
    6. Dashboard: Verify
    7. Loop back with learnings
    """

    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.

        Args:
            config: Configuration dict with API keys and settings
            gemini_client: Pre-configured Gemini client (optional)
            claude_bridge: Pre-configured Claude Code bridge (optional)
            dashboard_runner: Pre-configured Dashboard test runner (optional)
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked with the LoopState on every state change.
        self.subscribers: List[Callable] = []

        # Components are resolved lazily via properties so importing this
        # module does not require the planner/bridge/runner packages.
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None

        # Completed cycle reports, kept for learning and export.
        self.cycle_history: List[CycleReport] = []

    def _default_config(self) -> Dict:
        """Return the default configuration used when none is supplied."""
        return {
            "max_iterations": 10,
            "auto_fix_threshold": "high",  # Only auto-fix high+ severity
            "learning_enabled": True,
            "dashboard_url": "http://localhost:3000",
            "websocket_url": "ws://localhost:8000",
            "test_timeout_ms": 30000,
        }

    @property
    def gemini(self):
        """Lazy-load the Gemini planner on first access."""
        if self._gemini is None:
            from .planning import GeminiPlanner

            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini

    @property
    def claude_bridge(self):
        """Lazy-load the Claude Code bridge on first access."""
        if self._claude_bridge is None:
            from .claude_bridge import ClaudeCodeBridge

            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge

    @property
    def dashboard(self):
        """Lazy-load the Dashboard test runner on first access."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner

            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard

    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled or the package is missing.
        NOTE(review): an ImportError is retried (and re-logged) on every
        access rather than cached — confirm this is intentional.
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac

                self._lac = get_lac()
            except ImportError:
                logger.warning("LAC not available, learning disabled")
        return self._lac

    def subscribe(self, callback: Callable[[LoopState], None]):
        """Register a callback to be invoked on every state update."""
        self.subscribers.append(callback)

    def unsubscribe(self, callback: Callable):
        """Remove a previously registered callback (no-op if absent)."""
        if callback in self.subscribers:
            self.subscribers.remove(callback)

    def _notify_subscribers(self):
        """Stamp the state and invoke all subscribers; one failing subscriber
        must not prevent the others from being notified."""
        self.state.last_update = datetime.now().isoformat()
        for callback in self.subscribers:
            try:
                callback(self.state)
            except Exception as e:
                logger.error(f"Subscriber error: {e}")

    def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None):
        """Update the provided state fields and notify subscribers.

        Fix: compare against None explicitly — the original truthiness
        check (`if task:`) silently dropped an empty-string task.
        """
        if phase is not None:
            self.state.phase = phase
        if task is not None:
            self.state.current_task = task
        self._notify_subscribers()

    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.

        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations

        Returns:
            CycleReport with all iteration results
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)

        report = CycleReport(objective=objective)
        start_time = datetime.now()

        logger.info(f"Starting development cycle: {objective}")

        try:
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)

                # Record learning from successful patterns
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)

            # Fix: only flag max-iterations when the objective is still
            # unmet, so a success on the final allowed iteration is
            # reported as "completed" rather than "max_iterations_reached".
            if not self._is_objective_complete(report) and len(report.iterations) >= max_iter:
                report.status = "max_iterations_reached"
                logger.warning(f"Max iterations ({max_iter}) reached")

        except Exception as e:
            # Best-effort: the partial report is still finalized and returned.
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")

        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()

        if report.status == "in_progress":
            report.status = "completed"

        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)

        return report

    def _is_objective_complete(self, report: CycleReport) -> bool:
        """Return True when the last iteration succeeded with zero failed tests."""
        if not report.iterations:
            return False

        last_iter = report.iterations[-1]

        # Success if last iteration passed all tests
        if last_iter.success and last_iter.test_results:
            tests = last_iter.test_results
            if tests.get("summary", {}).get("failed", 0) == 0:
                return True

        return False

    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Any phase exception marks the iteration unsuccessful; the duration
        and iteration counter are updated regardless.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)

        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)

            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)

            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            self.state.test_results = result.test_results

            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis

            # Phases 5-6: Fix & Verify if needed
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)

                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                result.success = True

        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False

        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        self.state.iteration += 1

        return result

    async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
        """Gemini creates an implementation plan.

        Returns a dict with at least "tasks" and "test_scenarios" keys; on
        failure an "error" key is set and both lists are empty.
        """
        # Gather context
        historical_learnings = []
        if self.lac:
            historical_learnings = self.lac.get_relevant_insights(objective)

        plan_request = {
            "objective": objective,
            "context": context or {},
            "previous_results": self.state.test_results,
            "historical_learnings": historical_learnings,
        }

        try:
            plan = await self.gemini.create_plan(plan_request)
            logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
            return plan
        except Exception as e:
            logger.error(f"Planning phase failed: {e}")
            return {"error": str(e), "tasks": [], "test_scenarios": []}

    async def _implementation_phase(self, plan: Dict) -> Dict:
        """Claude Code implements the plan; skipped when the plan is missing
        or carries an error from the planning phase."""
        if not plan or plan.get("error"):
            return {"status": "skipped", "reason": "No valid plan"}

        try:
            result = await self.claude_bridge.execute_plan(plan)
            return {
                "status": result.get("status", "unknown"),
                "files_modified": result.get("files", []),
                "warnings": result.get("warnings", []),
            }
        except Exception as e:
            logger.error(f"Implementation phase failed: {e}")
            return {"status": "error", "error": str(e)}

    async def _testing_phase(self, plan: Dict) -> Dict:
        """Dashboard runs automated tests for the plan's scenarios.

        Falls back to generated default scenarios when the plan supplies
        none; a runner failure is reported as one failed test.
        """
        test_scenarios = plan.get("test_scenarios", [])

        if not test_scenarios:
            # Generate default tests based on objective
            test_scenarios = self._generate_default_tests(plan)

        try:
            results = await self.dashboard.run_test_suite(test_scenarios)
            return results
        except Exception as e:
            logger.error(f"Testing phase failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "summary": {"passed": 0, "failed": 1, "total": 1},
            }

    def _generate_default_tests(self, plan: Dict) -> List[Dict]:
        """Generate default test scenarios keyed off words in the objective.

        Matching is a simple substring check ("study"/"create" → study
        creation tests, "optimi" → optimization tests); an unmatched
        objective yields an empty list.
        """
        objective = plan.get("objective", "")

        tests = []

        # Study creation tests
        if "study" in objective.lower() or "create" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_study_exists",
                        "name": "Study directory exists",
                        "type": "filesystem",
                        "check": "directory_exists",
                    },
                    {
                        "id": "test_spec_valid",
                        "name": "AtomizerSpec is valid",
                        "type": "api",
                        "endpoint": "/api/studies/{study_id}/spec/validate",
                    },
                    {
                        "id": "test_dashboard_loads",
                        "name": "Dashboard loads study",
                        "type": "browser",
                        "action": "load_study",
                    },
                ]
            )

        # Optimization tests ("optimi" also matches "optimise"/"optimize")
        if "optimi" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_run_trial",
                        "name": "Single trial executes",
                        "type": "cli",
                        "command": "python run_optimization.py --test",
                    },
                ]
            )

        return tests

    async def _analysis_phase(self, test_results: Dict) -> Dict:
        """Gemini analyzes test results.

        On analyzer failure the error itself is surfaced as a high-severity
        issue so the loop proceeds into the fixing phase.
        """
        try:
            from .analyzer import ProblemAnalyzer

            analyzer = ProblemAnalyzer(self.gemini)
            return await analyzer.analyze_test_results(test_results)
        except Exception as e:
            logger.error(f"Analysis phase failed: {e}")
            return {
                "issues_found": True,
                "issues": [{"description": str(e), "severity": "high"}],
                "fix_plans": {},
            }

    async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
        """Claude Code implements one fix per analyzed issue.

        Issues without a matching entry in "fix_plans" are skipped; a
        failed fix is recorded with status "error" instead of raising.
        """
        fixes = []

        for issue in analysis.get("issues", []):
            fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))

            if fix_plan:
                try:
                    result = await self.claude_bridge.execute_fix(fix_plan)
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": result.get("status"),
                            "files_modified": result.get("files", []),
                        }
                    )
                except Exception as e:
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": "error",
                            "error": str(e),
                        }
                    )

        return fixes

    async def _verification_phase(self, fixes: List[Dict]) -> Dict:
        """Dashboard verifies each applied fix.

        Fixes that already failed are marked not-passed without re-testing;
        "all_passed" is False if any verification fails.
        """
        all_passed = True
        verification_results = []

        for fix in fixes:
            if fix.get("status") == "error":
                all_passed = False
                verification_results.append(
                    {
                        "issue_id": fix.get("issue_id"),
                        "passed": False,
                        "reason": fix.get("error"),
                    }
                )
            else:
                # Run targeted test
                result = await self.dashboard.verify_fix(fix)
                verification_results.append(result)
                if not result.get("passed", False):
                    all_passed = False

        return {
            "all_passed": all_passed,
            "results": verification_results,
        }

    async def _record_learning(self, iteration: IterationResult):
        """Store a successful pattern in LAC for future reference (best-effort)."""
        if not self.lac:
            return

        # Fix: iteration.plan may be None (e.g. planning failed but fixes
        # verified); guard before .get() instead of relying on the broad
        # except below to swallow the AttributeError.
        plan = iteration.plan or {}

        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")

    # ========================================================================
    # Single-step operations (for manual control)
    # ========================================================================

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase, returning the plan."""
        self._update_state(LoopPhase.PLANNING, objective)
        plan = await self._planning_phase(objective, context)
        self._update_state(LoopPhase.IDLE)
        return plan

    async def step_implement(self, plan: Dict) -> Dict:
        """Execute only the implementation phase for a given plan."""
        self._update_state(LoopPhase.IMPLEMENTING)
        result = await self._implementation_phase(plan)
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_test(self, scenarios: List[Dict]) -> Dict:
        """Execute only the testing phase for the given scenarios."""
        self._update_state(LoopPhase.TESTING)
        result = await self._testing_phase({"test_scenarios": scenarios})
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase for the given test results."""
        self._update_state(LoopPhase.ANALYZING)
        result = await self._analysis_phase(test_results)
        self._update_state(LoopPhase.IDLE)
        return result

    def get_state(self) -> Dict:
        """Return the current loop state as a JSON-serializable dict."""
        return {
            "phase": self.state.phase.value,
            "iteration": self.state.iteration,
            "current_task": self.state.current_task,
            "test_results": self.state.test_results,
            "last_update": self.state.last_update,
        }

    def export_history(self, filepath: Optional[Path] = None) -> Dict:
        """Export a summary of cycle history, optionally writing it as JSON.

        Args:
            filepath: When given, the summary is also dumped to this path.

        Returns:
            The summary dict (always returned, whether or not it was written).
        """
        history = {
            "exported_at": datetime.now().isoformat(),
            "total_cycles": len(self.cycle_history),
            "cycles": [
                {
                    "objective": c.objective,
                    "status": c.status,
                    "iterations": len(c.iterations),
                    "duration_seconds": c.total_duration_seconds,
                }
                for c in self.cycle_history
            ],
        }

        if filepath:
            # Fix: pin the encoding so the export is byte-identical across
            # platforms instead of depending on the locale default.
            with open(filepath, "w", encoding="utf-8") as f:
                json.dump(history, f, indent=2)

        return history
|
||||
Reference in New Issue
Block a user