feat: Add DevLoop automation and HTML Reports

## DevLoop - Closed-Loop Development System

- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
    python tools/devloop_cli.py start 'Create new feature'
    python tools/devloop_cli.py plan 'Fix bug in X'
    python tools/devloop_cli.py test --study support_arm
    python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)

- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests

- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates

- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
652
optimization_engine/devloop/cli_bridge.py
Normal file
652
optimization_engine/devloop/cli_bridge.py
Normal file
@@ -0,0 +1,652 @@
|
||||
"""
|
||||
CLI Bridge - Execute AI tasks through Claude Code CLI and OpenCode CLI.
|
||||
|
||||
Uses your existing subscriptions via CLI tools:
|
||||
- Claude Code CLI (claude.exe) for implementation
|
||||
- OpenCode CLI (opencode) for Gemini planning
|
||||
|
||||
No API keys needed - leverages your CLI subscriptions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CLIResult:
    """Result from CLI execution."""

    # True when the CLI process exited with return code 0; False on a
    # non-zero exit, timeout, or launch failure.
    success: bool
    # Captured stdout of the process ("" on the timeout/exception paths).
    output: str
    # Captured stderr, or a synthesized message (timeout text, exception str).
    error: str
    # Wall-clock run time; set to the timeout value when the run timed out.
    duration_seconds: float
    # File paths parsed heuristically from the tool's output; empty when
    # nothing was detected or the tool does not modify files.
    files_modified: List[str]
|
||||
|
||||
class ClaudeCodeCLI:
    """
    Execute tasks through Claude Code CLI.

    Uses: claude.exe --print for non-interactive execution
    """

    # NOTE(review): machine-specific absolute path — consider resolving via
    # an env var or shutil.which() for portability.
    CLAUDE_PATH = r"C:\Users\antoi\.local\bin\claude.exe"

    def __init__(self, workspace: Path):
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute a prompt through Claude Code CLI.

        Args:
            prompt: The instruction/prompt to execute
            timeout: Timeout in seconds
            model: Model to use (opus, sonnet, haiku)

        Returns:
            CLIResult with output and modified files
        """
        start_time = datetime.now()

        # Build command
        cmd = [
            self.CLAUDE_PATH,
            "--print",  # Non-interactive mode
            "--model",
            model,
            "--permission-mode",
            "acceptEdits",  # Auto-accept edits
            prompt,
        ]

        logger.info(f"Executing Claude Code CLI: {prompt[:100]}...")

        try:
            # Run the blocking subprocess call in a worker thread so it does
            # not stall the asyncio event loop (fix: the original called
            # subprocess.run directly inside an async def, blocking all other
            # coroutines for up to `timeout` seconds).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},  # Disable colors
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            # Extract modified files from output
            files_modified = self._extract_modified_files(output)

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(
                f"Claude Code completed in {duration:.1f}s, modified {len(files_modified)} files"
            )

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=files_modified,
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Broad catch is deliberate: launch failures (missing binary,
            # permission errors) are reported as a failed CLIResult rather
            # than crashing the orchestration loop.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    def _extract_modified_files(self, output: str) -> List[str]:
        """Extract a deduplicated, sorted list of modified files from Claude Code output.

        The patterns are heuristics over the CLI's human-readable log lines;
        a file mentioned in none of them will be missed.
        """
        files: List[str] = []

        # Look for file modification patterns
        patterns = [
            r"(?:Created|Modified|Wrote|Updated|Edited):\s*[`'\"]?([^\s`'\"]+)[`'\"]?",
            r"Writing to [`'\"]?([^\s`'\"]+)[`'\"]?",
            r"File saved: ([^\s]+)",
        ]

        for pattern in patterns:
            files.extend(re.findall(pattern, output, re.IGNORECASE))

        # sorted() instead of list(set(...)) so the result is deterministic
        # across runs (fix: set iteration order is arbitrary).
        return sorted(set(files))

    async def execute_with_context(
        self,
        prompt: str,
        context_files: List[str],
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute with additional context files loaded.

        Args:
            prompt: The instruction
            context_files: Files to read as context
            timeout: Timeout in seconds
            model: Model to use (new keyword; default preserves old behavior)
        """
        # Build prompt with context
        context_prompt = prompt

        if context_files:
            context_prompt += "\n\nContext files to consider:\n"
            for f in context_files:
                context_prompt += f"- {f}\n"

        # fix: the model choice was previously dropped here (always "opus").
        return await self.execute(context_prompt, timeout, model)
||||
|
||||
class OpenCodeCLI:
    """
    Execute tasks through OpenCode CLI (Gemini).

    Uses: opencode run for non-interactive execution
    """

    # NOTE(review): machine-specific absolute path — consider making this
    # configurable for portability.
    OPENCODE_PATH = r"C:\Users\antoi\AppData\Roaming\npm\opencode.cmd"

    def __init__(self, workspace: Path):
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 180,
        model: str = "google/gemini-3-pro-preview",
    ) -> "CLIResult":
        """
        Execute a prompt through OpenCode CLI.

        Args:
            prompt: The instruction/prompt
            timeout: Timeout in seconds
            model: Model to use

        Returns:
            CLIResult with output
        """
        start_time = datetime.now()

        # Build command
        cmd = [self.OPENCODE_PATH, "run", "--model", model, prompt]

        logger.info(f"Executing OpenCode CLI: {prompt[:100]}...")

        try:
            # Run the blocking subprocess call in a worker thread so it does
            # not stall the asyncio event loop (fix: the original called
            # subprocess.run directly inside an async def).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(f"OpenCode completed in {duration:.1f}s")

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=[],  # OpenCode typically doesn't modify files directly
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Launch failures are reported, not raised, so the orchestration
            # loop can fall back gracefully.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    @staticmethod
    def _extract_json_block(output: str) -> Optional[str]:
        """Return the first JSON payload embedded in *output*, or None.

        Handles ```json fences, plain ``` fences, and bare JSON objects.
        Shared by plan() and analyze() (fix: the extraction logic was
        duplicated, and analyze() did not handle plain ``` fences).
        """
        if "```json" in output:
            start = output.find("```json") + 7
            end = output.find("```", start)
            return output[start:end].strip()
        if "```" in output:
            start = output.find("```") + 3
            end = output.find("```", start)
            return output[start:end].strip()
        # Try to find a JSON object directly
        match = re.search(r"\{.*\}", output, re.DOTALL)
        return match.group() if match else None

    async def plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """
        Create an implementation plan using Gemini via OpenCode.

        Args:
            objective: What to achieve
            context: Additional context

        Returns:
            Plan dict with tasks and test scenarios (falls back to a minimal
            plan when the CLI fails or returns unparseable output)
        """
        prompt = f"""You are a strategic planner for Atomizer, an FEA optimization framework.

## Objective
{objective}

## Context
{json.dumps(context, indent=2) if context else "None provided"}

## Task
Create a detailed implementation plan in JSON format with:
1. tasks: List of implementation tasks for Claude Code
2. test_scenarios: Tests to verify implementation
3. acceptance_criteria: Success conditions

Output ONLY valid JSON in this format:
```json
{{
  "objective": "{objective}",
  "approach": "Brief description",
  "tasks": [
    {{
      "id": "task_001",
      "description": "What to do",
      "file": "path/to/file.py",
      "priority": "high"
    }}
  ],
  "test_scenarios": [
    {{
      "id": "test_001",
      "name": "Test name",
      "type": "filesystem",
      "steps": [{{"action": "check_exists", "path": "some/path"}}],
      "expected_outcome": {{"exists": true}}
    }}
  ],
  "acceptance_criteria": [
    "Criterion 1"
  ]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            logger.error(f"OpenCode planning failed: {result.error}")
            return self._fallback_plan(objective, context)

        json_str = self._extract_json_block(result.output)
        if json_str is None:
            return self._fallback_plan(objective, context)

        try:
            plan = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse plan JSON: {e}")
            return self._fallback_plan(objective, context)

        logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
        return plan

    def _fallback_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Generate a minimal single-task plan when Gemini planning fails."""
        logger.warning("Using fallback plan")

        return {
            "objective": objective,
            "approach": "Fallback plan - manual implementation",
            "tasks": [
                {
                    "id": "task_001",
                    "description": f"Implement: {objective}",
                    "file": "TBD",
                    "priority": "high",
                }
            ],
            "test_scenarios": [],
            "acceptance_criteria": [objective],
        }

    async def analyze(self, test_results: Dict) -> Dict:
        """
        Analyze test results using Gemini via OpenCode.

        Args:
            test_results: Test report from dashboard

        Returns:
            Analysis with issues and fix plans; short-circuits with an empty
            issue list when no test failed
        """
        summary = test_results.get("summary", {})
        scenarios = test_results.get("scenarios", [])

        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "recommendations": ["All tests passed!"],
            }

        failures = [s for s in scenarios if not s.get("passed", True)]

        prompt = f"""Analyze these test failures for Atomizer FEA optimization framework:

## Test Summary
- Total: {summary.get("total", 0)}
- Passed: {summary.get("passed", 0)}
- Failed: {summary.get("failed", 0)}

## Failed Tests
{json.dumps(failures, indent=2)}

## Task
Provide root cause analysis and fix plans in JSON:

```json
{{
  "issues_found": true,
  "issues": [
    {{
      "id": "issue_001",
      "description": "What went wrong",
      "severity": "high",
      "root_cause": "Why it failed"
    }}
  ],
  "fix_plans": {{
    "issue_001": {{
      "approach": "How to fix",
      "steps": [{{"action": "edit", "file": "path", "description": "change"}}]
    }}
  }},
  "recommendations": ["suggestion"]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            return self._fallback_analysis(failures)

        json_str = self._extract_json_block(result.output)
        if json_str is None:
            # fix: the original parsed "{}" here and returned an empty dict,
            # which the orchestrator read as "no issues found" — declaring
            # success even though tests had failed.
            return self._fallback_analysis(failures)

        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            # fix: was a bare `except:` that swallowed everything, including
            # KeyboardInterrupt and SystemExit.
            logger.error(f"Failed to parse analysis JSON: {e}")
            return self._fallback_analysis(failures)

    def _fallback_analysis(self, failures: List[Dict]) -> Dict:
        """Generate a placeholder analysis (one issue per failure) when Gemini fails."""
        issues = []
        fix_plans = {}

        for i, failure in enumerate(failures):
            issue_id = f"issue_{i + 1}"
            issues.append(
                {
                    "id": issue_id,
                    "description": failure.get("error", "Unknown error"),
                    "severity": "medium",
                    "root_cause": "Requires investigation",
                }
            )
            fix_plans[issue_id] = {
                "approach": "Manual investigation required",
                "steps": [],
            }

        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "recommendations": ["Review failed tests manually"],
        }
||||
|
||||
class DevLoopCLIOrchestrator:
    """
    Orchestrate DevLoop using CLI tools.

    - OpenCode (Gemini) for planning and analysis
    - Claude Code for implementation and fixes
    """

    def __init__(self, workspace: Path = None):
        # Fall back to the default Atomizer checkout when none is supplied.
        self.workspace = workspace if workspace else Path("C:/Users/antoi/Atomizer")
        self.claude = ClaudeCodeCLI(self.workspace)
        self.opencode = OpenCodeCLI(self.workspace)
        self.iteration = 0

    async def run_cycle(
        self,
        objective: str,
        context: Dict = None,
        max_iterations: int = 5,
    ) -> Dict:
        """
        Drive one full plan → implement → test → analyze → fix cycle.

        Args:
            objective: What to achieve
            context: Additional context
            max_iterations: Maximum fix iterations

        Returns:
            Cycle report
        """
        from .test_runner import DashboardTestRunner

        cycle_started = datetime.now()
        report = {
            "objective": objective,
            "iterations": [],
            "status": "in_progress",
        }

        logger.info(f"Starting DevLoop cycle: {objective}")

        # Phase 1: plan once with Gemini via OpenCode; the same plan is
        # reused across fix iterations.
        logger.info("Phase 1: Planning with Gemini...")
        plan = await self.opencode.plan(objective, context)

        for loop_no in range(1, max_iterations + 1):
            record = {"iteration": loop_no}

            # Phase 2: implement the plan with Claude Code.
            logger.info(f"Phase 2 (iter {loop_no}): Implementing with Claude Code...")
            built = await self._implement(plan)
            record["implementation"] = {
                "success": built.success,
                "files_modified": built.files_modified,
            }

            # Phase 3: run the plan's test scenarios.
            logger.info(f"Phase 3 (iter {loop_no}): Testing...")
            runner = DashboardTestRunner()
            outcome = await runner.run_test_suite(plan.get("test_scenarios", []))
            record["test_results"] = outcome

            # Stop as soon as the whole suite is green.
            if outcome.get("summary", {}).get("failed", 0) == 0:
                logger.info("All tests passed!")
                report["iterations"].append(record)
                report["status"] = "success"
                break

            # Phase 4: ask Gemini to diagnose the failures.
            logger.info(f"Phase 4 (iter {loop_no}): Analyzing failures...")
            diagnosis = await self.opencode.analyze(outcome)
            record["analysis"] = diagnosis

            if not diagnosis.get("issues_found"):
                report["status"] = "success"
                report["iterations"].append(record)
                break

            # Phase 5: hand the diagnosed issues back to Claude Code.
            logger.info(f"Phase 5 (iter {loop_no}): Fixing issues...")
            repaired = await self._fix(diagnosis)
            record["fixes"] = {
                "success": repaired.success,
                "files_modified": repaired.files_modified,
            }

            report["iterations"].append(record)

        if report["status"] == "in_progress":
            report["status"] = "max_iterations_reached"

        report["duration_seconds"] = (datetime.now() - cycle_started).total_seconds()

        logger.info(f"DevLoop cycle completed: {report['status']}")

        return report

    async def _implement(self, plan: Dict) -> CLIResult:
        """Hand the plan's tasks to Claude Code for implementation."""
        tasks = plan.get("tasks", [])

        if not tasks:
            # An empty task list counts as a trivially successful run.
            return CLIResult(
                success=True,
                output="No tasks to implement",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Assemble the implementation prompt from sections.
        sections = [
            f"""Implement the following tasks for Atomizer:

## Objective
{plan.get("objective", "Unknown")}

## Approach
{plan.get("approach", "Follow best practices")}

## Tasks
"""
        ]
        sections.extend(
            f"""
### {item.get("id", "task")}: {item.get("description", "")}
- File: {item.get("file", "TBD")}
- Priority: {item.get("priority", "medium")}
"""
            for item in tasks
        )
        sections.append(
            """
## Requirements
- Follow Atomizer coding standards
- Use AtomizerSpec v2.0 format
- Create README.md for any new study
- Use existing extractors from optimization_engine/extractors/
"""
        )

        return await self.claude.execute("".join(sections), timeout=300)

    async def _fix(self, analysis: Dict) -> CLIResult:
        """Hand the analyzed issues and their fix plans to Claude Code."""
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})

        if not issues:
            return CLIResult(
                success=True,
                output="No issues to fix",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Assemble the fix prompt from per-issue sections.
        parts = ["Fix the following issues:\n\n"]

        for issue in issues:
            issue_id = issue.get("id", "unknown")
            parts.append(
                f"""
## Issue: {issue_id}
- Description: {issue.get("description", "")}
- Root Cause: {issue.get("root_cause", "Unknown")}
- Severity: {issue.get("severity", "medium")}
"""
            )

            plan_for_issue = fix_plans.get(issue_id, {})
            if plan_for_issue:
                parts.append(f"- Fix Approach: {plan_for_issue.get('approach', 'Investigate')}\n")
                for step in plan_for_issue.get("steps", []):
                    parts.append(f"  - {step.get('description', step.get('action', 'step'))}\n")

        return await self.claude.execute("".join(parts), timeout=300)

    async def step_plan(self, objective: str, context: Dict = None) -> Dict:
        """Run just the planning phase and return the plan."""
        return await self.opencode.plan(objective, context)

    async def step_implement(self, plan: Dict) -> CLIResult:
        """Run just the implementation phase for an existing plan."""
        return await self._implement(plan)

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Run just the failure-analysis phase on existing test results."""
        return await self.opencode.analyze(test_results)
|
||||
Reference in New Issue
Block a user