feat: Add DevLoop automation and HTML Reports

## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
2026-01-24 21:18:18 -05:00
parent a3f18dc377
commit 3193831340
24 changed files with 6437 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
"""
Atomizer DevLoop - Closed-Loop Development System
This module provides autonomous development cycle capabilities:
1. Gemini Pro for strategic planning and analysis
2. Claude Code (Opus 4.5) for implementation
3. Dashboard testing for verification
4. LAC integration for persistent learning
The DevLoop orchestrates the full cycle:
PLAN (Gemini) -> BUILD (Claude) -> TEST (Dashboard) -> ANALYZE (Gemini) -> FIX (Claude) -> VERIFY
Example usage:
from optimization_engine.devloop import DevLoopOrchestrator
orchestrator = DevLoopOrchestrator()
result = await orchestrator.run_development_cycle(
objective="Create support_arm optimization study"
)
"""
# Lazy imports to avoid circular dependencies
def __getattr__(name):
    """Resolve the package's public names lazily (PEP 562).

    Each exported symbol lives in a submodule that is only imported on
    first attribute access, which breaks the circular-import chain.
    """
    lazy_map = {
        "DevLoopOrchestrator": ".orchestrator",
        "LoopPhase": ".orchestrator",
        "LoopState": ".orchestrator",
        "DashboardTestRunner": ".test_runner",
        "TestScenario": ".test_runner",
        "GeminiPlanner": ".planning",
        "ProblemAnalyzer": ".analyzer",
        "ClaudeCodeBridge": ".claude_bridge",
    }
    if name not in lazy_map:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    import importlib

    module = importlib.import_module(lazy_map[name], __name__)
    return getattr(module, name)
# Public API of the devloop package; each name is resolved lazily by the
# module-level __getattr__.
__all__ = [
    "DevLoopOrchestrator",
    "LoopPhase",
    "LoopState",
    "DashboardTestRunner",
    "TestScenario",
    "GeminiPlanner",
    "ProblemAnalyzer",
    # Fix: ClaudeCodeBridge is exported by __getattr__ but was missing here,
    # so `from ... import *` and tooling did not see it.
    "ClaudeCodeBridge",
]

View File

@@ -0,0 +1,421 @@
"""
Problem Analyzer - Analyze test results and generate fix plans using Gemini.
Handles:
- Root cause analysis from test failures
- Pattern detection across failures
- Fix plan generation
- Priority assessment
"""
import asyncio
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class Issue:
    """A detected issue from test results."""
    # Unique identifier, e.g. "api_issue_1".
    id: str
    # Human-readable summary of what went wrong.
    description: str
    severity: str = "medium"  # "critical", "high", "medium", "low"
    # Coarse classification, e.g. "api", "filesystem", "ui", "cli".
    category: str = "unknown"
    # Source files/directories suspected to be involved.
    affected_files: List[str] = field(default_factory=list)
    # IDs of the test scenarios that exposed this issue.
    test_ids: List[str] = field(default_factory=list)
    # Explanation of why the failure happened, when known.
    root_cause: Optional[str] = None
@dataclass
class FixPlan:
    """Plan for fixing an issue."""
    # ID of the Issue this plan addresses.
    issue_id: str
    # One-sentence description of the fix strategy.
    approach: str
    # Ordered steps, e.g. {"action": "edit", "file": ..., "description": ...}.
    steps: List[Dict] = field(default_factory=list)
    estimated_effort: str = "medium"  # "low", "medium", "high"
    # How to undo the fix if it makes things worse.
    rollback_steps: List[str] = field(default_factory=list)
@dataclass
class AnalysisReport:
    """Complete analysis report."""
    # ISO-8601 creation time, captured when the report is instantiated.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # True when at least one issue was detected.
    issues_found: bool = False
    issues: List[Issue] = field(default_factory=list)
    # Keyed by issue id.
    fix_plans: Dict[str, FixPlan] = field(default_factory=dict)
    # Cross-failure patterns, e.g. {"pattern": ..., "occurrences": ...}.
    patterns: List[Dict] = field(default_factory=list)
    # High-level, human-readable suggestions.
    recommendations: List[str] = field(default_factory=list)
class ProblemAnalyzer:
    """
    Gemini-powered analysis of test failures and improvement opportunities.

    Capabilities:
    - Deep analysis of test results
    - Root cause identification
    - Pattern detection across failures
    - Fix plan generation with priority
    """

    def __init__(self, gemini_planner: Optional[Any] = None):
        """
        Initialize the analyzer.

        Args:
            gemini_planner: GeminiPlanner instance for API access. Created
                lazily on first use of ``self.planner`` when omitted.
        """
        self._planner = gemini_planner
        # Reserved for cross-run tracking; nothing appends to it yet.
        # (Annotation quoted: AnalysisReport need not exist at runtime here.)
        self._history: List["AnalysisReport"] = []

    @property
    def planner(self):
        """Get or create the Gemini planner (imported lazily to avoid cycles)."""
        if self._planner is None:
            from .planning import GeminiPlanner

            self._planner = GeminiPlanner()
        return self._planner

    async def analyze_test_results(self, test_report: Dict) -> Dict:
        """
        Perform deep analysis of test results.

        Args:
            test_report: Test report from DashboardTestRunner

        Returns:
            Analysis dict with issues, fix_plans, patterns, recommendations
        """
        summary = test_report.get("summary", {})
        scenarios = test_report.get("scenarios", [])
        # Quick return if all passed
        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "patterns": [],
                "recommendations": ["All tests passed!"],
            }
        # Analyze failures
        failures = [s for s in scenarios if not s.get("passed", True)]
        # Use Gemini for deep analysis if available ("mock" marks a stub client)
        if self.planner.client != "mock":
            return await self._gemini_analysis(test_report, failures)
        return self._rule_based_analysis(test_report, failures)

    async def _gemini_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Use Gemini for sophisticated analysis; fall back to rules on any error."""
        prompt = self._build_analysis_prompt(test_report, failures)
        try:
            # get_running_loop() replaces the deprecated get_event_loop();
            # this coroutine only ever executes inside a running loop.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, lambda: self.planner._model.generate_content(prompt)
            )
            text = response.text
            # Parse a fenced ```json block out of the response, if present.
            if "```json" in text:
                start = text.find("```json") + 7
                end = text.find("```", start)
                # Tolerate a missing closing fence instead of silently
                # slicing to index -1 (which dropped the last character).
                json_str = (text[start:end] if end != -1 else text[start:]).strip()
                analysis = json.loads(json_str)
            else:
                analysis = self._rule_based_analysis(test_report, failures)
            logger.info(f"Gemini analysis found {len(analysis.get('issues', []))} issues")
            return analysis
        except Exception as e:
            # API errors and malformed JSON both degrade to rule-based analysis.
            logger.error(f"Gemini analysis failed: {e}, falling back to rule-based")
            return self._rule_based_analysis(test_report, failures)

    def _build_analysis_prompt(self, test_report: Dict, failures: List[Dict]) -> str:
        """Build the analysis prompt sent to Gemini (defines its JSON contract)."""
        return f"""## Test Failure Analysis
### Test Report Summary
- Total Tests: {test_report.get("summary", {}).get("total", 0)}
- Passed: {test_report.get("summary", {}).get("passed", 0)}
- Failed: {test_report.get("summary", {}).get("failed", 0)}
### Failed Tests
{json.dumps(failures, indent=2)}
### Analysis Required
Analyze these test failures and provide:
1. **Root Cause Analysis**: What caused each failure?
2. **Pattern Detection**: Are there recurring issues?
3. **Fix Priority**: Which issues should be addressed first?
4. **Implementation Plan**: Specific code changes needed
Output as JSON:
```json
{{
"issues_found": true,
"issues": [
{{
"id": "issue_001",
"description": "What went wrong",
"severity": "high|medium|low",
"category": "api|ui|config|filesystem|logic",
"affected_files": ["path/to/file.py"],
"test_ids": ["test_001"],
"root_cause": "Why it happened"
}}
],
"fix_plans": {{
"issue_001": {{
"issue_id": "issue_001",
"approach": "How to fix it",
"steps": [
{{"action": "edit", "file": "path/to/file.py", "description": "Change X to Y"}}
],
"estimated_effort": "low|medium|high",
"rollback_steps": ["How to undo if needed"]
}}
}},
"patterns": [
{{"pattern": "Common issue type", "occurrences": 3, "suggestion": "Systemic fix"}}
],
"recommendations": [
"High-level improvement suggestions"
]
}}
```
Focus on actionable, specific fixes that Claude Code can implement.
"""

    def _rule_based_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Rule-based analysis when Gemini is not available.

        Buckets failures by scenario id / detail keys, then emits one issue
        plus a canned fix plan per API and filesystem failure, simple
        recurrence patterns, and high-level recommendations.
        """
        issues = []
        fix_plans = {}
        patterns = []
        # Categorize failures
        api_failures = []
        filesystem_failures = []
        browser_failures = []
        cli_failures = []
        for failure in failures:
            scenario_id = failure.get("scenario_id", "unknown")
            details = failure.get("details", {})
            # Detect issue type from the scenario id or the detail keys.
            # (An unused `error` local from the original loop was removed.)
            if "api" in scenario_id.lower() or "status_code" in details:
                api_failures.append(failure)
            elif "filesystem" in scenario_id.lower() or "exists" in details:
                filesystem_failures.append(failure)
            elif "browser" in scenario_id.lower():
                browser_failures.append(failure)
            elif "cli" in scenario_id.lower() or "command" in details:
                cli_failures.append(failure)
        # Generate issues for API failures
        for i, failure in enumerate(api_failures):
            issue_id = f"api_issue_{i + 1}"
            status = failure.get("details", {}).get("status_code", "unknown")
            issues.append(
                {
                    "id": issue_id,
                    "description": f"API request failed with status {status}",
                    # Server-side errors rank above everything else.
                    "severity": "high" if status in (500, 503) else "medium",
                    "category": "api",
                    "affected_files": self._guess_api_files(failure),
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": failure.get("error", "Unknown API error"),
                }
            )
            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Check API endpoint implementation",
                "steps": [
                    {"action": "check", "description": "Verify endpoint exists in routes"},
                    {"action": "test", "description": "Run endpoint manually with curl"},
                ],
                "estimated_effort": "medium",
                "rollback_steps": [],
            }
        # Generate issues for filesystem failures
        for i, failure in enumerate(filesystem_failures):
            issue_id = f"fs_issue_{i + 1}"
            path = failure.get("details", {}).get("path", "unknown path")
            issues.append(
                {
                    "id": issue_id,
                    "description": f"Expected file/directory not found: {path}",
                    "severity": "high",
                    "category": "filesystem",
                    "affected_files": [path],
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": "File was not created during implementation",
                }
            )
            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Create missing file/directory",
                "steps": [
                    {"action": "create", "path": path, "description": f"Create {path}"},
                ],
                "estimated_effort": "low",
                "rollback_steps": [f"Remove {path}"],
            }
        # Detect patterns (only reported when a category recurs)
        if len(api_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple API failures",
                    "occurrences": len(api_failures),
                    "suggestion": "Check if backend server is running",
                }
            )
        if len(filesystem_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple missing files",
                    "occurrences": len(filesystem_failures),
                    "suggestion": "Review study creation process",
                }
            )
        # Generate recommendations
        recommendations = []
        if api_failures:
            recommendations.append("Verify backend API is running on port 8000")
        if filesystem_failures:
            recommendations.append("Check that study directory structure is correctly created")
        if browser_failures:
            recommendations.append("Ensure frontend is running on port 3000")
        if cli_failures:
            recommendations.append("Check Python environment and script paths")
        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "patterns": patterns,
            "recommendations": recommendations,
        }

    def _guess_api_files(self, failure: Dict) -> List[str]:
        """Guess which API files might be affected.

        The failure payload does not identify the endpoint, so return the
        directories where route/service code lives. (A dead lookup of the
        "response" detail in the original was removed.)
        """
        return [
            "atomizer-dashboard/backend/api/routes/",
            "atomizer-dashboard/backend/api/services/",
        ]

    async def analyze_iteration_history(self, iterations: List[Dict]) -> Dict:
        """
        Analyze patterns across multiple iterations.

        Args:
            iterations: List of IterationResult dicts

        Returns:
            Cross-iteration analysis (success rate, recurring issue counts)
        """
        recurring_issues: Dict[str, int] = {}
        successes = 0
        for iteration in iterations:
            if iteration.get("success"):
                successes += 1
            # Count issue categories across every iteration's analysis.
            analysis = iteration.get("analysis", {})
            for issue in analysis.get("issues", []):
                issue_type = issue.get("category", "unknown")
                recurring_issues[issue_type] = recurring_issues.get(issue_type, 0) + 1
        # Guard against division by zero for an empty history.
        total = len(iterations) or 1
        return {
            "total_iterations": len(iterations),
            "success_rate": successes / total,
            "recurring_issues": recurring_issues,
            "most_common_issue": max(recurring_issues, key=recurring_issues.get)
            if recurring_issues
            else None,
            "recommendation": self._generate_meta_recommendation(
                recurring_issues, successes / total
            ),
        }

    def _generate_meta_recommendation(self, recurring_issues: Dict, success_rate: float) -> str:
        """Generate a high-level recommendation based on iteration history."""
        if success_rate >= 0.8:
            return "Development cycle is healthy. Minor issues detected."
        elif success_rate >= 0.5:
            most_common = (
                max(recurring_issues, key=recurring_issues.get) if recurring_issues else "unknown"
            )
            return f"Focus on fixing {most_common} issues to improve success rate."
        else:
            return (
                "Development cycle needs attention. Consider reviewing architecture or test design."
            )

    def get_priority_queue(self, analysis: Dict) -> List[Dict]:
        """
        Get issues sorted by priority for fixing.

        Args:
            analysis: Analysis result dict

        Returns:
            Sorted list of {"issue", "fix_plan"} dicts, most severe first
        """
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})
        # Priority order (unknown severities sort with "medium")
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        sorted_issues = sorted(
            issues, key=lambda x: severity_order.get(x.get("severity", "medium"), 2)
        )
        # Attach each issue's fix plan (None when no plan was generated).
        return [
            {"issue": issue, "fix_plan": fix_plans.get(issue.get("id"))}
            for issue in sorted_issues
        ]

View File

@@ -0,0 +1,170 @@
"""
Browser Test Scenarios for DevLoop
Pre-built Playwright scenarios that can be used for dashboard verification.
These scenarios use the same structure as DashboardTestRunner browser tests
but provide ready-made tests for common dashboard operations.
"""
from typing import Dict, List
def get_study_browser_scenarios(study_name: str) -> List[Dict]:
    """
    Get browser test scenarios for a specific study.

    Args:
        study_name: The study to test

    Returns:
        List of three browser test scenarios: home page, study canvas,
        and dashboard. Each dict matches the DashboardTestRunner browser
        scenario schema (id/name/type/steps/expected_outcome/timeout_ms).
    """
    return [
        {
            "id": "browser_home_loads",
            "name": "Home page loads with studies",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
                {"action": "wait_for", "selector": "button:has-text('trials')"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
        {
            "id": "browser_canvas_loads",
            "name": f"Canvas loads for {study_name}",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": f"/canvas/{study_name}"},
                # Wait for ReactFlow nodes to render
                {"action": "wait_for", "selector": ".react-flow__node"},
            ],
            "expected_outcome": {"status": "pass"},
            # Canvas rendering is slower, so it gets a longer budget.
            "timeout_ms": 20000,
        },
        {
            "id": "browser_dashboard_loads",
            "name": f"Dashboard loads for {study_name}",
            "type": "browser",
            "steps": [
                # Plain string: the URL does not depend on the study
                # (the original used a pointless f-string here).
                {"action": "navigate", "url": "/dashboard"},
                # Wait for dashboard main element to load
                {"action": "wait_for", "selector": "main"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
    ]
def get_ui_verification_scenarios() -> List[Dict]:
    """
    Get scenarios for verifying UI components.
    These are general UI health checks, not study-specific.
    """
    stats_steps: List[Dict] = [
        {"action": "navigate", "url": "/"},
        {"action": "wait_for", "selector": "text=Total Studies"},
        {"action": "wait_for", "selector": "text=Running"},
        {"action": "wait_for", "selector": "text=Total Trials"},
    ]
    folder_steps: List[Dict] = [
        {"action": "navigate", "url": "/"},
        {"action": "wait_for", "selector": "button:has-text('trials')"},
        {"action": "click", "selector": "button:has-text('trials')"},
        # After click, should see study status badges
        {
            "action": "wait_for",
            "selector": "span:has-text('completed'), span:has-text('running'), span:has-text('paused')",
        },
    ]
    return [
        {
            "id": "browser_home_stats",
            "name": "Home page shows statistics",
            "type": "browser",
            "steps": stats_steps,
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        },
        {
            "id": "browser_expand_folder",
            "name": "Topic folder expands on click",
            "type": "browser",
            "steps": folder_steps,
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        },
    ]
def get_chat_verification_scenarios() -> List[Dict]:
    """
    Get scenarios for verifying chat/Claude integration.
    """
    chat_steps: List[Dict] = [
        {"action": "navigate", "url": "/canvas/support_arm"},
        {"action": "wait_for", "selector": ".react-flow__node"},
        # Look for chat toggle or chat panel
        {
            "action": "click",
            "selector": "button[aria-label='Chat'], button:has-text('Chat')",
        },
        {"action": "wait_for", "selector": "textarea, input[type='text']"},
    ]
    chat_panel_scenario = {
        "id": "browser_chat_panel",
        "name": "Chat panel opens",
        "type": "browser",
        "steps": chat_steps,
        "expected_outcome": {"status": "pass"},
        "timeout_ms": 15000,
    }
    return [chat_panel_scenario]
# Standard scenario sets
# Built once at import time from the helper functions above; "full" is the
# union of the general UI checks and the default study's ("support_arm")
# scenarios.
STANDARD_BROWSER_SCENARIOS: Dict[str, List[Dict]] = {
    "quick": [
        {
            "id": "browser_smoke",
            "name": "Dashboard smoke test",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        }
    ],
    # General home-page health checks.
    "home": get_ui_verification_scenarios(),
    # UI checks plus the default study's scenarios.
    "full": get_ui_verification_scenarios() + get_study_browser_scenarios("support_arm"),
}
def get_browser_scenarios(level: str = "quick", study_name: "str | None" = None) -> List[Dict]:
    """
    Get browser scenarios by level.

    Args:
        level: "quick" (smoke), "home" (home page), "full" (all scenarios)
        study_name: Optional study name for study-specific tests

    Returns:
        List of browser test scenarios
    """
    if level == "quick":
        return STANDARD_BROWSER_SCENARIOS["quick"]
    elif level == "home":
        return STANDARD_BROWSER_SCENARIOS["home"]
    elif level == "full":
        # Copy so extending below does not mutate the module-level list.
        scenarios = list(STANDARD_BROWSER_SCENARIOS["full"])
        if study_name:
            # NOTE(review): "full" already includes support_arm's study
            # scenarios, and get_study_browser_scenarios reuses the same
            # scenario ids for every study — so passing a study_name here
            # yields duplicate ids. Confirm downstream tolerates that.
            scenarios.extend(get_study_browser_scenarios(study_name))
        return scenarios
    elif level == "study" and study_name:
        return get_study_browser_scenarios(study_name)
    else:
        # Unknown level (or "study" without a name): fall back to the smoke set.
        return STANDARD_BROWSER_SCENARIOS["quick"]

View File

@@ -0,0 +1,392 @@
"""
Claude Code Bridge - Interface between DevLoop and Claude Code execution.
Handles:
- Translating Gemini plans into Claude Code instructions
- Executing code changes through OpenCode extension or CLI
- Capturing implementation results
"""
import asyncio
import json
import logging
import os
import subprocess
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ImplementationResult:
    """Result of a Claude Code implementation."""
    # NOTE(review): the bridge methods below return plain dicts rather than
    # instances of this dataclass — it appears to document the result shape.
    status: str  # "success", "partial", "error"
    # Paths reported as created/modified by the implementation step.
    files_modified: List[str]
    # Non-fatal problems encountered during execution.
    warnings: List[str]
    # Fatal problems; non-empty when status is "error".
    errors: List[str]
    # Wall-clock duration of the implementation step.
    duration_seconds: float
class ClaudeCodeBridge:
    """
    Bridge between Gemini plans and Claude Code execution.
    Supports multiple execution modes:
    - CLI: Direct Claude Code CLI invocation
    - API: Anthropic API for code generation (if API key available)
    - Manual: Generate instructions for human execution
    """
    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the bridge.
        Args:
            config: Configuration with execution mode and API settings
        """
        self.config = config or {}
        # NOTE(review): hardcoded per-user default workspace path — consider
        # requiring it in config or deriving it from the environment.
        self.workspace = Path(self.config.get("workspace", "C:/Users/antoi/Atomizer"))
        self.execution_mode = self.config.get("mode", "cli")
        # Anthropic client; created lazily by the `client` property.
        self._client = None
    @property
    def client(self):
        """Lazy-load Anthropic client if API mode."""
        # Only attempted in "api" mode; retried on every access until it
        # succeeds (returns None when the package or key is unavailable).
        if self._client is None and self.execution_mode == "api":
            try:
                import anthropic
                api_key = self.config.get("api_key") or os.environ.get("ANTHROPIC_API_KEY")
                if api_key:
                    self._client = anthropic.Anthropic(api_key=api_key)
                    logger.info("Anthropic client initialized")
            except ImportError:
                logger.warning("anthropic package not installed")
        return self._client
    def create_implementation_session(self, plan: Dict) -> str:
        """
        Generate Claude Code instruction from Gemini plan.
        Args:
            plan: Plan dict from GeminiPlanner
        Returns:
            Formatted instruction string for Claude Code
        """
        objective = plan.get("objective", "Unknown objective")
        approach = plan.get("approach", "")
        tasks = plan.get("tasks", [])
        acceptance_criteria = plan.get("acceptance_criteria", [])
        # Markdown header: objective, approach, then a numbered task list.
        instruction = f"""## Implementation Task: {objective}
### Approach
{approach}
### Tasks to Complete
"""
        for i, task in enumerate(tasks, 1):
            instruction += f"""
{i}. **{task.get("description", "Task")}**
- File: `{task.get("file", "TBD")}`
- Priority: {task.get("priority", "medium")}
"""
            # Optional per-task extras only appear when present in the plan.
            if task.get("code_hint"):
                instruction += f" - Hint: {task.get('code_hint')}\n"
            if task.get("dependencies"):
                instruction += f" - Depends on: {', '.join(task['dependencies'])}\n"
        instruction += """
### Acceptance Criteria
"""
        for criterion in acceptance_criteria:
            instruction += f"- [ ] {criterion}\n"
        # Fixed project-wide constraints appended to every instruction.
        instruction += """
### Constraints
- Maintain existing API contracts
- Follow Atomizer coding standards
- Ensure AtomizerSpec v2.0 compatibility
- Create README.md for any new study
- Use existing extractors from SYS_12 when possible
"""
        return instruction
    async def execute_plan(self, plan: Dict) -> Dict:
        """
        Execute an implementation plan.
        Args:
            plan: Plan dict from GeminiPlanner
        Returns:
            Implementation result dict
        """
        instruction = self.create_implementation_session(plan)
        # Dispatch on the configured mode; anything unrecognized goes manual.
        if self.execution_mode == "cli":
            return await self._execute_via_cli(instruction, plan)
        elif self.execution_mode == "api":
            return await self._execute_via_api(instruction, plan)
        else:
            return await self._execute_manual(instruction, plan)
    async def _execute_via_cli(self, instruction: str, plan: Dict) -> Dict:
        """Execute through Claude Code CLI."""
        start_time = datetime.now()
        # Write instruction to temp file
        instruction_file = self.workspace / ".devloop_instruction.md"
        instruction_file.write_text(instruction)
        files_modified = []
        warnings = []
        errors = []
        try:
            # Try to invoke Claude Code CLI
            # Note: This assumes claude-code or similar CLI is available
            # NOTE(review): this interpolates an unquoted workspace path into
            # PowerShell and passes the instruction file *path* (not its
            # contents) as the --print argument — confirm `claude --print`
            # accepts a file path and that the path contains no spaces.
            result = subprocess.run(
                [
                    "powershell",
                    "-Command",
                    f"cd {self.workspace}; claude --print '{instruction_file}'",
                ],
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
                cwd=str(self.workspace),
            )
            if result.returncode == 0:
                # Parse output for modified files
                output = result.stdout
                for line in output.split("\n"):
                    if "Modified:" in line or "Created:" in line:
                        parts = line.split(":", 1)
                        if len(parts) > 1:
                            files_modified.append(parts[1].strip())
                status = "success"
            else:
                errors.append(result.stderr or "CLI execution failed")
                status = "error"
        except subprocess.TimeoutExpired:
            errors.append("CLI execution timed out after 5 minutes")
            status = "error"
        except FileNotFoundError:
            # Claude CLI not found, fall back to manual mode
            # (returns here; the finally block below still cleans up).
            logger.warning("Claude CLI not found, switching to manual mode")
            return await self._execute_manual(instruction, plan)
        except Exception as e:
            errors.append(str(e))
            status = "error"
        finally:
            # Clean up temp file
            if instruction_file.exists():
                instruction_file.unlink()
        duration = (datetime.now() - start_time).total_seconds()
        return {
            "status": status,
            "files": files_modified,
            "warnings": warnings,
            "errors": errors,
            "duration_seconds": duration,
        }
    async def _execute_via_api(self, instruction: str, plan: Dict) -> Dict:
        """Execute through Anthropic API for code generation."""
        # No client (wrong mode, missing key/package): degrade to manual mode.
        if not self.client:
            return await self._execute_manual(instruction, plan)
        start_time = datetime.now()
        files_modified = []
        warnings = []
        errors = []
        try:
            # Use Claude API for code generation
            response = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=8192,
                messages=[
                    {
                        "role": "user",
                        "content": f"""You are implementing code for the Atomizer FEA optimization framework.
{instruction}
For each file that needs to be created or modified, output the complete file content in this format:
### FILE: path/to/file.py
```python
# file content here
```
Be thorough and implement all tasks completely.
""",
                    }
                ],
            )
            # Parse response for file contents
            content = response.content[0].text
            # Extract files from response
            import re
            # Matches "### FILE: <path>" headers followed by a fenced block.
            file_pattern = r"### FILE: (.+?)\n```\w*\n(.*?)```"
            matches = re.findall(file_pattern, content, re.DOTALL)
            for file_path, file_content in matches:
                try:
                    full_path = self.workspace / file_path.strip()
                    full_path.parent.mkdir(parents=True, exist_ok=True)
                    full_path.write_text(file_content.strip())
                    files_modified.append(str(file_path.strip()))
                    logger.info(f"Created/modified: {file_path}")
                except Exception as e:
                    errors.append(f"Failed to write {file_path}: {e}")
            # "partial" = the API replied but no files were parsed out.
            status = "success" if files_modified else "partial"
        except Exception as e:
            errors.append(str(e))
            status = "error"
        duration = (datetime.now() - start_time).total_seconds()
        return {
            "status": status,
            "files": files_modified,
            "warnings": warnings,
            "errors": errors,
            "duration_seconds": duration,
        }
    async def _execute_manual(self, instruction: str, plan: Dict) -> Dict:
        """
        Generate manual instructions (when automation not available).
        Saves instruction to file for human execution.
        """
        start_time = datetime.now()
        # Save instruction for manual execution
        output_file = self.workspace / ".devloop" / "pending_instruction.md"
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(instruction)
        logger.info(f"Manual instruction saved to: {output_file}")
        return {
            "status": "pending_manual",
            "instruction_file": str(output_file),
            "files": [],
            "warnings": ["Automated execution not available. Please execute manually."],
            "errors": [],
            "duration_seconds": (datetime.now() - start_time).total_seconds(),
        }
    async def execute_fix(self, fix_plan: Dict) -> Dict:
        """
        Execute a specific fix from analysis.
        Args:
            fix_plan: Fix plan dict from ProblemAnalyzer
        Returns:
            Fix result dict
        """
        issue_id = fix_plan.get("issue_id", "unknown")
        approach = fix_plan.get("approach", "")
        steps = fix_plan.get("steps", [])
        # Build a bug-fix instruction; the text itself is informational — the
        # actual execution path is execute_plan() with a mini-plan below.
        instruction = f"""## Bug Fix: {issue_id}
### Approach
{approach}
### Steps
"""
        for i, step in enumerate(steps, 1):
            instruction += f"{i}. {step.get('description', step.get('action', 'Step'))}\n"
            if step.get("file"):
                instruction += f" File: `{step['file']}`\n"
        instruction += """
### Verification
After implementing the fix, verify that:
1. The specific test case passes
2. No regressions are introduced
3. Code follows Atomizer patterns
"""
        # Execute as a mini-plan
        return await self.execute_plan(
            {
                "objective": f"Fix: {issue_id}",
                "approach": approach,
                "tasks": [
                    {
                        "description": step.get("description", step.get("action")),
                        "file": step.get("file"),
                        "priority": "high",
                    }
                    for step in steps
                ],
                "acceptance_criteria": [
                    "Original test passes",
                    "No new errors introduced",
                ],
            }
        )
    def get_execution_status(self) -> Dict:
        """Get current execution status."""
        pending_file = self.workspace / ".devloop" / "pending_instruction.md"
        return {
            "mode": self.execution_mode,
            "workspace": str(self.workspace),
            # True while a manual instruction is waiting to be executed.
            "has_pending_instruction": pending_file.exists(),
            # Note: accessing `client` may attempt lazy initialization.
            "api_available": self.client is not None,
        }
    async def verify_implementation(self, expected_files: List[str]) -> Dict:
        """
        Verify that implementation created expected files.
        Args:
            expected_files: List of file paths that should exist
        Returns:
            Verification result
        """
        missing = []
        found = []
        for file_path in expected_files:
            # Relative paths are resolved against the workspace root.
            path = (
                self.workspace / file_path if not Path(file_path).is_absolute() else Path(file_path)
            )
            if path.exists():
                found.append(str(file_path))
            else:
                missing.append(str(file_path))
        return {
            "complete": len(missing) == 0,
            "found": found,
            "missing": missing,
        }

View File

@@ -0,0 +1,652 @@
"""
CLI Bridge - Execute AI tasks through Claude Code CLI and OpenCode CLI.
Uses your existing subscriptions via CLI tools:
- Claude Code CLI (claude.exe) for implementation
- OpenCode CLI (opencode) for Gemini planning
No API keys needed - leverages your CLI subscriptions.
"""
import asyncio
import json
import logging
import os
import subprocess
import tempfile
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import re
logger = logging.getLogger(__name__)
@dataclass
class CLIResult:
    """Result from CLI execution."""
    # True when the subprocess exited with return code 0.
    success: bool
    # Captured stdout of the CLI process.
    output: str
    # Captured stderr, or a synthesized error message (timeout/exception).
    error: str
    # Wall-clock time spent on the invocation.
    duration_seconds: float
    # File paths the CLI reported as created/modified (parsed from output).
    files_modified: List[str]
class ClaudeCodeCLI:
    """
    Execute tasks through Claude Code CLI.
    Uses: claude.exe --print for non-interactive execution
    """

    # NOTE(review): hardcoded per-user install path — consider resolving the
    # executable via shutil.which("claude") or configuration instead.
    CLAUDE_PATH = r"C:\Users\antoi\.local\bin\claude.exe"

    def __init__(self, workspace: Path):
        # Directory the CLI runs in (repository root).
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute a prompt through Claude Code CLI.

        Args:
            prompt: The instruction/prompt to execute
            timeout: Timeout in seconds
            model: Model to use (opus, sonnet, haiku)

        Returns:
            CLIResult with output and modified files
        """
        start_time = datetime.now()
        # Build command
        cmd = [
            self.CLAUDE_PATH,
            "--print",  # Non-interactive mode
            "--model",
            model,
            "--permission-mode",
            "acceptEdits",  # Auto-accept edits
            prompt,
        ]
        logger.info(f"Executing Claude Code CLI: {prompt[:100]}...")
        try:
            # Fix: run the blocking subprocess call in a worker thread so the
            # event loop is not stalled for up to `timeout` seconds (the
            # original called subprocess.run directly inside the coroutine).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},  # Disable colors
            )
            output = result.stdout
            error = result.stderr
            success = result.returncode == 0
            # Extract modified files from output
            files_modified = self._extract_modified_files(output)
            duration = (datetime.now() - start_time).total_seconds()
            logger.info(
                f"Claude Code completed in {duration:.1f}s, modified {len(files_modified)} files"
            )
            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=files_modified,
            )
        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    def _extract_modified_files(self, output: str) -> List[str]:
        """Extract the list of modified files from Claude Code output.

        Returns unique paths in first-seen order. (The original used
        ``list(set(files))``, whose ordering is nondeterministic.)
        """
        files: List[str] = []
        # Look for file modification patterns
        patterns = [
            r"(?:Created|Modified|Wrote|Updated|Edited):\s*[`'\"]?([^\s`'\"]+)[`'\"]?",
            r"Writing to [`'\"]?([^\s`'\"]+)[`'\"]?",
            r"File saved: ([^\s]+)",
        ]
        for pattern in patterns:
            files.extend(re.findall(pattern, output, re.IGNORECASE))
        # dict.fromkeys dedupes while preserving insertion order.
        return list(dict.fromkeys(files))

    async def execute_with_context(
        self,
        prompt: str,
        context_files: List[str],
        timeout: int = 300,
    ) -> "CLIResult":
        """
        Execute with additional context files loaded.

        Args:
            prompt: The instruction
            context_files: Files to read as context
            timeout: Timeout in seconds
        """
        # Append the context file list to the prompt; the CLI reads them itself.
        context_prompt = prompt
        if context_files:
            context_prompt += "\n\nContext files to consider:\n"
            for f in context_files:
                context_prompt += f"- {f}\n"
        return await self.execute(context_prompt, timeout)
class OpenCodeCLI:
"""
Execute tasks through OpenCode CLI (Gemini).
Uses: opencode run for non-interactive execution
"""
OPENCODE_PATH = r"C:\Users\antoi\AppData\Roaming\npm\opencode.cmd"
    def __init__(self, workspace: Path):
        # Directory used as cwd for OpenCode CLI invocations.
        self.workspace = workspace
async def execute(
self,
prompt: str,
timeout: int = 180,
model: str = "google/gemini-3-pro-preview",
) -> CLIResult:
"""
Execute a prompt through OpenCode CLI.
Args:
prompt: The instruction/prompt
timeout: Timeout in seconds
model: Model to use
Returns:
CLIResult with output
"""
start_time = datetime.now()
# Build command
cmd = [self.OPENCODE_PATH, "run", "--model", model, prompt]
logger.info(f"Executing OpenCode CLI: {prompt[:100]}...")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
cwd=str(self.workspace),
env={**os.environ, "TERM": "dumb"},
)
output = result.stdout
error = result.stderr
success = result.returncode == 0
duration = (datetime.now() - start_time).total_seconds()
logger.info(f"OpenCode completed in {duration:.1f}s")
return CLIResult(
success=success,
output=output,
error=error,
duration_seconds=duration,
files_modified=[], # OpenCode typically doesn't modify files directly
)
except subprocess.TimeoutExpired:
return CLIResult(
success=False,
output="",
error=f"Timeout after {timeout}s",
duration_seconds=timeout,
files_modified=[],
)
except Exception as e:
return CLIResult(
success=False,
output="",
error=str(e),
duration_seconds=(datetime.now() - start_time).total_seconds(),
files_modified=[],
)
async def plan(self, objective: str, context: Dict = None) -> Dict:
"""
Create an implementation plan using Gemini via OpenCode.
Args:
objective: What to achieve
context: Additional context
Returns:
Plan dict with tasks and test scenarios
"""
prompt = f"""You are a strategic planner for Atomizer, an FEA optimization framework.
## Objective
{objective}
## Context
{json.dumps(context, indent=2) if context else "None provided"}
## Task
Create a detailed implementation plan in JSON format with:
1. tasks: List of implementation tasks for Claude Code
2. test_scenarios: Tests to verify implementation
3. acceptance_criteria: Success conditions
Output ONLY valid JSON in this format:
```json
{{
"objective": "{objective}",
"approach": "Brief description",
"tasks": [
{{
"id": "task_001",
"description": "What to do",
"file": "path/to/file.py",
"priority": "high"
}}
],
"test_scenarios": [
{{
"id": "test_001",
"name": "Test name",
"type": "filesystem",
"steps": [{{"action": "check_exists", "path": "some/path"}}],
"expected_outcome": {{"exists": true}}
}}
],
"acceptance_criteria": [
"Criterion 1"
]
}}
```
"""
result = await self.execute(prompt)
if not result.success:
logger.error(f"OpenCode planning failed: {result.error}")
return self._fallback_plan(objective, context)
# Parse JSON from output
try:
# Find JSON block in output
output = result.output
if "```json" in output:
start = output.find("```json") + 7
end = output.find("```", start)
json_str = output[start:end].strip()
elif "```" in output:
start = output.find("```") + 3
end = output.find("```", start)
json_str = output[start:end].strip()
else:
# Try to find JSON object directly
match = re.search(r"\{.*\}", output, re.DOTALL)
if match:
json_str = match.group()
else:
return self._fallback_plan(objective, context)
plan = json.loads(json_str)
logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
return plan
except json.JSONDecodeError as e:
logger.error(f"Failed to parse plan JSON: {e}")
return self._fallback_plan(objective, context)
def _fallback_plan(self, objective: str, context: Dict = None) -> Dict:
"""Generate a fallback plan when Gemini fails."""
logger.warning("Using fallback plan")
return {
"objective": objective,
"approach": "Fallback plan - manual implementation",
"tasks": [
{
"id": "task_001",
"description": f"Implement: {objective}",
"file": "TBD",
"priority": "high",
}
],
"test_scenarios": [],
"acceptance_criteria": [objective],
}
async def analyze(self, test_results: Dict) -> Dict:
"""
Analyze test results using Gemini via OpenCode.
Args:
test_results: Test report from dashboard
Returns:
Analysis with issues and fix plans
"""
summary = test_results.get("summary", {})
scenarios = test_results.get("scenarios", [])
if summary.get("failed", 0) == 0:
return {
"issues_found": False,
"issues": [],
"fix_plans": {},
"recommendations": ["All tests passed!"],
}
failures = [s for s in scenarios if not s.get("passed", True)]
prompt = f"""Analyze these test failures for Atomizer FEA optimization framework:
## Test Summary
- Total: {summary.get("total", 0)}
- Passed: {summary.get("passed", 0)}
- Failed: {summary.get("failed", 0)}
## Failed Tests
{json.dumps(failures, indent=2)}
## Task
Provide root cause analysis and fix plans in JSON:
```json
{{
"issues_found": true,
"issues": [
{{
"id": "issue_001",
"description": "What went wrong",
"severity": "high",
"root_cause": "Why it failed"
}}
],
"fix_plans": {{
"issue_001": {{
"approach": "How to fix",
"steps": [{{"action": "edit", "file": "path", "description": "change"}}]
}}
}},
"recommendations": ["suggestion"]
}}
```
"""
result = await self.execute(prompt)
if not result.success:
return self._fallback_analysis(failures)
try:
output = result.output
if "```json" in output:
start = output.find("```json") + 7
end = output.find("```", start)
json_str = output[start:end].strip()
else:
match = re.search(r"\{.*\}", output, re.DOTALL)
json_str = match.group() if match else "{}"
return json.loads(json_str)
except:
return self._fallback_analysis(failures)
def _fallback_analysis(self, failures: List[Dict]) -> Dict:
"""Generate fallback analysis."""
issues = []
fix_plans = {}
for i, failure in enumerate(failures):
issue_id = f"issue_{i + 1}"
issues.append(
{
"id": issue_id,
"description": failure.get("error", "Unknown error"),
"severity": "medium",
"root_cause": "Requires investigation",
}
)
fix_plans[issue_id] = {
"approach": "Manual investigation required",
"steps": [],
}
return {
"issues_found": len(issues) > 0,
"issues": issues,
"fix_plans": fix_plans,
"recommendations": ["Review failed tests manually"],
}
class DevLoopCLIOrchestrator:
    """
    Orchestrate DevLoop using CLI tools.
    - OpenCode (Gemini) for planning and analysis
    - Claude Code for implementation and fixes
    """

    def __init__(self, workspace: Path = None):
        # Default to the local Atomizer checkout when no workspace is given.
        self.workspace = workspace or Path("C:/Users/antoi/Atomizer")
        self.claude = ClaudeCodeCLI(self.workspace)
        self.opencode = OpenCodeCLI(self.workspace)
        self.iteration = 0

    async def run_cycle(
        self,
        objective: str,
        context: Dict = None,
        max_iterations: int = 5,
    ) -> Dict:
        """
        Run a complete development cycle.

        Args:
            objective: What to achieve
            context: Additional context
            max_iterations: Maximum fix iterations

        Returns:
            Cycle report (objective, per-iteration records, status, duration)
        """
        from .test_runner import DashboardTestRunner

        cycle_start = datetime.now()
        report = {
            "objective": objective,
            "iterations": [],
            "status": "in_progress",
        }
        logger.info(f"Starting DevLoop cycle: {objective}")
        # Phase 1: Plan (Gemini via OpenCode)
        logger.info("Phase 1: Planning with Gemini...")
        plan = await self.opencode.plan(objective, context)
        for attempt in range(1, max_iterations + 1):
            record = {"iteration": attempt}
            # Phase 2: Implement (Claude Code)
            logger.info(f"Phase 2 (iter {attempt}): Implementing with Claude Code...")
            build = await self._implement(plan)
            record["implementation"] = {
                "success": build.success,
                "files_modified": build.files_modified,
            }
            # Phase 3: Test (Dashboard)
            logger.info(f"Phase 3 (iter {attempt}): Testing...")
            runner = DashboardTestRunner()
            test_report = await runner.run_test_suite(plan.get("test_scenarios", []))
            record["test_results"] = test_report
            # Stop as soon as the suite is clean.
            if test_report.get("summary", {}).get("failed", 0) == 0:
                logger.info("All tests passed!")
                report["iterations"].append(record)
                report["status"] = "success"
                break
            # Phase 4: Analyze (Gemini via OpenCode)
            logger.info(f"Phase 4 (iter {attempt}): Analyzing failures...")
            analysis = await self.opencode.analyze(test_report)
            record["analysis"] = analysis
            if not analysis.get("issues_found"):
                report["status"] = "success"
                report["iterations"].append(record)
                break
            # Phase 5: Fix (Claude Code)
            logger.info(f"Phase 5 (iter {attempt}): Fixing issues...")
            repair = await self._fix(analysis)
            record["fixes"] = {
                "success": repair.success,
                "files_modified": repair.files_modified,
            }
            report["iterations"].append(record)
        if report["status"] == "in_progress":
            report["status"] = "max_iterations_reached"
        report["duration_seconds"] = (datetime.now() - cycle_start).total_seconds()
        logger.info(f"DevLoop cycle completed: {report['status']}")
        return report

    async def _implement(self, plan: Dict) -> CLIResult:
        """Implement the plan using Claude Code."""
        tasks = plan.get("tasks", [])
        if not tasks:
            return CLIResult(
                success=True,
                output="No tasks to implement",
                error="",
                duration_seconds=0,
                files_modified=[],
            )
        # Assemble the implementation prompt section by section.
        sections = [
            f"""Implement the following tasks for Atomizer:
## Objective
{plan.get("objective", "Unknown")}
## Approach
{plan.get("approach", "Follow best practices")}
## Tasks
"""
        ]
        sections.extend(
            f"""
### {task.get("id", "task")}: {task.get("description", "")}
- File: {task.get("file", "TBD")}
- Priority: {task.get("priority", "medium")}
"""
            for task in tasks
        )
        sections.append(
            """
## Requirements
- Follow Atomizer coding standards
- Use AtomizerSpec v2.0 format
- Create README.md for any new study
- Use existing extractors from optimization_engine/extractors/
"""
        )
        return await self.claude.execute("".join(sections), timeout=300)

    async def _fix(self, analysis: Dict) -> CLIResult:
        """Apply fixes using Claude Code."""
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})
        if not issues:
            return CLIResult(
                success=True,
                output="No issues to fix",
                error="",
                duration_seconds=0,
                files_modified=[],
            )
        # Build one prompt section per issue, attaching its fix plan when present.
        pieces = ["Fix the following issues:\n\n"]
        for issue in issues:
            key = issue.get("id", "unknown")
            pieces.append(
                f"""
## Issue: {key}
- Description: {issue.get("description", "")}
- Root Cause: {issue.get("root_cause", "Unknown")}
- Severity: {issue.get("severity", "medium")}
"""
            )
            plan_for_issue = fix_plans.get(key, {})
            if plan_for_issue:
                pieces.append(f"- Fix Approach: {plan_for_issue.get('approach', 'Investigate')}\n")
                pieces.extend(
                    f"  - {step.get('description', step.get('action', 'step'))}\n"
                    for step in plan_for_issue.get("steps", [])
                )
        return await self.claude.execute("".join(pieces), timeout=300)

    async def step_plan(self, objective: str, context: Dict = None) -> Dict:
        """Execute only the planning phase."""
        return await self.opencode.plan(objective, context)

    async def step_implement(self, plan: Dict) -> CLIResult:
        """Execute only the implementation phase."""
        return await self._implement(plan)

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase."""
        return await self.opencode.analyze(test_results)

View File

@@ -0,0 +1,561 @@
"""
DevLoop Orchestrator - Master controller for closed-loop development.
Coordinates:
- Gemini Pro: Strategic planning, analysis, test design
- Claude Code: Implementation, code changes, fixes
- Dashboard: Automated testing, verification
- LAC: Learning capture and retrieval
"""
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Callable
import logging
logger = logging.getLogger(__name__)
class LoopPhase(Enum):
    """Current phase in the development loop.

    Values are the wire/serialized names used by ``get_state()``.
    """

    IDLE = "idle"                  # no cycle running
    PLANNING = "planning"          # Gemini drafting the implementation plan
    IMPLEMENTING = "implementing"  # Claude Code applying changes
    TESTING = "testing"            # dashboard test suite executing
    ANALYZING = "analyzing"        # Gemini triaging test failures
    FIXING = "fixing"              # Claude Code applying fixes
    VERIFYING = "verifying"        # re-running targeted tests on the fixes
@dataclass
class LoopState:
    """Current state of the development loop (mutated in place and pushed
    to subscribers on every change)."""

    phase: LoopPhase = LoopPhase.IDLE    # which stage of the cycle is active
    iteration: int = 0                   # monotonically increasing iteration counter
    current_task: Optional[str] = None   # human-readable label for the active work
    test_results: Optional[Dict] = None  # latest dashboard test report, if any
    analysis: Optional[Dict] = None      # latest failure analysis, if any
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())  # ISO timestamp of last change
@dataclass
class IterationResult:
    """Result of a single development iteration (one pass through
    plan → implement → test → analyze → fix → verify)."""

    iteration: int                        # iteration index at the time it ran
    plan: Optional[Dict] = None           # output of the planning phase
    implementation: Optional[Dict] = None # output of the implementation phase
    test_results: Optional[Dict] = None   # dashboard test report
    analysis: Optional[Dict] = None       # failure analysis (when tests failed)
    fixes: Optional[List[Dict]] = None    # per-issue fix attempts
    verification: Optional[Dict] = None   # re-test results for the fixes
    success: bool = False                 # True when the iteration ended cleanly
    duration_seconds: float = 0.0         # wall-clock duration of the iteration
@dataclass
class CycleReport:
    """Complete report for a development cycle (one or more iterations)."""

    objective: str                        # what the cycle set out to achieve
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())  # ISO start timestamp
    end_time: Optional[str] = None        # ISO end timestamp, set when the cycle finishes
    iterations: List[IterationResult] = field(default_factory=list)  # per-iteration results, in order
    status: str = "in_progress"           # in_progress | completed | max_iterations_reached | "error: ..."
    total_duration_seconds: float = 0.0   # wall-clock duration of the whole cycle
class DevLoopOrchestrator:
    """
    Autonomous development loop orchestrator.
    Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
    in a continuous improvement cycle.
    Flow:
    1. Gemini: Plan features/fixes
    2. Claude Code: Implement
    3. Dashboard: Test
    4. Gemini: Analyze results
    5. Claude Code: Fix issues
    6. Dashboard: Verify
    7. Loop back with learnings
    """

    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.
        Args:
            config: Configuration dict with API keys and settings
            gemini_client: Pre-configured Gemini client (optional)
            claude_bridge: Pre-configured Claude Code bridge (optional)
            dashboard_runner: Pre-configured Dashboard test runner (optional)
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked synchronously on every state change.
        self.subscribers: List[Callable] = []
        # Initialize components lazily (None until first property access,
        # unless a pre-built instance was injected above).
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None
        # History for learning
        self.cycle_history: List[CycleReport] = []

    def _default_config(self) -> Dict:
        """Default configuration used when none is supplied."""
        return {
            "max_iterations": 10,
            "auto_fix_threshold": "high",  # Only auto-fix high+ severity
            "learning_enabled": True,
            "dashboard_url": "http://localhost:3000",
            "websocket_url": "ws://localhost:8000",
            "test_timeout_ms": 30000,
        }

    @property
    def gemini(self):
        """Lazy-load Gemini planner."""
        if self._gemini is None:
            from .planning import GeminiPlanner
            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini

    @property
    def claude_bridge(self):
        """Lazy-load Claude Code bridge."""
        if self._claude_bridge is None:
            from .claude_bridge import ClaudeCodeBridge
            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge

    @property
    def dashboard(self):
        """Lazy-load Dashboard test runner."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner
            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard

    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled or the knowledge_base package
        is unavailable. NOTE: after an ImportError, _lac stays None, so the
        import (and warning) is retried on every access.
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac
                self._lac = get_lac()
            except ImportError:
                logger.warning("LAC not available, learning disabled")
        return self._lac

    def subscribe(self, callback: Callable[[LoopState], None]):
        """Subscribe to state updates."""
        self.subscribers.append(callback)

    def unsubscribe(self, callback: Callable):
        """Unsubscribe from state updates."""
        if callback in self.subscribers:
            self.subscribers.remove(callback)

    def _notify_subscribers(self):
        """Notify all subscribers of state change.

        Subscriber exceptions are logged and swallowed so one bad callback
        cannot break the loop.
        """
        self.state.last_update = datetime.now().isoformat()
        for callback in self.subscribers:
            try:
                callback(self.state)
            except Exception as e:
                logger.error(f"Subscriber error: {e}")

    def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None):
        """Update state and notify subscribers.

        Only the arguments that are provided (truthy) are updated; passing
        neither still refreshes last_update and notifies.
        """
        if phase:
            self.state.phase = phase
        if task:
            self.state.current_task = task
        self._notify_subscribers()

    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.
        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations
        Returns:
            CycleReport with all iteration results
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)
        report = CycleReport(objective=objective)
        start_time = datetime.now()
        logger.info(f"Starting development cycle: {objective}")
        try:
            # Iterate until the objective is met or the iteration budget runs out.
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)
                # Record learning from successful patterns
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)
                # Check for max iterations
                if len(report.iterations) >= max_iter:
                    report.status = "max_iterations_reached"
                    logger.warning(f"Max iterations ({max_iter}) reached")
                    break
        except Exception as e:
            # Boundary handler: any iteration error ends the cycle with an
            # "error: ..." status rather than propagating.
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")
        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()
        # "in_progress" at this point means the loop exited normally.
        if report.status == "in_progress":
            report.status = "completed"
        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)
        return report

    def _is_objective_complete(self, report: CycleReport) -> bool:
        """Check if the objective has been achieved.

        True when the most recent iteration succeeded and its test summary
        reports zero failures.
        """
        if not report.iterations:
            return False
        last_iter = report.iterations[-1]
        # Success if last iteration passed all tests
        if last_iter.success and last_iter.test_results:
            tests = last_iter.test_results
            if tests.get("summary", {}).get("failed", 0) == 0:
                return True
        return False

    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Never raises: phase errors mark the iteration unsuccessful. The
        shared iteration counter is incremented after the phases complete.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)
        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)
            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)
            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            self.state.test_results = result.test_results
            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis
            # Phases 5-6: Fix & Verify if needed
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)
                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                # No issues reported -> the iteration is a success as-is.
                result.success = True
        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False
        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        self.state.iteration += 1
        return result

    async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
        """Gemini creates implementation plan.

        Enriches the request with previous test results and relevant LAC
        insights; on failure returns an empty plan carrying an "error" key.
        """
        # Gather context
        historical_learnings = []
        if self.lac:
            historical_learnings = self.lac.get_relevant_insights(objective)
        plan_request = {
            "objective": objective,
            "context": context or {},
            "previous_results": self.state.test_results,
            "historical_learnings": historical_learnings,
        }
        try:
            plan = await self.gemini.create_plan(plan_request)
            logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
            return plan
        except Exception as e:
            logger.error(f"Planning phase failed: {e}")
            return {"error": str(e), "tasks": [], "test_scenarios": []}

    async def _implementation_phase(self, plan: Dict) -> Dict:
        """Claude Code implements the plan.

        Skipped when the plan is empty or carries an "error" key.
        """
        if not plan or plan.get("error"):
            return {"status": "skipped", "reason": "No valid plan"}
        try:
            result = await self.claude_bridge.execute_plan(plan)
            return {
                "status": result.get("status", "unknown"),
                "files_modified": result.get("files", []),
                "warnings": result.get("warnings", []),
            }
        except Exception as e:
            logger.error(f"Implementation phase failed: {e}")
            return {"status": "error", "error": str(e)}

    async def _testing_phase(self, plan: Dict) -> Dict:
        """Dashboard runs automated tests.

        Falls back to heuristic default scenarios when the plan supplies
        none; a runner error is reported as a single failed test.
        """
        test_scenarios = plan.get("test_scenarios", [])
        if not test_scenarios:
            # Generate default tests based on objective
            test_scenarios = self._generate_default_tests(plan)
        try:
            results = await self.dashboard.run_test_suite(test_scenarios)
            return results
        except Exception as e:
            logger.error(f"Testing phase failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "summary": {"passed": 0, "failed": 1, "total": 1},
            }

    def _generate_default_tests(self, plan: Dict) -> List[Dict]:
        """Generate default test scenarios based on the plan.

        Keyword-matches the objective text: "study"/"create" adds study
        scaffolding checks, "optimi" adds a single-trial CLI smoke test.
        """
        objective = plan.get("objective", "")
        tests = []
        # Study creation tests
        if "study" in objective.lower() or "create" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_study_exists",
                        "name": "Study directory exists",
                        "type": "filesystem",
                        "check": "directory_exists",
                    },
                    {
                        "id": "test_spec_valid",
                        "name": "AtomizerSpec is valid",
                        "type": "api",
                        "endpoint": "/api/studies/{study_id}/spec/validate",
                    },
                    {
                        "id": "test_dashboard_loads",
                        "name": "Dashboard loads study",
                        "type": "browser",
                        "action": "load_study",
                    },
                ]
            )
        # Optimization tests ("optimi" matches optimize/optimization/optimizer)
        if "optimi" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_run_trial",
                        "name": "Single trial executes",
                        "type": "cli",
                        "command": "python run_optimization.py --test",
                    },
                ]
            )
        return tests

    async def _analysis_phase(self, test_results: Dict) -> Dict:
        """Gemini analyzes test results.

        On analyzer failure, reports the error itself as a high-severity
        issue so the cycle proceeds to the fixing phase.
        """
        try:
            from .analyzer import ProblemAnalyzer
            analyzer = ProblemAnalyzer(self.gemini)
            return await analyzer.analyze_test_results(test_results)
        except Exception as e:
            logger.error(f"Analysis phase failed: {e}")
            return {
                "issues_found": True,
                "issues": [{"description": str(e), "severity": "high"}],
                "fix_plans": {},
            }

    async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
        """Claude Code implements fixes.

        Issues without a matching fix plan are skipped silently; per-issue
        errors are captured in the returned records rather than raised.
        """
        fixes = []
        for issue in analysis.get("issues", []):
            fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))
            if fix_plan:
                try:
                    result = await self.claude_bridge.execute_fix(fix_plan)
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": result.get("status"),
                            "files_modified": result.get("files", []),
                        }
                    )
                except Exception as e:
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": "error",
                            "error": str(e),
                        }
                    )
        return fixes

    async def _verification_phase(self, fixes: List[Dict]) -> Dict:
        """Dashboard verifies fixes.

        Fixes that already errored are marked failed without re-testing;
        the rest are re-checked via a targeted dashboard run.
        """
        # Re-run tests for each fix
        all_passed = True
        verification_results = []
        for fix in fixes:
            if fix.get("status") == "error":
                all_passed = False
                verification_results.append(
                    {
                        "issue_id": fix.get("issue_id"),
                        "passed": False,
                        "reason": fix.get("error"),
                    }
                )
            else:
                # Run targeted test
                result = await self.dashboard.verify_fix(fix)
                verification_results.append(result)
                if not result.get("passed", False):
                    all_passed = False
        return {
            "all_passed": all_passed,
            "results": verification_results,
        }

    async def _record_learning(self, iteration: IterationResult):
        """Store successful patterns for future reference (best-effort;
        failures are only logged)."""
        if not self.lac:
            return
        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {iteration.plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")

    # ========================================================================
    # Single-step operations (for manual control)
    # ========================================================================

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase."""
        self._update_state(LoopPhase.PLANNING, objective)
        plan = await self._planning_phase(objective, context)
        self._update_state(LoopPhase.IDLE)
        return plan

    async def step_implement(self, plan: Dict) -> Dict:
        """Execute only the implementation phase."""
        self._update_state(LoopPhase.IMPLEMENTING)
        result = await self._implementation_phase(plan)
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_test(self, scenarios: List[Dict]) -> Dict:
        """Execute only the testing phase."""
        self._update_state(LoopPhase.TESTING)
        result = await self._testing_phase({"test_scenarios": scenarios})
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase."""
        self._update_state(LoopPhase.ANALYZING)
        result = await self._analysis_phase(test_results)
        self._update_state(LoopPhase.IDLE)
        return result

    def get_state(self) -> Dict:
        """Get current state as a JSON-serializable dict."""
        return {
            "phase": self.state.phase.value,
            "iteration": self.state.iteration,
            "current_task": self.state.current_task,
            "test_results": self.state.test_results,
            "last_update": self.state.last_update,
        }

    def export_history(self, filepath: Optional[Path] = None) -> Dict:
        """Export cycle history for analysis.

        Args:
            filepath: When given, the summary is also written there as JSON.
        Returns:
            Summary dict (one entry per completed cycle).
        """
        history = {
            "exported_at": datetime.now().isoformat(),
            "total_cycles": len(self.cycle_history),
            "cycles": [
                {
                    "objective": c.objective,
                    "status": c.status,
                    "iterations": len(c.iterations),
                    "duration_seconds": c.total_duration_seconds,
                }
                for c in self.cycle_history
            ],
        }
        if filepath:
            with open(filepath, "w") as f:
                json.dump(history, f, indent=2)
        return history

View File

@@ -0,0 +1,451 @@
"""
Gemini Planner - Strategic planning and test design using Gemini Pro.
Handles:
- Implementation planning from objectives
- Test scenario generation
- Architecture decisions
- Risk assessment
"""
import asyncio
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class PlanTask:
    """A single task in the implementation plan (consumed by Claude Code)."""

    id: str                               # stable identifier, e.g. "task_001"
    description: str                      # what to do
    file: Optional[str] = None            # target file path, if known
    code_hint: Optional[str] = None       # pseudo-code or pattern to follow
    priority: str = "medium"              # "high" | "medium" | "low"
    dependencies: Optional[List[str]] = None  # ids of tasks that must run first; normalized to [] below

    def __post_init__(self):
        # Normalize a None default into an empty list (avoids the mutable
        # default-argument pitfall).
        if self.dependencies is None:
            self.dependencies = []
@dataclass
class TestScenario:
    """A test scenario for dashboard verification."""

    id: str                               # stable identifier, e.g. "test_001"
    name: str                             # human-readable test name
    type: str  # "api", "browser", "cli", "filesystem"
    steps: Optional[List[Dict]] = None    # ordered actions; normalized to [] below
    expected_outcome: Optional[Dict] = None  # assertion payload; defaults to {"status": "pass"}

    def __post_init__(self):
        # Normalize None defaults (avoids the mutable default-argument pitfall).
        if self.steps is None:
            self.steps = []
        if self.expected_outcome is None:
            self.expected_outcome = {"status": "pass"}
class GeminiPlanner:
"""
Strategic planner using Gemini Pro.
Generates:
- Implementation tasks for Claude Code
- Test scenarios for dashboard verification
- Architecture decisions
- Risk assessments
"""
    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the planner.
        Args:
            config: Configuration with API key and model settings.
                Keys read lazily by the ``client`` property: "api_key", "model".
        """
        self.config = config or {}
        # Populated on first access of `client`: the google.generativeai
        # module (or the "mock" sentinel) and the GenerativeModel instance.
        self._client = None
        self._model = None
    @property
    def client(self):
        """Lazy-load Gemini client.

        On first access: imports google.generativeai, configures it with the
        API key from config or the GEMINI_API_KEY env var, and builds the
        GenerativeModel. If the package is not installed, stores the string
        sentinel "mock" so callers fall back to the offline mock planner.

        Raises:
            ValueError: When the package is available but no API key is set.
        """
        if self._client is None:
            try:
                import google.generativeai as genai
                api_key = self.config.get("api_key") or os.environ.get("GEMINI_API_KEY")
                if not api_key:
                    raise ValueError("GEMINI_API_KEY not set")
                genai.configure(api_key=api_key)
                self._client = genai
                # Model name can be overridden via config["model"].
                model_name = self.config.get("model", "gemini-2.0-flash-thinking-exp-01-21")
                self._model = genai.GenerativeModel(model_name)
                logger.info(f"Gemini client initialized with model: {model_name}")
            except ImportError:
                # NOTE(review): "mock" is a string sentinel; callers compare
                # with `self.client == "mock"` before using self._model.
                logger.warning("google-generativeai not installed, using mock planner")
                self._client = "mock"
        return self._client
async def create_plan(self, request: Dict) -> Dict:
"""
Create an implementation plan from an objective.
Args:
request: Dict with:
- objective: What to achieve
- context: Additional context (study spec, etc.)
- previous_results: Results from last iteration
- historical_learnings: Relevant LAC insights
Returns:
Plan dict with tasks, test_scenarios, risks
"""
objective = request.get("objective", "")
context = request.get("context", {})
previous_results = request.get("previous_results")
learnings = request.get("historical_learnings", [])
# Build planning prompt
prompt = self._build_planning_prompt(objective, context, previous_results, learnings)
# Get response from Gemini
if self.client == "mock":
plan = self._mock_plan(objective, context)
else:
plan = await self._query_gemini(prompt)
return plan
    def _build_planning_prompt(
        self,
        objective: str,
        context: Dict,
        previous_results: Optional[Dict],
        learnings: List[Dict],
    ) -> str:
        """Build the planning prompt for Gemini.

        Interpolates the objective, context, previous-iteration results and
        formatted LAC learnings into a fixed markdown template that instructs
        the model to emit JSON only. Doubled braces ({{ }}) are f-string
        escapes producing literal braces in the JSON example.
        """
        prompt = f"""## Atomizer Development Planning Session
### Objective
{objective}
### Context
{json.dumps(context, indent=2) if context else "No additional context provided."}
### Previous Iteration Results
{json.dumps(previous_results, indent=2) if previous_results else "First iteration - no previous results."}
### Historical Learnings (from LAC)
{self._format_learnings(learnings)}
### Required Outputs
Generate a detailed implementation plan in JSON format with the following structure:
```json
{{
    "objective": "{objective}",
    "approach": "Brief description of the approach",
    "tasks": [
        {{
            "id": "task_001",
            "description": "What to do",
            "file": "path/to/file.py",
            "code_hint": "Pseudo-code or pattern to use",
            "priority": "high|medium|low",
            "dependencies": ["task_000"]
        }}
    ],
    "test_scenarios": [
        {{
            "id": "test_001",
            "name": "Test name",
            "type": "api|browser|cli|filesystem",
            "steps": [
                {{"action": "navigate", "target": "/canvas"}}
            ],
            "expected_outcome": {{"status": "pass", "assertions": []}}
        }}
    ],
    "risks": [
        {{
            "description": "What could go wrong",
            "mitigation": "How to handle it",
            "severity": "high|medium|low"
        }}
    ],
    "acceptance_criteria": [
        "Criteria 1",
        "Criteria 2"
    ]
}}
```
### Guidelines
1. **Tasks should be specific and actionable** - Each task should be completable by Claude Code
2. **Test scenarios must be verifiable** - Use dashboard endpoints and browser actions
3. **Consider Atomizer architecture** - Use existing extractors (SYS_12), follow AtomizerSpec v2.0
4. **Apply historical learnings** - Avoid known failure patterns
### Important Atomizer Patterns
- Studies use `atomizer_spec.json` (AtomizerSpec v2.0)
- Design variables have bounds: {{"min": X, "max": Y}}
- Objectives use extractors: E1 (displacement), E3 (stress), E4 (mass)
- Constraints define limits with operators: <, >, <=, >=
Output ONLY the JSON plan, no additional text.
"""
        return prompt
def _format_learnings(self, learnings: List[Dict]) -> str:
"""Format LAC learnings for the prompt."""
if not learnings:
return "No relevant historical learnings."
formatted = []
for learning in learnings[:5]: # Limit to 5 most relevant
formatted.append(
f"- [{learning.get('category', 'insight')}] {learning.get('insight', '')}"
)
return "\n".join(formatted)
    async def _query_gemini(self, prompt: str) -> Dict:
        """Query Gemini and parse response.

        Runs the blocking SDK call in the default thread executor, then
        extracts JSON from the reply (```json fence, then any ``` fence,
        then the raw text). Never raises: parse and query errors are
        returned as plan dicts with an "error" key and empty task lists.
        """
        try:
            # Run in executor to not block
            # NOTE(review): asyncio.get_event_loop() is deprecated inside
            # coroutines on newer Pythons; get_running_loop() is the modern form.
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(
                None, lambda: self._model.generate_content(prompt)
            )
            # Extract JSON from response
            text = response.text
            # Try to parse JSON
            try:
                # Find JSON block
                if "```json" in text:
                    start = text.find("```json") + 7
                    end = text.find("```", start)
                    json_str = text[start:end].strip()
                elif "```" in text:
                    start = text.find("```") + 3
                    end = text.find("```", start)
                    json_str = text[start:end].strip()
                else:
                    # No fence at all: assume the whole reply is JSON.
                    json_str = text.strip()
                plan = json.loads(json_str)
                logger.info(f"Gemini plan parsed: {len(plan.get('tasks', []))} tasks")
                return plan
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse Gemini response: {e}")
                return {
                    "objective": "Parse error",
                    "error": str(e),
                    "raw_response": text[:500],  # truncated for log/report size
                    "tasks": [],
                    "test_scenarios": [],
                }
        except Exception as e:
            logger.error(f"Gemini query failed: {e}")
            return {
                "objective": "Query error",
                "error": str(e),
                "tasks": [],
                "test_scenarios": [],
            }
def _mock_plan(self, objective: str, context: Dict) -> Dict:
    """Build a deterministic fallback plan when the Gemini API is unavailable.

    Produces the same plan shape as a real Gemini response: tasks,
    test scenarios, risks, and acceptance criteria.
    """
    logger.info("Using mock planner (Gemini not available)")
    # Heuristic objective classification: any of these keywords marks the
    # objective as a study-creation request.
    creation_keywords = ("create", "study", "new", "setup")
    objective_lower = objective.lower()
    is_study_creation = any(kw in objective_lower for kw in creation_keywords)
    tasks: List[Dict] = []
    test_scenarios: List[Dict] = []
    if is_study_creation:
        study_name = context.get("study_name", "support_arm")
        # (id, description, file, priority, dependencies) per task.
        task_specs = [
            (
                "task_001",
                f"Create study directory structure for {study_name}",
                f"studies/_Other/{study_name}/",
                "high",
                [],
            ),
            (
                "task_002",
                "Copy NX model files to study directory",
                f"studies/_Other/{study_name}/1_setup/model/",
                "high",
                ["task_001"],
            ),
            (
                "task_003",
                "Create AtomizerSpec v2.0 configuration",
                f"studies/_Other/{study_name}/atomizer_spec.json",
                "high",
                ["task_002"],
            ),
            (
                "task_004",
                "Create run_optimization.py script",
                f"studies/_Other/{study_name}/run_optimization.py",
                "high",
                ["task_003"],
            ),
            (
                "task_005",
                "Create README.md documentation",
                f"studies/_Other/{study_name}/README.md",
                "medium",
                ["task_003"],
            ),
        ]
        tasks = [
            {
                "id": task_id,
                "description": description,
                "file": file_path,
                "priority": priority,
                "dependencies": dependencies,
            }
            for task_id, description, file_path, priority, dependencies in task_specs
        ]
        test_scenarios = [
            {
                "id": "test_001",
                "name": "Study directory exists",
                "type": "filesystem",
                "steps": [{"action": "check_exists", "path": f"studies/_Other/{study_name}"}],
                "expected_outcome": {"exists": True},
            },
            {
                "id": "test_002",
                "name": "AtomizerSpec is valid",
                "type": "api",
                "steps": [
                    {"action": "get", "endpoint": f"/api/studies/{study_name}/spec/validate"}
                ],
                "expected_outcome": {"valid": True},
            },
            {
                "id": "test_003",
                "name": "Dashboard loads study",
                "type": "browser",
                "steps": [
                    {"action": "navigate", "url": f"/canvas/{study_name}"},
                    {"action": "wait_for", "selector": "[data-testid='canvas-container']"},
                ],
                "expected_outcome": {"loaded": True},
            },
        ]
    return {
        "objective": objective,
        "approach": "Mock plan for development testing",
        "tasks": tasks,
        "test_scenarios": test_scenarios,
        "risks": [
            {
                "description": "NX model files may have dependencies",
                "mitigation": "Copy all related files (_i.prt, .fem, .sim)",
                "severity": "high",
            }
        ],
        "acceptance_criteria": [
            "Study directory structure created",
            "AtomizerSpec validates without errors",
            "Dashboard loads study canvas",
        ],
    }
async def analyze_codebase(self, query: str) -> Dict:
    """Use Gemini to analyze codebase state.

    Args:
        query: What to analyze (e.g., "current dashboard components")

    Returns:
        Analysis results. Currently a stub: echoes the query alongside a
        fixed not-implemented message and an empty recommendation list.
    """
    # Placeholder until codebase scanning is integrated; the returned
    # dict shape is the contract future callers should rely on.
    stub_message = "Codebase analysis not yet implemented"
    return {
        "query": query,
        "analysis": stub_message,
        "recommendations": [],
    }
async def generate_test_scenarios(
    self,
    feature: str,
    context: Optional[Dict] = None,
) -> List[Dict]:
    """
    Generate test scenarios for a specific feature.

    Args:
        feature: Feature to test (e.g., "study creation", "spec validation")
        context: Additional context

    Returns:
        List of test scenario dicts; an empty list on query/parse failure
        or a non-list response, so callers can iterate unconditionally.
    """
    prompt = f"""Generate test scenarios for the Atomizer feature: {feature}
Context: {json.dumps(context, indent=2) if context else "None"}
Output as JSON array of test scenarios:
```json
[
{{
"id": "test_001",
"name": "Test name",
"type": "api|browser|cli|filesystem",
"steps": [...],
"expected_outcome": {{...}}
}}
]
```
"""
    if self.client == "mock":
        return self._mock_plan(feature, context or {}).get("test_scenarios", [])
    # Query Gemini
    try:
        # get_running_loop() replaces the deprecated get_event_loop()
        # call inside a coroutine; the blocking SDK call runs off-loop.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: self._model.generate_content(prompt)
        )
        text = response.text
        # Accept a ```json fence, a bare ``` fence, or raw JSON text.
        # (Previously only the ```json case returned a value; any other
        # response fell through and implicitly returned None, violating
        # the List[Dict] contract.)
        if "```json" in text:
            start = text.find("```json") + 7
        elif "```" in text:
            start = text.find("```") + 3
        else:
            start = None
        if start is None:
            json_str = text.strip()
        else:
            end = text.find("```", start)
            # Tolerate a missing closing fence (end == -1).
            json_str = (text[start:] if end == -1 else text[start:end]).strip()
        scenarios = json.loads(json_str)
        # Guard against a well-formed but non-list JSON payload.
        return scenarios if isinstance(scenarios, list) else []
    except Exception as e:
        logger.error(f"Failed to generate test scenarios: {e}")
        return []

View File

@@ -0,0 +1,585 @@
"""
Dashboard Test Runner - Automated testing through the Atomizer dashboard.
Supports test types:
- API tests (REST endpoint verification)
- Browser tests (UI interaction via Playwright)
- CLI tests (command line execution)
- Filesystem tests (file/directory verification)
"""
import asyncio
import json
import logging
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import aiohttp
logger = logging.getLogger(__name__)
@dataclass
class TestStep:
    """A single step in a test scenario.

    NOTE(review): the runner below consumes scenario steps as plain dicts;
    this dataclass is not instantiated anywhere in this module — confirm
    whether it is part of the intended public API.
    """
    action: str  # e.g. "get", "navigate", "click", "check_exists"
    target: Optional[str] = None  # endpoint, selector, or path the action applies to
    data: Optional[Dict] = None  # request payload or extra action arguments
    timeout_ms: int = 5000  # per-step timeout in milliseconds
@dataclass
class TestScenario:
    """A complete test scenario."""
    id: str
    name: str
    type: str  # "api", "browser", "cli", "filesystem"
    # Raw step dicts; their keys are interpreted per scenario type.
    steps: List[Dict] = field(default_factory=list)
    # Key/value expectations compared against the details collected at run time.
    expected_outcome: Dict = field(default_factory=lambda: {"status": "pass"})
    timeout_ms: int = 30000  # whole-scenario timeout in milliseconds
@dataclass
class TestResult:
    """Result of a single test."""
    scenario_id: str
    scenario_name: str
    passed: bool
    duration_ms: float  # wall-clock duration; overwritten by the suite runner
    error: Optional[str] = None  # human-readable failure reason; None on success
    details: Optional[Dict] = None  # action-specific evidence (responses, paths, output)
@dataclass
class TestReport:
    """Complete test report."""
    # ISO-8601 creation time of the report.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # One TestResult per executed scenario, in execution order.
    scenarios: List[TestResult] = field(default_factory=list)
    # Aggregate counters maintained by the suite runner; passed + failed == total.
    summary: Dict = field(default_factory=lambda: {"passed": 0, "failed": 0, "total": 0})
class DashboardTestRunner:
"""
Automated test runner for Atomizer dashboard.
Executes test scenarios against:
- Backend API endpoints
- Frontend UI (via Playwright if available)
- CLI commands
- Filesystem assertions
"""
def __init__(self, config: Optional[Dict] = None):
"""
Initialize the test runner.
Args:
config: Configuration with dashboard URLs and timeouts
"""
self.config = config or {}
self.base_url = self.config.get("dashboard_url", "http://localhost:8000")
self.ws_url = self.config.get("websocket_url", "ws://localhost:8000")
self.timeout_ms = self.config.get("test_timeout_ms", 30000)
self.studies_dir = Path(self.config.get("studies_dir", "C:/Users/antoi/Atomizer/studies"))
self._session: Optional[aiohttp.ClientSession] = None
self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
self._playwright = None
self._browser = None
async def connect(self):
"""Initialize connections."""
if self._session is None:
self._session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=self.timeout_ms / 1000)
)
async def disconnect(self):
"""Clean up connections."""
if self._ws:
await self._ws.close()
self._ws = None
if self._session:
await self._session.close()
self._session = None
if self._browser:
await self._browser.close()
self._browser = None
async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
"""
Run a complete test suite.
Args:
scenarios: List of test scenario dicts
Returns:
Test report as dict
"""
await self.connect()
report = TestReport()
for scenario_dict in scenarios:
scenario = self._parse_scenario(scenario_dict)
start_time = datetime.now()
try:
result = await self._execute_scenario(scenario)
result.duration_ms = (datetime.now() - start_time).total_seconds() * 1000
report.scenarios.append(result)
if result.passed:
report.summary["passed"] += 1
else:
report.summary["failed"] += 1
except Exception as e:
logger.error(f"Scenario {scenario.id} failed with error: {e}")
report.scenarios.append(
TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=(datetime.now() - start_time).total_seconds() * 1000,
error=str(e),
)
)
report.summary["failed"] += 1
report.summary["total"] += 1
return {
"timestamp": report.timestamp,
"scenarios": [self._result_to_dict(r) for r in report.scenarios],
"summary": report.summary,
}
def _parse_scenario(self, scenario_dict: Dict) -> TestScenario:
"""Parse a scenario dict into TestScenario."""
return TestScenario(
id=scenario_dict.get("id", "unknown"),
name=scenario_dict.get("name", "Unnamed test"),
type=scenario_dict.get("type", "api"),
steps=scenario_dict.get("steps", []),
expected_outcome=scenario_dict.get("expected_outcome", {"status": "pass"}),
timeout_ms=scenario_dict.get("timeout_ms", self.timeout_ms),
)
def _result_to_dict(self, result: TestResult) -> Dict:
"""Convert TestResult to dict."""
return {
"scenario_id": result.scenario_id,
"scenario_name": result.scenario_name,
"passed": result.passed,
"duration_ms": result.duration_ms,
"error": result.error,
"details": result.details,
}
async def _execute_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a single test scenario."""
logger.info(f"Executing test: {scenario.name} ({scenario.type})")
if scenario.type == "api":
return await self._execute_api_scenario(scenario)
elif scenario.type == "browser":
return await self._execute_browser_scenario(scenario)
elif scenario.type == "cli":
return await self._execute_cli_scenario(scenario)
elif scenario.type == "filesystem":
return await self._execute_filesystem_scenario(scenario)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Unknown test type: {scenario.type}",
)
async def _execute_api_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute an API test scenario."""
details = {}
for step in scenario.steps:
action = step.get("action", "get").lower()
endpoint = step.get("endpoint", step.get("target", "/"))
data = step.get("data")
url = f"{self.base_url}{endpoint}"
try:
if action == "get":
async with self._session.get(url) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "post":
async with self._session.post(url, json=data) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "put":
async with self._session.put(url, json=data) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "delete":
async with self._session.delete(url) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
except aiohttp.ClientError as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"API request failed: {e}",
details={"url": url, "action": action},
)
except json.JSONDecodeError:
details["response"] = "Non-JSON response"
# Check expected outcome
passed = self._check_outcome(details, scenario.expected_outcome)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_browser_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a browser test scenario using Playwright."""
try:
from playwright.async_api import async_playwright
except ImportError:
logger.warning("Playwright not available, skipping browser test")
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=True, # Skip, don't fail
duration_ms=0,
error="Playwright not installed - test skipped",
)
details = {}
try:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
for step in scenario.steps:
action = step.get("action", "navigate")
if action == "navigate":
url = step.get("url", "/")
# Use frontend URL (port 3003 for Vite dev server)
full_url = f"http://localhost:3003{url}" if url.startswith("/") else url
await page.goto(full_url, timeout=scenario.timeout_ms)
details["navigated_to"] = full_url
elif action == "wait_for":
selector = step.get("selector")
if selector:
await page.wait_for_selector(selector, timeout=scenario.timeout_ms)
details["found_selector"] = selector
elif action == "click":
selector = step.get("selector")
if selector:
await page.click(selector)
details["clicked"] = selector
elif action == "fill":
selector = step.get("selector")
value = step.get("value", "")
if selector:
await page.fill(selector, value)
details["filled"] = {selector: value}
elif action == "screenshot":
path = step.get("path", f"test_{scenario.id}.png")
await page.screenshot(path=path)
details["screenshot"] = path
await browser.close()
passed = True
except Exception as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Browser test failed: {e}",
details=details,
)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_cli_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a CLI test scenario."""
details = {}
for step in scenario.steps:
command = step.get("command", step.get("target", ""))
cwd = step.get("cwd", str(self.studies_dir))
if not command:
continue
try:
# Use PowerShell on Windows
result = subprocess.run(
["powershell", "-Command", command],
capture_output=True,
text=True,
cwd=cwd,
timeout=scenario.timeout_ms / 1000,
)
details["command"] = command
details["returncode"] = result.returncode
details["stdout"] = result.stdout[:1000] if result.stdout else ""
details["stderr"] = result.stderr[:1000] if result.stderr else ""
if result.returncode != 0:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Command failed with code {result.returncode}",
details=details,
)
except subprocess.TimeoutExpired:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Command timed out after {scenario.timeout_ms}ms",
details={"command": command},
)
except Exception as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"CLI execution failed: {e}",
details={"command": command},
)
passed = self._check_outcome(details, scenario.expected_outcome)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_filesystem_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a filesystem test scenario."""
details = {}
for step in scenario.steps:
action = step.get("action", "check_exists")
path_str = step.get("path", "")
# Resolve relative paths
if not Path(path_str).is_absolute():
path = self.studies_dir.parent / path_str
else:
path = Path(path_str)
if action == "check_exists":
exists = path.exists()
details["path"] = str(path)
details["exists"] = exists
if scenario.expected_outcome.get("exists", True) != exists:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Path {'does not exist' if not exists else 'exists but should not'}: {path}",
details=details,
)
elif action == "check_file_contains":
content_check = step.get("contains", "")
if path.exists() and path.is_file():
content = path.read_text()
contains = content_check in content
details["contains"] = contains
details["search_term"] = content_check
if not contains:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File does not contain: {content_check}",
details=details,
)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File not found: {path}",
details=details,
)
elif action == "check_json_valid":
if path.exists() and path.is_file():
try:
with open(path) as f:
json.load(f)
details["valid_json"] = True
except json.JSONDecodeError as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Invalid JSON: {e}",
details={"path": str(path)},
)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File not found: {path}",
details=details,
)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=True,
duration_ms=0,
details=details,
)
def _check_outcome(self, details: Dict, expected: Dict) -> bool:
"""Check if test details match expected outcome."""
for key, expected_value in expected.items():
if key not in details:
continue
actual_value = details[key]
# Handle nested dicts
if isinstance(expected_value, dict) and isinstance(actual_value, dict):
if not self._check_outcome(actual_value, expected_value):
return False
# Handle lists
elif isinstance(expected_value, list) and isinstance(actual_value, list):
if expected_value != actual_value:
return False
# Handle simple values
elif actual_value != expected_value:
return False
return True
async def verify_fix(self, fix: Dict) -> Dict:
"""
Verify that a specific fix was successful.
Args:
fix: Fix dict with issue_id and files_modified
Returns:
Verification result
"""
issue_id = fix.get("issue_id", "unknown")
files_modified = fix.get("files_modified", [])
# Run quick verification
passed = True
details = {}
# Check that modified files exist
for file_path in files_modified:
path = Path(file_path)
if not path.exists():
passed = False
details["missing_file"] = str(path)
break
# Could add more sophisticated verification here
return {
"issue_id": issue_id,
"passed": passed,
"details": details,
}
async def run_health_check(self) -> Dict:
"""
Run a quick health check on dashboard components.
Returns:
Health status dict
"""
await self.connect()
health = {
"timestamp": datetime.now().isoformat(),
"api": "unknown",
"frontend": "unknown",
"websocket": "unknown",
}
# Check API
try:
async with self._session.get(f"{self.base_url}/health") as resp:
if resp.status == 200:
health["api"] = "healthy"
else:
health["api"] = f"unhealthy (status {resp.status})"
except Exception as e:
health["api"] = f"error: {e}"
# Check frontend (if available)
try:
async with self._session.get("http://localhost:3000") as resp:
if resp.status == 200:
health["frontend"] = "healthy"
else:
health["frontend"] = f"unhealthy (status {resp.status})"
except Exception as e:
health["frontend"] = f"error: {e}"
return health

File diff suppressed because it is too large Load Diff