feat: Add DevLoop automation and HTML Reports

## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
2026-01-24 21:18:18 -05:00
parent a3f18dc377
commit 3193831340
24 changed files with 6437 additions and 0 deletions
--- a/optimization_engine/devloop/test_runner.py
+++ b/optimization_engine/devloop/test_runner.py
@@ -0,0 +1,585 @@
+"""
+Dashboard Test Runner - Automated testing through the Atomizer dashboard.
+
+Supports test types:
+- API tests (REST endpoint verification)
+- Browser tests (UI interaction via Playwright)
+- CLI tests (command line execution)
+- Filesystem tests (file/directory verification)
+"""
+
+import asyncio
+import json
+import logging
+import subprocess
+import time
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TestStep:
+    """A single step in a test scenario.
+
+    Attributes:
+        action: Name of the action the step performs.
+        target: Optional target of the action.
+        data: Optional payload dict for the action.
+        timeout_ms: Per-step timeout in milliseconds (default 5000).
+    """
+
+    # Plain data holder, not a pytest test class: the ``Test`` prefix would
+    # otherwise make pytest try to collect it and warn about its __init__.
+    # Unannotated, so the dataclass machinery does not treat it as a field.
+    __test__ = False
+
+    action: str
+    target: Optional[str] = None
+    data: Optional[Dict] = None
+    timeout_ms: int = 5000
+
+
+@dataclass
+class TestScenario:
+    """A complete test scenario.
+
+    Attributes:
+        id: Scenario identifier.
+        name: Human-readable scenario name.
+        type: Scenario kind: "api", "browser", "cli" or "filesystem".
+        steps: Step dicts, in the order they should run.
+        expected_outcome: Expected values; defaults to ``{"status": "pass"}``.
+        timeout_ms: Scenario timeout in milliseconds (default 30000).
+    """
+
+    # Plain data holder, not a pytest test class: the ``Test`` prefix would
+    # otherwise make pytest try to collect it and warn about its __init__.
+    # Unannotated, so the dataclass machinery does not treat it as a field.
+    __test__ = False
+
+    id: str
+    name: str
+    type: str  # "api", "browser", "cli", "filesystem"
+    # Mutable defaults go through default_factory so instances never share them.
+    steps: List[Dict] = field(default_factory=list)
+    expected_outcome: Dict = field(default_factory=lambda: {"status": "pass"})
+    timeout_ms: int = 30000
+
+
+@dataclass
+class TestResult:
+    """Result of a single test.
+
+    Attributes:
+        scenario_id: Identifier of the scenario that produced this result.
+        scenario_name: Name of that scenario.
+        passed: Whether the scenario passed.
+        duration_ms: Execution time in milliseconds.
+        error: Optional error or skip message.
+        details: Optional dict of values collected while running the scenario.
+    """
+
+    # Plain data holder, not a pytest test class: the ``Test`` prefix would
+    # otherwise make pytest try to collect it and warn about its __init__.
+    # Unannotated, so the dataclass machinery does not treat it as a field.
+    __test__ = False
+
+    scenario_id: str
+    scenario_name: str
+    passed: bool
+    duration_ms: float
+    error: Optional[str] = None
+    details: Optional[Dict] = None
+
+
+@dataclass
+class TestReport:
+    """Complete test report.
+
+    Attributes:
+        timestamp: ISO-8601 creation time, taken from the local clock.
+        scenarios: Results of the scenarios, in execution order.
+        summary: Counters with the keys "passed", "failed" and "total".
+    """
+
+    # Plain data holder, not a pytest test class: the ``Test`` prefix would
+    # otherwise make pytest try to collect it and warn about its __init__.
+    # Unannotated, so the dataclass machinery does not treat it as a field.
+    __test__ = False
+
+    # Every default is produced per instance so reports never share state.
+    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
+    scenarios: List[TestResult] = field(default_factory=list)
+    summary: Dict = field(default_factory=lambda: {"passed": 0, "failed": 0, "total": 0})
+
+
+class DashboardTestRunner:
+    """
+    Automated test runner for Atomizer dashboard.
+
+    Executes test scenarios against:
+    - Backend API endpoints
+    - Frontend UI (via Playwright if available)
+    - CLI commands
+    - Filesystem assertions
+    """
+
+    def __init__(self, config: Optional[Dict] = None):
+        """
+        Store the runner settings and start with no open connections.
+
+        Args:
+            config: Optional settings dict. Recognised keys are
+                ``dashboard_url``, ``websocket_url``, ``test_timeout_ms`` and
+                ``studies_dir``; each one falls back to a built-in default.
+        """
+        settings = config or {}
+        self.config = settings
+
+        # Endpoints and limits, each overridable through the settings dict.
+        self.base_url = settings.get("dashboard_url", "http://localhost:8000")
+        self.ws_url = settings.get("websocket_url", "ws://localhost:8000")
+        self.timeout_ms = settings.get("test_timeout_ms", 30000)
+
+        studies_root = settings.get("studies_dir", "C:/Users/antoi/Atomizer/studies")
+        self.studies_dir = Path(studies_root)
+
+        # Every connection handle starts unset; nothing is opened here.
+        self._session: Optional[aiohttp.ClientSession] = None
+        self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
+        self._playwright = None
+        self._browser = None
+
+    async def connect(self):
+        """Open the shared HTTP session unless one is already open."""
+        if self._session is not None:
+            return
+
+        # aiohttp takes seconds, while the runner keeps its timeout in milliseconds.
+        total_seconds = self.timeout_ms / 1000
+        self._session = aiohttp.ClientSession(
+            timeout=aiohttp.ClientTimeout(total=total_seconds)
+        )
+
+    async def disconnect(self):
+        """Close the websocket, HTTP session and browser that are open, then clear them."""
+        # Handles are visited in a fixed order: websocket, session, browser.
+        for attr in ("_ws", "_session", "_browser"):
+            handle = getattr(self, attr)
+            if not handle:
+                continue
+            await handle.close()
+            setattr(self, attr, None)
+
+    async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
+        """
+        Run a complete test suite.
+
+        Scenarios run one after another. A scenario that raises is recorded as
+        a failed result and does not stop the remaining scenarios. The HTTP
+        session opened here is left open; call ``disconnect()`` when done.
+
+        Args:
+            scenarios: List of test scenario dicts
+
+        Returns:
+            Test report as dict with the keys "timestamp", "scenarios"
+            (one dict per result) and "summary" (passed/failed/total counts)
+        """
+        await self.connect()
+
+        report = TestReport()
+
+        for scenario_dict in scenarios:
+            scenario = self._parse_scenario(scenario_dict)
+            # perf_counter is monotonic, so a system clock adjustment during a
+            # scenario cannot produce a negative or inflated duration.
+            started = time.perf_counter()
+
+            try:
+                result = await self._execute_scenario(scenario)
+            except Exception as e:
+                # logger.exception keeps the traceback alongside the message.
+                logger.exception("Scenario %s failed with error: %s", scenario.id, e)
+                result = TestResult(
+                    scenario_id=scenario.id,
+                    scenario_name=scenario.name,
+                    passed=False,
+                    duration_ms=0,
+                    # Some exceptions (e.g. timeouts) have an empty message;
+                    # fall back to the type name so the error is never blank.
+                    error=str(e) or type(e).__name__,
+                )
+
+            # The executors report 0; the real duration is measured here.
+            result.duration_ms = (time.perf_counter() - started) * 1000
+            report.scenarios.append(result)
+
+            report.summary["passed" if result.passed else "failed"] += 1
+            report.summary["total"] += 1
+
+        return {
+            "timestamp": report.timestamp,
+            "scenarios": [self._result_to_dict(r) for r in report.scenarios],
+            "summary": report.summary,
+        }
+
+    def _parse_scenario(self, scenario_dict: Dict) -> TestScenario:
+        """Build a TestScenario from a raw dict, using defaults for missing keys."""
+        # Fallback for every field; the timeout falls back to the runner's own.
+        fallbacks = {
+            "id": "unknown",
+            "name": "Unnamed test",
+            "type": "api",
+            "steps": [],
+            "expected_outcome": {"status": "pass"},
+            "timeout_ms": self.timeout_ms,
+        }
+        values = {
+            key: scenario_dict.get(key, fallback)
+            for key, fallback in fallbacks.items()
+        }
+        return TestScenario(**values)
+
+    def _result_to_dict(self, result: TestResult) -> Dict:
+        """Copy the six TestResult fields into a plain dict, values by reference."""
+        exported = (
+            "scenario_id",
+            "scenario_name",
+            "passed",
+            "duration_ms",
+            "error",
+            "details",
+        )
+        return {attr: getattr(result, attr) for attr in exported}
+
+    async def _execute_scenario(self, scenario: TestScenario) -> TestResult:
+        """Route a scenario to the executor that matches its ``type``."""
+        logger.info(f"Executing test: {scenario.name} ({scenario.type})")
+
+        # Ordered (type, executor) pairs; the first equal type wins.
+        executors = (
+            ("api", self._execute_api_scenario),
+            ("browser", self._execute_browser_scenario),
+            ("cli", self._execute_cli_scenario),
+            ("filesystem", self._execute_filesystem_scenario),
+        )
+        for kind, executor in executors:
+            if scenario.type == kind:
+                return await executor(scenario)
+
+        # Nothing matched: report a failure instead of raising.
+        return TestResult(
+            scenario_id=scenario.id,
+            scenario_name=scenario.name,
+            passed=False,
+            duration_ms=0,
+            error=f"Unknown test type: {scenario.type}",
+        )
+
+    async def _execute_api_scenario(self, scenario: TestScenario) -> TestResult:
+        """
+        Execute an API test scenario.
+
+        Each step is sent to ``base_url`` + the step's ``endpoint`` (falling
+        back to ``target``, then "/") using the HTTP verb named by ``action``
+        (get, post, put or delete; "get" when omitted). POST and PUT send the
+        step's ``data`` as a JSON body.
+
+        ``details["status_code"]`` and ``details["response"]`` are overwritten
+        by every step, so the expected outcome is checked against the values
+        of the last step that ran.
+
+        Args:
+            scenario: Scenario whose steps describe the requests to send
+
+        Returns:
+            A failed result on the first transport error or timeout; otherwise
+            a result whose ``passed`` flag reflects the expected outcome
+        """
+        details = {}
+        supported_verbs = ("get", "post", "put", "delete")
+        # Only these verbs forward the step's "data" as a JSON body.
+        body_verbs = ("post", "put")
+
+        for step in scenario.steps:
+            action = step.get("action", "get").lower()
+            endpoint = step.get("endpoint", step.get("target", "/"))
+            url = f"{self.base_url}{endpoint}"
+
+            if action not in supported_verbs:
+                # Still skipped as before, but no longer silently.
+                logger.warning(
+                    "Unsupported API action %r in scenario %s; step skipped",
+                    action,
+                    scenario.id,
+                )
+                continue
+
+            request_kwargs = {"json": step.get("data")} if action in body_verbs else {}
+            send = getattr(self._session, action)
+
+            try:
+                async with send(url, **request_kwargs) as resp:
+                    details["status_code"] = resp.status
+                    try:
+                        details["response"] = await resp.json()
+                    except (aiohttp.ContentTypeError, json.JSONDecodeError):
+                        # ContentTypeError is a ClientError subclass, so it has
+                        # to be handled here; otherwise a non-JSON body would
+                        # be reported as a failed request although the status
+                        # code was received.
+                        details["response"] = "Non-JSON response"
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                # The session's total timeout raises asyncio.TimeoutError,
+                # which is not a ClientError and has an empty message.
+                return TestResult(
+                    scenario_id=scenario.id,
+                    scenario_name=scenario.name,
+                    passed=False,
+                    duration_ms=0,
+                    error=f"API request failed: {str(e) or type(e).__name__}",
+                    details={"url": url, "action": action},
+                )
+
+        # Check expected outcome
+        passed = self._check_outcome(details, scenario.expected_outcome)
+
+        return TestResult(
+            scenario_id=scenario.id,
+            scenario_name=scenario.name,
+            passed=passed,
+            duration_ms=0,
+            details=details,
+        )
+
+    async def _execute_browser_scenario(self, scenario: TestScenario) -> TestResult:
+        """
+        Execute a browser test scenario using Playwright.
+
+        Supported step actions ("navigate" when omitted):
+        - ``navigate``: open ``url``; a value starting with "/" is appended to
+          the frontend base URL.
+        - ``wait_for``: wait until ``selector`` appears.
+        - ``click``: click ``selector``.
+        - ``fill``: type ``value`` into ``selector``.
+        - ``screenshot``: save a screenshot to ``path``
+          (default ``test_<scenario id>.png``).
+
+        The frontend base URL is read from the ``frontend_url`` config key and
+        defaults to ``http://localhost:3003``. Every Playwright call uses the
+        scenario timeout.
+
+        Args:
+            scenario: Scenario whose steps describe the browser interaction
+
+        Returns:
+            A passing result carrying a skip message when Playwright is not
+            installed, a failed result when any step raises, otherwise a
+            result whose ``passed`` flag reflects the expected outcome
+        """
+        try:
+            from playwright.async_api import async_playwright
+        except ImportError:
+            logger.warning("Playwright not available, skipping browser test")
+            return TestResult(
+                scenario_id=scenario.id,
+                scenario_name=scenario.name,
+                passed=True,  # Skip, don't fail
+                duration_ms=0,
+                error="Playwright not installed - test skipped",
+            )
+
+        details = {}
+        # Port 3003 is the Vite dev server; overridable for other setups.
+        frontend_url = self.config.get("frontend_url", "http://localhost:3003").rstrip("/")
+        timeout = scenario.timeout_ms
+
+        try:
+            async with async_playwright() as p:
+                browser = await p.chromium.launch(headless=True)
+                try:
+                    page = await browser.new_page()
+
+                    for step in scenario.steps:
+                        action = step.get("action", "navigate")
+                        selector = step.get("selector")
+
+                        if action == "navigate":
+                            url = step.get("url", "/")
+                            full_url = f"{frontend_url}{url}" if url.startswith("/") else url
+                            await page.goto(full_url, timeout=timeout)
+                            details["navigated_to"] = full_url
+
+                        elif action == "wait_for":
+                            if selector:
+                                await page.wait_for_selector(selector, timeout=timeout)
+                                details["found_selector"] = selector
+
+                        elif action == "click":
+                            if selector:
+                                await page.click(selector, timeout=timeout)
+                                details["clicked"] = selector
+
+                        elif action == "fill":
+                            value = step.get("value", "")
+                            if selector:
+                                await page.fill(selector, value, timeout=timeout)
+                                details["filled"] = {selector: value}
+
+                        elif action == "screenshot":
+                            path = step.get("path", f"test_{scenario.id}.png")
+                            await page.screenshot(path=path)
+                            details["screenshot"] = path
+
+                        else:
+                            # Still skipped as before, but no longer silently.
+                            logger.warning(
+                                "Unsupported browser action %r in scenario %s; step skipped",
+                                action,
+                                scenario.id,
+                            )
+                finally:
+                    # Close the browser even when a step raised.
+                    await browser.close()
+
+        except Exception as e:
+            return TestResult(
+                scenario_id=scenario.id,
+                scenario_name=scenario.name,
+                passed=False,
+                duration_ms=0,
+                error=f"Browser test failed: {e}",
+                details=details,
+            )
+
+        # Same outcome check as the API and CLI executors; the default
+        # expectation {"status": "pass"} matches no details key and passes.
+        passed = self._check_outcome(details, scenario.expected_outcome)
+
+        return TestResult(
+            scenario_id=scenario.id,
+            scenario_name=scenario.name,
+            passed=passed,
+            duration_ms=0,
+            details=details,
+        )
+
+    async def _execute_cli_scenario(self, scenario: TestScenario) -> TestResult:
+        """
+        Execute a CLI test scenario.
+
+        Each step's ``command`` (falling back to ``target``) is run through
+        PowerShell in the step's ``cwd`` (default: the studies directory).
+        Steps without a command are skipped. The scenario stops at the first
+        command that exits non-zero, times out or cannot be started.
+
+        ``details`` is overwritten by every step, so it describes the last
+        command that ran; stdout and stderr are cut to their first 1000
+        characters.
+
+        Args:
+            scenario: Scenario whose steps describe the commands to run
+
+        Returns:
+            A failed result for the first failing command; otherwise a result
+            whose ``passed`` flag reflects the expected outcome
+        """
+        details = {}
+        timeout_s = scenario.timeout_ms / 1000
+        loop = asyncio.get_running_loop()
+
+        def run_blocking(command: str, cwd: str) -> subprocess.CompletedProcess:
+            # Use PowerShell on Windows
+            return subprocess.run(
+                ["powershell", "-Command", command],
+                capture_output=True,
+                text=True,
+                cwd=cwd,
+                timeout=timeout_s,
+            )
+
+        for step in scenario.steps:
+            command = step.get("command", step.get("target", ""))
+            cwd = step.get("cwd", str(self.studies_dir))
+
+            if not command:
+                continue
+
+            try:
+                # subprocess.run blocks until the command ends; running it in
+                # the default executor keeps the event loop responsive.
+                result = await loop.run_in_executor(None, run_blocking, command, cwd)
+
+                details["command"] = command
+                details["returncode"] = result.returncode
+                details["stdout"] = result.stdout[:1000] if result.stdout else ""
+                details["stderr"] = result.stderr[:1000] if result.stderr else ""
+
+                if result.returncode != 0:
+                    return TestResult(
+                        scenario_id=scenario.id,
+                        scenario_name=scenario.name,
+                        passed=False,
+                        duration_ms=0,
+                        error=f"Command failed with code {result.returncode}",
+                        details=details,
+                    )
+
+            except subprocess.TimeoutExpired:
+                return TestResult(
+                    scenario_id=scenario.id,
+                    scenario_name=scenario.name,
+                    passed=False,
+                    duration_ms=0,
+                    error=f"Command timed out after {scenario.timeout_ms}ms",
+                    details={"command": command},
+                )
+            except Exception as e:
+                # Covers, among others, a missing PowerShell executable.
+                return TestResult(
+                    scenario_id=scenario.id,
+                    scenario_name=scenario.name,
+                    passed=False,
+                    duration_ms=0,
+                    error=f"CLI execution failed: {e}",
+                    details={"command": command},
+                )
+
+        passed = self._check_outcome(details, scenario.expected_outcome)
+
+        return TestResult(
+            scenario_id=scenario.id,
+            scenario_name=scenario.name,
+            passed=passed,
+            duration_ms=0,
+            details=details,
+        )
+
+    async def _execute_filesystem_scenario(self, scenario: TestScenario) -> TestResult:
+        """
+        Execute a filesystem test scenario.
+
+        Supported step actions ("check_exists" when omitted):
+        - ``check_exists``: the path's existence must equal
+          ``expected_outcome["exists"]`` (default True).
+        - ``check_file_contains``: the file must contain the step's
+          ``contains`` text.
+        - ``check_json_valid``: the file must parse as JSON.
+
+        Relative paths are resolved against the parent of the studies
+        directory. Files are read as UTF-8 regardless of the platform's
+        default encoding. The scenario fails on the first step that does not
+        hold; steps with any other action are ignored.
+
+        Args:
+            scenario: Scenario whose steps describe the checks to perform
+
+        Returns:
+            A failed result for the first failing check, otherwise a passing
+            result with the collected details
+        """
+        details = {}
+
+        def failure(message: str, info: Dict) -> TestResult:
+            # Single place that builds the failed result for this scenario.
+            return TestResult(
+                scenario_id=scenario.id,
+                scenario_name=scenario.name,
+                passed=False,
+                duration_ms=0,
+                error=message,
+                details=info,
+            )
+
+        for step in scenario.steps:
+            action = step.get("action", "check_exists")
+
+            # Resolve relative paths
+            path = Path(step.get("path", ""))
+            if not path.is_absolute():
+                path = self.studies_dir.parent / path
+
+            if action == "check_exists":
+                exists = path.exists()
+                details["path"] = str(path)
+                details["exists"] = exists
+
+                if scenario.expected_outcome.get("exists", True) != exists:
+                    return failure(
+                        f"Path {'does not exist' if not exists else 'exists but should not'}: {path}",
+                        details,
+                    )
+
+            elif action == "check_file_contains":
+                if not path.is_file():
+                    return failure(f"File not found: {path}", details)
+
+                content_check = step.get("contains", "")
+                # Undecodable bytes are replaced rather than raised, so a
+                # stray byte cannot abort the search.
+                content = path.read_text(encoding="utf-8", errors="replace")
+                contains = content_check in content
+                details["contains"] = contains
+                details["search_term"] = content_check
+
+                if not contains:
+                    return failure(f"File does not contain: {content_check}", details)
+
+            elif action == "check_json_valid":
+                if not path.is_file():
+                    return failure(f"File not found: {path}", details)
+
+                try:
+                    with open(path, encoding="utf-8") as f:
+                        json.load(f)
+                except (json.JSONDecodeError, UnicodeDecodeError) as e:
+                    # Bytes that are not UTF-8 make the file invalid JSON too.
+                    return failure(f"Invalid JSON: {e}", {"path": str(path)})
+                details["valid_json"] = True
+
+        return TestResult(
+            scenario_id=scenario.id,
+            scenario_name=scenario.name,
+            passed=True,
+            duration_ms=0,
+            details=details,
+        )
+
+    def _check_outcome(self, details: Dict, expected: Dict) -> bool:
+        """
+        Tell whether the collected details satisfy the expected outcome.
+
+        Expected keys that are absent from ``details`` are ignored. When both
+        sides hold a dict for a key, the comparison recurses with the same
+        rule; any other pair of values must compare equal.
+        """
+        for key, wanted in expected.items():
+            if key in details:
+                found = details[key]
+
+                if isinstance(wanted, dict) and isinstance(found, dict):
+                    matches = self._check_outcome(found, wanted)
+                else:
+                    # Lists and scalars alike are compared for plain equality.
+                    matches = not (found != wanted)
+
+                if not matches:
+                    return False
+
+        return True
+
+    async def verify_fix(self, fix: Dict) -> Dict:
+        """
+        Check that every file listed by a fix is present on disk.
+
+        Args:
+            fix: Fix dict; ``issue_id`` (default "unknown") and
+                ``files_modified`` (default empty) are read from it
+
+        Returns:
+            Dict with "issue_id", "passed" and "details"; on failure the
+            details name the first missing file under "missing_file"
+        """
+        # Only existence is checked; the search stops at the first missing file.
+        candidates = (Path(entry) for entry in fix.get("files_modified", []))
+        first_missing = next((p for p in candidates if not p.exists()), None)
+
+        details = {}
+        if first_missing is not None:
+            details["missing_file"] = str(first_missing)
+
+        return {
+            "issue_id": fix.get("issue_id", "unknown"),
+            "passed": first_missing is None,
+            "details": details,
+        }
+
+    async def _probe_http(self, url: str) -> str:
+        """Send one GET to ``url`` and return "healthy", "unhealthy (...)" or "error: ..."."""
+        try:
+            async with self._session.get(url) as resp:
+                if resp.status == 200:
+                    return "healthy"
+                return f"unhealthy (status {resp.status})"
+        except Exception as e:
+            # Best effort: an unreachable service is a status, not a crash.
+            return f"error: {e}"
+
+    async def run_health_check(self) -> Dict:
+        """
+        Run a quick health check on dashboard components.
+
+        The API is probed at ``base_url`` + "/health". The frontend is probed
+        at the ``frontend_url`` config key, defaulting to
+        ``http://localhost:3000``. The websocket is not probed and is always
+        reported as "unknown".
+
+        Returns:
+            Health status dict with the keys "timestamp", "api", "frontend"
+            and "websocket"
+        """
+        await self.connect()
+
+        health = {
+            "timestamp": datetime.now().isoformat(),
+            "api": "unknown",
+            "frontend": "unknown",
+            "websocket": "unknown",
+        }
+
+        # Check API
+        health["api"] = await self._probe_http(f"{self.base_url}/health")
+
+        # Check frontend (if available)
+        # NOTE(review): the browser executor defaults to port 3003 while this
+        # check defaults to 3000; set "frontend_url" in the config to align
+        # them until the intended default is confirmed.
+        frontend_url = self.config.get("frontend_url", "http://localhost:3000")
+        health["frontend"] = await self._probe_http(frontend_url)
+
+        return health