Files
Atomizer/optimization_engine/devloop/test_runner.py
Anto01 3193831340 feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
2026-01-24 21:18:18 -05:00

586 lines
20 KiB
Python

"""
Dashboard Test Runner - Automated testing through the Atomizer dashboard.
Supports test types:
- API tests (REST endpoint verification)
- Browser tests (UI interaction via Playwright)
- CLI tests (command line execution)
- Filesystem tests (file/directory verification)
"""
import asyncio
import json
import logging
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import aiohttp
logger = logging.getLogger(__name__)
@dataclass
class TestStep:
    """One action within a test scenario.

    Only ``action`` is required; the remaining fields default to "nothing
    extra" and a five-second timeout.
    """

    # Name of the action to perform.
    action: str
    # What the action is applied to, when it needs a target.
    target: Optional[str] = None
    # Optional payload dictionary accompanying the action.
    data: Optional[Dict] = None
    # Time budget for this step, in milliseconds.
    timeout_ms: int = 5_000
@dataclass
class TestScenario:
    """A named group of steps together with the outcome they should produce."""

    # Identifier echoed back in the results.
    id: str
    # Human-readable title.
    name: str
    # Scenario category: "api", "browser", "cli" or "filesystem".
    type: str
    # Raw step dictionaries, in execution order.
    steps: List[Dict] = field(default_factory=list)
    # Key/value pairs the collected details are compared against.
    expected_outcome: Dict = field(default_factory=lambda: dict(status="pass"))
    # Time budget for the scenario, in milliseconds.
    timeout_ms: int = 30_000
@dataclass
class TestResult:
    """Outcome record for one executed scenario."""

    # Identifier of the scenario this result belongs to.
    scenario_id: str
    # Display name of that scenario.
    scenario_name: str
    # True when the scenario met its expectations.
    passed: bool
    # Elapsed time in milliseconds.
    duration_ms: float
    # Failure (or skip) message; None when there is nothing to report.
    error: Optional[str] = None
    # Free-form data gathered while running the scenario.
    details: Optional[Dict] = None
@dataclass
class TestReport:
    """Aggregate of all scenario results from one suite run."""

    # ISO-8601 local timestamp taken when the report object is created.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # Individual scenario results, in execution order.
    scenarios: List[TestResult] = field(default_factory=list)
    # Running pass/fail/total counters.
    summary: Dict = field(default_factory=lambda: dict(passed=0, failed=0, total=0))
class DashboardTestRunner:
    """
    Drive automated checks against the Atomizer dashboard.

    Scenarios can target:
    - Backend API endpoints
    - Frontend UI (via Playwright if available)
    - CLI commands
    - Filesystem assertions
    """

    def __init__(self, config: Optional[Dict] = None):
        """
        Store the configuration and start with no open connections.

        Args:
            config: Optional settings. The keys read here are
                "dashboard_url", "websocket_url", "test_timeout_ms" and
                "studies_dir"; each falls back to a built-in default
                when absent.
        """
        settings = config if config else {}
        lookup = settings.get

        self.config = settings
        self.base_url = lookup("dashboard_url", "http://localhost:8000")
        self.ws_url = lookup("websocket_url", "ws://localhost:8000")
        self.timeout_ms = lookup("test_timeout_ms", 30000)
        # NOTE(review): the fallback below is an absolute path on one
        # particular machine; other hosts need to pass "studies_dir".
        self.studies_dir = Path(lookup("studies_dir", "C:/Users/antoi/Atomizer/studies"))

        # Connection handles; all start out unset.
        self._session: Optional[aiohttp.ClientSession] = None
        self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
        self._playwright = None
        self._browser = None
async def connect(self):
    """Create the shared HTTP session on first use; later calls do nothing."""
    if self._session is not None:
        return
    # aiohttp takes seconds, while the runner keeps its timeout in milliseconds.
    total_seconds = self.timeout_ms / 1000
    self._session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=total_seconds)
    )
async def disconnect(self):
    """Close the websocket, HTTP session and browser (whichever are set) and clear them."""
    # Same order as before: websocket first, then session, then browser.
    for attr in ("_ws", "_session", "_browser"):
        handle = getattr(self, attr)
        if not handle:
            continue
        await handle.close()
        setattr(self, attr, None)
async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
    """
    Run a complete test suite.

    Scenarios are executed one after another. A scenario whose executor
    raises is logged and recorded as a failure instead of aborting the run.

    Args:
        scenarios: List of test scenario dicts

    Returns:
        Test report as dict with "timestamp", "scenarios" (one dict per
        result) and "summary" (passed / failed / total counters).
    """
    # Imported locally so this method does not depend on a module-level import.
    import time

    await self.connect()
    report = TestReport()

    for scenario_dict in scenarios:
        scenario = self._parse_scenario(scenario_dict)
        # perf_counter is monotonic, so a wall-clock adjustment during the
        # run cannot produce a negative or inflated duration.
        started = time.perf_counter()
        try:
            result = await self._execute_scenario(scenario)
        except Exception as e:
            logger.error(f"Scenario {scenario.id} failed with error: {e}")
            result = TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=str(e),
            )

        # Executors report duration_ms=0; the real figure is filled in here.
        result.duration_ms = (time.perf_counter() - started) * 1000
        report.scenarios.append(result)
        report.summary["passed" if result.passed else "failed"] += 1
        report.summary["total"] += 1

    return {
        "timestamp": report.timestamp,
        "scenarios": [self._result_to_dict(r) for r in report.scenarios],
        "summary": report.summary,
    }
def _parse_scenario(self, scenario_dict: Dict) -> TestScenario:
    """Build a TestScenario from a raw dict, using defaults for absent keys."""
    # Field name -> value used when the incoming dict lacks that key.
    fallbacks = {
        "id": "unknown",
        "name": "Unnamed test",
        "type": "api",
        "steps": [],
        "expected_outcome": {"status": "pass"},
        "timeout_ms": self.timeout_ms,
    }
    fields = {key: scenario_dict.get(key, default) for key, default in fallbacks.items()}
    return TestScenario(**fields)
def _result_to_dict(self, result: TestResult) -> Dict:
"""Convert TestResult to dict."""
return {
"scenario_id": result.scenario_id,
"scenario_name": result.scenario_name,
"passed": result.passed,
"duration_ms": result.duration_ms,
"error": result.error,
"details": result.details,
}
async def _execute_scenario(self, scenario: TestScenario) -> TestResult:
    """Route a scenario to the executor that matches its type."""
    logger.info(f"Executing test: {scenario.name} ({scenario.type})")

    # (type name, executor) pairs, tried in order.
    routes = (
        ("api", self._execute_api_scenario),
        ("browser", self._execute_browser_scenario),
        ("cli", self._execute_cli_scenario),
        ("filesystem", self._execute_filesystem_scenario),
    )
    for kind, executor in routes:
        if scenario.type == kind:
            return await executor(scenario)

    # No executor matched: report the scenario as failed.
    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=False,
        duration_ms=0,
        error=f"Unknown test type: {scenario.type}",
    )
async def _execute_api_scenario(self, scenario: TestScenario) -> TestResult:
    """
    Execute an API test scenario.

    Each step sends one HTTP request to ``base_url`` + the step's
    "endpoint" (or "target", default "/"). The status code and decoded
    body of the most recent step are stored under "status_code" and
    "response" and finally compared with the scenario's expected outcome.

    A body that is not valid JSON is recorded as the string
    "Non-JSON response" regardless of its content type; an empty body is
    recorded as None.

    Returns:
        A failed TestResult if an action is unsupported or a request
        fails/times out; otherwise a TestResult whose ``passed`` reflects
        the expected-outcome comparison. ``duration_ms`` is always 0 here.
    """
    # Supported actions -> whether the step's "data" is sent as a JSON body.
    sends_json_body = {"get": False, "post": True, "put": True, "delete": False}

    def failure(message, url, action):
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=False,
            duration_ms=0,
            error=message,
            details={"url": url, "action": action},
        )

    details = {}
    for step in scenario.steps:
        action = str(step.get("action") or "get").lower()
        endpoint = step.get("endpoint", step.get("target", "/"))
        url = f"{self.base_url}{endpoint}"

        # An action we cannot perform must fail the scenario; silently
        # skipping it would let the outcome check pass on empty details.
        if action not in sends_json_body:
            return failure(f"Unsupported API action: {action}", url, action)

        send = getattr(self._session, action)
        kwargs = {"json": step.get("data")} if sends_json_body[action] else {}
        try:
            async with send(url, **kwargs) as resp:
                details["status_code"] = resp.status
                # Read raw bytes instead of resp.json(): the latter rejects
                # non-JSON content types with a ClientError subclass, which
                # would be misreported as a failed request.
                raw = await resp.read()
        except (aiohttp.ClientError, asyncio.TimeoutError) as e:
            reason = str(e) or type(e).__name__
            return failure(f"API request failed: {reason}", url, action)

        if not raw.strip():
            details["response"] = None
            continue
        try:
            details["response"] = json.loads(raw)
        except ValueError:
            # Covers both malformed JSON and undecodable bytes.
            details["response"] = "Non-JSON response"

    # Check expected outcome
    passed = self._check_outcome(details, scenario.expected_outcome)
    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=passed,
        duration_ms=0,
        details=details,
    )
async def _execute_browser_scenario(self, scenario: TestScenario) -> TestResult:
    """
    Execute a browser test scenario using Playwright.

    Supported step actions are "navigate", "wait_for", "click", "fill" and
    "screenshot". URLs starting with "/" are resolved against the frontend
    origin, taken from the optional "frontend_url" config key (default
    ``http://localhost:3003``).

    Returns:
        - a *passing* TestResult carrying a skip message when Playwright is
          not installed (a missing optional dependency is not a failure);
        - a failed TestResult if any step raises or names an unsupported
          action;
        - otherwise a passing TestResult with the collected details.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        logger.warning("Playwright not available, skipping browser test")
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=True,  # Skip, don't fail
            duration_ms=0,
            error="Playwright not installed - test skipped",
        )

    # Frontend origin (port 3003 is the Vite dev server default used here).
    frontend_base = (self.config.get("frontend_url") or "http://localhost:3003").rstrip("/")

    details = {}
    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                page = await browser.new_page()
                for step in scenario.steps:
                    action = step.get("action", "navigate")
                    if action == "navigate":
                        url = step.get("url", "/")
                        full_url = f"{frontend_base}{url}" if url.startswith("/") else url
                        await page.goto(full_url, timeout=scenario.timeout_ms)
                        details["navigated_to"] = full_url
                    elif action == "wait_for":
                        selector = step.get("selector")
                        if selector:
                            await page.wait_for_selector(selector, timeout=scenario.timeout_ms)
                            details["found_selector"] = selector
                    elif action == "click":
                        selector = step.get("selector")
                        if selector:
                            await page.click(selector)
                            details["clicked"] = selector
                    elif action == "fill":
                        selector = step.get("selector")
                        value = step.get("value", "")
                        if selector:
                            await page.fill(selector, value)
                            details["filled"] = {selector: value}
                    elif action == "screenshot":
                        path = step.get("path", f"test_{scenario.id}.png")
                        await page.screenshot(path=path)
                        details["screenshot"] = path
                    else:
                        # Surfaced through the handler below as a failed result.
                        raise ValueError(f"Unsupported browser action: {action}")
            finally:
                # Close the browser even when a step raised.
                await browser.close()
    except Exception as e:
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=False,
            duration_ms=0,
            error=f"Browser test failed: {e}",
            details=details,
        )

    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=True,
        duration_ms=0,
        details=details,
    )
async def _execute_cli_scenario(self, scenario: TestScenario) -> TestResult:
    """
    Execute a CLI test scenario.

    Each step's "command" (or "target") is run through a shell in the
    step's "cwd" (default: the studies directory). Steps without a command
    are skipped. stdout/stderr are truncated to 1000 characters in the
    recorded details, which always describe the most recent command.

    Shell selection: Windows PowerShell where available (always on
    Windows), then PowerShell Core (``pwsh``), then ``sh -c``.

    Returns:
        A failed TestResult on non-zero exit status, timeout, or launch
        error; otherwise a TestResult whose ``passed`` reflects the
        expected-outcome comparison. ``duration_ms`` is always 0 here.
    """
    # Local imports: only this method needs them.
    import functools
    import os
    import shutil

    if os.name == "nt" or shutil.which("powershell"):
        shell_argv = ["powershell", "-Command"]
    elif shutil.which("pwsh"):
        shell_argv = ["pwsh", "-Command"]
    else:
        shell_argv = ["sh", "-c"]

    loop = asyncio.get_running_loop()
    details = {}
    for step in scenario.steps:
        command = step.get("command", step.get("target", ""))
        cwd = step.get("cwd", str(self.studies_dir))
        if not command:
            continue

        run_command = functools.partial(
            subprocess.run,
            [*shell_argv, command],
            capture_output=True,
            text=True,
            cwd=cwd,
            timeout=scenario.timeout_ms / 1000,
        )
        try:
            # subprocess.run blocks; run it in a worker thread so the event
            # loop stays responsive while the command executes.
            result = await loop.run_in_executor(None, run_command)
        except subprocess.TimeoutExpired:
            return TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=f"Command timed out after {scenario.timeout_ms}ms",
                details={"command": command},
            )
        except Exception as e:
            return TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=f"CLI execution failed: {e}",
                details={"command": command},
            )

        details["command"] = command
        details["returncode"] = result.returncode
        details["stdout"] = result.stdout[:1000] if result.stdout else ""
        details["stderr"] = result.stderr[:1000] if result.stderr else ""
        if result.returncode != 0:
            return TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=f"Command failed with code {result.returncode}",
                details=details,
            )

    passed = self._check_outcome(details, scenario.expected_outcome)
    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=passed,
        duration_ms=0,
        details=details,
    )
async def _execute_filesystem_scenario(self, scenario: TestScenario) -> TestResult:
    """
    Execute a filesystem test scenario.

    Supported step actions:
    - "check_exists": the path's existence must equal the scenario's
      expected "exists" value (default True).
    - "check_file_contains": the file must exist and contain the step's
      "contains" string.
    - "check_json_valid": the file must exist and parse as JSON.

    Relative paths are resolved against the parent of the studies
    directory. Files are read as UTF-8 (a leading BOM is tolerated).

    Returns:
        A failed TestResult at the first step that has no path, names an
        unsupported action, or fails its check; a passing TestResult when
        every step succeeds. ``duration_ms`` is always 0 here.
    """
    details = {}

    def failure(message):
        # Every failing branch reports the same shape; only the message differs.
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=False,
            duration_ms=0,
            error=message,
            details=details,
        )

    for step in scenario.steps:
        action = step.get("action", "check_exists")
        path_str = step.get("path") or step.get("target") or ""
        # An empty path would resolve to the base directory itself and make
        # "check_exists" pass without checking anything.
        if not path_str:
            return failure(f"Filesystem step '{action}' has no path")

        # Resolve relative paths
        path = Path(path_str)
        if not path.is_absolute():
            path = self.studies_dir.parent / path
        details["path"] = str(path)

        if action == "check_exists":
            exists = path.exists()
            details["exists"] = exists
            if scenario.expected_outcome.get("exists", True) != exists:
                state = "exists but should not" if exists else "does not exist"
                return failure(f"Path {state}: {path}")

        elif action == "check_file_contains":
            if not path.is_file():
                return failure(f"File not found: {path}")
            content_check = step.get("contains", "")
            # Undecodable bytes are replaced rather than raising, so the
            # substring search still runs on the readable part.
            content = path.read_text(encoding="utf-8-sig", errors="replace")
            contains = content_check in content
            details["contains"] = contains
            details["search_term"] = content_check
            if not contains:
                return failure(f"File does not contain: {content_check}")

        elif action == "check_json_valid":
            if not path.is_file():
                return failure(f"File not found: {path}")
            try:
                with open(path, encoding="utf-8-sig") as f:
                    json.load(f)
            except ValueError as e:
                # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
                return failure(f"Invalid JSON: {e}")
            details["valid_json"] = True

        else:
            return failure(f"Unsupported filesystem action: {action}")

    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=True,
        duration_ms=0,
        details=details,
    )
def _check_outcome(self, details: Dict, expected: Dict) -> bool:
"""Check if test details match expected outcome."""
for key, expected_value in expected.items():
if key not in details:
continue
actual_value = details[key]
# Handle nested dicts
if isinstance(expected_value, dict) and isinstance(actual_value, dict):
if not self._check_outcome(actual_value, expected_value):
return False
# Handle lists
elif isinstance(expected_value, list) and isinstance(actual_value, list):
if expected_value != actual_value:
return False
# Handle simple values
elif actual_value != expected_value:
return False
return True
async def verify_fix(self, fix: Dict) -> Dict:
    """
    Check that the files a fix claims to have modified are present.

    Args:
        fix: Fix dict with issue_id and files_modified

    Returns:
        Dict with "issue_id", "passed" and "details". On failure,
        "details" names the first missing file under "missing_file";
        later entries are not examined.
    """
    outcome = {
        "issue_id": fix.get("issue_id", "unknown"),
        "passed": True,
        "details": {},
    }

    # Existence is the only check performed; stop at the first missing path.
    for candidate in map(Path, fix.get("files_modified", [])):
        if candidate.exists():
            continue
        outcome["passed"] = False
        outcome["details"]["missing_file"] = str(candidate)
        break

    return outcome
async def run_health_check(self) -> Dict:
    """
    Run a quick health check on dashboard components.

    Probes the API at ``base_url`` + "/health" and the frontend at the
    optional "frontend_url" config value (default
    ``http://localhost:3000``). Each probe is best-effort: any exception
    is recorded as an "error: ..." string instead of being raised.

    Returns:
        Health status dict with "timestamp", "api", "frontend" and
        "websocket". The websocket entry is never probed here and is
        always reported as "unknown".
    """
    await self.connect()
    health = {
        "timestamp": datetime.now().isoformat(),
        "api": "unknown",
        "frontend": "unknown",
        "websocket": "unknown",
    }

    # NOTE(review): the browser scenario executor in this file targets port
    # 3003 for the frontend while this default is 3000 - confirm which one
    # is current. Setting "frontend_url" in the config overrides the default.
    frontend_url = self.config.get("frontend_url") or "http://localhost:3000"

    probes = (
        ("api", f"{self.base_url}/health"),
        ("frontend", frontend_url),
    )
    for component, url in probes:
        try:
            async with self._session.get(url) as resp:
                if resp.status == 200:
                    health[component] = "healthy"
                else:
                    health[component] = f"unhealthy (status {resp.status})"
        except Exception as e:
            health[component] = f"error: {e}"

    return health