feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
585
optimization_engine/devloop/test_runner.py
Normal file
585
optimization_engine/devloop/test_runner.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Dashboard Test Runner - Automated testing through the Atomizer dashboard.
|
||||
|
||||
Supports test types:
|
||||
- API tests (REST endpoint verification)
|
||||
- Browser tests (UI interaction via Playwright)
|
||||
- CLI tests (command line execution)
|
||||
- Filesystem tests (file/directory verification)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class TestStep:
    """A single step in a test scenario.

    Attributes:
        action: Identifier of the operation to perform.
        target: Optional target of the action (defaults to ``None``).
        data: Optional payload dict for the action (defaults to ``None``).
        timeout_ms: Timeout for the step in milliseconds (defaults to 5000).
    """

    # The "Test" prefix makes pytest try to collect this class as a test
    # case (and warn about its __init__); opt out explicitly.
    __test__ = False

    action: str
    target: Optional[str] = None
    data: Optional[Dict] = None
    timeout_ms: int = 5000
|
||||
|
||||
|
||||
@dataclass
class TestScenario:
    """A complete test scenario.

    Attributes:
        id: Scenario identifier.
        name: Human-readable scenario name.
        type: Scenario kind: "api", "browser", "cli" or "filesystem".
        steps: Step dicts to run; each instance gets its own empty list by
            default.
        expected_outcome: Expected key/value pairs; defaults to
            ``{"status": "pass"}`` (a fresh dict per instance).
        timeout_ms: Scenario timeout in milliseconds (defaults to 30000).
    """

    # The "Test" prefix makes pytest try to collect this class as a test
    # case (and warn about its __init__); opt out explicitly.
    __test__ = False

    id: str
    name: str
    type: str  # "api", "browser", "cli", "filesystem"
    steps: List[Dict] = field(default_factory=list)
    expected_outcome: Dict = field(default_factory=lambda: {"status": "pass"})
    timeout_ms: int = 30000
|
||||
|
||||
|
||||
@dataclass
class TestResult:
    """Result of a single test.

    Attributes:
        scenario_id: Identifier of the scenario that produced this result.
        scenario_name: Name of that scenario.
        passed: Whether the scenario passed.
        duration_ms: Duration of the scenario in milliseconds.
        error: Optional error message (defaults to ``None``).
        details: Optional dict of collected details (defaults to ``None``).
    """

    # The "Test" prefix makes pytest try to collect this class as a test
    # case (and warn about its __init__); opt out explicitly.
    __test__ = False

    scenario_id: str
    scenario_name: str
    passed: bool
    duration_ms: float
    error: Optional[str] = None
    details: Optional[Dict] = None
|
||||
|
||||
|
||||
@dataclass
class TestReport:
    """Complete test report.

    Attributes:
        timestamp: ISO-8601 string taken from ``datetime.now()`` when the
            report is created.
        scenarios: Collected ``TestResult`` objects (fresh list per report).
        summary: Counters with the keys "passed", "failed" and "total",
            all starting at 0 (fresh dict per report).
    """

    # The "Test" prefix makes pytest try to collect this class as a test
    # case (and warn about its __init__); opt out explicitly.
    __test__ = False

    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    scenarios: List["TestResult"] = field(default_factory=list)
    summary: Dict = field(default_factory=lambda: {"passed": 0, "failed": 0, "total": 0})
|
||||
|
||||
|
||||
class DashboardTestRunner:
    """
    Automated test runner for the Atomizer dashboard.

    Executes test scenarios against:
    - Backend API endpoints (aiohttp)
    - Frontend UI (via Playwright if available)
    - CLI commands (run through PowerShell)
    - Filesystem assertions

    Recognised ``config`` keys: ``dashboard_url``, ``websocket_url``,
    ``frontend_url``, ``test_timeout_ms`` and ``studies_dir``.
    """

    # HTTP verbs accepted as the ``action`` of an API step.
    _API_ACTIONS = ("get", "post", "put", "delete")
    # Verbs whose step ``data`` is sent as a JSON request body.
    _API_BODY_ACTIONS = ("post", "put")

    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the test runner.

        Args:
            config: Configuration with dashboard URLs and timeouts. Missing
                keys fall back to the defaults below.
        """
        self.config = config or {}
        self.base_url = self.config.get("dashboard_url", "http://localhost:8000")
        self.ws_url = self.config.get("websocket_url", "ws://localhost:8000")
        self.timeout_ms = self.config.get("test_timeout_ms", 30000)
        # NOTE(review): the fallback is a machine-specific Windows path; pass
        # ``studies_dir`` explicitly on any other machine.
        self.studies_dir = Path(self.config.get("studies_dir", "C:/Users/antoi/Atomizer/studies"))
        # Optional override for the frontend origin. When unset, the browser
        # runner and the health check keep their historical (and differing)
        # defaults: port 3003 and port 3000 respectively.
        self.frontend_url = self.config.get("frontend_url")

        self._session: Optional[aiohttp.ClientSession] = None
        self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
        self._playwright = None
        self._browser = None

    async def connect(self):
        """Create the shared aiohttp session if it does not exist yet."""
        if self._session is None:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.timeout_ms / 1000)
            )

    async def disconnect(self):
        """Close the websocket, HTTP session and browser, if any are open."""
        if self._ws:
            await self._ws.close()
            self._ws = None
        if self._session:
            await self._session.close()
            self._session = None
        if self._browser:
            await self._browser.close()
            self._browser = None

    async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
        """
        Run a complete test suite.

        Scenarios run sequentially. An exception raised while executing a
        scenario is recorded as a failed result instead of aborting the suite.
        The HTTP session opened here is left open; call ``disconnect()`` when
        done.

        Args:
            scenarios: List of test scenario dicts

        Returns:
            Test report as dict with the keys "timestamp", "scenarios" and
            "summary".
        """
        await self.connect()

        report = TestReport()

        for scenario_dict in scenarios:
            scenario = self._parse_scenario(scenario_dict)
            start_time = datetime.now()

            try:
                result = await self._execute_scenario(scenario)
            except Exception as e:
                logger.exception("Scenario %s failed with error: %s", scenario.id, e)
                # Some exceptions (e.g. timeouts) have an empty message; fall
                # back to the class name so the report is never blank.
                result = self._make_result(
                    scenario, False, error=str(e) or type(e).__name__
                )

            # Executors report duration_ms=0; the wall-clock time is filled in here.
            result.duration_ms = (datetime.now() - start_time).total_seconds() * 1000
            report.scenarios.append(result)
            report.summary["passed" if result.passed else "failed"] += 1
            report.summary["total"] += 1

        return {
            "timestamp": report.timestamp,
            "scenarios": [self._result_to_dict(r) for r in report.scenarios],
            "summary": report.summary,
        }

    def _parse_scenario(self, scenario_dict: Dict) -> "TestScenario":
        """Parse a scenario dict into TestScenario, filling in defaults."""
        return TestScenario(
            id=scenario_dict.get("id", "unknown"),
            name=scenario_dict.get("name", "Unnamed test"),
            type=scenario_dict.get("type", "api"),
            steps=scenario_dict.get("steps", []),
            expected_outcome=scenario_dict.get("expected_outcome", {"status": "pass"}),
            timeout_ms=scenario_dict.get("timeout_ms", self.timeout_ms),
        )

    def _result_to_dict(self, result: "TestResult") -> Dict:
        """Convert TestResult to dict."""
        return {
            "scenario_id": result.scenario_id,
            "scenario_name": result.scenario_name,
            "passed": result.passed,
            "duration_ms": result.duration_ms,
            "error": result.error,
            "details": result.details,
        }

    def _make_result(
        self,
        scenario: "TestScenario",
        passed: bool,
        error: Optional[str] = None,
        details: Optional[Dict] = None,
    ) -> "TestResult":
        """Build a TestResult for ``scenario`` with a placeholder duration of 0."""
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=passed,
            duration_ms=0,
            error=error,
            details=details,
        )

    async def _execute_scenario(self, scenario: "TestScenario") -> "TestResult":
        """Execute a single test scenario by dispatching on ``scenario.type``."""
        logger.info("Executing test: %s (%s)", scenario.name, scenario.type)

        if scenario.type == "api":
            return await self._execute_api_scenario(scenario)
        if scenario.type == "browser":
            return await self._execute_browser_scenario(scenario)
        if scenario.type == "cli":
            return await self._execute_cli_scenario(scenario)
        if scenario.type == "filesystem":
            return await self._execute_filesystem_scenario(scenario)

        return self._make_result(
            scenario, False, error=f"Unknown test type: {scenario.type}"
        )

    async def _execute_api_scenario(self, scenario: "TestScenario") -> "TestResult":
        """
        Execute an API test scenario.

        Each step issues one HTTP request against ``base_url`` + endpoint and
        stores the status code and decoded body in ``details`` (later steps
        overwrite earlier ones). The final ``details`` are compared with the
        scenario's expected outcome.
        """
        details: Dict[str, Any] = {}

        for step in scenario.steps:
            action = step.get("action", "get").lower()
            endpoint = step.get("endpoint", step.get("target", "/"))
            url = f"{self.base_url}{endpoint}"

            if action not in self._API_ACTIONS:
                # Unknown verbs are skipped, but no longer silently.
                logger.warning(
                    "Ignoring unsupported API action %r in scenario %s",
                    action,
                    scenario.id,
                )
                continue

            request_kwargs: Dict[str, Any] = {}
            if action in self._API_BODY_ACTIONS:
                request_kwargs["json"] = step.get("data")

            try:
                async with self._session.request(
                    action.upper(), url, **request_kwargs
                ) as resp:
                    details["status_code"] = resp.status
                    try:
                        # content_type=None disables aiohttp's Content-Type
                        # check, so a non-JSON body ends up in the except
                        # branch below instead of raising ContentTypeError
                        # (a ClientError) and failing the whole request.
                        details["response"] = await resp.json(content_type=None)
                    except (json.JSONDecodeError, UnicodeDecodeError):
                        details["response"] = "Non-JSON response"
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                return self._make_result(
                    scenario,
                    False,
                    error=f"API request failed: {str(e) or type(e).__name__}",
                    details={"url": url, "action": action},
                )

        # Check expected outcome
        passed = self._check_outcome(details, scenario.expected_outcome)
        return self._make_result(scenario, passed, details=details)

    async def _execute_browser_scenario(self, scenario: "TestScenario") -> "TestResult":
        """
        Execute a browser test scenario using Playwright.

        Supported step actions: "navigate", "wait_for", "click", "fill" and
        "screenshot"; other actions are ignored. If Playwright is not
        installed the scenario is reported as passed with an explanatory
        ``error`` so that a missing optional dependency does not fail a suite.
        """
        try:
            from playwright.async_api import async_playwright
        except ImportError:
            logger.warning("Playwright not available, skipping browser test")
            return self._make_result(
                scenario,
                True,  # Skip, don't fail
                error="Playwright not installed - test skipped",
            )

        details: Dict[str, Any] = {}
        # Default frontend origin: port 3003 (Vite dev server).
        frontend_base = self.frontend_url or "http://localhost:3003"

        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    page = await browser.new_page()

                    for step in scenario.steps:
                        action = step.get("action", "navigate")

                        if action == "navigate":
                            url = step.get("url", "/")
                            # Relative URLs are resolved against the frontend origin.
                            full_url = f"{frontend_base}{url}" if url.startswith("/") else url
                            await page.goto(full_url, timeout=scenario.timeout_ms)
                            details["navigated_to"] = full_url

                        elif action == "wait_for":
                            selector = step.get("selector")
                            if selector:
                                await page.wait_for_selector(selector, timeout=scenario.timeout_ms)
                                details["found_selector"] = selector

                        elif action == "click":
                            selector = step.get("selector")
                            if selector:
                                await page.click(selector, timeout=scenario.timeout_ms)
                                details["clicked"] = selector

                        elif action == "fill":
                            selector = step.get("selector")
                            value = step.get("value", "")
                            if selector:
                                await page.fill(selector, value, timeout=scenario.timeout_ms)
                                details["filled"] = {selector: value}

                        elif action == "screenshot":
                            path = step.get("path", f"test_{scenario.id}.png")
                            await page.screenshot(path=path)
                            details["screenshot"] = path
                finally:
                    # Close the browser even when a step raised.
                    await browser.close()

        except Exception as e:
            return self._make_result(
                scenario, False, error=f"Browser test failed: {e}", details=details
            )

        return self._make_result(scenario, True, details=details)

    @staticmethod
    def _run_powershell(command: str, cwd: str, timeout_s: float) -> subprocess.CompletedProcess:
        """Run ``command`` through PowerShell and return the completed process."""
        return subprocess.run(
            ["powershell", "-Command", command],
            capture_output=True,
            text=True,
            errors="replace",  # undecodable output must not crash the test
            cwd=cwd,
            timeout=timeout_s,
        )

    async def _execute_cli_scenario(self, scenario: "TestScenario") -> "TestResult":
        """
        Execute a CLI test scenario.

        Each step's command is run through PowerShell in ``cwd`` (default:
        the studies directory). A step fails the scenario as soon as its
        return code differs from ``expected_outcome["returncode"]`` (0 when
        not specified). Steps without a command are skipped.
        """
        details: Dict[str, Any] = {}
        expected_returncode = scenario.expected_outcome.get("returncode", 0)
        loop = asyncio.get_running_loop()

        for step in scenario.steps:
            command = step.get("command", step.get("target", ""))
            cwd = step.get("cwd", str(self.studies_dir))

            if not command:
                continue

            try:
                # subprocess.run blocks; run it in the default executor so the
                # event loop stays responsive while the command executes.
                result = await loop.run_in_executor(
                    None,
                    self._run_powershell,
                    command,
                    cwd,
                    scenario.timeout_ms / 1000,
                )
            except subprocess.TimeoutExpired:
                return self._make_result(
                    scenario,
                    False,
                    error=f"Command timed out after {scenario.timeout_ms}ms",
                    details={"command": command},
                )
            except Exception as e:
                return self._make_result(
                    scenario,
                    False,
                    error=f"CLI execution failed: {e}",
                    details={"command": command},
                )

            details["command"] = command
            details["returncode"] = result.returncode
            # Output is truncated to keep reports small.
            details["stdout"] = result.stdout[:1000] if result.stdout else ""
            details["stderr"] = result.stderr[:1000] if result.stderr else ""

            if result.returncode != expected_returncode:
                return self._make_result(
                    scenario,
                    False,
                    error=f"Command failed with code {result.returncode}",
                    details=details,
                )

        passed = self._check_outcome(details, scenario.expected_outcome)
        return self._make_result(scenario, passed, details=details)

    async def _execute_filesystem_scenario(self, scenario: "TestScenario") -> "TestResult":
        """
        Execute a filesystem test scenario.

        Supported step actions: "check_exists", "check_file_contains" and
        "check_json_valid"; other actions are ignored. Relative paths are
        resolved against the parent of the studies directory. The scenario
        fails on the first step whose check does not hold.
        """
        details: Dict[str, Any] = {}

        for step in scenario.steps:
            action = step.get("action", "check_exists")
            path_str = step.get("path", "")

            # Resolve relative paths
            path = Path(path_str)
            if not path.is_absolute():
                path = self.studies_dir.parent / path_str

            if action == "check_exists":
                exists = path.exists()
                details["path"] = str(path)
                details["exists"] = exists

                if scenario.expected_outcome.get("exists", True) != exists:
                    return self._make_result(
                        scenario,
                        False,
                        error=f"Path {'does not exist' if not exists else 'exists but should not'}: {path}",
                        details=details,
                    )

            elif action == "check_file_contains":
                content_check = step.get("contains", "")
                if not path.is_file():
                    return self._make_result(
                        scenario, False, error=f"File not found: {path}", details=details
                    )

                # Explicit UTF-8 instead of the platform default; undecodable
                # bytes are replaced so a stray byte cannot crash the check.
                content = path.read_text(encoding="utf-8", errors="replace")
                contains = content_check in content
                details["contains"] = contains
                details["search_term"] = content_check

                if not contains:
                    return self._make_result(
                        scenario,
                        False,
                        error=f"File does not contain: {content_check}",
                        details=details,
                    )

            elif action == "check_json_valid":
                if not path.is_file():
                    return self._make_result(
                        scenario, False, error=f"File not found: {path}", details=details
                    )

                try:
                    with open(path, encoding="utf-8") as f:
                        json.load(f)
                except (json.JSONDecodeError, UnicodeDecodeError) as e:
                    return self._make_result(
                        scenario,
                        False,
                        error=f"Invalid JSON: {e}",
                        details={"path": str(path)},
                    )
                details["valid_json"] = True

        return self._make_result(scenario, True, details=details)

    def _check_outcome(self, details: Dict, expected: Dict) -> bool:
        """
        Check if test details match expected outcome.

        Only keys present in both dicts are compared; an expected key that is
        missing from ``details`` is ignored. Nested dicts are compared
        recursively with the same rule; lists and scalars must be equal.
        """
        for key, expected_value in expected.items():
            if key not in details:
                continue

            actual_value = details[key]

            # Handle nested dicts
            if isinstance(expected_value, dict) and isinstance(actual_value, dict):
                if not self._check_outcome(actual_value, expected_value):
                    return False
            # Lists and simple values: plain equality
            elif actual_value != expected_value:
                return False

        return True

    async def verify_fix(self, fix: Dict) -> Dict:
        """
        Verify that a specific fix was successful.

        Currently this only checks that every modified file exists.

        Args:
            fix: Fix dict with issue_id and files_modified

        Returns:
            Dict with "issue_id", "passed" and "details". On failure,
            ``details["missing_file"]`` holds the first missing path and
            ``details["missing_files"]`` all of them.
        """
        issue_id = fix.get("issue_id", "unknown")
        files_modified = fix.get("files_modified", [])

        details: Dict[str, Any] = {}

        # Check that modified files exist
        missing = [str(p) for p in map(Path, files_modified) if not p.exists()]
        if missing:
            details["missing_file"] = missing[0]
            details["missing_files"] = missing

        # Could add more sophisticated verification here

        return {
            "issue_id": issue_id,
            "passed": not missing,
            "details": details,
        }

    async def _probe_http(self, url: str) -> str:
        """GET ``url`` and summarise the outcome as a health-status string."""
        try:
            async with self._session.get(url) as resp:
                if resp.status == 200:
                    return "healthy"
                return f"unhealthy (status {resp.status})"
        except Exception as e:
            # Best-effort probe: any failure is reported, never raised.
            return f"error: {e}"

    async def run_health_check(self) -> Dict:
        """
        Run a quick health check on dashboard components.

        Probes the API ``/health`` endpoint and the frontend root. The
        websocket is not probed and is always reported as "unknown".

        Returns:
            Health status dict with the keys "timestamp", "api", "frontend"
            and "websocket".
        """
        await self.connect()

        health = {
            "timestamp": datetime.now().isoformat(),
            "api": "unknown",
            "frontend": "unknown",
            "websocket": "unknown",
        }

        # Check API
        health["api"] = await self._probe_http(f"{self.base_url}/health")

        # Check frontend (if available)
        health["frontend"] = await self._probe_http(
            self.frontend_url or "http://localhost:3000"
        )

        return health
|
||||
Reference in New Issue
Block a user