feat: Add DevLoop automation and HTML Reports

## DevLoop - Closed-Loop Development System
- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
  python tools/devloop_cli.py start 'Create new feature'
  python tools/devloop_cli.py plan 'Fix bug in X'
  python tools/devloop_cli.py test --study support_arm
  python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)
- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests
- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates
- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
2026-01-24 21:18:18 -05:00
parent a3f18dc377
commit 3193831340
24 changed files with 6437 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
"""
Atomizer DevLoop - Closed-Loop Development System
This module provides autonomous development cycle capabilities:
1. Gemini Pro for strategic planning and analysis
2. Claude Code (Opus 4.5) for implementation
3. Dashboard testing for verification
4. LAC integration for persistent learning
The DevLoop orchestrates the full cycle:
PLAN (Gemini) -> BUILD (Claude) -> TEST (Dashboard) -> ANALYZE (Gemini) -> FIX (Claude) -> VERIFY
Example usage:
from optimization_engine.devloop import DevLoopOrchestrator
orchestrator = DevLoopOrchestrator()
result = await orchestrator.run_development_cycle(
objective="Create support_arm optimization study"
)
"""
# Lazy imports to avoid circular dependencies
def __getattr__(name):
    """Resolve the package's public names lazily (PEP 562).

    Each exported symbol lives in a submodule that is only imported on
    first attribute access, which breaks the circular-import chain.
    """
    lazy_map = {
        "DevLoopOrchestrator": ".orchestrator",
        "LoopPhase": ".orchestrator",
        "LoopState": ".orchestrator",
        "DashboardTestRunner": ".test_runner",
        "TestScenario": ".test_runner",
        "GeminiPlanner": ".planning",
        "ProblemAnalyzer": ".analyzer",
        "ClaudeCodeBridge": ".claude_bridge",
    }
    if name not in lazy_map:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    import importlib

    module = importlib.import_module(lazy_map[name], __name__)
    return getattr(module, name)
# Public API of the devloop package; each name is resolved lazily by the
# module-level __getattr__.
__all__ = [
    "DevLoopOrchestrator",
    "LoopPhase",
    "LoopState",
    "DashboardTestRunner",
    "TestScenario",
    "GeminiPlanner",
    "ProblemAnalyzer",
    # Fix: ClaudeCodeBridge is exported by __getattr__ but was missing here,
    # so `from ... import *` and tooling did not see it.
    "ClaudeCodeBridge",
]

View File

@@ -0,0 +1,421 @@
"""
Problem Analyzer - Analyze test results and generate fix plans using Gemini.
Handles:
- Root cause analysis from test failures
- Pattern detection across failures
- Fix plan generation
- Priority assessment
"""
import asyncio
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class Issue:
    """A detected issue from test results."""
    # Unique identifier, e.g. "api_issue_1".
    id: str
    # Human-readable summary of what went wrong.
    description: str
    severity: str = "medium"  # "critical", "high", "medium", "low"
    # Coarse classification, e.g. "api", "filesystem", "ui", "cli".
    category: str = "unknown"
    # Source files/directories suspected to be involved.
    affected_files: List[str] = field(default_factory=list)
    # IDs of the test scenarios that exposed this issue.
    test_ids: List[str] = field(default_factory=list)
    # Explanation of why the failure happened, when known.
    root_cause: Optional[str] = None
@dataclass
class FixPlan:
    """Plan for fixing an issue."""
    # ID of the Issue this plan addresses.
    issue_id: str
    # One-sentence description of the fix strategy.
    approach: str
    # Ordered steps, e.g. {"action": "edit", "file": ..., "description": ...}.
    steps: List[Dict] = field(default_factory=list)
    estimated_effort: str = "medium"  # "low", "medium", "high"
    # How to undo the fix if it makes things worse.
    rollback_steps: List[str] = field(default_factory=list)
@dataclass
class AnalysisReport:
    """Complete analysis report."""
    # ISO-8601 creation time, captured when the report is instantiated.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # True when at least one issue was detected.
    issues_found: bool = False
    issues: List[Issue] = field(default_factory=list)
    # Keyed by issue id.
    fix_plans: Dict[str, FixPlan] = field(default_factory=dict)
    # Cross-failure patterns, e.g. {"pattern": ..., "occurrences": ...}.
    patterns: List[Dict] = field(default_factory=list)
    # High-level, human-readable suggestions.
    recommendations: List[str] = field(default_factory=list)
class ProblemAnalyzer:
    """
    Gemini-powered analysis of test failures and improvement opportunities.

    Capabilities:
    - Deep analysis of test results
    - Root cause identification
    - Pattern detection across failures
    - Fix plan generation with priority
    """

    def __init__(self, gemini_planner: Optional[Any] = None):
        """
        Initialize the analyzer.

        Args:
            gemini_planner: GeminiPlanner instance for API access. Created
                lazily on first use of ``self.planner`` when omitted.
        """
        self._planner = gemini_planner
        # Reserved for cross-run tracking; nothing appends to it yet.
        # (Annotation quoted: AnalysisReport need not exist at runtime here.)
        self._history: List["AnalysisReport"] = []

    @property
    def planner(self):
        """Get or create the Gemini planner (imported lazily to avoid cycles)."""
        if self._planner is None:
            from .planning import GeminiPlanner

            self._planner = GeminiPlanner()
        return self._planner

    async def analyze_test_results(self, test_report: Dict) -> Dict:
        """
        Perform deep analysis of test results.

        Args:
            test_report: Test report from DashboardTestRunner

        Returns:
            Analysis dict with issues, fix_plans, patterns, recommendations
        """
        summary = test_report.get("summary", {})
        scenarios = test_report.get("scenarios", [])
        # Quick return if all passed
        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "patterns": [],
                "recommendations": ["All tests passed!"],
            }
        # Analyze failures
        failures = [s for s in scenarios if not s.get("passed", True)]
        # Use Gemini for deep analysis if available ("mock" marks a stub client)
        if self.planner.client != "mock":
            return await self._gemini_analysis(test_report, failures)
        return self._rule_based_analysis(test_report, failures)

    async def _gemini_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Use Gemini for sophisticated analysis; fall back to rules on any error."""
        prompt = self._build_analysis_prompt(test_report, failures)
        try:
            # get_running_loop() replaces the deprecated get_event_loop();
            # this coroutine only ever executes inside a running loop.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, lambda: self.planner._model.generate_content(prompt)
            )
            text = response.text
            # Parse a fenced ```json block out of the response, if present.
            if "```json" in text:
                start = text.find("```json") + 7
                end = text.find("```", start)
                # Tolerate a missing closing fence instead of silently
                # slicing to index -1 (which dropped the last character).
                json_str = (text[start:end] if end != -1 else text[start:]).strip()
                analysis = json.loads(json_str)
            else:
                analysis = self._rule_based_analysis(test_report, failures)
            logger.info(f"Gemini analysis found {len(analysis.get('issues', []))} issues")
            return analysis
        except Exception as e:
            # API errors and malformed JSON both degrade to rule-based analysis.
            logger.error(f"Gemini analysis failed: {e}, falling back to rule-based")
            return self._rule_based_analysis(test_report, failures)

    def _build_analysis_prompt(self, test_report: Dict, failures: List[Dict]) -> str:
        """Build the analysis prompt sent to Gemini (defines its JSON contract)."""
        return f"""## Test Failure Analysis
### Test Report Summary
- Total Tests: {test_report.get("summary", {}).get("total", 0)}
- Passed: {test_report.get("summary", {}).get("passed", 0)}
- Failed: {test_report.get("summary", {}).get("failed", 0)}
### Failed Tests
{json.dumps(failures, indent=2)}
### Analysis Required
Analyze these test failures and provide:
1. **Root Cause Analysis**: What caused each failure?
2. **Pattern Detection**: Are there recurring issues?
3. **Fix Priority**: Which issues should be addressed first?
4. **Implementation Plan**: Specific code changes needed
Output as JSON:
```json
{{
"issues_found": true,
"issues": [
{{
"id": "issue_001",
"description": "What went wrong",
"severity": "high|medium|low",
"category": "api|ui|config|filesystem|logic",
"affected_files": ["path/to/file.py"],
"test_ids": ["test_001"],
"root_cause": "Why it happened"
}}
],
"fix_plans": {{
"issue_001": {{
"issue_id": "issue_001",
"approach": "How to fix it",
"steps": [
{{"action": "edit", "file": "path/to/file.py", "description": "Change X to Y"}}
],
"estimated_effort": "low|medium|high",
"rollback_steps": ["How to undo if needed"]
}}
}},
"patterns": [
{{"pattern": "Common issue type", "occurrences": 3, "suggestion": "Systemic fix"}}
],
"recommendations": [
"High-level improvement suggestions"
]
}}
```
Focus on actionable, specific fixes that Claude Code can implement.
"""

    def _rule_based_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Rule-based analysis when Gemini is not available.

        Buckets failures by scenario id / detail keys, then emits one issue
        plus a canned fix plan per API and filesystem failure, simple
        recurrence patterns, and high-level recommendations.
        """
        issues = []
        fix_plans = {}
        patterns = []
        # Categorize failures
        api_failures = []
        filesystem_failures = []
        browser_failures = []
        cli_failures = []
        for failure in failures:
            scenario_id = failure.get("scenario_id", "unknown")
            details = failure.get("details", {})
            # Detect issue type from the scenario id or the detail keys.
            # (An unused `error` local from the original loop was removed.)
            if "api" in scenario_id.lower() or "status_code" in details:
                api_failures.append(failure)
            elif "filesystem" in scenario_id.lower() or "exists" in details:
                filesystem_failures.append(failure)
            elif "browser" in scenario_id.lower():
                browser_failures.append(failure)
            elif "cli" in scenario_id.lower() or "command" in details:
                cli_failures.append(failure)
        # Generate issues for API failures
        for i, failure in enumerate(api_failures):
            issue_id = f"api_issue_{i + 1}"
            status = failure.get("details", {}).get("status_code", "unknown")
            issues.append(
                {
                    "id": issue_id,
                    "description": f"API request failed with status {status}",
                    # Server-side errors rank above everything else.
                    "severity": "high" if status in (500, 503) else "medium",
                    "category": "api",
                    "affected_files": self._guess_api_files(failure),
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": failure.get("error", "Unknown API error"),
                }
            )
            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Check API endpoint implementation",
                "steps": [
                    {"action": "check", "description": "Verify endpoint exists in routes"},
                    {"action": "test", "description": "Run endpoint manually with curl"},
                ],
                "estimated_effort": "medium",
                "rollback_steps": [],
            }
        # Generate issues for filesystem failures
        for i, failure in enumerate(filesystem_failures):
            issue_id = f"fs_issue_{i + 1}"
            path = failure.get("details", {}).get("path", "unknown path")
            issues.append(
                {
                    "id": issue_id,
                    "description": f"Expected file/directory not found: {path}",
                    "severity": "high",
                    "category": "filesystem",
                    "affected_files": [path],
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": "File was not created during implementation",
                }
            )
            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Create missing file/directory",
                "steps": [
                    {"action": "create", "path": path, "description": f"Create {path}"},
                ],
                "estimated_effort": "low",
                "rollback_steps": [f"Remove {path}"],
            }
        # Detect patterns (only reported when a category recurs)
        if len(api_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple API failures",
                    "occurrences": len(api_failures),
                    "suggestion": "Check if backend server is running",
                }
            )
        if len(filesystem_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple missing files",
                    "occurrences": len(filesystem_failures),
                    "suggestion": "Review study creation process",
                }
            )
        # Generate recommendations
        recommendations = []
        if api_failures:
            recommendations.append("Verify backend API is running on port 8000")
        if filesystem_failures:
            recommendations.append("Check that study directory structure is correctly created")
        if browser_failures:
            recommendations.append("Ensure frontend is running on port 3000")
        if cli_failures:
            recommendations.append("Check Python environment and script paths")
        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "patterns": patterns,
            "recommendations": recommendations,
        }

    def _guess_api_files(self, failure: Dict) -> List[str]:
        """Guess which API files might be affected.

        The failure payload does not identify the endpoint, so return the
        directories where route/service code lives. (A dead lookup of the
        "response" detail in the original was removed.)
        """
        return [
            "atomizer-dashboard/backend/api/routes/",
            "atomizer-dashboard/backend/api/services/",
        ]

    async def analyze_iteration_history(self, iterations: List[Dict]) -> Dict:
        """
        Analyze patterns across multiple iterations.

        Args:
            iterations: List of IterationResult dicts

        Returns:
            Cross-iteration analysis (success rate, recurring issue counts)
        """
        recurring_issues: Dict[str, int] = {}
        successes = 0
        for iteration in iterations:
            if iteration.get("success"):
                successes += 1
            # Count issue categories across every iteration's analysis.
            analysis = iteration.get("analysis", {})
            for issue in analysis.get("issues", []):
                issue_type = issue.get("category", "unknown")
                recurring_issues[issue_type] = recurring_issues.get(issue_type, 0) + 1
        # Guard against division by zero for an empty history.
        total = len(iterations) or 1
        return {
            "total_iterations": len(iterations),
            "success_rate": successes / total,
            "recurring_issues": recurring_issues,
            "most_common_issue": max(recurring_issues, key=recurring_issues.get)
            if recurring_issues
            else None,
            "recommendation": self._generate_meta_recommendation(
                recurring_issues, successes / total
            ),
        }

    def _generate_meta_recommendation(self, recurring_issues: Dict, success_rate: float) -> str:
        """Generate a high-level recommendation based on iteration history."""
        if success_rate >= 0.8:
            return "Development cycle is healthy. Minor issues detected."
        elif success_rate >= 0.5:
            most_common = (
                max(recurring_issues, key=recurring_issues.get) if recurring_issues else "unknown"
            )
            return f"Focus on fixing {most_common} issues to improve success rate."
        else:
            return (
                "Development cycle needs attention. Consider reviewing architecture or test design."
            )

    def get_priority_queue(self, analysis: Dict) -> List[Dict]:
        """
        Get issues sorted by priority for fixing.

        Args:
            analysis: Analysis result dict

        Returns:
            Sorted list of {"issue", "fix_plan"} dicts, most severe first
        """
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})
        # Priority order (unknown severities sort with "medium")
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        sorted_issues = sorted(
            issues, key=lambda x: severity_order.get(x.get("severity", "medium"), 2)
        )
        # Attach each issue's fix plan (None when no plan was generated).
        return [
            {"issue": issue, "fix_plan": fix_plans.get(issue.get("id"))}
            for issue in sorted_issues
        ]

View File

@@ -0,0 +1,170 @@
"""
Browser Test Scenarios for DevLoop
Pre-built Playwright scenarios that can be used for dashboard verification.
These scenarios use the same structure as DashboardTestRunner browser tests
but provide ready-made tests for common dashboard operations.
"""
from typing import Dict, List
def get_study_browser_scenarios(study_name: str) -> List[Dict]:
    """
    Get browser test scenarios for a specific study.

    Args:
        study_name: The study to test

    Returns:
        List of three browser test scenarios: home page, study canvas,
        and dashboard. Each dict matches the DashboardTestRunner browser
        scenario schema (id/name/type/steps/expected_outcome/timeout_ms).
    """
    return [
        {
            "id": "browser_home_loads",
            "name": "Home page loads with studies",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
                {"action": "wait_for", "selector": "button:has-text('trials')"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
        {
            "id": "browser_canvas_loads",
            "name": f"Canvas loads for {study_name}",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": f"/canvas/{study_name}"},
                # Wait for ReactFlow nodes to render
                {"action": "wait_for", "selector": ".react-flow__node"},
            ],
            "expected_outcome": {"status": "pass"},
            # Canvas rendering is slower, so it gets a longer budget.
            "timeout_ms": 20000,
        },
        {
            "id": "browser_dashboard_loads",
            "name": f"Dashboard loads for {study_name}",
            "type": "browser",
            "steps": [
                # Plain string: the URL does not depend on the study
                # (the original used a pointless f-string here).
                {"action": "navigate", "url": "/dashboard"},
                # Wait for dashboard main element to load
                {"action": "wait_for", "selector": "main"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
    ]
def get_ui_verification_scenarios() -> List[Dict]:
    """
    Get scenarios for verifying UI components.
    These are general UI health checks, not study-specific.
    """
    stats_steps: List[Dict] = [
        {"action": "navigate", "url": "/"},
        {"action": "wait_for", "selector": "text=Total Studies"},
        {"action": "wait_for", "selector": "text=Running"},
        {"action": "wait_for", "selector": "text=Total Trials"},
    ]
    folder_steps: List[Dict] = [
        {"action": "navigate", "url": "/"},
        {"action": "wait_for", "selector": "button:has-text('trials')"},
        {"action": "click", "selector": "button:has-text('trials')"},
        # After click, should see study status badges
        {
            "action": "wait_for",
            "selector": "span:has-text('completed'), span:has-text('running'), span:has-text('paused')",
        },
    ]
    return [
        {
            "id": "browser_home_stats",
            "name": "Home page shows statistics",
            "type": "browser",
            "steps": stats_steps,
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        },
        {
            "id": "browser_expand_folder",
            "name": "Topic folder expands on click",
            "type": "browser",
            "steps": folder_steps,
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        },
    ]
def get_chat_verification_scenarios() -> List[Dict]:
    """
    Get scenarios for verifying chat/Claude integration.
    """
    chat_steps: List[Dict] = [
        {"action": "navigate", "url": "/canvas/support_arm"},
        {"action": "wait_for", "selector": ".react-flow__node"},
        # Look for chat toggle or chat panel
        {
            "action": "click",
            "selector": "button[aria-label='Chat'], button:has-text('Chat')",
        },
        {"action": "wait_for", "selector": "textarea, input[type='text']"},
    ]
    chat_panel_scenario = {
        "id": "browser_chat_panel",
        "name": "Chat panel opens",
        "type": "browser",
        "steps": chat_steps,
        "expected_outcome": {"status": "pass"},
        "timeout_ms": 15000,
    }
    return [chat_panel_scenario]
# Standard scenario sets
# Built once at import time from the helper functions above; "full" is the
# union of the general UI checks and the default study's ("support_arm")
# scenarios.
STANDARD_BROWSER_SCENARIOS: Dict[str, List[Dict]] = {
    "quick": [
        {
            "id": "browser_smoke",
            "name": "Dashboard smoke test",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        }
    ],
    # General home-page health checks.
    "home": get_ui_verification_scenarios(),
    # UI checks plus the default study's scenarios.
    "full": get_ui_verification_scenarios() + get_study_browser_scenarios("support_arm"),
}
def get_browser_scenarios(level: str = "quick", study_name: "str | None" = None) -> List[Dict]:
    """
    Get browser scenarios by level.

    Args:
        level: "quick" (smoke), "home" (home page), "full" (all scenarios)
        study_name: Optional study name for study-specific tests

    Returns:
        List of browser test scenarios
    """
    if level == "quick":
        return STANDARD_BROWSER_SCENARIOS["quick"]
    elif level == "home":
        return STANDARD_BROWSER_SCENARIOS["home"]
    elif level == "full":
        # Copy so extending below does not mutate the module-level list.
        scenarios = list(STANDARD_BROWSER_SCENARIOS["full"])
        if study_name:
            # NOTE(review): "full" already includes support_arm's study
            # scenarios, and get_study_browser_scenarios reuses the same
            # scenario ids for every study — so passing a study_name here
            # yields duplicate ids. Confirm downstream tolerates that.
            scenarios.extend(get_study_browser_scenarios(study_name))
        return scenarios
    elif level == "study" and study_name:
        return get_study_browser_scenarios(study_name)
    else:
        # Unknown level (or "study" without a name): fall back to the smoke set.
        return STANDARD_BROWSER_SCENARIOS["quick"]

View File

@@ -0,0 +1,392 @@
"""
Claude Code Bridge - Interface between DevLoop and Claude Code execution.
Handles:
- Translating Gemini plans into Claude Code instructions
- Executing code changes through OpenCode extension or CLI
- Capturing implementation results
"""
import asyncio
import json
import logging
import os
import subprocess
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ImplementationResult:
    """Result of a Claude Code implementation."""
    # NOTE(review): the bridge methods below return plain dicts rather than
    # instances of this dataclass — it appears to document the result shape.
    status: str  # "success", "partial", "error"
    # Paths reported as created/modified by the implementation step.
    files_modified: List[str]
    # Non-fatal problems encountered during execution.
    warnings: List[str]
    # Fatal problems; non-empty when status is "error".
    errors: List[str]
    # Wall-clock duration of the implementation step.
    duration_seconds: float
class ClaudeCodeBridge:
    """
    Bridge between Gemini plans and Claude Code execution.
    Supports multiple execution modes:
    - CLI: Direct Claude Code CLI invocation
    - API: Anthropic API for code generation (if API key available)
    - Manual: Generate instructions for human execution
    """
    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the bridge.
        Args:
            config: Configuration with execution mode and API settings
        """
        self.config = config or {}
        # NOTE(review): hardcoded per-user default workspace path — consider
        # requiring it in config or deriving it from the environment.
        self.workspace = Path(self.config.get("workspace", "C:/Users/antoi/Atomizer"))
        self.execution_mode = self.config.get("mode", "cli")
        # Anthropic client; created lazily by the `client` property.
        self._client = None
    @property
    def client(self):
        """Lazy-load Anthropic client if API mode."""
        # Only attempted in "api" mode; retried on every access until it
        # succeeds (returns None when the package or key is unavailable).
        if self._client is None and self.execution_mode == "api":
            try:
                import anthropic
                api_key = self.config.get("api_key") or os.environ.get("ANTHROPIC_API_KEY")
                if api_key:
                    self._client = anthropic.Anthropic(api_key=api_key)
                    logger.info("Anthropic client initialized")
            except ImportError:
                logger.warning("anthropic package not installed")
        return self._client
    def create_implementation_session(self, plan: Dict) -> str:
        """
        Generate Claude Code instruction from Gemini plan.
        Args:
            plan: Plan dict from GeminiPlanner
        Returns:
            Formatted instruction string for Claude Code
        """
        objective = plan.get("objective", "Unknown objective")
        approach = plan.get("approach", "")
        tasks = plan.get("tasks", [])
        acceptance_criteria = plan.get("acceptance_criteria", [])
        # Markdown header: objective, approach, then a numbered task list.
        instruction = f"""## Implementation Task: {objective}
### Approach
{approach}
### Tasks to Complete
"""
        for i, task in enumerate(tasks, 1):
            instruction += f"""
{i}. **{task.get("description", "Task")}**
- File: `{task.get("file", "TBD")}`
- Priority: {task.get("priority", "medium")}
"""
            # Optional per-task extras only appear when present in the plan.
            if task.get("code_hint"):
                instruction += f" - Hint: {task.get('code_hint')}\n"
            if task.get("dependencies"):
                instruction += f" - Depends on: {', '.join(task['dependencies'])}\n"
        instruction += """
### Acceptance Criteria
"""
        for criterion in acceptance_criteria:
            instruction += f"- [ ] {criterion}\n"
        # Fixed project-wide constraints appended to every instruction.
        instruction += """
### Constraints
- Maintain existing API contracts
- Follow Atomizer coding standards
- Ensure AtomizerSpec v2.0 compatibility
- Create README.md for any new study
- Use existing extractors from SYS_12 when possible
"""
        return instruction
    async def execute_plan(self, plan: Dict) -> Dict:
        """
        Execute an implementation plan.
        Args:
            plan: Plan dict from GeminiPlanner
        Returns:
            Implementation result dict
        """
        instruction = self.create_implementation_session(plan)
        # Dispatch on the configured mode; anything unrecognized goes manual.
        if self.execution_mode == "cli":
            return await self._execute_via_cli(instruction, plan)
        elif self.execution_mode == "api":
            return await self._execute_via_api(instruction, plan)
        else:
            return await self._execute_manual(instruction, plan)
    async def _execute_via_cli(self, instruction: str, plan: Dict) -> Dict:
        """Execute through Claude Code CLI."""
        start_time = datetime.now()
        # Write instruction to temp file
        instruction_file = self.workspace / ".devloop_instruction.md"
        instruction_file.write_text(instruction)
        files_modified = []
        warnings = []
        errors = []
        try:
            # Try to invoke Claude Code CLI
            # Note: This assumes claude-code or similar CLI is available
            # NOTE(review): this interpolates an unquoted workspace path into
            # PowerShell and passes the instruction file *path* (not its
            # contents) as the --print argument — confirm `claude --print`
            # accepts a file path and that the path contains no spaces.
            result = subprocess.run(
                [
                    "powershell",
                    "-Command",
                    f"cd {self.workspace}; claude --print '{instruction_file}'",
                ],
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
                cwd=str(self.workspace),
            )
            if result.returncode == 0:
                # Parse output for modified files
                output = result.stdout
                for line in output.split("\n"):
                    if "Modified:" in line or "Created:" in line:
                        parts = line.split(":", 1)
                        if len(parts) > 1:
                            files_modified.append(parts[1].strip())
                status = "success"
            else:
                errors.append(result.stderr or "CLI execution failed")
                status = "error"
        except subprocess.TimeoutExpired:
            errors.append("CLI execution timed out after 5 minutes")
            status = "error"
        except FileNotFoundError:
            # Claude CLI not found, fall back to manual mode
            # (returns here; the finally block below still cleans up).
            logger.warning("Claude CLI not found, switching to manual mode")
            return await self._execute_manual(instruction, plan)
        except Exception as e:
            errors.append(str(e))
            status = "error"
        finally:
            # Clean up temp file
            if instruction_file.exists():
                instruction_file.unlink()
        duration = (datetime.now() - start_time).total_seconds()
        return {
            "status": status,
            "files": files_modified,
            "warnings": warnings,
            "errors": errors,
            "duration_seconds": duration,
        }
    async def _execute_via_api(self, instruction: str, plan: Dict) -> Dict:
        """Execute through Anthropic API for code generation."""
        # No client (wrong mode, missing key/package): degrade to manual mode.
        if not self.client:
            return await self._execute_manual(instruction, plan)
        start_time = datetime.now()
        files_modified = []
        warnings = []
        errors = []
        try:
            # Use Claude API for code generation
            response = self.client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=8192,
                messages=[
                    {
                        "role": "user",
                        "content": f"""You are implementing code for the Atomizer FEA optimization framework.
{instruction}
For each file that needs to be created or modified, output the complete file content in this format:
### FILE: path/to/file.py
```python
# file content here
```
Be thorough and implement all tasks completely.
""",
                    }
                ],
            )
            # Parse response for file contents
            content = response.content[0].text
            # Extract files from response
            import re
            # Matches "### FILE: <path>" headers followed by a fenced block.
            file_pattern = r"### FILE: (.+?)\n```\w*\n(.*?)```"
            matches = re.findall(file_pattern, content, re.DOTALL)
            for file_path, file_content in matches:
                try:
                    full_path = self.workspace / file_path.strip()
                    full_path.parent.mkdir(parents=True, exist_ok=True)
                    full_path.write_text(file_content.strip())
                    files_modified.append(str(file_path.strip()))
                    logger.info(f"Created/modified: {file_path}")
                except Exception as e:
                    errors.append(f"Failed to write {file_path}: {e}")
            # "partial" = the API replied but no files were parsed out.
            status = "success" if files_modified else "partial"
        except Exception as e:
            errors.append(str(e))
            status = "error"
        duration = (datetime.now() - start_time).total_seconds()
        return {
            "status": status,
            "files": files_modified,
            "warnings": warnings,
            "errors": errors,
            "duration_seconds": duration,
        }
    async def _execute_manual(self, instruction: str, plan: Dict) -> Dict:
        """
        Generate manual instructions (when automation not available).
        Saves instruction to file for human execution.
        """
        start_time = datetime.now()
        # Save instruction for manual execution
        output_file = self.workspace / ".devloop" / "pending_instruction.md"
        output_file.parent.mkdir(parents=True, exist_ok=True)
        output_file.write_text(instruction)
        logger.info(f"Manual instruction saved to: {output_file}")
        return {
            "status": "pending_manual",
            "instruction_file": str(output_file),
            "files": [],
            "warnings": ["Automated execution not available. Please execute manually."],
            "errors": [],
            "duration_seconds": (datetime.now() - start_time).total_seconds(),
        }
    async def execute_fix(self, fix_plan: Dict) -> Dict:
        """
        Execute a specific fix from analysis.
        Args:
            fix_plan: Fix plan dict from ProblemAnalyzer
        Returns:
            Fix result dict
        """
        issue_id = fix_plan.get("issue_id", "unknown")
        approach = fix_plan.get("approach", "")
        steps = fix_plan.get("steps", [])
        # Build a bug-fix instruction; the text itself is informational — the
        # actual execution path is execute_plan() with a mini-plan below.
        instruction = f"""## Bug Fix: {issue_id}
### Approach
{approach}
### Steps
"""
        for i, step in enumerate(steps, 1):
            instruction += f"{i}. {step.get('description', step.get('action', 'Step'))}\n"
            if step.get("file"):
                instruction += f" File: `{step['file']}`\n"
        instruction += """
### Verification
After implementing the fix, verify that:
1. The specific test case passes
2. No regressions are introduced
3. Code follows Atomizer patterns
"""
        # Execute as a mini-plan
        return await self.execute_plan(
            {
                "objective": f"Fix: {issue_id}",
                "approach": approach,
                "tasks": [
                    {
                        "description": step.get("description", step.get("action")),
                        "file": step.get("file"),
                        "priority": "high",
                    }
                    for step in steps
                ],
                "acceptance_criteria": [
                    "Original test passes",
                    "No new errors introduced",
                ],
            }
        )
    def get_execution_status(self) -> Dict:
        """Get current execution status."""
        pending_file = self.workspace / ".devloop" / "pending_instruction.md"
        return {
            "mode": self.execution_mode,
            "workspace": str(self.workspace),
            # True while a manual instruction is waiting to be executed.
            "has_pending_instruction": pending_file.exists(),
            # Note: accessing `client` may attempt lazy initialization.
            "api_available": self.client is not None,
        }
    async def verify_implementation(self, expected_files: List[str]) -> Dict:
        """
        Verify that implementation created expected files.
        Args:
            expected_files: List of file paths that should exist
        Returns:
            Verification result
        """
        missing = []
        found = []
        for file_path in expected_files:
            # Relative paths are resolved against the workspace root.
            path = (
                self.workspace / file_path if not Path(file_path).is_absolute() else Path(file_path)
            )
            if path.exists():
                found.append(str(file_path))
            else:
                missing.append(str(file_path))
        return {
            "complete": len(missing) == 0,
            "found": found,
            "missing": missing,
        }

View File

@@ -0,0 +1,652 @@
"""
CLI Bridge - Execute AI tasks through Claude Code CLI and OpenCode CLI.
Uses your existing subscriptions via CLI tools:
- Claude Code CLI (claude.exe) for implementation
- OpenCode CLI (opencode) for Gemini planning
No API keys needed - leverages your CLI subscriptions.
"""
import asyncio
import json
import logging
import os
import subprocess
import tempfile
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import re
logger = logging.getLogger(__name__)
@dataclass
class CLIResult:
    """Result from CLI execution."""
    # True when the subprocess exited with return code 0.
    success: bool
    # Captured stdout of the CLI process.
    output: str
    # Captured stderr, or a synthesized error message (timeout/exception).
    error: str
    # Wall-clock time spent on the invocation.
    duration_seconds: float
    # File paths the CLI reported as created/modified (parsed from output).
    files_modified: List[str]
class ClaudeCodeCLI:
    """
    Execute tasks through Claude Code CLI.
    Uses: claude.exe --print for non-interactive execution
    """

    # NOTE(review): hardcoded per-user install path — consider resolving the
    # executable via shutil.which("claude") or configuration instead.
    CLAUDE_PATH = r"C:\Users\antoi\.local\bin\claude.exe"

    def __init__(self, workspace: Path):
        # Directory the CLI runs in (repository root).
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute a prompt through Claude Code CLI.

        Args:
            prompt: The instruction/prompt to execute
            timeout: Timeout in seconds
            model: Model to use (opus, sonnet, haiku)

        Returns:
            CLIResult with output and modified files
        """
        start_time = datetime.now()
        # Build command
        cmd = [
            self.CLAUDE_PATH,
            "--print",  # Non-interactive mode
            "--model",
            model,
            "--permission-mode",
            "acceptEdits",  # Auto-accept edits
            prompt,
        ]
        logger.info(f"Executing Claude Code CLI: {prompt[:100]}...")
        try:
            # Fix: run the blocking subprocess call in a worker thread so the
            # event loop is not stalled for up to `timeout` seconds (the
            # original called subprocess.run directly inside the coroutine).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},  # Disable colors
            )
            output = result.stdout
            error = result.stderr
            success = result.returncode == 0
            # Extract modified files from output
            files_modified = self._extract_modified_files(output)
            duration = (datetime.now() - start_time).total_seconds()
            logger.info(
                f"Claude Code completed in {duration:.1f}s, modified {len(files_modified)} files"
            )
            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=files_modified,
            )
        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    def _extract_modified_files(self, output: str) -> List[str]:
        """Extract the list of modified files from Claude Code output.

        Returns unique paths in first-seen order. (The original used
        ``list(set(files))``, whose ordering is nondeterministic.)
        """
        files: List[str] = []
        # Look for file modification patterns
        patterns = [
            r"(?:Created|Modified|Wrote|Updated|Edited):\s*[`'\"]?([^\s`'\"]+)[`'\"]?",
            r"Writing to [`'\"]?([^\s`'\"]+)[`'\"]?",
            r"File saved: ([^\s]+)",
        ]
        for pattern in patterns:
            files.extend(re.findall(pattern, output, re.IGNORECASE))
        # dict.fromkeys dedupes while preserving insertion order.
        return list(dict.fromkeys(files))

    async def execute_with_context(
        self,
        prompt: str,
        context_files: List[str],
        timeout: int = 300,
    ) -> "CLIResult":
        """
        Execute with additional context files loaded.

        Args:
            prompt: The instruction
            context_files: Files to read as context
            timeout: Timeout in seconds
        """
        # Append the context file list to the prompt; the CLI reads them itself.
        context_prompt = prompt
        if context_files:
            context_prompt += "\n\nContext files to consider:\n"
            for f in context_files:
                context_prompt += f"- {f}\n"
        return await self.execute(context_prompt, timeout)
class OpenCodeCLI:
"""
Execute tasks through OpenCode CLI (Gemini).
Uses: opencode run for non-interactive execution
"""
OPENCODE_PATH = r"C:\Users\antoi\AppData\Roaming\npm\opencode.cmd"
    def __init__(self, workspace: Path):
        # Directory used as cwd for OpenCode CLI invocations.
        self.workspace = workspace
async def execute(
self,
prompt: str,
timeout: int = 180,
model: str = "google/gemini-3-pro-preview",
) -> CLIResult:
"""
Execute a prompt through OpenCode CLI.
Args:
prompt: The instruction/prompt
timeout: Timeout in seconds
model: Model to use
Returns:
CLIResult with output
"""
start_time = datetime.now()
# Build command
cmd = [self.OPENCODE_PATH, "run", "--model", model, prompt]
logger.info(f"Executing OpenCode CLI: {prompt[:100]}...")
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=timeout,
cwd=str(self.workspace),
env={**os.environ, "TERM": "dumb"},
)
output = result.stdout
error = result.stderr
success = result.returncode == 0
duration = (datetime.now() - start_time).total_seconds()
logger.info(f"OpenCode completed in {duration:.1f}s")
return CLIResult(
success=success,
output=output,
error=error,
duration_seconds=duration,
files_modified=[], # OpenCode typically doesn't modify files directly
)
except subprocess.TimeoutExpired:
return CLIResult(
success=False,
output="",
error=f"Timeout after {timeout}s",
duration_seconds=timeout,
files_modified=[],
)
except Exception as e:
return CLIResult(
success=False,
output="",
error=str(e),
duration_seconds=(datetime.now() - start_time).total_seconds(),
files_modified=[],
)
async def plan(self, objective: str, context: Dict = None) -> Dict:
"""
Create an implementation plan using Gemini via OpenCode.
Args:
objective: What to achieve
context: Additional context
Returns:
Plan dict with tasks and test scenarios
"""
prompt = f"""You are a strategic planner for Atomizer, an FEA optimization framework.
## Objective
{objective}
## Context
{json.dumps(context, indent=2) if context else "None provided"}
## Task
Create a detailed implementation plan in JSON format with:
1. tasks: List of implementation tasks for Claude Code
2. test_scenarios: Tests to verify implementation
3. acceptance_criteria: Success conditions
Output ONLY valid JSON in this format:
```json
{{
"objective": "{objective}",
"approach": "Brief description",
"tasks": [
{{
"id": "task_001",
"description": "What to do",
"file": "path/to/file.py",
"priority": "high"
}}
],
"test_scenarios": [
{{
"id": "test_001",
"name": "Test name",
"type": "filesystem",
"steps": [{{"action": "check_exists", "path": "some/path"}}],
"expected_outcome": {{"exists": true}}
}}
],
"acceptance_criteria": [
"Criterion 1"
]
}}
```
"""
result = await self.execute(prompt)
if not result.success:
logger.error(f"OpenCode planning failed: {result.error}")
return self._fallback_plan(objective, context)
# Parse JSON from output
try:
# Find JSON block in output
output = result.output
if "```json" in output:
start = output.find("```json") + 7
end = output.find("```", start)
json_str = output[start:end].strip()
elif "```" in output:
start = output.find("```") + 3
end = output.find("```", start)
json_str = output[start:end].strip()
else:
# Try to find JSON object directly
match = re.search(r"\{.*\}", output, re.DOTALL)
if match:
json_str = match.group()
else:
return self._fallback_plan(objective, context)
plan = json.loads(json_str)
logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
return plan
except json.JSONDecodeError as e:
logger.error(f"Failed to parse plan JSON: {e}")
return self._fallback_plan(objective, context)
def _fallback_plan(self, objective: str, context: Dict = None) -> Dict:
"""Generate a fallback plan when Gemini fails."""
logger.warning("Using fallback plan")
return {
"objective": objective,
"approach": "Fallback plan - manual implementation",
"tasks": [
{
"id": "task_001",
"description": f"Implement: {objective}",
"file": "TBD",
"priority": "high",
}
],
"test_scenarios": [],
"acceptance_criteria": [objective],
}
async def analyze(self, test_results: Dict) -> Dict:
"""
Analyze test results using Gemini via OpenCode.
Args:
test_results: Test report from dashboard
Returns:
Analysis with issues and fix plans
"""
summary = test_results.get("summary", {})
scenarios = test_results.get("scenarios", [])
if summary.get("failed", 0) == 0:
return {
"issues_found": False,
"issues": [],
"fix_plans": {},
"recommendations": ["All tests passed!"],
}
failures = [s for s in scenarios if not s.get("passed", True)]
prompt = f"""Analyze these test failures for Atomizer FEA optimization framework:
## Test Summary
- Total: {summary.get("total", 0)}
- Passed: {summary.get("passed", 0)}
- Failed: {summary.get("failed", 0)}
## Failed Tests
{json.dumps(failures, indent=2)}
## Task
Provide root cause analysis and fix plans in JSON:
```json
{{
"issues_found": true,
"issues": [
{{
"id": "issue_001",
"description": "What went wrong",
"severity": "high",
"root_cause": "Why it failed"
}}
],
"fix_plans": {{
"issue_001": {{
"approach": "How to fix",
"steps": [{{"action": "edit", "file": "path", "description": "change"}}]
}}
}},
"recommendations": ["suggestion"]
}}
```
"""
result = await self.execute(prompt)
if not result.success:
return self._fallback_analysis(failures)
try:
output = result.output
if "```json" in output:
start = output.find("```json") + 7
end = output.find("```", start)
json_str = output[start:end].strip()
else:
match = re.search(r"\{.*\}", output, re.DOTALL)
json_str = match.group() if match else "{}"
return json.loads(json_str)
except:
return self._fallback_analysis(failures)
def _fallback_analysis(self, failures: List[Dict]) -> Dict:
"""Generate fallback analysis."""
issues = []
fix_plans = {}
for i, failure in enumerate(failures):
issue_id = f"issue_{i + 1}"
issues.append(
{
"id": issue_id,
"description": failure.get("error", "Unknown error"),
"severity": "medium",
"root_cause": "Requires investigation",
}
)
fix_plans[issue_id] = {
"approach": "Manual investigation required",
"steps": [],
}
return {
"issues_found": len(issues) > 0,
"issues": issues,
"fix_plans": fix_plans,
"recommendations": ["Review failed tests manually"],
}
class DevLoopCLIOrchestrator:
    """
    Orchestrate DevLoop using CLI tools.
    - OpenCode (Gemini) for planning and analysis
    - Claude Code for implementation and fixes
    """

    def __init__(self, workspace: Path = None):
        # Default to the local Atomizer checkout when no workspace is given.
        self.workspace = workspace or Path("C:/Users/antoi/Atomizer")
        self.claude = ClaudeCodeCLI(self.workspace)
        self.opencode = OpenCodeCLI(self.workspace)
        self.iteration = 0

    async def run_cycle(
        self,
        objective: str,
        context: Dict = None,
        max_iterations: int = 5,
    ) -> Dict:
        """
        Run a complete development cycle.

        Args:
            objective: What to achieve
            context: Additional context
            max_iterations: Maximum fix iterations

        Returns:
            Cycle report (objective, per-iteration records, status, duration)
        """
        from .test_runner import DashboardTestRunner

        cycle_start = datetime.now()
        report = {
            "objective": objective,
            "iterations": [],
            "status": "in_progress",
        }
        logger.info(f"Starting DevLoop cycle: {objective}")
        # Phase 1: Plan (Gemini via OpenCode)
        logger.info("Phase 1: Planning with Gemini...")
        plan = await self.opencode.plan(objective, context)
        for attempt in range(1, max_iterations + 1):
            record = {"iteration": attempt}
            # Phase 2: Implement (Claude Code)
            logger.info(f"Phase 2 (iter {attempt}): Implementing with Claude Code...")
            build = await self._implement(plan)
            record["implementation"] = {
                "success": build.success,
                "files_modified": build.files_modified,
            }
            # Phase 3: Test (Dashboard)
            logger.info(f"Phase 3 (iter {attempt}): Testing...")
            runner = DashboardTestRunner()
            test_report = await runner.run_test_suite(plan.get("test_scenarios", []))
            record["test_results"] = test_report
            # Stop as soon as the suite is clean.
            if test_report.get("summary", {}).get("failed", 0) == 0:
                logger.info("All tests passed!")
                report["iterations"].append(record)
                report["status"] = "success"
                break
            # Phase 4: Analyze (Gemini via OpenCode)
            logger.info(f"Phase 4 (iter {attempt}): Analyzing failures...")
            analysis = await self.opencode.analyze(test_report)
            record["analysis"] = analysis
            if not analysis.get("issues_found"):
                report["status"] = "success"
                report["iterations"].append(record)
                break
            # Phase 5: Fix (Claude Code)
            logger.info(f"Phase 5 (iter {attempt}): Fixing issues...")
            repair = await self._fix(analysis)
            record["fixes"] = {
                "success": repair.success,
                "files_modified": repair.files_modified,
            }
            report["iterations"].append(record)
        if report["status"] == "in_progress":
            report["status"] = "max_iterations_reached"
        report["duration_seconds"] = (datetime.now() - cycle_start).total_seconds()
        logger.info(f"DevLoop cycle completed: {report['status']}")
        return report

    async def _implement(self, plan: Dict) -> CLIResult:
        """Implement the plan using Claude Code."""
        tasks = plan.get("tasks", [])
        if not tasks:
            return CLIResult(
                success=True,
                output="No tasks to implement",
                error="",
                duration_seconds=0,
                files_modified=[],
            )
        # Assemble the implementation prompt section by section.
        sections = [
            f"""Implement the following tasks for Atomizer:
## Objective
{plan.get("objective", "Unknown")}
## Approach
{plan.get("approach", "Follow best practices")}
## Tasks
"""
        ]
        sections.extend(
            f"""
### {task.get("id", "task")}: {task.get("description", "")}
- File: {task.get("file", "TBD")}
- Priority: {task.get("priority", "medium")}
"""
            for task in tasks
        )
        sections.append(
            """
## Requirements
- Follow Atomizer coding standards
- Use AtomizerSpec v2.0 format
- Create README.md for any new study
- Use existing extractors from optimization_engine/extractors/
"""
        )
        return await self.claude.execute("".join(sections), timeout=300)

    async def _fix(self, analysis: Dict) -> CLIResult:
        """Apply fixes using Claude Code."""
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})
        if not issues:
            return CLIResult(
                success=True,
                output="No issues to fix",
                error="",
                duration_seconds=0,
                files_modified=[],
            )
        # Build one prompt section per issue, attaching its fix plan when present.
        pieces = ["Fix the following issues:\n\n"]
        for issue in issues:
            key = issue.get("id", "unknown")
            pieces.append(
                f"""
## Issue: {key}
- Description: {issue.get("description", "")}
- Root Cause: {issue.get("root_cause", "Unknown")}
- Severity: {issue.get("severity", "medium")}
"""
            )
            plan_for_issue = fix_plans.get(key, {})
            if plan_for_issue:
                pieces.append(f"- Fix Approach: {plan_for_issue.get('approach', 'Investigate')}\n")
                pieces.extend(
                    f"  - {step.get('description', step.get('action', 'step'))}\n"
                    for step in plan_for_issue.get("steps", [])
                )
        return await self.claude.execute("".join(pieces), timeout=300)

    async def step_plan(self, objective: str, context: Dict = None) -> Dict:
        """Execute only the planning phase."""
        return await self.opencode.plan(objective, context)

    async def step_implement(self, plan: Dict) -> CLIResult:
        """Execute only the implementation phase."""
        return await self._implement(plan)

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase."""
        return await self.opencode.analyze(test_results)

View File

@@ -0,0 +1,561 @@
"""
DevLoop Orchestrator - Master controller for closed-loop development.
Coordinates:
- Gemini Pro: Strategic planning, analysis, test design
- Claude Code: Implementation, code changes, fixes
- Dashboard: Automated testing, verification
- LAC: Learning capture and retrieval
"""
import asyncio
import json
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Callable
import logging
logger = logging.getLogger(__name__)
class LoopPhase(Enum):
    """Current phase in the development loop.

    Values are the wire/serialized names used by ``get_state()``.
    """

    IDLE = "idle"                  # no cycle running
    PLANNING = "planning"          # Gemini drafting the implementation plan
    IMPLEMENTING = "implementing"  # Claude Code applying changes
    TESTING = "testing"            # dashboard test suite executing
    ANALYZING = "analyzing"        # Gemini triaging test failures
    FIXING = "fixing"              # Claude Code applying fixes
    VERIFYING = "verifying"        # re-running targeted tests on the fixes
@dataclass
class LoopState:
    """Current state of the development loop (mutated in place and pushed
    to subscribers on every change)."""

    phase: LoopPhase = LoopPhase.IDLE    # which stage of the cycle is active
    iteration: int = 0                   # monotonically increasing iteration counter
    current_task: Optional[str] = None   # human-readable label for the active work
    test_results: Optional[Dict] = None  # latest dashboard test report, if any
    analysis: Optional[Dict] = None      # latest failure analysis, if any
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())  # ISO timestamp of last change
@dataclass
class IterationResult:
    """Result of a single development iteration (one pass through
    plan → implement → test → analyze → fix → verify)."""

    iteration: int                        # iteration index at the time it ran
    plan: Optional[Dict] = None           # output of the planning phase
    implementation: Optional[Dict] = None # output of the implementation phase
    test_results: Optional[Dict] = None   # dashboard test report
    analysis: Optional[Dict] = None       # failure analysis (when tests failed)
    fixes: Optional[List[Dict]] = None    # per-issue fix attempts
    verification: Optional[Dict] = None   # re-test results for the fixes
    success: bool = False                 # True when the iteration ended cleanly
    duration_seconds: float = 0.0         # wall-clock duration of the iteration
@dataclass
class CycleReport:
    """Complete report for a development cycle (one or more iterations)."""

    objective: str                        # what the cycle set out to achieve
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())  # ISO start timestamp
    end_time: Optional[str] = None        # ISO end timestamp, set when the cycle finishes
    iterations: List[IterationResult] = field(default_factory=list)  # per-iteration results, in order
    status: str = "in_progress"           # in_progress | completed | max_iterations_reached | "error: ..."
    total_duration_seconds: float = 0.0   # wall-clock duration of the whole cycle
class DevLoopOrchestrator:
    """
    Autonomous development loop orchestrator.
    Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
    in a continuous improvement cycle.
    Flow:
    1. Gemini: Plan features/fixes
    2. Claude Code: Implement
    3. Dashboard: Test
    4. Gemini: Analyze results
    5. Claude Code: Fix issues
    6. Dashboard: Verify
    7. Loop back with learnings
    """

    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.
        Args:
            config: Configuration dict with API keys and settings
            gemini_client: Pre-configured Gemini client (optional)
            claude_bridge: Pre-configured Claude Code bridge (optional)
            dashboard_runner: Pre-configured Dashboard test runner (optional)
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked synchronously on every state change.
        self.subscribers: List[Callable] = []
        # Initialize components lazily (None until first property access,
        # unless a pre-built instance was injected above).
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None
        # History for learning
        self.cycle_history: List[CycleReport] = []

    def _default_config(self) -> Dict:
        """Default configuration used when none is supplied."""
        return {
            "max_iterations": 10,
            "auto_fix_threshold": "high",  # Only auto-fix high+ severity
            "learning_enabled": True,
            "dashboard_url": "http://localhost:3000",
            "websocket_url": "ws://localhost:8000",
            "test_timeout_ms": 30000,
        }

    @property
    def gemini(self):
        """Lazy-load Gemini planner."""
        if self._gemini is None:
            from .planning import GeminiPlanner
            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini

    @property
    def claude_bridge(self):
        """Lazy-load Claude Code bridge."""
        if self._claude_bridge is None:
            from .claude_bridge import ClaudeCodeBridge
            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge

    @property
    def dashboard(self):
        """Lazy-load Dashboard test runner."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner
            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard

    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled or the knowledge_base package
        is unavailable. NOTE: after an ImportError, _lac stays None, so the
        import (and warning) is retried on every access.
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac
                self._lac = get_lac()
            except ImportError:
                logger.warning("LAC not available, learning disabled")
        return self._lac

    def subscribe(self, callback: Callable[[LoopState], None]):
        """Subscribe to state updates."""
        self.subscribers.append(callback)

    def unsubscribe(self, callback: Callable):
        """Unsubscribe from state updates."""
        if callback in self.subscribers:
            self.subscribers.remove(callback)

    def _notify_subscribers(self):
        """Notify all subscribers of state change.

        Subscriber exceptions are logged and swallowed so one bad callback
        cannot break the loop.
        """
        self.state.last_update = datetime.now().isoformat()
        for callback in self.subscribers:
            try:
                callback(self.state)
            except Exception as e:
                logger.error(f"Subscriber error: {e}")

    def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None):
        """Update state and notify subscribers.

        Only the arguments that are provided (truthy) are updated; passing
        neither still refreshes last_update and notifies.
        """
        if phase:
            self.state.phase = phase
        if task:
            self.state.current_task = task
        self._notify_subscribers()

    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.
        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations
        Returns:
            CycleReport with all iteration results
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)
        report = CycleReport(objective=objective)
        start_time = datetime.now()
        logger.info(f"Starting development cycle: {objective}")
        try:
            # Iterate until the objective is met or the iteration budget runs out.
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)
                # Record learning from successful patterns
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)
                # Check for max iterations
                if len(report.iterations) >= max_iter:
                    report.status = "max_iterations_reached"
                    logger.warning(f"Max iterations ({max_iter}) reached")
                    break
        except Exception as e:
            # Boundary handler: any iteration error ends the cycle with an
            # "error: ..." status rather than propagating.
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")
        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()
        # "in_progress" at this point means the loop exited normally.
        if report.status == "in_progress":
            report.status = "completed"
        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)
        return report

    def _is_objective_complete(self, report: CycleReport) -> bool:
        """Check if the objective has been achieved.

        True when the most recent iteration succeeded and its test summary
        reports zero failures.
        """
        if not report.iterations:
            return False
        last_iter = report.iterations[-1]
        # Success if last iteration passed all tests
        if last_iter.success and last_iter.test_results:
            tests = last_iter.test_results
            if tests.get("summary", {}).get("failed", 0) == 0:
                return True
        return False

    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Never raises: phase errors mark the iteration unsuccessful. The
        shared iteration counter is incremented after the phases complete.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)
        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)
            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)
            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            self.state.test_results = result.test_results
            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis
            # Phases 5-6: Fix & Verify if needed
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)
                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                # No issues reported -> the iteration is a success as-is.
                result.success = True
        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False
        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        self.state.iteration += 1
        return result

    async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
        """Gemini creates implementation plan.

        Enriches the request with previous test results and relevant LAC
        insights; on failure returns an empty plan carrying an "error" key.
        """
        # Gather context
        historical_learnings = []
        if self.lac:
            historical_learnings = self.lac.get_relevant_insights(objective)
        plan_request = {
            "objective": objective,
            "context": context or {},
            "previous_results": self.state.test_results,
            "historical_learnings": historical_learnings,
        }
        try:
            plan = await self.gemini.create_plan(plan_request)
            logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
            return plan
        except Exception as e:
            logger.error(f"Planning phase failed: {e}")
            return {"error": str(e), "tasks": [], "test_scenarios": []}

    async def _implementation_phase(self, plan: Dict) -> Dict:
        """Claude Code implements the plan.

        Skipped when the plan is empty or carries an "error" key.
        """
        if not plan or plan.get("error"):
            return {"status": "skipped", "reason": "No valid plan"}
        try:
            result = await self.claude_bridge.execute_plan(plan)
            return {
                "status": result.get("status", "unknown"),
                "files_modified": result.get("files", []),
                "warnings": result.get("warnings", []),
            }
        except Exception as e:
            logger.error(f"Implementation phase failed: {e}")
            return {"status": "error", "error": str(e)}

    async def _testing_phase(self, plan: Dict) -> Dict:
        """Dashboard runs automated tests.

        Falls back to heuristic default scenarios when the plan supplies
        none; a runner error is reported as a single failed test.
        """
        test_scenarios = plan.get("test_scenarios", [])
        if not test_scenarios:
            # Generate default tests based on objective
            test_scenarios = self._generate_default_tests(plan)
        try:
            results = await self.dashboard.run_test_suite(test_scenarios)
            return results
        except Exception as e:
            logger.error(f"Testing phase failed: {e}")
            return {
                "status": "error",
                "error": str(e),
                "summary": {"passed": 0, "failed": 1, "total": 1},
            }

    def _generate_default_tests(self, plan: Dict) -> List[Dict]:
        """Generate default test scenarios based on the plan.

        Keyword-matches the objective text: "study"/"create" adds study
        scaffolding checks, "optimi" adds a single-trial CLI smoke test.
        """
        objective = plan.get("objective", "")
        tests = []
        # Study creation tests
        if "study" in objective.lower() or "create" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_study_exists",
                        "name": "Study directory exists",
                        "type": "filesystem",
                        "check": "directory_exists",
                    },
                    {
                        "id": "test_spec_valid",
                        "name": "AtomizerSpec is valid",
                        "type": "api",
                        "endpoint": "/api/studies/{study_id}/spec/validate",
                    },
                    {
                        "id": "test_dashboard_loads",
                        "name": "Dashboard loads study",
                        "type": "browser",
                        "action": "load_study",
                    },
                ]
            )
        # Optimization tests ("optimi" matches optimize/optimization/optimizer)
        if "optimi" in objective.lower():
            tests.extend(
                [
                    {
                        "id": "test_run_trial",
                        "name": "Single trial executes",
                        "type": "cli",
                        "command": "python run_optimization.py --test",
                    },
                ]
            )
        return tests

    async def _analysis_phase(self, test_results: Dict) -> Dict:
        """Gemini analyzes test results.

        On analyzer failure, reports the error itself as a high-severity
        issue so the cycle proceeds to the fixing phase.
        """
        try:
            from .analyzer import ProblemAnalyzer
            analyzer = ProblemAnalyzer(self.gemini)
            return await analyzer.analyze_test_results(test_results)
        except Exception as e:
            logger.error(f"Analysis phase failed: {e}")
            return {
                "issues_found": True,
                "issues": [{"description": str(e), "severity": "high"}],
                "fix_plans": {},
            }

    async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
        """Claude Code implements fixes.

        Issues without a matching fix plan are skipped silently; per-issue
        errors are captured in the returned records rather than raised.
        """
        fixes = []
        for issue in analysis.get("issues", []):
            fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))
            if fix_plan:
                try:
                    result = await self.claude_bridge.execute_fix(fix_plan)
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": result.get("status"),
                            "files_modified": result.get("files", []),
                        }
                    )
                except Exception as e:
                    fixes.append(
                        {
                            "issue_id": issue.get("id"),
                            "status": "error",
                            "error": str(e),
                        }
                    )
        return fixes

    async def _verification_phase(self, fixes: List[Dict]) -> Dict:
        """Dashboard verifies fixes.

        Fixes that already errored are marked failed without re-testing;
        the rest are re-checked via a targeted dashboard run.
        """
        # Re-run tests for each fix
        all_passed = True
        verification_results = []
        for fix in fixes:
            if fix.get("status") == "error":
                all_passed = False
                verification_results.append(
                    {
                        "issue_id": fix.get("issue_id"),
                        "passed": False,
                        "reason": fix.get("error"),
                    }
                )
            else:
                # Run targeted test
                result = await self.dashboard.verify_fix(fix)
                verification_results.append(result)
                if not result.get("passed", False):
                    all_passed = False
        return {
            "all_passed": all_passed,
            "results": verification_results,
        }

    async def _record_learning(self, iteration: IterationResult):
        """Store successful patterns for future reference (best-effort;
        failures are only logged)."""
        if not self.lac:
            return
        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {iteration.plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")

    # ========================================================================
    # Single-step operations (for manual control)
    # ========================================================================

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase."""
        self._update_state(LoopPhase.PLANNING, objective)
        plan = await self._planning_phase(objective, context)
        self._update_state(LoopPhase.IDLE)
        return plan

    async def step_implement(self, plan: Dict) -> Dict:
        """Execute only the implementation phase."""
        self._update_state(LoopPhase.IMPLEMENTING)
        result = await self._implementation_phase(plan)
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_test(self, scenarios: List[Dict]) -> Dict:
        """Execute only the testing phase."""
        self._update_state(LoopPhase.TESTING)
        result = await self._testing_phase({"test_scenarios": scenarios})
        self._update_state(LoopPhase.IDLE)
        return result

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase."""
        self._update_state(LoopPhase.ANALYZING)
        result = await self._analysis_phase(test_results)
        self._update_state(LoopPhase.IDLE)
        return result

    def get_state(self) -> Dict:
        """Get current state as a JSON-serializable dict."""
        return {
            "phase": self.state.phase.value,
            "iteration": self.state.iteration,
            "current_task": self.state.current_task,
            "test_results": self.state.test_results,
            "last_update": self.state.last_update,
        }

    def export_history(self, filepath: Optional[Path] = None) -> Dict:
        """Export cycle history for analysis.

        Args:
            filepath: When given, the summary is also written there as JSON.
        Returns:
            Summary dict (one entry per completed cycle).
        """
        history = {
            "exported_at": datetime.now().isoformat(),
            "total_cycles": len(self.cycle_history),
            "cycles": [
                {
                    "objective": c.objective,
                    "status": c.status,
                    "iterations": len(c.iterations),
                    "duration_seconds": c.total_duration_seconds,
                }
                for c in self.cycle_history
            ],
        }
        if filepath:
            with open(filepath, "w") as f:
                json.dump(history, f, indent=2)
        return history

View File

@@ -0,0 +1,451 @@
"""
Gemini Planner - Strategic planning and test design using Gemini Pro.
Handles:
- Implementation planning from objectives
- Test scenario generation
- Architecture decisions
- Risk assessment
"""
import asyncio
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class PlanTask:
    """A single task in the implementation plan (consumed by Claude Code)."""

    id: str                               # stable identifier, e.g. "task_001"
    description: str                      # what to do
    file: Optional[str] = None            # target file path, if known
    code_hint: Optional[str] = None       # pseudo-code or pattern to follow
    priority: str = "medium"              # "high" | "medium" | "low"
    dependencies: Optional[List[str]] = None  # ids of tasks that must run first; normalized to [] below

    def __post_init__(self):
        # Normalize a None default into an empty list (avoids the mutable
        # default-argument pitfall).
        if self.dependencies is None:
            self.dependencies = []
@dataclass
class TestScenario:
    """A test scenario for dashboard verification."""

    id: str                               # stable identifier, e.g. "test_001"
    name: str                             # human-readable test name
    type: str  # "api", "browser", "cli", "filesystem"
    steps: Optional[List[Dict]] = None    # ordered actions; normalized to [] below
    expected_outcome: Optional[Dict] = None  # assertion payload; defaults to {"status": "pass"}

    def __post_init__(self):
        # Normalize None defaults (avoids the mutable default-argument pitfall).
        if self.steps is None:
            self.steps = []
        if self.expected_outcome is None:
            self.expected_outcome = {"status": "pass"}
class GeminiPlanner:
"""
Strategic planner using Gemini Pro.
Generates:
- Implementation tasks for Claude Code
- Test scenarios for dashboard verification
- Architecture decisions
- Risk assessments
"""
    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the planner.
        Args:
            config: Configuration with API key and model settings.
                Keys read lazily by the ``client`` property: "api_key", "model".
        """
        self.config = config or {}
        # Populated on first access of `client`: the google.generativeai
        # module (or the "mock" sentinel) and the GenerativeModel instance.
        self._client = None
        self._model = None
    @property
    def client(self):
        """Lazy-load Gemini client.

        On first access: imports google.generativeai, configures it with the
        API key from config or the GEMINI_API_KEY env var, and builds the
        GenerativeModel. If the package is not installed, stores the string
        sentinel "mock" so callers fall back to the offline mock planner.

        Raises:
            ValueError: When the package is available but no API key is set.
        """
        if self._client is None:
            try:
                import google.generativeai as genai
                api_key = self.config.get("api_key") or os.environ.get("GEMINI_API_KEY")
                if not api_key:
                    raise ValueError("GEMINI_API_KEY not set")
                genai.configure(api_key=api_key)
                self._client = genai
                # Model name can be overridden via config["model"].
                model_name = self.config.get("model", "gemini-2.0-flash-thinking-exp-01-21")
                self._model = genai.GenerativeModel(model_name)
                logger.info(f"Gemini client initialized with model: {model_name}")
            except ImportError:
                # NOTE(review): "mock" is a string sentinel; callers compare
                # with `self.client == "mock"` before using self._model.
                logger.warning("google-generativeai not installed, using mock planner")
                self._client = "mock"
        return self._client
async def create_plan(self, request: Dict) -> Dict:
"""
Create an implementation plan from an objective.
Args:
request: Dict with:
- objective: What to achieve
- context: Additional context (study spec, etc.)
- previous_results: Results from last iteration
- historical_learnings: Relevant LAC insights
Returns:
Plan dict with tasks, test_scenarios, risks
"""
objective = request.get("objective", "")
context = request.get("context", {})
previous_results = request.get("previous_results")
learnings = request.get("historical_learnings", [])
# Build planning prompt
prompt = self._build_planning_prompt(objective, context, previous_results, learnings)
# Get response from Gemini
if self.client == "mock":
plan = self._mock_plan(objective, context)
else:
plan = await self._query_gemini(prompt)
return plan
    def _build_planning_prompt(
        self,
        objective: str,
        context: Dict,
        previous_results: Optional[Dict],
        learnings: List[Dict],
    ) -> str:
        """Build the planning prompt for Gemini.

        Interpolates the objective, context, previous-iteration results and
        formatted LAC learnings into a fixed markdown template that instructs
        the model to emit JSON only. Doubled braces ({{ }}) are f-string
        escapes producing literal braces in the JSON example.
        """
        prompt = f"""## Atomizer Development Planning Session
### Objective
{objective}
### Context
{json.dumps(context, indent=2) if context else "No additional context provided."}
### Previous Iteration Results
{json.dumps(previous_results, indent=2) if previous_results else "First iteration - no previous results."}
### Historical Learnings (from LAC)
{self._format_learnings(learnings)}
### Required Outputs
Generate a detailed implementation plan in JSON format with the following structure:
```json
{{
    "objective": "{objective}",
    "approach": "Brief description of the approach",
    "tasks": [
        {{
            "id": "task_001",
            "description": "What to do",
            "file": "path/to/file.py",
            "code_hint": "Pseudo-code or pattern to use",
            "priority": "high|medium|low",
            "dependencies": ["task_000"]
        }}
    ],
    "test_scenarios": [
        {{
            "id": "test_001",
            "name": "Test name",
            "type": "api|browser|cli|filesystem",
            "steps": [
                {{"action": "navigate", "target": "/canvas"}}
            ],
            "expected_outcome": {{"status": "pass", "assertions": []}}
        }}
    ],
    "risks": [
        {{
            "description": "What could go wrong",
            "mitigation": "How to handle it",
            "severity": "high|medium|low"
        }}
    ],
    "acceptance_criteria": [
        "Criteria 1",
        "Criteria 2"
    ]
}}
```
### Guidelines
1. **Tasks should be specific and actionable** - Each task should be completable by Claude Code
2. **Test scenarios must be verifiable** - Use dashboard endpoints and browser actions
3. **Consider Atomizer architecture** - Use existing extractors (SYS_12), follow AtomizerSpec v2.0
4. **Apply historical learnings** - Avoid known failure patterns
### Important Atomizer Patterns
- Studies use `atomizer_spec.json` (AtomizerSpec v2.0)
- Design variables have bounds: {{"min": X, "max": Y}}
- Objectives use extractors: E1 (displacement), E3 (stress), E4 (mass)
- Constraints define limits with operators: <, >, <=, >=
Output ONLY the JSON plan, no additional text.
"""
        return prompt
def _format_learnings(self, learnings: List[Dict]) -> str:
"""Format LAC learnings for the prompt."""
if not learnings:
return "No relevant historical learnings."
formatted = []
for learning in learnings[:5]: # Limit to 5 most relevant
formatted.append(
f"- [{learning.get('category', 'insight')}] {learning.get('insight', '')}"
)
return "\n".join(formatted)
    async def _query_gemini(self, prompt: str) -> Dict:
        """Query Gemini and parse response.

        Runs the blocking SDK call in the default thread executor, then
        extracts JSON from the reply (```json fence, then any ``` fence,
        then the raw text). Never raises: parse and query errors are
        returned as plan dicts with an "error" key and empty task lists.
        """
        try:
            # Run in executor to not block
            # NOTE(review): asyncio.get_event_loop() is deprecated inside
            # coroutines on newer Pythons; get_running_loop() is the modern form.
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(
                None, lambda: self._model.generate_content(prompt)
            )
            # Extract JSON from response
            text = response.text
            # Try to parse JSON
            try:
                # Find JSON block
                if "```json" in text:
                    start = text.find("```json") + 7
                    end = text.find("```", start)
                    json_str = text[start:end].strip()
                elif "```" in text:
                    start = text.find("```") + 3
                    end = text.find("```", start)
                    json_str = text[start:end].strip()
                else:
                    # No fence at all: assume the whole reply is JSON.
                    json_str = text.strip()
                plan = json.loads(json_str)
                logger.info(f"Gemini plan parsed: {len(plan.get('tasks', []))} tasks")
                return plan
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse Gemini response: {e}")
                return {
                    "objective": "Parse error",
                    "error": str(e),
                    "raw_response": text[:500],  # truncated for log/report size
                    "tasks": [],
                    "test_scenarios": [],
                }
        except Exception as e:
            logger.error(f"Gemini query failed: {e}")
            return {
                "objective": "Query error",
                "error": str(e),
                "tasks": [],
                "test_scenarios": [],
            }
def _mock_plan(self, objective: str, context: Dict) -> Dict:
    """Build a deterministic fallback plan when the Gemini API is unavailable.

    Produces the same plan shape as a real Gemini response: tasks,
    test scenarios, risks, and acceptance criteria.
    """
    logger.info("Using mock planner (Gemini not available)")
    # Heuristic objective classification: any of these keywords marks the
    # objective as a study-creation request.
    creation_keywords = ("create", "study", "new", "setup")
    objective_lower = objective.lower()
    is_study_creation = any(kw in objective_lower for kw in creation_keywords)
    tasks: List[Dict] = []
    test_scenarios: List[Dict] = []
    if is_study_creation:
        study_name = context.get("study_name", "support_arm")
        # (id, description, file, priority, dependencies) per task.
        task_specs = [
            (
                "task_001",
                f"Create study directory structure for {study_name}",
                f"studies/_Other/{study_name}/",
                "high",
                [],
            ),
            (
                "task_002",
                "Copy NX model files to study directory",
                f"studies/_Other/{study_name}/1_setup/model/",
                "high",
                ["task_001"],
            ),
            (
                "task_003",
                "Create AtomizerSpec v2.0 configuration",
                f"studies/_Other/{study_name}/atomizer_spec.json",
                "high",
                ["task_002"],
            ),
            (
                "task_004",
                "Create run_optimization.py script",
                f"studies/_Other/{study_name}/run_optimization.py",
                "high",
                ["task_003"],
            ),
            (
                "task_005",
                "Create README.md documentation",
                f"studies/_Other/{study_name}/README.md",
                "medium",
                ["task_003"],
            ),
        ]
        tasks = [
            {
                "id": task_id,
                "description": description,
                "file": file_path,
                "priority": priority,
                "dependencies": dependencies,
            }
            for task_id, description, file_path, priority, dependencies in task_specs
        ]
        test_scenarios = [
            {
                "id": "test_001",
                "name": "Study directory exists",
                "type": "filesystem",
                "steps": [{"action": "check_exists", "path": f"studies/_Other/{study_name}"}],
                "expected_outcome": {"exists": True},
            },
            {
                "id": "test_002",
                "name": "AtomizerSpec is valid",
                "type": "api",
                "steps": [
                    {"action": "get", "endpoint": f"/api/studies/{study_name}/spec/validate"}
                ],
                "expected_outcome": {"valid": True},
            },
            {
                "id": "test_003",
                "name": "Dashboard loads study",
                "type": "browser",
                "steps": [
                    {"action": "navigate", "url": f"/canvas/{study_name}"},
                    {"action": "wait_for", "selector": "[data-testid='canvas-container']"},
                ],
                "expected_outcome": {"loaded": True},
            },
        ]
    return {
        "objective": objective,
        "approach": "Mock plan for development testing",
        "tasks": tasks,
        "test_scenarios": test_scenarios,
        "risks": [
            {
                "description": "NX model files may have dependencies",
                "mitigation": "Copy all related files (_i.prt, .fem, .sim)",
                "severity": "high",
            }
        ],
        "acceptance_criteria": [
            "Study directory structure created",
            "AtomizerSpec validates without errors",
            "Dashboard loads study canvas",
        ],
    }
async def analyze_codebase(self, query: str) -> Dict:
    """Use Gemini to analyze codebase state.

    Args:
        query: What to analyze (e.g., "current dashboard components")

    Returns:
        Analysis results. Currently a stub: echoes the query alongside a
        fixed not-implemented message and an empty recommendation list.
    """
    # Placeholder until codebase scanning is integrated; the returned
    # dict shape is the contract future callers should rely on.
    stub_message = "Codebase analysis not yet implemented"
    return {
        "query": query,
        "analysis": stub_message,
        "recommendations": [],
    }
async def generate_test_scenarios(
    self,
    feature: str,
    context: Optional[Dict] = None,
) -> List[Dict]:
    """
    Generate test scenarios for a specific feature.

    Args:
        feature: Feature to test (e.g., "study creation", "spec validation")
        context: Additional context

    Returns:
        List of test scenario dicts; an empty list on query/parse failure
        or a non-list response, so callers can iterate unconditionally.
    """
    prompt = f"""Generate test scenarios for the Atomizer feature: {feature}
Context: {json.dumps(context, indent=2) if context else "None"}
Output as JSON array of test scenarios:
```json
[
{{
"id": "test_001",
"name": "Test name",
"type": "api|browser|cli|filesystem",
"steps": [...],
"expected_outcome": {{...}}
}}
]
```
"""
    if self.client == "mock":
        return self._mock_plan(feature, context or {}).get("test_scenarios", [])
    # Query Gemini
    try:
        # get_running_loop() replaces the deprecated get_event_loop()
        # call inside a coroutine; the blocking SDK call runs off-loop.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: self._model.generate_content(prompt)
        )
        text = response.text
        # Accept a ```json fence, a bare ``` fence, or raw JSON text.
        # (Previously only the ```json case returned a value; any other
        # response fell through and implicitly returned None, violating
        # the List[Dict] contract.)
        if "```json" in text:
            start = text.find("```json") + 7
        elif "```" in text:
            start = text.find("```") + 3
        else:
            start = None
        if start is None:
            json_str = text.strip()
        else:
            end = text.find("```", start)
            # Tolerate a missing closing fence (end == -1).
            json_str = (text[start:] if end == -1 else text[start:end]).strip()
        scenarios = json.loads(json_str)
        # Guard against a well-formed but non-list JSON payload.
        return scenarios if isinstance(scenarios, list) else []
    except Exception as e:
        logger.error(f"Failed to generate test scenarios: {e}")
        return []

View File

@@ -0,0 +1,585 @@
"""
Dashboard Test Runner - Automated testing through the Atomizer dashboard.
Supports test types:
- API tests (REST endpoint verification)
- Browser tests (UI interaction via Playwright)
- CLI tests (command line execution)
- Filesystem tests (file/directory verification)
"""
import asyncio
import json
import logging
import subprocess
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import aiohttp
logger = logging.getLogger(__name__)
@dataclass
class TestStep:
    """A single step in a test scenario.

    NOTE(review): the runner below consumes scenario steps as plain dicts;
    this dataclass is not instantiated anywhere in this module — confirm
    whether it is part of the intended public API.
    """
    action: str  # e.g. "get", "navigate", "click", "check_exists"
    target: Optional[str] = None  # endpoint, selector, or path the action applies to
    data: Optional[Dict] = None  # request payload or extra action arguments
    timeout_ms: int = 5000  # per-step timeout in milliseconds
@dataclass
class TestScenario:
    """A complete test scenario."""
    id: str
    name: str
    type: str  # "api", "browser", "cli", "filesystem"
    # Raw step dicts; their keys are interpreted per scenario type.
    steps: List[Dict] = field(default_factory=list)
    # Key/value expectations compared against the details collected at run time.
    expected_outcome: Dict = field(default_factory=lambda: {"status": "pass"})
    timeout_ms: int = 30000  # whole-scenario timeout in milliseconds
@dataclass
class TestResult:
    """Result of a single test."""
    scenario_id: str
    scenario_name: str
    passed: bool
    duration_ms: float  # wall-clock duration; overwritten by the suite runner
    error: Optional[str] = None  # human-readable failure reason; None on success
    details: Optional[Dict] = None  # action-specific evidence (responses, paths, output)
@dataclass
class TestReport:
    """Complete test report."""
    # ISO-8601 creation time of the report.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    # One TestResult per executed scenario, in execution order.
    scenarios: List[TestResult] = field(default_factory=list)
    # Aggregate counters maintained by the suite runner; passed + failed == total.
    summary: Dict = field(default_factory=lambda: {"passed": 0, "failed": 0, "total": 0})
class DashboardTestRunner:
"""
Automated test runner for Atomizer dashboard.
Executes test scenarios against:
- Backend API endpoints
- Frontend UI (via Playwright if available)
- CLI commands
- Filesystem assertions
"""
def __init__(self, config: Optional[Dict] = None):
"""
Initialize the test runner.
Args:
config: Configuration with dashboard URLs and timeouts
"""
self.config = config or {}
self.base_url = self.config.get("dashboard_url", "http://localhost:8000")
self.ws_url = self.config.get("websocket_url", "ws://localhost:8000")
self.timeout_ms = self.config.get("test_timeout_ms", 30000)
self.studies_dir = Path(self.config.get("studies_dir", "C:/Users/antoi/Atomizer/studies"))
self._session: Optional[aiohttp.ClientSession] = None
self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
self._playwright = None
self._browser = None
async def connect(self):
"""Initialize connections."""
if self._session is None:
self._session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=self.timeout_ms / 1000)
)
async def disconnect(self):
"""Clean up connections."""
if self._ws:
await self._ws.close()
self._ws = None
if self._session:
await self._session.close()
self._session = None
if self._browser:
await self._browser.close()
self._browser = None
async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
"""
Run a complete test suite.
Args:
scenarios: List of test scenario dicts
Returns:
Test report as dict
"""
await self.connect()
report = TestReport()
for scenario_dict in scenarios:
scenario = self._parse_scenario(scenario_dict)
start_time = datetime.now()
try:
result = await self._execute_scenario(scenario)
result.duration_ms = (datetime.now() - start_time).total_seconds() * 1000
report.scenarios.append(result)
if result.passed:
report.summary["passed"] += 1
else:
report.summary["failed"] += 1
except Exception as e:
logger.error(f"Scenario {scenario.id} failed with error: {e}")
report.scenarios.append(
TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=(datetime.now() - start_time).total_seconds() * 1000,
error=str(e),
)
)
report.summary["failed"] += 1
report.summary["total"] += 1
return {
"timestamp": report.timestamp,
"scenarios": [self._result_to_dict(r) for r in report.scenarios],
"summary": report.summary,
}
def _parse_scenario(self, scenario_dict: Dict) -> TestScenario:
"""Parse a scenario dict into TestScenario."""
return TestScenario(
id=scenario_dict.get("id", "unknown"),
name=scenario_dict.get("name", "Unnamed test"),
type=scenario_dict.get("type", "api"),
steps=scenario_dict.get("steps", []),
expected_outcome=scenario_dict.get("expected_outcome", {"status": "pass"}),
timeout_ms=scenario_dict.get("timeout_ms", self.timeout_ms),
)
def _result_to_dict(self, result: TestResult) -> Dict:
"""Convert TestResult to dict."""
return {
"scenario_id": result.scenario_id,
"scenario_name": result.scenario_name,
"passed": result.passed,
"duration_ms": result.duration_ms,
"error": result.error,
"details": result.details,
}
async def _execute_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a single test scenario."""
logger.info(f"Executing test: {scenario.name} ({scenario.type})")
if scenario.type == "api":
return await self._execute_api_scenario(scenario)
elif scenario.type == "browser":
return await self._execute_browser_scenario(scenario)
elif scenario.type == "cli":
return await self._execute_cli_scenario(scenario)
elif scenario.type == "filesystem":
return await self._execute_filesystem_scenario(scenario)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Unknown test type: {scenario.type}",
)
async def _execute_api_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute an API test scenario."""
details = {}
for step in scenario.steps:
action = step.get("action", "get").lower()
endpoint = step.get("endpoint", step.get("target", "/"))
data = step.get("data")
url = f"{self.base_url}{endpoint}"
try:
if action == "get":
async with self._session.get(url) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "post":
async with self._session.post(url, json=data) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "put":
async with self._session.put(url, json=data) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
elif action == "delete":
async with self._session.delete(url) as resp:
details["status_code"] = resp.status
details["response"] = await resp.json()
except aiohttp.ClientError as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"API request failed: {e}",
details={"url": url, "action": action},
)
except json.JSONDecodeError:
details["response"] = "Non-JSON response"
# Check expected outcome
passed = self._check_outcome(details, scenario.expected_outcome)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_browser_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a browser test scenario using Playwright."""
try:
from playwright.async_api import async_playwright
except ImportError:
logger.warning("Playwright not available, skipping browser test")
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=True, # Skip, don't fail
duration_ms=0,
error="Playwright not installed - test skipped",
)
details = {}
try:
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
page = await browser.new_page()
for step in scenario.steps:
action = step.get("action", "navigate")
if action == "navigate":
url = step.get("url", "/")
# Use frontend URL (port 3003 for Vite dev server)
full_url = f"http://localhost:3003{url}" if url.startswith("/") else url
await page.goto(full_url, timeout=scenario.timeout_ms)
details["navigated_to"] = full_url
elif action == "wait_for":
selector = step.get("selector")
if selector:
await page.wait_for_selector(selector, timeout=scenario.timeout_ms)
details["found_selector"] = selector
elif action == "click":
selector = step.get("selector")
if selector:
await page.click(selector)
details["clicked"] = selector
elif action == "fill":
selector = step.get("selector")
value = step.get("value", "")
if selector:
await page.fill(selector, value)
details["filled"] = {selector: value}
elif action == "screenshot":
path = step.get("path", f"test_{scenario.id}.png")
await page.screenshot(path=path)
details["screenshot"] = path
await browser.close()
passed = True
except Exception as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Browser test failed: {e}",
details=details,
)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_cli_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a CLI test scenario."""
details = {}
for step in scenario.steps:
command = step.get("command", step.get("target", ""))
cwd = step.get("cwd", str(self.studies_dir))
if not command:
continue
try:
# Use PowerShell on Windows
result = subprocess.run(
["powershell", "-Command", command],
capture_output=True,
text=True,
cwd=cwd,
timeout=scenario.timeout_ms / 1000,
)
details["command"] = command
details["returncode"] = result.returncode
details["stdout"] = result.stdout[:1000] if result.stdout else ""
details["stderr"] = result.stderr[:1000] if result.stderr else ""
if result.returncode != 0:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Command failed with code {result.returncode}",
details=details,
)
except subprocess.TimeoutExpired:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Command timed out after {scenario.timeout_ms}ms",
details={"command": command},
)
except Exception as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"CLI execution failed: {e}",
details={"command": command},
)
passed = self._check_outcome(details, scenario.expected_outcome)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=passed,
duration_ms=0,
details=details,
)
async def _execute_filesystem_scenario(self, scenario: TestScenario) -> TestResult:
"""Execute a filesystem test scenario."""
details = {}
for step in scenario.steps:
action = step.get("action", "check_exists")
path_str = step.get("path", "")
# Resolve relative paths
if not Path(path_str).is_absolute():
path = self.studies_dir.parent / path_str
else:
path = Path(path_str)
if action == "check_exists":
exists = path.exists()
details["path"] = str(path)
details["exists"] = exists
if scenario.expected_outcome.get("exists", True) != exists:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Path {'does not exist' if not exists else 'exists but should not'}: {path}",
details=details,
)
elif action == "check_file_contains":
content_check = step.get("contains", "")
if path.exists() and path.is_file():
content = path.read_text()
contains = content_check in content
details["contains"] = contains
details["search_term"] = content_check
if not contains:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File does not contain: {content_check}",
details=details,
)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File not found: {path}",
details=details,
)
elif action == "check_json_valid":
if path.exists() and path.is_file():
try:
with open(path) as f:
json.load(f)
details["valid_json"] = True
except json.JSONDecodeError as e:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"Invalid JSON: {e}",
details={"path": str(path)},
)
else:
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=False,
duration_ms=0,
error=f"File not found: {path}",
details=details,
)
return TestResult(
scenario_id=scenario.id,
scenario_name=scenario.name,
passed=True,
duration_ms=0,
details=details,
)
def _check_outcome(self, details: Dict, expected: Dict) -> bool:
"""Check if test details match expected outcome."""
for key, expected_value in expected.items():
if key not in details:
continue
actual_value = details[key]
# Handle nested dicts
if isinstance(expected_value, dict) and isinstance(actual_value, dict):
if not self._check_outcome(actual_value, expected_value):
return False
# Handle lists
elif isinstance(expected_value, list) and isinstance(actual_value, list):
if expected_value != actual_value:
return False
# Handle simple values
elif actual_value != expected_value:
return False
return True
async def verify_fix(self, fix: Dict) -> Dict:
"""
Verify that a specific fix was successful.
Args:
fix: Fix dict with issue_id and files_modified
Returns:
Verification result
"""
issue_id = fix.get("issue_id", "unknown")
files_modified = fix.get("files_modified", [])
# Run quick verification
passed = True
details = {}
# Check that modified files exist
for file_path in files_modified:
path = Path(file_path)
if not path.exists():
passed = False
details["missing_file"] = str(path)
break
# Could add more sophisticated verification here
return {
"issue_id": issue_id,
"passed": passed,
"details": details,
}
async def run_health_check(self) -> Dict:
"""
Run a quick health check on dashboard components.
Returns:
Health status dict
"""
await self.connect()
health = {
"timestamp": datetime.now().isoformat(),
"api": "unknown",
"frontend": "unknown",
"websocket": "unknown",
}
# Check API
try:
async with self._session.get(f"{self.base_url}/health") as resp:
if resp.status == 200:
health["api"] = "healthy"
else:
health["api"] = f"unhealthy (status {resp.status})"
except Exception as e:
health["api"] = f"error: {e}"
# Check frontend (if available)
try:
async with self._session.get("http://localhost:3000") as resp:
if resp.status == 200:
health["frontend"] = "healthy"
else:
health["frontend"] = f"unhealthy (status {resp.status})"
except Exception as e:
health["frontend"] = f"error: {e}"
return health

File diff suppressed because it is too large Load Diff