feat: Add DevLoop automation and HTML Reports

## DevLoop - Closed-Loop Development System

- Orchestrator for plan → build → test → analyze cycle
- Gemini planning via OpenCode CLI
- Claude implementation via CLI bridge
- Playwright browser testing integration
- Test runner with API, filesystem, and browser tests
- Persistent state in .devloop/ directory
- CLI tool: tools/devloop_cli.py

Usage:
    python tools/devloop_cli.py start 'Create new feature'
    python tools/devloop_cli.py plan 'Fix bug in X'
    python tools/devloop_cli.py test --study support_arm
    python tools/devloop_cli.py browser --level full

## HTML Reports (optimization_engine/reporting/)

- Interactive Plotly-based reports
- Convergence plot, Pareto front, parallel coordinates
- Parameter importance analysis
- Self-contained HTML (offline-capable)
- Tailwind CSS styling

## Playwright E2E Tests

- Home page tests
- Test results in test-results/

## LAC Knowledge Base Updates

- Session insights (failures, workarounds, patterns)
- Optimization memory for arm support study
This commit is contained in:
652
optimization_engine/devloop/cli_bridge.py
Normal file
652
optimization_engine/devloop/cli_bridge.py
Normal file
@@ -0,0 +1,652 @@
|
||||
"""
|
||||
CLI Bridge - Execute AI tasks through Claude Code CLI and OpenCode CLI.
|
||||
|
||||
Uses your existing subscriptions via CLI tools:
|
||||
- Claude Code CLI (claude.exe) for implementation
|
||||
- OpenCode CLI (opencode) for Gemini planning
|
||||
|
||||
No API keys needed - leverages your CLI subscriptions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CLIResult:
    """Result from CLI execution."""

    # True when the CLI process exited with return code 0; False on a
    # non-zero exit, timeout, or launch failure.
    success: bool
    # Captured stdout of the process ("" on the timeout/exception paths).
    output: str
    # Captured stderr, or a synthesized message (timeout text, exception str).
    error: str
    # Wall-clock run time; set to the timeout value when the run timed out.
    duration_seconds: float
    # File paths parsed heuristically from the tool's output; empty when
    # nothing was detected or the tool does not modify files.
    files_modified: List[str]
|
||||
|
||||
class ClaudeCodeCLI:
    """
    Execute tasks through Claude Code CLI.

    Uses: claude.exe --print for non-interactive execution
    """

    # NOTE(review): machine-specific absolute path — consider resolving via
    # an env var or shutil.which() for portability.
    CLAUDE_PATH = r"C:\Users\antoi\.local\bin\claude.exe"

    def __init__(self, workspace: Path):
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute a prompt through Claude Code CLI.

        Args:
            prompt: The instruction/prompt to execute
            timeout: Timeout in seconds
            model: Model to use (opus, sonnet, haiku)

        Returns:
            CLIResult with output and modified files
        """
        start_time = datetime.now()

        # Build command
        cmd = [
            self.CLAUDE_PATH,
            "--print",  # Non-interactive mode
            "--model",
            model,
            "--permission-mode",
            "acceptEdits",  # Auto-accept edits
            prompt,
        ]

        logger.info(f"Executing Claude Code CLI: {prompt[:100]}...")

        try:
            # Run the blocking subprocess call in a worker thread so it does
            # not stall the asyncio event loop (fix: the original called
            # subprocess.run directly inside an async def, blocking all other
            # coroutines for up to `timeout` seconds).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},  # Disable colors
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            # Extract modified files from output
            files_modified = self._extract_modified_files(output)

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(
                f"Claude Code completed in {duration:.1f}s, modified {len(files_modified)} files"
            )

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=files_modified,
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Broad catch is deliberate: launch failures (missing binary,
            # permission errors) are reported as a failed CLIResult rather
            # than crashing the orchestration loop.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    def _extract_modified_files(self, output: str) -> List[str]:
        """Extract a deduplicated, sorted list of modified files from Claude Code output.

        The patterns are heuristics over the CLI's human-readable log lines;
        a file mentioned in none of them will be missed.
        """
        files: List[str] = []

        # Look for file modification patterns
        patterns = [
            r"(?:Created|Modified|Wrote|Updated|Edited):\s*[`'\"]?([^\s`'\"]+)[`'\"]?",
            r"Writing to [`'\"]?([^\s`'\"]+)[`'\"]?",
            r"File saved: ([^\s]+)",
        ]

        for pattern in patterns:
            files.extend(re.findall(pattern, output, re.IGNORECASE))

        # sorted() instead of list(set(...)) so the result is deterministic
        # across runs (fix: set iteration order is arbitrary).
        return sorted(set(files))

    async def execute_with_context(
        self,
        prompt: str,
        context_files: List[str],
        timeout: int = 300,
        model: str = "opus",
    ) -> "CLIResult":
        """
        Execute with additional context files loaded.

        Args:
            prompt: The instruction
            context_files: Files to read as context
            timeout: Timeout in seconds
            model: Model to use (new keyword; default preserves old behavior)
        """
        # Build prompt with context
        context_prompt = prompt

        if context_files:
            context_prompt += "\n\nContext files to consider:\n"
            for f in context_files:
                context_prompt += f"- {f}\n"

        # fix: the model choice was previously dropped here (always "opus").
        return await self.execute(context_prompt, timeout, model)
||||
|
||||
class OpenCodeCLI:
    """
    Execute tasks through OpenCode CLI (Gemini).

    Uses: opencode run for non-interactive execution
    """

    # NOTE(review): machine-specific absolute path — consider making this
    # configurable for portability.
    OPENCODE_PATH = r"C:\Users\antoi\AppData\Roaming\npm\opencode.cmd"

    def __init__(self, workspace: Path):
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 180,
        model: str = "google/gemini-3-pro-preview",
    ) -> "CLIResult":
        """
        Execute a prompt through OpenCode CLI.

        Args:
            prompt: The instruction/prompt
            timeout: Timeout in seconds
            model: Model to use

        Returns:
            CLIResult with output
        """
        start_time = datetime.now()

        # Build command
        cmd = [self.OPENCODE_PATH, "run", "--model", model, prompt]

        logger.info(f"Executing OpenCode CLI: {prompt[:100]}...")

        try:
            # Run the blocking subprocess call in a worker thread so it does
            # not stall the asyncio event loop (fix: the original called
            # subprocess.run directly inside an async def).
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(f"OpenCode completed in {duration:.1f}s")

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=[],  # OpenCode typically doesn't modify files directly
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Launch failures are reported, not raised, so the orchestration
            # loop can fall back gracefully.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    @staticmethod
    def _extract_json_block(output: str) -> Optional[str]:
        """Return the first JSON payload embedded in *output*, or None.

        Handles ```json fences, plain ``` fences, and bare JSON objects.
        Shared by plan() and analyze() (fix: the extraction logic was
        duplicated, and analyze() did not handle plain ``` fences).
        """
        if "```json" in output:
            start = output.find("```json") + 7
            end = output.find("```", start)
            return output[start:end].strip()
        if "```" in output:
            start = output.find("```") + 3
            end = output.find("```", start)
            return output[start:end].strip()
        # Try to find a JSON object directly
        match = re.search(r"\{.*\}", output, re.DOTALL)
        return match.group() if match else None

    async def plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """
        Create an implementation plan using Gemini via OpenCode.

        Args:
            objective: What to achieve
            context: Additional context

        Returns:
            Plan dict with tasks and test scenarios (falls back to a minimal
            plan when the CLI fails or returns unparseable output)
        """
        prompt = f"""You are a strategic planner for Atomizer, an FEA optimization framework.

## Objective
{objective}

## Context
{json.dumps(context, indent=2) if context else "None provided"}

## Task
Create a detailed implementation plan in JSON format with:
1. tasks: List of implementation tasks for Claude Code
2. test_scenarios: Tests to verify implementation
3. acceptance_criteria: Success conditions

Output ONLY valid JSON in this format:
```json
{{
  "objective": "{objective}",
  "approach": "Brief description",
  "tasks": [
    {{
      "id": "task_001",
      "description": "What to do",
      "file": "path/to/file.py",
      "priority": "high"
    }}
  ],
  "test_scenarios": [
    {{
      "id": "test_001",
      "name": "Test name",
      "type": "filesystem",
      "steps": [{{"action": "check_exists", "path": "some/path"}}],
      "expected_outcome": {{"exists": true}}
    }}
  ],
  "acceptance_criteria": [
    "Criterion 1"
  ]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            logger.error(f"OpenCode planning failed: {result.error}")
            return self._fallback_plan(objective, context)

        json_str = self._extract_json_block(result.output)
        if json_str is None:
            return self._fallback_plan(objective, context)

        try:
            plan = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse plan JSON: {e}")
            return self._fallback_plan(objective, context)

        logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
        return plan

    def _fallback_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Generate a minimal single-task plan when Gemini planning fails."""
        logger.warning("Using fallback plan")

        return {
            "objective": objective,
            "approach": "Fallback plan - manual implementation",
            "tasks": [
                {
                    "id": "task_001",
                    "description": f"Implement: {objective}",
                    "file": "TBD",
                    "priority": "high",
                }
            ],
            "test_scenarios": [],
            "acceptance_criteria": [objective],
        }

    async def analyze(self, test_results: Dict) -> Dict:
        """
        Analyze test results using Gemini via OpenCode.

        Args:
            test_results: Test report from dashboard

        Returns:
            Analysis with issues and fix plans; short-circuits with an empty
            issue list when no test failed
        """
        summary = test_results.get("summary", {})
        scenarios = test_results.get("scenarios", [])

        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "recommendations": ["All tests passed!"],
            }

        failures = [s for s in scenarios if not s.get("passed", True)]

        prompt = f"""Analyze these test failures for Atomizer FEA optimization framework:

## Test Summary
- Total: {summary.get("total", 0)}
- Passed: {summary.get("passed", 0)}
- Failed: {summary.get("failed", 0)}

## Failed Tests
{json.dumps(failures, indent=2)}

## Task
Provide root cause analysis and fix plans in JSON:

```json
{{
  "issues_found": true,
  "issues": [
    {{
      "id": "issue_001",
      "description": "What went wrong",
      "severity": "high",
      "root_cause": "Why it failed"
    }}
  ],
  "fix_plans": {{
    "issue_001": {{
      "approach": "How to fix",
      "steps": [{{"action": "edit", "file": "path", "description": "change"}}]
    }}
  }},
  "recommendations": ["suggestion"]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            return self._fallback_analysis(failures)

        json_str = self._extract_json_block(result.output)
        if json_str is None:
            # fix: the original parsed "{}" here and returned an empty dict,
            # which the orchestrator read as "no issues found" — declaring
            # success even though tests had failed.
            return self._fallback_analysis(failures)

        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            # fix: was a bare `except:` that swallowed everything, including
            # KeyboardInterrupt and SystemExit.
            logger.error(f"Failed to parse analysis JSON: {e}")
            return self._fallback_analysis(failures)

    def _fallback_analysis(self, failures: List[Dict]) -> Dict:
        """Generate a placeholder analysis (one issue per failure) when Gemini fails."""
        issues = []
        fix_plans = {}

        for i, failure in enumerate(failures):
            issue_id = f"issue_{i + 1}"
            issues.append(
                {
                    "id": issue_id,
                    "description": failure.get("error", "Unknown error"),
                    "severity": "medium",
                    "root_cause": "Requires investigation",
                }
            )
            fix_plans[issue_id] = {
                "approach": "Manual investigation required",
                "steps": [],
            }

        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "recommendations": ["Review failed tests manually"],
        }
||||
|
||||
class DevLoopCLIOrchestrator:
    """
    Orchestrate DevLoop using CLI tools.

    - OpenCode (Gemini) for planning and analysis
    - Claude Code for implementation and fixes
    """

    def __init__(self, workspace: Path = None):
        # Fall back to the default Atomizer checkout when none is supplied.
        self.workspace = workspace if workspace else Path("C:/Users/antoi/Atomizer")
        self.claude = ClaudeCodeCLI(self.workspace)
        self.opencode = OpenCodeCLI(self.workspace)
        self.iteration = 0

    async def run_cycle(
        self,
        objective: str,
        context: Dict = None,
        max_iterations: int = 5,
    ) -> Dict:
        """
        Drive one full plan → implement → test → analyze → fix cycle.

        Args:
            objective: What to achieve
            context: Additional context
            max_iterations: Maximum fix iterations

        Returns:
            Cycle report
        """
        from .test_runner import DashboardTestRunner

        cycle_started = datetime.now()
        report = {
            "objective": objective,
            "iterations": [],
            "status": "in_progress",
        }

        logger.info(f"Starting DevLoop cycle: {objective}")

        # Phase 1: plan once with Gemini via OpenCode; the same plan is
        # reused across fix iterations.
        logger.info("Phase 1: Planning with Gemini...")
        plan = await self.opencode.plan(objective, context)

        for loop_no in range(1, max_iterations + 1):
            record = {"iteration": loop_no}

            # Phase 2: implement the plan with Claude Code.
            logger.info(f"Phase 2 (iter {loop_no}): Implementing with Claude Code...")
            built = await self._implement(plan)
            record["implementation"] = {
                "success": built.success,
                "files_modified": built.files_modified,
            }

            # Phase 3: run the plan's test scenarios.
            logger.info(f"Phase 3 (iter {loop_no}): Testing...")
            runner = DashboardTestRunner()
            outcome = await runner.run_test_suite(plan.get("test_scenarios", []))
            record["test_results"] = outcome

            # Stop as soon as the whole suite is green.
            if outcome.get("summary", {}).get("failed", 0) == 0:
                logger.info("All tests passed!")
                report["iterations"].append(record)
                report["status"] = "success"
                break

            # Phase 4: ask Gemini to diagnose the failures.
            logger.info(f"Phase 4 (iter {loop_no}): Analyzing failures...")
            diagnosis = await self.opencode.analyze(outcome)
            record["analysis"] = diagnosis

            if not diagnosis.get("issues_found"):
                report["status"] = "success"
                report["iterations"].append(record)
                break

            # Phase 5: hand the diagnosed issues back to Claude Code.
            logger.info(f"Phase 5 (iter {loop_no}): Fixing issues...")
            repaired = await self._fix(diagnosis)
            record["fixes"] = {
                "success": repaired.success,
                "files_modified": repaired.files_modified,
            }

            report["iterations"].append(record)

        if report["status"] == "in_progress":
            report["status"] = "max_iterations_reached"

        report["duration_seconds"] = (datetime.now() - cycle_started).total_seconds()

        logger.info(f"DevLoop cycle completed: {report['status']}")

        return report

    async def _implement(self, plan: Dict) -> CLIResult:
        """Hand the plan's tasks to Claude Code for implementation."""
        tasks = plan.get("tasks", [])

        if not tasks:
            # An empty task list counts as a trivially successful run.
            return CLIResult(
                success=True,
                output="No tasks to implement",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Assemble the implementation prompt from sections.
        sections = [
            f"""Implement the following tasks for Atomizer:

## Objective
{plan.get("objective", "Unknown")}

## Approach
{plan.get("approach", "Follow best practices")}

## Tasks
"""
        ]
        sections.extend(
            f"""
### {item.get("id", "task")}: {item.get("description", "")}
- File: {item.get("file", "TBD")}
- Priority: {item.get("priority", "medium")}
"""
            for item in tasks
        )
        sections.append(
            """
## Requirements
- Follow Atomizer coding standards
- Use AtomizerSpec v2.0 format
- Create README.md for any new study
- Use existing extractors from optimization_engine/extractors/
"""
        )

        return await self.claude.execute("".join(sections), timeout=300)

    async def _fix(self, analysis: Dict) -> CLIResult:
        """Hand the analyzed issues and their fix plans to Claude Code."""
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})

        if not issues:
            return CLIResult(
                success=True,
                output="No issues to fix",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Assemble the fix prompt from per-issue sections.
        parts = ["Fix the following issues:\n\n"]

        for issue in issues:
            issue_id = issue.get("id", "unknown")
            parts.append(
                f"""
## Issue: {issue_id}
- Description: {issue.get("description", "")}
- Root Cause: {issue.get("root_cause", "Unknown")}
- Severity: {issue.get("severity", "medium")}
"""
            )

            plan_for_issue = fix_plans.get(issue_id, {})
            if plan_for_issue:
                parts.append(f"- Fix Approach: {plan_for_issue.get('approach', 'Investigate')}\n")
                for step in plan_for_issue.get("steps", []):
                    parts.append(f"  - {step.get('description', step.get('action', 'step'))}\n")

        return await self.claude.execute("".join(parts), timeout=300)

    async def step_plan(self, objective: str, context: Dict = None) -> Dict:
        """Run just the planning phase and return the plan."""
        return await self.opencode.plan(objective, context)

    async def step_implement(self, plan: Dict) -> CLIResult:
        """Run just the implementation phase for an existing plan."""
        return await self._implement(plan)

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Run just the failure-analysis phase on existing test results."""
        return await self.opencode.analyze(test_results)
|
||||
Reference in New Issue
Block a user