feat: Add DevLoop automation and HTML Reports
## DevLoop - Closed-Loop Development System - Orchestrator for plan → build → test → analyze cycle - Gemini planning via OpenCode CLI - Claude implementation via CLI bridge - Playwright browser testing integration - Test runner with API, filesystem, and browser tests - Persistent state in .devloop/ directory - CLI tool: tools/devloop_cli.py Usage: python tools/devloop_cli.py start 'Create new feature' python tools/devloop_cli.py plan 'Fix bug in X' python tools/devloop_cli.py test --study support_arm python tools/devloop_cli.py browser --level full ## HTML Reports (optimization_engine/reporting/) - Interactive Plotly-based reports - Convergence plot, Pareto front, parallel coordinates - Parameter importance analysis - Self-contained HTML (offline-capable) - Tailwind CSS styling ## Playwright E2E Tests - Home page tests - Test results in test-results/ ## LAC Knowledge Base Updates - Session insights (failures, workarounds, patterns) - Optimization memory for arm support study
This commit is contained in:
68
optimization_engine/devloop/__init__.py
Normal file
68
optimization_engine/devloop/__init__.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""
|
||||
Atomizer DevLoop - Closed-Loop Development System
|
||||
|
||||
This module provides autonomous development cycle capabilities:
|
||||
1. Gemini Pro for strategic planning and analysis
|
||||
2. Claude Code (Opus 4.5) for implementation
|
||||
3. Dashboard testing for verification
|
||||
4. LAC integration for persistent learning
|
||||
|
||||
The DevLoop orchestrates the full cycle:
|
||||
PLAN (Gemini) -> BUILD (Claude) -> TEST (Dashboard) -> ANALYZE (Gemini) -> FIX (Claude) -> VERIFY
|
||||
|
||||
Example usage:
|
||||
from optimization_engine.devloop import DevLoopOrchestrator
|
||||
|
||||
orchestrator = DevLoopOrchestrator()
|
||||
result = await orchestrator.run_development_cycle(
|
||||
objective="Create support_arm optimization study"
|
||||
)
|
||||
"""
|
||||
|
||||
|
||||
# Lazy imports to avoid circular dependencies
|
||||
def __getattr__(name):
|
||||
if name == "DevLoopOrchestrator":
|
||||
from .orchestrator import DevLoopOrchestrator
|
||||
|
||||
return DevLoopOrchestrator
|
||||
elif name == "LoopPhase":
|
||||
from .orchestrator import LoopPhase
|
||||
|
||||
return LoopPhase
|
||||
elif name == "LoopState":
|
||||
from .orchestrator import LoopState
|
||||
|
||||
return LoopState
|
||||
elif name == "DashboardTestRunner":
|
||||
from .test_runner import DashboardTestRunner
|
||||
|
||||
return DashboardTestRunner
|
||||
elif name == "TestScenario":
|
||||
from .test_runner import TestScenario
|
||||
|
||||
return TestScenario
|
||||
elif name == "GeminiPlanner":
|
||||
from .planning import GeminiPlanner
|
||||
|
||||
return GeminiPlanner
|
||||
elif name == "ProblemAnalyzer":
|
||||
from .analyzer import ProblemAnalyzer
|
||||
|
||||
return ProblemAnalyzer
|
||||
elif name == "ClaudeCodeBridge":
|
||||
from .claude_bridge import ClaudeCodeBridge
|
||||
|
||||
return ClaudeCodeBridge
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
|
||||
|
||||
__all__ = [
|
||||
"DevLoopOrchestrator",
|
||||
"LoopPhase",
|
||||
"LoopState",
|
||||
"DashboardTestRunner",
|
||||
"TestScenario",
|
||||
"GeminiPlanner",
|
||||
"ProblemAnalyzer",
|
||||
]
|
||||
421
optimization_engine/devloop/analyzer.py
Normal file
421
optimization_engine/devloop/analyzer.py
Normal file
@@ -0,0 +1,421 @@
|
||||
"""
|
||||
Problem Analyzer - Analyze test results and generate fix plans using Gemini.
|
||||
|
||||
Handles:
|
||||
- Root cause analysis from test failures
|
||||
- Pattern detection across failures
|
||||
- Fix plan generation
|
||||
- Priority assessment
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class Issue:
    """A detected issue from test results."""

    id: str  # stable identifier, e.g. "api_issue_1"
    description: str  # human-readable summary of what went wrong
    severity: str = "medium"  # "critical", "high", "medium", "low"
    category: str = "unknown"  # e.g. "api", "filesystem", "ui", "logic"
    affected_files: List[str] = field(default_factory=list)  # candidate files to inspect
    test_ids: List[str] = field(default_factory=list)  # scenario ids that exposed the issue
    root_cause: Optional[str] = None  # best-effort explanation, if determined
|
||||
|
||||
|
||||
@dataclass
class FixPlan:
    """Plan for fixing an issue."""

    issue_id: str  # id of the Issue this plan addresses
    approach: str  # high-level description of the fix strategy
    steps: List[Dict] = field(default_factory=list)  # ordered actions, e.g. {"action", "file", "description"}
    estimated_effort: str = "medium"  # "low", "medium", "high"
    rollback_steps: List[str] = field(default_factory=list)  # how to undo the fix if needed
|
||||
|
||||
|
||||
@dataclass
class AnalysisReport:
    """Complete analysis report."""

    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())  # creation time (local, ISO 8601)
    issues_found: bool = False  # True when at least one issue was detected
    issues: List[Issue] = field(default_factory=list)  # detected issues
    fix_plans: Dict[str, FixPlan] = field(default_factory=dict)  # issue id -> fix plan
    patterns: List[Dict] = field(default_factory=list)  # recurring-failure patterns
    recommendations: List[str] = field(default_factory=list)  # high-level suggestions
|
||||
|
||||
|
||||
class ProblemAnalyzer:
    """
    Gemini-powered analysis of test failures and improvement opportunities.

    Capabilities:
    - Deep analysis of test results
    - Root cause identification
    - Pattern detection across failures
    - Fix plan generation with priority
    """

    def __init__(self, gemini_planner: Optional[Any] = None):
        """
        Initialize the analyzer.

        Args:
            gemini_planner: GeminiPlanner instance for API access
        """
        self._planner = gemini_planner
        # Stringified annotation: evaluated lazily, so this does not require
        # AnalysisReport to be resolvable at __init__ time.
        self._history: "List[AnalysisReport]" = []

    @property
    def planner(self):
        """Get or create the Gemini planner (imported lazily to avoid a cycle)."""
        if self._planner is None:
            from .planning import GeminiPlanner

            self._planner = GeminiPlanner()
        return self._planner

    async def analyze_test_results(self, test_report: Dict) -> Dict:
        """
        Perform deep analysis of test results.

        Args:
            test_report: Test report from DashboardTestRunner

        Returns:
            Analysis dict with issues, fix_plans, patterns, recommendations
        """
        summary = test_report.get("summary", {})
        scenarios = test_report.get("scenarios", [])

        # Quick return if all passed
        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "patterns": [],
                "recommendations": ["All tests passed!"],
            }

        # Scenarios with no "passed" key are assumed to have passed.
        failures = [s for s in scenarios if not s.get("passed", True)]

        # Use Gemini for deep analysis if available; a "mock" client means the
        # real API is not configured, so use the rule-based fallback.
        if self.planner.client != "mock":
            return await self._gemini_analysis(test_report, failures)
        else:
            return self._rule_based_analysis(test_report, failures)

    async def _gemini_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Use Gemini for sophisticated analysis; fall back to rules on any error."""
        prompt = self._build_analysis_prompt(test_report, failures)

        try:
            # get_running_loop() is the correct call inside a coroutine;
            # get_event_loop() is deprecated here since Python 3.10.
            loop = asyncio.get_running_loop()
            # generate_content is blocking; run it off the event loop.
            response = await loop.run_in_executor(
                None, lambda: self.planner._model.generate_content(prompt)
            )

            text = response.text

            # Parse JSON from a fenced ```json block in the response.
            if "```json" in text:
                start = text.find("```json") + 7
                end = text.find("```", start)
                # Tolerate a missing closing fence instead of clipping a byte.
                json_str = text[start:end].strip() if end != -1 else text[start:].strip()
                analysis = json.loads(json_str)
            else:
                # No structured answer; fall back to rule-based analysis.
                analysis = self._rule_based_analysis(test_report, failures)

            logger.info(f"Gemini analysis found {len(analysis.get('issues', []))} issues")
            return analysis

        except Exception as e:
            logger.error(f"Gemini analysis failed: {e}, falling back to rule-based")
            return self._rule_based_analysis(test_report, failures)

    def _build_analysis_prompt(self, test_report: Dict, failures: List[Dict]) -> str:
        """Build the analysis prompt sent to Gemini (expects a JSON reply)."""
        return f"""## Test Failure Analysis

### Test Report Summary
- Total Tests: {test_report.get("summary", {}).get("total", 0)}
- Passed: {test_report.get("summary", {}).get("passed", 0)}
- Failed: {test_report.get("summary", {}).get("failed", 0)}

### Failed Tests
{json.dumps(failures, indent=2)}

### Analysis Required

Analyze these test failures and provide:

1. **Root Cause Analysis**: What caused each failure?
2. **Pattern Detection**: Are there recurring issues?
3. **Fix Priority**: Which issues should be addressed first?
4. **Implementation Plan**: Specific code changes needed

Output as JSON:
```json
{{
  "issues_found": true,
  "issues": [
    {{
      "id": "issue_001",
      "description": "What went wrong",
      "severity": "high|medium|low",
      "category": "api|ui|config|filesystem|logic",
      "affected_files": ["path/to/file.py"],
      "test_ids": ["test_001"],
      "root_cause": "Why it happened"
    }}
  ],
  "fix_plans": {{
    "issue_001": {{
      "issue_id": "issue_001",
      "approach": "How to fix it",
      "steps": [
        {{"action": "edit", "file": "path/to/file.py", "description": "Change X to Y"}}
      ],
      "estimated_effort": "low|medium|high",
      "rollback_steps": ["How to undo if needed"]
    }}
  }},
  "patterns": [
    {{"pattern": "Common issue type", "occurrences": 3, "suggestion": "Systemic fix"}}
  ],
  "recommendations": [
    "High-level improvement suggestions"
  ]
}}
```

Focus on actionable, specific fixes that Claude Code can implement.
"""

    def _rule_based_analysis(self, test_report: Dict, failures: List[Dict]) -> Dict:
        """Rule-based analysis when Gemini is not available.

        Buckets failures by scenario id / detail keys, emits Issue/FixPlan
        dicts for API and filesystem failures, and recommendation strings for
        browser and CLI failures (no per-issue plans for those).
        """
        issues = []
        fix_plans = {}
        patterns = []

        # Categorize failures
        api_failures = []
        filesystem_failures = []
        browser_failures = []
        cli_failures = []

        for failure in failures:
            scenario_id = failure.get("scenario_id", "unknown")
            error = failure.get("error", "")
            details = failure.get("details", {})

            # Detect issue type from the scenario id or detail keys.
            if "api" in scenario_id.lower() or "status_code" in details:
                api_failures.append(failure)
            elif "filesystem" in scenario_id.lower() or "exists" in details:
                filesystem_failures.append(failure)
            elif "browser" in scenario_id.lower():
                browser_failures.append(failure)
            elif "cli" in scenario_id.lower() or "command" in details:
                cli_failures.append(failure)

        # Generate issues for API failures
        for i, failure in enumerate(api_failures):
            issue_id = f"api_issue_{i + 1}"
            status = failure.get("details", {}).get("status_code", "unknown")

            issues.append(
                {
                    "id": issue_id,
                    "description": f"API request failed with status {status}",
                    # Server-side errors rank higher than client-side ones.
                    "severity": "high" if status in [500, 503] else "medium",
                    "category": "api",
                    "affected_files": self._guess_api_files(failure),
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": failure.get("error", "Unknown API error"),
                }
            )

            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Check API endpoint implementation",
                "steps": [
                    {"action": "check", "description": "Verify endpoint exists in routes"},
                    {"action": "test", "description": "Run endpoint manually with curl"},
                ],
                "estimated_effort": "medium",
                "rollback_steps": [],
            }

        # Generate issues for filesystem failures
        for i, failure in enumerate(filesystem_failures):
            issue_id = f"fs_issue_{i + 1}"
            path = failure.get("details", {}).get("path", "unknown path")

            issues.append(
                {
                    "id": issue_id,
                    "description": f"Expected file/directory not found: {path}",
                    "severity": "high",
                    "category": "filesystem",
                    "affected_files": [path],
                    "test_ids": [failure.get("scenario_id")],
                    "root_cause": "File was not created during implementation",
                }
            )

            fix_plans[issue_id] = {
                "issue_id": issue_id,
                "approach": "Create missing file/directory",
                "steps": [
                    {"action": "create", "path": path, "description": f"Create {path}"},
                ],
                "estimated_effort": "low",
                "rollback_steps": [f"Remove {path}"],
            }

        # Detect recurring patterns across categories.
        if len(api_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple API failures",
                    "occurrences": len(api_failures),
                    "suggestion": "Check if backend server is running",
                }
            )

        if len(filesystem_failures) > 1:
            patterns.append(
                {
                    "pattern": "Multiple missing files",
                    "occurrences": len(filesystem_failures),
                    "suggestion": "Review study creation process",
                }
            )

        # Generate recommendations (browser/CLI failures surface only here).
        recommendations = []
        if api_failures:
            recommendations.append("Verify backend API is running on port 8000")
        if filesystem_failures:
            recommendations.append("Check that study directory structure is correctly created")
        if browser_failures:
            recommendations.append("Ensure frontend is running on port 3000")
        if cli_failures:
            recommendations.append("Check Python environment and script paths")

        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "patterns": patterns,
            "recommendations": recommendations,
        }

    def _guess_api_files(self, failure: Dict) -> List[str]:
        """Guess which API files might be affected.

        Currently returns the common backend route/service locations; the
        failure payload is not yet inspected for a more specific guess.
        """
        return [
            "atomizer-dashboard/backend/api/routes/",
            "atomizer-dashboard/backend/api/services/",
        ]

    async def analyze_iteration_history(self, iterations: List[Dict]) -> Dict:
        """
        Analyze patterns across multiple iterations.

        Args:
            iterations: List of IterationResult dicts

        Returns:
            Cross-iteration analysis with success rate and recurring issues
        """
        recurring_issues = {}
        success_count = 0

        for iteration in iterations:
            if iteration.get("success"):
                success_count += 1

            # Track recurring issue categories across iterations.
            analysis = iteration.get("analysis", {})
            for issue in analysis.get("issues", []):
                issue_type = issue.get("category", "unknown")
                recurring_issues[issue_type] = recurring_issues.get(issue_type, 0) + 1

        # Guard against division by zero for an empty history.
        total = len(iterations) or 1

        return {
            "total_iterations": len(iterations),
            "success_rate": success_count / total,
            "recurring_issues": recurring_issues,
            "most_common_issue": max(recurring_issues, key=recurring_issues.get)
            if recurring_issues
            else None,
            "recommendation": self._generate_meta_recommendation(
                recurring_issues, success_count / total
            ),
        }

    def _generate_meta_recommendation(self, recurring_issues: Dict, success_rate: float) -> str:
        """Generate a high-level recommendation based on iteration history."""
        if success_rate >= 0.8:
            return "Development cycle is healthy. Minor issues detected."
        elif success_rate >= 0.5:
            most_common = (
                max(recurring_issues, key=recurring_issues.get) if recurring_issues else "unknown"
            )
            return f"Focus on fixing {most_common} issues to improve success rate."
        else:
            return (
                "Development cycle needs attention. Consider reviewing architecture or test design."
            )

    def get_priority_queue(self, analysis: Dict) -> List[Dict]:
        """
        Get issues sorted by priority for fixing.

        Args:
            analysis: Analysis result dict

        Returns:
            Sorted list of {"issue", "fix_plan"} dicts, most severe first
        """
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})

        # Lower rank = fix sooner; unknown severities sort with "medium".
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}

        sorted_issues = sorted(
            issues, key=lambda x: severity_order.get(x.get("severity", "medium"), 2)
        )

        # Pair each issue with its fix plan (None when no plan exists).
        queue = []
        for issue in sorted_issues:
            issue_id = issue.get("id")
            queue.append(
                {
                    "issue": issue,
                    "fix_plan": fix_plans.get(issue_id),
                }
            )

        return queue
|
||||
170
optimization_engine/devloop/browser_scenarios.py
Normal file
170
optimization_engine/devloop/browser_scenarios.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""
|
||||
Browser Test Scenarios for DevLoop
|
||||
Pre-built Playwright scenarios that can be used for dashboard verification.
|
||||
|
||||
These scenarios use the same structure as DashboardTestRunner browser tests
|
||||
but provide ready-made tests for common dashboard operations.
|
||||
"""
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
def get_study_browser_scenarios(study_name: str) -> List[Dict]:
    """
    Get browser test scenarios for a specific study.

    Args:
        study_name: The study to test

    Returns:
        List of browser test scenarios (home page, canvas, dashboard)
    """
    return [
        {
            "id": "browser_home_loads",
            "name": "Home page loads with studies",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
                {"action": "wait_for", "selector": "button:has-text('trials')"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
        {
            "id": "browser_canvas_loads",
            "name": f"Canvas loads for {study_name}",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": f"/canvas/{study_name}"},
                # Wait for ReactFlow nodes to render
                {"action": "wait_for", "selector": ".react-flow__node"},
            ],
            "expected_outcome": {"status": "pass"},
            # Canvas rendering is slower than the other pages.
            "timeout_ms": 20000,
        },
        {
            "id": "browser_dashboard_loads",
            "name": f"Dashboard loads for {study_name}",
            "type": "browser",
            "steps": [
                # Plain literal: the original used an f-string with no placeholders.
                {"action": "navigate", "url": "/dashboard"},
                # Wait for dashboard main element to load
                {"action": "wait_for", "selector": "main"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
    ]
|
||||
|
||||
|
||||
def get_ui_verification_scenarios() -> List[Dict]:
    """
    Get scenarios for verifying UI components.

    These are general UI health checks, not study-specific.
    """

    def _browser_check(check_id, name, steps):
        # All UI health checks share the same type, outcome, and timeout.
        return {
            "id": check_id,
            "name": name,
            "type": "browser",
            "steps": steps,
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        }

    stats_steps = [{"action": "navigate", "url": "/"}]
    for label in ("Total Studies", "Running", "Total Trials"):
        stats_steps.append({"action": "wait_for", "selector": f"text={label}"})

    expand_steps = [
        {"action": "navigate", "url": "/"},
        {"action": "wait_for", "selector": "button:has-text('trials')"},
        {"action": "click", "selector": "button:has-text('trials')"},
        # After click, should see study status badges
        {
            "action": "wait_for",
            "selector": "span:has-text('completed'), span:has-text('running'), span:has-text('paused')",
        },
    ]

    return [
        _browser_check("browser_home_stats", "Home page shows statistics", stats_steps),
        _browser_check("browser_expand_folder", "Topic folder expands on click", expand_steps),
    ]
|
||||
|
||||
|
||||
def get_chat_verification_scenarios(study_name: str = "support_arm") -> List[Dict]:
    """
    Get scenarios for verifying chat/Claude integration.

    Args:
        study_name: Study whose canvas hosts the chat panel. Defaults to
            "support_arm" for backward compatibility with the previous
            hard-coded behavior.

    Returns:
        List of browser test scenarios
    """
    return [
        {
            "id": "browser_chat_panel",
            "name": "Chat panel opens",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": f"/canvas/{study_name}"},
                {"action": "wait_for", "selector": ".react-flow__node"},
                # Look for chat toggle or chat panel
                {
                    "action": "click",
                    "selector": "button[aria-label='Chat'], button:has-text('Chat')",
                },
                {"action": "wait_for", "selector": "textarea, input[type='text']"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 15000,
        },
    ]
|
||||
|
||||
|
||||
# Standard scenario sets, keyed by verbosity level.  Built once at import
# time; "full" bundles the general UI checks plus the support_arm study
# scenarios.  Callers that extend a set should copy it first (as
# get_browser_scenarios does for "full").
STANDARD_BROWSER_SCENARIOS: Dict[str, List[Dict]] = {
    "quick": [
        {
            "id": "browser_smoke",
            "name": "Dashboard smoke test",
            "type": "browser",
            "steps": [
                {"action": "navigate", "url": "/"},
                {"action": "wait_for", "selector": "text=Studies"},
            ],
            "expected_outcome": {"status": "pass"},
            "timeout_ms": 10000,
        }
    ],
    "home": get_ui_verification_scenarios(),
    "full": get_ui_verification_scenarios() + get_study_browser_scenarios("support_arm"),
}
|
||||
|
||||
|
||||
def get_browser_scenarios(level: str = "quick", study_name: str = None) -> List[Dict]:
    """
    Get browser scenarios by level.

    Args:
        level: "quick" (smoke), "home" (home page), "full" (all scenarios),
            or "study" (study-specific tests only; requires study_name)
        study_name: Optional study name for study-specific tests

    Returns:
        List of browser test scenarios
    """
    if level == "quick":
        return STANDARD_BROWSER_SCENARIOS["quick"]
    elif level == "home":
        return STANDARD_BROWSER_SCENARIOS["home"]
    elif level == "full":
        scenarios = list(STANDARD_BROWSER_SCENARIOS["full"])
        if study_name:
            # "full" already bundles the support_arm study scenarios, so only
            # append study scenarios that are not already present (previously
            # study_name="support_arm" produced duplicates).
            scenarios.extend(
                s for s in get_study_browser_scenarios(study_name) if s not in scenarios
            )
        return scenarios
    elif level == "study" and study_name:
        return get_study_browser_scenarios(study_name)
    else:
        # Unknown level (or "study" without a name): fall back to smoke tests.
        return STANDARD_BROWSER_SCENARIOS["quick"]
|
||||
392
optimization_engine/devloop/claude_bridge.py
Normal file
392
optimization_engine/devloop/claude_bridge.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""
|
||||
Claude Code Bridge - Interface between DevLoop and Claude Code execution.
|
||||
|
||||
Handles:
|
||||
- Translating Gemini plans into Claude Code instructions
|
||||
- Executing code changes through OpenCode extension or CLI
|
||||
- Capturing implementation results
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ImplementationResult:
    """Result of a Claude Code implementation."""

    status: str  # "success", "partial", "error"
    files_modified: List[str]  # paths reported as created or modified
    warnings: List[str]  # non-fatal notices from execution
    errors: List[str]  # error messages collected during execution
    duration_seconds: float  # wall-clock time the implementation took
|
||||
|
||||
|
||||
class ClaudeCodeBridge:
|
||||
"""
|
||||
Bridge between Gemini plans and Claude Code execution.
|
||||
|
||||
Supports multiple execution modes:
|
||||
- CLI: Direct Claude Code CLI invocation
|
||||
- API: Anthropic API for code generation (if API key available)
|
||||
- Manual: Generate instructions for human execution
|
||||
"""
|
||||
|
||||
def __init__(self, config: Optional[Dict] = None):
|
||||
"""
|
||||
Initialize the bridge.
|
||||
|
||||
Args:
|
||||
config: Configuration with execution mode and API settings
|
||||
"""
|
||||
self.config = config or {}
|
||||
self.workspace = Path(self.config.get("workspace", "C:/Users/antoi/Atomizer"))
|
||||
self.execution_mode = self.config.get("mode", "cli")
|
||||
self._client = None
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
"""Lazy-load Anthropic client if API mode."""
|
||||
if self._client is None and self.execution_mode == "api":
|
||||
try:
|
||||
import anthropic
|
||||
|
||||
api_key = self.config.get("api_key") or os.environ.get("ANTHROPIC_API_KEY")
|
||||
if api_key:
|
||||
self._client = anthropic.Anthropic(api_key=api_key)
|
||||
logger.info("Anthropic client initialized")
|
||||
except ImportError:
|
||||
logger.warning("anthropic package not installed")
|
||||
return self._client
|
||||
|
||||
def create_implementation_session(self, plan: Dict) -> str:
|
||||
"""
|
||||
Generate Claude Code instruction from Gemini plan.
|
||||
|
||||
Args:
|
||||
plan: Plan dict from GeminiPlanner
|
||||
|
||||
Returns:
|
||||
Formatted instruction string for Claude Code
|
||||
"""
|
||||
objective = plan.get("objective", "Unknown objective")
|
||||
approach = plan.get("approach", "")
|
||||
tasks = plan.get("tasks", [])
|
||||
acceptance_criteria = plan.get("acceptance_criteria", [])
|
||||
|
||||
instruction = f"""## Implementation Task: {objective}
|
||||
|
||||
### Approach
|
||||
{approach}
|
||||
|
||||
### Tasks to Complete
|
||||
"""
|
||||
|
||||
for i, task in enumerate(tasks, 1):
|
||||
instruction += f"""
|
||||
{i}. **{task.get("description", "Task")}**
|
||||
- File: `{task.get("file", "TBD")}`
|
||||
- Priority: {task.get("priority", "medium")}
|
||||
"""
|
||||
if task.get("code_hint"):
|
||||
instruction += f" - Hint: {task.get('code_hint')}\n"
|
||||
if task.get("dependencies"):
|
||||
instruction += f" - Depends on: {', '.join(task['dependencies'])}\n"
|
||||
|
||||
instruction += """
|
||||
### Acceptance Criteria
|
||||
"""
|
||||
for criterion in acceptance_criteria:
|
||||
instruction += f"- [ ] {criterion}\n"
|
||||
|
||||
instruction += """
|
||||
### Constraints
|
||||
- Maintain existing API contracts
|
||||
- Follow Atomizer coding standards
|
||||
- Ensure AtomizerSpec v2.0 compatibility
|
||||
- Create README.md for any new study
|
||||
- Use existing extractors from SYS_12 when possible
|
||||
"""
|
||||
|
||||
return instruction
|
||||
|
||||
async def execute_plan(self, plan: Dict) -> Dict:
|
||||
"""
|
||||
Execute an implementation plan.
|
||||
|
||||
Args:
|
||||
plan: Plan dict from GeminiPlanner
|
||||
|
||||
Returns:
|
||||
Implementation result dict
|
||||
"""
|
||||
instruction = self.create_implementation_session(plan)
|
||||
|
||||
if self.execution_mode == "cli":
|
||||
return await self._execute_via_cli(instruction, plan)
|
||||
elif self.execution_mode == "api":
|
||||
return await self._execute_via_api(instruction, plan)
|
||||
else:
|
||||
return await self._execute_manual(instruction, plan)
|
||||
|
||||
async def _execute_via_cli(self, instruction: str, plan: Dict) -> Dict:
|
||||
"""Execute through Claude Code CLI."""
|
||||
start_time = datetime.now()
|
||||
|
||||
# Write instruction to temp file
|
||||
instruction_file = self.workspace / ".devloop_instruction.md"
|
||||
instruction_file.write_text(instruction)
|
||||
|
||||
files_modified = []
|
||||
warnings = []
|
||||
errors = []
|
||||
|
||||
try:
|
||||
# Try to invoke Claude Code CLI
|
||||
# Note: This assumes claude-code or similar CLI is available
|
||||
result = subprocess.run(
|
||||
[
|
||||
"powershell",
|
||||
"-Command",
|
||||
f"cd {self.workspace}; claude --print '{instruction_file}'",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300, # 5 minute timeout
|
||||
cwd=str(self.workspace),
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
# Parse output for modified files
|
||||
output = result.stdout
|
||||
for line in output.split("\n"):
|
||||
if "Modified:" in line or "Created:" in line:
|
||||
parts = line.split(":", 1)
|
||||
if len(parts) > 1:
|
||||
files_modified.append(parts[1].strip())
|
||||
|
||||
status = "success"
|
||||
else:
|
||||
errors.append(result.stderr or "CLI execution failed")
|
||||
status = "error"
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
errors.append("CLI execution timed out after 5 minutes")
|
||||
status = "error"
|
||||
except FileNotFoundError:
|
||||
# Claude CLI not found, fall back to manual mode
|
||||
logger.warning("Claude CLI not found, switching to manual mode")
|
||||
return await self._execute_manual(instruction, plan)
|
||||
except Exception as e:
|
||||
errors.append(str(e))
|
||||
status = "error"
|
||||
finally:
|
||||
# Clean up temp file
|
||||
if instruction_file.exists():
|
||||
instruction_file.unlink()
|
||||
|
||||
duration = (datetime.now() - start_time).total_seconds()
|
||||
|
||||
return {
|
||||
"status": status,
|
||||
"files": files_modified,
|
||||
"warnings": warnings,
|
||||
"errors": errors,
|
||||
"duration_seconds": duration,
|
||||
}
|
||||
|
||||
async def _execute_via_api(self, instruction: str, plan: Dict) -> Dict:
|
||||
"""Execute through Anthropic API for code generation."""
|
||||
if not self.client:
|
||||
return await self._execute_manual(instruction, plan)
|
||||
|
||||
start_time = datetime.now()
|
||||
files_modified = []
|
||||
warnings = []
|
||||
errors = []
|
||||
|
||||
try:
|
||||
# Use Claude API for code generation
|
||||
response = self.client.messages.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=8192,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": f"""You are implementing code for the Atomizer FEA optimization framework.
|
||||
|
||||
{instruction}
|
||||
|
||||
For each file that needs to be created or modified, output the complete file content in this format:
|
||||
|
||||
### FILE: path/to/file.py
|
||||
```python
|
||||
# file content here
|
||||
```
|
||||
|
||||
Be thorough and implement all tasks completely.
|
||||
""",
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
# Parse response for file contents
|
||||
content = response.content[0].text
|
||||
|
||||
# Extract files from response
|
||||
import re
|
||||
|
||||
file_pattern = r"### FILE: (.+?)\n```\w*\n(.*?)```"
|
||||
matches = re.findall(file_pattern, content, re.DOTALL)
|
||||
|
||||
for file_path, file_content in matches:
|
||||
try:
|
||||
full_path = self.workspace / file_path.strip()
|
||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
full_path.write_text(file_content.strip())
|
||||
files_modified.append(str(file_path.strip()))
|
||||
logger.info(f"Created/modified: {file_path}")
|
||||
except Exception as e:
|
||||
errors.append(f"Failed to write {file_path}: {e}")
|
||||
|
||||
status = "success" if files_modified else "partial"
|
||||
|
||||
except Exception as e:
|
||||
errors.append(str(e))
|
||||
status = "error"
|
||||
|
||||
duration = (datetime.now() - start_time).total_seconds()
|
||||
|
||||
return {
|
||||
"status": status,
|
||||
"files": files_modified,
|
||||
"warnings": warnings,
|
||||
"errors": errors,
|
||||
"duration_seconds": duration,
|
||||
}
|
||||
|
||||
async def _execute_manual(self, instruction: str, plan: Dict) -> Dict:
|
||||
"""
|
||||
Generate manual instructions (when automation not available).
|
||||
|
||||
Saves instruction to file for human execution.
|
||||
"""
|
||||
start_time = datetime.now()
|
||||
|
||||
# Save instruction for manual execution
|
||||
output_file = self.workspace / ".devloop" / "pending_instruction.md"
|
||||
output_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_file.write_text(instruction)
|
||||
|
||||
logger.info(f"Manual instruction saved to: {output_file}")
|
||||
|
||||
return {
|
||||
"status": "pending_manual",
|
||||
"instruction_file": str(output_file),
|
||||
"files": [],
|
||||
"warnings": ["Automated execution not available. Please execute manually."],
|
||||
"errors": [],
|
||||
"duration_seconds": (datetime.now() - start_time).total_seconds(),
|
||||
}
|
||||
|
||||
    async def execute_fix(self, fix_plan: Dict) -> Dict:
        """
        Execute a specific fix from analysis.

        Renders the fix plan as a markdown brief, then delegates execution
        to execute_plan() as a single-issue mini-plan.

        Args:
            fix_plan: Fix plan dict from ProblemAnalyzer

        Returns:
            Fix result dict (whatever execute_plan returns)
        """
        issue_id = fix_plan.get("issue_id", "unknown")
        approach = fix_plan.get("approach", "")
        steps = fix_plan.get("steps", [])

        # Markdown header for the implementing agent: issue id + approach.
        instruction = f"""## Bug Fix: {issue_id}

### Approach
{approach}

### Steps
"""
        # Number the steps; each step may name a target file.
        for i, step in enumerate(steps, 1):
            instruction += f"{i}. {step.get('description', step.get('action', 'Step'))}\n"
            if step.get("file"):
                instruction += f"   File: `{step['file']}`\n"

        instruction += """
### Verification
After implementing the fix, verify that:
1. The specific test case passes
2. No regressions are introduced
3. Code follows Atomizer patterns
"""

        # Execute as a mini-plan: every step becomes a high-priority task.
        return await self.execute_plan(
            {
                "objective": f"Fix: {issue_id}",
                "approach": approach,
                "tasks": [
                    {
                        "description": step.get("description", step.get("action")),
                        "file": step.get("file"),
                        "priority": "high",
                    }
                    for step in steps
                ],
                "acceptance_criteria": [
                    "Original test passes",
                    "No new errors introduced",
                ],
            }
        )
|
||||
|
||||
def get_execution_status(self) -> Dict:
|
||||
"""Get current execution status."""
|
||||
pending_file = self.workspace / ".devloop" / "pending_instruction.md"
|
||||
|
||||
return {
|
||||
"mode": self.execution_mode,
|
||||
"workspace": str(self.workspace),
|
||||
"has_pending_instruction": pending_file.exists(),
|
||||
"api_available": self.client is not None,
|
||||
}
|
||||
|
||||
async def verify_implementation(self, expected_files: List[str]) -> Dict:
|
||||
"""
|
||||
Verify that implementation created expected files.
|
||||
|
||||
Args:
|
||||
expected_files: List of file paths that should exist
|
||||
|
||||
Returns:
|
||||
Verification result
|
||||
"""
|
||||
missing = []
|
||||
found = []
|
||||
|
||||
for file_path in expected_files:
|
||||
path = (
|
||||
self.workspace / file_path if not Path(file_path).is_absolute() else Path(file_path)
|
||||
)
|
||||
if path.exists():
|
||||
found.append(str(file_path))
|
||||
else:
|
||||
missing.append(str(file_path))
|
||||
|
||||
return {
|
||||
"complete": len(missing) == 0,
|
||||
"found": found,
|
||||
"missing": missing,
|
||||
}
|
||||
652
optimization_engine/devloop/cli_bridge.py
Normal file
652
optimization_engine/devloop/cli_bridge.py
Normal file
@@ -0,0 +1,652 @@
|
||||
"""
|
||||
CLI Bridge - Execute AI tasks through Claude Code CLI and OpenCode CLI.
|
||||
|
||||
Uses your existing subscriptions via CLI tools:
|
||||
- Claude Code CLI (claude.exe) for implementation
|
||||
- OpenCode CLI (opencode) for Gemini planning
|
||||
|
||||
No API keys needed - leverages your CLI subscriptions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import re
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class CLIResult:
    """Result from CLI execution."""

    # True when the CLI process exited with return code 0.
    success: bool
    # Captured stdout of the CLI process.
    output: str
    # Captured stderr, or a synthesized error description (timeout, launch failure).
    error: str
    # Wall-clock time spent in the call.
    duration_seconds: float
    # File paths the tool reported creating/modifying (may be empty).
    files_modified: List[str]
|
||||
|
||||
|
||||
class ClaudeCodeCLI:
    """
    Execute tasks through Claude Code CLI.

    Uses: claude.exe --print for non-interactive execution

    NOTE(review): CLAUDE_PATH is a machine-specific absolute path — consider
    resolving the binary via PATH (shutil.which) for portability.
    """

    CLAUDE_PATH = r"C:\Users\antoi\.local\bin\claude.exe"

    def __init__(self, workspace: Path):
        # All CLI invocations run with this directory as cwd, so relative
        # file edits made by the tool land inside the workspace.
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 300,
        model: str = "opus",
    ) -> CLIResult:
        """
        Execute a prompt through Claude Code CLI.

        Args:
            prompt: The instruction/prompt to execute
            timeout: Timeout in seconds
            model: Model to use (opus, sonnet, haiku)

        Returns:
            CLIResult with output and modified files. Failures (timeout,
            missing binary, non-zero exit) are reported via the result,
            never raised.
        """
        start_time = datetime.now()

        # Build command
        cmd = [
            self.CLAUDE_PATH,
            "--print",  # Non-interactive mode
            "--model",
            model,
            "--permission-mode",
            "acceptEdits",  # Auto-accept edits
            prompt,
        ]

        logger.info(f"Executing Claude Code CLI: {prompt[:100]}...")

        try:
            # FIX: subprocess.run is blocking; run it in a worker thread so
            # this coroutine does not stall the event loop for the whole
            # (potentially minutes-long) CLI call.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},  # Disable colors
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            # Extract modified files from output
            files_modified = self._extract_modified_files(output)

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(
                f"Claude Code completed in {duration:.1f}s, modified {len(files_modified)} files"
            )

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=files_modified,
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Covers FileNotFoundError (CLI not installed) and any other
            # launch failure; surfaced in the result instead of raised.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    def _extract_modified_files(self, output: str) -> List[str]:
        """Extract the deduplicated list of modified files from CLI output.

        Note: deduplication via set() does not preserve report order.
        """
        files = []

        # Patterns the CLI prints as it edits files.
        patterns = [
            r"(?:Created|Modified|Wrote|Updated|Edited):\s*[`'\"]?([^\s`'\"]+)[`'\"]?",
            r"Writing to [`'\"]?([^\s`'\"]+)[`'\"]?",
            r"File saved: ([^\s]+)",
        ]

        for pattern in patterns:
            matches = re.findall(pattern, output, re.IGNORECASE)
            files.extend(matches)

        return list(set(files))

    async def execute_with_context(
        self,
        prompt: str,
        context_files: List[str],
        timeout: int = 300,
    ) -> CLIResult:
        """
        Execute with additional context files loaded.

        The context files are appended to the prompt as a bullet list for
        the CLI to open itself (the contents are not inlined here).

        Args:
            prompt: The instruction
            context_files: Files to read as context
            timeout: Timeout in seconds
        """
        # Build prompt with context
        context_prompt = prompt

        if context_files:
            context_prompt += "\n\nContext files to consider:\n"
            for f in context_files:
                context_prompt += f"- {f}\n"

        return await self.execute(context_prompt, timeout)
|
||||
|
||||
|
||||
class OpenCodeCLI:
    """
    Execute tasks through OpenCode CLI (Gemini).

    Uses: opencode run for non-interactive execution

    NOTE(review): OPENCODE_PATH is a machine-specific absolute path —
    consider resolving the binary via PATH (shutil.which) for portability.
    """

    OPENCODE_PATH = r"C:\Users\antoi\AppData\Roaming\npm\opencode.cmd"

    def __init__(self, workspace: Path):
        # cwd for all opencode invocations.
        self.workspace = workspace

    async def execute(
        self,
        prompt: str,
        timeout: int = 180,
        model: str = "google/gemini-3-pro-preview",
    ) -> CLIResult:
        """
        Execute a prompt through OpenCode CLI.

        Args:
            prompt: The instruction/prompt
            timeout: Timeout in seconds
            model: Model to use

        Returns:
            CLIResult with output. Failures (timeout, launch errors,
            non-zero exit) are reported via the result, never raised.
        """
        start_time = datetime.now()

        # Build command
        cmd = [self.OPENCODE_PATH, "run", "--model", model, prompt]

        logger.info(f"Executing OpenCode CLI: {prompt[:100]}...")

        try:
            # FIX: subprocess.run is blocking; run it in a worker thread so
            # this coroutine does not stall the event loop during the call.
            result = await asyncio.to_thread(
                subprocess.run,
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=str(self.workspace),
                env={**os.environ, "TERM": "dumb"},
            )

            output = result.stdout
            error = result.stderr
            success = result.returncode == 0

            duration = (datetime.now() - start_time).total_seconds()

            logger.info(f"OpenCode completed in {duration:.1f}s")

            return CLIResult(
                success=success,
                output=output,
                error=error,
                duration_seconds=duration,
                files_modified=[],  # OpenCode typically doesn't modify files directly
            )

        except subprocess.TimeoutExpired:
            return CLIResult(
                success=False,
                output="",
                error=f"Timeout after {timeout}s",
                duration_seconds=timeout,
                files_modified=[],
            )
        except Exception as e:
            # Covers FileNotFoundError (CLI missing) and other launch errors.
            return CLIResult(
                success=False,
                output="",
                error=str(e),
                duration_seconds=(datetime.now() - start_time).total_seconds(),
                files_modified=[],
            )

    @staticmethod
    def _extract_json_block(output: str) -> Optional[str]:
        """Pull the first JSON payload out of a model response.

        Prefers a fenced ```json block, then any fenced block, then the
        first brace-delimited region. Returns None when nothing JSON-like
        is found. Shared by plan() and analyze() (previously duplicated).
        """
        if "```json" in output:
            start = output.find("```json") + 7
            end = output.find("```", start)
            return output[start:end].strip()
        if "```" in output:
            start = output.find("```") + 3
            end = output.find("```", start)
            return output[start:end].strip()
        match = re.search(r"\{.*\}", output, re.DOTALL)
        return match.group() if match else None

    async def plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """
        Create an implementation plan using Gemini via OpenCode.

        Args:
            objective: What to achieve
            context: Additional context

        Returns:
            Plan dict with tasks and test scenarios; a minimal fallback
            plan when the CLI fails or returns unparseable output.
        """
        prompt = f"""You are a strategic planner for Atomizer, an FEA optimization framework.

## Objective
{objective}

## Context
{json.dumps(context, indent=2) if context else "None provided"}

## Task
Create a detailed implementation plan in JSON format with:
1. tasks: List of implementation tasks for Claude Code
2. test_scenarios: Tests to verify implementation
3. acceptance_criteria: Success conditions

Output ONLY valid JSON in this format:
```json
{{
  "objective": "{objective}",
  "approach": "Brief description",
  "tasks": [
    {{
      "id": "task_001",
      "description": "What to do",
      "file": "path/to/file.py",
      "priority": "high"
    }}
  ],
  "test_scenarios": [
    {{
      "id": "test_001",
      "name": "Test name",
      "type": "filesystem",
      "steps": [{{"action": "check_exists", "path": "some/path"}}],
      "expected_outcome": {{"exists": true}}
    }}
  ],
  "acceptance_criteria": [
    "Criterion 1"
  ]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            logger.error(f"OpenCode planning failed: {result.error}")
            return self._fallback_plan(objective, context)

        # Parse JSON from output; any extraction/parsing failure falls
        # back to a structured minimal plan instead of raising.
        json_str = self._extract_json_block(result.output)
        if json_str is None:
            return self._fallback_plan(objective, context)

        try:
            plan = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse plan JSON: {e}")
            return self._fallback_plan(objective, context)

        logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
        return plan

    def _fallback_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Generate a fallback plan when Gemini fails.

        Produces a single catch-all high-priority task so downstream
        consumers always receive the expected plan shape.
        """
        logger.warning("Using fallback plan")

        return {
            "objective": objective,
            "approach": "Fallback plan - manual implementation",
            "tasks": [
                {
                    "id": "task_001",
                    "description": f"Implement: {objective}",
                    "file": "TBD",
                    "priority": "high",
                }
            ],
            "test_scenarios": [],
            "acceptance_criteria": [objective],
        }

    async def analyze(self, test_results: Dict) -> Dict:
        """
        Analyze test results using Gemini via OpenCode.

        Args:
            test_results: Test report from dashboard

        Returns:
            Analysis with issues and fix plans; an all-clear dict when
            nothing failed, or a fallback analysis when the CLI fails or
            returns unparseable output.
        """
        summary = test_results.get("summary", {})
        scenarios = test_results.get("scenarios", [])

        if summary.get("failed", 0) == 0:
            return {
                "issues_found": False,
                "issues": [],
                "fix_plans": {},
                "recommendations": ["All tests passed!"],
            }

        failures = [s for s in scenarios if not s.get("passed", True)]

        prompt = f"""Analyze these test failures for Atomizer FEA optimization framework:

## Test Summary
- Total: {summary.get("total", 0)}
- Passed: {summary.get("passed", 0)}
- Failed: {summary.get("failed", 0)}

## Failed Tests
{json.dumps(failures, indent=2)}

## Task
Provide root cause analysis and fix plans in JSON:

```json
{{
  "issues_found": true,
  "issues": [
    {{
      "id": "issue_001",
      "description": "What went wrong",
      "severity": "high",
      "root_cause": "Why it failed"
    }}
  ],
  "fix_plans": {{
    "issue_001": {{
      "approach": "How to fix",
      "steps": [{{"action": "edit", "file": "path", "description": "change"}}]
    }}
  }},
  "recommendations": ["suggestion"]
}}
```
"""

        result = await self.execute(prompt)

        if not result.success:
            return self._fallback_analysis(failures)

        # FIX: previously a bare `except:` hid every parse error, and an
        # output with no JSON at all was silently turned into `{}` — a dict
        # missing the keys callers read (issues_found, fix_plans). Both
        # cases now fall back to a structured analysis.
        json_str = self._extract_json_block(result.output)
        if json_str is None:
            return self._fallback_analysis(failures)

        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse analysis JSON: {e}")
            return self._fallback_analysis(failures)

    def _fallback_analysis(self, failures: List[Dict]) -> Dict:
        """Generate fallback analysis.

        One issue per failure, each with an empty-steps fix plan flagged
        for manual investigation.
        """
        issues = []
        fix_plans = {}

        for i, failure in enumerate(failures):
            issue_id = f"issue_{i + 1}"
            issues.append(
                {
                    "id": issue_id,
                    "description": failure.get("error", "Unknown error"),
                    "severity": "medium",
                    "root_cause": "Requires investigation",
                }
            )
            fix_plans[issue_id] = {
                "approach": "Manual investigation required",
                "steps": [],
            }

        return {
            "issues_found": len(issues) > 0,
            "issues": issues,
            "fix_plans": fix_plans,
            "recommendations": ["Review failed tests manually"],
        }
|
||||
|
||||
|
||||
class DevLoopCLIOrchestrator:
    """
    Orchestrate DevLoop using CLI tools.

    - OpenCode (Gemini) for planning and analysis
    - Claude Code for implementation and fixes

    The cycle is: plan once, then loop implement -> test -> analyze -> fix
    until the tests pass or the iteration budget runs out.
    """

    def __init__(self, workspace: Optional[Path] = None):
        # NOTE(review): machine-specific default path — TODO make this
        # configurable (env var / config file) for other environments.
        self.workspace = workspace or Path("C:/Users/antoi/Atomizer")
        self.claude = ClaudeCodeCLI(self.workspace)
        self.opencode = OpenCodeCLI(self.workspace)
        # NOTE(review): run_cycle tracks its own local iteration counter and
        # never updates this attribute; kept for external inspection only.
        self.iteration = 0

    async def run_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: int = 5,
    ) -> Dict:
        """
        Run a complete development cycle.

        Args:
            objective: What to achieve
            context: Additional context
            max_iterations: Maximum fix iterations

        Returns:
            Cycle report dict: objective, per-iteration results, final
            status ("success" or "max_iterations_reached"), and duration.
        """
        # Imported lazily; presumably avoids an import cycle — TODO confirm.
        from .test_runner import DashboardTestRunner

        start_time = datetime.now()
        results = {
            "objective": objective,
            "iterations": [],
            "status": "in_progress",
        }

        logger.info(f"Starting DevLoop cycle: {objective}")

        # Phase 1: Plan (Gemini via OpenCode) — planned once; the same plan
        # is re-implemented on every iteration.
        logger.info("Phase 1: Planning with Gemini...")
        plan = await self.opencode.plan(objective, context)

        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            iter_result = {"iteration": iteration}

            # Phase 2: Implement (Claude Code)
            logger.info(f"Phase 2 (iter {iteration}): Implementing with Claude Code...")
            impl_result = await self._implement(plan)
            iter_result["implementation"] = {
                "success": impl_result.success,
                "files_modified": impl_result.files_modified,
            }

            # Phase 3: Test (Dashboard) — fresh runner per iteration.
            logger.info(f"Phase 3 (iter {iteration}): Testing...")
            test_runner = DashboardTestRunner()
            test_results = await test_runner.run_test_suite(plan.get("test_scenarios", []))
            iter_result["test_results"] = test_results

            # Check if all tests pass
            summary = test_results.get("summary", {})
            if summary.get("failed", 0) == 0:
                logger.info("All tests passed!")
                results["iterations"].append(iter_result)
                results["status"] = "success"
                break

            # Phase 4: Analyze (Gemini via OpenCode)
            logger.info(f"Phase 4 (iter {iteration}): Analyzing failures...")
            analysis = await self.opencode.analyze(test_results)
            iter_result["analysis"] = analysis

            # Analyzer reporting no issues also counts as success.
            if not analysis.get("issues_found"):
                results["status"] = "success"
                results["iterations"].append(iter_result)
                break

            # Phase 5: Fix (Claude Code) — fixes are verified by the next
            # iteration's test phase.
            logger.info(f"Phase 5 (iter {iteration}): Fixing issues...")
            fix_result = await self._fix(analysis)
            iter_result["fixes"] = {
                "success": fix_result.success,
                "files_modified": fix_result.files_modified,
            }

            results["iterations"].append(iter_result)

        # Loop exhausted without a success break.
        if results["status"] == "in_progress":
            results["status"] = "max_iterations_reached"

        results["duration_seconds"] = (datetime.now() - start_time).total_seconds()

        logger.info(f"DevLoop cycle completed: {results['status']}")

        return results

    async def _implement(self, plan: Dict) -> CLIResult:
        """Implement the plan using Claude Code.

        Renders the plan's tasks into one markdown prompt; returns a
        trivially successful CLIResult when the plan has no tasks.
        """
        tasks = plan.get("tasks", [])

        if not tasks:
            return CLIResult(
                success=True,
                output="No tasks to implement",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Build implementation prompt
        prompt = f"""Implement the following tasks for Atomizer:

## Objective
{plan.get("objective", "Unknown")}

## Approach
{plan.get("approach", "Follow best practices")}

## Tasks
"""
        for task in tasks:
            prompt += f"""
### {task.get("id", "task")}: {task.get("description", "")}
- File: {task.get("file", "TBD")}
- Priority: {task.get("priority", "medium")}
"""

        prompt += """
## Requirements
- Follow Atomizer coding standards
- Use AtomizerSpec v2.0 format
- Create README.md for any new study
- Use existing extractors from optimization_engine/extractors/
"""

        return await self.claude.execute(prompt, timeout=300)

    async def _fix(self, analysis: Dict) -> CLIResult:
        """Apply fixes using Claude Code.

        Formats each reported issue (plus its fix plan, when present) into
        a prompt; no-op success when there are no issues.
        """
        issues = analysis.get("issues", [])
        fix_plans = analysis.get("fix_plans", {})

        if not issues:
            return CLIResult(
                success=True,
                output="No issues to fix",
                error="",
                duration_seconds=0,
                files_modified=[],
            )

        # Build fix prompt
        prompt = "Fix the following issues:\n\n"

        for issue in issues:
            issue_id = issue.get("id", "unknown")
            prompt += f"""
## Issue: {issue_id}
- Description: {issue.get("description", "")}
- Root Cause: {issue.get("root_cause", "Unknown")}
- Severity: {issue.get("severity", "medium")}
"""

            # Attach the analyzer's suggested approach/steps, if any.
            fix_plan = fix_plans.get(issue_id, {})
            if fix_plan:
                prompt += f"- Fix Approach: {fix_plan.get('approach', 'Investigate')}\n"
                for step in fix_plan.get("steps", []):
                    prompt += f"  - {step.get('description', step.get('action', 'step'))}\n"

        return await self.claude.execute(prompt, timeout=300)

    async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
        """Execute only the planning phase."""
        return await self.opencode.plan(objective, context)

    async def step_implement(self, plan: Dict) -> CLIResult:
        """Execute only the implementation phase."""
        return await self._implement(plan)

    async def step_analyze(self, test_results: Dict) -> Dict:
        """Execute only the analysis phase."""
        return await self.opencode.analyze(test_results)
|
||||
561
optimization_engine/devloop/orchestrator.py
Normal file
561
optimization_engine/devloop/orchestrator.py
Normal file
@@ -0,0 +1,561 @@
|
||||
"""
|
||||
DevLoop Orchestrator - Master controller for closed-loop development.
|
||||
|
||||
Coordinates:
|
||||
- Gemini Pro: Strategic planning, analysis, test design
|
||||
- Claude Code: Implementation, code changes, fixes
|
||||
- Dashboard: Automated testing, verification
|
||||
- LAC: Learning capture and retrieval
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LoopPhase(Enum):
    """Current phase in the development loop."""

    IDLE = "idle"  # No cycle in progress
    PLANNING = "planning"  # Drafting the implementation plan
    IMPLEMENTING = "implementing"  # Applying code changes
    TESTING = "testing"  # Running the test suite
    ANALYZING = "analyzing"  # Inspecting test results
    FIXING = "fixing"  # Implementing fixes for found issues
    VERIFYING = "verifying"  # Re-checking that fixes worked
|
||||
|
||||
|
||||
@dataclass
class LoopState:
    """Current state of the development loop."""

    # Phase the loop is currently executing (IDLE when no cycle runs).
    phase: LoopPhase = LoopPhase.IDLE
    # Number of iterations completed so far.
    iteration: int = 0
    # Human-readable description of the work in progress, if any.
    current_task: Optional[str] = None
    # Most recent test report from the testing phase, if any.
    test_results: Optional[Dict] = None
    # Most recent analysis of the test results, if any.
    analysis: Optional[Dict] = None
    # ISO timestamp of the last state mutation (refreshed on each notify).
    last_update: str = field(default_factory=lambda: datetime.now().isoformat())
|
||||
|
||||
|
||||
@dataclass
class IterationResult:
    """Result of a single development iteration."""

    # Index of this iteration within the cycle.
    iteration: int
    # Output of each phase; None when the phase did not run.
    plan: Optional[Dict] = None
    implementation: Optional[Dict] = None
    test_results: Optional[Dict] = None
    analysis: Optional[Dict] = None
    fixes: Optional[List[Dict]] = None
    verification: Optional[Dict] = None
    # True when the iteration ended with all checks passing.
    success: bool = False
    # Wall-clock time spent in this iteration.
    duration_seconds: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class CycleReport:
    """Complete report for a development cycle."""

    # The objective this cycle set out to achieve.
    objective: str
    # ISO timestamps bracketing the cycle (end_time set when it finishes).
    start_time: str = field(default_factory=lambda: datetime.now().isoformat())
    end_time: Optional[str] = None
    # Per-iteration results, in execution order.
    iterations: List[IterationResult] = field(default_factory=list)
    # "in_progress" -> "completed" / "max_iterations_reached" / "error: ...".
    status: str = "in_progress"
    # Total wall-clock duration of the cycle.
    total_duration_seconds: float = 0.0
|
||||
|
||||
|
||||
class DevLoopOrchestrator:
|
||||
"""
|
||||
Autonomous development loop orchestrator.
|
||||
|
||||
Coordinates Gemini (planning) + Claude Code (implementation) + Dashboard (testing)
|
||||
in a continuous improvement cycle.
|
||||
|
||||
Flow:
|
||||
1. Gemini: Plan features/fixes
|
||||
2. Claude Code: Implement
|
||||
3. Dashboard: Test
|
||||
4. Gemini: Analyze results
|
||||
5. Claude Code: Fix issues
|
||||
6. Dashboard: Verify
|
||||
7. Loop back with learnings
|
||||
"""
|
||||
|
||||
    def __init__(
        self,
        config: Optional[Dict] = None,
        gemini_client: Optional[Any] = None,
        claude_bridge: Optional[Any] = None,
        dashboard_runner: Optional[Any] = None,
    ):
        """
        Initialize the orchestrator.

        Args:
            config: Configuration dict with API keys and settings
            gemini_client: Pre-configured Gemini client (optional)
            claude_bridge: Pre-configured Claude Code bridge (optional)
            dashboard_runner: Pre-configured Dashboard test runner (optional)
        """
        self.config = config or self._default_config()
        self.state = LoopState()
        # Callbacks invoked on every state change (see subscribe()).
        self.subscribers: List[Callable] = []

        # Initialize components lazily — real clients are only constructed
        # on first property access unless injected here.
        self._gemini = gemini_client
        self._claude_bridge = claude_bridge
        self._dashboard = dashboard_runner
        self._lac = None

        # History for learning: completed CycleReports, newest last.
        self.cycle_history: List[CycleReport] = []
|
||||
|
||||
def _default_config(self) -> Dict:
|
||||
"""Default configuration."""
|
||||
return {
|
||||
"max_iterations": 10,
|
||||
"auto_fix_threshold": "high", # Only auto-fix high+ severity
|
||||
"learning_enabled": True,
|
||||
"dashboard_url": "http://localhost:3000",
|
||||
"websocket_url": "ws://localhost:8000",
|
||||
"test_timeout_ms": 30000,
|
||||
}
|
||||
|
||||
    @property
    def gemini(self):
        """Lazy-load Gemini planner (constructed on first access)."""
        if self._gemini is None:
            # Imported here so the dependency is only paid on first use.
            from .planning import GeminiPlanner

            self._gemini = GeminiPlanner(self.config.get("gemini", {}))
        return self._gemini
|
||||
|
||||
    @property
    def claude_bridge(self):
        """Lazy-load Claude Code bridge (constructed on first access)."""
        if self._claude_bridge is None:
            # Imported here so the dependency is only paid on first use.
            from .claude_bridge import ClaudeCodeBridge

            self._claude_bridge = ClaudeCodeBridge(self.config.get("claude", {}))
        return self._claude_bridge
|
||||
|
||||
    @property
    def dashboard(self):
        """Lazy-load Dashboard test runner (constructed on first access)."""
        if self._dashboard is None:
            from .test_runner import DashboardTestRunner

            # Note: receives the full orchestrator config, not a sub-section.
            self._dashboard = DashboardTestRunner(self.config)
        return self._dashboard
|
||||
|
||||
    @property
    def lac(self):
        """Lazy-load LAC (Learning Atomizer Core).

        Returns None when learning is disabled in the config or when the
        knowledge_base package is not importable.
        """
        if self._lac is None and self.config.get("learning_enabled", True):
            try:
                from knowledge_base.lac import get_lac

                self._lac = get_lac()
            except ImportError:
                # Learning is best-effort: missing package just disables it.
                logger.warning("LAC not available, learning disabled")
        return self._lac
|
||||
|
||||
def subscribe(self, callback: Callable[[LoopState], None]):
|
||||
"""Subscribe to state updates."""
|
||||
self.subscribers.append(callback)
|
||||
|
||||
def unsubscribe(self, callback: Callable):
|
||||
"""Unsubscribe from state updates."""
|
||||
if callback in self.subscribers:
|
||||
self.subscribers.remove(callback)
|
||||
|
||||
def _notify_subscribers(self):
|
||||
"""Notify all subscribers of state change."""
|
||||
self.state.last_update = datetime.now().isoformat()
|
||||
for callback in self.subscribers:
|
||||
try:
|
||||
callback(self.state)
|
||||
except Exception as e:
|
||||
logger.error(f"Subscriber error: {e}")
|
||||
|
||||
def _update_state(self, phase: Optional[LoopPhase] = None, task: Optional[str] = None):
|
||||
"""Update state and notify subscribers."""
|
||||
if phase:
|
||||
self.state.phase = phase
|
||||
if task:
|
||||
self.state.current_task = task
|
||||
self._notify_subscribers()
|
||||
|
||||
    async def run_development_cycle(
        self,
        objective: str,
        context: Optional[Dict] = None,
        max_iterations: Optional[int] = None,
    ) -> CycleReport:
        """
        Execute a complete development cycle.

        Iterates plan -> implement -> test -> analyze -> fix until the
        objective is complete (last iteration succeeded with no failed
        tests) or the iteration budget is exhausted.

        Args:
            objective: What to achieve (e.g., "Create support_arm optimization study")
            context: Additional context (study spec, problem statement, etc.)
            max_iterations: Override default max iterations

        Returns:
            CycleReport with all iteration results
        """
        max_iter = max_iterations or self.config.get("max_iterations", 10)

        report = CycleReport(objective=objective)
        start_time = datetime.now()

        logger.info(f"Starting development cycle: {objective}")

        try:
            while not self._is_objective_complete(report) and len(report.iterations) < max_iter:
                iteration_result = await self._run_iteration(objective, context)
                report.iterations.append(iteration_result)

                # Record learning from successful patterns (best-effort;
                # lac is None when learning is disabled/unavailable).
                if iteration_result.success and self.lac:
                    await self._record_learning(iteration_result)

                # Check for max iterations
                if len(report.iterations) >= max_iter:
                    report.status = "max_iterations_reached"
                    logger.warning(f"Max iterations ({max_iter}) reached")
                    break

        except Exception as e:
            # Top-level boundary: a phase failure ends the cycle with an
            # error status rather than propagating to the caller.
            report.status = f"error: {str(e)}"
            logger.error(f"Development cycle error: {e}")

        report.end_time = datetime.now().isoformat()
        report.total_duration_seconds = (datetime.now() - start_time).total_seconds()

        # Only an uneventful exit (objective complete) still reads
        # "in_progress" here; promote it to "completed".
        if report.status == "in_progress":
            report.status = "completed"

        self.cycle_history.append(report)
        self._update_state(LoopPhase.IDLE)

        return report
|
||||
|
||||
def _is_objective_complete(self, report: CycleReport) -> bool:
|
||||
"""Check if the objective has been achieved."""
|
||||
if not report.iterations:
|
||||
return False
|
||||
|
||||
last_iter = report.iterations[-1]
|
||||
|
||||
# Success if last iteration passed all tests
|
||||
if last_iter.success and last_iter.test_results:
|
||||
tests = last_iter.test_results
|
||||
if tests.get("summary", {}).get("failed", 0) == 0:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
    async def _run_iteration(self, objective: str, context: Optional[Dict]) -> IterationResult:
        """Run a single iteration through all phases.

        Sequence: PLAN (Gemini) -> BUILD (Claude) -> TEST (dashboard) ->
        ANALYZE (Gemini), then FIX/VERIFY only when the analysis reports
        issues. Any exception is caught and recorded as a failed iteration
        instead of propagating, so the outer cycle loop keeps running.

        Args:
            objective: The high-level goal being pursued this cycle.
            context: Optional extra context forwarded to the planner.

        Returns:
            IterationResult with per-phase outputs, a success flag, and the
            wall-clock duration of the iteration.
        """
        start_time = datetime.now()
        result = IterationResult(iteration=self.state.iteration)

        try:
            # Phase 1: Planning (Gemini)
            self._update_state(LoopPhase.PLANNING, "Creating implementation plan")
            result.plan = await self._planning_phase(objective, context)

            # Phase 2: Implementation (Claude Code)
            self._update_state(LoopPhase.IMPLEMENTING, "Implementing changes")
            result.implementation = await self._implementation_phase(result.plan)

            # Phase 3: Testing (Dashboard)
            self._update_state(LoopPhase.TESTING, "Running tests")
            result.test_results = await self._testing_phase(result.plan)
            # Mirror onto shared state so the next planning phase can see them.
            self.state.test_results = result.test_results

            # Phase 4: Analysis (Gemini)
            self._update_state(LoopPhase.ANALYZING, "Analyzing results")
            result.analysis = await self._analysis_phase(result.test_results)
            self.state.analysis = result.analysis

            # Phases 5-6: Fix & Verify if needed
            if result.analysis and result.analysis.get("issues_found"):
                self._update_state(LoopPhase.FIXING, "Implementing fixes")
                result.fixes = await self._fixing_phase(result.analysis)

                self._update_state(LoopPhase.VERIFYING, "Verifying fixes")
                result.verification = await self._verification_phase(result.fixes)
                result.success = result.verification.get("all_passed", False)
            else:
                # No issues reported: the iteration stands as successful.
                result.success = True

        except Exception as e:
            logger.error(f"Iteration {self.state.iteration} failed: {e}")
            result.success = False

        result.duration_seconds = (datetime.now() - start_time).total_seconds()
        # The shared iteration counter advances even when the iteration failed.
        self.state.iteration += 1

        return result
|
||||
|
||||
async def _planning_phase(self, objective: str, context: Optional[Dict]) -> Dict:
|
||||
"""Gemini creates implementation plan."""
|
||||
# Gather context
|
||||
historical_learnings = []
|
||||
if self.lac:
|
||||
historical_learnings = self.lac.get_relevant_insights(objective)
|
||||
|
||||
plan_request = {
|
||||
"objective": objective,
|
||||
"context": context or {},
|
||||
"previous_results": self.state.test_results,
|
||||
"historical_learnings": historical_learnings,
|
||||
}
|
||||
|
||||
try:
|
||||
plan = await self.gemini.create_plan(plan_request)
|
||||
logger.info(f"Plan created with {len(plan.get('tasks', []))} tasks")
|
||||
return plan
|
||||
except Exception as e:
|
||||
logger.error(f"Planning phase failed: {e}")
|
||||
return {"error": str(e), "tasks": [], "test_scenarios": []}
|
||||
|
||||
async def _implementation_phase(self, plan: Dict) -> Dict:
|
||||
"""Claude Code implements the plan."""
|
||||
if not plan or plan.get("error"):
|
||||
return {"status": "skipped", "reason": "No valid plan"}
|
||||
|
||||
try:
|
||||
result = await self.claude_bridge.execute_plan(plan)
|
||||
return {
|
||||
"status": result.get("status", "unknown"),
|
||||
"files_modified": result.get("files", []),
|
||||
"warnings": result.get("warnings", []),
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Implementation phase failed: {e}")
|
||||
return {"status": "error", "error": str(e)}
|
||||
|
||||
async def _testing_phase(self, plan: Dict) -> Dict:
|
||||
"""Dashboard runs automated tests."""
|
||||
test_scenarios = plan.get("test_scenarios", [])
|
||||
|
||||
if not test_scenarios:
|
||||
# Generate default tests based on objective
|
||||
test_scenarios = self._generate_default_tests(plan)
|
||||
|
||||
try:
|
||||
results = await self.dashboard.run_test_suite(test_scenarios)
|
||||
return results
|
||||
except Exception as e:
|
||||
logger.error(f"Testing phase failed: {e}")
|
||||
return {
|
||||
"status": "error",
|
||||
"error": str(e),
|
||||
"summary": {"passed": 0, "failed": 1, "total": 1},
|
||||
}
|
||||
|
||||
def _generate_default_tests(self, plan: Dict) -> List[Dict]:
|
||||
"""Generate default test scenarios based on the plan."""
|
||||
objective = plan.get("objective", "")
|
||||
|
||||
tests = []
|
||||
|
||||
# Study creation tests
|
||||
if "study" in objective.lower() or "create" in objective.lower():
|
||||
tests.extend(
|
||||
[
|
||||
{
|
||||
"id": "test_study_exists",
|
||||
"name": "Study directory exists",
|
||||
"type": "filesystem",
|
||||
"check": "directory_exists",
|
||||
},
|
||||
{
|
||||
"id": "test_spec_valid",
|
||||
"name": "AtomizerSpec is valid",
|
||||
"type": "api",
|
||||
"endpoint": "/api/studies/{study_id}/spec/validate",
|
||||
},
|
||||
{
|
||||
"id": "test_dashboard_loads",
|
||||
"name": "Dashboard loads study",
|
||||
"type": "browser",
|
||||
"action": "load_study",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
# Optimization tests
|
||||
if "optimi" in objective.lower():
|
||||
tests.extend(
|
||||
[
|
||||
{
|
||||
"id": "test_run_trial",
|
||||
"name": "Single trial executes",
|
||||
"type": "cli",
|
||||
"command": "python run_optimization.py --test",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
return tests
|
||||
|
||||
async def _analysis_phase(self, test_results: Dict) -> Dict:
|
||||
"""Gemini analyzes test results."""
|
||||
try:
|
||||
from .analyzer import ProblemAnalyzer
|
||||
|
||||
analyzer = ProblemAnalyzer(self.gemini)
|
||||
return await analyzer.analyze_test_results(test_results)
|
||||
except Exception as e:
|
||||
logger.error(f"Analysis phase failed: {e}")
|
||||
return {
|
||||
"issues_found": True,
|
||||
"issues": [{"description": str(e), "severity": "high"}],
|
||||
"fix_plans": {},
|
||||
}
|
||||
|
||||
async def _fixing_phase(self, analysis: Dict) -> List[Dict]:
|
||||
"""Claude Code implements fixes."""
|
||||
fixes = []
|
||||
|
||||
for issue in analysis.get("issues", []):
|
||||
fix_plan = analysis.get("fix_plans", {}).get(issue.get("id", "unknown"))
|
||||
|
||||
if fix_plan:
|
||||
try:
|
||||
result = await self.claude_bridge.execute_fix(fix_plan)
|
||||
fixes.append(
|
||||
{
|
||||
"issue_id": issue.get("id"),
|
||||
"status": result.get("status"),
|
||||
"files_modified": result.get("files", []),
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
fixes.append(
|
||||
{
|
||||
"issue_id": issue.get("id"),
|
||||
"status": "error",
|
||||
"error": str(e),
|
||||
}
|
||||
)
|
||||
|
||||
return fixes
|
||||
|
||||
async def _verification_phase(self, fixes: List[Dict]) -> Dict:
|
||||
"""Dashboard verifies fixes."""
|
||||
# Re-run tests for each fix
|
||||
all_passed = True
|
||||
verification_results = []
|
||||
|
||||
for fix in fixes:
|
||||
if fix.get("status") == "error":
|
||||
all_passed = False
|
||||
verification_results.append(
|
||||
{
|
||||
"issue_id": fix.get("issue_id"),
|
||||
"passed": False,
|
||||
"reason": fix.get("error"),
|
||||
}
|
||||
)
|
||||
else:
|
||||
# Run targeted test
|
||||
result = await self.dashboard.verify_fix(fix)
|
||||
verification_results.append(result)
|
||||
if not result.get("passed", False):
|
||||
all_passed = False
|
||||
|
||||
return {
|
||||
"all_passed": all_passed,
|
||||
"results": verification_results,
|
||||
}
|
||||
|
||||
    async def _record_learning(self, iteration: IterationResult):
        """Store successful patterns for future reference.

        Records a "success_pattern" insight in LAC so later planning phases
        can retrieve it via get_relevant_insights. A missing LAC client makes
        this a no-op; recording failures are logged but never raised, since
        learning is best-effort.

        Args:
            iteration: The successful iteration whose plan objective is recorded.
        """
        if not self.lac:
            return

        try:
            self.lac.record_insight(
                category="success_pattern",
                context=f"DevLoop iteration {iteration.iteration}",
                insight=f"Successfully completed: {iteration.plan.get('objective', 'unknown')}",
                confidence=0.8,
                tags=["devloop", "success"],
            )
        except Exception as e:
            logger.warning(f"Failed to record learning: {e}")
|
||||
|
||||
# ========================================================================
|
||||
# Single-step operations (for manual control)
|
||||
# ========================================================================
|
||||
|
||||
async def step_plan(self, objective: str, context: Optional[Dict] = None) -> Dict:
|
||||
"""Execute only the planning phase."""
|
||||
self._update_state(LoopPhase.PLANNING, objective)
|
||||
plan = await self._planning_phase(objective, context)
|
||||
self._update_state(LoopPhase.IDLE)
|
||||
return plan
|
||||
|
||||
async def step_implement(self, plan: Dict) -> Dict:
|
||||
"""Execute only the implementation phase."""
|
||||
self._update_state(LoopPhase.IMPLEMENTING)
|
||||
result = await self._implementation_phase(plan)
|
||||
self._update_state(LoopPhase.IDLE)
|
||||
return result
|
||||
|
||||
async def step_test(self, scenarios: List[Dict]) -> Dict:
|
||||
"""Execute only the testing phase."""
|
||||
self._update_state(LoopPhase.TESTING)
|
||||
result = await self._testing_phase({"test_scenarios": scenarios})
|
||||
self._update_state(LoopPhase.IDLE)
|
||||
return result
|
||||
|
||||
async def step_analyze(self, test_results: Dict) -> Dict:
|
||||
"""Execute only the analysis phase."""
|
||||
self._update_state(LoopPhase.ANALYZING)
|
||||
result = await self._analysis_phase(test_results)
|
||||
self._update_state(LoopPhase.IDLE)
|
||||
return result
|
||||
|
||||
def get_state(self) -> Dict:
|
||||
"""Get current state as dict."""
|
||||
return {
|
||||
"phase": self.state.phase.value,
|
||||
"iteration": self.state.iteration,
|
||||
"current_task": self.state.current_task,
|
||||
"test_results": self.state.test_results,
|
||||
"last_update": self.state.last_update,
|
||||
}
|
||||
|
||||
def export_history(self, filepath: Optional[Path] = None) -> Dict:
|
||||
"""Export cycle history for analysis."""
|
||||
history = {
|
||||
"exported_at": datetime.now().isoformat(),
|
||||
"total_cycles": len(self.cycle_history),
|
||||
"cycles": [
|
||||
{
|
||||
"objective": c.objective,
|
||||
"status": c.status,
|
||||
"iterations": len(c.iterations),
|
||||
"duration_seconds": c.total_duration_seconds,
|
||||
}
|
||||
for c in self.cycle_history
|
||||
],
|
||||
}
|
||||
|
||||
if filepath:
|
||||
with open(filepath, "w") as f:
|
||||
json.dump(history, f, indent=2)
|
||||
|
||||
return history
|
||||
451
optimization_engine/devloop/planning.py
Normal file
451
optimization_engine/devloop/planning.py
Normal file
@@ -0,0 +1,451 @@
|
||||
"""
|
||||
Gemini Planner - Strategic planning and test design using Gemini Pro.
|
||||
|
||||
Handles:
|
||||
- Implementation planning from objectives
|
||||
- Test scenario generation
|
||||
- Architecture decisions
|
||||
- Risk assessment
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PlanTask:
    """A single task in the implementation plan.

    Mirrors the JSON task objects the planner emits: an identifier, a
    human-readable description, an optional target file and code hint, a
    priority label, and the ids of tasks that must complete first.
    """

    id: str
    description: str
    file: Optional[str] = None
    code_hint: Optional[str] = None
    priority: str = "medium"
    # Sentinel-None default (a mutable [] default would be shared between
    # instances); normalized to a fresh list in __post_init__.
    dependencies: Optional[List[str]] = None

    def __post_init__(self):
        if self.dependencies is None:
            self.dependencies = []
|
||||
|
||||
|
||||
@dataclass
class TestScenario:
    """A test scenario for dashboard verification.

    Mirrors the JSON scenario objects the planner emits; missing steps and
    expected outcomes are normalized to safe defaults in __post_init__.
    """

    id: str
    name: str
    type: str  # "api", "browser", "cli", "filesystem"
    # Sentinel-None defaults (mutable defaults would be shared between
    # instances); normalized in __post_init__.
    steps: Optional[List[Dict]] = None
    expected_outcome: Optional[Dict] = None

    def __post_init__(self):
        if self.steps is None:
            self.steps = []
        if self.expected_outcome is None:
            self.expected_outcome = {"status": "pass"}
|
||||
|
||||
|
||||
class GeminiPlanner:
    """
    Strategic planner using Gemini Pro.

    Generates:
    - Implementation tasks for Claude Code
    - Test scenarios for dashboard verification
    - Architecture decisions
    - Risk assessments

    Falls back to a deterministic mock plan when the ``google-generativeai``
    package is not installed.
    """

    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the planner.

        Args:
            config: Configuration with API key and model settings
                ("api_key", "model").
        """
        self.config = config or {}
        self._client = None  # genai module once configured, or the string "mock"
        self._model = None  # GenerativeModel instance (real client only)

    @property
    def client(self):
        """Lazy-load Gemini client.

        Returns the configured ``google.generativeai`` module, or the string
        "mock" when the SDK is not installed. Raises ValueError when the SDK
        is present but no API key is configured.
        """
        if self._client is None:
            try:
                import google.generativeai as genai

                api_key = self.config.get("api_key") or os.environ.get("GEMINI_API_KEY")
                if not api_key:
                    raise ValueError("GEMINI_API_KEY not set")

                genai.configure(api_key=api_key)
                self._client = genai

                model_name = self.config.get("model", "gemini-2.0-flash-thinking-exp-01-21")
                self._model = genai.GenerativeModel(model_name)

                logger.info(f"Gemini client initialized with model: {model_name}")

            except ImportError:
                logger.warning("google-generativeai not installed, using mock planner")
                self._client = "mock"

        return self._client

    async def create_plan(self, request: Dict) -> Dict:
        """
        Create an implementation plan from an objective.

        Args:
            request: Dict with:
                - objective: What to achieve
                - context: Additional context (study spec, etc.)
                - previous_results: Results from last iteration
                - historical_learnings: Relevant LAC insights

        Returns:
            Plan dict with tasks, test_scenarios, risks
        """
        objective = request.get("objective", "")
        context = request.get("context", {})
        previous_results = request.get("previous_results")
        learnings = request.get("historical_learnings", [])

        # Build planning prompt
        prompt = self._build_planning_prompt(objective, context, previous_results, learnings)

        # Get response from Gemini (or the deterministic mock when the SDK
        # is unavailable).
        if self.client == "mock":
            plan = self._mock_plan(objective, context)
        else:
            plan = await self._query_gemini(prompt)

        return plan

    def _build_planning_prompt(
        self,
        objective: str,
        context: Dict,
        previous_results: Optional[Dict],
        learnings: List[Dict],
    ) -> str:
        """Build the planning prompt for Gemini."""

        prompt = f"""## Atomizer Development Planning Session

### Objective
{objective}

### Context
{json.dumps(context, indent=2) if context else "No additional context provided."}

### Previous Iteration Results
{json.dumps(previous_results, indent=2) if previous_results else "First iteration - no previous results."}

### Historical Learnings (from LAC)
{self._format_learnings(learnings)}

### Required Outputs

Generate a detailed implementation plan in JSON format with the following structure:

```json
{{
    "objective": "{objective}",
    "approach": "Brief description of the approach",
    "tasks": [
        {{
            "id": "task_001",
            "description": "What to do",
            "file": "path/to/file.py",
            "code_hint": "Pseudo-code or pattern to use",
            "priority": "high|medium|low",
            "dependencies": ["task_000"]
        }}
    ],
    "test_scenarios": [
        {{
            "id": "test_001",
            "name": "Test name",
            "type": "api|browser|cli|filesystem",
            "steps": [
                {{"action": "navigate", "target": "/canvas"}}
            ],
            "expected_outcome": {{"status": "pass", "assertions": []}}
        }}
    ],
    "risks": [
        {{
            "description": "What could go wrong",
            "mitigation": "How to handle it",
            "severity": "high|medium|low"
        }}
    ],
    "acceptance_criteria": [
        "Criteria 1",
        "Criteria 2"
    ]
}}
```

### Guidelines

1. **Tasks should be specific and actionable** - Each task should be completable by Claude Code
2. **Test scenarios must be verifiable** - Use dashboard endpoints and browser actions
3. **Consider Atomizer architecture** - Use existing extractors (SYS_12), follow AtomizerSpec v2.0
4. **Apply historical learnings** - Avoid known failure patterns

### Important Atomizer Patterns

- Studies use `atomizer_spec.json` (AtomizerSpec v2.0)
- Design variables have bounds: {{"min": X, "max": Y}}
- Objectives use extractors: E1 (displacement), E3 (stress), E4 (mass)
- Constraints define limits with operators: <, >, <=, >=

Output ONLY the JSON plan, no additional text.
"""
        return prompt

    def _format_learnings(self, learnings: List[Dict]) -> str:
        """Format LAC learnings for the prompt."""
        if not learnings:
            return "No relevant historical learnings."

        formatted = []
        for learning in learnings[:5]:  # Limit to 5 most relevant
            formatted.append(
                f"- [{learning.get('category', 'insight')}] {learning.get('insight', '')}"
            )

        return "\n".join(formatted)

    @staticmethod
    def _extract_json(text: str):
        """Extract and parse the first JSON payload from an LLM response.

        Handles ```json fenced blocks, generic ``` fences, and bare JSON.

        Raises:
            json.JSONDecodeError: when the extracted payload is not valid JSON.
        """
        if "```json" in text:
            start = text.find("```json") + 7
            end = text.find("```", start)
            payload = text[start:end].strip()
        elif "```" in text:
            start = text.find("```") + 3
            end = text.find("```", start)
            payload = text[start:end].strip()
        else:
            payload = text.strip()

        return json.loads(payload)

    async def _query_gemini(self, prompt: str) -> Dict:
        """Query Gemini and parse the JSON plan from its response.

        Returns an error-shaped plan (empty tasks/test_scenarios) instead of
        raising, so callers never have to guard the planning phase.
        """
        try:
            # Run the blocking SDK call in a worker thread so the event loop
            # stays responsive.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, lambda: self._model.generate_content(prompt)
            )

            text = response.text

            try:
                plan = self._extract_json(text)
                logger.info(f"Gemini plan parsed: {len(plan.get('tasks', []))} tasks")
                return plan

            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse Gemini response: {e}")
                return {
                    "objective": "Parse error",
                    "error": str(e),
                    "raw_response": text[:500],
                    "tasks": [],
                    "test_scenarios": [],
                }

        except Exception as e:
            logger.error(f"Gemini query failed: {e}")
            return {
                "objective": "Query error",
                "error": str(e),
                "tasks": [],
                "test_scenarios": [],
            }

    def _mock_plan(self, objective: str, context: Dict) -> Dict:
        """Generate a mock plan for testing without Gemini API.

        Produces a deterministic study-creation plan when the objective looks
        like a study/creation request; otherwise tasks and scenarios are empty.
        """
        logger.info("Using mock planner (Gemini not available)")

        # Detect objective type
        is_study_creation = any(
            kw in objective.lower() for kw in ["create", "study", "new", "setup"]
        )

        tasks = []
        test_scenarios = []

        if is_study_creation:
            study_name = context.get("study_name", "support_arm")

            tasks = [
                {
                    "id": "task_001",
                    "description": f"Create study directory structure for {study_name}",
                    "file": f"studies/_Other/{study_name}/",
                    "priority": "high",
                    "dependencies": [],
                },
                {
                    "id": "task_002",
                    "description": "Copy NX model files to study directory",
                    "file": f"studies/_Other/{study_name}/1_setup/model/",
                    "priority": "high",
                    "dependencies": ["task_001"],
                },
                {
                    "id": "task_003",
                    "description": "Create AtomizerSpec v2.0 configuration",
                    "file": f"studies/_Other/{study_name}/atomizer_spec.json",
                    "priority": "high",
                    "dependencies": ["task_002"],
                },
                {
                    "id": "task_004",
                    "description": "Create run_optimization.py script",
                    "file": f"studies/_Other/{study_name}/run_optimization.py",
                    "priority": "high",
                    "dependencies": ["task_003"],
                },
                {
                    "id": "task_005",
                    "description": "Create README.md documentation",
                    "file": f"studies/_Other/{study_name}/README.md",
                    "priority": "medium",
                    "dependencies": ["task_003"],
                },
            ]

            test_scenarios = [
                {
                    "id": "test_001",
                    "name": "Study directory exists",
                    "type": "filesystem",
                    "steps": [{"action": "check_exists", "path": f"studies/_Other/{study_name}"}],
                    "expected_outcome": {"exists": True},
                },
                {
                    "id": "test_002",
                    "name": "AtomizerSpec is valid",
                    "type": "api",
                    "steps": [
                        {"action": "get", "endpoint": f"/api/studies/{study_name}/spec/validate"}
                    ],
                    "expected_outcome": {"valid": True},
                },
                {
                    "id": "test_003",
                    "name": "Dashboard loads study",
                    "type": "browser",
                    "steps": [
                        {"action": "navigate", "url": f"/canvas/{study_name}"},
                        {"action": "wait_for", "selector": "[data-testid='canvas-container']"},
                    ],
                    "expected_outcome": {"loaded": True},
                },
            ]

        return {
            "objective": objective,
            "approach": "Mock plan for development testing",
            "tasks": tasks,
            "test_scenarios": test_scenarios,
            "risks": [
                {
                    "description": "NX model files may have dependencies",
                    "mitigation": "Copy all related files (_i.prt, .fem, .sim)",
                    "severity": "high",
                }
            ],
            "acceptance_criteria": [
                "Study directory structure created",
                "AtomizerSpec validates without errors",
                "Dashboard loads study canvas",
            ],
        }

    async def analyze_codebase(self, query: str) -> Dict:
        """
        Use Gemini to analyze codebase state.

        Args:
            query: What to analyze (e.g., "current dashboard components")

        Returns:
            Analysis results
        """
        # This would integrate with codebase scanning
        # For now, return a stub
        return {
            "query": query,
            "analysis": "Codebase analysis not yet implemented",
            "recommendations": [],
        }

    async def generate_test_scenarios(
        self,
        feature: str,
        context: Optional[Dict] = None,
    ) -> List[Dict]:
        """
        Generate test scenarios for a specific feature.

        Args:
            feature: Feature to test (e.g., "study creation", "spec validation")
            context: Additional context

        Returns:
            List of test scenarios (empty on any failure)
        """
        prompt = f"""Generate test scenarios for the Atomizer feature: {feature}

Context: {json.dumps(context, indent=2) if context else "None"}

Output as JSON array of test scenarios:
```json
[
    {{
        "id": "test_001",
        "name": "Test name",
        "type": "api|browser|cli|filesystem",
        "steps": [...],
        "expected_outcome": {{...}}
    }}
]
```
"""

        if self.client == "mock":
            return self._mock_plan(feature, context or {}).get("test_scenarios", [])

        # Query Gemini
        try:
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, lambda: self._model.generate_content(prompt)
            )

            # Shared extractor also accepts plain ``` fences and bare JSON
            # (previously only ```json-fenced responses were parsed).
            scenarios = self._extract_json(response.text)
            # Guard against a model returning a single object instead of a list.
            return scenarios if isinstance(scenarios, list) else []

        except Exception as e:
            logger.error(f"Failed to generate test scenarios: {e}")

        return []
|
||||
585
optimization_engine/devloop/test_runner.py
Normal file
585
optimization_engine/devloop/test_runner.py
Normal file
@@ -0,0 +1,585 @@
|
||||
"""
|
||||
Dashboard Test Runner - Automated testing through the Atomizer dashboard.
|
||||
|
||||
Supports test types:
|
||||
- API tests (REST endpoint verification)
|
||||
- Browser tests (UI interaction via Playwright)
|
||||
- CLI tests (command line execution)
|
||||
- Filesystem tests (file/directory verification)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import aiohttp
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class TestStep:
    """A single step in a test scenario.

    NOTE(review): the visible executors consume raw step dicts rather than
    TestStep instances; this dataclass appears to document the expected step
    shape — confirm intended usage.
    """

    action: str  # e.g. "navigate", "click", "get", "check_exists"
    target: Optional[str] = None  # selector, endpoint, or path the action applies to
    data: Optional[Dict] = None  # request payload or extra action parameters
    timeout_ms: int = 5000  # per-step timeout in milliseconds
|
||||
|
||||
|
||||
@dataclass
class TestScenario:
    """A complete test scenario.

    Built from planner-emitted dicts by DashboardTestRunner._parse_scenario
    and dispatched to a type-specific executor.
    """

    id: str
    name: str
    type: str  # "api", "browser", "cli", "filesystem"
    steps: List[Dict] = field(default_factory=list)  # raw step dicts consumed by the executors
    expected_outcome: Dict = field(default_factory=lambda: {"status": "pass"})
    timeout_ms: int = 30000  # overall scenario timeout in milliseconds
|
||||
|
||||
|
||||
@dataclass
class TestResult:
    """Result of a single test."""

    scenario_id: str
    scenario_name: str
    passed: bool
    duration_ms: float  # wall-clock time; filled in by run_test_suite after execution
    error: Optional[str] = None  # failure/exception message, if any
    details: Optional[Dict] = None  # executor-specific evidence (status codes, selectors, ...)
|
||||
|
||||
|
||||
@dataclass
class TestReport:
    """Complete test report aggregated over a suite run."""

    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())  # creation time, ISO 8601
    scenarios: List[TestResult] = field(default_factory=list)  # one result per executed scenario
    summary: Dict = field(default_factory=lambda: {"passed": 0, "failed": 0, "total": 0})  # running counts
|
||||
|
||||
|
||||
class DashboardTestRunner:
|
||||
"""
|
||||
Automated test runner for Atomizer dashboard.
|
||||
|
||||
Executes test scenarios against:
|
||||
- Backend API endpoints
|
||||
- Frontend UI (via Playwright if available)
|
||||
- CLI commands
|
||||
- Filesystem assertions
|
||||
"""
|
||||
|
||||
    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize the test runner.

        Args:
            config: Configuration with dashboard URLs and timeouts. Recognized
                keys: "dashboard_url", "websocket_url", "test_timeout_ms",
                "studies_dir".
        """
        self.config = config or {}
        self.base_url = self.config.get("dashboard_url", "http://localhost:8000")
        self.ws_url = self.config.get("websocket_url", "ws://localhost:8000")
        self.timeout_ms = self.config.get("test_timeout_ms", 30000)
        # NOTE(review): machine-specific absolute default path — should be
        # supplied via config or derived from the repo root; TODO confirm.
        self.studies_dir = Path(self.config.get("studies_dir", "C:/Users/antoi/Atomizer/studies"))

        # Lazily-created connection handles; see connect()/disconnect().
        self._session: Optional[aiohttp.ClientSession] = None
        self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
        self._playwright = None
        self._browser = None
|
||||
|
||||
async def connect(self):
|
||||
"""Initialize connections."""
|
||||
if self._session is None:
|
||||
self._session = aiohttp.ClientSession(
|
||||
timeout=aiohttp.ClientTimeout(total=self.timeout_ms / 1000)
|
||||
)
|
||||
|
||||
async def disconnect(self):
|
||||
"""Clean up connections."""
|
||||
if self._ws:
|
||||
await self._ws.close()
|
||||
self._ws = None
|
||||
if self._session:
|
||||
await self._session.close()
|
||||
self._session = None
|
||||
if self._browser:
|
||||
await self._browser.close()
|
||||
self._browser = None
|
||||
|
||||
    async def run_test_suite(self, scenarios: List[Dict]) -> Dict:
        """
        Run a complete test suite.

        Args:
            scenarios: List of test scenario dicts

        Returns:
            Test report as dict with "timestamp", "scenarios" (one entry per
            scenario), and a "summary" of passed/failed/total counts.
        """
        # Ensure the shared HTTP session exists before any API scenario runs.
        await self.connect()

        report = TestReport()

        for scenario_dict in scenarios:
            scenario = self._parse_scenario(scenario_dict)
            start_time = datetime.now()

            try:
                result = await self._execute_scenario(scenario)
                # Executors leave duration at 0; wall-clock time is measured here.
                result.duration_ms = (datetime.now() - start_time).total_seconds() * 1000
                report.scenarios.append(result)

                if result.passed:
                    report.summary["passed"] += 1
                else:
                    report.summary["failed"] += 1

            except Exception as e:
                # An executor crash is recorded as a failed scenario, not propagated,
                # so one broken test cannot abort the rest of the suite.
                logger.error(f"Scenario {scenario.id} failed with error: {e}")
                report.scenarios.append(
                    TestResult(
                        scenario_id=scenario.id,
                        scenario_name=scenario.name,
                        passed=False,
                        duration_ms=(datetime.now() - start_time).total_seconds() * 1000,
                        error=str(e),
                    )
                )
                report.summary["failed"] += 1

            report.summary["total"] += 1

        return {
            "timestamp": report.timestamp,
            "scenarios": [self._result_to_dict(r) for r in report.scenarios],
            "summary": report.summary,
        }
|
||||
|
||||
def _parse_scenario(self, scenario_dict: Dict) -> TestScenario:
|
||||
"""Parse a scenario dict into TestScenario."""
|
||||
return TestScenario(
|
||||
id=scenario_dict.get("id", "unknown"),
|
||||
name=scenario_dict.get("name", "Unnamed test"),
|
||||
type=scenario_dict.get("type", "api"),
|
||||
steps=scenario_dict.get("steps", []),
|
||||
expected_outcome=scenario_dict.get("expected_outcome", {"status": "pass"}),
|
||||
timeout_ms=scenario_dict.get("timeout_ms", self.timeout_ms),
|
||||
)
|
||||
|
||||
def _result_to_dict(self, result: TestResult) -> Dict:
|
||||
"""Convert TestResult to dict."""
|
||||
return {
|
||||
"scenario_id": result.scenario_id,
|
||||
"scenario_name": result.scenario_name,
|
||||
"passed": result.passed,
|
||||
"duration_ms": result.duration_ms,
|
||||
"error": result.error,
|
||||
"details": result.details,
|
||||
}
|
||||
|
||||
async def _execute_scenario(self, scenario: TestScenario) -> TestResult:
|
||||
"""Execute a single test scenario."""
|
||||
logger.info(f"Executing test: {scenario.name} ({scenario.type})")
|
||||
|
||||
if scenario.type == "api":
|
||||
return await self._execute_api_scenario(scenario)
|
||||
elif scenario.type == "browser":
|
||||
return await self._execute_browser_scenario(scenario)
|
||||
elif scenario.type == "cli":
|
||||
return await self._execute_cli_scenario(scenario)
|
||||
elif scenario.type == "filesystem":
|
||||
return await self._execute_filesystem_scenario(scenario)
|
||||
else:
|
||||
return TestResult(
|
||||
scenario_id=scenario.id,
|
||||
scenario_name=scenario.name,
|
||||
passed=False,
|
||||
duration_ms=0,
|
||||
error=f"Unknown test type: {scenario.type}",
|
||||
)
|
||||
|
||||
async def _execute_api_scenario(self, scenario: TestScenario) -> TestResult:
|
||||
"""Execute an API test scenario."""
|
||||
details = {}
|
||||
|
||||
for step in scenario.steps:
|
||||
action = step.get("action", "get").lower()
|
||||
endpoint = step.get("endpoint", step.get("target", "/"))
|
||||
data = step.get("data")
|
||||
|
||||
url = f"{self.base_url}{endpoint}"
|
||||
|
||||
try:
|
||||
if action == "get":
|
||||
async with self._session.get(url) as resp:
|
||||
details["status_code"] = resp.status
|
||||
details["response"] = await resp.json()
|
||||
|
||||
elif action == "post":
|
||||
async with self._session.post(url, json=data) as resp:
|
||||
details["status_code"] = resp.status
|
||||
details["response"] = await resp.json()
|
||||
|
||||
elif action == "put":
|
||||
async with self._session.put(url, json=data) as resp:
|
||||
details["status_code"] = resp.status
|
||||
details["response"] = await resp.json()
|
||||
|
||||
elif action == "delete":
|
||||
async with self._session.delete(url) as resp:
|
||||
details["status_code"] = resp.status
|
||||
details["response"] = await resp.json()
|
||||
|
||||
except aiohttp.ClientError as e:
|
||||
return TestResult(
|
||||
scenario_id=scenario.id,
|
||||
scenario_name=scenario.name,
|
||||
passed=False,
|
||||
duration_ms=0,
|
||||
error=f"API request failed: {e}",
|
||||
details={"url": url, "action": action},
|
||||
)
|
||||
except json.JSONDecodeError:
|
||||
details["response"] = "Non-JSON response"
|
||||
|
||||
# Check expected outcome
|
||||
passed = self._check_outcome(details, scenario.expected_outcome)
|
||||
|
||||
return TestResult(
|
||||
scenario_id=scenario.id,
|
||||
scenario_name=scenario.name,
|
||||
passed=passed,
|
||||
duration_ms=0,
|
||||
details=details,
|
||||
)
|
||||
|
||||
async def _execute_browser_scenario(self, scenario: TestScenario) -> TestResult:
    """Execute a browser test scenario using Playwright.

    Steps are applied in order against a headless Chromium page:
    ``navigate``, ``wait_for``, ``click``, ``fill`` and ``screenshot``.
    A missing Playwright installation skips the scenario (reported as
    passed); any runtime error fails it with the partial ``details``.
    """
    try:
        from playwright.async_api import async_playwright
    except ImportError:
        logger.warning("Playwright not available, skipping browser test")
        # Treat an absent Playwright install as a skip, not a failure.
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=True,  # Skip, don't fail
            duration_ms=0,
            error="Playwright not installed - test skipped",
        )

    details = {}

    try:
        async with async_playwright() as pw:
            browser = await pw.chromium.launch(headless=True)
            page = await browser.new_page()

            for step in scenario.steps:
                kind = step.get("action", "navigate")
                target = step.get("selector")

                if kind == "navigate":
                    raw = step.get("url", "/")
                    # Use frontend URL (port 3003 for Vite dev server)
                    # for app-relative paths; absolute URLs go as-is.
                    destination = (
                        f"http://localhost:3003{raw}" if raw.startswith("/") else raw
                    )
                    await page.goto(destination, timeout=scenario.timeout_ms)
                    details["navigated_to"] = destination
                elif kind == "wait_for" and target:
                    await page.wait_for_selector(target, timeout=scenario.timeout_ms)
                    details["found_selector"] = target
                elif kind == "click" and target:
                    await page.click(target)
                    details["clicked"] = target
                elif kind == "fill" and target:
                    text = step.get("value", "")
                    await page.fill(target, text)
                    details["filled"] = {target: text}
                elif kind == "screenshot":
                    shot_path = step.get("path", f"test_{scenario.id}.png")
                    await page.screenshot(path=shot_path)
                    details["screenshot"] = shot_path

            await browser.close()

        succeeded = True

    except Exception as e:
        # Any step failure (navigation timeout, missing selector, ...)
        # fails the scenario; keep whatever details were gathered.
        return TestResult(
            scenario_id=scenario.id,
            scenario_name=scenario.name,
            passed=False,
            duration_ms=0,
            error=f"Browser test failed: {e}",
            details=details,
        )

    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=succeeded,
        duration_ms=0,
        details=details,
    )
|
||||
|
||||
async def _execute_cli_scenario(self, scenario: TestScenario) -> TestResult:
    """Execute a CLI test scenario.

    Each step runs one shell command (PowerShell on Windows, ``sh``
    elsewhere) in ``step["cwd"]`` (default: the studies directory) and
    records the return code and truncated stdout/stderr in ``details``.
    The scenario fails fast on the first non-zero exit, timeout, or
    execution error; otherwise ``details`` is matched against
    ``scenario.expected_outcome``.
    """
    import os  # local import keeps the platform fix self-contained

    details = {}

    for step in scenario.steps:
        command = step.get("command", step.get("target", ""))
        cwd = step.get("cwd", str(self.studies_dir))

        if not command:
            continue

        # Choose an interpreter per platform.  The previous version
        # hard-coded PowerShell, which made every CLI scenario fail on
        # POSIX hosts with a FileNotFoundError.
        if os.name == "nt":
            argv = ["powershell", "-Command", command]
        else:
            argv = ["sh", "-c", command]

        try:
            result = subprocess.run(
                argv,
                capture_output=True,
                text=True,
                cwd=cwd,
                timeout=scenario.timeout_ms / 1000,  # ms -> seconds
            )

            details["command"] = command
            details["returncode"] = result.returncode
            # Truncate captured output so huge logs don't bloat reports.
            details["stdout"] = result.stdout[:1000] if result.stdout else ""
            details["stderr"] = result.stderr[:1000] if result.stderr else ""

            if result.returncode != 0:
                return TestResult(
                    scenario_id=scenario.id,
                    scenario_name=scenario.name,
                    passed=False,
                    duration_ms=0,
                    error=f"Command failed with code {result.returncode}",
                    details=details,
                )

        except subprocess.TimeoutExpired:
            return TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=f"Command timed out after {scenario.timeout_ms}ms",
                details={"command": command},
            )
        except Exception as e:
            # Catch-all boundary: surface launch errors (missing shell,
            # bad cwd, ...) as a failed scenario rather than crashing.
            return TestResult(
                scenario_id=scenario.id,
                scenario_name=scenario.name,
                passed=False,
                duration_ms=0,
                error=f"CLI execution failed: {e}",
                details={"command": command},
            )

    passed = self._check_outcome(details, scenario.expected_outcome)

    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=passed,
        duration_ms=0,
        details=details,
    )
|
||||
|
||||
async def _execute_filesystem_scenario(self, scenario: TestScenario) -> TestResult:
    """Execute a filesystem test scenario.

    Supported step actions:
      - ``check_exists``: path existence must match
        ``expected_outcome["exists"]`` (default True).
      - ``check_file_contains``: file must contain ``step["contains"]``.
      - ``check_json_valid``: file must parse as JSON.

    Relative paths resolve against the parent of the studies directory.
    The scenario fails fast on the first unmet check; if every check
    holds, it passes.
    """
    details = {}

    for step in scenario.steps:
        action = step.get("action", "check_exists")
        path_str = step.get("path", "")

        # Resolve relative paths against the project root (the parent
        # of the studies directory).
        if not Path(path_str).is_absolute():
            path = self.studies_dir.parent / path_str
        else:
            path = Path(path_str)
        # Record the resolved path for every step (previously only the
        # check_exists branch did), so failure details always name it.
        details["path"] = str(path)

        if action == "check_exists":
            exists = path.exists()
            details["exists"] = exists

            if scenario.expected_outcome.get("exists", True) != exists:
                return TestResult(
                    scenario_id=scenario.id,
                    scenario_name=scenario.name,
                    passed=False,
                    duration_ms=0,
                    error=f"Path {'does not exist' if not exists else 'exists but should not'}: {path}",
                    details=details,
                )

        elif action == "check_file_contains":
            # NOTE: a missing/empty "contains" value trivially passes
            # (the empty string is a substring of everything).
            content_check = step.get("contains", "")
            if path.exists() and path.is_file():
                # Read explicitly as UTF-8: the platform-default
                # encoding (previous behavior) is not reliable for
                # project files on Windows.
                content = path.read_text(encoding="utf-8")
                contains = content_check in content
                details["contains"] = contains
                details["search_term"] = content_check

                if not contains:
                    return TestResult(
                        scenario_id=scenario.id,
                        scenario_name=scenario.name,
                        passed=False,
                        duration_ms=0,
                        error=f"File does not contain: {content_check}",
                        details=details,
                    )
            else:
                return TestResult(
                    scenario_id=scenario.id,
                    scenario_name=scenario.name,
                    passed=False,
                    duration_ms=0,
                    error=f"File not found: {path}",
                    details=details,
                )

        elif action == "check_json_valid":
            if path.exists() and path.is_file():
                try:
                    with open(path, encoding="utf-8") as f:
                        json.load(f)
                    details["valid_json"] = True
                except json.JSONDecodeError as e:
                    # Keep the accumulated details (previously they were
                    # replaced by a bare {"path": ...} dict).
                    return TestResult(
                        scenario_id=scenario.id,
                        scenario_name=scenario.name,
                        passed=False,
                        duration_ms=0,
                        error=f"Invalid JSON: {e}",
                        details=details,
                    )
            else:
                return TestResult(
                    scenario_id=scenario.id,
                    scenario_name=scenario.name,
                    passed=False,
                    duration_ms=0,
                    error=f"File not found: {path}",
                    details=details,
                )

    # All steps' checks held.
    return TestResult(
        scenario_id=scenario.id,
        scenario_name=scenario.name,
        passed=True,
        duration_ms=0,
        details=details,
    )
|
||||
|
||||
def _check_outcome(self, details: Dict, expected: Dict) -> bool:
|
||||
"""Check if test details match expected outcome."""
|
||||
for key, expected_value in expected.items():
|
||||
if key not in details:
|
||||
continue
|
||||
|
||||
actual_value = details[key]
|
||||
|
||||
# Handle nested dicts
|
||||
if isinstance(expected_value, dict) and isinstance(actual_value, dict):
|
||||
if not self._check_outcome(actual_value, expected_value):
|
||||
return False
|
||||
# Handle lists
|
||||
elif isinstance(expected_value, list) and isinstance(actual_value, list):
|
||||
if expected_value != actual_value:
|
||||
return False
|
||||
# Handle simple values
|
||||
elif actual_value != expected_value:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
async def verify_fix(self, fix: Dict) -> Dict:
    """
    Verify that a specific fix was successful.

    Args:
        fix: Fix dict with issue_id and files_modified

    Returns:
        Verification result dict with ``issue_id``, ``passed`` and
        ``details`` (``details["missing_file"]`` names the first
        modified file that does not exist, if any).
    """
    outcome = {
        "issue_id": fix.get("issue_id", "unknown"),
        "passed": True,
        "details": {},
    }

    # The fix counts as verified when every file it claims to have
    # modified exists on disk; report only the first missing one.
    missing = next(
        (Path(p) for p in fix.get("files_modified", []) if not Path(p).exists()),
        None,
    )
    if missing is not None:
        outcome["passed"] = False
        outcome["details"]["missing_file"] = str(missing)

    # Could add more sophisticated verification here

    return outcome
|
||||
|
||||
async def run_health_check(self) -> Dict:
    """
    Run a quick health check on dashboard components.

    Probes the backend API health endpoint and the frontend server,
    recording each as "healthy", "unhealthy (status N)", or
    "error: <exception>".

    Returns:
        Health status dict with "timestamp", "api", "frontend" and
        "websocket" keys.
    """
    # Ensure the HTTP session is ready before issuing requests.
    await self.connect()

    health = {
        "timestamp": datetime.now().isoformat(),
        "api": "unknown",
        "frontend": "unknown",
        # NOTE(review): "websocket" is never probed below and always
        # remains "unknown" — confirm whether a WS check was intended.
        "websocket": "unknown",
    }

    # Check API
    try:
        async with self._session.get(f"{self.base_url}/health") as resp:
            if resp.status == 200:
                health["api"] = "healthy"
            else:
                health["api"] = f"unhealthy (status {resp.status})"
    except Exception as e:
        # Boundary catch-all: record the error rather than propagate.
        health["api"] = f"error: {e}"

    # Check frontend (if available)
    # NOTE(review): this probes port 3000, but the browser scenarios
    # target the Vite dev server on port 3003 — confirm which port the
    # frontend actually serves on.
    try:
        async with self._session.get("http://localhost:3000") as resp:
            if resp.status == 200:
                health["frontend"] = "healthy"
            else:
                health["frontend"] = f"unhealthy (status {resp.status})"
    except Exception as e:
        health["frontend"] = f"error: {e}"

    return health
|
||||
1042
optimization_engine/reporting/html_report.py
Normal file
1042
optimization_engine/reporting/html_report.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user