From 319383134070702637fb2d87b32c26f52d9873f1 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Sat, 24 Jan 2026 21:18:18 -0500 Subject: [PATCH] feat: Add DevLoop automation and HTML Reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## DevLoop - Closed-Loop Development System - Orchestrator for plan → build → test → analyze cycle - Gemini planning via OpenCode CLI - Claude implementation via CLI bridge - Playwright browser testing integration - Test runner with API, filesystem, and browser tests - Persistent state in .devloop/ directory - CLI tool: tools/devloop_cli.py Usage: python tools/devloop_cli.py start 'Create new feature' python tools/devloop_cli.py plan 'Fix bug in X' python tools/devloop_cli.py test --study support_arm python tools/devloop_cli.py browser --level full ## HTML Reports (optimization_engine/reporting/) - Interactive Plotly-based reports - Convergence plot, Pareto front, parallel coordinates - Parameter importance analysis - Self-contained HTML (offline-capable) - Tailwind CSS styling ## Playwright E2E Tests - Home page tests - Test results in test-results/ ## LAC Knowledge Base Updates - Session insights (failures, workarounds, patterns) - Optimization memory for arm support study --- .devloop/browser_test_results.json | 33 + .devloop/current_plan.json | 16 + .devloop/test_results.json | 64 + .../backend/api/routes/devloop.py | 416 +++++++ .../src/components/devloop/DevLoopPanel.tsx | 342 ++++++ .../frontend/test-results/.last-run.json | 4 + .../frontend/tests/e2e/home.spec.ts | 171 +++ docs/guides/DEVLOOP.md | 540 +++++++++ .../lac/optimization_memory/arm_support.jsonl | 1 + .../lac/session_insights/failure.jsonl | 8 + .../protocol_clarification.jsonl | 1 + .../session_insights/success_pattern.jsonl | 9 + .../session_insights/user_preference.jsonl | 1 + .../lac/session_insights/workaround.jsonl | 4 + optimization_engine/devloop/__init__.py | 68 ++ optimization_engine/devloop/analyzer.py | 421 +++++++ .../devloop/browser_scenarios.py | 170 +++ optimization_engine/devloop/claude_bridge.py | 392 +++++++ optimization_engine/devloop/cli_bridge.py | 652 +++++++++++ optimization_engine/devloop/orchestrator.py | 561 +++++++++ optimization_engine/devloop/planning.py | 451 +++++++ optimization_engine/devloop/test_runner.py | 585 +++++++++ optimization_engine/reporting/html_report.py | 1042 +++++++++++++++++ tools/devloop_cli.py | 485 ++++++++ 24 files changed, 6437 insertions(+) create mode 100644 .devloop/browser_test_results.json create mode 100644 .devloop/current_plan.json create mode 100644 .devloop/test_results.json create mode 100644 atomizer-dashboard/backend/api/routes/devloop.py create mode 100644 atomizer-dashboard/frontend/src/components/devloop/DevLoopPanel.tsx create mode 100644 atomizer-dashboard/frontend/test-results/.last-run.json create mode 100644 atomizer-dashboard/frontend/tests/e2e/home.spec.ts create mode 100644 docs/guides/DEVLOOP.md create mode 100644 knowledge_base/lac/optimization_memory/arm_support.jsonl create mode 100644 optimization_engine/devloop/__init__.py create mode 100644 optimization_engine/devloop/analyzer.py create mode 100644 optimization_engine/devloop/browser_scenarios.py create mode 100644 optimization_engine/devloop/claude_bridge.py create mode 100644 optimization_engine/devloop/cli_bridge.py create mode 100644 optimization_engine/devloop/orchestrator.py create mode 100644 optimization_engine/devloop/planning.py create mode 100644 optimization_engine/devloop/test_runner.py create mode 100644 optimization_engine/reporting/html_report.py create mode 100644 tools/devloop_cli.py diff --git a/.devloop/browser_test_results.json b/.devloop/browser_test_results.json new file mode 100644 index 00000000..904ac0da --- /dev/null +++ b/.devloop/browser_test_results.json @@ -0,0 +1,33 @@ +{ + "timestamp": "2026-01-22T18:13:30.884945", + "scenarios": [ + { + "scenario_id": "browser_home_stats", + "scenario_name": "Home page shows statistics", + "passed": true, + "duration_ms": 1413.166, + "error": null, + "details": { + "navigated_to": "http://localhost:3003/", + "found_selector": "text=Total Trials" + } + }, + { + "scenario_id": "browser_expand_folder", + "scenario_name": "Topic folder expands on click", + "passed": true, + "duration_ms": 2785.3219999999997, + "error": null, + "details": { + "navigated_to": "http://localhost:3003/", + "found_selector": "span:has-text('completed'), span:has-text('running'), span:has-text('paused')", + "clicked": "button:has-text('trials')" + } + } + ], + "summary": { + "passed": 2, + "failed": 0, + "total": 2 + } +} \ No newline at end of file diff --git a/.devloop/current_plan.json b/.devloop/current_plan.json new file mode 100644 index 00000000..9a309567 --- /dev/null +++ b/.devloop/current_plan.json @@ -0,0 +1,16 @@ +{ + "objective": "Implement Dashboard Intake & AtomizerSpec Integration: Phase 1 - Create backend intake API routes (create, introspect, list, topics endpoints) and spec_manager service. The spec_models.py and JSON schema have already been updated with SpecStatus, IntrospectionData, BaselineData, and ExpressionInfo models. Now need to create: 1) backend/api/services/spec_manager.py for centralized spec CRUD, 2) backend/api/routes/intake.py with endpoints for creating inbox folders, running introspection, listing inbox contents, and listing topics, 3) Register the intake router in main.py. Reference the plan at docs/plans/DASHBOARD_INTAKE_ATOMIZERSPEC_INTEGRATION.md", + "approach": "Fallback plan - manual implementation", + "tasks": [ + { + "id": "task_001", + "description": "Implement: Implement Dashboard Intake & AtomizerSpec Integration: Phase 1 - Create backend intake API routes (create, introspect, list, topics endpoints) and spec_manager service. The spec_models.py and JSON schema have already been updated with SpecStatus, IntrospectionData, BaselineData, and ExpressionInfo models. Now need to create: 1) backend/api/services/spec_manager.py for centralized spec CRUD, 2) backend/api/routes/intake.py with endpoints for creating inbox folders, running introspection, listing inbox contents, and listing topics, 3) Register the intake router in main.py. Reference the plan at docs/plans/DASHBOARD_INTAKE_ATOMIZERSPEC_INTEGRATION.md", + "file": "TBD", + "priority": "high" + } + ], + "test_scenarios": [], + "acceptance_criteria": [ + "Implement Dashboard Intake & AtomizerSpec Integration: Phase 1 - Create backend intake API routes (create, introspect, list, topics endpoints) and spec_manager service. The spec_models.py and JSON schema have already been updated with SpecStatus, IntrospectionData, BaselineData, and ExpressionInfo models. Now need to create: 1) backend/api/services/spec_manager.py for centralized spec CRUD, 2) backend/api/routes/intake.py with endpoints for creating inbox folders, running introspection, listing inbox contents, and listing topics, 3) Register the intake router in main.py. Reference the plan at docs/plans/DASHBOARD_INTAKE_ATOMIZERSPEC_INTEGRATION.md" + ] +} \ No newline at end of file diff --git a/.devloop/test_results.json b/.devloop/test_results.json new file mode 100644 index 00000000..f0d0cb71 --- /dev/null +++ b/.devloop/test_results.json @@ -0,0 +1,64 @@ +{ + "timestamp": "2026-01-22T21:10:54.742272", + "scenarios": [ + { + "scenario_id": "test_study_dir", + "scenario_name": "Study directory exists: stage_3_arm", + "passed": true, + "duration_ms": 0.0, + "error": null, + "details": { + "path": "C:\\Users\\antoi\\Atomizer\\studies\\Stage3\\stage_3_arm", + "exists": true + } + }, + { + "scenario_id": "test_spec", + "scenario_name": "AtomizerSpec is valid JSON", + "passed": true, + "duration_ms": 1.045, + "error": null, + "details": { + "valid_json": true + } + }, + { + "scenario_id": "test_readme", + "scenario_name": "README exists", + "passed": true, + "duration_ms": 0.0, + "error": null, + "details": { + "path": "C:\\Users\\antoi\\Atomizer\\studies\\Stage3\\stage_3_arm\\README.md", + "exists": true + } + }, + { + "scenario_id": "test_run_script", + "scenario_name": "run_optimization.py exists", + "passed": true, + "duration_ms": 0.0, + "error": null, + "details": { + "path": "C:\\Users\\antoi\\Atomizer\\studies\\Stage3\\stage_3_arm\\run_optimization.py", + "exists": true + } + }, + { + "scenario_id": "test_model_dir", + "scenario_name": "Model directory exists", + "passed": true, + "duration_ms": 0.0, + "error": null, + "details": { + "path": "C:\\Users\\antoi\\Atomizer\\studies\\Stage3\\stage_3_arm\\1_setup\\model", + "exists": true + } + } + ], + "summary": { + "passed": 5, + "failed": 0, + "total": 5 + } +} \ No newline at end of file diff --git a/atomizer-dashboard/backend/api/routes/devloop.py b/atomizer-dashboard/backend/api/routes/devloop.py new file mode 100644 index 00000000..9d577a07 --- /dev/null +++ b/atomizer-dashboard/backend/api/routes/devloop.py @@ -0,0 +1,416 @@ +""" +DevLoop API Endpoints - Closed-loop development orchestration. + +Provides REST API and WebSocket for: +- Starting/stopping development cycles +- Monitoring progress +- Executing single phases +- Viewing history and learnings +""" + +from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect, BackgroundTasks +from pydantic import BaseModel, Field +from typing import Any, Dict, List, Optional +import asyncio +import json +import sys +from pathlib import Path +from datetime import datetime + +# Add project root to path +sys.path.append(str(Path(__file__).parent.parent.parent.parent.parent)) + +router = APIRouter(prefix="/devloop", tags=["devloop"]) + +# Global orchestrator instance +_orchestrator = None +_active_cycle = None +_websocket_clients: List[WebSocket] = [] + + +def get_orchestrator(): + """Get or create the DevLoop orchestrator.""" + global _orchestrator + if _orchestrator is None: + from optimization_engine.devloop import DevLoopOrchestrator + + _orchestrator = DevLoopOrchestrator( + { + "dashboard_url": "http://localhost:8000", + "websocket_url": "ws://localhost:8000", + "studies_dir": str(Path(__file__).parent.parent.parent.parent.parent / "studies"), + "learning_enabled": True, + } + ) + + # Subscribe to state updates + _orchestrator.subscribe(_broadcast_state_update) + + return _orchestrator + + +def _broadcast_state_update(state): + """Broadcast state updates to all WebSocket clients.""" + asyncio.create_task( + _send_to_all_clients( + { + "type": "state_update", + "state": { + "phase": state.phase.value, + "iteration": state.iteration, + "current_task": state.current_task, + "last_update": state.last_update, + }, + } + ) + ) + + +async def _send_to_all_clients(message: Dict): + """Send message to all connected WebSocket clients.""" + disconnected = [] + for client in _websocket_clients: + try: + await client.send_json(message) + except Exception: + disconnected.append(client) + + # Clean up disconnected clients + for client in disconnected: + if client in _websocket_clients: + _websocket_clients.remove(client) + + +# ============================================================================ +# Request/Response Models +# ============================================================================ + + +class StartCycleRequest(BaseModel): + """Request to start a development cycle.""" + + objective: str = Field(..., description="What to achieve") + context: Optional[Dict[str, Any]] = Field(default=None, description="Additional context") + max_iterations: Optional[int] = Field(default=10, description="Maximum iterations") + + +class StepRequest(BaseModel): + """Request to execute a single step.""" + + phase: str = Field(..., description="Phase to execute: plan, implement, test, analyze") + data: Optional[Dict[str, Any]] = Field(default=None, description="Phase-specific data") + + +class CycleStatusResponse(BaseModel): + """Response with cycle status.""" + + active: bool + phase: str + iteration: int + current_task: Optional[str] + last_update: str + + +# ============================================================================ +# REST Endpoints +# ============================================================================ + + +@router.get("/status") +async def get_status() -> CycleStatusResponse: + """Get current DevLoop status.""" + orchestrator = get_orchestrator() + state = orchestrator.get_state() + + return CycleStatusResponse( + active=state["phase"] != "idle", + phase=state["phase"], + iteration=state["iteration"], + current_task=state.get("current_task"), + last_update=state["last_update"], + ) + + +@router.post("/start") +async def start_cycle(request: StartCycleRequest, background_tasks: BackgroundTasks): + """ + Start a new development cycle. + + The cycle runs in the background and broadcasts progress via WebSocket. + """ + global _active_cycle + + orchestrator = get_orchestrator() + + # Check if already running + if orchestrator.state.phase.value != "idle": + raise HTTPException(status_code=409, detail="A development cycle is already running") + + # Start cycle in background + async def run_cycle(): + global _active_cycle + try: + result = await orchestrator.run_development_cycle( + objective=request.objective, + context=request.context, + max_iterations=request.max_iterations, + ) + _active_cycle = result + + # Broadcast completion + await _send_to_all_clients( + { + "type": "cycle_complete", + "result": { + "objective": result.objective, + "status": result.status, + "iterations": len(result.iterations), + "duration_seconds": result.total_duration_seconds, + }, + } + ) + except Exception as e: + await _send_to_all_clients({"type": "cycle_error", "error": str(e)}) + + background_tasks.add_task(run_cycle) + + return { + "message": "Development cycle started", + "objective": request.objective, + } + + +@router.post("/stop") +async def stop_cycle(): + """Stop the current development cycle.""" + orchestrator = get_orchestrator() + + if orchestrator.state.phase.value == "idle": + raise HTTPException(status_code=400, detail="No active cycle to stop") + + # Set state to idle (will stop at next phase boundary) + orchestrator._update_state(phase=orchestrator.state.phase.__class__.IDLE, task="Stopping...") + + return {"message": "Cycle stop requested"} + + +@router.post("/step") +async def execute_step(request: StepRequest): + """ + Execute a single phase step. + + Useful for manual control or debugging. + """ + orchestrator = get_orchestrator() + + if request.phase == "plan": + objective = request.data.get("objective", "") if request.data else "" + context = request.data.get("context") if request.data else None + result = await orchestrator.step_plan(objective, context) + + elif request.phase == "implement": + plan = request.data if request.data else {} + result = await orchestrator.step_implement(plan) + + elif request.phase == "test": + scenarios = request.data.get("scenarios", []) if request.data else [] + result = await orchestrator.step_test(scenarios) + + elif request.phase == "analyze": + test_results = request.data if request.data else {} + result = await orchestrator.step_analyze(test_results) + + else: + raise HTTPException( + status_code=400, + detail=f"Unknown phase: {request.phase}. Valid: plan, implement, test, analyze", + ) + + return {"phase": request.phase, "result": result} + + +@router.get("/history") +async def get_history(): + """Get history of past development cycles.""" + orchestrator = get_orchestrator() + return orchestrator.export_history() + + +@router.get("/last-cycle") +async def get_last_cycle(): + """Get details of the most recent cycle.""" + global _active_cycle + + if _active_cycle is None: + raise HTTPException(status_code=404, detail="No cycle has been run yet") + + return { + "objective": _active_cycle.objective, + "status": _active_cycle.status, + "start_time": _active_cycle.start_time, + "end_time": _active_cycle.end_time, + "iterations": [ + { + "iteration": it.iteration, + "success": it.success, + "duration_seconds": it.duration_seconds, + "has_plan": it.plan is not None, + "has_tests": it.test_results is not None, + "has_fixes": it.fixes is not None, + } + for it in _active_cycle.iterations + ], + "total_duration_seconds": _active_cycle.total_duration_seconds, + } + + +@router.get("/health") +async def health_check(): + """Check DevLoop system health.""" + orchestrator = get_orchestrator() + + # Check dashboard connection + from optimization_engine.devloop import DashboardTestRunner + + runner = DashboardTestRunner() + dashboard_health = await runner.run_health_check() + + return { + "devloop": "healthy", + "orchestrator_state": orchestrator.get_state()["phase"], + "dashboard": dashboard_health, + } + + +# ============================================================================ +# WebSocket Endpoint +# ============================================================================ + + +@router.websocket("/ws") +async def websocket_endpoint(websocket: WebSocket): + """ + WebSocket endpoint for real-time DevLoop updates. + + Messages sent: + - state_update: Phase/iteration changes + - cycle_complete: Cycle finished + - cycle_error: Cycle failed + - test_progress: Individual test results + """ + await websocket.accept() + _websocket_clients.append(websocket) + + orchestrator = get_orchestrator() + + try: + # Send initial state + await websocket.send_json( + { + "type": "connection_ack", + "state": orchestrator.get_state(), + } + ) + + # Handle incoming messages + while True: + try: + data = await asyncio.wait_for(websocket.receive_json(), timeout=30.0) + + msg_type = data.get("type") + + if msg_type == "ping": + await websocket.send_json({"type": "pong"}) + + elif msg_type == "get_state": + await websocket.send_json( + { + "type": "state", + "state": orchestrator.get_state(), + } + ) + + elif msg_type == "start_cycle": + # Allow starting cycle via WebSocket + objective = data.get("objective", "") + context = data.get("context") + + asyncio.create_task(orchestrator.run_development_cycle(objective, context)) + + await websocket.send_json( + { + "type": "cycle_started", + "objective": objective, + } + ) + + except asyncio.TimeoutError: + # Send heartbeat + await websocket.send_json({"type": "heartbeat"}) + + except WebSocketDisconnect: + pass + finally: + if websocket in _websocket_clients: + _websocket_clients.remove(websocket) + + +# ============================================================================ +# Convenience Endpoints for Common Tasks +# ============================================================================ + + +@router.post("/create-study") +async def create_study_cycle( + study_name: str, + problem_statement: Optional[str] = None, + background_tasks: BackgroundTasks = None, +): + """ + Convenience endpoint to start a study creation cycle. + + This is a common workflow that combines planning, implementation, and testing. + """ + orchestrator = get_orchestrator() + + context = { + "study_name": study_name, + "task_type": "create_study", + } + + if problem_statement: + context["problem_statement"] = problem_statement + + # Start the cycle + async def run_cycle(): + result = await orchestrator.run_development_cycle( + objective=f"Create optimization study: {study_name}", + context=context, + ) + return result + + if background_tasks: + background_tasks.add_task(run_cycle) + return {"message": f"Study creation cycle started for '{study_name}'"} + else: + result = await run_cycle() + return { + "message": f"Study '{study_name}' creation completed", + "status": result.status, + "iterations": len(result.iterations), + } + + +@router.post("/run-tests") +async def run_tests(scenarios: List[Dict[str, Any]]): + """ + Run a set of test scenarios directly. + + Useful for testing specific features without a full cycle. + """ + from optimization_engine.devloop import DashboardTestRunner + + runner = DashboardTestRunner() + results = await runner.run_test_suite(scenarios) + + return results diff --git a/atomizer-dashboard/frontend/src/components/devloop/DevLoopPanel.tsx b/atomizer-dashboard/frontend/src/components/devloop/DevLoopPanel.tsx new file mode 100644 index 00000000..8880664f --- /dev/null +++ b/atomizer-dashboard/frontend/src/components/devloop/DevLoopPanel.tsx @@ -0,0 +1,342 @@ +/** + * DevLoopPanel - Control panel for closed-loop development + * + * Features: + * - Start/stop development cycles + * - Real-time phase monitoring + * - Iteration history view + * - Test result visualization + */ + +import { useState, useEffect, useCallback } from 'react'; +import { + PlayCircle, + StopCircle, + RefreshCw, + CheckCircle, + XCircle, + AlertCircle, + Clock, + ListChecks, + Zap, + ChevronDown, + ChevronRight, +} from 'lucide-react'; +import useWebSocket from 'react-use-websocket'; + +interface LoopState { + phase: string; + iteration: number; + current_task: string | null; + last_update: string; +} + +interface CycleResult { + objective: string; + status: string; + iterations: number; + duration_seconds: number; +} + +interface TestResult { + scenario_id: string; + scenario_name: string; + passed: boolean; + duration_ms: number; + error?: string; +} + +const PHASE_COLORS: Record = { + idle: 'bg-gray-500', + planning: 'bg-blue-500', + implementing: 'bg-purple-500', + testing: 'bg-yellow-500', + analyzing: 'bg-orange-500', + fixing: 'bg-red-500', + verifying: 'bg-green-500', +}; + +const PHASE_ICONS: Record = { + idle: , + planning: , + implementing: , + testing: , + analyzing: , + fixing: , + verifying: , +}; + +export function DevLoopPanel() { + const [state, setState] = useState({ + phase: 'idle', + iteration: 0, + current_task: null, + last_update: new Date().toISOString(), + }); + const [objective, setObjective] = useState(''); + const [history, setHistory] = useState([]); + const [testResults, setTestResults] = useState([]); + const [expanded, setExpanded] = useState(true); + const [isStarting, setIsStarting] = useState(false); + + // WebSocket connection for real-time updates + const { lastJsonMessage, readyState } = useWebSocket( + 'ws://localhost:8000/api/devloop/ws', + { + shouldReconnect: () => true, + reconnectInterval: 3000, + } + ); + + // Handle WebSocket messages + useEffect(() => { + if (!lastJsonMessage) return; + + const msg = lastJsonMessage as any; + + switch (msg.type) { + case 'connection_ack': + case 'state_update': + case 'state': + if (msg.state) { + setState(msg.state); + } + break; + case 'cycle_complete': + setHistory(prev => [msg.result, ...prev].slice(0, 10)); + setIsStarting(false); + break; + case 'cycle_error': + console.error('DevLoop error:', msg.error); + setIsStarting(false); + break; + case 'test_progress': + if (msg.result) { + setTestResults(prev => [...prev, msg.result]); + } + break; + } + }, [lastJsonMessage]); + + // Start a development cycle + const startCycle = useCallback(async () => { + if (!objective.trim()) return; + + setIsStarting(true); + setTestResults([]); + + try { + const response = await fetch('http://localhost:8000/api/devloop/start', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + objective: objective.trim(), + max_iterations: 10, + }), + }); + + if (!response.ok) { + const error = await response.json(); + console.error('Failed to start cycle:', error); + setIsStarting(false); + } + } catch (error) { + console.error('Failed to start cycle:', error); + setIsStarting(false); + } + }, [objective]); + + // Stop the current cycle + const stopCycle = useCallback(async () => { + try { + await fetch('http://localhost:8000/api/devloop/stop', { + method: 'POST', + }); + } catch (error) { + console.error('Failed to stop cycle:', error); + } + }, []); + + // Quick start: Create support_arm study + const quickStartSupportArm = useCallback(() => { + setObjective('Create support_arm optimization study with 5 design variables (center_space, arm_thk, arm_angle, end_thk, base_thk), objectives (minimize displacement, minimize mass), and stress constraint (< 30% yield)'); + // Auto-start after a brief delay + setTimeout(() => { + startCycle(); + }, 500); + }, [startCycle]); + + const isActive = state.phase !== 'idle'; + const wsConnected = readyState === WebSocket.OPEN; + + return ( +
+ {/* Header */} +
setExpanded(!expanded)} + > +
+ {expanded ? ( + + ) : ( + + )} + +

DevLoop Control

+
+ + {/* Status indicator */} +
+
+ + {state.phase.toUpperCase()} + +
+
+ + {expanded && ( +
+ {/* Objective Input */} +
+ +