feat: Add Studio UI, intake system, and extractor improvements

Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 12:02:30 -05:00
parent 3193831340
commit a26914bbe8
56 changed files with 14173 additions and 646 deletions
--- a/atomizer-dashboard/backend/api/services/session_manager.py
+++ b/atomizer-dashboard/backend/api/services/session_manager.py
@@ -1,11 +1,15 @@
 """
 Session Manager

-Manages persistent Claude Code sessions with MCP integration.
+Manages persistent Claude Code sessions with direct file editing.
 Fixed for Windows compatibility - uses subprocess.Popen with ThreadPoolExecutor.
+
+Strategy: Claude edits atomizer_spec.json directly using Edit/Write tools
+(no MCP dependency for reliability).
 """

 import asyncio
+import hashlib
 import json
 import os
 import subprocess
@@ -26,6 +30,10 @@ MCP_SERVER_PATH = ATOMIZER_ROOT / "mcp-server" / "atomizer-tools"
 # Thread pool for subprocess operations (Windows compatible)
 _executor = ThreadPoolExecutor(max_workers=4)

+import logging
+
+logger = logging.getLogger(__name__)
+

@dataclass
 class ClaudeSession:
@@ -130,6 +138,7 @@ class SessionManager:
        Send a message to a session and stream the response.

        Uses synchronous subprocess.Popen via ThreadPoolExecutor for Windows compatibility.
+        Claude edits atomizer_spec.json directly using Edit/Write tools (no MCP).

        Args:
            session_id: The session ID
@@ -147,45 +156,48 @@ class SessionManager:
        # Store user message
        self.store.add_message(session_id, "user", message)

+        # Get spec path and hash BEFORE Claude runs (to detect changes)
+        spec_path = self._get_spec_path(session.study_id) if session.study_id else None
+        spec_hash_before = self._get_file_hash(spec_path) if spec_path else None
+
        # Build context with conversation history AND canvas state
        history = self.store.get_history(session_id, limit=10)
        full_prompt = self.context_builder.build(
            mode=session.mode,
            study_id=session.study_id,
            conversation_history=history[:-1],
-            canvas_state=canvas_state,  # Pass canvas state for context
+            canvas_state=canvas_state,
+            spec_path=str(spec_path) if spec_path else None,  # Tell Claude where the spec is
        )
        full_prompt += f"\n\nUser: {message}\n\nRespond helpfully and concisely:"

-        # Build CLI arguments
+        # Build CLI arguments - NO MCP for reliability
        cli_args = ["claude", "--print"]

-        # Ensure MCP config exists
-        mcp_config_path = ATOMIZER_ROOT / f".claude-mcp-{session_id}.json"
-        if not mcp_config_path.exists():
-            mcp_config = self._build_mcp_config(session.mode)
-            with open(mcp_config_path, "w") as f:
-                json.dump(mcp_config, f)
-        cli_args.extend(["--mcp-config", str(mcp_config_path)])
-
        if session.mode == "user":
-            cli_args.extend([
-                "--allowedTools",
-                "Read Write(**/STUDY_REPORT.md) Write(**/3_results/*.md) Bash(python:*) mcp__atomizer-tools__*"
-            ])
+            # User mode: limited tools
+            cli_args.extend(
+                [
+                    "--allowedTools",
+                    "Read Bash(python:*)",
+                ]
+            )
        else:
+            # Power mode: full access to edit files
            cli_args.append("--dangerously-skip-permissions")

        cli_args.append("-")  # Read from stdin

        full_response = ""
        tool_calls: List[Dict] = []
+        process: Optional[subprocess.Popen] = None

        try:
            loop = asyncio.get_event_loop()

            # Run subprocess in thread pool (Windows compatible)
            def run_claude():
+                nonlocal process
                try:
                    process = subprocess.Popen(
                        cli_args,
@@ -194,8 +206,8 @@ class SessionManager:
                        stderr=subprocess.PIPE,
                        cwd=str(ATOMIZER_ROOT),
                        text=True,
-                        encoding='utf-8',
-                        errors='replace',
+                        encoding="utf-8",
+                        errors="replace",
                    )
                    stdout, stderr = process.communicate(input=full_prompt, timeout=300)
                    return {
@@ -204,10 +216,13 @@ class SessionManager:
                        "returncode": process.returncode,
                    }
                except subprocess.TimeoutExpired:
-                    process.kill()
+                    if process:
+                        process.kill()
                    return {"error": "Response timeout (5 minutes)"}
                except FileNotFoundError:
-                    return {"error": "Claude CLI not found in PATH. Install with: npm install -g @anthropic-ai/claude-code"}
+                    return {
+                        "error": "Claude CLI not found in PATH. Install with: npm install -g @anthropic-ai/claude-code"
+                    }
                except Exception as e:
                    return {"error": str(e)}

@@ -219,24 +234,14 @@ class SessionManager:
                full_response = result["stdout"] or ""

                if full_response:
-                    # Check if response contains canvas modifications (from MCP tools)
-                    import logging
-                    logger = logging.getLogger(__name__)
-
-                    modifications = self._extract_canvas_modifications(full_response)
-                    logger.info(f"[SEND_MSG] Found {len(modifications)} canvas modifications to send")
-
-                    for mod in modifications:
-                        logger.info(f"[SEND_MSG] Sending canvas_modification: {mod.get('action')} {mod.get('nodeType')}")
-                        yield {"type": "canvas_modification", "modification": mod}
-
-                    # Always send the text response
+                    # Always send the text response first
                    yield {"type": "text", "content": full_response}

                if result["returncode"] != 0 and result["stderr"]:
-                    yield {"type": "error", "message": f"CLI error: {result['stderr']}"}
+                    logger.warning(f"[SEND_MSG] CLI stderr: {result['stderr']}")

        except Exception as e:
+            logger.error(f"[SEND_MSG] Exception: {e}")
            yield {"type": "error", "message": str(e)}

        # Store assistant response
@@ -248,8 +253,46 @@ class SessionManager:
                tool_calls=tool_calls if tool_calls else None,
            )

+        # Check if spec was modified by comparing hashes
+        if spec_path and session.mode == "power" and session.study_id:
+            spec_hash_after = self._get_file_hash(spec_path)
+            if spec_hash_before != spec_hash_after:
+                logger.info(f"[SEND_MSG] Spec file was modified! Sending update.")
+                spec_update = await self._check_spec_updated(session.study_id)
+                if spec_update:
+                    yield {
+                        "type": "spec_updated",
+                        "spec": spec_update,
+                        "tool": "direct_edit",
+                        "reason": "Claude modified spec file directly",
+                    }
+
        yield {"type": "done", "tool_calls": tool_calls}

+    def _get_spec_path(self, study_id: str) -> Optional[Path]:
+        """Get the atomizer_spec.json path for a study."""
+        if not study_id:
+            return None
+
+        if study_id.startswith("draft_"):
+            spec_path = ATOMIZER_ROOT / "studies" / "_inbox" / study_id / "atomizer_spec.json"
+        else:
+            spec_path = ATOMIZER_ROOT / "studies" / study_id / "atomizer_spec.json"
+            if not spec_path.exists():
+                spec_path = ATOMIZER_ROOT / "studies" / study_id / "1_setup" / "atomizer_spec.json"
+
+        return spec_path if spec_path.exists() else None
+
+    def _get_file_hash(self, path: Optional[Path]) -> Optional[str]:
+        """Get MD5 hash of a file for change detection."""
+        if not path or not path.exists():
+            return None
+        try:
+            with open(path, "rb") as f:
+                return hashlib.md5(f.read()).hexdigest()
+        except Exception:
+            return None
+
    async def switch_mode(
        self,
        session_id: str,
@@ -313,6 +356,7 @@ class SessionManager:
        """
        import re
        import logging
+
        logger = logging.getLogger(__name__)

        modifications = []
@@ -327,14 +371,16 @@ class SessionManager:

        try:
            # Method 1: Look for JSON in code fences
-            code_block_pattern = r'```(?:json)?\s*([\s\S]*?)```'
+            code_block_pattern = r"```(?:json)?\s*([\s\S]*?)```"
            for match in re.finditer(code_block_pattern, response):
                block_content = match.group(1).strip()
                try:
                    obj = json.loads(block_content)
-                    if isinstance(obj, dict) and 'modification' in obj:
-                        logger.info(f"[CANVAS_MOD] Found modification in code fence: {obj['modification']}")
-                        modifications.append(obj['modification'])
+                    if isinstance(obj, dict) and "modification" in obj:
+                        logger.info(
+                            f"[CANVAS_MOD] Found modification in code fence: {obj['modification']}"
+                        )
+                        modifications.append(obj["modification"])
                except json.JSONDecodeError:
                    continue

@@ -342,7 +388,7 @@ class SessionManager:
            # This handles nested objects correctly
            i = 0
            while i < len(response):
-                if response[i] == '{':
+                if response[i] == "{":
                    # Found a potential JSON start, find matching close
                    brace_count = 1
                    j = i + 1
@@ -354,14 +400,14 @@ class SessionManager:

                        if escape_next:
                            escape_next = False
-                        elif char == '\\':
+                        elif char == "\\":
                            escape_next = True
                        elif char == '"' and not escape_next:
                            in_string = not in_string
                        elif not in_string:
-                            if char == '{':
+                            if char == "{":
                                brace_count += 1
-                            elif char == '}':
+                            elif char == "}":
                                brace_count -= 1
                        j += 1

@@ -369,11 +415,13 @@ class SessionManager:
                        potential_json = response[i:j]
                        try:
                            obj = json.loads(potential_json)
-                            if isinstance(obj, dict) and 'modification' in obj:
-                                mod = obj['modification']
+                            if isinstance(obj, dict) and "modification" in obj:
+                                mod = obj["modification"]
                                # Avoid duplicates
                                if mod not in modifications:
-                                    logger.info(f"[CANVAS_MOD] Found inline modification: action={mod.get('action')}, nodeType={mod.get('nodeType')}")
+                                    logger.info(
+                                        f"[CANVAS_MOD] Found inline modification: action={mod.get('action')}, nodeType={mod.get('nodeType')}"
+                                    )
                                    modifications.append(mod)
                        except json.JSONDecodeError as e:
                            # Not valid JSON, skip
@@ -388,6 +436,43 @@ class SessionManager:
        logger.info(f"[CANVAS_MOD] Extracted {len(modifications)} modification(s)")
        return modifications

+    async def _check_spec_updated(self, study_id: str) -> Optional[Dict]:
+        """
+        Check if the atomizer_spec.json was modified and return the updated spec.
+
+        For drafts in _inbox/, we check the spec file directly.
+        """
+        import logging
+
+        logger = logging.getLogger(__name__)
+
+        try:
+            # Determine spec path based on study_id
+            if study_id.startswith("draft_"):
+                spec_path = ATOMIZER_ROOT / "studies" / "_inbox" / study_id / "atomizer_spec.json"
+            else:
+                # Regular study path
+                spec_path = ATOMIZER_ROOT / "studies" / study_id / "atomizer_spec.json"
+                if not spec_path.exists():
+                    spec_path = (
+                        ATOMIZER_ROOT / "studies" / study_id / "1_setup" / "atomizer_spec.json"
+                    )
+
+            if not spec_path.exists():
+                logger.debug(f"[SPEC_CHECK] Spec not found at {spec_path}")
+                return None
+
+            # Read and return the spec
+            with open(spec_path, "r", encoding="utf-8") as f:
+                spec = json.load(f)
+
+            logger.info(f"[SPEC_CHECK] Loaded spec from {spec_path}")
+            return spec
+
+        except Exception as e:
+            logger.error(f"[SPEC_CHECK] Error checking spec: {e}")
+            return None
+
    def _build_mcp_config(self, mode: Literal["user", "power"]) -> dict:
        """Build MCP configuration for Claude"""
        return {