feat: Add Studio UI, intake system, and extractor improvements

Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-27 12:02:30 -05:00
parent 3193831340
commit a26914bbe8
56 changed files with 14173 additions and 646 deletions

View File

@@ -1,11 +1,15 @@
"""
Session Manager
Manages persistent Claude Code sessions with MCP integration.
Manages persistent Claude Code sessions with direct file editing.
Fixed for Windows compatibility - uses subprocess.Popen with ThreadPoolExecutor.
Strategy: Claude edits atomizer_spec.json directly using Edit/Write tools
(no MCP dependency for reliability).
"""
import asyncio
import hashlib
import json
import os
import subprocess
@@ -26,6 +30,10 @@ MCP_SERVER_PATH = ATOMIZER_ROOT / "mcp-server" / "atomizer-tools"
# Thread pool for subprocess operations (Windows compatible)
_executor = ThreadPoolExecutor(max_workers=4)
import logging
logger = logging.getLogger(__name__)
@dataclass
class ClaudeSession:
@@ -130,6 +138,7 @@ class SessionManager:
Send a message to a session and stream the response.
Uses synchronous subprocess.Popen via ThreadPoolExecutor for Windows compatibility.
Claude edits atomizer_spec.json directly using Edit/Write tools (no MCP).
Args:
session_id: The session ID
@@ -147,45 +156,48 @@ class SessionManager:
# Store user message
self.store.add_message(session_id, "user", message)
# Get spec path and hash BEFORE Claude runs (to detect changes)
spec_path = self._get_spec_path(session.study_id) if session.study_id else None
spec_hash_before = self._get_file_hash(spec_path) if spec_path else None
# Build context with conversation history AND canvas state
history = self.store.get_history(session_id, limit=10)
full_prompt = self.context_builder.build(
mode=session.mode,
study_id=session.study_id,
conversation_history=history[:-1],
canvas_state=canvas_state, # Pass canvas state for context
canvas_state=canvas_state,
spec_path=str(spec_path) if spec_path else None, # Tell Claude where the spec is
)
full_prompt += f"\n\nUser: {message}\n\nRespond helpfully and concisely:"
# Build CLI arguments
# Build CLI arguments - NO MCP for reliability
cli_args = ["claude", "--print"]
# Ensure MCP config exists
mcp_config_path = ATOMIZER_ROOT / f".claude-mcp-{session_id}.json"
if not mcp_config_path.exists():
mcp_config = self._build_mcp_config(session.mode)
with open(mcp_config_path, "w") as f:
json.dump(mcp_config, f)
cli_args.extend(["--mcp-config", str(mcp_config_path)])
if session.mode == "user":
cli_args.extend([
"--allowedTools",
"Read Write(**/STUDY_REPORT.md) Write(**/3_results/*.md) Bash(python:*) mcp__atomizer-tools__*"
])
# User mode: limited tools
cli_args.extend(
[
"--allowedTools",
"Read Bash(python:*)",
]
)
else:
# Power mode: full access to edit files
cli_args.append("--dangerously-skip-permissions")
cli_args.append("-") # Read from stdin
full_response = ""
tool_calls: List[Dict] = []
process: Optional[subprocess.Popen] = None
try:
loop = asyncio.get_event_loop()
# Run subprocess in thread pool (Windows compatible)
def run_claude():
nonlocal process
try:
process = subprocess.Popen(
cli_args,
@@ -194,8 +206,8 @@ class SessionManager:
stderr=subprocess.PIPE,
cwd=str(ATOMIZER_ROOT),
text=True,
encoding='utf-8',
errors='replace',
encoding="utf-8",
errors="replace",
)
stdout, stderr = process.communicate(input=full_prompt, timeout=300)
return {
@@ -204,10 +216,13 @@ class SessionManager:
"returncode": process.returncode,
}
except subprocess.TimeoutExpired:
process.kill()
if process:
process.kill()
return {"error": "Response timeout (5 minutes)"}
except FileNotFoundError:
return {"error": "Claude CLI not found in PATH. Install with: npm install -g @anthropic-ai/claude-code"}
return {
"error": "Claude CLI not found in PATH. Install with: npm install -g @anthropic-ai/claude-code"
}
except Exception as e:
return {"error": str(e)}
@@ -219,24 +234,14 @@ class SessionManager:
full_response = result["stdout"] or ""
if full_response:
# Check if response contains canvas modifications (from MCP tools)
import logging
logger = logging.getLogger(__name__)
modifications = self._extract_canvas_modifications(full_response)
logger.info(f"[SEND_MSG] Found {len(modifications)} canvas modifications to send")
for mod in modifications:
logger.info(f"[SEND_MSG] Sending canvas_modification: {mod.get('action')} {mod.get('nodeType')}")
yield {"type": "canvas_modification", "modification": mod}
# Always send the text response
# Always send the text response first
yield {"type": "text", "content": full_response}
if result["returncode"] != 0 and result["stderr"]:
yield {"type": "error", "message": f"CLI error: {result['stderr']}"}
logger.warning(f"[SEND_MSG] CLI stderr: {result['stderr']}")
except Exception as e:
logger.error(f"[SEND_MSG] Exception: {e}")
yield {"type": "error", "message": str(e)}
# Store assistant response
@@ -248,8 +253,46 @@ class SessionManager:
tool_calls=tool_calls if tool_calls else None,
)
# Check if spec was modified by comparing hashes
if spec_path and session.mode == "power" and session.study_id:
spec_hash_after = self._get_file_hash(spec_path)
if spec_hash_before != spec_hash_after:
logger.info(f"[SEND_MSG] Spec file was modified! Sending update.")
spec_update = await self._check_spec_updated(session.study_id)
if spec_update:
yield {
"type": "spec_updated",
"spec": spec_update,
"tool": "direct_edit",
"reason": "Claude modified spec file directly",
}
yield {"type": "done", "tool_calls": tool_calls}
def _get_spec_path(self, study_id: str) -> Optional[Path]:
"""Get the atomizer_spec.json path for a study."""
if not study_id:
return None
if study_id.startswith("draft_"):
spec_path = ATOMIZER_ROOT / "studies" / "_inbox" / study_id / "atomizer_spec.json"
else:
spec_path = ATOMIZER_ROOT / "studies" / study_id / "atomizer_spec.json"
if not spec_path.exists():
spec_path = ATOMIZER_ROOT / "studies" / study_id / "1_setup" / "atomizer_spec.json"
return spec_path if spec_path.exists() else None
def _get_file_hash(self, path: Optional[Path]) -> Optional[str]:
"""Get MD5 hash of a file for change detection."""
if not path or not path.exists():
return None
try:
with open(path, "rb") as f:
return hashlib.md5(f.read()).hexdigest()
except Exception:
return None
async def switch_mode(
self,
session_id: str,
@@ -313,6 +356,7 @@ class SessionManager:
"""
import re
import logging
logger = logging.getLogger(__name__)
modifications = []
@@ -327,14 +371,16 @@ class SessionManager:
try:
# Method 1: Look for JSON in code fences
code_block_pattern = r'```(?:json)?\s*([\s\S]*?)```'
code_block_pattern = r"```(?:json)?\s*([\s\S]*?)```"
for match in re.finditer(code_block_pattern, response):
block_content = match.group(1).strip()
try:
obj = json.loads(block_content)
if isinstance(obj, dict) and 'modification' in obj:
logger.info(f"[CANVAS_MOD] Found modification in code fence: {obj['modification']}")
modifications.append(obj['modification'])
if isinstance(obj, dict) and "modification" in obj:
logger.info(
f"[CANVAS_MOD] Found modification in code fence: {obj['modification']}"
)
modifications.append(obj["modification"])
except json.JSONDecodeError:
continue
@@ -342,7 +388,7 @@ class SessionManager:
# This handles nested objects correctly
i = 0
while i < len(response):
if response[i] == '{':
if response[i] == "{":
# Found a potential JSON start, find matching close
brace_count = 1
j = i + 1
@@ -354,14 +400,14 @@ class SessionManager:
if escape_next:
escape_next = False
elif char == '\\':
elif char == "\\":
escape_next = True
elif char == '"' and not escape_next:
in_string = not in_string
elif not in_string:
if char == '{':
if char == "{":
brace_count += 1
elif char == '}':
elif char == "}":
brace_count -= 1
j += 1
@@ -369,11 +415,13 @@ class SessionManager:
potential_json = response[i:j]
try:
obj = json.loads(potential_json)
if isinstance(obj, dict) and 'modification' in obj:
mod = obj['modification']
if isinstance(obj, dict) and "modification" in obj:
mod = obj["modification"]
# Avoid duplicates
if mod not in modifications:
logger.info(f"[CANVAS_MOD] Found inline modification: action={mod.get('action')}, nodeType={mod.get('nodeType')}")
logger.info(
f"[CANVAS_MOD] Found inline modification: action={mod.get('action')}, nodeType={mod.get('nodeType')}"
)
modifications.append(mod)
except json.JSONDecodeError as e:
# Not valid JSON, skip
@@ -388,6 +436,43 @@ class SessionManager:
logger.info(f"[CANVAS_MOD] Extracted {len(modifications)} modification(s)")
return modifications
async def _check_spec_updated(self, study_id: str) -> Optional[Dict]:
"""
Check if the atomizer_spec.json was modified and return the updated spec.
For drafts in _inbox/, we check the spec file directly.
"""
import logging
logger = logging.getLogger(__name__)
try:
# Determine spec path based on study_id
if study_id.startswith("draft_"):
spec_path = ATOMIZER_ROOT / "studies" / "_inbox" / study_id / "atomizer_spec.json"
else:
# Regular study path
spec_path = ATOMIZER_ROOT / "studies" / study_id / "atomizer_spec.json"
if not spec_path.exists():
spec_path = (
ATOMIZER_ROOT / "studies" / study_id / "1_setup" / "atomizer_spec.json"
)
if not spec_path.exists():
logger.debug(f"[SPEC_CHECK] Spec not found at {spec_path}")
return None
# Read and return the spec
with open(spec_path, "r", encoding="utf-8") as f:
spec = json.load(f)
logger.info(f"[SPEC_CHECK] Loaded spec from {spec_path}")
return spec
except Exception as e:
logger.error(f"[SPEC_CHECK] Error checking spec: {e}")
return None
def _build_mcp_config(self, mode: Literal["user", "power"]) -> dict:
"""Build MCP configuration for Claude"""
return {