feat: Add panel management, validation, and error handling to canvas

Phase 1 - Panel Management System:
- Create usePanelStore.ts for centralized panel state management
- Add PanelContainer.tsx for draggable floating panels
- Create FloatingIntrospectionPanel.tsx (persistent, doesn't disappear on node click)
- Create ResultsPanel.tsx for trial result details
- Refactor NodeConfigPanelV2 to use panel store for introspection
- Integrate PanelContainer into CanvasView

Phase 2 - Pre-run Validation:
- Create specValidator.ts with comprehensive validation rules
- Add ValidationPanel (enhanced version with error navigation)
- Add Validate button to SpecRenderer with status indicator
- Block run if validation fails
- Check for: design vars, objectives, extractors, bounds, connections

Phase 3 - Error Handling & Recovery:
- Create ErrorPanel.tsx for displaying optimization errors
- Add error classification (nx_crash, solver_fail, extractor_error, etc.)
- Add recovery suggestions based on error type
- Update status endpoint to return error info
- Add _get_study_error_info helper to check error_status.json and DB
- Integrate error detection into status polling

Documentation:
- Add CANVAS_ROBUSTNESS_PLAN.md with full implementation plan
This commit is contained in:
2026-01-21 21:35:31 -05:00
parent e1c59a51c1
commit c224b16ac3
12 changed files with 2853 additions and 29 deletions

View File

@@ -15,6 +15,7 @@ import shutil
import subprocess
import psutil
import signal
import time
from datetime import datetime
# Add project root to path
@@ -155,6 +156,93 @@ def get_accurate_study_status(
return "paused"
def _coerce_epoch_seconds(value) -> Optional[float]:
    """Convert a stored timestamp to epoch seconds.

    Accepts numbers, numeric strings, and ISO-8601 strings (the format
    ``datetime.fromisoformat`` understands). Returns None when the value
    cannot be interpreted, so callers can choose their own fallback.
    """
    # bool is a subclass of int; True/False is never a meaningful timestamp.
    if isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            pass
        try:
            return datetime.fromisoformat(value).timestamp()
        except (ValueError, OverflowError, OSError):
            return None
    return None


def _error_from_status_file(results_dir: Path, recent_window_s: float) -> dict:
    """Read error details from ``error_status.json`` in *results_dir*, if present."""
    info: dict = {}
    error_file = results_dir / "error_status.json"
    if not error_file.exists():
        return info

    try:
        with open(error_file) as f:
            error_data = json.load(f)
    except (OSError, ValueError):
        # Unreadable or malformed file (JSONDecodeError and UnicodeDecodeError
        # are ValueErrors): best effort, let the other sources be consulted.
        return info
    if not isinstance(error_data, dict):
        return info

    info["error"] = error_data.get("error", "Unknown error")
    info["error_details"] = error_data.get("details")
    info["error_timestamp"] = error_data.get("timestamp")
    info["current_trial"] = error_data.get("trial")

    # Only a recent error overrides the computed study status.
    raw_timestamp = error_data.get("timestamp")
    if raw_timestamp:
        epoch = _coerce_epoch_seconds(raw_timestamp)
        if epoch is not None and time.time() - epoch < recent_window_s:
            info["status_override"] = "failed"
    return info


def _error_from_failed_trials(results_dir: Path) -> dict:
    """Report the most recently completed FAIL trial recorded in ``study.db``."""
    info: dict = {}
    study_db = results_dir / "study.db"
    if not study_db.exists():
        return info

    try:
        conn = sqlite3.connect(str(study_db), timeout=2.0)
    except sqlite3.Error:
        return info
    try:
        # Check for FAIL state trials (Optuna uses 'FAIL' not 'FAILED')
        failed = conn.execute("""
            SELECT number, datetime_complete
            FROM trials
            WHERE state = 'FAIL'
            ORDER BY datetime_complete DESC
            LIMIT 1
        """).fetchone()
    except sqlite3.Error:
        # Locked database, missing table, corrupt file: best effort only.
        return info
    finally:
        # Always release the connection, including when the query raises.
        conn.close()

    if not failed:
        return info

    trial_number, fail_time = failed
    info["error"] = f"Trial {trial_number} failed"
    info["current_trial"] = trial_number
    if fail_time:
        epoch = _coerce_epoch_seconds(fail_time)
        # Fall back to "now" when the stored completion time cannot be parsed.
        info["error_timestamp"] = epoch if epoch is not None else int(time.time())
    return info


def _error_from_log_tail(results_dir: Path, max_lines: int = 50) -> dict:
    """Scan the last *max_lines* lines of ``optimization.log`` for an error line."""
    from collections import deque

    info: dict = {}
    log_file = results_dir / "optimization.log"
    if not log_file.exists():
        return info

    try:
        # deque(maxlen=...) keeps only the tail while streaming the file, so a
        # large log is never held in memory; errors="replace" stops a stray
        # undecodable byte from hiding an otherwise readable error line.
        with open(log_file, "r", errors="replace") as f:
            tail = deque(f, maxlen=max_lines)
        for line in reversed(tail):
            lowered = line.lower()
            if "error" in lowered or "failed" in lowered or "exception" in lowered:
                info["error"] = line.strip()[:200]  # Truncate long messages
                info["error_timestamp"] = int(log_file.stat().st_mtime)
                break
    except OSError:
        pass
    return info


def _get_study_error_info(
    study_dir: Path,
    results_dir: Path,
    *,
    recent_error_window_s: float = 300.0,
) -> dict:
    """Get error information from a study if any errors occurred.

    Sources are consulted in order and the first one that yields an error wins:
      1. ``error_status.json`` (written by optimization process on error)
      2. The latest trial with state ``FAIL`` in ``study.db``
      3. The last 50 lines of ``optimization.log``

    Every source is best effort: unreadable or malformed data is skipped
    rather than raised.

    Args:
        study_dir: Study directory. Not read by this function; kept so the
            signature stays compatible with existing callers.
        results_dir: Directory holding ``error_status.json``, ``study.db`` and
            ``optimization.log``.
        recent_error_window_s: Maximum age in seconds of an
            ``error_status.json`` timestamp for the error to force the status
            to ``"failed"``. Defaults to 5 minutes.

    Returns:
        dict with any of the keys: error, error_details, error_timestamp,
        current_trial, status_override. Empty when no error was found.
    """
    error_info = _error_from_status_file(results_dir, recent_error_window_s)
    if "error" not in error_info:
        error_info = _error_from_failed_trials(results_dir)
    if "error" not in error_info:
        error_info = _error_from_log_tail(results_dir)
    return error_info
def _load_study_info(study_dir: Path, topic: Optional[str] = None) -> Optional[dict]:
"""Load study info from a study directory. Returns None if not a valid study."""
# Look for optimization config (check multiple locations)
@@ -394,9 +482,12 @@ async def get_study_status(study_id: str):
total_trials = config.get("optimization_settings", {}).get("n_trials", 50)
status = get_accurate_study_status(study_id, trial_count, total_trials, True)
# Check for error status
error_info = _get_study_error_info(study_dir, results_dir)
return {
"study_id": study_id,
"status": status,
"status": error_info.get("status_override") or status,
"progress": {
"current": trial_count,
"total": total_trials,
@@ -405,6 +496,10 @@ async def get_study_status(study_id: str):
"best_trial": best_trial,
"pruned_trials": pruned_count,
"config": config,
"error": error_info.get("error"),
"error_details": error_info.get("error_details"),
"error_timestamp": error_info.get("error_timestamp"),
"current_trial": error_info.get("current_trial"),
}
# Legacy: Read from JSON history
@@ -437,9 +532,12 @@ async def get_study_status(study_id: str):
status = "completed" if trial_count >= total_trials else "running"
# Check for error status
error_info = _get_study_error_info(study_dir, results_dir)
return {
"study_id": study_id,
"status": status,
"status": error_info.get("status_override") or status,
"progress": {
"current": trial_count,
"total": total_trials,
@@ -448,6 +546,10 @@ async def get_study_status(study_id: str):
"best_trial": best_trial,
"pruned_trials": pruned_count,
"config": config,
"error": error_info.get("error"),
"error_details": error_info.get("error_details"),
"error_timestamp": error_info.get("error_timestamp"),
"current_trial": error_info.get("current_trial"),
}
except FileNotFoundError: