feat: Add panel management, validation, and error handling to canvas
Phase 1 - Panel Management System:
- Create usePanelStore.ts for centralized panel state management
- Add PanelContainer.tsx for draggable floating panels
- Create FloatingIntrospectionPanel.tsx (persistent, doesn't disappear on node click)
- Create ResultsPanel.tsx for trial result details
- Refactor NodeConfigPanelV2 to use panel store for introspection
- Integrate PanelContainer into CanvasView

Phase 2 - Pre-run Validation:
- Create specValidator.ts with comprehensive validation rules
- Add ValidationPanel (enhanced version with error navigation)
- Add Validate button to SpecRenderer with status indicator
- Block run if validation fails
- Check for: design vars, objectives, extractors, bounds, connections

Phase 3 - Error Handling & Recovery:
- Create ErrorPanel.tsx for displaying optimization errors
- Add error classification (nx_crash, solver_fail, extractor_error, etc.)
- Add recovery suggestions based on error type
- Update status endpoint to return error info
- Add _get_study_error_info helper to check error_status.json and DB
- Integrate error detection into status polling

Documentation:
- Add CANVAS_ROBUSTNESS_PLAN.md with full implementation plan
This commit is contained in:
@@ -15,6 +15,7 @@ import shutil
|
||||
import subprocess
|
||||
import psutil
|
||||
import signal
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
# Add project root to path
|
||||
@@ -155,6 +156,93 @@ def get_accurate_study_status(
|
||||
return "paused"
|
||||
|
||||
|
||||
def _get_study_error_info(study_dir: Path, results_dir: Path) -> dict:
|
||||
"""Get error information from study if any errors occurred.
|
||||
|
||||
Checks for:
|
||||
1. error_status.json file (written by optimization process on error)
|
||||
2. Failed trials in database
|
||||
3. Error logs
|
||||
|
||||
Returns:
|
||||
dict with keys: error, error_details, error_timestamp, current_trial, status_override
|
||||
"""
|
||||
error_info = {}
|
||||
|
||||
# Check for error_status.json (written by optimization process)
|
||||
error_file = results_dir / "error_status.json"
|
||||
if error_file.exists():
|
||||
try:
|
||||
with open(error_file) as f:
|
||||
error_data = json.load(f)
|
||||
error_info["error"] = error_data.get("error", "Unknown error")
|
||||
error_info["error_details"] = error_data.get("details")
|
||||
error_info["error_timestamp"] = error_data.get("timestamp")
|
||||
error_info["current_trial"] = error_data.get("trial")
|
||||
|
||||
# If error is recent (within last 5 minutes), set status to failed
|
||||
if error_data.get("timestamp"):
|
||||
error_age = time.time() - error_data["timestamp"]
|
||||
if error_age < 300: # 5 minutes
|
||||
error_info["status_override"] = "failed"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check for failed trials in database
|
||||
study_db = results_dir / "study.db"
|
||||
if study_db.exists() and "error" not in error_info:
|
||||
try:
|
||||
conn = sqlite3.connect(str(study_db), timeout=2.0)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check for FAIL state trials (Optuna uses 'FAIL' not 'FAILED')
|
||||
cursor.execute("""
|
||||
SELECT number, datetime_complete
|
||||
FROM trials
|
||||
WHERE state = 'FAIL'
|
||||
ORDER BY datetime_complete DESC
|
||||
LIMIT 1
|
||||
""")
|
||||
failed = cursor.fetchone()
|
||||
|
||||
if failed:
|
||||
trial_number, fail_time = failed
|
||||
error_info["error"] = f"Trial {trial_number} failed"
|
||||
error_info["current_trial"] = trial_number
|
||||
# Parse datetime to timestamp if available
|
||||
if fail_time:
|
||||
try:
|
||||
from datetime import datetime
|
||||
|
||||
dt = datetime.fromisoformat(fail_time)
|
||||
error_info["error_timestamp"] = dt.timestamp()
|
||||
except Exception:
|
||||
error_info["error_timestamp"] = int(time.time())
|
||||
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check optimization log for errors
|
||||
log_file = results_dir / "optimization.log"
|
||||
if log_file.exists() and "error" not in error_info:
|
||||
try:
|
||||
# Read last 50 lines of log
|
||||
with open(log_file, "r") as f:
|
||||
lines = f.readlines()[-50:]
|
||||
|
||||
for line in reversed(lines):
|
||||
line_lower = line.lower()
|
||||
if "error" in line_lower or "failed" in line_lower or "exception" in line_lower:
|
||||
error_info["error"] = line.strip()[:200] # Truncate long messages
|
||||
error_info["error_timestamp"] = int(log_file.stat().st_mtime)
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return error_info
|
||||
|
||||
|
||||
def _load_study_info(study_dir: Path, topic: Optional[str] = None) -> Optional[dict]:
|
||||
"""Load study info from a study directory. Returns None if not a valid study."""
|
||||
# Look for optimization config (check multiple locations)
|
||||
@@ -394,9 +482,12 @@ async def get_study_status(study_id: str):
|
||||
total_trials = config.get("optimization_settings", {}).get("n_trials", 50)
|
||||
status = get_accurate_study_status(study_id, trial_count, total_trials, True)
|
||||
|
||||
# Check for error status
|
||||
error_info = _get_study_error_info(study_dir, results_dir)
|
||||
|
||||
return {
|
||||
"study_id": study_id,
|
||||
"status": status,
|
||||
"status": error_info.get("status_override") or status,
|
||||
"progress": {
|
||||
"current": trial_count,
|
||||
"total": total_trials,
|
||||
@@ -405,6 +496,10 @@ async def get_study_status(study_id: str):
|
||||
"best_trial": best_trial,
|
||||
"pruned_trials": pruned_count,
|
||||
"config": config,
|
||||
"error": error_info.get("error"),
|
||||
"error_details": error_info.get("error_details"),
|
||||
"error_timestamp": error_info.get("error_timestamp"),
|
||||
"current_trial": error_info.get("current_trial"),
|
||||
}
|
||||
|
||||
# Legacy: Read from JSON history
|
||||
@@ -437,9 +532,12 @@ async def get_study_status(study_id: str):
|
||||
|
||||
status = "completed" if trial_count >= total_trials else "running"
|
||||
|
||||
# Check for error status
|
||||
error_info = _get_study_error_info(study_dir, results_dir)
|
||||
|
||||
return {
|
||||
"study_id": study_id,
|
||||
"status": status,
|
||||
"status": error_info.get("status_override") or status,
|
||||
"progress": {
|
||||
"current": trial_count,
|
||||
"total": total_trials,
|
||||
@@ -448,6 +546,10 @@ async def get_study_status(study_id: str):
|
||||
"best_trial": best_trial,
|
||||
"pruned_trials": pruned_count,
|
||||
"config": config,
|
||||
"error": error_info.get("error"),
|
||||
"error_details": error_info.get("error_details"),
|
||||
"error_timestamp": error_info.get("error_timestamp"),
|
||||
"current_trial": error_info.get("current_trial"),
|
||||
}
|
||||
|
||||
except FileNotFoundError:
|
||||
|
||||
Reference in New Issue
Block a user