""" Study State Detector for Atomizer This module provides utilities to detect and summarize the state of an optimization study. Used by Claude sessions to quickly understand study context on initialization. """ import json import sqlite3 from pathlib import Path from typing import Dict, Any, Optional, List from datetime import datetime def detect_study_state(study_dir: Path) -> Dict[str, Any]: """ Detect the current state of an optimization study. Args: study_dir: Path to the study directory Returns: Dictionary with study state information """ study_dir = Path(study_dir) state = { "is_study": False, "study_name": study_dir.name, "status": "unknown", "config": None, "fea_trials": 0, "nn_trials": 0, "pareto_solutions": 0, "best_trial": None, "last_activity": None, "has_turbo_report": False, "has_surrogate": False, "warnings": [], "next_actions": [] } # Check if this is a valid study directory config_path = study_dir / "optimization_config.json" if not config_path.exists(): # Try 1_setup subdirectory config_path = study_dir / "1_setup" / "optimization_config.json" if not config_path.exists(): state["warnings"].append("No optimization_config.json found") return state state["is_study"] = True # Load config try: with open(config_path, 'r') as f: config = json.load(f) state["config"] = _summarize_config(config) except Exception as e: state["warnings"].append(f"Failed to parse config: {e}") # Check results directory results_dir = study_dir / "2_results" if not results_dir.exists(): state["status"] = "not_started" state["next_actions"].append("Run: python run_optimization.py --discover") return state # Check study.db for FEA trials db_path = results_dir / "study.db" if db_path.exists(): fea_stats = _query_study_db(db_path) state.update(fea_stats) # Check nn_study.db for NN trials nn_db_path = results_dir / "nn_study.db" if nn_db_path.exists(): nn_stats = _query_study_db(nn_db_path, prefix="nn_") state["nn_trials"] = nn_stats.get("nn_fea_trials", 0) # Check for turbo report turbo_report_path = results_dir / "turbo_report.json" if turbo_report_path.exists(): state["has_turbo_report"] = True try: with open(turbo_report_path, 'r') as f: turbo = json.load(f) state["turbo_summary"] = { "mode": turbo.get("mode"), "nn_trials": turbo.get("total_nn_trials", 0), "fea_validations": turbo.get("fea_validations", 0), "time_minutes": round(turbo.get("time_minutes", 0), 1) } except Exception: pass # Check for trained surrogate surrogate_path = results_dir / "surrogate.pt" state["has_surrogate"] = surrogate_path.exists() # Determine overall status state["status"] = _determine_status(state) # Suggest next actions state["next_actions"] = _suggest_next_actions(state) return state def _summarize_config(config: Dict) -> Dict[str, Any]: """Extract key information from config.""" # Handle different config formats variables = config.get("design_variables", config.get("variables", [])) objectives = config.get("objectives", []) constraints = config.get("constraints", []) # Get variable names (handle different key names) var_names = [] for v in variables: name = v.get("parameter") or v.get("name") or v.get("expression_name", "unknown") var_names.append(name) # Get objective names obj_names = [] for o in objectives: name = o.get("name") or o.get("metric", "unknown") direction = o.get("goal") or o.get("direction", "minimize") obj_names.append(f"{name} ({direction})") return { "n_variables": len(variables), "n_objectives": len(objectives), "n_constraints": len(constraints), "variable_names": var_names[:5], # First 5 only "objective_names": obj_names, "study_type": "multi_objective" if len(objectives) > 1 else "single_objective" } def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]: """Query Optuna study database for statistics.""" stats = { f"{prefix}fea_trials": 0, f"{prefix}completed_trials": 0, f"{prefix}failed_trials": 0, f"{prefix}pareto_solutions": 0, "best_trial": None, "last_activity": None } try: conn = sqlite3.connect(str(db_path)) cursor = conn.cursor() # Count trials by state cursor.execute(""" SELECT state, COUNT(*) FROM trials GROUP BY state """) for state, count in cursor.fetchall(): if state == "COMPLETE": stats[f"{prefix}completed_trials"] = count stats[f"{prefix}fea_trials"] = count elif state == "FAIL": stats[f"{prefix}failed_trials"] = count # Get last activity time cursor.execute(""" SELECT MAX(datetime_complete) FROM trials WHERE datetime_complete IS NOT NULL """) result = cursor.fetchone() if result and result[0]: stats["last_activity"] = result[0] # Get best trial (for single objective) cursor.execute(""" SELECT trial_id, value FROM trial_values WHERE objective_id = 0 ORDER BY value ASC LIMIT 1 """) result = cursor.fetchone() if result: stats["best_trial"] = {"trial_id": result[0], "value": result[1]} # Count Pareto solutions (trials with user_attr pareto=True or non-dominated) # Simplified: count distinct trials in trial_values cursor.execute(""" SELECT COUNT(DISTINCT trial_id) FROM trial_values """) result = cursor.fetchone() if result: # For multi-objective, this is a rough estimate stats[f"{prefix}pareto_solutions"] = min(result[0], 50) # Cap at 50 conn.close() except Exception as e: stats["db_error"] = str(e) return stats def _determine_status(state: Dict) -> str: """Determine overall study status.""" if state["fea_trials"] == 0: return "not_started" elif state["fea_trials"] < 3: return "discovery" elif state["fea_trials"] < 10: return "validation" elif state["has_turbo_report"]: return "turbo_complete" elif state["has_surrogate"]: return "training_complete" elif state["fea_trials"] >= 50: return "fea_complete" else: return "in_progress" def _suggest_next_actions(state: Dict) -> List[str]: """Suggest next actions based on study state.""" actions = [] if state["status"] == "not_started": actions.append("Run: python run_optimization.py --discover") elif state["status"] == "discovery": actions.append("Run: python run_optimization.py --validate") elif state["status"] == "validation": actions.append("Run: python run_optimization.py --test") actions.append("Or run full: python run_optimization.py --run --trials 50") elif state["status"] == "in_progress": actions.append("Continue: python run_optimization.py --resume") elif state["status"] == "fea_complete": actions.append("Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db") actions.append("Or run turbo: python run_nn_optimization.py --turbo") elif state["status"] == "turbo_complete": actions.append("View results in dashboard: cd atomizer-dashboard && npm run dev") actions.append("Generate report: python generate_report.py") return actions def format_study_summary(state: Dict) -> str: """Format study state as a human-readable summary.""" if not state["is_study"]: return f"❌ Not a valid study directory: {state['study_name']}" lines = [ f"📊 **Study: {state['study_name']}**", f"Status: {state['status'].replace('_', ' ').title()}", "" ] if state["config"]: cfg = state["config"] lines.append(f"**Configuration:**") lines.append(f"- Variables: {cfg['n_variables']} ({', '.join(cfg['variable_names'][:3])}{'...' if cfg['n_variables'] > 3 else ''})") lines.append(f"- Objectives: {cfg['n_objectives']} ({', '.join(cfg['objective_names'])})") lines.append(f"- Constraints: {cfg['n_constraints']}") lines.append(f"- Type: {cfg['study_type']}") lines.append("") lines.append("**Progress:**") lines.append(f"- FEA trials: {state['fea_trials']}") if state["nn_trials"] > 0: lines.append(f"- NN trials: {state['nn_trials']}") if state["has_turbo_report"] and "turbo_summary" in state: ts = state["turbo_summary"] lines.append(f"- Turbo mode: {ts['nn_trials']} NN + {ts['fea_validations']} FEA validations ({ts['time_minutes']} min)") if state["last_activity"]: lines.append(f"- Last activity: {state['last_activity']}") lines.append("") if state["next_actions"]: lines.append("**Suggested Next Actions:**") for action in state["next_actions"]: lines.append(f" → {action}") if state["warnings"]: lines.append("") lines.append("**Warnings:**") for warning in state["warnings"]: lines.append(f" ⚠️ {warning}") return "\n".join(lines) def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]: """Get state of all studies in the Atomizer studies directory.""" studies_dir = atomizer_root / "studies" if not studies_dir.exists(): return [] studies = [] for study_path in studies_dir.iterdir(): if study_path.is_dir() and not study_path.name.startswith("."): state = detect_study_state(study_path) if state["is_study"]: studies.append(state) # Sort by last activity (most recent first) studies.sort( key=lambda s: s.get("last_activity") or "1970-01-01", reverse=True ) return studies if __name__ == "__main__": import sys if len(sys.argv) > 1: study_path = Path(sys.argv[1]) else: # Default to current directory study_path = Path.cwd() state = detect_study_state(study_path) print(format_study_summary(state))