# Atomizer/optimization_engine/study/state.py
# (Copied from the repository file view: 323 lines, 10 KiB, Python.)

"""
Study State Detector for Atomizer
This module provides utilities to detect and summarize the state of an optimization study.
Used by Claude sessions to quickly understand study context on initialization.
"""
import json
import sqlite3
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime
def detect_study_state(study_dir: Path) -> Dict[str, Any]:
    """
    Detect the current state of an optimization study.

    The study is recognised by an ``optimization_config.json`` located either
    directly in ``study_dir`` or in its ``1_setup`` subdirectory.  Progress is
    then read from the ``2_results`` subdirectory (``study.db``,
    ``nn_study.db``, ``turbo_report.json`` and ``surrogate.pt``).

    Args:
        study_dir: Path to the study directory (a ``str`` is accepted too).

    Returns:
        Dictionary with study state information.  Problems encountered while
        reading the config, the databases or the turbo report are collected
        in ``state["warnings"]`` instead of being raised.
    """
    study_dir = Path(study_dir)
    state = {
        "is_study": False,
        # Path(".").name is an empty string, so fall back to the resolved
        # directory name when a relative "." style path is given.
        "study_name": study_dir.name or study_dir.resolve().name,
        "status": "unknown",
        "config": None,
        "fea_trials": 0,
        "nn_trials": 0,
        "pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None,
        "has_turbo_report": False,
        "has_surrogate": False,
        "warnings": [],
        "next_actions": [],
    }

    # Check if this is a valid study directory
    config_path = study_dir / "optimization_config.json"
    if not config_path.exists():
        # Try 1_setup subdirectory
        config_path = study_dir / "1_setup" / "optimization_config.json"
        if not config_path.exists():
            state["warnings"].append("No optimization_config.json found")
            return state
    state["is_study"] = True

    # Load config; a broken config is reported but does not stop detection.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        state["config"] = _summarize_config(config)
    except Exception as e:
        state["warnings"].append(f"Failed to parse config: {e}")

    # Check results directory
    results_dir = study_dir / "2_results"
    if not results_dir.exists():
        state["status"] = "not_started"
        state["next_actions"].append("Run: python run_optimization.py --discover")
        return state

    # Check study.db for FEA trials
    db_path = results_dir / "study.db"
    if db_path.exists():
        fea_stats = _query_study_db(db_path)
        state.update(fea_stats)
        # Surface database problems instead of leaving them in a hidden key.
        if fea_stats.get("db_error"):
            state["warnings"].append(
                f"Failed to read study.db: {fea_stats['db_error']}"
            )

    # Check nn_study.db for NN trials
    nn_db_path = results_dir / "nn_study.db"
    if nn_db_path.exists():
        nn_stats = _query_study_db(nn_db_path, prefix="nn_")
        state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)
        if nn_stats.get("db_error"):
            state["warnings"].append(
                f"Failed to read nn_study.db: {nn_stats['db_error']}"
            )

    # Check for turbo report
    turbo_report_path = results_dir / "turbo_report.json"
    if turbo_report_path.exists():
        state["has_turbo_report"] = True
        try:
            with open(turbo_report_path, "r", encoding="utf-8") as f:
                turbo = json.load(f)
            state["turbo_summary"] = {
                "mode": turbo.get("mode"),
                "nn_trials": turbo.get("total_nn_trials", 0),
                "fea_validations": turbo.get("fea_validations", 0),
                "time_minutes": round(turbo.get("time_minutes", 0), 1),
            }
        except Exception as e:
            # Still best-effort (no "turbo_summary" key), but no longer silent.
            state["warnings"].append(f"Failed to parse turbo_report.json: {e}")

    # Check for trained surrogate
    surrogate_path = results_dir / "surrogate.pt"
    state["has_surrogate"] = surrogate_path.exists()

    # Determine overall status
    state["status"] = _determine_status(state)

    # Suggest next actions
    state["next_actions"] = _suggest_next_actions(state)
    return state
def _summarize_config(config: Dict) -> Dict[str, Any]:
"""Extract key information from config."""
# Handle different config formats
variables = config.get("design_variables", config.get("variables", []))
objectives = config.get("objectives", [])
constraints = config.get("constraints", [])
# Get variable names (handle different key names)
var_names = []
for v in variables:
name = v.get("parameter") or v.get("name") or v.get("expression_name", "unknown")
var_names.append(name)
# Get objective names
obj_names = []
for o in objectives:
name = o.get("name") or o.get("metric", "unknown")
direction = o.get("goal") or o.get("direction", "minimize")
obj_names.append(f"{name} ({direction})")
return {
"n_variables": len(variables),
"n_objectives": len(objectives),
"n_constraints": len(constraints),
"variable_names": var_names[:5], # First 5 only
"objective_names": obj_names,
"study_type": "multi_objective" if len(objectives) > 1 else "single_objective"
}
def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:
"""Query Optuna study database for statistics."""
stats = {
f"{prefix}fea_trials": 0,
f"{prefix}completed_trials": 0,
f"{prefix}failed_trials": 0,
f"{prefix}pareto_solutions": 0,
"best_trial": None,
"last_activity": None
}
try:
conn = sqlite3.connect(str(db_path))
cursor = conn.cursor()
# Count trials by state
cursor.execute("""
SELECT state, COUNT(*) FROM trials
GROUP BY state
""")
for state, count in cursor.fetchall():
if state == "COMPLETE":
stats[f"{prefix}completed_trials"] = count
stats[f"{prefix}fea_trials"] = count
elif state == "FAIL":
stats[f"{prefix}failed_trials"] = count
# Get last activity time
cursor.execute("""
SELECT MAX(datetime_complete) FROM trials
WHERE datetime_complete IS NOT NULL
""")
result = cursor.fetchone()
if result and result[0]:
stats["last_activity"] = result[0]
# Get best trial (for single objective)
cursor.execute("""
SELECT trial_id, value FROM trial_values
WHERE objective_id = 0
ORDER BY value ASC
LIMIT 1
""")
result = cursor.fetchone()
if result:
stats["best_trial"] = {"trial_id": result[0], "value": result[1]}
# Count Pareto solutions (trials with user_attr pareto=True or non-dominated)
# Simplified: count distinct trials in trial_values
cursor.execute("""
SELECT COUNT(DISTINCT trial_id) FROM trial_values
""")
result = cursor.fetchone()
if result:
# For multi-objective, this is a rough estimate
stats[f"{prefix}pareto_solutions"] = min(result[0], 50) # Cap at 50
conn.close()
except Exception as e:
stats["db_error"] = str(e)
return stats
def _determine_status(state: Dict) -> str:
"""Determine overall study status."""
if state["fea_trials"] == 0:
return "not_started"
elif state["fea_trials"] < 3:
return "discovery"
elif state["fea_trials"] < 10:
return "validation"
elif state["has_turbo_report"]:
return "turbo_complete"
elif state["has_surrogate"]:
return "training_complete"
elif state["fea_trials"] >= 50:
return "fea_complete"
else:
return "in_progress"
def _suggest_next_actions(state: Dict) -> List[str]:
"""Suggest next actions based on study state."""
actions = []
if state["status"] == "not_started":
actions.append("Run: python run_optimization.py --discover")
elif state["status"] == "discovery":
actions.append("Run: python run_optimization.py --validate")
elif state["status"] == "validation":
actions.append("Run: python run_optimization.py --test")
actions.append("Or run full: python run_optimization.py --run --trials 50")
elif state["status"] == "in_progress":
actions.append("Continue: python run_optimization.py --resume")
elif state["status"] == "fea_complete":
actions.append("Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db")
actions.append("Or run turbo: python run_nn_optimization.py --turbo")
elif state["status"] == "turbo_complete":
actions.append("View results in dashboard: cd atomizer-dashboard && npm run dev")
actions.append("Generate report: python generate_report.py")
return actions
def format_study_summary(state: Dict) -> str:
    """Format study state as a human-readable summary.

    Args:
        state: Study state dictionary with the keys ``is_study``,
            ``study_name``, ``status``, ``config``, ``fea_trials``,
            ``nn_trials``, ``has_turbo_report``, ``last_activity``,
            ``next_actions`` and ``warnings`` (plus ``turbo_summary`` when a
            turbo report was parsed).

    Returns:
        Multi-line summary text, or a one-line notice when ``state`` does not
        describe a valid study.
    """
    if not state["is_study"]:
        return f"❌ Not a valid study directory: {state['study_name']}"

    lines = [
        f"📊 **Study: {state['study_name']}**",
        f"Status: {state['status'].replace('_', ' ').title()}",
        "",
    ]

    if state["config"]:
        cfg = state["config"]
        # Names are coerced to text so that a null or non-string name in the
        # config cannot make str.join raise and abort the whole summary.
        shown_vars = ", ".join(
            str(name) if name else "unknown" for name in cfg["variable_names"][:3]
        )
        more_vars = "..." if cfg["n_variables"] > 3 else ""
        objective_list = ", ".join(str(name) for name in cfg["objective_names"])
        lines.append("**Configuration:**")
        lines.append(f"- Variables: {cfg['n_variables']} ({shown_vars}{more_vars})")
        lines.append(f"- Objectives: {cfg['n_objectives']} ({objective_list})")
        lines.append(f"- Constraints: {cfg['n_constraints']}")
        lines.append(f"- Type: {cfg['study_type']}")
        lines.append("")

    lines.append("**Progress:**")
    lines.append(f"- FEA trials: {state['fea_trials']}")
    if state["nn_trials"] > 0:
        lines.append(f"- NN trials: {state['nn_trials']}")
    # The summary is only present when the turbo report could be parsed.
    if state["has_turbo_report"] and "turbo_summary" in state:
        ts = state["turbo_summary"]
        lines.append(f"- Turbo mode: {ts['nn_trials']} NN + {ts['fea_validations']} FEA validations ({ts['time_minutes']} min)")
    if state["last_activity"]:
        lines.append(f"- Last activity: {state['last_activity']}")
    lines.append("")

    if state["next_actions"]:
        lines.append("**Suggested Next Actions:**")
        for action in state["next_actions"]:
            lines.append(f"{action}")

    if state["warnings"]:
        lines.append("")
        lines.append("**Warnings:**")
        for warning in state["warnings"]:
            lines.append(f" ⚠️ {warning}")

    return "\n".join(lines)
def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:
    """Get state of all studies in the Atomizer studies directory.

    Args:
        atomizer_root: Directory that contains the ``studies`` folder
            (a ``str`` is accepted too, as in ``detect_study_state``).

    Returns:
        State dictionaries of every valid study found directly under
        ``<atomizer_root>/studies``, most recently active first.  Hidden
        directories (names starting with ``"."``) and plain files are
        ignored.  An empty list is returned when ``studies`` is missing or
        is not a directory.
    """
    # Wrap in Path so string arguments work, mirroring detect_study_state.
    studies_dir = Path(atomizer_root) / "studies"
    # is_dir() rather than exists(): iterating a regular file would raise.
    if not studies_dir.is_dir():
        return []

    studies = []
    for study_path in studies_dir.iterdir():
        if not study_path.is_dir() or study_path.name.startswith("."):
            continue
        state = detect_study_state(study_path)
        if state["is_study"]:
            studies.append(state)

    # Sort by last activity (most recent first); studies without recorded
    # activity get an early placeholder date and therefore sort last.
    studies.sort(
        key=lambda s: s.get("last_activity") or "1970-01-01",
        reverse=True,
    )
    return studies
if __name__ == "__main__":
    import sys

    # Use the directory named by the first command-line argument; without
    # one, default to the current working directory.
    cli_args = sys.argv[1:]
    study_path = Path(cli_args[0]) if cli_args else Path.cwd()

    # Detect the study state and print its human-readable summary.
    print(format_study_summary(detect_study_state(study_path)))