Phase 1 - Session Bootstrap: - Add .claude/ATOMIZER_CONTEXT.md as single entry point for new sessions - Add study state detection and task routing Phase 2 - Code Deduplication: - Add optimization_engine/base_runner.py (ConfigDrivenRunner) - Add optimization_engine/generic_surrogate.py (ConfigDrivenSurrogate) - Add optimization_engine/study_state.py for study detection - Add optimization_engine/templates/ with registry and templates - Studies now require ~50 lines instead of ~300 Phase 3 - Skill Consolidation: - Add YAML frontmatter metadata to all skills (versioning, dependencies) - Consolidate create-study.md into core/study-creation-core.md - Update 00_BOOTSTRAP.md, 01_CHEATSHEET.md, 02_CONTEXT_LOADER.md Phase 4 - Self-Expanding Knowledge: - Add optimization_engine/auto_doc.py for auto-generating documentation - Generate docs/generated/EXTRACTORS.md (27 extractors documented) - Generate docs/generated/TEMPLATES.md (6 templates) - Generate docs/generated/EXTRACTOR_CHEATSHEET.md Phase 5 - Subagent Implementation: - Add .claude/commands/study-builder.md (create studies) - Add .claude/commands/nx-expert.md (NX Open API) - Add .claude/commands/protocol-auditor.md (config validation) - Add .claude/commands/results-analyzer.md (results analysis) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
323 lines
10 KiB
Python
323 lines
10 KiB
Python
"""
|
|
Study State Detector for Atomizer
|
|
|
|
This module provides utilities to detect and summarize the state of an optimization study.
|
|
Used by Claude sessions to quickly understand study context on initialization.
|
|
"""
|
|
|
|
import json
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional, List
|
|
from datetime import datetime
|
|
|
|
|
|
def detect_study_state(study_dir: Path) -> Dict[str, Any]:
    """
    Detect the current state of an optimization study.

    The study directory is inspected for ``optimization_config.json``
    (directly or under ``1_setup/``) and for the artifacts in ``2_results/``
    (``study.db``, ``nn_study.db``, ``turbo_report.json``, ``surrogate.pt``).
    Every problem encountered along the way is recorded in
    ``state["warnings"]`` instead of being raised or silently dropped.

    Args:
        study_dir: Path to the study directory (a string is accepted too).

    Returns:
        Dictionary with study state information. It always contains the keys
        ``is_study``, ``study_name``, ``status``, ``config``, ``fea_trials``,
        ``nn_trials``, ``pareto_solutions``, ``best_trial``,
        ``last_activity``, ``has_turbo_report``, ``has_surrogate``,
        ``warnings`` and ``next_actions``. Extra keys returned by the study
        database query, and ``turbo_summary`` when a turbo report could be
        read, are merged in as well.
    """
    study_dir = Path(study_dir)
    state = {
        "is_study": False,
        "study_name": study_dir.name,
        "status": "unknown",
        "config": None,
        "fea_trials": 0,
        "nn_trials": 0,
        "pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None,
        "has_turbo_report": False,
        "has_surrogate": False,
        "warnings": [],
        "next_actions": []
    }

    # Check if this is a valid study directory
    config_path = study_dir / "optimization_config.json"
    if not config_path.exists():
        # Try 1_setup subdirectory
        config_path = study_dir / "1_setup" / "optimization_config.json"

    if not config_path.exists():
        state["warnings"].append("No optimization_config.json found")
        return state

    state["is_study"] = True

    # Load config. This is deliberately best-effort: a broken config must not
    # prevent the rest of the study from being inspected.
    try:
        with open(config_path, 'r', encoding="utf-8") as f:
            config = json.load(f)
        state["config"] = _summarize_config(config)
    except Exception as e:
        state["warnings"].append(f"Failed to parse config: {e}")

    # Check results directory
    results_dir = study_dir / "2_results"
    if not results_dir.exists():
        state["status"] = "not_started"
        state["next_actions"].append("Run: python run_optimization.py --discover")
        return state

    # Check study.db for FEA trials
    db_path = results_dir / "study.db"
    if db_path.exists():
        fea_stats = _query_study_db(db_path)
        state.update(fea_stats)
        if "db_error" in fea_stats:
            # Surface the failure; otherwise a zero trial count is
            # indistinguishable from an unreadable database.
            state["warnings"].append(
                f"Failed to read study.db: {fea_stats['db_error']}"
            )

    # Check nn_study.db for NN trials
    nn_db_path = results_dir / "nn_study.db"
    if nn_db_path.exists():
        nn_stats = _query_study_db(nn_db_path, prefix="nn_")
        state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)
        if "db_error" in nn_stats:
            state["warnings"].append(
                f"Failed to read nn_study.db: {nn_stats['db_error']}"
            )

    # Check for turbo report
    turbo_report_path = results_dir / "turbo_report.json"
    if turbo_report_path.exists():
        state["has_turbo_report"] = True
        try:
            with open(turbo_report_path, 'r', encoding="utf-8") as f:
                turbo = json.load(f)
            state["turbo_summary"] = {
                "mode": turbo.get("mode"),
                "nn_trials": turbo.get("total_nn_trials", 0),
                "fea_validations": turbo.get("fea_validations", 0),
                "time_minutes": round(turbo.get("time_minutes", 0), 1)
            }
        except Exception as e:
            # Still best-effort (the report existing is what drives the
            # status), but tell the user the summary is unavailable.
            state["warnings"].append(f"Failed to parse turbo_report.json: {e}")

    # Check for trained surrogate
    surrogate_path = results_dir / "surrogate.pt"
    state["has_surrogate"] = surrogate_path.exists()

    # Determine overall status
    state["status"] = _determine_status(state)

    # Suggest next actions
    state["next_actions"] = _suggest_next_actions(state)

    return state
|
|
|
|
|
|
def _summarize_config(config: Dict) -> Dict[str, Any]:
    """Extract key information from config.

    Several config layouts are tolerated: variables may live under
    ``design_variables`` or ``variables``; a variable may be named by
    ``parameter``, ``name`` or ``expression_name``; an objective by ``name``
    or ``metric`` with its direction under ``goal`` or ``direction``.
    Sections or fields that are missing *or explicitly null* fall back to an
    empty list / ``"unknown"`` / ``"minimize"`` so the summary never contains
    ``None`` where a string is expected.

    Args:
        config: Parsed optimization config.

    Returns:
        Dict with ``n_variables``, ``n_objectives``, ``n_constraints``,
        ``variable_names`` (first five only), ``objective_names`` (formatted
        as ``"name (direction)"``) and ``study_type``
        (``"multi_objective"`` when there is more than one objective,
        otherwise ``"single_objective"``).
    """
    # Handle different config formats. An explicit (even empty)
    # "design_variables" list wins; only a missing/null one falls back.
    variables = config.get("design_variables")
    if variables is None:
        variables = config.get("variables") or []
    objectives = config.get("objectives") or []
    constraints = config.get("constraints") or []

    # Get variable names (handle different key names). Chaining with `or`
    # all the way means a key that is present but null still yields "unknown".
    var_names = [
        v.get("parameter") or v.get("name") or v.get("expression_name") or "unknown"
        for v in variables
    ]

    # Get objective names
    obj_names = []
    for o in objectives:
        name = o.get("name") or o.get("metric") or "unknown"
        direction = o.get("goal") or o.get("direction") or "minimize"
        obj_names.append(f"{name} ({direction})")

    return {
        "n_variables": len(variables),
        "n_objectives": len(objectives),
        "n_constraints": len(constraints),
        "variable_names": var_names[:5],  # First 5 only
        "objective_names": obj_names,
        "study_type": "multi_objective" if len(objectives) > 1 else "single_objective"
    }
|
|
|
|
|
|
def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:
    """Query Optuna study database for statistics.

    The query is best-effort: any failure is reported under the ``db_error``
    key and whatever was collected up to that point is still returned. The
    connection is always closed, including when a query fails.

    Args:
        db_path: Path to the SQLite study database.
        prefix: String prepended to the count keys (``fea_trials``,
            ``completed_trials``, ``failed_trials``, ``pareto_solutions``).
            ``best_trial`` and ``last_activity`` are never prefixed.

    Returns:
        Dict of statistics; contains ``db_error`` (the exception text) only
        when something went wrong.
    """
    stats = {
        f"{prefix}fea_trials": 0,
        f"{prefix}completed_trials": 0,
        f"{prefix}failed_trials": 0,
        f"{prefix}pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None
    }

    conn = None
    try:
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        # Count trials by state
        cursor.execute("""
            SELECT state, COUNT(*) FROM trials
            GROUP BY state
        """)
        for state, count in cursor.fetchall():
            if state == "COMPLETE":
                stats[f"{prefix}completed_trials"] = count
                stats[f"{prefix}fea_trials"] = count
            elif state == "FAIL":
                stats[f"{prefix}failed_trials"] = count

        # Get last activity time
        cursor.execute("""
            SELECT MAX(datetime_complete) FROM trials
            WHERE datetime_complete IS NOT NULL
        """)
        result = cursor.fetchone()
        if result and result[0]:
            stats["last_activity"] = result[0]

        # The objective-index column is `objective` in upstream Optuna's RDB
        # schema, while this query was originally written against
        # `objective_id`. Use whichever the table really has; when neither is
        # found keep the original name so the failure is reported as before.
        # NOTE(review): confirm which schema the project's databases use.
        cursor.execute("PRAGMA table_info(trial_values)")
        value_columns = {row[1] for row in cursor.fetchall()}
        if "objective_id" not in value_columns and "objective" in value_columns:
            objective_col = "objective"
        else:
            objective_col = "objective_id"

        # Get best trial (for single objective). "Best" means the lowest
        # value of the first objective, i.e. minimization is assumed.
        # objective_col is one of two fixed identifiers, never user input.
        cursor.execute(f"""
            SELECT trial_id, value FROM trial_values
            WHERE {objective_col} = 0
            ORDER BY value ASC
            LIMIT 1
        """)
        result = cursor.fetchone()
        if result:
            stats["best_trial"] = {"trial_id": result[0], "value": result[1]}

        # Count Pareto solutions (trials with user_attr pareto=True or non-dominated)
        # Simplified: count distinct trials in trial_values
        cursor.execute("""
            SELECT COUNT(DISTINCT trial_id) FROM trial_values
        """)
        result = cursor.fetchone()
        if result:
            # For multi-objective, this is a rough estimate
            stats[f"{prefix}pareto_solutions"] = min(result[0], 50)  # Cap at 50
    except Exception as e:
        stats["db_error"] = str(e)
    finally:
        # Close on every path; previously a failing query leaked the handle.
        if conn is not None:
            conn.close()

    return stats
|
|
|
|
|
|
def _determine_status(state: Dict) -> str:
    """Map the collected study facts onto a single status label.

    The number of completed FEA trials is checked first (0 -> not started,
    fewer than 3 -> discovery, fewer than 10 -> validation). From 10 trials
    on, a turbo report takes precedence over a trained surrogate, which in
    turn takes precedence over the plain trial count (50 or more ->
    FEA complete, otherwise in progress).
    """
    n_fea = state["fea_trials"]

    # Early phases depend on the trial count alone.
    if n_fea == 0:
        return "not_started"
    if n_fea < 3:
        return "discovery"
    if n_fea < 10:
        return "validation"

    # Later phases: artifacts on disk outrank the raw trial count.
    if state["has_turbo_report"]:
        return "turbo_complete"
    if state["has_surrogate"]:
        return "training_complete"

    return "fea_complete" if n_fea >= 50 else "in_progress"
|
|
|
|
|
|
def _suggest_next_actions(state: Dict) -> List[str]:
    """Suggest next actions based on study state.

    Args:
        state: Study state dict; only ``state["status"]`` is read.

    Returns:
        A new list of human-readable suggestions for that status, or an
        empty list when the status is not one of the known labels.
    """
    actions_by_status = {
        "not_started": [
            "Run: python run_optimization.py --discover",
        ],
        "discovery": [
            "Run: python run_optimization.py --validate",
        ],
        "validation": [
            "Run: python run_optimization.py --test",
            "Or run full: python run_optimization.py --run --trials 50",
        ],
        "in_progress": [
            "Continue: python run_optimization.py --resume",
        ],
        "fea_complete": [
            "Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db",
            "Or run turbo: python run_nn_optimization.py --turbo",
        ],
        # This status previously had no suggestion at all. A surrogate exists
        # but no turbo report does, so point at the turbo run that is already
        # suggested after FEA completion.
        "training_complete": [
            "Run turbo: python run_nn_optimization.py --turbo",
        ],
        "turbo_complete": [
            "View results in dashboard: cd atomizer-dashboard && npm run dev",
            "Generate report: python generate_report.py",
        ],
    }

    # Copy so callers can mutate the result without touching the table.
    return list(actions_by_status.get(state["status"], []))
|
|
|
|
|
|
def format_study_summary(state: Dict) -> str:
    """Render a study state dict as a short, human-readable text report.

    Directories that are not studies produce a single error line. For a
    study the report lists the header and status, the configuration summary
    (when one was loaded), trial progress, and then the suggested next
    actions and warnings when there are any.
    """
    study_name = state["study_name"]
    if not state["is_study"]:
        return f"❌ Not a valid study directory: {study_name}"

    pretty_status = state["status"].replace("_", " ").title()
    out = [f"📊 **Study: {study_name}**", f"Status: {pretty_status}", ""]

    summary = state["config"]
    if summary:
        # Show at most three variable names; flag that more exist.
        shown_vars = ", ".join(summary["variable_names"][:3])
        ellipsis = "..." if summary["n_variables"] > 3 else ""
        objective_list = ", ".join(summary["objective_names"])
        out.extend([
            "**Configuration:**",
            f"- Variables: {summary['n_variables']} ({shown_vars}{ellipsis})",
            f"- Objectives: {summary['n_objectives']} ({objective_list})",
            f"- Constraints: {summary['n_constraints']}",
            f"- Type: {summary['study_type']}",
            "",
        ])

    out.extend(["**Progress:**", f"- FEA trials: {state['fea_trials']}"])
    if state["nn_trials"] > 0:
        out.append(f"- NN trials: {state['nn_trials']}")
    if state["has_turbo_report"] and "turbo_summary" in state:
        turbo = state["turbo_summary"]
        out.append(
            f"- Turbo mode: {turbo['nn_trials']} NN + "
            f"{turbo['fea_validations']} FEA validations "
            f"({turbo['time_minutes']} min)"
        )
    if state["last_activity"]:
        out.append(f"- Last activity: {state['last_activity']}")
    out.append("")

    if state["next_actions"]:
        out.append("**Suggested Next Actions:**")
        out.extend(f" → {step}" for step in state["next_actions"])

    if state["warnings"]:
        out.extend(["", "**Warnings:**"])
        out.extend(f" ⚠️ {note}" for note in state["warnings"])

    return "\n".join(out)
|
|
|
|
|
|
def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:
    """Get state of all studies in the Atomizer studies directory.

    Args:
        atomizer_root: Atomizer root directory, i.e. the one containing
            ``studies/``. A plain string is accepted as well as a ``Path``.

    Returns:
        One state dict (as produced by ``detect_study_state``) per valid
        study, most recently active first. Studies without recorded activity
        sort last. An empty list is returned when ``studies/`` is missing or
        is not a directory.
    """
    # Coerce like detect_study_state does, so string paths work here too.
    studies_dir = Path(atomizer_root) / "studies"
    if not studies_dir.is_dir():
        return []

    studies = []
    # Walk entries in name order so that studies with identical activity
    # timestamps come out in a reproducible order (the sort below is stable).
    for study_path in sorted(studies_dir.iterdir()):
        if not study_path.is_dir() or study_path.name.startswith("."):
            continue
        state = detect_study_state(study_path)
        if state["is_study"]:
            studies.append(state)

    # Sort by last activity (most recent first); the epoch placeholder keeps
    # never-run studies at the end and the keys comparable as strings.
    studies.sort(
        key=lambda s: s.get("last_activity") or "1970-01-01",
        reverse=True
    )

    return studies
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Summarize the directory named on the command line, or the current
    # working directory when no argument is given.
    target_dir = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()
    print(format_study_summary(detect_study_state(target_dir)))
|