# optimization_engine/study/state.py

"""
Study State Detector for Atomizer

This module provides utilities to detect and summarize the state of an optimization study.
Used by Claude sessions to quickly understand study context on initialization.
"""

import json
import sqlite3
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime


def detect_study_state(study_dir: Path) -> Dict[str, Any]:
    """
    Detect the current state of an optimization study.

    The study directory is inspected for ``optimization_config.json`` (at the
    top level or under ``1_setup/``) and, under ``2_results/``, for
    ``study.db``, ``nn_study.db``, ``turbo_report.json`` and ``surrogate.pt``.
    Problems reading any of these files never raise; they are recorded in
    ``state["warnings"]`` instead.

    Args:
        study_dir: Path to the study directory (a ``str`` is accepted too)

    Returns:
        Dictionary with study state information. ``is_study`` is False when
        no config file is found; in that case only the defaults plus a
        warning are returned.
    """
    study_dir = Path(study_dir)
    state = {
        "is_study": False,
        "study_name": study_dir.name,
        "status": "unknown",
        "config": None,
        "fea_trials": 0,
        "nn_trials": 0,
        "pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None,
        "has_turbo_report": False,
        "has_surrogate": False,
        "warnings": [],
        "next_actions": []
    }

    # Check if this is a valid study directory
    config_path = study_dir / "optimization_config.json"
    if not config_path.exists():
        # Try 1_setup subdirectory
        config_path = study_dir / "1_setup" / "optimization_config.json"

    if not config_path.exists():
        state["warnings"].append("No optimization_config.json found")
        return state

    state["is_study"] = True

    # Load config. A broken config is reported but does not stop detection,
    # so the results directory can still be inspected.
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, 'r', encoding="utf-8") as f:
            config = json.load(f)
        state["config"] = _summarize_config(config)
    except Exception as e:
        state["warnings"].append(f"Failed to parse config: {e}")

    # Check results directory
    results_dir = study_dir / "2_results"
    if not results_dir.exists():
        state["status"] = "not_started"
        state["next_actions"].append("Run: python run_optimization.py --discover")
        return state

    # Check study.db for FEA trials
    db_path = results_dir / "study.db"
    if db_path.exists():
        fea_stats = _query_study_db(db_path)
        state.update(fea_stats)
        # Surface database problems instead of leaving them hidden in a key
        # that nothing else reads.
        if fea_stats.get("db_error"):
            state["warnings"].append(
                f"Failed to read study.db: {fea_stats['db_error']}"
            )

    # Check nn_study.db for NN trials
    nn_db_path = results_dir / "nn_study.db"
    if nn_db_path.exists():
        nn_stats = _query_study_db(nn_db_path, prefix="nn_")
        state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)
        if nn_stats.get("db_error"):
            state["warnings"].append(
                f"Failed to read nn_study.db: {nn_stats['db_error']}"
            )

    # Check for turbo report
    turbo_report_path = results_dir / "turbo_report.json"
    if turbo_report_path.exists():
        # The flag records that the file exists, even if it cannot be parsed;
        # the status logic depends on the flag, not on the summary.
        state["has_turbo_report"] = True
        try:
            with open(turbo_report_path, 'r', encoding="utf-8") as f:
                turbo = json.load(f)
            state["turbo_summary"] = {
                "mode": turbo.get("mode"),
                "nn_trials": turbo.get("total_nn_trials", 0),
                "fea_validations": turbo.get("fea_validations", 0),
                # `or 0` guards against an explicit null in the report.
                "time_minutes": round(turbo.get("time_minutes") or 0, 1)
            }
        except Exception as e:
            # Best effort: keep going, but tell the user the report is unusable.
            state["warnings"].append(f"Failed to parse turbo_report.json: {e}")

    # Check for trained surrogate
    surrogate_path = results_dir / "surrogate.pt"
    state["has_surrogate"] = surrogate_path.exists()

    # Determine overall status
    state["status"] = _determine_status(state)

    # Suggest next actions
    state["next_actions"] = _suggest_next_actions(state)

    return state


def _summarize_config(config: Dict) -> Dict[str, Any]:
    """
    Extract key information from config.

    Args:
        config: Parsed optimization config. Variables are read from
            ``design_variables`` (falling back to ``variables``); objectives
            and constraints from ``objectives`` and ``constraints``. Missing
            or null sections are treated as empty.

    Returns:
        Dictionary with the variable/objective/constraint counts, the first
        five variable names, one ``"name (direction)"`` label per objective,
        and ``study_type`` (``"multi_objective"`` when there is more than one
        objective, otherwise ``"single_objective"``).
    """
    # Handle different config formats. An explicit null is treated like a
    # missing key; an explicit empty list is respected as-is.
    variables = config.get("design_variables")
    if variables is None:
        variables = config.get("variables") or []
    objectives = config.get("objectives") or []
    constraints = config.get("constraints") or []

    # Get variable names (handle different key names). The trailing
    # `or "unknown"` also covers keys that exist but hold None or "", and
    # str() guarantees the names can be joined for display.
    var_names = [
        str(
            v.get("parameter")
            or v.get("name")
            or v.get("expression_name")
            or "unknown"
        )
        for v in variables
    ]

    # Get objective names
    obj_names = []
    for o in objectives:
        name = o.get("name") or o.get("metric") or "unknown"
        direction = o.get("goal") or o.get("direction") or "minimize"
        obj_names.append(f"{name} ({direction})")

    return {
        "n_variables": len(variables),
        "n_objectives": len(objectives),
        "n_constraints": len(constraints),
        "variable_names": var_names[:5],  # First 5 only
        "objective_names": obj_names,
        "study_type": "multi_objective" if len(objectives) > 1 else "single_objective"
    }


def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:
    """
    Query Optuna study database for statistics.

    Args:
        db_path: Path to the SQLite database file
        prefix: String prepended to the count keys (``fea_trials``,
            ``completed_trials``, ``failed_trials``, ``pareto_solutions``).
            ``best_trial`` and ``last_activity`` are never prefixed.

    Returns:
        Dictionary of statistics. If any query fails, the values gathered so
        far are kept and the error text is stored under ``"db_error"``; no
        exception is raised.
    """
    stats = {
        f"{prefix}fea_trials": 0,
        f"{prefix}completed_trials": 0,
        f"{prefix}failed_trials": 0,
        f"{prefix}pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None
    }

    conn = None
    try:
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        # Count trials by state
        cursor.execute("""
            SELECT state, COUNT(*) FROM trials
            GROUP BY state
        """)
        for trial_state, count in cursor.fetchall():
            if trial_state == "COMPLETE":
                stats[f"{prefix}completed_trials"] = count
                stats[f"{prefix}fea_trials"] = count
            elif trial_state == "FAIL":
                stats[f"{prefix}failed_trials"] = count

        # Get last activity time
        cursor.execute("""
            SELECT MAX(datetime_complete) FROM trials
            WHERE datetime_complete IS NOT NULL
        """)
        result = cursor.fetchone()
        if result and result[0]:
            stats["last_activity"] = result[0]

        # Optuna's RDB schema names the objective-index column `objective`;
        # the previously used `objective_id` is kept as a fallback for any
        # database that really has it. The name comes from this fixed pair,
        # never from user input, so interpolating it into the SQL is safe.
        cursor.execute("PRAGMA table_info(trial_values)")
        value_columns = {row[1] for row in cursor.fetchall()}
        objective_col = "objective" if "objective" in value_columns else "objective_id"

        # Get best trial (for single objective; assumes lower is better).
        # NULL values sort first in SQLite's ascending order, so they must be
        # excluded or a trial without a finite value would be reported as best.
        cursor.execute(f"""
            SELECT trial_id, value FROM trial_values
            WHERE {objective_col} = 0 AND value IS NOT NULL
            ORDER BY value ASC
            LIMIT 1
        """)
        result = cursor.fetchone()
        if result:
            stats["best_trial"] = {"trial_id": result[0], "value": result[1]}

        # Count Pareto solutions (trials with user_attr pareto=True or non-dominated)
        # Simplified: count distinct trials in trial_values
        cursor.execute("""
            SELECT COUNT(DISTINCT trial_id) FROM trial_values
        """)
        result = cursor.fetchone()
        if result:
            # For multi-objective, this is a rough estimate
            stats[f"{prefix}pareto_solutions"] = min(result[0], 50)  # Cap at 50
    except Exception as e:
        stats["db_error"] = str(e)
    finally:
        # Always release the file handle, including when a query failed.
        if conn is not None:
            conn.close()

    return stats


def _determine_status(state: Dict) -> str:
    """
    Classify the study into a single status label.

    The completed-FEA-trial count is checked first; only once at least ten
    trials exist do the turbo-report and surrogate flags take part.

    Args:
        state: Study state with ``fea_trials``, ``has_turbo_report`` and
            ``has_surrogate``

    Returns:
        One of ``"not_started"``, ``"discovery"``, ``"validation"``,
        ``"turbo_complete"``, ``"training_complete"``, ``"fea_complete"`` or
        ``"in_progress"``.
    """
    n_fea = state["fea_trials"]

    # Early phases are decided by the trial count alone.
    if n_fea == 0:
        return "not_started"
    if n_fea < 3:
        return "discovery"
    if n_fea < 10:
        return "validation"

    # From ten trials on, downstream artifacts take precedence over the count.
    if state["has_turbo_report"]:
        return "turbo_complete"
    if state["has_surrogate"]:
        return "training_complete"

    return "fea_complete" if n_fea >= 50 else "in_progress"


def _suggest_next_actions(state: Dict) -> List[str]:
    """
    Suggest next actions based on study state.

    Args:
        state: Study state; only ``state["status"]`` is read

    Returns:
        A new list of suggestion strings for the status, or an empty list
        when the status has no associated suggestions (e.g. ``"unknown"``).
    """
    suggestions = {
        "not_started": (
            "Run: python run_optimization.py --discover",
        ),
        "discovery": (
            "Run: python run_optimization.py --validate",
        ),
        "validation": (
            "Run: python run_optimization.py --test",
            "Or run full: python run_optimization.py --run --trials 50",
        ),
        "in_progress": (
            "Continue: python run_optimization.py --resume",
        ),
        "fea_complete": (
            "Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db",
            "Or run turbo: python run_nn_optimization.py --turbo",
        ),
        # A surrogate exists but no turbo report yet: the status detector
        # emits this label, so it needs a suggestion of its own.
        "training_complete": (
            "Run turbo: python run_nn_optimization.py --turbo",
        ),
        "turbo_complete": (
            "View results in dashboard: cd atomizer-dashboard && npm run dev",
            "Generate report: python generate_report.py",
        ),
    }

    # Return a fresh list so callers may mutate the result freely.
    return list(suggestions.get(state["status"], ()))


def format_study_summary(state: Dict) -> str:
    """
    Render a study state dictionary as Markdown-flavoured text.

    Args:
        state: State dictionary as produced by ``detect_study_state``

    Returns:
        A single-line notice when ``state["is_study"]`` is falsy; otherwise a
        newline-joined summary with a header, an optional configuration
        section, a progress section, and optional next-action and warning
        sections.
    """
    if not state["is_study"]:
        return f"❌ Not a valid study directory: {state['study_name']}"

    status_label = state['status'].replace('_', ' ').title()
    out = [f"📊 **Study: {state['study_name']}**", f"Status: {status_label}", ""]

    # Configuration section (skipped when the config could not be summarized)
    summary = state["config"]
    if summary:
        shown_vars = ', '.join(summary['variable_names'][:3])
        more_marker = '...' if summary['n_variables'] > 3 else ''
        shown_objs = ', '.join(summary['objective_names'])
        out.extend([
            "**Configuration:**",
            f"- Variables: {summary['n_variables']} ({shown_vars}{more_marker})",
            f"- Objectives: {summary['n_objectives']} ({shown_objs})",
            f"- Constraints: {summary['n_constraints']}",
            f"- Type: {summary['study_type']}",
            "",
        ])

    # Progress section
    out += ["**Progress:**", f"- FEA trials: {state['fea_trials']}"]
    if state["nn_trials"] > 0:
        out.append(f"- NN trials: {state['nn_trials']}")
    if state["has_turbo_report"] and "turbo_summary" in state:
        turbo = state["turbo_summary"]
        out.append(
            f"- Turbo mode: {turbo['nn_trials']} NN + "
            f"{turbo['fea_validations']} FEA validations "
            f"({turbo['time_minutes']} min)"
        )
    if state["last_activity"]:
        out.append(f"- Last activity: {state['last_activity']}")
    out.append("")

    # Suggested actions, one arrow-prefixed line each
    if state["next_actions"]:
        out.append("**Suggested Next Actions:**")
        out.extend(f"  → {step}" for step in state["next_actions"])

    # Warnings, separated from the previous section by a blank line
    if state["warnings"]:
        out += ["", "**Warnings:**"]
        out.extend(f"  ⚠️ {note}" for note in state["warnings"])

    return "\n".join(out)


def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:
    """
    Get state of all studies in the Atomizer studies directory.

    Args:
        atomizer_root: Directory that contains the ``studies`` folder
            (a ``str`` is accepted too)

    Returns:
        State dictionaries of every non-hidden subdirectory of ``studies``
        that is recognised as a study, most recent ``last_activity`` first.
        Empty when ``studies`` is missing or is not a directory.
    """
    # Coerce like detect_study_state does, so plain string paths work here too.
    studies_dir = Path(atomizer_root) / "studies"
    if not studies_dir.is_dir():
        return []

    studies = []
    # Walk in name order so studies with equal activity keep a stable order.
    for study_path in sorted(studies_dir.iterdir()):
        if study_path.is_dir() and not study_path.name.startswith("."):
            state = detect_study_state(study_path)
            if state["is_study"]:
                studies.append(state)

    # Sort by last activity (most recent first); studies without any activity
    # get an early placeholder date so they end up last.
    studies.sort(
        key=lambda s: s.get("last_activity") or "1970-01-01",
        reverse=True
    )

    return studies


if __name__ == "__main__":
    import sys

    # Inspect the directory named by the first CLI argument, or the current
    # working directory when no argument is given.
    study_path = Path(sys.argv[1]) if len(sys.argv) > 1 else Path.cwd()

    state = detect_study_state(study_path)
    print(format_study_summary(state))
feat: Implement Agentic Architecture for robust session workflows Phase 1 - Session Bootstrap: - Add .claude/ATOMIZER_CONTEXT.md as single entry point for new sessions - Add study state detection and task routing Phase 2 - Code Deduplication: - Add optimization_engine/base_runner.py (ConfigDrivenRunner) - Add optimization_engine/generic_surrogate.py (ConfigDrivenSurrogate) - Add optimization_engine/study_state.py for study detection - Add optimization_engine/templates/ with registry and templates - Studies now require ~50 lines instead of ~300 Phase 3 - Skill Consolidation: - Add YAML frontmatter metadata to all skills (versioning, dependencies) - Consolidate create-study.md into core/study-creation-core.md - Update 00_BOOTSTRAP.md, 01_CHEATSHEET.md, 02_CONTEXT_LOADER.md Phase 4 - Self-Expanding Knowledge: - Add optimization_engine/auto_doc.py for auto-generating documentation - Generate docs/generated/EXTRACTORS.md (27 extractors documented) - Generate docs/generated/TEMPLATES.md (6 templates) - Generate docs/generated/EXTRACTOR_CHEATSHEET.md Phase 5 - Subagent Implementation: - Add .claude/commands/study-builder.md (create studies) - Add .claude/commands/nx-expert.md (NX Open API) - Add .claude/commands/protocol-auditor.md (config validation) - Add .claude/commands/results-analyzer.md (results analysis) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> 2025-12-07 14:52:25 -05:00			`"""`
			`Study State Detector for Atomizer`

			`This module provides utilities to detect and summarize the state of an optimization study.`
			`Used by Claude sessions to quickly understand study context on initialization.`
			`"""`

			`import json`
			`import sqlite3`
			`from pathlib import Path`
			`from typing import Dict, Any, Optional, List`
			`from datetime import datetime`


			`def detect_study_state(study_dir: Path) -> Dict[str, Any]:`
			`"""`
			`Detect the current state of an optimization study.`

			`Args:`
			`study_dir: Path to the study directory`

			`Returns:`
			`Dictionary with study state information`
			`"""`
			`study_dir = Path(study_dir)`
			`state = {`
			`"is_study": False,`
			`"study_name": study_dir.name,`
			`"status": "unknown",`
			`"config": None,`
			`"fea_trials": 0,`
			`"nn_trials": 0,`
			`"pareto_solutions": 0,`
			`"best_trial": None,`
			`"last_activity": None,`
			`"has_turbo_report": False,`
			`"has_surrogate": False,`
			`"warnings": [],`
			`"next_actions": []`
			`}`

			`# Check if this is a valid study directory`
			`config_path = study_dir / "optimization_config.json"`
			`if not config_path.exists():`
			`# Try 1_setup subdirectory`
			`config_path = study_dir / "1_setup" / "optimization_config.json"`

			`if not config_path.exists():`
			`state["warnings"].append("No optimization_config.json found")`
			`return state`

			`state["is_study"] = True`

			`# Load config`
			`try:`
			`with open(config_path, 'r') as f:`
			`config = json.load(f)`
			`state["config"] = _summarize_config(config)`
			`except Exception as e:`
			`state["warnings"].append(f"Failed to parse config: {e}")`

			`# Check results directory`
			`results_dir = study_dir / "2_results"`
			`if not results_dir.exists():`
			`state["status"] = "not_started"`
			`state["next_actions"].append("Run: python run_optimization.py --discover")`
			`return state`

			`# Check study.db for FEA trials`
			`db_path = results_dir / "study.db"`
			`if db_path.exists():`
			`fea_stats = _query_study_db(db_path)`
			`state.update(fea_stats)`

			`# Check nn_study.db for NN trials`
			`nn_db_path = results_dir / "nn_study.db"`
			`if nn_db_path.exists():`
			`nn_stats = _query_study_db(nn_db_path, prefix="nn_")`
			`state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)`

			`# Check for turbo report`
			`turbo_report_path = results_dir / "turbo_report.json"`
			`if turbo_report_path.exists():`
			`state["has_turbo_report"] = True`
			`try:`
			`with open(turbo_report_path, 'r') as f:`
			`turbo = json.load(f)`
			`state["turbo_summary"] = {`
			`"mode": turbo.get("mode"),`
			`"nn_trials": turbo.get("total_nn_trials", 0),`
			`"fea_validations": turbo.get("fea_validations", 0),`
			`"time_minutes": round(turbo.get("time_minutes", 0), 1)`
			`}`
			`except Exception:`
			`pass`

			`# Check for trained surrogate`
			`surrogate_path = results_dir / "surrogate.pt"`
			`state["has_surrogate"] = surrogate_path.exists()`

			`# Determine overall status`
			`state["status"] = _determine_status(state)`

			`# Suggest next actions`
			`state["next_actions"] = _suggest_next_actions(state)`

			`return state`


			`def _summarize_config(config: Dict) -> Dict[str, Any]:`
			`"""Extract key information from config."""`
			`# Handle different config formats`
			`variables = config.get("design_variables", config.get("variables", []))`
			`objectives = config.get("objectives", [])`
			`constraints = config.get("constraints", [])`

			`# Get variable names (handle different key names)`
			`var_names = []`
			`for v in variables:`
			`name = v.get("parameter") or v.get("name") or v.get("expression_name", "unknown")`
			`var_names.append(name)`

			`# Get objective names`
			`obj_names = []`
			`for o in objectives:`
			`name = o.get("name") or o.get("metric", "unknown")`
			`direction = o.get("goal") or o.get("direction", "minimize")`
			`obj_names.append(f"{name} ({direction})")`

			`return {`
			`"n_variables": len(variables),`
			`"n_objectives": len(objectives),`
			`"n_constraints": len(constraints),`
			`"variable_names": var_names[:5], # First 5 only`
			`"objective_names": obj_names,`
			`"study_type": "multi_objective" if len(objectives) > 1 else "single_objective"`
			`}`


			`def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:`
			`"""Query Optuna study database for statistics."""`
			`stats = {`
			`f"{prefix}fea_trials": 0,`
			`f"{prefix}completed_trials": 0,`
			`f"{prefix}failed_trials": 0,`
			`f"{prefix}pareto_solutions": 0,`
			`"best_trial": None,`
			`"last_activity": None`
			`}`

			`try:`
			`conn = sqlite3.connect(str(db_path))`
			`cursor = conn.cursor()`

			`# Count trials by state`
			`cursor.execute("""`
			`SELECT state, COUNT(*) FROM trials`
			`GROUP BY state`
			`""")`
			`for state, count in cursor.fetchall():`
			`if state == "COMPLETE":`
			`stats[f"{prefix}completed_trials"] = count`
			`stats[f"{prefix}fea_trials"] = count`
			`elif state == "FAIL":`
			`stats[f"{prefix}failed_trials"] = count`

			`# Get last activity time`
			`cursor.execute("""`
			`SELECT MAX(datetime_complete) FROM trials`
			`WHERE datetime_complete IS NOT NULL`
			`""")`
			`result = cursor.fetchone()`
			`if result and result[0]:`
			`stats["last_activity"] = result[0]`

			`# Get best trial (for single objective)`
			`cursor.execute("""`
			`SELECT trial_id, value FROM trial_values`
			`WHERE objective_id = 0`
			`ORDER BY value ASC`
			`LIMIT 1`
			`""")`
			`result = cursor.fetchone()`
			`if result:`
			`stats["best_trial"] = {"trial_id": result[0], "value": result[1]}`

			`# Count Pareto solutions (trials with user_attr pareto=True or non-dominated)`
			`# Simplified: count distinct trials in trial_values`
			`cursor.execute("""`
			`SELECT COUNT(DISTINCT trial_id) FROM trial_values`
			`""")`
			`result = cursor.fetchone()`
			`if result:`
			`# For multi-objective, this is a rough estimate`
			`stats[f"{prefix}pareto_solutions"] = min(result[0], 50) # Cap at 50`

			`conn.close()`
			`except Exception as e:`
			`stats["db_error"] = str(e)`

			`return stats`


			`def _determine_status(state: Dict) -> str:`
			`"""Determine overall study status."""`
			`if state["fea_trials"] == 0:`
			`return "not_started"`
			`elif state["fea_trials"] < 3:`
			`return "discovery"`
			`elif state["fea_trials"] < 10:`
			`return "validation"`
			`elif state["has_turbo_report"]:`
			`return "turbo_complete"`
			`elif state["has_surrogate"]:`
			`return "training_complete"`
			`elif state["fea_trials"] >= 50:`
			`return "fea_complete"`
			`else:`
			`return "in_progress"`


			`def _suggest_next_actions(state: Dict) -> List[str]:`
			`"""Suggest next actions based on study state."""`
			`actions = []`

			`if state["status"] == "not_started":`
			`actions.append("Run: python run_optimization.py --discover")`
			`elif state["status"] == "discovery":`
			`actions.append("Run: python run_optimization.py --validate")`
			`elif state["status"] == "validation":`
			`actions.append("Run: python run_optimization.py --test")`
			`actions.append("Or run full: python run_optimization.py --run --trials 50")`
			`elif state["status"] == "in_progress":`
			`actions.append("Continue: python run_optimization.py --resume")`
			`elif state["status"] == "fea_complete":`
			`actions.append("Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db")`
			`actions.append("Or run turbo: python run_nn_optimization.py --turbo")`
			`elif state["status"] == "turbo_complete":`
			`actions.append("View results in dashboard: cd atomizer-dashboard && npm run dev")`
			`actions.append("Generate report: python generate_report.py")`

			`return actions`


			`def format_study_summary(state: Dict) -> str:`
			`"""Format study state as a human-readable summary."""`
			`if not state["is_study"]:`
			`return f"❌ Not a valid study directory: {state['study_name']}"`

			`lines = [`
			`f"📊 Study: {state['study_name']}",`
			`f"Status: {state['status'].replace('_', ' ').title()}",`
			`""`
			`]`

			`if state["config"]:`
			`cfg = state["config"]`
			`lines.append(f"Configuration:")`
			`lines.append(f"- Variables: {cfg['n_variables']} ({', '.join(cfg['variable_names'][:3])}{'...' if cfg['n_variables'] > 3 else ''})")`
			`lines.append(f"- Objectives: {cfg['n_objectives']} ({', '.join(cfg['objective_names'])})")`
			`lines.append(f"- Constraints: {cfg['n_constraints']}")`
			`lines.append(f"- Type: {cfg['study_type']}")`
			`lines.append("")`

			`lines.append("Progress:")`
			`lines.append(f"- FEA trials: {state['fea_trials']}")`
			`if state["nn_trials"] > 0:`
			`lines.append(f"- NN trials: {state['nn_trials']}")`
			`if state["has_turbo_report"] and "turbo_summary" in state:`
			`ts = state["turbo_summary"]`
			`lines.append(f"- Turbo mode: {ts['nn_trials']} NN + {ts['fea_validations']} FEA validations ({ts['time_minutes']} min)")`
			`if state["last_activity"]:`
			`lines.append(f"- Last activity: {state['last_activity']}")`
			`lines.append("")`

			`if state["next_actions"]:`
			`lines.append("Suggested Next Actions:")`
			`for action in state["next_actions"]:`
			`lines.append(f" → {action}")`

			`if state["warnings"]:`
			`lines.append("")`
			`lines.append("Warnings:")`
			`for warning in state["warnings"]:`
			`lines.append(f" ⚠️ {warning}")`

			`return "\n".join(lines)`


			`def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:`
			`"""Get state of all studies in the Atomizer studies directory."""`
			`studies_dir = atomizer_root / "studies"`
			`if not studies_dir.exists():`
			`return []`

			`studies = []`
			`for study_path in studies_dir.iterdir():`
			`if study_path.is_dir() and not study_path.name.startswith("."):`
			`state = detect_study_state(study_path)`
			`if state["is_study"]:`
			`studies.append(state)`

			`# Sort by last activity (most recent first)`
			`studies.sort(`
			`key=lambda s: s.get("last_activity") or "1970-01-01",`
			`reverse=True`
			`)`

			`return studies`


			`if __name__ == "__main__":`
			`import sys`

			`if len(sys.argv) > 1:`
			`study_path = Path(sys.argv[1])`
			`else:`
			`# Default to current directory`
			`study_path = Path.cwd()`

			`state = detect_study_state(study_path)`
			`print(format_study_summary(state))`