feat: Implement Agentic Architecture for robust session workflows

Phase 1 - Session Bootstrap:
- Add .claude/ATOMIZER_CONTEXT.md as single entry point for new sessions
- Add study state detection and task routing

Phase 2 - Code Deduplication:
- Add optimization_engine/base_runner.py (ConfigDrivenRunner)
- Add optimization_engine/generic_surrogate.py (ConfigDrivenSurrogate)
- Add optimization_engine/study_state.py for study detection
- Add optimization_engine/templates/ with registry and templates
- Studies now require ~50 lines instead of ~300

Phase 3 - Skill Consolidation:
- Add YAML frontmatter metadata to all skills (versioning, dependencies)
- Consolidate create-study.md into core/study-creation-core.md
- Update 00_BOOTSTRAP.md, 01_CHEATSHEET.md, 02_CONTEXT_LOADER.md

Phase 4 - Self-Expanding Knowledge:
- Add optimization_engine/auto_doc.py for auto-generating documentation
- Generate docs/generated/EXTRACTORS.md (27 extractors documented)
- Generate docs/generated/TEMPLATES.md (6 templates)
- Generate docs/generated/EXTRACTOR_CHEATSHEET.md

Phase 5 - Subagent Implementation:
- Add .claude/commands/study-builder.md (create studies)
- Add .claude/commands/nx-expert.md (NX Open API)
- Add .claude/commands/protocol-auditor.md (config validation)
- Add .claude/commands/results-analyzer.md (results analysis)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-07 14:52:25 -05:00
parent 6cf12d9344
commit 0e04457539
22 changed files with 4708 additions and 2212 deletions
--- a/optimization_engine/study_state.py
+++ b/optimization_engine/study_state.py
@@ -0,0 +1,322 @@
+"""
+Study State Detector for Atomizer
+
+This module provides utilities to detect and summarize the state of an optimization study.
+Used by Claude sessions to quickly understand study context on initialization.
+"""
+
+import json
+import sqlite3
+from pathlib import Path
+from typing import Dict, Any, Optional, List
+from datetime import datetime
+
+
+def detect_study_state(study_dir: Path) -> Dict[str, Any]:
+    """
+    Detect the current state of an optimization study.
+
+    The directory is inspected for ``optimization_config.json`` (at the top
+    level or under ``1_setup/``) and for result artifacts under ``2_results/``
+    (``study.db``, ``nn_study.db``, ``turbo_report.json``, ``surrogate.pt``).
+    Problems reading an artifact are recorded in the ``warnings`` list rather
+    than raised, so the function can be used as a best-effort probe.
+
+    Args:
+        study_dir: Path to the study directory
+
+    Returns:
+        Dictionary with study state information. ``is_study`` is False when
+        no config file was found; in that case the remaining fields keep
+        their defaults. Keys returned by the database query (for example
+        ``completed_trials``, ``failed_trials`` and, on failure,
+        ``db_error``) are merged into the result when ``study.db`` exists.
+    """
+    study_dir = Path(study_dir)
+    state: Dict[str, Any] = {
+        "is_study": False,
+        "study_name": study_dir.name,
+        "status": "unknown",
+        "config": None,
+        "fea_trials": 0,
+        "nn_trials": 0,
+        "pareto_solutions": 0,
+        "best_trial": None,
+        "last_activity": None,
+        "has_turbo_report": False,
+        "has_surrogate": False,
+        "warnings": [],
+        "next_actions": []
+    }
+
+    # Check if this is a valid study directory
+    config_path = study_dir / "optimization_config.json"
+    if not config_path.exists():
+        # Try 1_setup subdirectory
+        config_path = study_dir / "1_setup" / "optimization_config.json"
+
+    if not config_path.exists():
+        state["warnings"].append("No optimization_config.json found")
+        return state
+
+    state["is_study"] = True
+
+    # Load config; a broken config is reported but does not stop detection
+    try:
+        with open(config_path, 'r', encoding="utf-8") as f:
+            config = json.load(f)
+        state["config"] = _summarize_config(config)
+    except Exception as e:
+        state["warnings"].append(f"Failed to parse config: {e}")
+
+    # Check results directory
+    results_dir = study_dir / "2_results"
+    if not results_dir.exists():
+        state["status"] = "not_started"
+        state["next_actions"].append("Run: python run_optimization.py --discover")
+        return state
+
+    # Check study.db for FEA trials
+    db_path = results_dir / "study.db"
+    if db_path.exists():
+        fea_stats = _query_study_db(db_path)
+        state.update(fea_stats)
+        # Surface query failures instead of leaving them buried in the state
+        if fea_stats.get("db_error"):
+            state["warnings"].append(
+                f"Failed to read study.db: {fea_stats['db_error']}"
+            )
+
+    # Check nn_study.db for NN trials
+    nn_db_path = results_dir / "nn_study.db"
+    if nn_db_path.exists():
+        nn_stats = _query_study_db(nn_db_path, prefix="nn_")
+        state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)
+        if nn_stats.get("db_error"):
+            state["warnings"].append(
+                f"Failed to read nn_study.db: {nn_stats['db_error']}"
+            )
+
+    # Check for turbo report
+    turbo_report_path = results_dir / "turbo_report.json"
+    if turbo_report_path.exists():
+        state["has_turbo_report"] = True
+        try:
+            with open(turbo_report_path, 'r', encoding="utf-8") as f:
+                turbo = json.load(f)
+            state["turbo_summary"] = {
+                "mode": turbo.get("mode"),
+                "nn_trials": turbo.get("total_nn_trials", 0),
+                "fea_validations": turbo.get("fea_validations", 0),
+                "time_minutes": round(turbo.get("time_minutes", 0), 1)
+            }
+        except Exception as e:
+            # Still best-effort: no summary is produced, but the user is told why
+            state["warnings"].append(f"Failed to parse turbo_report.json: {e}")
+
+    # Check for trained surrogate
+    surrogate_path = results_dir / "surrogate.pt"
+    state["has_surrogate"] = surrogate_path.exists()
+
+    # Determine overall status
+    state["status"] = _determine_status(state)
+
+    # Suggest next actions
+    state["next_actions"] = _suggest_next_actions(state)
+
+    return state
+
+
+def _summarize_config(config: Dict) -> Dict[str, Any]:
+    """Extract key information from config.
+
+    Args:
+        config: Parsed optimization config. Variables are read from
+            ``design_variables`` (falling back to ``variables``); objectives
+            and constraints from ``objectives`` and ``constraints``.
+
+    Returns:
+        Dictionary with the counts of variables, objectives and constraints,
+        the first five variable names, every objective formatted as
+        ``"name (direction)"``, and ``study_type`` (``multi_objective`` when
+        more than one objective is defined, else ``single_objective``).
+    """
+    # Handle different config formats. `or` is used instead of a .get()
+    # default so that keys present with a null/empty value also fall back.
+    variables = config.get("design_variables") or config.get("variables") or []
+    objectives = config.get("objectives") or []
+    constraints = config.get("constraints") or []
+
+    # Get variable names (handle different key names); never emit None,
+    # because callers join these names into a string
+    var_names = [
+        v.get("parameter") or v.get("name") or v.get("expression_name") or "unknown"
+        for v in variables
+    ]
+
+    # Get objective names
+    obj_names = []
+    for o in objectives:
+        name = o.get("name") or o.get("metric") or "unknown"
+        direction = o.get("goal") or o.get("direction") or "minimize"
+        obj_names.append(f"{name} ({direction})")
+
+    return {
+        "n_variables": len(variables),
+        "n_objectives": len(objectives),
+        "n_constraints": len(constraints),
+        "variable_names": var_names[:5],  # First 5 only
+        "objective_names": obj_names,
+        "study_type": "multi_objective" if len(objectives) > 1 else "single_objective"
+    }
+
+
+def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:
+    """Query Optuna study database for statistics.
+
+    Args:
+        db_path: Path to the SQLite database file.
+        prefix: String prepended to the trial-count keys of the result
+            (``best_trial`` and ``last_activity`` are never prefixed).
+
+    Returns:
+        Dictionary with ``{prefix}fea_trials`` / ``{prefix}completed_trials``
+        (rows in state ``COMPLETE``), ``{prefix}failed_trials`` (state
+        ``FAIL``), ``{prefix}pareto_solutions`` (a rough estimate, capped at
+        50), ``best_trial`` (lowest non-NULL value of objective 0, or None)
+        and ``last_activity`` (latest ``datetime_complete``, or None). If a
+        query fails, the values gathered so far are returned together with a
+        ``db_error`` message; no exception is raised for database errors.
+    """
+    stats: Dict[str, Any] = {
+        f"{prefix}fea_trials": 0,
+        f"{prefix}completed_trials": 0,
+        f"{prefix}failed_trials": 0,
+        f"{prefix}pareto_solutions": 0,
+        "best_trial": None,
+        "last_activity": None
+    }
+
+    conn = None
+    try:
+        conn = sqlite3.connect(str(db_path))
+        cursor = conn.cursor()
+
+        # Count trials by state
+        cursor.execute("""
+            SELECT state, COUNT(*) FROM trials
+            GROUP BY state
+        """)
+        for trial_state, count in cursor.fetchall():
+            if trial_state == "COMPLETE":
+                stats[f"{prefix}completed_trials"] = count
+                stats[f"{prefix}fea_trials"] = count
+            elif trial_state == "FAIL":
+                stats[f"{prefix}failed_trials"] = count
+
+        # Get last activity time
+        cursor.execute("""
+            SELECT MAX(datetime_complete) FROM trials
+            WHERE datetime_complete IS NOT NULL
+        """)
+        result = cursor.fetchone()
+        if result and result[0]:
+            stats["last_activity"] = result[0]
+
+        # Optuna's trial_values table names the objective index column
+        # `objective`; fall back to `objective_id` for databases that use it.
+        # The name comes from this fixed pair, never from user input.
+        cursor.execute("PRAGMA table_info(trial_values)")
+        value_columns = {row[1] for row in cursor.fetchall()}
+        objective_col = "objective" if "objective" in value_columns else "objective_id"
+
+        # Get best trial (for single objective). Minimisation is assumed.
+        # NULL values are excluded because SQLite sorts them first.
+        cursor.execute(f"""
+            SELECT trial_id, value FROM trial_values
+            WHERE {objective_col} = 0 AND value IS NOT NULL
+            ORDER BY value ASC
+            LIMIT 1
+        """)
+        result = cursor.fetchone()
+        if result:
+            stats["best_trial"] = {"trial_id": result[0], "value": result[1]}
+
+        # Count Pareto solutions (trials with user_attr pareto=True or non-dominated)
+        # Simplified: count distinct trials in trial_values
+        cursor.execute("""
+            SELECT COUNT(DISTINCT trial_id) FROM trial_values
+        """)
+        result = cursor.fetchone()
+        if result:
+            # For multi-objective, this is a rough estimate
+            stats[f"{prefix}pareto_solutions"] = min(result[0], 50)  # Cap at 50
+    except sqlite3.Error as e:
+        stats["db_error"] = str(e)
+    finally:
+        # Close even when a query failed so the file handle is not leaked
+        if conn is not None:
+            conn.close()
+
+    return stats
+
+
+def _determine_status(state: Dict) -> str:
+    """Classify the study's progress from its trial count and artifacts.
+
+    The FEA trial count decides the early phases (0 -> ``not_started``,
+    fewer than 3 -> ``discovery``, fewer than 10 -> ``validation``). From 10
+    trials on, a turbo report takes precedence over a trained surrogate,
+    which takes precedence over the plain trial count (50 or more ->
+    ``fea_complete``, otherwise ``in_progress``).
+    """
+    trial_count = state["fea_trials"]
+
+    if trial_count == 0:
+        return "not_started"
+
+    # Early phases are decided by the trial count alone
+    for upper_bound, phase in ((3, "discovery"), (10, "validation")):
+        if trial_count < upper_bound:
+            return phase
+
+    # Later phases: artifacts outrank the raw trial count
+    if state["has_turbo_report"]:
+        return "turbo_complete"
+    if state["has_surrogate"]:
+        return "training_complete"
+
+    return "fea_complete" if trial_count >= 50 else "in_progress"
+
+
+def _suggest_next_actions(state: Dict) -> List[str]:
+    """Return follow-up command hints for the study's current ``status``.
+
+    A status with no entry in the table below (for example
+    ``training_complete`` or ``unknown``) yields an empty list.
+    """
+    # (status, hints) pairs; each status appears once
+    playbook = (
+        ("not_started", (
+            "Run: python run_optimization.py --discover",
+        )),
+        ("discovery", (
+            "Run: python run_optimization.py --validate",
+        )),
+        ("validation", (
+            "Run: python run_optimization.py --test",
+            "Or run full: python run_optimization.py --run --trials 50",
+        )),
+        ("in_progress", (
+            "Continue: python run_optimization.py --resume",
+        )),
+        ("fea_complete", (
+            "Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db",
+            "Or run turbo: python run_nn_optimization.py --turbo",
+        )),
+        ("turbo_complete", (
+            "View results in dashboard: cd atomizer-dashboard && npm run dev",
+            "Generate report: python generate_report.py",
+        )),
+    )
+
+    for label, hints in playbook:
+        if state["status"] == label:
+            # Fresh list per call so callers may mutate the result freely
+            return list(hints)
+
+    return []
+
+
+def format_study_summary(state: Dict) -> str:
+    """Render a study state dictionary as a multi-line text summary.
+
+    A state whose ``is_study`` flag is false produces a single-line notice.
+    Otherwise the summary contains a header, an optional configuration
+    section, a progress section, and — when non-empty — the suggested next
+    actions and the warnings.
+    """
+    if not state["is_study"]:
+        return f"❌ Not a valid study directory: {state['study_name']}"
+
+    header = f"📊 **Study: {state['study_name']}**"
+    pretty_status = state['status'].replace('_', ' ').title()
+    out = [header, f"Status: {pretty_status}", ""]
+
+    # Configuration section (only when a config summary is available)
+    summary = state["config"]
+    if summary:
+        n_vars = summary['n_variables']
+        shown_vars = ', '.join(summary['variable_names'][:3])
+        more_marker = '...' if n_vars > 3 else ''
+        objective_list = ', '.join(summary['objective_names'])
+        out.extend([
+            "**Configuration:**",
+            f"- Variables: {n_vars} ({shown_vars}{more_marker})",
+            f"- Objectives: {summary['n_objectives']} ({objective_list})",
+            f"- Constraints: {summary['n_constraints']}",
+            f"- Type: {summary['study_type']}",
+            "",
+        ])
+
+    # Progress section
+    out.append("**Progress:**")
+    out.append(f"- FEA trials: {state['fea_trials']}")
+    if state["nn_trials"] > 0:
+        out.append(f"- NN trials: {state['nn_trials']}")
+    if state["has_turbo_report"] and "turbo_summary" in state:
+        turbo = state["turbo_summary"]
+        out.append(
+            f"- Turbo mode: {turbo['nn_trials']} NN + "
+            f"{turbo['fea_validations']} FEA validations "
+            f"({turbo['time_minutes']} min)"
+        )
+    if state["last_activity"]:
+        out.append(f"- Last activity: {state['last_activity']}")
+    out.append("")
+
+    # Optional trailing sections
+    if state["next_actions"]:
+        out.append("**Suggested Next Actions:**")
+        out.extend(f"  → {step}" for step in state["next_actions"])
+
+    if state["warnings"]:
+        out.extend(["", "**Warnings:**"])
+        out.extend(f"  ⚠️ {note}" for note in state["warnings"])
+
+    return "\n".join(out)
+
+
+def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:
+    """Get state of all studies in the Atomizer studies directory.
+
+    Args:
+        atomizer_root: Atomizer root directory (``Path`` or path string);
+            studies are looked up in its ``studies`` subdirectory.
+
+    Returns:
+        The detected state of every non-hidden subdirectory that qualifies
+        as a study, most recently active first. Studies without recorded
+        activity sort last. An empty list is returned when the ``studies``
+        directory is missing or is not a directory.
+    """
+    # Coerce like detect_study_state does, so plain strings are accepted too
+    studies_dir = Path(atomizer_root) / "studies"
+    if not studies_dir.is_dir():
+        return []
+
+    studies = []
+    # Sorted traversal keeps the order of equally-ranked studies stable
+    for study_path in sorted(studies_dir.iterdir()):
+        if study_path.is_dir() and not study_path.name.startswith("."):
+            state = detect_study_state(study_path)
+            if state["is_study"]:
+                studies.append(state)
+
+    # Sort by last activity (most recent first)
+    studies.sort(
+        key=lambda s: s.get("last_activity") or "1970-01-01",
+        reverse=True
+    )
+
+    return studies
+
+
+if __name__ == "__main__":
+    import sys
+
+    # The first command-line argument selects the study directory;
+    # without one, the current working directory is inspected.
+    cli_args = sys.argv[1:]
+    study_path = Path(cli_args[0]) if cli_args else Path.cwd()
+
+    state = detect_study_state(study_path)
+    print(format_study_summary(state))