- 8-agent OpenClaw cluster (Manager, Tech-Lead, Secretary, Auditor, Optimizer, Study-Builder, NX-Expert, Webster) - Orchestration engine: orchestrate.py (sync delegation + handoffs) - Workflow engine: YAML-defined multi-step pipelines - Agent workspaces: SOUL.md, AGENTS.md, MEMORY.md per agent - Shared skills: delegate, orchestrate, atomizer-protocols - Capability registry (AGENTS_REGISTRY.json) - Cluster management: cluster.sh, systemd template - All secrets replaced with env var references
118 lines
4.1 KiB
Python
Executable File
118 lines
4.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Orchestration metrics — analyze handoff files and workflow runs."""
|
|
|
|
import glob
import json
import os
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
|
|
|
|
# Root directory where individual handoff JSON files (orch-*.json) are written.
HANDOFFS_DIR = Path("/home/papa/atomizer/handoffs")
# Per-workflow run directories; each run is expected to contain a summary.json.
WORKFLOWS_DIR = HANDOFFS_DIR / "workflows"
|
|
|
|
def load_handoffs():
    """Load all individual handoff JSON files.

    Scans HANDOFFS_DIR for files matching ``orch-*.json``.  Each parsed
    payload is annotated with a ``_file`` key recording its source
    filename.  Collection is best-effort: unreadable or malformed files
    are skipped rather than aborting the whole scan.

    Returns:
        list[dict]: one dict per successfully parsed handoff file.
    """
    results = []
    for f in HANDOFFS_DIR.glob("orch-*.json"):
        try:
            with open(f) as fh:
                data = json.load(fh)
            data["_file"] = f.name
        # Narrowed from bare ``except Exception`` so real bugs surface:
        # OSError       -> file vanished / unreadable
        # JSONDecodeError -> malformed JSON
        # TypeError     -> valid JSON but not an object (e.g. a list)
        except (OSError, json.JSONDecodeError, TypeError):
            continue
        results.append(data)
    return results
|
|
|
|
def load_workflow_summaries():
    """Load all workflow summary.json files.

    Looks inside every entry of WORKFLOWS_DIR for a ``summary.json`` and
    parses it.  Best-effort: missing, unreadable or malformed summaries
    are skipped.

    Returns:
        list[dict]: one dict per successfully parsed workflow summary.
    """
    results = []
    # Guard: iterdir() raises FileNotFoundError on a missing directory,
    # unlike the glob() used in load_handoffs().  No runs yet -> empty.
    if not WORKFLOWS_DIR.is_dir():
        return results
    for d in WORKFLOWS_DIR.iterdir():
        summary = d / "summary.json"
        if not summary.exists():
            continue
        try:
            with open(summary) as fh:
                results.append(json.load(fh))
        # Narrowed from bare ``except Exception``: only expected
        # read/parse failures are swallowed.
        except (OSError, json.JSONDecodeError):
            continue
    return results
|
|
|
|
def compute_metrics(handoffs=None, workflows=None):
    """Aggregate per-agent and per-workflow orchestration metrics.

    Args:
        handoffs: optional pre-loaded list of handoff dicts; when None,
            loaded from disk via load_handoffs().
        workflows: optional pre-loaded list of workflow summary dicts;
            when None, loaded via load_workflow_summaries().

    Returns:
        dict with keys ``generated_at``, ``total_handoffs``,
        ``total_workflows``, ``agent_stats`` and ``workflow_stats``.
    """
    if handoffs is None:
        handoffs = load_handoffs()
    if workflows is None:
        workflows = load_workflow_summaries()

    # Per-agent stats
    agent_stats = defaultdict(lambda: {"total": 0, "complete": 0, "failed": 0, "partial": 0, "blocked": 0, "avg_latency_ms": 0, "latencies": []})

    for h in handoffs:
        agent = h.get("agent", "unknown")
        status = h.get("status", "unknown")
        agent_stats[agent]["total"] += 1
        # Only count statuses we have a counter for; unknown ones still
        # contribute to "total".
        if status in agent_stats[agent]:
            agent_stats[agent][status] += 1
        lat = h.get("latencyMs")
        # ``is not None`` so a legitimate 0 ms latency is still recorded
        # (truthiness check would silently drop it).
        if lat is not None:
            agent_stats[agent]["latencies"].append(lat)

    # Compute averages
    for agent, stats in agent_stats.items():
        lats = stats.pop("latencies")
        if lats:
            stats["avg_latency_ms"] = int(sum(lats) / len(lats))
            stats["min_latency_ms"] = min(lats)
            stats["max_latency_ms"] = max(lats)
        stats["success_rate"] = f"{stats['complete']/stats['total']*100:.0f}%" if stats["total"] > 0 else "N/A"

    # Workflow stats
    wf_stats = {"total": len(workflows), "complete": 0, "failed": 0, "partial": 0, "avg_duration_s": 0, "durations": []}
    for w in workflows:
        status = w.get("status", "unknown")
        if status == "complete":
            wf_stats["complete"] += 1
        elif status in ("failed", "error"):
            wf_stats["failed"] += 1
        else:
            # Anything not complete/failed (running, unknown, ...) counts
            # as partial.
            wf_stats["partial"] += 1
        dur = w.get("duration_s")
        # ``is not None`` so a 0-second duration is not dropped.
        if dur is not None:
            wf_stats["durations"].append(dur)

    durs = wf_stats.pop("durations")
    if durs:
        wf_stats["avg_duration_s"] = round(sum(durs) / len(durs), 1)
        wf_stats["min_duration_s"] = round(min(durs), 1)
        wf_stats["max_duration_s"] = round(max(durs), 1)
    wf_stats["success_rate"] = f"{wf_stats['complete']/wf_stats['total']*100:.0f}%" if wf_stats["total"] > 0 else "N/A"

    return {
        # datetime.utcnow() is deprecated; use an aware UTC timestamp and
        # strip the tzinfo to keep the original "...Z" format unchanged.
        "generated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "total_handoffs": len(handoffs),
        "total_workflows": len(workflows),
        "agent_stats": dict(agent_stats),
        "workflow_stats": wf_stats
    }
|
|
|
|
def main():
    """CLI entry point: print metrics as plain text or JSON (default)."""
    output_format = sys.argv[1] if len(sys.argv) > 1 else "json"
    metrics = compute_metrics()

    if output_format == "text":
        print("=== Orchestration Metrics ===")
        print(f"Generated: {metrics['generated_at']}")
        print(f"Total handoffs: {metrics['total_handoffs']}")
        print(f"Total workflows: {metrics['total_workflows']}")
        print()
        print("--- Per-Agent Stats ---")
        for agent, stats in sorted(metrics["agent_stats"].items()):
            print(f" {agent}: {stats['total']} tasks, {stats['success_rate']} success, avg {stats.get('avg_latency_ms', 'N/A')}ms")
        print()
        print("--- Workflow Stats ---")
        ws = metrics["workflow_stats"]
        print(f" {ws['total']} runs, {ws['success_rate']} success, avg {ws.get('avg_duration_s', 'N/A')}s")
    else:
        # "json" and any unrecognized format both fall back to JSON output,
        # exactly as the separate json/else branches did before.
        print(json.dumps(metrics, indent=2))


if __name__ == "__main__":
    main()
|