#!/usr/bin/env python3
"""Orchestration metrics — analyze handoff files and workflow runs.

Reads per-task handoff files (``orch-*.json``) and per-run workflow
summaries (``workflows/*/summary.json``) under HANDOFFS_DIR, then
aggregates per-agent success rates / latencies and per-workflow
success rates / durations. Prints JSON (default) or a text report.
"""
import json
import os
import sys
import glob
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

HANDOFFS_DIR = Path("/home/papa/atomizer/handoffs")
WORKFLOWS_DIR = HANDOFFS_DIR / "workflows"

# Handoff statuses tracked as per-agent counters. Anything else only
# bumps "total" — checking membership in the stats dict itself (as the
# original did) let a stray status like "total" corrupt the counters.
_HANDOFF_STATUSES = frozenset({"complete", "failed", "partial", "blocked"})


def load_handoffs():
    """Load all individual handoff JSON files.

    Returns:
        list[dict]: one dict per readable ``orch-*.json`` file, each
        tagged with a ``_file`` key holding the source filename.
        Unreadable or malformed files are skipped (best-effort).
    """
    results = []
    for path in HANDOFFS_DIR.glob("orch-*.json"):
        try:
            with open(path) as fh:
                data = json.load(fh)
            data["_file"] = path.name
            results.append(data)
        except (OSError, json.JSONDecodeError, UnicodeDecodeError):
            # Best-effort: a single corrupt handoff must not abort the
            # whole report. (Narrowed from a bare `except Exception`.)
            continue
    return results


def load_workflow_summaries():
    """Load all workflow summary.json files (best-effort, like handoffs).

    Returns:
        list[dict]: parsed summaries; empty if the workflows directory
        does not exist yet.
    """
    results = []
    # Guard: iterdir() raises FileNotFoundError on a missing directory
    # (e.g. before the first workflow run).
    if not WORKFLOWS_DIR.is_dir():
        return results
    for d in WORKFLOWS_DIR.iterdir():
        summary = d / "summary.json"
        if summary.exists():
            try:
                with open(summary) as fh:
                    results.append(json.load(fh))
            except (OSError, json.JSONDecodeError, UnicodeDecodeError):
                continue
    return results


def compute_metrics():
    """Aggregate per-agent and per-workflow statistics.

    Returns:
        dict: JSON-serializable report with ``generated_at`` (UTC ISO
        timestamp, ``Z`` suffix), ``total_handoffs``,
        ``total_workflows``, ``agent_stats`` and ``workflow_stats``.
    """
    handoffs = load_handoffs()
    workflows = load_workflow_summaries()

    # --- Per-agent stats -------------------------------------------------
    agent_stats = defaultdict(lambda: {
        "total": 0, "complete": 0, "failed": 0, "partial": 0,
        "blocked": 0, "avg_latency_ms": 0, "latencies": [],
    })
    for h in handoffs:
        stats = agent_stats[h.get("agent", "unknown")]
        status = h.get("status", "unknown")
        stats["total"] += 1
        if status in _HANDOFF_STATUSES:
            stats[status] += 1
        lat = h.get("latencyMs")
        if lat is not None:  # `is not None` keeps legitimate 0 ms samples
            stats["latencies"].append(lat)

    # Collapse raw latency lists into summary figures.
    for agent, stats in agent_stats.items():
        lats = stats.pop("latencies")
        if lats:
            stats["avg_latency_ms"] = int(sum(lats) / len(lats))
            stats["min_latency_ms"] = min(lats)
            stats["max_latency_ms"] = max(lats)
        stats["success_rate"] = (
            f"{stats['complete'] / stats['total'] * 100:.0f}%"
            if stats["total"] > 0 else "N/A"
        )

    # --- Workflow stats --------------------------------------------------
    wf_stats = {"total": len(workflows), "complete": 0, "failed": 0,
                "partial": 0, "avg_duration_s": 0, "durations": []}
    for w in workflows:
        status = w.get("status", "unknown")
        if status == "complete":
            wf_stats["complete"] += 1
        elif status in ("failed", "error"):
            wf_stats["failed"] += 1
        else:
            # Anything that is neither complete nor failed/error counts
            # as partial (includes "unknown").
            wf_stats["partial"] += 1
        dur = w.get("duration_s")
        if dur is not None:  # keep 0-second durations too
            wf_stats["durations"].append(dur)
    durs = wf_stats.pop("durations")
    if durs:
        wf_stats["avg_duration_s"] = round(sum(durs) / len(durs), 1)
        wf_stats["min_duration_s"] = round(min(durs), 1)
        wf_stats["max_duration_s"] = round(max(durs), 1)
    wf_stats["success_rate"] = (
        f"{wf_stats['complete'] / wf_stats['total'] * 100:.0f}%"
        if wf_stats["total"] > 0 else "N/A"
    )

    # datetime.utcnow() is deprecated (3.12+); an aware UTC datetime with
    # the "+00:00" offset rewritten yields the identical "...Z" string.
    generated = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
    return {
        "generated_at": generated,
        "total_handoffs": len(handoffs),
        "total_workflows": len(workflows),
        "agent_stats": dict(agent_stats),
        "workflow_stats": wf_stats,
    }


def main():
    """CLI entry point: ``metrics.py [json|text]``.

    Unknown format arguments fall through to JSON output.
    """
    fmt = sys.argv[1] if len(sys.argv) > 1 else "json"
    metrics = compute_metrics()
    if fmt == "text":
        print("=== Orchestration Metrics ===")
        print(f"Generated: {metrics['generated_at']}")
        print(f"Total handoffs: {metrics['total_handoffs']}")
        print(f"Total workflows: {metrics['total_workflows']}")
        print()
        print("--- Per-Agent Stats ---")
        for agent, stats in sorted(metrics["agent_stats"].items()):
            print(f" {agent}: {stats['total']} tasks, {stats['success_rate']} success, avg {stats.get('avg_latency_ms', 'N/A')}ms")
        print()
        print("--- Workflow Stats ---")
        ws = metrics["workflow_stats"]
        print(f" {ws['total']} runs, {ws['success_rate']} success, avg {ws.get('avg_duration_s', 'N/A')}s")
    else:
        # "json" and any unrecognized value both emit pretty-printed JSON.
        print(json.dumps(metrics, indent=2))


if __name__ == "__main__":
    main()