feat: add Atomizer HQ multi-agent cluster infrastructure

- 8-agent OpenClaw cluster (Manager, Tech-Lead, Secretary, Auditor, Optimizer, Study-Builder, NX-Expert, Webster) - Orchestration engine: orchestrate.py (sync delegation + handoffs) - Workflow engine: YAML-defined multi-step pipelines - Agent workspaces: SOUL.md, AGENTS.md, MEMORY.md per agent - Shared skills: delegate, orchestrate, atomizer-protocols - Capability registry (AGENTS_REGISTRY.json) - Cluster management: cluster.sh, systemd template - All secrets replaced with env var references
2026-02-15 21:18:18 +00:00
parent d6a1d6eee1
commit 3289a76e19
170 changed files with 24949 additions and 0 deletions
--- a/hq/workspaces/shared/skills/orchestrate/metrics.py
+++ b/hq/workspaces/shared/skills/orchestrate/metrics.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""Orchestration metrics — analyze handoff files and workflow runs."""
+
+import json, os, sys, glob
+from collections import defaultdict
+from datetime import datetime
+from pathlib import Path
+
+HANDOFFS_DIR = Path("/home/papa/atomizer/handoffs")
+WORKFLOWS_DIR = HANDOFFS_DIR / "workflows"
+
+def load_handoffs():
+    """Load all individual handoff JSON files."""
+    results = []
+    for f in HANDOFFS_DIR.glob("orch-*.json"):
+        try:
+            with open(f) as fh:
+                data = json.load(fh)
+                data["_file"] = f.name
+                results.append(data)
+        except Exception:
+            pass
+    return results
+
+def load_workflow_summaries():
+    """Load all workflow summary.json files."""
+    results = []
+    for d in WORKFLOWS_DIR.iterdir():
+        summary = d / "summary.json"
+        if summary.exists():
+            try:
+                with open(summary) as fh:
+                    data = json.load(fh)
+                    results.append(data)
+            except Exception:
+                pass
+    return results
+
+def compute_metrics():
+    handoffs = load_handoffs()
+    workflows = load_workflow_summaries()
+
+    # Per-agent stats
+    agent_stats = defaultdict(lambda: {"total": 0, "complete": 0, "failed": 0, "partial": 0, "blocked": 0, "avg_latency_ms": 0, "latencies": []})
+
+    for h in handoffs:
+        agent = h.get("agent", "unknown")
+        status = h.get("status", "unknown")
+        agent_stats[agent]["total"] += 1
+        if status in agent_stats[agent]:
+            agent_stats[agent][status] += 1
+        lat = h.get("latencyMs")
+        if lat:
+            agent_stats[agent]["latencies"].append(lat)
+
+    # Compute averages
+    for agent, stats in agent_stats.items():
+        lats = stats.pop("latencies")
+        if lats:
+            stats["avg_latency_ms"] = int(sum(lats) / len(lats))
+            stats["min_latency_ms"] = min(lats)
+            stats["max_latency_ms"] = max(lats)
+        stats["success_rate"] = f"{stats['complete']/stats['total']*100:.0f}%" if stats["total"] > 0 else "N/A"
+
+    # Workflow stats
+    wf_stats = {"total": len(workflows), "complete": 0, "failed": 0, "partial": 0, "avg_duration_s": 0, "durations": []}
+    for w in workflows:
+        status = w.get("status", "unknown")
+        if status == "complete":
+            wf_stats["complete"] += 1
+        elif status in ("failed", "error"):
+            wf_stats["failed"] += 1
+        else:
+            wf_stats["partial"] += 1
+        dur = w.get("duration_s")
+        if dur:
+            wf_stats["durations"].append(dur)
+
+    durs = wf_stats.pop("durations")
+    if durs:
+        wf_stats["avg_duration_s"] = round(sum(durs) / len(durs), 1)
+        wf_stats["min_duration_s"] = round(min(durs), 1)
+        wf_stats["max_duration_s"] = round(max(durs), 1)
+    wf_stats["success_rate"] = f"{wf_stats['complete']/wf_stats['total']*100:.0f}%" if wf_stats["total"] > 0 else "N/A"
+
+    return {
+        "generated_at": datetime.utcnow().isoformat() + "Z",
+        "total_handoffs": len(handoffs),
+        "total_workflows": len(workflows),
+        "agent_stats": dict(agent_stats),
+        "workflow_stats": wf_stats
+    }
+
+def main():
+    fmt = sys.argv[1] if len(sys.argv) > 1 else "json"
+    metrics = compute_metrics()
+
+    if fmt == "json":
+        print(json.dumps(metrics, indent=2))
+    elif fmt == "text":
+        print("=== Orchestration Metrics ===")
+        print(f"Generated: {metrics['generated_at']}")
+        print(f"Total handoffs: {metrics['total_handoffs']}")
+        print(f"Total workflows: {metrics['total_workflows']}")
+        print()
+        print("--- Per-Agent Stats ---")
+        for agent, stats in sorted(metrics["agent_stats"].items()):
+            print(f"  {agent}: {stats['total']} tasks, {stats['success_rate']} success, avg {stats.get('avg_latency_ms', 'N/A')}ms")
+        print()
+        print("--- Workflow Stats ---")
+        ws = metrics["workflow_stats"]
+        print(f"  {ws['total']} runs, {ws['success_rate']} success, avg {ws.get('avg_duration_s', 'N/A')}s")
+    else:
+        print(json.dumps(metrics, indent=2))
+
+if __name__ == "__main__":
+    main()