feat: add Atomizer HQ multi-agent cluster infrastructure
- 8-agent OpenClaw cluster (Manager, Tech-Lead, Secretary, Auditor, Optimizer, Study-Builder, NX-Expert, Webster) - Orchestration engine: orchestrate.py (sync delegation + handoffs) - Workflow engine: YAML-defined multi-step pipelines - Agent workspaces: SOUL.md, AGENTS.md, MEMORY.md per agent - Shared skills: delegate, orchestrate, atomizer-protocols - Capability registry (AGENTS_REGISTRY.json) - Cluster management: cluster.sh, systemd template - All secrets replaced with env var references
This commit is contained in:
117
hq/workspaces/shared/skills/orchestrate/metrics.py
Executable file
117
hq/workspaces/shared/skills/orchestrate/metrics.py
Executable file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Orchestration metrics — analyze handoff files and workflow runs."""
|
||||
|
||||
import json, os, sys, glob
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
HANDOFFS_DIR = Path("/home/papa/atomizer/handoffs")
|
||||
WORKFLOWS_DIR = HANDOFFS_DIR / "workflows"
|
||||
|
||||
def load_handoffs():
|
||||
"""Load all individual handoff JSON files."""
|
||||
results = []
|
||||
for f in HANDOFFS_DIR.glob("orch-*.json"):
|
||||
try:
|
||||
with open(f) as fh:
|
||||
data = json.load(fh)
|
||||
data["_file"] = f.name
|
||||
results.append(data)
|
||||
except Exception:
|
||||
pass
|
||||
return results
|
||||
|
||||
def load_workflow_summaries():
|
||||
"""Load all workflow summary.json files."""
|
||||
results = []
|
||||
for d in WORKFLOWS_DIR.iterdir():
|
||||
summary = d / "summary.json"
|
||||
if summary.exists():
|
||||
try:
|
||||
with open(summary) as fh:
|
||||
data = json.load(fh)
|
||||
results.append(data)
|
||||
except Exception:
|
||||
pass
|
||||
return results
|
||||
|
||||
def compute_metrics():
|
||||
handoffs = load_handoffs()
|
||||
workflows = load_workflow_summaries()
|
||||
|
||||
# Per-agent stats
|
||||
agent_stats = defaultdict(lambda: {"total": 0, "complete": 0, "failed": 0, "partial": 0, "blocked": 0, "avg_latency_ms": 0, "latencies": []})
|
||||
|
||||
for h in handoffs:
|
||||
agent = h.get("agent", "unknown")
|
||||
status = h.get("status", "unknown")
|
||||
agent_stats[agent]["total"] += 1
|
||||
if status in agent_stats[agent]:
|
||||
agent_stats[agent][status] += 1
|
||||
lat = h.get("latencyMs")
|
||||
if lat:
|
||||
agent_stats[agent]["latencies"].append(lat)
|
||||
|
||||
# Compute averages
|
||||
for agent, stats in agent_stats.items():
|
||||
lats = stats.pop("latencies")
|
||||
if lats:
|
||||
stats["avg_latency_ms"] = int(sum(lats) / len(lats))
|
||||
stats["min_latency_ms"] = min(lats)
|
||||
stats["max_latency_ms"] = max(lats)
|
||||
stats["success_rate"] = f"{stats['complete']/stats['total']*100:.0f}%" if stats["total"] > 0 else "N/A"
|
||||
|
||||
# Workflow stats
|
||||
wf_stats = {"total": len(workflows), "complete": 0, "failed": 0, "partial": 0, "avg_duration_s": 0, "durations": []}
|
||||
for w in workflows:
|
||||
status = w.get("status", "unknown")
|
||||
if status == "complete":
|
||||
wf_stats["complete"] += 1
|
||||
elif status in ("failed", "error"):
|
||||
wf_stats["failed"] += 1
|
||||
else:
|
||||
wf_stats["partial"] += 1
|
||||
dur = w.get("duration_s")
|
||||
if dur:
|
||||
wf_stats["durations"].append(dur)
|
||||
|
||||
durs = wf_stats.pop("durations")
|
||||
if durs:
|
||||
wf_stats["avg_duration_s"] = round(sum(durs) / len(durs), 1)
|
||||
wf_stats["min_duration_s"] = round(min(durs), 1)
|
||||
wf_stats["max_duration_s"] = round(max(durs), 1)
|
||||
wf_stats["success_rate"] = f"{wf_stats['complete']/wf_stats['total']*100:.0f}%" if wf_stats["total"] > 0 else "N/A"
|
||||
|
||||
return {
|
||||
"generated_at": datetime.utcnow().isoformat() + "Z",
|
||||
"total_handoffs": len(handoffs),
|
||||
"total_workflows": len(workflows),
|
||||
"agent_stats": dict(agent_stats),
|
||||
"workflow_stats": wf_stats
|
||||
}
|
||||
|
||||
def main():
|
||||
fmt = sys.argv[1] if len(sys.argv) > 1 else "json"
|
||||
metrics = compute_metrics()
|
||||
|
||||
if fmt == "json":
|
||||
print(json.dumps(metrics, indent=2))
|
||||
elif fmt == "text":
|
||||
print("=== Orchestration Metrics ===")
|
||||
print(f"Generated: {metrics['generated_at']}")
|
||||
print(f"Total handoffs: {metrics['total_handoffs']}")
|
||||
print(f"Total workflows: {metrics['total_workflows']}")
|
||||
print()
|
||||
print("--- Per-Agent Stats ---")
|
||||
for agent, stats in sorted(metrics["agent_stats"].items()):
|
||||
print(f" {agent}: {stats['total']} tasks, {stats['success_rate']} success, avg {stats.get('avg_latency_ms', 'N/A')}ms")
|
||||
print()
|
||||
print("--- Workflow Stats ---")
|
||||
ws = metrics["workflow_stats"]
|
||||
print(f" {ws['total']} runs, {ws['success_rate']} success, avg {ws.get('avg_duration_s', 'N/A')}s")
|
||||
else:
|
||||
print(json.dumps(metrics, indent=2))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user