- 8-agent OpenClaw cluster (Manager, Tech-Lead, Secretary, Auditor, Optimizer, Study-Builder, NX-Expert, Webster) - Orchestration engine: orchestrate.py (sync delegation + handoffs) - Workflow engine: YAML-defined multi-step pipelines - Agent workspaces: SOUL.md, AGENTS.md, MEMORY.md per agent - Shared skills: delegate, orchestrate, atomizer-protocols - Capability registry (AGENTS_REGISTRY.json) - Cluster management: cluster.sh, systemd template - All secrets replaced with env var references
118 lines
4.1 KiB
Python
Executable File
118 lines
4.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Orchestration metrics — analyze handoff files and workflow runs."""
|
|
|
|
import glob
import json
import os
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
|
|
|
|
# Root directory where individual handoff JSON files (orch-*.json) are written.
HANDOFFS_DIR = Path("/home/papa/atomizer/handoffs")
# Per-workflow run directories; each run is expected to contain a summary.json.
WORKFLOWS_DIR = HANDOFFS_DIR / "workflows"
|
|
|
|
def load_handoffs():
    """Load all individual handoff JSON files.

    Scans HANDOFFS_DIR for files matching ``orch-*.json``.  Each parsed
    payload is annotated with a ``_file`` key recording its source
    filename.  Collection is best-effort: unreadable or malformed files
    are skipped rather than aborting the whole scan.

    Returns:
        list[dict]: one dict per successfully parsed handoff file.
    """
    results = []
    for f in HANDOFFS_DIR.glob("orch-*.json"):
        try:
            with open(f) as fh:
                data = json.load(fh)
            data["_file"] = f.name
        # Narrowed from bare ``except Exception`` so real bugs surface:
        # OSError       -> file vanished / unreadable
        # JSONDecodeError -> malformed JSON
        # TypeError     -> valid JSON but not an object (e.g. a list)
        except (OSError, json.JSONDecodeError, TypeError):
            continue
        results.append(data)
    return results
|
|
|
|
def load_workflow_summaries():
    """Load all workflow summary.json files.

    Looks inside every entry of WORKFLOWS_DIR for a ``summary.json`` and
    parses it.  Best-effort: missing, unreadable or malformed summaries
    are skipped.

    Returns:
        list[dict]: one dict per successfully parsed workflow summary.
    """
    results = []
    # Guard: iterdir() raises FileNotFoundError on a missing directory,
    # unlike the glob() used in load_handoffs().  No runs yet -> empty.
    if not WORKFLOWS_DIR.is_dir():
        return results
    for d in WORKFLOWS_DIR.iterdir():
        summary = d / "summary.json"
        if not summary.exists():
            continue
        try:
            with open(summary) as fh:
                results.append(json.load(fh))
        # Narrowed from bare ``except Exception``: only expected
        # read/parse failures are swallowed.
        except (OSError, json.JSONDecodeError):
            continue
    return results
|
|
|
|
def compute_metrics(handoffs=None, workflows=None):
    """Aggregate per-agent and per-workflow orchestration metrics.

    Args:
        handoffs: optional pre-loaded list of handoff dicts; when None,
            loaded from disk via load_handoffs().
        workflows: optional pre-loaded list of workflow summary dicts;
            when None, loaded via load_workflow_summaries().

    Returns:
        dict with keys ``generated_at``, ``total_handoffs``,
        ``total_workflows``, ``agent_stats`` and ``workflow_stats``.
    """
    if handoffs is None:
        handoffs = load_handoffs()
    if workflows is None:
        workflows = load_workflow_summaries()

    # Per-agent stats
    agent_stats = defaultdict(lambda: {"total": 0, "complete": 0, "failed": 0, "partial": 0, "blocked": 0, "avg_latency_ms": 0, "latencies": []})

    for h in handoffs:
        agent = h.get("agent", "unknown")
        status = h.get("status", "unknown")
        agent_stats[agent]["total"] += 1
        # Only count statuses we have a counter for; unknown ones still
        # contribute to "total".
        if status in agent_stats[agent]:
            agent_stats[agent][status] += 1
        lat = h.get("latencyMs")
        # ``is not None`` so a legitimate 0 ms latency is still recorded
        # (truthiness check would silently drop it).
        if lat is not None:
            agent_stats[agent]["latencies"].append(lat)

    # Compute averages
    for agent, stats in agent_stats.items():
        lats = stats.pop("latencies")
        if lats:
            stats["avg_latency_ms"] = int(sum(lats) / len(lats))
            stats["min_latency_ms"] = min(lats)
            stats["max_latency_ms"] = max(lats)
        stats["success_rate"] = f"{stats['complete']/stats['total']*100:.0f}%" if stats["total"] > 0 else "N/A"

    # Workflow stats
    wf_stats = {"total": len(workflows), "complete": 0, "failed": 0, "partial": 0, "avg_duration_s": 0, "durations": []}
    for w in workflows:
        status = w.get("status", "unknown")
        if status == "complete":
            wf_stats["complete"] += 1
        elif status in ("failed", "error"):
            wf_stats["failed"] += 1
        else:
            # Anything not complete/failed (running, unknown, ...) counts
            # as partial.
            wf_stats["partial"] += 1
        dur = w.get("duration_s")
        # ``is not None`` so a 0-second duration is not dropped.
        if dur is not None:
            wf_stats["durations"].append(dur)

    durs = wf_stats.pop("durations")
    if durs:
        wf_stats["avg_duration_s"] = round(sum(durs) / len(durs), 1)
        wf_stats["min_duration_s"] = round(min(durs), 1)
        wf_stats["max_duration_s"] = round(max(durs), 1)
    wf_stats["success_rate"] = f"{wf_stats['complete']/wf_stats['total']*100:.0f}%" if wf_stats["total"] > 0 else "N/A"

    return {
        # datetime.utcnow() is deprecated; use an aware UTC timestamp and
        # strip the tzinfo to keep the original "...Z" format unchanged.
        "generated_at": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "total_handoffs": len(handoffs),
        "total_workflows": len(workflows),
        "agent_stats": dict(agent_stats),
        "workflow_stats": wf_stats
    }
|
|
|
|
def main():
    """CLI entry point: print metrics as plain text or JSON (default)."""
    output_format = sys.argv[1] if len(sys.argv) > 1 else "json"
    metrics = compute_metrics()

    if output_format == "text":
        print("=== Orchestration Metrics ===")
        print(f"Generated: {metrics['generated_at']}")
        print(f"Total handoffs: {metrics['total_handoffs']}")
        print(f"Total workflows: {metrics['total_workflows']}")
        print()
        print("--- Per-Agent Stats ---")
        for agent, stats in sorted(metrics["agent_stats"].items()):
            print(f" {agent}: {stats['total']} tasks, {stats['success_rate']} success, avg {stats.get('avg_latency_ms', 'N/A')}ms")
        print()
        print("--- Workflow Stats ---")
        ws = metrics["workflow_stats"]
        print(f" {ws['total']} runs, {ws['success_rate']} success, avg {ws.get('avg_duration_s', 'N/A')}s")
    else:
        # "json" and any unrecognized format both fall back to JSON output,
        # exactly as the separate json/else branches did before.
        print(json.dumps(metrics, indent=2))


if __name__ == "__main__":
    main()
|