#!/usr/bin/env python3 """YAML workflow engine for Atomizer orchestration.""" from __future__ import annotations import argparse import json import os import re import subprocess import sys import threading import time import uuid from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone from pathlib import Path from typing import Any try: import yaml except ImportError: print(json.dumps({"status": "error", "error": "PyYAML is required (pip install pyyaml)"}, indent=2)) sys.exit(1) WORKFLOWS_DIR = Path("/home/papa/atomizer/workspaces/shared/workflows") ORCHESTRATE_PY = Path("/home/papa/atomizer/workspaces/shared/skills/orchestrate/orchestrate.py") HANDOFF_WORKFLOWS_DIR = Path("/home/papa/atomizer/handoffs/workflows") def now_iso() -> str: return datetime.now(timezone.utc).isoformat() def parse_inputs(items: list[str]) -> dict[str, Any]: parsed: dict[str, Any] = {} for item in items: if "=" not in item: raise ValueError(f"Invalid --input '{item}', expected key=value") k, v = item.split("=", 1) parsed[k.strip()] = v.strip() return parsed def resolve_workflow_path(name_or_path: str) -> Path: p = Path(name_or_path) if p.exists(): return p candidates = [WORKFLOWS_DIR / name_or_path, WORKFLOWS_DIR / f"{name_or_path}.yaml", WORKFLOWS_DIR / f"{name_or_path}.yml"] for c in candidates: if c.exists(): return c raise FileNotFoundError(f"Workflow not found: {name_or_path}") def load_workflow(path: Path) -> dict[str, Any]: data = yaml.safe_load(path.read_text()) if not isinstance(data, dict): raise ValueError("Workflow YAML must be an object") if not isinstance(data.get("steps"), list) or not data["steps"]: raise ValueError("Workflow must define non-empty 'steps'") return data def validate_graph(steps: list[dict[str, Any]]) -> tuple[dict[str, dict[str, Any]], dict[str, set[str]], dict[str, set[str]], list[list[str]]]: step_map: dict[str, dict[str, Any]] = {} deps: dict[str, set[str]] = {} reverse: dict[str, set[str]] = {} for step in steps: sid = step.get("id") if not sid or not isinstance(sid, str): raise ValueError("Each step needs string 'id'") if sid in step_map: raise ValueError(f"Duplicate step id: {sid}") step_map[sid] = step deps[sid] = set(step.get("depends_on", []) or []) reverse[sid] = set() for sid, dset in deps.items(): for dep in dset: if dep not in step_map: raise ValueError(f"Step '{sid}' depends on unknown step '{dep}'") reverse[dep].add(sid) # topological layering + cycle check indeg = {sid: len(dset) for sid, dset in deps.items()} ready = sorted([sid for sid, d in indeg.items() if d == 0]) visited = 0 layers: list[list[str]] = [] while ready: layer = list(ready) layers.append(layer) visited += len(layer) next_ready: list[str] = [] for sid in layer: for child in sorted(reverse[sid]): indeg[child] -= 1 if indeg[child] == 0: next_ready.append(child) ready = sorted(next_ready) if visited != len(step_map): cycle_nodes = [sid for sid, d in indeg.items() if d > 0] raise ValueError(f"Dependency cycle detected involving: {', '.join(cycle_nodes)}") return step_map, deps, reverse, layers _VAR_RE = re.compile(r"\{([^{}]+)\}") def substitute(text: str, step_outputs: dict[str, Any], inputs: dict[str, Any]) -> str: def repl(match: re.Match[str]) -> str: key = match.group(1).strip() if key.startswith("inputs."): iv = key.split(".", 1)[1] if iv not in inputs: return match.group(0) return str(inputs[iv]) if key in step_outputs: val = step_outputs[key] if isinstance(val, (dict, list)): return json.dumps(val, ensure_ascii=False) return str(val) return match.group(0) return _VAR_RE.sub(repl, text) def approval_check(step: dict[str, Any], non_interactive: bool) -> bool: gate = step.get("approval_gate") if not gate: return True if non_interactive: print(f"WARNING: non-interactive mode, skipping approval gate '{gate}' for step '{step['id']}'", file=sys.stderr) return True print(f"Approval gate required for step '{step['id']}' ({gate}). Approve? [yes/no]: ", end="", flush=True) response = sys.stdin.readline().strip().lower() return response in {"y", "yes"} def run_orchestrate(agent: str, task: str, timeout_s: int, caller: str, workflow_run_id: str, step_id: str, retries: int) -> dict[str, Any]: cmd = [ "python3", str(ORCHESTRATE_PY), agent, task, "--timeout", str(timeout_s), "--caller", caller, "--workflow-id", workflow_run_id, "--step-id", step_id, "--retries", str(max(1, retries)), "--format", "json", ] proc = subprocess.run(cmd, capture_output=True, text=True) out = (proc.stdout or "").strip() if not out: return { "status": "failed", "result": None, "notes": f"No stdout from orchestrate.py; stderr: {(proc.stderr or '').strip()[:1000]}", "exitCode": proc.returncode, } try: data = json.loads(out) except json.JSONDecodeError: return { "status": "failed", "result": out, "notes": f"Non-JSON response from orchestrate.py; stderr: {(proc.stderr or '').strip()[:1000]}", "exitCode": proc.returncode, } data["exitCode"] = proc.returncode if proc.stderr: data["stderr"] = proc.stderr.strip()[:2000] return data def validation_passed(validation_result: dict[str, Any]) -> bool: if validation_result.get("status") not in {"complete", "partial"}: return False body = str(validation_result.get("result", "")).strip() # If validator returned JSON in result, try to parse decision. try: obj = json.loads(body) decision = str(obj.get("decision", "")).lower() if decision in {"accept", "approved", "pass", "passed"}: return True if decision in {"reject", "fail", "failed"}: return False except Exception: pass lowered = body.lower() if "reject" in lowered or "fail" in lowered: return False return True def execute_step( step: dict[str, Any], inputs: dict[str, Any], step_outputs: dict[str, Any], caller: str, workflow_run_id: str, remaining_timeout: int, non_interactive: bool, out_dir: Path, lock: threading.Lock, ) -> dict[str, Any]: sid = step["id"] start = time.time() if not approval_check(step, non_interactive): result = { "step_id": sid, "status": "failed", "error": "approval_denied", "started_at": now_iso(), "finished_at": now_iso(), "duration_s": 0, } (out_dir / f"{sid}.json").write_text(json.dumps(result, indent=2)) return result task = substitute(str(step.get("task", "")), step_outputs, inputs) agent = step.get("agent") if not agent: result = { "step_id": sid, "status": "failed", "error": "missing_agent", "started_at": now_iso(), "finished_at": now_iso(), "duration_s": 0, } (out_dir / f"{sid}.json").write_text(json.dumps(result, indent=2)) return result step_timeout = int(step.get("timeout", 300)) timeout_s = max(1, min(step_timeout, remaining_timeout)) retries = int(step.get("retries", 1)) run_res = run_orchestrate(agent, task, timeout_s, caller, workflow_run_id, sid, retries) step_result: dict[str, Any] = { "step_id": sid, "agent": agent, "status": run_res.get("status", "failed"), "result": run_res.get("result"), "notes": run_res.get("notes"), "run": run_res, "started_at": datetime.fromtimestamp(start, tz=timezone.utc).isoformat(), "finished_at": now_iso(), "duration_s": round(time.time() - start, 3), } validation_cfg = step.get("validation") if validation_cfg and step_result["status"] in {"complete", "partial"}: v_agent = validation_cfg.get("agent") criteria = validation_cfg.get("criteria", "Validate this output for quality and correctness.") if v_agent: v_task = ( "Validate the following workflow step output. Return a decision in JSON like " "{\"decision\":\"accept|reject\",\"reason\":\"...\"}.\n\n" f"Step ID: {sid}\n" f"Criteria: {criteria}\n\n" f"Output to validate:\n{step_result.get('result')}" ) v_timeout = int(validation_cfg.get("timeout", min(180, timeout_s))) validation_res = run_orchestrate(v_agent, v_task, max(1, v_timeout), caller, workflow_run_id, f"{sid}__validation", 1) step_result["validation"] = validation_res if not validation_passed(validation_res): step_result["status"] = "failed" step_result["error"] = "validation_failed" step_result["notes"] = f"Validation failed by {v_agent}: {validation_res.get('result') or validation_res.get('notes')}" with lock: (out_dir / f"{sid}.json").write_text(json.dumps(step_result, indent=2)) return step_result def main() -> None: parser = argparse.ArgumentParser(description="Run YAML workflows using orchestrate.py") parser.add_argument("workflow") parser.add_argument("--input", action="append", default=[], help="key=value (repeatable)") parser.add_argument("--caller", default="manager") parser.add_argument("--dry-run", action="store_true") parser.add_argument("--non-interactive", action="store_true") parser.add_argument("--timeout", type=int, default=1800, help="Overall workflow timeout seconds") args = parser.parse_args() wf_path = resolve_workflow_path(args.workflow) wf = load_workflow(wf_path) inputs = parse_inputs(args.input) steps = wf["steps"] step_map, deps, reverse, layers = validate_graph(steps) workflow_run_id = f"wf-{int(time.time())}-{uuid.uuid4().hex[:8]}" out_dir = HANDOFF_WORKFLOWS_DIR / workflow_run_id out_dir.mkdir(parents=True, exist_ok=True) if args.dry_run: plan = { "status": "dry_run", "workflow": wf.get("name", wf_path.name), "workflow_file": str(wf_path), "workflow_run_id": workflow_run_id, "inputs": inputs, "steps": [ { "id": s["id"], "agent": s.get("agent"), "depends_on": s.get("depends_on", []), "timeout": s.get("timeout", 300), "retries": s.get("retries", 1), "approval_gate": s.get("approval_gate"), "has_validation": bool(s.get("validation")), } for s in steps ], "execution_layers": layers, "result_dir": str(out_dir), } print(json.dumps(plan, indent=2)) return started = time.time() deadline = started + args.timeout lock = threading.Lock() state: dict[str, str] = {sid: "pending" for sid in step_map} step_results: dict[str, dict[str, Any]] = {} step_outputs: dict[str, Any] = {} overall_status = "complete" max_workers = max(1, min(len(step_map), (os.cpu_count() or 4))) while True: if time.time() >= deadline: overall_status = "timeout" break pending = [sid for sid, st in state.items() if st == "pending"] if not pending: break ready = [] for sid in pending: if all(state[d] in {"complete", "skipped"} for d in deps[sid]): ready.append(sid) if not ready: # deadlock due to upstream abort/fail on pending deps if any(st == "aborted" for st in state.values()): break overall_status = "failed" break futures = {} with ThreadPoolExecutor(max_workers=max_workers) as pool: for sid in ready: state[sid] = "running" remaining_timeout = int(max(1, deadline - time.time())) futures[pool.submit( execute_step, step_map[sid], inputs, step_outputs, args.caller, workflow_run_id, remaining_timeout, args.non_interactive, out_dir, lock, )] = sid for fut in as_completed(futures): sid = futures[fut] res = fut.result() step_results[sid] = res st = res.get("status", "failed") if st in {"complete", "partial"}: state[sid] = "complete" step_outputs[sid] = res.get("result") out_name = step_map[sid].get("output") if out_name: step_outputs[str(out_name)] = res.get("result") else: on_fail = str(step_map[sid].get("on_fail", "abort")).lower() if on_fail == "skip": state[sid] = "skipped" overall_status = "partial" else: state[sid] = "failed" overall_status = "failed" # abort all pending steps for psid in list(state): if state[psid] == "pending": state[psid] = "aborted" finished = time.time() if overall_status == "complete" and any(st == "skipped" for st in state.values()): overall_status = "partial" summary = { "status": overall_status, "workflow": wf.get("name", wf_path.name), "workflow_file": str(wf_path), "workflow_run_id": workflow_run_id, "caller": args.caller, "started_at": datetime.fromtimestamp(started, tz=timezone.utc).isoformat(), "finished_at": datetime.fromtimestamp(finished, tz=timezone.utc).isoformat(), "duration_s": round(finished - started, 3), "timeout_s": args.timeout, "inputs": inputs, "state": state, "results": step_results, "result_dir": str(out_dir), "notifications": wf.get("notifications", {}), } (out_dir / "summary.json").write_text(json.dumps(summary, indent=2)) print(json.dumps(summary, indent=2)) if overall_status in {"complete", "partial"}: sys.exit(0) sys.exit(1) if __name__ == "__main__": main()