"""OpenClaw state importer — one-way pull from clawdbot into AtoCore.

Reads OpenClaw's file continuity layer (SOUL.md, USER.md, MODEL-ROUTING.md,
MEMORY.md, memory/YYYY-MM-DD.md) from the T420 via SSH and imports them
into AtoCore as candidate memories. Hash-based delta detection — only
re-imports files that changed since the last run.

Classification per codex's integration proposal:
- SOUL.md -> identity candidates
- USER.md -> identity + preference candidates
- MODEL-ROUTING.md -> adaptation candidates (routing rules)
- MEMORY.md -> long-term memory candidates (type varies)
- memory/YYYY-MM-DD.md -> episodic memory candidates (daily logs)
- heartbeat-state.json -> skipped (ops metadata only)

All candidates land as status=candidate. Auto-triage filters noise.
This importer is conservative: it doesn't promote directly, it just
feeds signal. The triage pipeline decides what graduates to active.

Usage:
    python3 scripts/import_openclaw_state.py \
        --base-url http://localhost:8100 \
        --openclaw-host papa@192.168.86.39 \
        --openclaw-path /home/papa/openclaw-workspace

Runs nightly via cron (added as Step 2c in cron-backup.sh).
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
|
import hashlib
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import shutil
|
|
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
import tempfile
|
|
|
|
|
import urllib.error
|
|
|
|
|
import urllib.request
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
# AtoCore API endpoint; override via ATOCORE_BASE_URL.
DEFAULT_BASE_URL = os.environ.get("ATOCORE_BASE_URL", "http://localhost:8100")
# SSH target (user@host) for the machine hosting the OpenClaw workspace.
DEFAULT_OPENCLAW_HOST = os.environ.get("ATOCORE_OPENCLAW_HOST", "papa@192.168.86.39")
# Real OpenClaw workspace root. Per the fix that set this default: the
# .openclaw/workspace-* dirs were empty templates; the live workspace is
# /home/papa/clawd. Override via ATOCORE_OPENCLAW_PATH.
DEFAULT_OPENCLAW_PATH = os.environ.get("ATOCORE_OPENCLAW_PATH", "/home/papa/clawd")

# Files to pull and how to classify them
DURABLE_FILES = [
    ("SOUL.md", "identity"),
    ("USER.md", "identity"),
    ("MODEL-ROUTING.md", "adaptation"),
    ("MEMORY.md", "memory"),  # type parsed from entries
]
# Daily episodic logs, relative to the workspace root.
DAILY_MEMORY_GLOB = "memory/*.md"
# project_state key under which the per-file content-hash map is persisted.
HASH_STATE_KEY = "openclaw_import_hashes"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def api_get(base_url, path):
    """GET a JSON endpoint; return the parsed body, or None on any failure.

    Best-effort by design: callers (e.g. load_hash_state) treat None as
    "state unavailable" and fall back to an empty baseline.
    """
    try:
        with urllib.request.urlopen(f"{base_url}{path}", timeout=15) as r:
            return json.loads(r.read())
    except (urllib.error.URLError, ValueError, OSError):
        # Narrowed from a bare `except Exception`: network/HTTP errors
        # (URLError/OSError), malformed URLs and bad JSON (ValueError)
        # are the expected failure modes; anything else should surface.
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def api_post(base_url, path, body):
    """POST *body* as JSON to the AtoCore API and return the parsed reply.

    A 400 response is treated as a benign rejection (e.g. duplicate
    content) and reported as {"skipped": True}; other HTTP errors
    propagate to the caller.
    """
    payload = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        f"{base_url}{path}",
        data=payload,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as exc:
        if exc.code != 400:
            raise
        return {"skipped": True}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ssh_cat(host, remote_path):
    """Cat a remote file via SSH. Returns content or None if missing."""
    cmd = [
        "ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes",
        host, f"cat {remote_path}",
    ]
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
            encoding="utf-8",
            errors="replace",
        )
    except Exception:
        # ssh missing, connect/exec timeout, etc. — same as "not found".
        return None
    return proc.stdout if proc.returncode == 0 else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ssh_ls(host, remote_glob):
    """List files matching a glob on the remote host."""
    cmd = [
        "ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes",
        host, f"ls -1 {remote_glob} 2>/dev/null",
    ]
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=10,
            encoding="utf-8",
            errors="replace",
        )
    except Exception:
        # SSH failure is treated as "nothing to list".
        return []
    if proc.returncode != 0:
        return []
    return [name for name in map(str.strip, proc.stdout.splitlines()) if name]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def content_hash(text):
    """Short (16 hex chars) SHA-256 fingerprint of *text* for delta checks."""
    digest = hashlib.sha256(text.encode("utf-8"))
    return digest.hexdigest()[:16]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_hash_state(base_url):
    """Load the hash state from project_state so we know what's changed."""
    state = api_get(base_url, "/project/state/atocore?category=status")
    if not state:
        # API unreachable or empty response — start from a clean baseline.
        return {}
    for entry in state.get("entries", []):
        if entry.get("key") != HASH_STATE_KEY:
            continue
        try:
            return json.loads(entry["value"])
        except Exception:
            # Corrupt stored value — treat as no prior state.
            return {}
    return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_hash_state(base_url, hashes):
    """Persist the per-file hash map to project_state for the next run."""
    payload = {
        "project": "atocore",
        "category": "status",
        "key": HASH_STATE_KEY,
        "value": json.dumps(hashes),
        "source": "import_openclaw_state.py",
    }
    api_post(base_url, "/project/state", payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def import_file_as_memory(base_url, filename, content, memory_type, source_tag):
    """Import a file's content as a single candidate memory for triage.

    Args:
        base_url: AtoCore API root.
        filename: basename of the source file, embedded in the memory text.
        content: full file content (truncated to 2000 chars for the queue).
        memory_type: classification (identity / adaptation / memory / episodic).
        source_tag: provenance label. NOTE(review): currently not sent in the
            payload — kept for interface stability; wire it in once the
            /memory endpoint accepts a source field.

    Returns:
        Parsed API response; {"skipped": True} on a 400 (duplicate content).
    """
    # Trim to reasonable size — auto-triage can handle long content but
    # we don't want single mega-memories dominating the queue
    trimmed = content[:2000]
    if len(content) > 2000:
        trimmed += f"\n\n[...truncated from {len(content)} chars]"

    body = {
        "memory_type": memory_type,
        # Fix: the filename placeholder had been lost, so every candidate's
        # content prefix read the literal "(unknown)" instead of the file.
        "content": f"From OpenClaw/{filename}: {trimmed}",
        "project": "",  # global/identity, not project-scoped
        "confidence": 0.5,
        "status": "candidate",
    }
    return api_post(base_url, "/memory", body)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Pull OpenClaw continuity files over SSH and queue them as candidates.

    Returns 0 on success, 1 when the workspace is unreachable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
    parser.add_argument("--openclaw-host", default=DEFAULT_OPENCLAW_HOST)
    parser.add_argument("--openclaw-path", default=DEFAULT_OPENCLAW_PATH)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    print(f"openclaw_host={args.openclaw_host} openclaw_path={args.openclaw_path}")
    print(f"dry_run={args.dry_run}")

    # Check SSH connectivity first — fail loudly instead of importing nothing.
    test = ssh_cat(args.openclaw_host, f"{args.openclaw_path}/SOUL.md")
    if test is None:
        print("ERROR: cannot reach OpenClaw workspace via SSH or SOUL.md not found")
        print("Check: ssh key installed? path correct? workspace exists?")
        return 1

    hashes = load_hash_state(args.base_url)
    imported = skipped = errors = 0

    # 1. Durable files
    for filename, mem_type in DURABLE_FILES:
        # Fix: the remote path previously interpolated the literal string
        # "(unknown)" instead of the loop's filename, so no durable file
        # was ever actually fetched.
        remote = f"{args.openclaw_path}/{filename}"
        content = ssh_cat(args.openclaw_host, remote)
        if content is None or not content.strip():
            print(f"  - {filename}: not found or empty")
            continue

        h = content_hash(content)
        if hashes.get(filename) == h:
            print(f"  = {filename}: unchanged (hash {h})")
            skipped += 1
            continue

        print(f"  + {filename}: changed (hash {h}, {len(content)}ch)")
        if not args.dry_run:
            try:
                result = import_file_as_memory(
                    args.base_url, filename, content, mem_type,
                    source_tag="openclaw-durable",
                )
                if result.get("skipped"):
                    print("    (duplicate content, skipped)")
                else:
                    print(f"    -> candidate {result.get('id', '?')[:8]}")
                    imported += 1
                # Record the hash even for duplicates so we don't retry
                # identical content forever.
                hashes[filename] = h
            except Exception as e:
                print(f"    ! error: {e}")
                errors += 1

    # 2. Daily memory logs (memory/YYYY-MM-DD.md)
    daily_glob = f"{args.openclaw_path}/{DAILY_MEMORY_GLOB}"
    daily_files = ssh_ls(args.openclaw_host, daily_glob)
    print(f"\ndaily memory files: {len(daily_files)}")

    # Only process the most recent 7 daily files to avoid flooding
    for remote_path in sorted(daily_files)[-7:]:
        filename = Path(remote_path).name
        content = ssh_cat(args.openclaw_host, remote_path)
        if content is None or not content.strip():
            continue

        h = content_hash(content)
        # Fix: the dedup key previously collapsed every daily log onto the
        # single key "daily/(unknown)"; key on the actual filename so each
        # day's log is tracked independently.
        key = f"daily/{filename}"
        if hashes.get(key) == h:
            print(f"  = {filename}: unchanged")
            skipped += 1
            continue

        print(f"  + {filename}: changed ({len(content)}ch)")
        if not args.dry_run:
            try:
                result = import_file_as_memory(
                    args.base_url, filename, content, "episodic",
                    source_tag="openclaw-daily",
                )
                if not result.get("skipped"):
                    print(f"    -> candidate {result.get('id', '?')[:8]}")
                    imported += 1
                hashes[key] = h
            except Exception as e:
                print(f"    ! error: {e}")
                errors += 1

    # Save hash state; on an all-skipped run the stored state is already
    # current, so only write when something landed.
    if not args.dry_run and imported > 0:
        save_hash_state(args.base_url, hashes)

    print(f"\nimported={imported} skipped={skipped} errors={errors}")
    print("Candidates queued — auto-triage will filter them on next run.")
    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # main() may return None on the happy path; normalize to exit code 0.
    sys.exit(main() or 0)
|