Files
ATOCore/scripts/atocore_proxy.py
Anto01 86637f8eee feat: universal LLM consumption (Phase 1 complete)
Completes the Phase 1 master brain keystone: every LLM interaction
across the ecosystem now pulls context from AtoCore automatically.

Three adapters, one HTTP backend:

1. OpenClaw plugin pull (handler.js):
   - Added before_prompt_build hook that calls /context/build and
     injects the pack via prependContext
   - Existing capture hooks (before_agent_start + llm_output)
     unchanged
   - 6s context timeout, fail-open on AtoCore unreachable
   - Deployed to T420, gateway restarted, "7 plugins loaded"

2. atocore-proxy (scripts/atocore_proxy.py):
   - Stdlib-only OpenAI-compatible HTTP middleware
   - Drop-in layer for Codex, Ollama, LiteLLM, any OpenAI-compat client
   - Intercepts /chat/completions: extracts query, pulls context,
     injects as system message, forwards to upstream, captures back
   - Fail-open: AtoCore down = passthrough without injection
   - Configurable via env: UPSTREAM, PORT, CLIENT_LABEL, INJECT, CAPTURE

3. (from prior commit c49363f) atocore-mcp:
   - stdio MCP server, stdlib Python, 7 tools exposed
   - Registered in Claude Code: "✓ Connected"

Plus quick win:
- Project synthesis moved from Sunday-only to daily cron so wiki /
  mirror pages stay fresh (Step C in batch-extract.sh). Lint stays
  weekly.

Plus docs:
- docs/universal-consumption.md: configuration guide for all 3 adapters
  with registration/env-var tables and verification checklist

Plus housekeeping:
- .gitignore: add .mypy_cache/

Tests: 303/303 passing.

This closes the consumption gap: the reinforcement feedback loop
can now actually work (memories get injected → get referenced →
reinforcement fires → auto-promotion). Every Claude, OpenClaw,
Codex, or Ollama session is automatically AtoCore-grounded.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 20:14:25 -04:00

322 lines
12 KiB
Python

#!/usr/bin/env python3
"""AtoCore Proxy — OpenAI-compatible HTTP middleware.
Acts as a drop-in layer for any client that speaks the OpenAI Chat
Completions API (Codex, Ollama, LiteLLM, custom agents). Sits between
the client and the real model provider:
client -> atocore_proxy -> real_provider (OpenAI, Ollama, Anthropic, ...)
For each chat completion request:
1. Extract the user's last message as the "query"
2. Call AtoCore /context/build to get a context pack
3. Inject the pack as a system message (or prepend to existing system)
4. Forward the enriched request to the real provider
5. Capture the full interaction back to AtoCore /interactions
Fail-open: if AtoCore is unreachable, the request passes through
unchanged. If the real provider fails, the error is propagated to the
client as-is.
Configuration (env vars):
ATOCORE_URL AtoCore base URL (default http://dalidou:8100)
ATOCORE_UPSTREAM real provider base URL (e.g. http://localhost:11434/v1 for Ollama)
ATOCORE_PROXY_PORT port to listen on (default 11435)
ATOCORE_PROXY_HOST bind address (default 127.0.0.1)
ATOCORE_CLIENT_LABEL client id recorded in captures (default "proxy")
ATOCORE_CAPTURE "1" to capture interactions back (default "1")
ATOCORE_INJECT "1" to inject context (default "1")
Usage:
# Proxy for Ollama:
ATOCORE_UPSTREAM=http://localhost:11434/v1 python atocore_proxy.py
# Then point your client at http://localhost:11435/v1 instead of the
# real provider.
Stdlib only — deliberate to keep the dependency footprint at zero.
"""
from __future__ import annotations
import http.server
import json
import os
import socketserver
import sys
import threading
import urllib.error
import urllib.parse
import urllib.request
from typing import Any
ATOCORE_URL = os.environ.get("ATOCORE_URL", "http://dalidou:8100").rstrip("/")
UPSTREAM_URL = os.environ.get("ATOCORE_UPSTREAM", "").rstrip("/")
PROXY_PORT = int(os.environ.get("ATOCORE_PROXY_PORT", "11435"))
PROXY_HOST = os.environ.get("ATOCORE_PROXY_HOST", "127.0.0.1")
CLIENT_LABEL = os.environ.get("ATOCORE_CLIENT_LABEL", "proxy")
CAPTURE_ENABLED = os.environ.get("ATOCORE_CAPTURE", "1") == "1"
INJECT_ENABLED = os.environ.get("ATOCORE_INJECT", "1") == "1"
ATOCORE_TIMEOUT = float(os.environ.get("ATOCORE_TIMEOUT", "6"))
UPSTREAM_TIMEOUT = float(os.environ.get("ATOCORE_UPSTREAM_TIMEOUT", "300"))
PROJECT_HINTS = [
("p04-gigabit", ["p04", "gigabit"]),
("p05-interferometer", ["p05", "interferometer"]),
("p06-polisher", ["p06", "polisher", "fullum"]),
("abb-space", ["abb"]),
("atomizer-v2", ["atomizer"]),
("atocore", ["atocore", "dalidou"]),
]
def log(msg: str) -> None:
print(f"[atocore-proxy] {msg}", file=sys.stderr, flush=True)
def detect_project(text: str) -> str:
lower = (text or "").lower()
for proj, tokens in PROJECT_HINTS:
if any(t in lower for t in tokens):
return proj
return ""
def get_last_user_message(body: dict) -> str:
messages = body.get("messages", []) or []
for m in reversed(messages):
if m.get("role") == "user":
content = m.get("content", "")
if isinstance(content, list):
# OpenAI multi-part content: extract text parts
parts = [p.get("text", "") for p in content if p.get("type") == "text"]
return "\n".join(parts)
return str(content)
return ""
def get_assistant_text(response: dict) -> str:
"""Extract assistant text from an OpenAI-style completion response."""
choices = response.get("choices", []) or []
if not choices:
return ""
msg = choices[0].get("message", {}) or {}
content = msg.get("content", "")
if isinstance(content, list):
parts = [p.get("text", "") for p in content if p.get("type") == "text"]
return "\n".join(parts)
return str(content)
def fetch_context(query: str, project: str) -> str:
"""Pull a context pack from AtoCore. Returns '' on any failure."""
if not INJECT_ENABLED or not query:
return ""
try:
data = json.dumps({"prompt": query, "project": project}).encode("utf-8")
req = urllib.request.Request(
ATOCORE_URL + "/context/build",
data=data,
method="POST",
headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT) as resp:
result = json.loads(resp.read().decode("utf-8"))
return result.get("formatted_context", "") or ""
except Exception as e:
log(f"context fetch failed: {type(e).__name__}: {e}")
return ""
def capture_interaction(prompt: str, response: str, project: str) -> None:
"""POST the completed turn back to AtoCore. Fire-and-forget."""
if not CAPTURE_ENABLED or not prompt or not response:
return
def _post():
try:
data = json.dumps({
"prompt": prompt,
"response": response,
"client": CLIENT_LABEL,
"project": project,
"reinforce": True,
}).encode("utf-8")
req = urllib.request.Request(
ATOCORE_URL + "/interactions",
data=data,
method="POST",
headers={"Content-Type": "application/json"},
)
urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT)
except Exception as e:
log(f"capture failed: {type(e).__name__}: {e}")
threading.Thread(target=_post, daemon=True).start()
def inject_context(body: dict, context_pack: str) -> dict:
"""Prepend the AtoCore context as a system message, or augment existing."""
if not context_pack.strip():
return body
header = "--- AtoCore Context (auto-injected) ---\n"
footer = "\n--- End AtoCore Context ---\n"
injection = header + context_pack + footer
messages = list(body.get("messages", []) or [])
if messages and messages[0].get("role") == "system":
# Augment existing system message
existing = messages[0].get("content", "") or ""
if isinstance(existing, list):
# multi-part: prepend a text part
messages[0]["content"] = [{"type": "text", "text": injection}] + existing
else:
messages[0]["content"] = injection + "\n" + str(existing)
else:
messages.insert(0, {"role": "system", "content": injection})
body["messages"] = messages
return body
def forward_to_upstream(body: dict, headers: dict[str, str], path: str) -> tuple[int, dict]:
"""Forward the enriched body to the upstream provider. Returns (status, response_dict)."""
if not UPSTREAM_URL:
return 503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}}
url = UPSTREAM_URL + path
data = json.dumps(body).encode("utf-8")
# Strip hop-by-hop / host-specific headers
fwd_headers = {"Content-Type": "application/json"}
for k, v in headers.items():
lk = k.lower()
if lk in ("authorization", "x-api-key", "anthropic-version"):
fwd_headers[k] = v
req = urllib.request.Request(url, data=data, method="POST", headers=fwd_headers)
try:
with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp:
return resp.status, json.loads(resp.read().decode("utf-8"))
except urllib.error.HTTPError as e:
try:
body_bytes = e.read()
payload = json.loads(body_bytes.decode("utf-8"))
except Exception:
payload = {"error": {"message": f"upstream HTTP {e.code}"}}
return e.code, payload
except Exception as e:
log(f"upstream error: {e}")
return 502, {"error": {"message": f"upstream unreachable: {e}"}}
class ProxyHandler(http.server.BaseHTTPRequestHandler):
# Silence default request logging (we log what matters ourselves)
def log_message(self, format: str, *args: Any) -> None:
pass
def _read_body(self) -> dict:
length = int(self.headers.get("Content-Length", "0") or "0")
if length <= 0:
return {}
raw = self.rfile.read(length)
try:
return json.loads(raw.decode("utf-8"))
except Exception:
return {}
def _send_json(self, status: int, payload: dict) -> None:
body = json.dumps(payload).encode("utf-8")
self.send_response(status)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(body)))
self.send_header("Access-Control-Allow-Origin", "*")
self.end_headers()
self.wfile.write(body)
def do_OPTIONS(self) -> None: # CORS preflight
self.send_response(204)
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key")
self.end_headers()
def do_GET(self) -> None:
parsed = urllib.parse.urlparse(self.path)
if parsed.path == "/healthz":
self._send_json(200, {
"status": "ok",
"atocore": ATOCORE_URL,
"upstream": UPSTREAM_URL or "(not configured)",
"inject": INJECT_ENABLED,
"capture": CAPTURE_ENABLED,
})
return
# Pass through GET to upstream (model listing etc)
if not UPSTREAM_URL:
self._send_json(503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}})
return
try:
req = urllib.request.Request(UPSTREAM_URL + parsed.path + (f"?{parsed.query}" if parsed.query else ""))
for k in ("Authorization", "X-API-Key"):
v = self.headers.get(k)
if v:
req.add_header(k, v)
with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp:
data = resp.read()
self.send_response(resp.status)
self.send_header("Content-Type", resp.headers.get("Content-Type", "application/json"))
self.send_header("Content-Length", str(len(data)))
self.end_headers()
self.wfile.write(data)
except Exception as e:
self._send_json(502, {"error": {"message": f"upstream error: {e}"}})
def do_POST(self) -> None:
parsed = urllib.parse.urlparse(self.path)
body = self._read_body()
# Only enrich chat completions; other endpoints pass through
if parsed.path.endswith("/chat/completions") or parsed.path == "/v1/chat/completions":
prompt = get_last_user_message(body)
project = detect_project(prompt)
context = fetch_context(prompt, project) if prompt else ""
if context:
log(f"inject: project={project or '(none)'} chars={len(context)}")
body = inject_context(body, context)
status, response = forward_to_upstream(body, dict(self.headers), parsed.path)
self._send_json(status, response)
if status == 200:
assistant_text = get_assistant_text(response)
capture_interaction(prompt, assistant_text, project)
else:
# Non-chat endpoints (embeddings, completions, etc.) — pure passthrough
status, response = forward_to_upstream(body, dict(self.headers), parsed.path)
self._send_json(status, response)
class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
daemon_threads = True
allow_reuse_address = True
def main() -> int:
if not UPSTREAM_URL:
log("WARNING: ATOCORE_UPSTREAM not set. Chat completions will fail.")
log("Example: ATOCORE_UPSTREAM=http://localhost:11434/v1 for Ollama")
server = ThreadedServer((PROXY_HOST, PROXY_PORT), ProxyHandler)
log(f"listening on {PROXY_HOST}:{PROXY_PORT}")
log(f"AtoCore: {ATOCORE_URL} inject={INJECT_ENABLED} capture={CAPTURE_ENABLED}")
log(f"Upstream: {UPSTREAM_URL or '(not configured)'}")
log(f"Client label: {CLIENT_LABEL}")
log("Ready. Point your OpenAI-compatible client at /v1/chat/completions")
try:
server.serve_forever()
except KeyboardInterrupt:
log("stopping")
server.server_close()
return 0
if __name__ == "__main__":
sys.exit(main())