#!/usr/bin/env python3 """AtoCore Proxy — OpenAI-compatible HTTP middleware. Acts as a drop-in layer for any client that speaks the OpenAI Chat Completions API (Codex, Ollama, LiteLLM, custom agents). Sits between the client and the real model provider: client -> atocore_proxy -> real_provider (OpenAI, Ollama, Anthropic, ...) For each chat completion request: 1. Extract the user's last message as the "query" 2. Call AtoCore /context/build to get a context pack 3. Inject the pack as a system message (or prepend to existing system) 4. Forward the enriched request to the real provider 5. Capture the full interaction back to AtoCore /interactions Fail-open: if AtoCore is unreachable, the request passes through unchanged. If the real provider fails, the error is propagated to the client as-is. Configuration (env vars): ATOCORE_URL AtoCore base URL (default http://dalidou:8100) ATOCORE_UPSTREAM real provider base URL (e.g. http://localhost:11434/v1 for Ollama) ATOCORE_PROXY_PORT port to listen on (default 11435) ATOCORE_PROXY_HOST bind address (default 127.0.0.1) ATOCORE_CLIENT_LABEL client id recorded in captures (default "proxy") ATOCORE_CAPTURE "1" to capture interactions back (default "1") ATOCORE_INJECT "1" to inject context (default "1") Usage: # Proxy for Ollama: ATOCORE_UPSTREAM=http://localhost:11434/v1 python atocore_proxy.py # Then point your client at http://localhost:11435/v1 instead of the # real provider. Stdlib only — deliberate to keep the dependency footprint at zero. """ from __future__ import annotations import http.server import json import os import socketserver import sys import threading import urllib.error import urllib.parse import urllib.request from typing import Any ATOCORE_URL = os.environ.get("ATOCORE_URL", "http://dalidou:8100").rstrip("/") UPSTREAM_URL = os.environ.get("ATOCORE_UPSTREAM", "").rstrip("/") PROXY_PORT = int(os.environ.get("ATOCORE_PROXY_PORT", "11435")) PROXY_HOST = os.environ.get("ATOCORE_PROXY_HOST", "127.0.0.1") CLIENT_LABEL = os.environ.get("ATOCORE_CLIENT_LABEL", "proxy") CAPTURE_ENABLED = os.environ.get("ATOCORE_CAPTURE", "1") == "1" INJECT_ENABLED = os.environ.get("ATOCORE_INJECT", "1") == "1" ATOCORE_TIMEOUT = float(os.environ.get("ATOCORE_TIMEOUT", "6")) UPSTREAM_TIMEOUT = float(os.environ.get("ATOCORE_UPSTREAM_TIMEOUT", "300")) PROJECT_HINTS = [ ("p04-gigabit", ["p04", "gigabit"]), ("p05-interferometer", ["p05", "interferometer"]), ("p06-polisher", ["p06", "polisher", "fullum"]), ("abb-space", ["abb"]), ("atomizer-v2", ["atomizer"]), ("atocore", ["atocore", "dalidou"]), ] def log(msg: str) -> None: print(f"[atocore-proxy] {msg}", file=sys.stderr, flush=True) def detect_project(text: str) -> str: lower = (text or "").lower() for proj, tokens in PROJECT_HINTS: if any(t in lower for t in tokens): return proj return "" def get_last_user_message(body: dict) -> str: messages = body.get("messages", []) or [] for m in reversed(messages): if m.get("role") == "user": content = m.get("content", "") if isinstance(content, list): # OpenAI multi-part content: extract text parts parts = [p.get("text", "") for p in content if p.get("type") == "text"] return "\n".join(parts) return str(content) return "" def get_assistant_text(response: dict) -> str: """Extract assistant text from an OpenAI-style completion response.""" choices = response.get("choices", []) or [] if not choices: return "" msg = choices[0].get("message", {}) or {} content = msg.get("content", "") if isinstance(content, list): parts = [p.get("text", "") for p in content if p.get("type") == "text"] return "\n".join(parts) return str(content) def fetch_context(query: str, project: str) -> str: """Pull a context pack from AtoCore. Returns '' on any failure.""" if not INJECT_ENABLED or not query: return "" try: data = json.dumps({"prompt": query, "project": project}).encode("utf-8") req = urllib.request.Request( ATOCORE_URL + "/context/build", data=data, method="POST", headers={"Content-Type": "application/json"}, ) with urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT) as resp: result = json.loads(resp.read().decode("utf-8")) return result.get("formatted_context", "") or "" except Exception as e: log(f"context fetch failed: {type(e).__name__}: {e}") return "" def capture_interaction(prompt: str, response: str, project: str) -> None: """POST the completed turn back to AtoCore. Fire-and-forget.""" if not CAPTURE_ENABLED or not prompt or not response: return def _post(): try: data = json.dumps({ "prompt": prompt, "response": response, "client": CLIENT_LABEL, "project": project, "reinforce": True, }).encode("utf-8") req = urllib.request.Request( ATOCORE_URL + "/interactions", data=data, method="POST", headers={"Content-Type": "application/json"}, ) urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT) except Exception as e: log(f"capture failed: {type(e).__name__}: {e}") threading.Thread(target=_post, daemon=True).start() def inject_context(body: dict, context_pack: str) -> dict: """Prepend the AtoCore context as a system message, or augment existing.""" if not context_pack.strip(): return body header = "--- AtoCore Context (auto-injected) ---\n" footer = "\n--- End AtoCore Context ---\n" injection = header + context_pack + footer messages = list(body.get("messages", []) or []) if messages and messages[0].get("role") == "system": # Augment existing system message existing = messages[0].get("content", "") or "" if isinstance(existing, list): # multi-part: prepend a text part messages[0]["content"] = [{"type": "text", "text": injection}] + existing else: messages[0]["content"] = injection + "\n" + str(existing) else: messages.insert(0, {"role": "system", "content": injection}) body["messages"] = messages return body def forward_to_upstream(body: dict, headers: dict[str, str], path: str) -> tuple[int, dict]: """Forward the enriched body to the upstream provider. Returns (status, response_dict).""" if not UPSTREAM_URL: return 503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}} url = UPSTREAM_URL + path data = json.dumps(body).encode("utf-8") # Strip hop-by-hop / host-specific headers fwd_headers = {"Content-Type": "application/json"} for k, v in headers.items(): lk = k.lower() if lk in ("authorization", "x-api-key", "anthropic-version"): fwd_headers[k] = v req = urllib.request.Request(url, data=data, method="POST", headers=fwd_headers) try: with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp: return resp.status, json.loads(resp.read().decode("utf-8")) except urllib.error.HTTPError as e: try: body_bytes = e.read() payload = json.loads(body_bytes.decode("utf-8")) except Exception: payload = {"error": {"message": f"upstream HTTP {e.code}"}} return e.code, payload except Exception as e: log(f"upstream error: {e}") return 502, {"error": {"message": f"upstream unreachable: {e}"}} class ProxyHandler(http.server.BaseHTTPRequestHandler): # Silence default request logging (we log what matters ourselves) def log_message(self, format: str, *args: Any) -> None: pass def _read_body(self) -> dict: length = int(self.headers.get("Content-Length", "0") or "0") if length <= 0: return {} raw = self.rfile.read(length) try: return json.loads(raw.decode("utf-8")) except Exception: return {} def _send_json(self, status: int, payload: dict) -> None: body = json.dumps(payload).encode("utf-8") self.send_response(status) self.send_header("Content-Type", "application/json") self.send_header("Content-Length", str(len(body))) self.send_header("Access-Control-Allow-Origin", "*") self.end_headers() self.wfile.write(body) def do_OPTIONS(self) -> None: # CORS preflight self.send_response(204) self.send_header("Access-Control-Allow-Origin", "*") self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key") self.end_headers() def do_GET(self) -> None: parsed = urllib.parse.urlparse(self.path) if parsed.path == "/healthz": self._send_json(200, { "status": "ok", "atocore": ATOCORE_URL, "upstream": UPSTREAM_URL or "(not configured)", "inject": INJECT_ENABLED, "capture": CAPTURE_ENABLED, }) return # Pass through GET to upstream (model listing etc) if not UPSTREAM_URL: self._send_json(503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}}) return try: req = urllib.request.Request(UPSTREAM_URL + parsed.path + (f"?{parsed.query}" if parsed.query else "")) for k in ("Authorization", "X-API-Key"): v = self.headers.get(k) if v: req.add_header(k, v) with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp: data = resp.read() self.send_response(resp.status) self.send_header("Content-Type", resp.headers.get("Content-Type", "application/json")) self.send_header("Content-Length", str(len(data))) self.end_headers() self.wfile.write(data) except Exception as e: self._send_json(502, {"error": {"message": f"upstream error: {e}"}}) def do_POST(self) -> None: parsed = urllib.parse.urlparse(self.path) body = self._read_body() # Only enrich chat completions; other endpoints pass through if parsed.path.endswith("/chat/completions") or parsed.path == "/v1/chat/completions": prompt = get_last_user_message(body) project = detect_project(prompt) context = fetch_context(prompt, project) if prompt else "" if context: log(f"inject: project={project or '(none)'} chars={len(context)}") body = inject_context(body, context) status, response = forward_to_upstream(body, dict(self.headers), parsed.path) self._send_json(status, response) if status == 200: assistant_text = get_assistant_text(response) capture_interaction(prompt, assistant_text, project) else: # Non-chat endpoints (embeddings, completions, etc.) — pure passthrough status, response = forward_to_upstream(body, dict(self.headers), parsed.path) self._send_json(status, response) class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer): daemon_threads = True allow_reuse_address = True def main() -> int: if not UPSTREAM_URL: log("WARNING: ATOCORE_UPSTREAM not set. Chat completions will fail.") log("Example: ATOCORE_UPSTREAM=http://localhost:11434/v1 for Ollama") server = ThreadedServer((PROXY_HOST, PROXY_PORT), ProxyHandler) log(f"listening on {PROXY_HOST}:{PROXY_PORT}") log(f"AtoCore: {ATOCORE_URL} inject={INJECT_ENABLED} capture={CAPTURE_ENABLED}") log(f"Upstream: {UPSTREAM_URL or '(not configured)'}") log(f"Client label: {CLIENT_LABEL}") log("Ready. Point your OpenAI-compatible client at /v1/chat/completions") try: server.serve_forever() except KeyboardInterrupt: log("stopping") server.server_close() return 0 if __name__ == "__main__": sys.exit(main())