ATOCore/scripts/atocore_proxy.py

#!/usr/bin/env python3
"""AtoCore Proxy — OpenAI-compatible HTTP middleware.

Acts as a drop-in layer for any client that speaks the OpenAI Chat
Completions API (Codex, Ollama, LiteLLM, custom agents). Sits between
the client and the real model provider:

  client -> atocore_proxy -> real_provider (OpenAI, Ollama, Anthropic, ...)

For each chat completion request:
  1. Extract the user's last message as the "query"
  2. Call AtoCore /context/build to get a context pack
  3. Inject the pack as a system message (or prepend to existing system)
  4. Forward the enriched request to the real provider
  5. Capture the full interaction back to AtoCore /interactions

Fail-open: if AtoCore is unreachable, the request passes through
unchanged. If the real provider fails, the error is propagated to the
client as-is.

Configuration (env vars):
  ATOCORE_URL            AtoCore base URL (default http://dalidou:8100)
  ATOCORE_UPSTREAM       real provider base URL (e.g. http://localhost:11434/v1 for Ollama)
  ATOCORE_PROXY_PORT     port to listen on (default 11435)
  ATOCORE_PROXY_HOST     bind address (default 127.0.0.1)
  ATOCORE_CLIENT_LABEL   client id recorded in captures (default "proxy")
  ATOCORE_CAPTURE        "1" to capture interactions back (default "1")
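  ATOCORE_INJECT         "1" to inject context (default "1")
  ATOCORE_TIMEOUT        timeout in seconds for calls to AtoCore (default 6)
  ATOCORE_UPSTREAM_TIMEOUT  timeout in seconds for calls to the upstream provider (default 300)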

Usage:
  # Proxy for Ollama:
  ATOCORE_UPSTREAM=http://localhost:11434/v1 python atocore_proxy.py

  # Then point your client at http://localhost:11435/v1 instead of the
  # real provider.

Stdlib only — deliberate to keep the dependency footprint at zero.
"""

from __future__ import annotations

import http.server
import json
import os
import socketserver
import sys
import threading
import urllib.error
import urllib.parse
import urllib.request
from typing import Any

# Runtime configuration, read once from the environment at import time.

# Base URLs are stored without a trailing slash so request paths can be
# appended to them directly.
ATOCORE_URL = os.getenv("ATOCORE_URL", "http://dalidou:8100").rstrip("/")
UPSTREAM_URL = os.getenv("ATOCORE_UPSTREAM", "").rstrip("/")

# Address the proxy listens on.
PROXY_HOST = os.getenv("ATOCORE_PROXY_HOST", "127.0.0.1")
PROXY_PORT = int(os.getenv("ATOCORE_PROXY_PORT", "11435"))

# Label stored in the "client" field of captured interactions.
CLIENT_LABEL = os.getenv("ATOCORE_CLIENT_LABEL", "proxy")

# Feature switches: only the exact string "1" enables them.
INJECT_ENABLED = os.getenv("ATOCORE_INJECT", "1") == "1"
CAPTURE_ENABLED = os.getenv("ATOCORE_CAPTURE", "1") == "1"

# urlopen() timeouts, in seconds, for AtoCore and for the upstream provider.
ATOCORE_TIMEOUT = float(os.getenv("ATOCORE_TIMEOUT", "6"))
UPSTREAM_TIMEOUT = float(os.getenv("ATOCORE_UPSTREAM_TIMEOUT", "300"))

# (project id, tokens) pairs. A project is selected when any of its tokens
# occurs in the lower-cased prompt; earlier entries win.
PROJECT_HINTS = [
    ("p04-gigabit", ["p04", "gigabit"]),
    ("p05-interferometer", ["p05", "interferometer"]),
    ("p06-polisher", ["p06", "polisher", "fullum"]),
    ("abb-space", ["abb"]),
    ("atomizer-v2", ["atomizer"]),
    ("atocore", ["atocore", "dalidou"]),
]


def log(msg: str) -> None:
    """Write *msg* to stderr with the proxy's prefix and flush immediately."""
    line = "[atocore-proxy] {}".format(msg)
    print(line, file=sys.stderr, flush=True)


def detect_project(text: str) -> str:
    """Return the project id whose hint tokens appear in *text*.

    The text is lower-cased and each PROJECT_HINTS entry is tried in list
    order; the first entry with any token occurring as a substring wins.
    Returns "" when nothing matches or when *text* is empty/None.
    """
    haystack = (text or "").lower()
    matching_ids = (
        project_id
        for project_id, keywords in PROJECT_HINTS
        if any(keyword in haystack for keyword in keywords)
    )
    return next(matching_ids, "")


def get_last_user_message(body: dict) -> str:
    """Return the text of the most recent ``user`` message in a chat request.

    Args:
        body: Parsed chat-completions request body. Anything that is not a
            dict, or has no usable ``messages`` list, yields "".

    Returns:
        The content of the last message whose role is ``"user"``. Multi-part
        content (a list of parts) is reduced to its ``"text"`` parts joined
        with newlines. Returns "" when there is no user message or when its
        content is null.
    """
    if not isinstance(body, dict):
        return ""
    messages = body.get("messages") or []
    if not isinstance(messages, (list, tuple)):
        return ""

    for message in reversed(messages):
        # Skip malformed entries instead of crashing on `.get`.
        if not isinstance(message, dict) or message.get("role") != "user":
            continue
        content = message.get("content")
        if content is None:
            # An explicit JSON null must not become the literal string "None".
            return ""
        if isinstance(content, list):
            # OpenAI multi-part content: keep only the text parts.
            parts = [
                str(part.get("text") or "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return "\n".join(parts)
        return str(content)
    return ""


def get_assistant_text(response: dict) -> str:
    """Extract assistant text from an OpenAI-style completion response.

    Args:
        response: Parsed JSON response from the upstream provider.

    Returns:
        The ``message.content`` of the first choice. Multi-part content is
        reduced to its ``"text"`` parts joined with newlines. Returns "" when
        the response has no usable first choice or its content is null (as
        happens when the assistant replies with tool calls only).
    """
    if not isinstance(response, dict):
        return ""
    choices = response.get("choices") or []
    if not isinstance(choices, (list, tuple)) or not choices:
        return ""

    first = choices[0]
    if not isinstance(first, dict):
        return ""
    message = first.get("message") or {}
    if not isinstance(message, dict):
        return ""

    content = message.get("content")
    if content is None:
        # A JSON null must not be captured as the literal string "None".
        return ""
    if isinstance(content, list):
        parts = [
            str(part.get("text") or "")
            for part in content
            if isinstance(part, dict) and part.get("type") == "text"
        ]
        return "\n".join(parts)
    return str(content)


def fetch_context(query: str, project: str) -> str:
    """Pull a context pack from AtoCore. Returns '' on any failure.

    POSTs ``{"prompt": query, "project": project}`` to
    ``ATOCORE_URL/context/build`` and returns the ``formatted_context``
    field of the JSON reply.

    Args:
        query: Prompt text to build context for; empty skips the call.
        project: Project id hint ("" when unknown).

    Returns:
        The context string, or "" when injection is disabled, the query is
        empty, the request fails, or the reply is not in the expected form.
        Never raises, so the caller can pass the request through unchanged.
    """
    if not INJECT_ENABLED or not query:
        return ""
    try:
        data = json.dumps({"prompt": query, "project": project}).encode("utf-8")
        req = urllib.request.Request(
            ATOCORE_URL + "/context/build",
            data=data,
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT) as resp:
            result = json.loads(resp.read().decode("utf-8"))
        context = result.get("formatted_context", "") or ""
        if not isinstance(context, str):
            # Callers treat the result as text (strip/concatenate); a non-string
            # value would crash them, so report it as a failed fetch instead.
            raise TypeError(
                f"formatted_context is {type(context).__name__}, expected str"
            )
        return context
    except Exception as e:
        log(f"context fetch failed: {type(e).__name__}: {e}")
        return ""


def capture_interaction(prompt: str, response: str, project: str) -> None:
    """POST the completed turn back to AtoCore. Fire-and-forget.

    The request is sent from a daemon thread so the caller is not blocked.
    Failures are logged and otherwise ignored.

    Args:
        prompt: The user's prompt for this turn.
        response: The assistant's reply text.
        project: Project id hint ("" when unknown).

    Nothing is sent when capture is disabled or when either *prompt* or
    *response* is empty.
    """
    if not CAPTURE_ENABLED or not prompt or not response:
        return

    def _post() -> None:
        try:
            data = json.dumps({
                "prompt": prompt,
                "response": response,
                "client": CLIENT_LABEL,
                "project": project,
                "reinforce": True,
            }).encode("utf-8")
            req = urllib.request.Request(
                ATOCORE_URL + "/interactions",
                data=data,
                method="POST",
                headers={"Content-Type": "application/json"},
            )
            # Use the response as a context manager so the connection is
            # closed promptly instead of lingering until garbage collection.
            with urllib.request.urlopen(req, timeout=ATOCORE_TIMEOUT):
                pass
        except Exception as e:
            log(f"capture failed: {type(e).__name__}: {e}")

    threading.Thread(target=_post, daemon=True).start()


def inject_context(body: dict, context_pack: str) -> dict:
    """Add the AtoCore context pack to the request's system prompt.

    If the first message is a system message, the pack is prepended to its
    content (as an extra text part for multi-part content, otherwise as a
    string prefix). Otherwise a new system message is inserted at the front.

    Args:
        body: Chat-completions request body; it is modified in place
            (``body["messages"]`` is replaced, and an existing leading
            system message dict is updated).
        context_pack: Context text; a blank pack leaves *body* untouched.

    Returns:
        The same *body* object.
    """
    if context_pack.strip() == "":
        return body

    block = "".join((
        "--- AtoCore Context (auto-injected) ---\n",
        context_pack,
        "\n--- End AtoCore Context ---\n",
    ))

    msgs = list(body.get("messages", []) or [])
    has_system_head = bool(msgs) and msgs[0].get("role") == "system"

    if not has_system_head:
        msgs.insert(0, {"role": "system", "content": block})
    else:
        system_msg = msgs[0]
        prior = system_msg.get("content", "") or ""
        system_msg["content"] = (
            [{"type": "text", "text": block}] + prior  # multi-part: add a text part
            if isinstance(prior, list)
            else block + "\n" + str(prior)
        )

    body["messages"] = msgs
    return body


def forward_to_upstream(body: dict, headers: dict[str, str], path: str) -> tuple[int, dict]:
    """Forward a JSON request body to the upstream provider via POST.

    Args:
        body: Request body to serialize as JSON.
        headers: Incoming client headers. Only the credential/version headers
            (Authorization, X-API-Key, Anthropic-Version) are forwarded.
        path: Request path as received from the client (may include a query
            string). It is appended to ``UPSTREAM_URL``.

    Returns:
        ``(status, payload)`` where *payload* is the decoded JSON response.
        Upstream HTTP errors are returned with their own status and JSON body
        (or a generic error object if that body is not JSON). Returns 503 when
        no upstream is configured and 502 when the upstream cannot be reached
        or answers a successful request with a body that is not JSON.
    """
    if not UPSTREAM_URL:
        return 503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}}

    # The upstream base URL usually already ends in a prefix such as "/v1",
    # and clients pointed at "<proxy>/v1" send that prefix too. Drop the
    # duplicate so we do not request ".../v1/v1/chat/completions".
    base_path = urllib.parse.urlparse(UPSTREAM_URL).path.rstrip("/")
    if base_path and path.startswith(base_path + "/"):
        path = path[len(base_path):]
    url = UPSTREAM_URL + path
    data = json.dumps(body).encode("utf-8")

    # Forward only credential/version headers; everything else (Host,
    # Content-Length, hop-by-hop headers) is rebuilt for the new request.
    fwd_headers = {"Content-Type": "application/json"}
    for k, v in headers.items():
        if k.lower() in ("authorization", "x-api-key", "anthropic-version"):
            fwd_headers[k] = v

    try:
        # Request() itself raises on a malformed URL, so keep it inside the try.
        req = urllib.request.Request(url, data=data, method="POST", headers=fwd_headers)
        with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp:
            status = resp.status
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # Upstream answered with an error status: relay its status and body.
        try:
            payload = json.loads(e.read().decode("utf-8"))
        except Exception:
            payload = {"error": {"message": f"upstream HTTP {e.code}"}}
        finally:
            e.close()  # HTTPError wraps the open response
        return e.code, payload
    except Exception as e:
        log(f"upstream error: {e}")
        return 502, {"error": {"message": f"upstream unreachable: {e}"}}

    # Decode separately so a reachable upstream that returns something other
    # than JSON (e.g. a streamed body) is not reported as "unreachable".
    try:
        return status, json.loads(raw.decode("utf-8"))
    except ValueError as e:
        log(f"upstream returned non-JSON body: {e}")
        return 502, {"error": {"message": f"upstream returned a non-JSON response: {e}"}}


class ProxyHandler(http.server.BaseHTTPRequestHandler):
    """HTTP request handler for the AtoCore proxy.

    GET ``/healthz`` reports the proxy configuration; every other GET is
    passed through to the upstream provider. POSTs to a path ending in
    ``/chat/completions`` are enriched with AtoCore context before being
    forwarded and captured afterwards; all other POSTs are forwarded as-is.
    """

    def log_message(self, format: str, *args: Any) -> None:
        """Silence default request logging (we log what matters ourselves)."""
        pass

    @staticmethod
    def _strip_base_prefix(base_url: str, path: str) -> str:
        """Return *path* without a leading copy of *base_url*'s path prefix.

        With an upstream of ``http://host/v1`` and a client that also sends
        ``/v1/...``, plain concatenation would produce ``/v1/v1/...``.
        """
        prefix = urllib.parse.urlparse(base_url).path.rstrip("/")
        if prefix and path.startswith(prefix + "/"):
            return path[len(prefix):]
        return path

    def _upstream_target(self, parsed: urllib.parse.ParseResult) -> str:
        """Return the path (plus query string, if any) to request upstream."""
        path = self._strip_base_prefix(UPSTREAM_URL, parsed.path)
        return path + (f"?{parsed.query}" if parsed.query else "")

    def _read_body(self) -> dict:
        """Read and decode the JSON request body.

        Returns:
            The parsed JSON object, or ``{}`` when the body is absent, the
            Content-Length header is invalid, the body is not valid JSON, or
            the JSON value is not an object.
        """
        try:
            length = int(self.headers.get("Content-Length", "0") or "0")
        except ValueError:
            return {}
        if length <= 0:
            return {}
        raw = self.rfile.read(length)
        try:
            parsed = json.loads(raw.decode("utf-8"))
        except (ValueError, RecursionError):
            return {}
        # Downstream helpers call .get() on the body, so only objects are usable.
        return parsed if isinstance(parsed, dict) else {}

    def _send_bytes(self, status: int, content_type: str, data: bytes) -> None:
        """Send a complete response with the given status, type and body."""
        self.send_response(status)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(data)))
        self.send_header("Access-Control-Allow-Origin", "*")
        self.end_headers()
        self.wfile.write(data)

    def _send_json(self, status: int, payload: dict) -> None:
        """Serialize *payload* as JSON and send it with *status*."""
        self._send_bytes(status, "application/json", json.dumps(payload).encode("utf-8"))

    def do_OPTIONS(self) -> None:  # CORS preflight
        """Answer a CORS preflight request with the allowed methods/headers."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key")
        self.end_headers()

    def do_GET(self) -> None:
        """Serve ``/healthz`` locally; pass every other GET through upstream."""
        parsed = urllib.parse.urlparse(self.path)
        if parsed.path == "/healthz":
            self._send_json(200, {
                "status": "ok",
                "atocore": ATOCORE_URL,
                "upstream": UPSTREAM_URL or "(not configured)",
                "inject": INJECT_ENABLED,
                "capture": CAPTURE_ENABLED,
            })
            return
        # Pass through GET to upstream (model listing etc)
        if not UPSTREAM_URL:
            self._send_json(503, {"error": {"message": "ATOCORE_UPSTREAM not configured"}})
            return

        # Fetch first, reply afterwards: a failure while writing to the client
        # must not trigger a second response on the same connection.
        try:
            req = urllib.request.Request(UPSTREAM_URL + self._upstream_target(parsed))
            for k in ("Authorization", "X-API-Key"):
                v = self.headers.get(k)
                if v:
                    req.add_header(k, v)
            with urllib.request.urlopen(req, timeout=UPSTREAM_TIMEOUT) as resp:
                status = resp.status
                content_type = resp.headers.get("Content-Type", "application/json")
                data = resp.read()
        except urllib.error.HTTPError as e:
            # Upstream answered with an error status: relay it as-is rather
            # than masking it as a 502.
            status = e.code
            content_type = (e.headers.get("Content-Type") if e.headers else None) or "application/json"
            try:
                data = e.read()
            except Exception:
                content_type = "application/json"
                data = json.dumps({"error": {"message": f"upstream HTTP {e.code}"}}).encode("utf-8")
            finally:
                e.close()
        except Exception as e:
            self._send_json(502, {"error": {"message": f"upstream error: {e}"}})
            return
        self._send_bytes(status, content_type, data)

    def do_POST(self) -> None:
        """Forward a POST upstream, enriching and capturing chat completions."""
        parsed = urllib.parse.urlparse(self.path)
        body = self._read_body()

        # Only enrich chat completions; other endpoints (embeddings,
        # completions, etc.) are a pure passthrough.
        is_chat = parsed.path.endswith("/chat/completions")
        prompt = ""
        project = ""
        if is_chat:
            prompt = get_last_user_message(body)
            project = detect_project(prompt)
            context = fetch_context(prompt, project) if prompt else ""
            if context:
                log(f"inject: project={project or '(none)'} chars={len(context)}")
                body = inject_context(body, context)

        # NOTE(review): the upstream reply is relayed as a single JSON
        # document, so streamed ("stream": true) responses are presumably not
        # supported by this path; confirm before relying on it.
        status, response = forward_to_upstream(
            body, dict(self.headers), self._upstream_target(parsed)
        )
        self._send_json(status, response)

        # Capture after replying so the client is not delayed by it.
        if is_chat and status == 200:
            capture_interaction(prompt, get_assistant_text(response), project)


class ThreadedServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    """HTTP server that handles each request in its own thread."""

    # Permit rebinding the listening address right after a restart.
    allow_reuse_address = True
    # Mark request threads as daemon threads so they never block shutdown.
    daemon_threads = True


def main() -> int:
    """Start the proxy server and serve until interrupted.

    Logs the effective configuration, then blocks in ``serve_forever()``.
    Ctrl-C stops the server cleanly.

    Returns:
        0, for use as the process exit status.
    """
    if not UPSTREAM_URL:
        log("WARNING: ATOCORE_UPSTREAM not set. Chat completions will fail.")
        log("Example: ATOCORE_UPSTREAM=http://localhost:11434/v1 for Ollama")
    server = ThreadedServer((PROXY_HOST, PROXY_PORT), ProxyHandler)
    try:
        log(f"listening on {PROXY_HOST}:{PROXY_PORT}")
        log(f"AtoCore: {ATOCORE_URL}  inject={INJECT_ENABLED}  capture={CAPTURE_ENABLED}")
        log(f"Upstream: {UPSTREAM_URL or '(not configured)'}")
        log(f"Client label: {CLIENT_LABEL}")
        log("Ready. Point your OpenAI-compatible client at /v1/chat/completions")
        server.serve_forever()
    except KeyboardInterrupt:
        log("stopping")
    finally:
        # Release the listening socket on every exit path, not only on Ctrl-C.
        server.server_close()
    return 0


if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the process exit status.
    raise SystemExit(main())