"""Interaction capture service. An *interaction* is one round-trip of: - a user prompt - the AtoCore context pack that was assembled for it - the LLM response (full text or a summary, caller's choice) - which memories and chunks were actually used in the pack - a client identifier (e.g. ``openclaw``, ``claude-code``, ``manual``) - an optional session identifier so multi-turn conversations can be reconstructed later The capture is intentionally additive: it never modifies memories, project state, or chunks. Reflection (Phase 9 Commit B/C) and write-back (Phase 10) are layered on top of this audit trail without violating the AtoCore trust hierarchy. """ from __future__ import annotations import json import re import uuid from dataclasses import dataclass, field from datetime import datetime, timezone from atocore.models.database import get_connection from atocore.observability.logger import get_logger from atocore.projects.registry import resolve_project_name log = get_logger("interactions") # Stored timestamps use 'YYYY-MM-DD HH:MM:SS' (no timezone offset, UTC by # convention) so they sort lexically and compare cleanly with the SQLite # CURRENT_TIMESTAMP default. The since filter accepts ISO 8601 strings # (with 'T', optional 'Z' or +offset, optional fractional seconds) and # normalizes them to the storage format before the SQL comparison. _STORAGE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S" @dataclass class Interaction: id: str prompt: str response: str response_summary: str project: str client: str session_id: str memories_used: list[str] = field(default_factory=list) chunks_used: list[str] = field(default_factory=list) context_pack: dict = field(default_factory=dict) created_at: str = "" def record_interaction( prompt: str, response: str = "", response_summary: str = "", project: str = "", client: str = "", session_id: str = "", memories_used: list[str] | None = None, chunks_used: list[str] | None = None, context_pack: dict | None = None, reinforce: bool = True, ) -> Interaction: """Persist a single interaction to the audit trail. The only required field is ``prompt`` so this can be called even when the caller is in the middle of a partial turn (for example to record that AtoCore was queried even before the LLM response is back). When ``reinforce`` is True (default) and the interaction has response content, the Phase 9 Commit B reinforcement pass runs automatically against the active memory set. This bumps the confidence of any memory whose content is echoed in the response. Set ``reinforce`` to False to capture the interaction without touching memory confidence, which is useful for backfill and for tests that want to isolate the audit trail from the reinforcement loop. """ if not prompt or not prompt.strip(): raise ValueError("Interaction prompt must be non-empty") # Canonicalize the project through the registry so an alias and # the canonical id store under the same bucket. Without this, # reinforcement and extraction (which both query by raw # interaction.project) would silently miss memories and create # candidates in the wrong project. project = resolve_project_name(project) interaction_id = str(uuid.uuid4()) # Store created_at explicitly so the same string lives in both the DB # column and the returned dataclass. SQLite's CURRENT_TIMESTAMP uses # 'YYYY-MM-DD HH:MM:SS' which would not compare cleanly against ISO # timestamps with 'T' and tz offset, breaking the `since` filter on # list_interactions. now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") memories_used = list(memories_used or []) chunks_used = list(chunks_used or []) context_pack_payload = context_pack or {} with get_connection() as conn: conn.execute( """ INSERT INTO interactions ( id, prompt, context_pack, response_summary, response, memories_used, chunks_used, client, session_id, project, created_at ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( interaction_id, prompt, json.dumps(context_pack_payload, ensure_ascii=True), response_summary, response, json.dumps(memories_used, ensure_ascii=True), json.dumps(chunks_used, ensure_ascii=True), client, session_id, project, now, ), ) log.info( "interaction_recorded", interaction_id=interaction_id, project=project, client=client, session_id=session_id, memories_used=len(memories_used), chunks_used=len(chunks_used), response_chars=len(response), ) interaction = Interaction( id=interaction_id, prompt=prompt, response=response, response_summary=response_summary, project=project, client=client, session_id=session_id, memories_used=memories_used, chunks_used=chunks_used, context_pack=context_pack_payload, created_at=now, ) if reinforce and (response or response_summary): # Import inside the function to avoid a circular import between # the interactions service and the reinforcement module which # depends on it. try: from atocore.memory.reinforcement import reinforce_from_interaction reinforce_from_interaction(interaction) except Exception as exc: # pragma: no cover - reinforcement must never block capture log.error( "reinforcement_failed_on_capture", interaction_id=interaction_id, error=str(exc), ) return interaction def list_interactions( project: str | None = None, session_id: str | None = None, client: str | None = None, since: str | None = None, limit: int = 50, ) -> list[Interaction]: """List captured interactions, optionally filtered. ``since`` accepts an ISO 8601 timestamp string (with ``T``, an optional ``Z`` or numeric offset, optional fractional seconds). The value is normalized to the storage format (UTC, ``YYYY-MM-DD HH:MM:SS``) before the SQL comparison so external callers can pass any of the common ISO shapes without filter drift. ``project`` is canonicalized through the registry so an alias finds rows stored under the canonical project id. ``limit`` is hard-capped at 500 to keep casual API listings cheap. """ if limit <= 0: return [] limit = min(limit, 500) query = "SELECT * FROM interactions WHERE 1=1" params: list = [] if project: query += " AND project = ?" params.append(resolve_project_name(project)) if session_id: query += " AND session_id = ?" params.append(session_id) if client: query += " AND client = ?" params.append(client) if since: query += " AND created_at >= ?" params.append(_normalize_since(since)) query += " ORDER BY created_at DESC LIMIT ?" params.append(limit) with get_connection() as conn: rows = conn.execute(query, params).fetchall() return [_row_to_interaction(row) for row in rows] def get_interaction(interaction_id: str) -> Interaction | None: """Fetch one interaction by id, or return None if it does not exist.""" if not interaction_id: return None with get_connection() as conn: row = conn.execute( "SELECT * FROM interactions WHERE id = ?", (interaction_id,) ).fetchone() if row is None: return None return _row_to_interaction(row) def _row_to_interaction(row) -> Interaction: return Interaction( id=row["id"], prompt=row["prompt"], response=row["response"] or "", response_summary=row["response_summary"] or "", project=row["project"] or "", client=row["client"] or "", session_id=row["session_id"] or "", memories_used=_safe_json_list(row["memories_used"]), chunks_used=_safe_json_list(row["chunks_used"]), context_pack=_safe_json_dict(row["context_pack"]), created_at=row["created_at"] or "", ) def _safe_json_list(raw: str | None) -> list[str]: if not raw: return [] try: value = json.loads(raw) except json.JSONDecodeError: return [] if not isinstance(value, list): return [] return [str(item) for item in value] def _safe_json_dict(raw: str | None) -> dict: if not raw: return {} try: value = json.loads(raw) except json.JSONDecodeError: return {} if not isinstance(value, dict): return {} return value def _normalize_since(since: str) -> str: """Normalize an ISO 8601 ``since`` filter to the storage format. Stored ``created_at`` values are ``YYYY-MM-DD HH:MM:SS`` (no timezone, UTC by convention). External callers naturally pass ISO 8601 with ``T`` separator, optional ``Z`` suffix, optional fractional seconds, and optional ``+HH:MM`` offsets. A naive string comparison between the two formats fails on the same day because the lexically-greater ``T`` makes any ISO value sort after any space-separated value. This helper accepts the common ISO shapes plus the bare storage format and returns the storage format. On a parse failure it returns the input unchanged so the SQL comparison fails open (no rows match) instead of raising and breaking the listing endpoint. """ if not since: return since candidate = since.strip() # Python's fromisoformat understands trailing 'Z' from 3.11+ but # we replace it explicitly for safety against earlier shapes. if candidate.endswith("Z"): candidate = candidate[:-1] + "+00:00" try: dt = datetime.fromisoformat(candidate) except ValueError: # Already in storage format, or unparseable: best-effort # match the storage format with a regex; if that fails too, # return the raw input. if re.fullmatch(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", since): return since return since if dt.tzinfo is not None: dt = dt.astimezone(timezone.utc).replace(tzinfo=None) return dt.strftime(_STORAGE_TIMESTAMP_FORMAT)