- POST /admin/backup/cleanup — retention cleanup via API (dry-run by default) - record_interaction() accepts extract=True to auto-extract candidate memories from response text using the Phase 9C rule-based extractor - POST /interactions accepts extract field to enable extraction on capture - deploy/dalidou/cron-backup.sh — daily backup + cleanup for cron Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
330 lines
11 KiB
Python
330 lines
11 KiB
Python
"""Interaction capture service.

An *interaction* is one round-trip of:
- a user prompt
- the AtoCore context pack that was assembled for it
- the LLM response (full text or a summary, caller's choice)
- which memories and chunks were actually used in the pack
- a client identifier (e.g. ``openclaw``, ``claude-code``, ``manual``)
- an optional session identifier so multi-turn conversations can be
  reconstructed later

The capture is intentionally additive: it never modifies memories,
project state, or chunks. Reflection (Phase 9 Commit B/C) and
write-back (Phase 10) are layered on top of this audit trail without
violating the AtoCore trust hierarchy.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
|
|
from atocore.models.database import get_connection
|
|
from atocore.observability.logger import get_logger
|
|
from atocore.projects.registry import resolve_project_name
|
|
|
|
# Module-level logger for this service, obtained from the project's
# logging helper under the name "interactions".
log = get_logger("interactions")
|
|
|
|
# Stored timestamps use 'YYYY-MM-DD HH:MM:SS' (no timezone offset, UTC by
|
|
# convention) so they sort lexically and compare cleanly with the SQLite
|
|
# CURRENT_TIMESTAMP default. The since filter accepts ISO 8601 strings
|
|
# (with 'T', optional 'Z' or +offset, optional fractional seconds) and
|
|
# normalizes them to the storage format before the SQL comparison.
|
|
_STORAGE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
|
|
|
|
|
|
@dataclass
class Interaction:
    """One captured prompt/response round-trip, as stored in the audit trail.

    Only the first seven fields are required by the constructor; the
    collection fields default to fresh empty containers and
    ``created_at`` defaults to the empty string.
    """

    # Unique identifier of the interaction row.
    id: str
    # The user prompt that started the round-trip.
    prompt: str
    # The LLM response text (may be empty).
    response: str
    # A caller-supplied summary of the response (may be empty).
    response_summary: str
    # Project the interaction is filed under.
    project: str
    # Identifier of the calling client.
    client: str
    # Optional session identifier used to group multi-turn conversations.
    session_id: str
    # Identifiers of the memories that were used in the context pack.
    memories_used: list[str] = field(default_factory=list)
    # Identifiers of the chunks that were used in the context pack.
    chunks_used: list[str] = field(default_factory=list)
    # The context pack that was assembled for the prompt.
    context_pack: dict = field(default_factory=dict)
    # Creation timestamp string; empty when not supplied.
    created_at: str = ""
|
|
|
|
|
|
def record_interaction(
    prompt: str,
    response: str = "",
    response_summary: str = "",
    project: str = "",
    client: str = "",
    session_id: str = "",
    memories_used: list[str] | None = None,
    chunks_used: list[str] | None = None,
    context_pack: dict | None = None,
    reinforce: bool = True,
    extract: bool = False,
) -> Interaction:
    """Persist a single interaction to the audit trail.

    The only required field is ``prompt`` so this can be called even when
    the caller is in the middle of a partial turn (for example to record
    that AtoCore was queried even before the LLM response is back).

    When ``reinforce`` is True (default) and the interaction has response
    content, the Phase 9 Commit B reinforcement pass runs automatically
    against the active memory set. This bumps the confidence of any
    memory whose content is echoed in the response. Set ``reinforce`` to
    False to capture the interaction without touching memory confidence,
    which is useful for backfill and for tests that want to isolate the
    audit trail from the reinforcement loop.

    When ``extract`` is True (default False) and the interaction has
    response content, candidate memories are extracted from the
    interaction and stored with ``status="candidate"``. Candidates that
    ``create_memory`` rejects with ``ValueError`` (duplicate or
    validation error) are skipped.

    Neither the reinforcement pass nor the extraction pass can fail the
    capture: any exception they raise is logged and swallowed after the
    row has been written.

    Returns:
        The ``Interaction`` that was stored, including its generated id
        and ``created_at`` timestamp.

    Raises:
        ValueError: if ``prompt`` is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise ValueError("Interaction prompt must be non-empty")

    # Canonicalize the project through the registry so an alias and
    # the canonical id store under the same bucket. Without this,
    # reinforcement and extraction (which both query by raw
    # interaction.project) would silently miss memories and create
    # candidates in the wrong project.
    project = resolve_project_name(project)

    interaction_id = str(uuid.uuid4())
    # Store created_at explicitly so the same string lives in both the DB
    # column and the returned dataclass, and use the shared storage format
    # so it compares cleanly with the normalized `since` filter on
    # list_interactions.
    now = datetime.now(timezone.utc).strftime(_STORAGE_TIMESTAMP_FORMAT)
    memories_used = list(memories_used or [])
    chunks_used = list(chunks_used or [])
    context_pack_payload = context_pack or {}

    with get_connection() as conn:
        conn.execute(
            """
            INSERT INTO interactions (
                id, prompt, context_pack, response_summary, response,
                memories_used, chunks_used, client, session_id, project,
                created_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                interaction_id,
                prompt,
                json.dumps(context_pack_payload, ensure_ascii=True),
                response_summary,
                response,
                json.dumps(memories_used, ensure_ascii=True),
                json.dumps(chunks_used, ensure_ascii=True),
                client,
                session_id,
                project,
                now,
            ),
        )

    log.info(
        "interaction_recorded",
        interaction_id=interaction_id,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=len(memories_used),
        chunks_used=len(chunks_used),
        response_chars=len(response),
    )

    interaction = Interaction(
        id=interaction_id,
        prompt=prompt,
        response=response,
        response_summary=response_summary,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=memories_used,
        chunks_used=chunks_used,
        context_pack=context_pack_payload,
        created_at=now,
    )

    # Both follow-up passes only make sense when there is response text.
    has_response_content = bool(response or response_summary)
    if reinforce and has_response_content:
        _reinforce_after_capture(interaction)
    if extract and has_response_content:
        _extract_after_capture(interaction)

    return interaction


def _reinforce_after_capture(interaction: Interaction) -> None:
    """Run the reinforcement pass for a captured interaction; log, never raise."""
    try:
        # Import inside the function to avoid a circular import between
        # the interactions service and the reinforcement module which
        # depends on it.
        from atocore.memory.reinforcement import reinforce_from_interaction

        reinforce_from_interaction(interaction)
    except Exception as exc:  # pragma: no cover - reinforcement must never block capture
        log.error(
            "reinforcement_failed_on_capture",
            interaction_id=interaction.id,
            error=str(exc),
        )


def _extract_after_capture(interaction: Interaction) -> None:
    """Store extracted candidate memories for a captured interaction; log, never raise."""
    try:
        from atocore.memory.extractor import extract_candidates_from_interaction
        from atocore.memory.service import create_memory

        skipped = 0
        for candidate in extract_candidates_from_interaction(interaction):
            try:
                create_memory(
                    memory_type=candidate.memory_type,
                    content=candidate.content,
                    project=candidate.project,
                    confidence=candidate.confidence,
                    status="candidate",
                )
            except ValueError:
                # Duplicate or validation error: skip this candidate but
                # keep count so the skip is visible in the log.
                skipped += 1
        if skipped:
            log.info(
                "extraction_candidates_skipped",
                interaction_id=interaction.id,
                skipped=skipped,
            )
    except Exception as exc:  # pragma: no cover - extraction must never block capture
        log.error(
            "extraction_failed_on_capture",
            interaction_id=interaction.id,
            error=str(exc),
        )
|
|
|
|
|
|
def list_interactions(
    project: str | None = None,
    session_id: str | None = None,
    client: str | None = None,
    since: str | None = None,
    limit: int = 50,
) -> list[Interaction]:
    """Return captured interactions, newest first, with optional filters.

    Every filter is applied only when its argument is truthy:

    - ``project`` is canonicalized through the registry first, so an
      alias finds rows stored under the canonical project id.
    - ``session_id`` and ``client`` are matched exactly.
    - ``since`` accepts an ISO 8601 timestamp string (with ``T``, an
      optional ``Z`` or numeric offset, optional fractional seconds) and
      is normalized to the storage format (UTC, ``YYYY-MM-DD HH:MM:SS``)
      before being compared against ``created_at``.

    A ``limit`` of zero or less returns an empty list without touching
    the database; otherwise ``limit`` is hard-capped at 500 to keep
    casual API listings cheap.
    """
    if limit <= 0:
        return []

    # Collect the optional predicates and their bound values in lockstep.
    clauses: list[str] = []
    bindings: list = []

    if project:
        clauses.append("project = ?")
        bindings.append(resolve_project_name(project))
    if session_id:
        clauses.append("session_id = ?")
        bindings.append(session_id)
    if client:
        clauses.append("client = ?")
        bindings.append(client)
    if since:
        clauses.append("created_at >= ?")
        bindings.append(_normalize_since(since))

    sql = (
        "SELECT * FROM interactions WHERE 1=1"
        + "".join(f" AND {clause}" for clause in clauses)
        + " ORDER BY created_at DESC LIMIT ?"
    )
    bindings.append(min(limit, 500))

    with get_connection() as conn:
        fetched = conn.execute(sql, bindings).fetchall()

    return [_row_to_interaction(record) for record in fetched]
|
|
|
|
|
|
def get_interaction(interaction_id: str) -> Interaction | None:
    """Look up a single interaction by id.

    Returns ``None`` when ``interaction_id`` is empty or when no row
    with that id exists; otherwise returns the stored interaction.
    """
    if not interaction_id:
        return None

    with get_connection() as conn:
        cursor = conn.execute(
            "SELECT * FROM interactions WHERE id = ?", (interaction_id,)
        )
        record = cursor.fetchone()

    return _row_to_interaction(record) if record is not None else None
|
|
|
|
|
|
def _row_to_interaction(row) -> Interaction:
    """Build an ``Interaction`` from a row of the ``interactions`` table.

    ``row`` must support lookup by column name. ``id`` and ``prompt``
    are passed through as-is; the other text columns fall back to the
    empty string when falsy, and the JSON columns are decoded through
    the tolerant ``_safe_json_*`` helpers.
    """

    def text(column: str) -> str:
        # Falsy column values (e.g. NULL) collapse to the empty string.
        return row[column] or ""

    return Interaction(
        id=row["id"],
        prompt=row["prompt"],
        response=text("response"),
        response_summary=text("response_summary"),
        project=text("project"),
        client=text("client"),
        session_id=text("session_id"),
        memories_used=_safe_json_list(row["memories_used"]),
        chunks_used=_safe_json_list(row["chunks_used"]),
        context_pack=_safe_json_dict(row["context_pack"]),
        created_at=text("created_at"),
    )
|
|
|
|
|
|
def _safe_json_list(raw: str | None) -> list[str]:
|
|
if not raw:
|
|
return []
|
|
try:
|
|
value = json.loads(raw)
|
|
except json.JSONDecodeError:
|
|
return []
|
|
if not isinstance(value, list):
|
|
return []
|
|
return [str(item) for item in value]
|
|
|
|
|
|
def _safe_json_dict(raw: str | None) -> dict:
|
|
if not raw:
|
|
return {}
|
|
try:
|
|
value = json.loads(raw)
|
|
except json.JSONDecodeError:
|
|
return {}
|
|
if not isinstance(value, dict):
|
|
return {}
|
|
return value
|
|
|
|
|
|
def _normalize_since(since: str) -> str:
|
|
"""Normalize an ISO 8601 ``since`` filter to the storage format.
|
|
|
|
Stored ``created_at`` values are ``YYYY-MM-DD HH:MM:SS`` (no
|
|
timezone, UTC by convention). External callers naturally pass
|
|
ISO 8601 with ``T`` separator, optional ``Z`` suffix, optional
|
|
fractional seconds, and optional ``+HH:MM`` offsets. A naive
|
|
string comparison between the two formats fails on the same
|
|
day because the lexically-greater ``T`` makes any ISO value
|
|
sort after any space-separated value.
|
|
|
|
This helper accepts the common ISO shapes plus the bare
|
|
storage format and returns the storage format. On a parse
|
|
failure it returns the input unchanged so the SQL comparison
|
|
fails open (no rows match) instead of raising and breaking
|
|
the listing endpoint.
|
|
"""
|
|
if not since:
|
|
return since
|
|
candidate = since.strip()
|
|
# Python's fromisoformat understands trailing 'Z' from 3.11+ but
|
|
# we replace it explicitly for safety against earlier shapes.
|
|
if candidate.endswith("Z"):
|
|
candidate = candidate[:-1] + "+00:00"
|
|
try:
|
|
dt = datetime.fromisoformat(candidate)
|
|
except ValueError:
|
|
# Already in storage format, or unparseable: best-effort
|
|
# match the storage format with a regex; if that fails too,
|
|
# return the raw input.
|
|
if re.fullmatch(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", since):
|
|
return since
|
|
return since
|
|
if dt.tzinfo is not None:
|
|
dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
|
|
return dt.strftime(_STORAGE_TIMESTAMP_FORMAT)
|