Files
ATOCore/src/atocore/interactions/service.py
Anto01 0b1742770a feat: cleanup endpoint, auto-extraction on capture, daily cron script
- POST /admin/backup/cleanup — retention cleanup via API (dry-run by default)
- record_interaction() accepts extract=True to auto-extract candidate
  memories from response text using the Phase 9C rule-based extractor
- POST /interactions accepts extract field to enable extraction on capture
- deploy/dalidou/cron-backup.sh — daily backup + cleanup for cron

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-11 10:28:32 -04:00

330 lines
11 KiB
Python

"""Interaction capture service.
An *interaction* is one round-trip of:
- a user prompt
- the AtoCore context pack that was assembled for it
- the LLM response (full text or a summary, caller's choice)
- which memories and chunks were actually used in the pack
- a client identifier (e.g. ``openclaw``, ``claude-code``, ``manual``)
- an optional session identifier so multi-turn conversations can be
reconstructed later
The capture is intentionally additive: it never modifies memories,
project state, or chunks. Reflection (Phase 9 Commit B/C) and
write-back (Phase 10) are layered on top of this audit trail without
violating the AtoCore trust hierarchy.
"""
from __future__ import annotations
import json
import re
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from atocore.models.database import get_connection
from atocore.observability.logger import get_logger
from atocore.projects.registry import resolve_project_name
log = get_logger("interactions")
# Stored timestamps use 'YYYY-MM-DD HH:MM:SS' (no timezone offset, UTC by
# convention) so they sort lexically and compare cleanly with the SQLite
# CURRENT_TIMESTAMP default. The since filter accepts ISO 8601 strings
# (with 'T', optional 'Z' or +offset, optional fractional seconds) and
# normalizes them to the storage format before the SQL comparison.
_STORAGE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"
@dataclass
class Interaction:
    """One captured prompt/response round-trip in the audit trail."""

    # UUID4 string assigned at capture time.
    id: str
    # The user prompt that started the turn (the only required capture field).
    prompt: str
    # Full LLM response text, or "" when not (yet) available.
    response: str
    # Caller-provided condensed response, or "".
    response_summary: str
    # Canonical project id (resolved through the project registry on capture).
    project: str
    # Originating client identifier (e.g. "openclaw", "claude-code", "manual").
    client: str
    # Optional session identifier so multi-turn conversations can be rebuilt.
    session_id: str
    # Memory ids that were included in the context pack.
    memories_used: list[str] = field(default_factory=list)
    # Chunk ids that were included in the context pack.
    chunks_used: list[str] = field(default_factory=list)
    # The assembled context pack payload (JSON-decoded dict).
    context_pack: dict = field(default_factory=dict)
    # Storage-format timestamp 'YYYY-MM-DD HH:MM:SS', UTC by convention.
    created_at: str = ""
def record_interaction(
    prompt: str,
    response: str = "",
    response_summary: str = "",
    project: str = "",
    client: str = "",
    session_id: str = "",
    memories_used: list[str] | None = None,
    chunks_used: list[str] | None = None,
    context_pack: dict | None = None,
    reinforce: bool = True,
    extract: bool = False,
) -> Interaction:
    """Persist a single interaction to the audit trail.

    The only required field is ``prompt`` so this can be called even when
    the caller is in the middle of a partial turn (for example to record
    that AtoCore was queried even before the LLM response is back).

    When ``reinforce`` is True (default) and the interaction has response
    content, the Phase 9 Commit B reinforcement pass runs automatically
    against the active memory set. This bumps the confidence of any
    memory whose content is echoed in the response. Set ``reinforce`` to
    False to capture the interaction without touching memory confidence,
    which is useful for backfill and for tests that want to isolate the
    audit trail from the reinforcement loop.

    When ``extract`` is True and the interaction has response content,
    the Phase 9C rule-based extractor runs on the response and stores any
    candidate memories with ``status="candidate"``. Duplicate/invalid
    candidates are skipped silently; extractor failures are logged but
    never block capture.

    Raises:
        ValueError: if ``prompt`` is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise ValueError("Interaction prompt must be non-empty")
    # Canonicalize the project through the registry so an alias and
    # the canonical id store under the same bucket. Without this,
    # reinforcement and extraction (which both query by raw
    # interaction.project) would silently miss memories and create
    # candidates in the wrong project.
    project = resolve_project_name(project)
    interaction_id = str(uuid.uuid4())
    # Store created_at explicitly (in the shared storage format) so the
    # same string lives in both the DB column and the returned dataclass.
    # SQLite's CURRENT_TIMESTAMP uses 'YYYY-MM-DD HH:MM:SS' which would
    # not compare cleanly against ISO timestamps with 'T' and tz offset,
    # breaking the `since` filter on list_interactions.
    now = datetime.now(timezone.utc).strftime(_STORAGE_TIMESTAMP_FORMAT)
    memories_used = list(memories_used or [])
    chunks_used = list(chunks_used or [])
    context_pack_payload = context_pack or {}
    with get_connection() as conn:
        conn.execute(
            """
            INSERT INTO interactions (
                id, prompt, context_pack, response_summary, response,
                memories_used, chunks_used, client, session_id, project,
                created_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                interaction_id,
                prompt,
                json.dumps(context_pack_payload, ensure_ascii=True),
                response_summary,
                response,
                json.dumps(memories_used, ensure_ascii=True),
                json.dumps(chunks_used, ensure_ascii=True),
                client,
                session_id,
                project,
                now,
            ),
        )
    log.info(
        "interaction_recorded",
        interaction_id=interaction_id,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=len(memories_used),
        chunks_used=len(chunks_used),
        response_chars=len(response),
    )
    interaction = Interaction(
        id=interaction_id,
        prompt=prompt,
        response=response,
        response_summary=response_summary,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=memories_used,
        chunks_used=chunks_used,
        context_pack=context_pack_payload,
        created_at=now,
    )
    has_response_content = bool(response or response_summary)
    if reinforce and has_response_content:
        _reinforce_safely(interaction)
    if extract and has_response_content:
        _extract_safely(interaction)
    return interaction


def _reinforce_safely(interaction: Interaction) -> None:
    """Run the reinforcement pass for one interaction; log, never raise."""
    # Import inside the function to avoid a circular import between
    # the interactions service and the reinforcement module which
    # depends on it.
    try:
        from atocore.memory.reinforcement import reinforce_from_interaction

        reinforce_from_interaction(interaction)
    except Exception as exc:  # pragma: no cover - reinforcement must never block capture
        log.error(
            "reinforcement_failed_on_capture",
            interaction_id=interaction.id,
            error=str(exc),
        )


def _extract_safely(interaction: Interaction) -> None:
    """Extract candidate memories from one interaction; log, never raise."""
    # Imported lazily for the same circular-import reason as reinforcement.
    try:
        from atocore.memory.extractor import extract_candidates_from_interaction
        from atocore.memory.service import create_memory

        candidates = extract_candidates_from_interaction(interaction)
        for candidate in candidates:
            try:
                create_memory(
                    memory_type=candidate.memory_type,
                    content=candidate.content,
                    project=candidate.project,
                    confidence=candidate.confidence,
                    status="candidate",
                )
            except ValueError:
                pass  # duplicate or validation error — skip silently
    except Exception as exc:  # pragma: no cover - extraction must never block capture
        log.error(
            "extraction_failed_on_capture",
            interaction_id=interaction.id,
            error=str(exc),
        )
def list_interactions(
    project: str | None = None,
    session_id: str | None = None,
    client: str | None = None,
    since: str | None = None,
    limit: int = 50,
) -> list[Interaction]:
    """List captured interactions, optionally filtered.

    ``since`` accepts an ISO 8601 timestamp string (with ``T``, an
    optional ``Z`` or numeric offset, optional fractional seconds).
    The value is normalized to the storage format (UTC,
    ``YYYY-MM-DD HH:MM:SS``) before the SQL comparison so external
    callers can pass any of the common ISO shapes without filter
    drift. ``project`` is canonicalized through the registry so an
    alias finds rows stored under the canonical project id.

    ``limit`` is hard-capped at 500 to keep casual API listings cheap.
    """
    if limit <= 0:
        return []

    # Assemble WHERE clauses and their bind parameters side by side;
    # "1=1" keeps the AND-join uniform even with no filters active.
    clauses: list[str] = ["1=1"]
    args: list = []
    if project:
        clauses.append("project = ?")
        args.append(resolve_project_name(project))
    if session_id:
        clauses.append("session_id = ?")
        args.append(session_id)
    if client:
        clauses.append("client = ?")
        args.append(client)
    if since:
        clauses.append("created_at >= ?")
        args.append(_normalize_since(since))

    sql = (
        "SELECT * FROM interactions WHERE "
        + " AND ".join(clauses)
        + " ORDER BY created_at DESC LIMIT ?"
    )
    args.append(min(limit, 500))

    with get_connection() as conn:
        rows = conn.execute(sql, args).fetchall()
    return [_row_to_interaction(row) for row in rows]
def get_interaction(interaction_id: str) -> Interaction | None:
    """Fetch one interaction by id, or return None if it does not exist."""
    if not interaction_id:
        return None
    with get_connection() as conn:
        cursor = conn.execute(
            "SELECT * FROM interactions WHERE id = ?", (interaction_id,)
        )
        row = cursor.fetchone()
    return None if row is None else _row_to_interaction(row)
def _row_to_interaction(row) -> Interaction:
    """Hydrate an Interaction from a DB row, coalescing NULL columns."""

    def _text(column: str) -> str:
        # NULL-able text columns collapse to "" so every dataclass field
        # is always a plain str.
        return row[column] or ""

    return Interaction(
        id=row["id"],
        prompt=row["prompt"],
        response=_text("response"),
        response_summary=_text("response_summary"),
        project=_text("project"),
        client=_text("client"),
        session_id=_text("session_id"),
        memories_used=_safe_json_list(row["memories_used"]),
        chunks_used=_safe_json_list(row["chunks_used"]),
        context_pack=_safe_json_dict(row["context_pack"]),
        created_at=_text("created_at"),
    )
def _safe_json_list(raw: str | None) -> list[str]:
if not raw:
return []
try:
value = json.loads(raw)
except json.JSONDecodeError:
return []
if not isinstance(value, list):
return []
return [str(item) for item in value]
def _safe_json_dict(raw: str | None) -> dict:
if not raw:
return {}
try:
value = json.loads(raw)
except json.JSONDecodeError:
return {}
if not isinstance(value, dict):
return {}
return value
def _normalize_since(since: str) -> str:
    """Normalize an ISO 8601 ``since`` filter to the storage format.

    Stored ``created_at`` values are ``YYYY-MM-DD HH:MM:SS`` (no
    timezone, UTC by convention). External callers naturally pass
    ISO 8601 with ``T`` separator, optional ``Z`` suffix, optional
    fractional seconds, and optional ``+HH:MM`` offsets. A naive
    string comparison between the two formats fails on the same
    day because the lexically-greater ``T`` makes any ISO value
    sort after any space-separated value.

    This helper accepts the common ISO shapes plus the bare
    storage format and returns the storage format. On a parse
    failure it returns the input unchanged so the SQL comparison
    fails open (no rows match) instead of raising and breaking
    the listing endpoint.
    """
    if not since:
        return since
    candidate = since.strip()
    # Python's fromisoformat understands trailing 'Z' from 3.11+ but
    # we replace it explicitly for safety against earlier shapes.
    if candidate.endswith("Z"):
        candidate = candidate[:-1] + "+00:00"
    try:
        dt = datetime.fromisoformat(candidate)
    except ValueError:
        # fromisoformat already parses the bare storage format
        # ('YYYY-MM-DD HH:MM:SS'), so reaching here means the input is
        # genuinely unparseable. Return it unchanged (fail open).
        # The previous regex check for the storage format was dead
        # code: both its branches returned ``since`` verbatim.
        return since
    # Aware timestamps are converted to UTC and stripped of tzinfo so
    # strftime emits the naive-UTC storage convention.
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt.strftime(_STORAGE_TIMESTAMP_FORMAT)