"""Interaction capture service.

An *interaction* is one round-trip of:
- a user prompt
- the AtoCore context pack that was assembled for it
- the LLM response (full text or a summary, caller's choice)
- which memories and chunks were actually used in the pack
- a client identifier (e.g. ``openclaw``, ``claude-code``, ``manual``)
- an optional session identifier so multi-turn conversations can be
  reconstructed later

The capture is intentionally additive: it never modifies memories,
project state, or chunks. Reflection (Phase 9 Commit B/C) and
write-back (Phase 10) are layered on top of this audit trail without
violating the AtoCore trust hierarchy.
"""

from __future__ import annotations

import json
import re
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone

from atocore.models.database import get_connection
from atocore.observability.logger import get_logger
from atocore.projects.registry import resolve_project_name

# Module logger for the interaction capture service.
log = get_logger("interactions")

# Stored timestamps use 'YYYY-MM-DD HH:MM:SS' (no timezone offset, UTC by
# convention) so they sort lexically and compare cleanly with the SQLite
# CURRENT_TIMESTAMP default. The `since` filter accepts ISO 8601 strings
# (with 'T', optional 'Z' or +offset, optional fractional seconds) and
# normalizes them to this storage format before the SQL comparison.
_STORAGE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


@dataclass
class Interaction:
    """One captured prompt/response round-trip from the audit trail.

    Instances are produced by ``record_interaction`` (freshly written)
    and by ``_row_to_interaction`` (read back from the ``interactions``
    table). Text fields that were never captured are empty strings
    rather than ``None``.
    """

    # Unique identifier; record_interaction assigns a uuid4 string.
    id: str
    # The user prompt. record_interaction rejects empty/whitespace prompts.
    prompt: str
    # LLM response text; "" when it has not been captured.
    response: str
    # Caller-supplied summary of the response; "" when absent.
    response_summary: str
    # Project name; record_interaction passes it through
    # resolve_project_name before storing.
    project: str
    # Client identifier (e.g. "openclaw", "claude-code", "manual").
    client: str
    # Optional session identifier used to group multi-turn conversations.
    session_id: str
    # Memories that were used in the context pack (stored as a JSON list).
    memories_used: list[str] = field(default_factory=list)
    # Chunks that were used in the context pack (stored as a JSON list).
    chunks_used: list[str] = field(default_factory=list)
    # The context pack assembled for the prompt (stored as a JSON object).
    context_pack: dict = field(default_factory=dict)
    # Creation time; record_interaction writes 'YYYY-MM-DD HH:MM:SS' in UTC.
    created_at: str = ""


def record_interaction(
    prompt: str,
    response: str = "",
    response_summary: str = "",
    project: str = "",
    client: str = "",
    session_id: str = "",
    memories_used: list[str] | None = None,
    chunks_used: list[str] | None = None,
    context_pack: dict | None = None,
    reinforce: bool = True,
) -> Interaction:
    """Persist a single interaction to the audit trail.

    The only required field is ``prompt`` so this can be called even when
    the caller is in the middle of a partial turn (for example to record
    that AtoCore was queried even before the LLM response is back).

    ``project`` is canonicalized through the project registry before it
    is stored. ``memories_used``, ``chunks_used`` and ``context_pack``
    are serialized to JSON text columns; ``None`` is stored as an empty
    list / empty object.

    When ``reinforce`` is True (default) and the interaction has response
    content (``response`` or ``response_summary``), the Phase 9 Commit B
    reinforcement pass runs automatically against the active memory set.
    This bumps the confidence of any memory whose content is echoed in
    the response. Set ``reinforce`` to False to capture the interaction
    without touching memory confidence, which is useful for backfill and
    for tests that want to isolate the audit trail from the
    reinforcement loop. A failure inside the reinforcement pass is
    logged and swallowed: it never prevents the capture from returning.

    Returns the ``Interaction`` exactly as it was written, including the
    generated ``id`` and ``created_at``.

    Raises ``ValueError`` if ``prompt`` is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise ValueError("Interaction prompt must be non-empty")

    # Canonicalize the project through the registry so an alias and
    # the canonical id store under the same bucket. Without this,
    # reinforcement and extraction (which both query by raw
    # interaction.project) would silently miss memories and create
    # candidates in the wrong project.
    project = resolve_project_name(project)

    interaction_id = str(uuid.uuid4())
    # Write created_at explicitly (instead of relying on the column
    # default) so the exact same string lives in both the DB row and the
    # returned dataclass. The shared storage format constant keeps this
    # writer in lockstep with the `since` normalization used by
    # list_interactions; an ISO string with 'T' / tz offset here would
    # not compare cleanly against the normalized filter value.
    now = datetime.now(timezone.utc).strftime(_STORAGE_TIMESTAMP_FORMAT)
    # Copy the lists so later mutation by the caller cannot change the
    # returned Interaction.
    memories_used = list(memories_used or [])
    chunks_used = list(chunks_used or [])
    context_pack_payload = context_pack or {}

    with get_connection() as conn:
        conn.execute(
            """
            INSERT INTO interactions (
                id, prompt, context_pack, response_summary, response,
                memories_used, chunks_used, client, session_id, project,
                created_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                interaction_id,
                prompt,
                json.dumps(context_pack_payload, ensure_ascii=True),
                response_summary,
                response,
                json.dumps(memories_used, ensure_ascii=True),
                json.dumps(chunks_used, ensure_ascii=True),
                client,
                session_id,
                project,
                now,
            ),
        )

    # Log sizes rather than content so prompts/responses stay out of logs.
    log.info(
        "interaction_recorded",
        interaction_id=interaction_id,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=len(memories_used),
        chunks_used=len(chunks_used),
        response_chars=len(response),
    )

    interaction = Interaction(
        id=interaction_id,
        prompt=prompt,
        response=response,
        response_summary=response_summary,
        project=project,
        client=client,
        session_id=session_id,
        memories_used=memories_used,
        chunks_used=chunks_used,
        context_pack=context_pack_payload,
        created_at=now,
    )

    if reinforce and (response or response_summary):
        # Import inside the function to avoid a circular import between
        # the interactions service and the reinforcement module which
        # depends on it.
        try:
            from atocore.memory.reinforcement import reinforce_from_interaction

            reinforce_from_interaction(interaction)
        except Exception as exc:  # pragma: no cover - reinforcement must never block capture
            log.error(
                "reinforcement_failed_on_capture",
                interaction_id=interaction_id,
                error=str(exc),
            )

    return interaction


def list_interactions(
    project: str | None = None,
    session_id: str | None = None,
    client: str | None = None,
    since: str | None = None,
    limit: int = 50,
) -> list[Interaction]:
    """Return captured interactions, newest first, with optional filters.

    Every filter is applied only when its argument is truthy:

    - ``project`` is canonicalized through the registry, so an alias
      finds rows stored under the canonical project id.
    - ``session_id`` and ``client`` are matched exactly.
    - ``since`` is an ISO 8601 timestamp string (with ``T``, an optional
      ``Z`` or numeric offset, optional fractional seconds). It is
      normalized to the storage format (UTC, ``YYYY-MM-DD HH:MM:SS``)
      and rows with ``created_at`` at or after it are kept.

    A non-positive ``limit`` yields an empty list without touching the
    database; otherwise ``limit`` is hard-capped at 500 to keep casual
    API listings cheap.
    """
    if limit <= 0:
        return []

    # Collect SQL fragments and their bound values side by side; the
    # fragments are concatenated into one statement at the end.
    fragments = ["SELECT * FROM interactions WHERE 1=1"]
    bindings: list = []

    if project:
        fragments.append(" AND project = ?")
        bindings.append(resolve_project_name(project))
    if session_id:
        fragments.append(" AND session_id = ?")
        bindings.append(session_id)
    if client:
        fragments.append(" AND client = ?")
        bindings.append(client)
    if since:
        fragments.append(" AND created_at >= ?")
        bindings.append(_normalize_since(since))

    fragments.append(" ORDER BY created_at DESC LIMIT ?")
    bindings.append(min(limit, 500))

    sql = "".join(fragments)
    with get_connection() as conn:
        fetched = conn.execute(sql, bindings).fetchall()

    return [_row_to_interaction(record) for record in fetched]


def get_interaction(interaction_id: str) -> Interaction | None:
    """Look up a single interaction by its id.

    Returns ``None`` when ``interaction_id`` is empty or when no row
    with that id exists; an empty id never reaches the database.
    """
    if not interaction_id:
        return None

    with get_connection() as conn:
        cursor = conn.execute(
            "SELECT * FROM interactions WHERE id = ?", (interaction_id,)
        )
        found = cursor.fetchone()

    return None if found is None else _row_to_interaction(found)


def _row_to_interaction(row) -> Interaction:
    """Build an ``Interaction`` from one ``interactions`` table row.

    ``row`` must support lookup by column name. Falsy values (such as
    NULL) in the optional text columns become empty strings, and the
    JSON columns are decoded leniently so a malformed value yields an
    empty list / dict instead of raising.
    """

    def text(column: str) -> str:
        # Optional text columns may hold NULL; expose those as "".
        return row[column] or ""

    return Interaction(
        id=row["id"],
        prompt=row["prompt"],
        response=text("response"),
        response_summary=text("response_summary"),
        project=text("project"),
        client=text("client"),
        session_id=text("session_id"),
        memories_used=_safe_json_list(row["memories_used"]),
        chunks_used=_safe_json_list(row["chunks_used"]),
        context_pack=_safe_json_dict(row["context_pack"]),
        created_at=text("created_at"),
    )


def _safe_json_list(raw: str | None) -> list[str]:
    if not raw:
        return []
    try:
        value = json.loads(raw)
    except json.JSONDecodeError:
        return []
    if not isinstance(value, list):
        return []
    return [str(item) for item in value]


def _safe_json_dict(raw: str | None) -> dict:
    if not raw:
        return {}
    try:
        value = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    if not isinstance(value, dict):
        return {}
    return value


def _normalize_since(since: str) -> str:
    """Normalize an ISO 8601 ``since`` filter to the storage format.

    Stored ``created_at`` values are ``YYYY-MM-DD HH:MM:SS`` (no
    timezone, UTC by convention). External callers naturally pass
    ISO 8601 with ``T`` separator, optional ``Z`` suffix, optional
    fractional seconds, and optional ``+HH:MM`` offsets. A naive
    string comparison between the two formats fails on the same
    day because the lexically-greater ``T`` makes any ISO value
    sort after any space-separated value.

    This helper accepts the common ISO shapes plus the bare
    storage format and returns the storage format. On a parse
    failure it returns the input unchanged so the SQL comparison
    fails open (no rows match) instead of raising and breaking
    the listing endpoint.
    """
    if not since:
        return since
    candidate = since.strip()
    # Python's fromisoformat understands trailing 'Z' from 3.11+ but
    # we replace it explicitly for safety against earlier shapes.
    if candidate.endswith("Z"):
        candidate = candidate[:-1] + "+00:00"
    try:
        dt = datetime.fromisoformat(candidate)
    except ValueError:
        # Already in storage format, or unparseable: best-effort
        # match the storage format with a regex; if that fails too,
        # return the raw input.
        if re.fullmatch(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", since):
            return since
        return since
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc).replace(tzinfo=None)
    return dt.strftime(_STORAGE_TIMESTAMP_FORMAT)