Files
ATOCore/tests/test_interactions.py

305 lines
11 KiB
Python
Raw Permalink Normal View History

feat(phase9-A): interaction capture loop foundation Phase 9 Commit A from the agreed plan: turn AtoCore from a stateless context enhancer into a system that records what it actually fed to an LLM and what came back. This is the audit trail Reflection (Commit B) and Extraction (Commit C) will be layered on top of. The interactions table existed in the schema since the original PoC but nothing wrote to it. This change makes it real: Schema migration (additive only): - response full LLM response (caller decides how much) - memories_used JSON list of memory ids in the context pack - chunks_used JSON list of chunk ids in the context pack - client identifier of the calling system (openclaw, claude-code, manual, ...) - session_id groups multi-turn conversations - project project name (mirrors the memory module pattern, no FK so capture stays cheap) - indexes on session_id, project, created_at The created_at column is now written explicitly with a SQLite-compatible 'YYYY-MM-DD HH:MM:SS' format so the same string lives in the DB and the returned dataclass. Without this the `since` filter on list_interactions would silently fail because CURRENT_TIMESTAMP and isoformat use different shapes that do not compare cleanly as strings. New module src/atocore/interactions/: - Interaction dataclass - record_interaction() persists one round-trip (prompt required; everything else optional). Refuses empty prompts. - list_interactions() filters by project / session_id / client / since, newest-first, hard-capped at 500 - get_interaction() fetch by id, full response + context pack API endpoints: - POST /interactions capture one interaction - GET /interactions list with summaries (no full response) - GET /interactions/{id} full record incl. response + pack Trust model: - Capture is read-only with respect to memories, project state, and source chunks. Nothing here promotes anything into trusted state. 
- The audit trail becomes the dataset Commit B (reinforcement) and Commit C (extraction + review queue) will operate on. Tests (13 new, all green): - service: persist + roundtrip every field - service: minimum-fields path (prompt only) - service: empty / whitespace prompt rejected - service: get by id returns None for missing - service: filter by project, session, client - service: ordering newest-first with limit - service: since filter inclusive on cutoff (the bug the timestamp fix above caught) - service: limit=0 returns empty - API: POST records and round-trips through GET /interactions/{id} - API: empty prompt returns 400 - API: missing id returns 404 - API: list filter returns summaries (not full response bodies) Full suite: 118 passing (was 105). master-plan-status.md updated to move Phase 9 from "not started" to "started" with the explicit note that Commit A is in and Commits B/C remain.
2026-04-06 19:31:43 -04:00
"""Tests for the Phase 9 Commit A interaction capture loop."""
import time
import pytest
from fastapi.testclient import TestClient
from atocore.interactions.service import (
get_interaction,
list_interactions,
record_interaction,
)
from atocore.main import app
from atocore.models.database import init_db
# --- Service-level tests --------------------------------------------------
def test_record_interaction_persists_all_fields(tmp_data_dir):
    """Every field supplied at capture time survives a write/read round-trip."""
    init_db()
    recorded = record_interaction(
        prompt="What is the lateral support material for p05?",
        response="The current lateral support uses GF-PTFE pads per Decision D-024.",
        response_summary="lateral support: GF-PTFE per D-024",
        project="p05-interferometer",
        client="claude-code",
        session_id="sess-001",
        memories_used=["mem-aaa", "mem-bbb"],
        chunks_used=["chunk-111", "chunk-222", "chunk-333"],
        context_pack={"budget": 3000, "chunks": 3},
    )
    assert recorded.id
    assert recorded.created_at

    fetched = get_interaction(recorded.id)
    assert fetched is not None
    assert fetched.prompt.startswith("What is the lateral support")
    assert fetched.response.startswith("The current lateral support")
    # Remaining fields must round-trip exactly as supplied.
    expected = {
        "response_summary": "lateral support: GF-PTFE per D-024",
        "project": "p05-interferometer",
        "client": "claude-code",
        "session_id": "sess-001",
        "memories_used": ["mem-aaa", "mem-bbb"],
        "chunks_used": ["chunk-111", "chunk-222", "chunk-333"],
        "context_pack": {"budget": 3000, "chunks": 3},
    }
    for field_name, field_value in expected.items():
        assert getattr(fetched, field_name) == field_value
def test_record_interaction_minimum_fields(tmp_data_dir):
    """A prompt alone is sufficient; all optional fields default to empty."""
    init_db()
    result = record_interaction(prompt="ping")
    assert result.id
    assert result.prompt == "ping"
    assert result.response == ""
    assert result.memories_used == []
    assert result.chunks_used == []
def test_record_interaction_rejects_empty_prompt(tmp_data_dir):
    """Empty and whitespace-only prompts are refused with ValueError."""
    init_db()
    for bad_prompt in ("", " "):
        with pytest.raises(ValueError):
            record_interaction(prompt=bad_prompt)
def test_get_interaction_returns_none_for_unknown_id(tmp_data_dir):
    """Lookups for a missing or blank id return None rather than raising."""
    init_db()
    for missing_id in ("does-not-exist", ""):
        assert get_interaction(missing_id) is None
def test_list_interactions_filters_by_project(tmp_data_dir):
    """The project filter returns only that project's captures."""
    init_db()
    record_interaction(prompt="p04 question", project="p04-gigabit")
    record_interaction(prompt="p05 question", project="p05-interferometer")
    record_interaction(prompt="another p05", project="p05-interferometer")

    p05_items = list_interactions(project="p05-interferometer")
    assert len(p05_items) == 2
    assert {item.project for item in p05_items} == {"p05-interferometer"}

    p04_items = list_interactions(project="p04-gigabit")
    assert len(p04_items) == 1
    assert p04_items[0].prompt == "p04 question"
def test_list_interactions_filters_by_session_and_client(tmp_data_dir):
    """session_id and client filters each select exactly the matching rows."""
    init_db()
    record_interaction(prompt="a", session_id="sess-A", client="openclaw")
    record_interaction(prompt="b", session_id="sess-A", client="claude-code")
    record_interaction(prompt="c", session_id="sess-B", client="openclaw")

    by_session = list_interactions(session_id="sess-A")
    assert len(by_session) == 2
    # sess-A spans two different clients; both must appear.
    assert {item.client for item in by_session} == {"openclaw", "claude-code"}

    by_client = list_interactions(client="openclaw")
    assert len(by_client) == 2
def test_list_interactions_orders_newest_first_and_respects_limit(tmp_data_dir):
    """Listing is newest-first and truncated to the requested limit."""
    init_db()
    # created_at has 1-second resolution; sleeping between inserts keeps
    # the ordering deterministic regardless of insert speed.
    for n in range(5):
        record_interaction(prompt=f"prompt-{n}")
        time.sleep(1.05)

    items = list_interactions(limit=3)
    # Only the three most recent survive the cap, in reverse insert order.
    assert [item.prompt for item in items] == ["prompt-4", "prompt-3", "prompt-2"]
def test_list_interactions_respects_since_filter(tmp_data_dir):
    """The `since` cutoff is inclusive: rows AT the cutoff are returned."""
    init_db()
    first = record_interaction(prompt="early")
    time.sleep(1.05)
    second = record_interaction(prompt="late")

    # Cutting at the first row keeps both (inclusive lower bound).
    ids_from_first = {item.id for item in list_interactions(since=first.created_at)}
    assert second.id in ids_from_first
    assert first.id in ids_from_first  # cutoff is inclusive

    # Cutting at the second row drops the strictly-earlier one.
    ids_from_second = {item.id for item in list_interactions(since=second.created_at)}
    assert second.id in ids_from_second
    assert first.id not in ids_from_second
def test_list_interactions_zero_limit_returns_empty(tmp_data_dir):
    """limit=0 yields an empty list even when rows exist."""
    init_db()
    record_interaction(prompt="ping")
    result = list_interactions(limit=0)
    assert result == []
# --- API-level tests ------------------------------------------------------
def test_post_interactions_endpoint_records_interaction(tmp_data_dir):
    """POST /interactions persists a record retrievable via GET /interactions/{id}."""
    init_db()
    api = TestClient(app)
    payload = {
        "prompt": "What changed in p06 this week?",
        "response": "Polisher kinematic frame parameters updated to v0.3.",
        "response_summary": "p06 frame parameters bumped to v0.3",
        "project": "p06-polisher",
        "client": "claude-code",
        "session_id": "sess-xyz",
        "memories_used": ["mem-1"],
        "chunks_used": ["chunk-a", "chunk-b"],
        "context_pack": {"chunks": 2},
    }
    posted = api.post("/interactions", json=payload)
    assert posted.status_code == 200
    post_body = posted.json()
    assert post_body["status"] == "recorded"

    # Round-trip via the GET endpoint using the id the POST handed back.
    fetched = api.get(f"/interactions/{post_body['id']}")
    assert fetched.status_code == 200
    record = fetched.json()
    assert record["prompt"].startswith("What changed in p06")
    assert record["response"].startswith("Polisher kinematic frame")
    assert record["project"] == "p06-polisher"
    assert record["chunks_used"] == ["chunk-a", "chunk-b"]
    assert record["context_pack"] == {"chunks": 2}
def test_post_interactions_rejects_empty_prompt(tmp_data_dir):
    """An empty prompt is rejected at the API boundary with HTTP 400."""
    init_db()
    api = TestClient(app)
    assert api.post("/interactions", json={"prompt": ""}).status_code == 400
def test_get_unknown_interaction_returns_404(tmp_data_dir):
    """Fetching a nonexistent interaction id yields HTTP 404."""
    init_db()
    api = TestClient(app)
    assert api.get("/interactions/does-not-exist").status_code == 404
def test_list_interactions_endpoint_returns_summaries(tmp_data_dir):
    """GET /interactions returns summaries with sizes, never full response bodies."""
    init_db()
    api = TestClient(app)
    api.post(
        "/interactions",
        json={"prompt": "alpha", "project": "p04-gigabit", "response": "x" * 10},
    )
    api.post(
        "/interactions",
        json={"prompt": "beta", "project": "p05-interferometer", "response": "y" * 50},
    )

    listed = api.get("/interactions", params={"project": "p05-interferometer"})
    assert listed.status_code == 200
    body = listed.json()
    assert body["count"] == 1
    summary = body["interactions"][0]
    assert summary["prompt"] == "beta"
    assert summary["response_chars"] == 50
    # Summaries expose the response size, never the response text itself.
    assert "response" not in summary
fix(P1+P2): canonicalize project names at every trust boundary Three findings from codex's review of the previous P1+P2 fix. The earlier commit (f2372ef) only fixed alias resolution at the context builder. Codex correctly pointed out that the same fragmentation applies at every other place a project name crosses a boundary — project_state writes/reads, interaction capture/listing/filtering, memory create/queries, and reinforcement's downstream queries. Plus a real bug in the interaction `since` filter where the storage format and the documented ISO format don't compare cleanly. The fix is one helper used at every boundary instead of duplicating the resolution inline. New helper: src/atocore/projects/registry.py::resolve_project_name --------------------------------------------------------------- - Single canonicalization boundary for project names - Returns the canonical project_id when the input matches any registered id or alias - Returns the input unchanged for empty/None and for unregistered names (preserves backwards compat with hand-curated state that predates the registry) - Documented as the contract that every read/write at the trust boundary should pass through P1 — Trusted Project State endpoints ------------------------------------ src/atocore/context/project_state.py: set_state, get_state, and invalidate_state now all canonicalize project_name through resolve_project_name BEFORE looking up or creating the project row. 
Before this fix: - POST /project/state with project="p05" called ensure_project("p05") which created a separate row in the projects table - The state row was attached to that alias project_id - Later context builds canonicalized "p05" -> "p05-interferometer" via the builder fix from f2372ef and never found the state - Result: trusted state silently fragmented across alias rows After this fix: - The alias is resolved to the canonical id at every entry point - Two captures (one via "p05", one via "p05-interferometer") write to the same row - get_state via either alias or the canonical id finds the same row Fixes the highest-priority gap codex flagged because Trusted Project State is supposed to be the most dependable layer in the AtoCore trust hierarchy. P2.a — Interaction capture project canonicalization ---------------------------------------------------- src/atocore/interactions/service.py: record_interaction now canonicalizes project before storing, so interaction.project is always the canonical id regardless of what the client passed. Downstream effects: - reinforce_from_interaction queries memories by interaction.project -> previously missed memories stored under canonical id -> now consistent because interaction.project IS the canonical id - the extractor stamps candidates with interaction.project -> previously created candidates in alias buckets -> now creates candidates in the canonical bucket - list_interactions(project=alias) was already broken, now fixed by canonicalizing the filter input on the read side too Memory service applied the same fix: - src/atocore/memory/service.py: create_memory and get_memories both canonicalize project through resolve_project_name - This keeps stored memory.project consistent with the reinforcement query path P2.b — Interaction `since` filter format normalization ------------------------------------------------------ src/atocore/interactions/service.py: new _normalize_since helper. 
The bug: - created_at is stored as 'YYYY-MM-DD HH:MM:SS' (no timezone, UTC by convention) so it sorts lexically and compares cleanly with the SQLite CURRENT_TIMESTAMP default - The `since` parameter was documented as ISO 8601 but compared as a raw string against the storage format - The lexically-greater 'T' separator means an ISO timestamp like '2026-04-07T12:00:00Z' is GREATER than the storage form '2026-04-07 12:00:00' for the same instant - Result: a client passing ISO `since` got an empty result for any row from the same day, even though those rows existed and were technically "after" the cutoff in real-world time The fix: - _normalize_since accepts ISO 8601 with T, optional Z suffix, optional fractional seconds, optional +HH:MM offsets - Uses datetime.fromisoformat for parsing (Python 3.11+) - Converts to UTC and reformats as the storage format before the SQL comparison - The bare storage format still works (backwards compat path is a regex match that returns the input unchanged) - Unparseable input is returned as-is so the comparison degrades gracefully (rows just don't match) instead of raising and breaking the listing endpoint builder.py refactor ------------------- The previous P1 fix had inline canonicalization. 
Now it uses the shared helper for consistency: - import changed from get_registered_project to resolve_project_name - the inline lookup is replaced with a single helper call - the comment block now points at representation-authority.md for the canonicalization contract New shared test fixture: tests/conftest.py::project_registry ------------------------------------------------------------ - Standardizes the registry-setup pattern that was duplicated across test_context_builder.py, test_project_state.py, test_interactions.py, and test_reinforcement.py - Returns a callable that takes (project_id, [aliases]) tuples and writes them into a temp registry file with the env var pointed at it and config.settings reloaded - Used by all 12 new regression tests in this commit Tests (12 new, all green on first run) -------------------------------------- test_project_state.py: - test_set_state_canonicalizes_alias: write via alias, read via every alias and the canonical id, verify same row id - test_get_state_canonicalizes_alias_after_canonical_write - test_invalidate_state_canonicalizes_alias - test_unregistered_project_state_still_works (backwards compat) test_interactions.py: - test_record_interaction_canonicalizes_project - test_list_interactions_canonicalizes_project_filter - test_list_interactions_since_accepts_iso_with_t_separator - test_list_interactions_since_accepts_z_suffix - test_list_interactions_since_accepts_offset - test_list_interactions_since_storage_format_still_works test_reinforcement.py: - test_reinforcement_works_when_capture_uses_alias (end-to-end: capture under alias, seed memory under canonical, verify reinforcement matches) - test_get_memories_filter_by_alias Full suite: 174 passing (was 162), 1 warning. The +12 is the new regression tests, no existing tests regressed. 
What's still NOT canonicalized (and why) ---------------------------------------- - _rank_chunks's secondary substring boost in builder.py — the retriever already does the right thing via its own _project_match_boost which calls get_registered_project. The redundant secondary boost still uses the raw hint but it's a multiplicative factor on top of correct retrieval, not a filter, so it can't drop relevant chunks. Tracked as a future cleanup but not a P1. - update_memory's project field (you can't change a memory's project after creation in the API anyway). - The retriever's project_hint parameter on direct /query calls — same reasoning as the builder boost, plus the retriever's own get_registered_project call already handles aliases there.
2026-04-07 08:29:33 -04:00
# --- alias canonicalization on interaction capture/list -------------------
def test_record_interaction_canonicalizes_project(project_registry):
    """Capturing under an alias must store the canonical project id.

    Regression for codex's P2 finding: reinforcement and extraction
    query memories by interaction.project, so a raw alias there would
    silently miss memories stored under the canonical id.
    """
    init_db()
    project_registry(("p05-interferometer", ["p05", "interferometer"]))
    captured = record_interaction(
        prompt="quick capture", response="response body", project="p05", reinforce=False
    )
    # Canonical id both on the returned object and after a DB round-trip.
    assert captured.project == "p05-interferometer"
    assert get_interaction(captured.id).project == "p05-interferometer"
def test_list_interactions_canonicalizes_project_filter(project_registry):
    """Filtering by an alias finds rows stored under the canonical id."""
    init_db()
    project_registry(("p06-polisher", ["p06", "polisher"]))
    record_interaction(prompt="a", response="ra", project="p06-polisher", reinforce=False)
    record_interaction(prompt="b", response="rb", project="polisher", reinforce=False)
    record_interaction(prompt="c", response="rc", project="atocore", reinforce=False)

    via_alias = list_interactions(project="p06")
    via_canonical = list_interactions(project="p06-polisher")
    # Alias and canonical queries must see the same two p06 captures.
    assert len(via_alias) == 2
    assert len(via_canonical) == 2
    assert {item.prompt for item in via_alias} == {"a", "b"}
# --- since filter format normalization ------------------------------------
def test_list_interactions_since_accepts_iso_with_t_separator(tmp_data_dir):
    """`since` must accept ISO 8601 with a 'T' separator.

    Regression for P2.b: created_at is stored as 'YYYY-MM-DD HH:MM:SS',
    and a raw string comparison against the 'T' form would exclude
    same-instant rows because 'T' sorts above ' '.
    """
    init_db()
    record_interaction(prompt="early", response="r", reinforce=False)
    time.sleep(1.05)
    pivot = record_interaction(prompt="late", response="r", reinforce=False)
    # pivot.created_at is in storage format 'YYYY-MM-DD HH:MM:SS'.
    # Build the equivalent ISO 8601 with 'T' that an external client
    # would naturally send.
    iso_with_t = pivot.created_at.replace(" ", "T")
    items = list_interactions(since=iso_with_t)
    assert any(i.id == pivot.id for i in items)
    # The early row is strictly before the (inclusive) cutoff, so it must
    # be excluded.  The previous assertion here
    # (`early_ids == set() or len(items) >= 1`) was a tautology: `items`
    # always contains the pivot, so `len(items) >= 1` held regardless of
    # whether the early row leaked through.
    early_ids = {i.id for i in items if i.prompt == "early"}
    assert early_ids == set()
def test_list_interactions_since_accepts_z_suffix(tmp_data_dir):
    """A trailing 'Z' (UTC designator) on `since` is handled correctly."""
    init_db()
    pivot = record_interaction(prompt="pivot", response="r", reinforce=False)
    time.sleep(1.05)
    after = record_interaction(prompt="after", response="r", reinforce=False)

    iso_with_z = pivot.created_at.replace(" ", "T") + "Z"
    found = {item.id for item in list_interactions(since=iso_with_z)}
    # Inclusive cutoff: the pivot itself and the later row both match.
    assert pivot.id in found
    assert after.id in found
def test_list_interactions_since_accepts_offset(tmp_data_dir):
    """`since` must accept an explicit '+HH:MM' UTC offset.

    Strengthened: the original only asserted the later row was present,
    which cannot distinguish correct offset normalization from a filter
    that merely lets strictly-later rows through.  '+00:00' denotes the
    same instant as the stored UTC timestamp and the cutoff is inclusive
    (mirroring the Z-suffix test), so the pivot row itself must also be
    returned.
    """
    init_db()
    pivot = record_interaction(prompt="pivot", response="r", reinforce=False)
    time.sleep(1.05)
    after = record_interaction(prompt="after", response="r", reinforce=False)
    iso_with_offset = pivot.created_at.replace(" ", "T") + "+00:00"
    ids = {item.id for item in list_interactions(since=iso_with_offset)}
    assert after.id in ids
    assert pivot.id in ids  # inclusive cutoff, same instant as stored form
def test_list_interactions_since_storage_format_still_works(tmp_data_dir):
    """Backwards compat: the bare 'YYYY-MM-DD HH:MM:SS' form still matches."""
    init_db()
    pivot = record_interaction(prompt="pivot", response="r", reinforce=False)
    matches = list_interactions(since=pivot.created_at)
    assert any(item.id == pivot.id for item in matches)