# tests/test_reinforcement.py

"""Tests for Phase 9 Commit B reinforcement loop."""

import pytest
from fastapi.testclient import TestClient

from atocore.interactions.service import record_interaction
from atocore.main import app
from atocore.memory.reinforcement import (
    DEFAULT_CONFIDENCE_DELTA,
    _stem,
    _tokenize,
    reinforce_from_interaction,
)
from atocore.memory.service import (
    create_memory,
    get_memories,
    reinforce_memory,
)
from atocore.models.database import init_db


# --- service-level tests: reinforce_memory primitive ----------------------


def test_reinforce_memory_bumps_active_memory(tmp_data_dir):
    """Reinforcing a memory raises its confidence and records the reference."""
    init_db()
    starting_confidence = 0.6
    mem = create_memory(
        content="prefers Python over Ruby for scripting",
        memory_type="preference",
        confidence=starting_confidence,
    )

    applied, old_conf, new_conf = reinforce_memory(mem.id, confidence_delta=0.05)

    # The call reports the before/after pair for the bump it applied.
    assert applied is True
    assert old_conf == starting_confidence
    assert abs(new_conf - 0.65) < 1e-9

    # Read the row back to confirm the change is visible via get_memories.
    match = None
    for stored in get_memories(memory_type="preference", limit=10):
        if stored.id == mem.id:
            match = stored
            break
    assert match is not None
    assert abs(match.confidence - 0.65) < 1e-9
    assert match.reference_count == 1
    assert match.last_referenced_at  # non-empty


def test_reinforce_memory_caps_at_one(tmp_data_dir):
    """A bump that would exceed 1.0 must yield exactly 1.0."""
    init_db()
    near_ceiling = 0.98
    mem = create_memory(
        content="is a mechanical engineer who runs AtoCore",
        memory_type="identity",
        confidence=near_ceiling,
    )

    applied, old_conf, new_conf = reinforce_memory(mem.id, confidence_delta=0.05)

    # 0.98 + 0.05 overshoots, so the reported new confidence is pinned at 1.0.
    assert applied is True
    assert old_conf == near_ceiling
    assert new_conf == 1.0


def test_reinforce_memory_rejects_candidate_and_missing(tmp_data_dir):
    """Neither a candidate-status memory nor an unknown id gets reinforced."""
    init_db()
    candidate = create_memory(
        content="the lateral support uses GF-PTFE pads",
        memory_type="knowledge",
        status="candidate",
        confidence=0.5,
    )

    # Case 1: the memory exists but was created with status="candidate".
    candidate_applied, _old, _new = reinforce_memory(candidate.id)
    assert candidate_applied is False

    # Case 2: the id does not correspond to any created memory.
    missing_applied, _old, _new = reinforce_memory("no-such-id")
    assert missing_applied is False


def test_reinforce_memory_accumulates_reference_count(tmp_data_dir):
    """Repeated reinforcement adds up in both reference count and confidence."""
    init_db()
    mem = create_memory(
        content="likes concise code reviews that focus on the why",
        memory_type="preference",
        confidence=0.5,
    )

    rounds = 5
    for _round in range(rounds):
        reinforce_memory(mem.id, confidence_delta=0.01)

    preferences = get_memories(memory_type="preference", limit=10)
    reloaded = [stored for stored in preferences if stored.id == mem.id][0]

    # Five bumps of 0.01 on top of 0.5 should land on 0.55.
    assert reloaded.reference_count == rounds
    assert abs(reloaded.confidence - 0.55) < 1e-9


def test_reinforce_memory_rejects_negative_delta(tmp_data_dir):
    """A negative ``confidence_delta`` must be rejected with ``ValueError``.

    ``pytest`` is imported at module level rather than in the middle of the
    test body, so the setup and the assertion read straight through.
    """
    init_db()
    mem = create_memory(memory_type="preference", content="always uses structured logging")

    with pytest.raises(ValueError):
        reinforce_memory(mem.id, confidence_delta=-0.01)


# --- reinforce_from_interaction: the high-level matcher -------------------


def _make_interaction(**overrides):
    """Record an interaction for matcher tests, with reinforcement disabled.

    Recognised override keys are ``prompt``, ``response``,
    ``response_summary``, ``project``, ``client`` and ``session_id``; any
    other key is ignored. Returns whatever ``record_interaction`` returns.
    """
    defaults = {
        "prompt": "ignored",
        "response": "",
        "response_summary": "",
        "project": "",
        "client": "",
        "session_id": "",
    }
    # Only the known fields are forwarded; unknown overrides are dropped.
    fields = {name: overrides.get(name, fallback) for name, fallback in defaults.items()}
    # reinforce=False: the matcher is tested in isolation here
    return record_interaction(reinforce=False, **fields)


def test_reinforce_from_interaction_matches_active_memory(tmp_data_dir):
    """A response that echoes a memory's content reinforces exactly that memory."""
    init_db()
    base_confidence = 0.5
    mem = create_memory(
        content="prefers tests that describe behaviour in plain English",
        memory_type="preference",
        confidence=base_confidence,
    )
    response_text = (
        "I wrote the new tests in plain English, since the project "
        "prefers tests that describe behaviour in plain English and "
        "that makes them easier to review."
    )
    interaction = _make_interaction(response=response_text)

    results = reinforce_from_interaction(interaction)

    assert len(results) == 1
    only_result = results[0]
    assert only_result.memory_id == mem.id
    # No explicit delta was passed, so the default bump is expected.
    expected_confidence = base_confidence + DEFAULT_CONFIDENCE_DELTA
    assert abs(only_result.new_confidence - expected_confidence) < 1e-9


def test_reinforce_from_interaction_ignores_candidates_and_inactive(tmp_data_dir):
    """A candidate memory is skipped even when the response echoes its text.

    Only the ``candidate`` status is exercised in this test.
    """
    init_db()
    candidate = create_memory(
        content="the polisher frame uses kinematic mounts for thermal isolation",
        memory_type="knowledge",
        status="candidate",
        confidence=0.6,
    )
    response_text = (
        "The polisher frame uses kinematic mounts for thermal isolation, "
        "which matches the note in the design log."
    )
    interaction = _make_interaction(response=response_text)

    results = reinforce_from_interaction(interaction)

    # Candidate should NOT be reinforced even though the text matches
    reinforced_ids = [result.memory_id for result in results]
    assert candidate.id not in reinforced_ids


def test_reinforce_from_interaction_requires_min_content_length(tmp_data_dir):
    """A very short memory is not reinforced even if the response contains it."""
    init_db()
    # "uses SI" is below the matcher's minimum content length.
    short_mem = create_memory(content="uses SI", memory_type="preference")
    interaction = _make_interaction(
        response="Everything uses SI for this project, consistently.",
    )

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert short_mem.id not in reinforced_ids


def test_reinforce_from_interaction_empty_response_is_noop(tmp_data_dir):
    """With no response text and no summary, nothing is reinforced."""
    init_db()
    create_memory(content="prefers structured logging", memory_type="preference")
    blank_interaction = _make_interaction(response_summary="", response="")

    outcome = reinforce_from_interaction(blank_interaction)

    assert outcome == []


def test_reinforce_from_interaction_is_normalized(tmp_data_dir):
    """Matching tolerates differences in letter case and whitespace."""
    init_db()
    mem = create_memory(
        content="Prefers concise commit messages focused on the why",
        memory_type="preference",
    )
    # Response has different casing and extra whitespace — should still match
    noisy_response = (
        "The commit message was short on purpose — the user\n\n"
        "PREFERS concise commit MESSAGES   focused on the WHY, "
        "so I stuck to one sentence."
    )
    interaction = _make_interaction(response=noisy_response)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids


def test_reinforce_from_interaction_deduplicates_across_buckets(tmp_data_dir):
    """A memory reachable through more than one query is reinforced only once."""
    init_db()
    mem = create_memory(
        content="mechanical engineer who runs AtoCore",
        memory_type="identity",
        project="",
    )
    # This memory belongs to the identity bucket AND would also be
    # fetched via the project query if project matched. We want to ensure
    # we don't double-reinforce.
    interaction = _make_interaction(
        project="p05-interferometer",
        response="The mechanical engineer who runs AtoCore asked for this patch.",
    )

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert reinforced_ids.count(mem.id) == 1


# --- automatic reinforcement on record_interaction ------------------------


def test_record_interaction_auto_reinforces_by_default(tmp_data_dir):
    """Recording an interaction without ``reinforce`` runs the reinforcement pass."""
    init_db()
    mem = create_memory(
        content="writes tests before hooking features into API routes",
        memory_type="preference",
        confidence=0.5,
    )
    response_text = (
        "Wrote tests first, then added the /foo endpoint. The project "
        "writes tests before hooking features into API routes so the "
        "order is enforced."
    )

    # No reinforce argument: the default behaviour is under test.
    record_interaction(
        prompt="please add the /foo endpoint with tests",
        response=response_text,
    )

    preferences = get_memories(memory_type="preference", limit=20)
    reloaded = [stored for stored in preferences if stored.id == mem.id][0]
    assert reloaded.confidence > 0.5
    assert reloaded.reference_count == 1


def test_record_interaction_reinforce_false_skips_pass(tmp_data_dir):
    """``reinforce=False`` leaves a matching memory untouched."""
    init_db()
    mem = create_memory(
        content="always includes a rollback note in risky commits",
        memory_type="preference",
        confidence=0.5,
    )
    response_text = (
        "I always includes a rollback note in risky commits, so the "
        "commit message mentions how to revert if needed."
    )

    record_interaction(prompt="ignored", response=response_text, reinforce=False)

    preferences = get_memories(memory_type="preference", limit=20)
    reloaded = [stored for stored in preferences if stored.id == mem.id][0]
    # The response echoes the memory, yet nothing may have changed.
    assert reloaded.confidence == 0.5
    assert reloaded.reference_count == 0


def test_record_interaction_auto_reinforce_handles_empty_response(tmp_data_dir):
    """Auto-reinforcement with an empty response does not touch any memory."""
    init_db()
    mem = create_memory(content="prefers descriptive branch names", memory_type="preference")

    # No response text — reinforcement should be a silent no-op
    record_interaction(prompt="hi", response_summary="", response="")

    preferences = get_memories(memory_type="preference", limit=20)
    reloaded = [stored for stored in preferences if stored.id == mem.id][0]
    assert reloaded.reference_count == 0


# --- API level ------------------------------------------------------------


def test_api_reinforce_endpoint_runs_against_stored_interaction(tmp_data_dir):
    """POST /interactions/{id}/reinforce reinforces against a stored interaction."""
    init_db()
    mem = create_memory(
        content="rejects commits that touch credential files",
        memory_type="preference",
        confidence=0.5,
    )
    response_text = (
        "I rejects commits that touch credential files on sight. "
        "That commit touched ~/.git-credentials, so it was blocked."
    )
    # Stored with reinforce=False so the endpoint is what performs the pass.
    interaction = record_interaction(
        prompt="review commit",
        response=response_text,
        reinforce=False,
    )

    http = TestClient(app)
    reply = http.post(f"/interactions/{interaction.id}/reinforce")

    assert reply.status_code == 200
    payload = reply.json()
    assert payload["interaction_id"] == interaction.id
    assert payload["reinforced_count"] >= 1
    reinforced_ids = [entry["memory_id"] for entry in payload["reinforced"]]
    assert mem.id in reinforced_ids


def test_api_reinforce_endpoint_returns_404_for_missing(tmp_data_dir):
    """Reinforcing an interaction id that was never stored yields HTTP 404."""
    init_db()
    reply = TestClient(app).post("/interactions/does-not-exist/reinforce")
    assert reply.status_code == 404


def test_api_post_interactions_accepts_reinforce_false(tmp_data_dir):
    """POST /interactions with ``"reinforce": False`` leaves memories untouched."""
    init_db()
    mem = create_memory(
        content="writes runbooks alongside new services",
        memory_type="preference",
        confidence=0.5,
    )
    request_body = {
        "prompt": "review",
        "response": (
            "I writes runbooks alongside new services and the diff includes "
            "one under docs/runbooks/."
        ),
        "reinforce": False,
    }

    http = TestClient(app)
    reply = http.post("/interactions", json=request_body)

    assert reply.status_code == 200
    preferences = get_memories(memory_type="preference", limit=20)
    reloaded = [stored for stored in preferences if stored.id == mem.id][0]
    # The response text echoes the memory, but the flag must suppress the bump.
    assert reloaded.confidence == 0.5
    assert reloaded.reference_count == 0


# --- alias canonicalization end-to-end -------------------------------------


def test_reinforcement_works_when_capture_uses_alias(project_registry):
    """Capturing under a project alias still reinforces canonical-id memories.

    The memory is seeded under the canonical project id and the interaction
    is recorded under one of its aliases; the memory must still be bumped.

    Regression for codex's P2 finding: previously interaction.project
    was stored verbatim and reinforcement queried memories using that
    raw value, so capturing under "p05" while memories live under
    "p05-interferometer" silently missed everything.
    """
    init_db()
    project_registry(("p05-interferometer", ["p05", "interferometer"]))

    # Seed an active memory under the CANONICAL id
    mem = create_memory(
        content="the lateral support pads use GF-PTFE for thermal stability",
        memory_type="project",
        project="p05-interferometer",
        confidence=0.5,
    )

    # Capture an interaction under the ALIAS — this is the bug case
    response_text = (
        "Quick note: the lateral support pads use GF-PTFE for thermal "
        "stability and that's still the current selection."
    )
    record_interaction(prompt="status update", response=response_text, project="p05")

    # The seeded memory should have been reinforced
    project_memories = get_memories(
        memory_type="project",
        project="p05-interferometer",
        limit=20,
    )
    reloaded = [stored for stored in project_memories if stored.id == mem.id][0]
    assert reloaded.confidence > 0.5
    assert reloaded.reference_count == 1


def test_get_memories_filter_by_alias(project_registry):
    """Querying by alias or by canonical id returns the same two memories."""
    init_db()
    project_registry(("p04-gigabit", ["p04", "gigabit"]))

    # One memory is created under the canonical id, the other under an alias.
    for content, project in (("m1", "p04-gigabit"), ("m2", "gigabit")):
        create_memory(memory_type="project", content=content, project=project)

    via_alias = get_memories(memory_type="project", project="p04")
    via_canonical = get_memories(memory_type="project", project="p04-gigabit")

    assert len(via_alias) == 2
    assert len(via_canonical) == 2
    alias_contents = {stored.content for stored in via_alias}
    assert alias_contents == {"m1", "m2"}


# --- token-overlap matcher: unit tests -------------------------------------


def test_stem_folds_s_ed_ing():
    """``_stem`` strips -s/-ed/-ing suffixes but leaves short words intact."""
    expectations = [
        ("prefers", "prefer"),
        ("preferred", "prefer"),
        ("services", "service"),
        ("processing", "process"),
        # Short words must not be over-stripped
        ("red", "red"),  # 3 chars, don't strip "ed"
        ("bus", "bus"),  # 3 chars, don't strip "s"
        ("sing", "sing"),  # 4 chars, don't strip "ing"
        ("being", "being"),  # 5 chars, "ing" strip leaves "be" (2) — too short
    ]
    for word, expected_stem in expectations:
        assert _stem(word) == expected_stem


def test_tokenize_removes_stop_words():
    """``_tokenize`` drops "the" and keeps the content words of the sentence."""
    tokens = _tokenize("the quick brown fox jumps over the lazy dog")

    assert "the" not in tokens
    # "over" has len 4, not a stop word → kept (stemmed: "over")
    for kept_word in ("quick", "brown", "fox", "dog", "over"):
        assert kept_word in tokens


# --- token-overlap matcher: paraphrase matching ----------------------------


def test_reinforce_matches_paraphrase_prefers_vs_prefer(tmp_data_dir):
    """The canonical rebase case from phase9-first-real-use.md.

    The memory says "prefers" while the response says "prefer"; the match
    must still succeed.
    """
    init_db()
    mem = create_memory(
        content="prefers rebase-based workflows because history stays linear",
        memory_type="preference",
        confidence=0.5,
    )
    paraphrase = (
        "I prefer rebase-based workflows because the history stays "
        "linear and reviewers have an easier time."
    )
    interaction = _make_interaction(response=paraphrase)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids


def test_reinforce_matches_paraphrase_with_articles_and_ed(tmp_data_dir):
    """An inserted article and an -ed/-s variation do not prevent a match."""
    init_db()
    mem = create_memory(
        content="preferred structured logging across all backend services",
        memory_type="preference",
        confidence=0.5,
    )
    # The response adds "the" and uses "prefers" where the memory has "preferred".
    paraphrase = (
        "I set up structured logging across all the backend services, "
        "which the team prefers for consistency."
    )
    interaction = _make_interaction(response=paraphrase)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids


def test_reinforce_rejects_low_overlap(tmp_data_dir):
    """A response on an unrelated topic must not reinforce the memory."""
    init_db()
    mem = create_memory(
        content="always uses Python for data processing scripts",
        memory_type="preference",
        confidence=0.5,
    )
    unrelated_response = (
        "The CI pipeline runs on Node.js and deploys to Kubernetes "
        "using Helm charts."
    )
    interaction = _make_interaction(response=unrelated_response)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id not in reinforced_ids


def test_reinforce_matches_at_70_percent_threshold(tmp_data_dir):
    """Exactly 7 of 10 content tokens present → should match."""
    init_db()
    # After stop-word removal and stemming, this has 10 tokens:
    # alpha, bravo, charlie, delta, echo, foxtrot, golf, hotel, india, juliet
    ten_token_content = "alpha bravo charlie delta echo foxtrot golf hotel india juliet"
    mem = create_memory(
        content=ten_token_content,
        memory_type="preference",
        confidence=0.5,
    )
    # Echo 7 of 10 tokens (70%) plus some noise
    seven_of_ten = "alpha bravo charlie delta echo foxtrot golf noise words here"
    interaction = _make_interaction(response=seven_of_ten)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids


def test_reinforce_long_memory_matches_on_absolute_overlap(tmp_data_dir):
    """A long memory matches on a substantial absolute token overlap.

    A paragraph-length memory should reinforce when the response echoes a
    substantive subset of its distinctive tokens, even though the overlap
    fraction stays well under 70%.
    """
    init_db()
    long_content = (
        "Interferometer architecture: a folded-beam configuration with a "
        "fixed horizontal interferometer, a forty-five degree fold mirror, "
        "a six-DOF CGH stage, and the mirror on its own tilting platform. "
        "The fold mirror redirects the beam while the CGH shapes the wavefront."
    )
    mem = create_memory(
        content=long_content,
        memory_type="project",
        project="p05-interferometer",
        confidence=0.5,
    )
    echoing_response = (
        "For the interferometer we keep the folded-beam layout: horizontal "
        "interferometer, fold mirror at forty-five degrees, CGH stage with "
        "six DOF, and the mirror sitting on its tilting platform. The fold "
        "mirror redirects the beam and the CGH shapes the wavefront."
    )
    interaction = _make_interaction(
        response=echoing_response,
        project="p05-interferometer",
    )

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids


def test_reinforce_long_memory_rejects_thin_overlap(tmp_data_dir):
    """A long memory is not reinforced by a response sharing only a few terms.

    Long memory + a response that only brushes a few generic terms must NOT
    reinforce — otherwise the reflection loop rots.
    """
    init_db()
    long_content = (
        "Polisher control system executes approved controller jobs, "
        "enforces state transitions and interlocks, supports pause "
        "resume and abort, and records auditable run logs while "
        "never reinterpreting metrology or inventing new strategies."
    )
    mem = create_memory(
        content=long_content,
        memory_type="project",
        project="p06-polisher",
        confidence=0.5,
    )
    thin_response = (
        "I updated the polisher docs and fixed a typo in the run logs section."
    )
    interaction = _make_interaction(response=thin_response, project="p06-polisher")

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id not in reinforced_ids


def test_reinforce_rejects_below_70_percent(tmp_data_dir):
    """Only 6 of 10 content tokens present (60%) → should NOT match."""
    init_db()
    ten_token_content = "alpha bravo charlie delta echo foxtrot golf hotel india juliet"
    mem = create_memory(
        content=ten_token_content,
        memory_type="preference",
        confidence=0.5,
    )
    # Echo 6 of 10 tokens (60%) plus noise
    six_of_ten = "alpha bravo charlie delta echo foxtrot noise words here only"
    interaction = _make_interaction(response=six_of_ten)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id not in reinforced_ids