fix: token-overlap matcher for reinforcement (Phase 9B)
Replace the substring-based _memory_matches() with a token-overlap
matcher that tokenizes both memory content and response, applies
lightweight stemming (trailing s/ed/ing) and stop-word removal, then
checks whether >= 70% of the memory's tokens appear in the response.
This fixes the paraphrase blindness that prevented reinforcement from
ever firing on natural responses ("prefers" vs "prefer", "because
history" vs "because the history").
7 new tests (26 total reinforcement tests, all passing).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,8 @@ from atocore.interactions.service import record_interaction
|
||||
from atocore.main import app
|
||||
from atocore.memory.reinforcement import (
|
||||
DEFAULT_CONFIDENCE_DELTA,
|
||||
_stem,
|
||||
_tokenize,
|
||||
reinforce_from_interaction,
|
||||
)
|
||||
from atocore.memory.service import (
|
||||
@@ -373,3 +375,118 @@ def test_get_memories_filter_by_alias(project_registry):
|
||||
assert len(via_alias) == 2
|
||||
assert len(via_canonical) == 2
|
||||
assert {m.content for m in via_alias} == {"m1", "m2"}
|
||||
|
||||
|
||||
# --- token-overlap matcher: unit tests -------------------------------------
|
||||
|
||||
|
||||
def test_stem_folds_s_ed_ing():
    """Check `_stem` against a table of (word, expected stem) pairs.

    The first group lists words whose trailing s/ed/ing is expected to be
    folded away; the second group lists short words that are expected to
    come back unchanged.
    """
    expected_stems = (
        # Suffix is expected to be removed.
        ("prefers", "prefer"),
        ("preferred", "prefer"),
        ("services", "service"),
        ("processing", "process"),
        # Short words must not be over-stripped.
        ("red", "red"),      # 3 chars, "ed" must stay
        ("bus", "bus"),      # 3 chars, "s" must stay
        ("sing", "sing"),    # 4 chars, "ing" must stay
        ("being", "being"),  # 5 chars, stripping "ing" would leave "be" (2) — too short
    )
    for word, stem in expected_stems:
        assert _stem(word) == stem
|
||||
|
||||
|
||||
def test_tokenize_removes_stop_words():
    """Check that `_tokenize` drops "the" but keeps the content words."""
    result = _tokenize("the quick brown fox jumps over the lazy dog")

    assert "the" not in result

    # "over" is listed last on purpose: it has len 4 and is expected not to
    # be treated as a stop word, so it should survive (stemmed form: "over").
    for kept_word in ("quick", "brown", "fox", "dog", "over"):
        assert kept_word in result
|
||||
|
||||
|
||||
# --- token-overlap matcher: paraphrase matching ----------------------------
|
||||
|
||||
|
||||
def test_reinforce_matches_paraphrase_prefers_vs_prefer(tmp_data_dir):
    """Canonical rebase case from phase9-first-real-use.md.

    The stored memory says "prefers ... because history", while the response
    says "prefer ... because the history"; the memory is still expected to be
    among the reinforcement results.
    """
    init_db()
    stored = create_memory(
        memory_type="preference",
        content="prefers rebase-based workflows because history stays linear",
        confidence=0.5,
    )
    reply = (
        "I prefer rebase-based workflows because the history stays "
        "linear and reviewers have an easier time."
    )

    outcome = reinforce_from_interaction(_make_interaction(response=reply))

    # At least one result must point back at the memory created above.
    hits = [entry for entry in outcome if entry.memory_id == stored.id]
    assert hits
|
||||
|
||||
|
||||
def test_reinforce_matches_paraphrase_with_articles_and_ed(tmp_data_dir):
    """A response with extra articles and a different verb form should still match.

    The memory uses "preferred"; the response inserts "the" before
    "backend services" and uses "prefers" in a trailing clause.
    """
    init_db()
    stored = create_memory(
        memory_type="preference",
        content="preferred structured logging across all backend services",
        confidence=0.5,
    )
    reply = (
        "I set up structured logging across all the backend services, "
        "which the team prefers for consistency."
    )

    outcome = reinforce_from_interaction(_make_interaction(response=reply))

    # At least one result must point back at the memory created above.
    hits = [entry for entry in outcome if entry.memory_id == stored.id]
    assert hits
|
||||
|
||||
|
||||
def test_reinforce_rejects_low_overlap(tmp_data_dir):
    """A response on an unrelated topic must not reinforce the memory."""
    init_db()
    stored = create_memory(
        memory_type="preference",
        content="always uses Python for data processing scripts",
        confidence=0.5,
    )
    reply = (
        "The CI pipeline runs on Node.js and deploys to Kubernetes "
        "using Helm charts."
    )

    outcome = reinforce_from_interaction(_make_interaction(response=reply))

    # No result may reference the memory created above.
    hits = [entry for entry in outcome if entry.memory_id == stored.id]
    assert not hits
|
||||
|
||||
|
||||
def test_reinforce_matches_at_70_percent_threshold(tmp_data_dir):
    """Exactly 7 of 10 content tokens present → should match."""
    init_db()
    # The test assumes that, after stop-word removal and stemming, the content
    # below yields 10 tokens:
    # alpha, bravo, charlie, delta, echo, foxtrot, golf, hotel, india, juliet
    stored = create_memory(
        memory_type="preference",
        content="alpha bravo charlie delta echo foxtrot golf hotel india juliet",
        confidence=0.5,
    )
    # The response repeats 7 of those 10 tokens (70%) and pads with noise.
    exchange = _make_interaction(
        response="alpha bravo charlie delta echo foxtrot golf noise words here",
    )

    outcome = reinforce_from_interaction(exchange)

    # At least one result must point back at the memory created above.
    hits = [entry for entry in outcome if entry.memory_id == stored.id]
    assert hits
|
||||
|
||||
|
||||
def test_reinforce_rejects_below_70_percent(tmp_data_dir):
    """Only 6 of 10 content tokens present (60%) → should NOT match."""
    init_db()
    stored = create_memory(
        memory_type="preference",
        content="alpha bravo charlie delta echo foxtrot golf hotel india juliet",
        confidence=0.5,
    )
    # The response repeats 6 of the 10 memory tokens (60%) and pads with noise.
    exchange = _make_interaction(
        response="alpha bravo charlie delta echo foxtrot noise words here only",
    )

    outcome = reinforce_from_interaction(exchange)

    # No result may reference the memory created above.
    hits = [entry for entry in outcome if entry.memory_id == stored.id]
    assert not hits
|
||||
|
||||
Reference in New Issue
Block a user