fix: token-overlap matcher for reinforcement (Phase 9B)
Replace the substring-based _memory_matches() with a token-overlap
matcher that tokenizes both memory content and response, applies
lightweight stemming (trailing s/ed/ing) and stop-word removal, then
checks whether >= 70% of the memory's tokens appear in the response.
This fixes the paraphrase blindness that prevented reinforcement from
ever firing on natural responses ("prefers" vs "prefer", "because
history" vs "because the history").
7 new tests (26 total reinforcement tests, all passing).
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,10 +8,11 @@ given memory, without ever promoting anything new into trusted state.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
- Matching is intentionally simple and explainable:
|
||||
* normalize both sides (lowercase, collapse whitespace)
|
||||
* require the normalized memory content (or its first 80 chars) to
|
||||
appear as a substring in the normalized response
|
||||
- Matching uses token-overlap: tokenize both sides (lowercase, stem,
|
||||
drop stop words), then check whether >= 70 % of the memory's content
|
||||
tokens appear in the response token set. This handles natural
|
||||
paraphrases (e.g. "prefers" vs "prefer", "because history" vs
|
||||
"because the history") that substring matching missed.
|
||||
- Candidates and invalidated memories are NEVER considered — reinforcement
|
||||
must not revive history.
|
||||
- Reinforcement is capped at 1.0 and monotonically non-decreasing.
|
||||
@@ -43,9 +44,12 @@ log = get_logger("reinforcement")
|
||||
# memories like "prefers Python".
|
||||
_MIN_MEMORY_CONTENT_LENGTH = 12
|
||||
|
||||
# When a memory's content is very long, match on its leading window only
|
||||
# to avoid punishing small paraphrases further into the body.
|
||||
_MATCH_WINDOW_CHARS = 80
|
||||
# Token-overlap matching constants.
|
||||
_STOP_WORDS: frozenset[str] = frozenset({
|
||||
"the", "a", "an", "and", "or", "of", "to", "is", "was",
|
||||
"that", "this", "with", "for", "from", "into",
|
||||
})
|
||||
_MATCH_THRESHOLD = 0.70
|
||||
|
||||
DEFAULT_CONFIDENCE_DELTA = 0.02
|
||||
|
||||
@@ -144,12 +148,58 @@ def _normalize(text: str) -> str:
|
||||
return collapsed.strip()
|
||||
|
||||
|
||||
def _stem(word: str) -> str:
|
||||
"""Aggressive suffix-folding so inflected forms collapse.
|
||||
|
||||
Handles trailing ``ing``, ``ed``, and ``s`` — good enough for
|
||||
reinforcement matching without pulling in nltk/snowball.
|
||||
"""
|
||||
# Order matters: try longest suffix first.
|
||||
if word.endswith("ing") and len(word) >= 6:
|
||||
return word[:-3]
|
||||
if word.endswith("ed") and len(word) > 4:
|
||||
stem = word[:-2]
|
||||
# "preferred" → "preferr" → "prefer" (doubled consonant before -ed)
|
||||
if len(stem) >= 3 and stem[-1] == stem[-2]:
|
||||
stem = stem[:-1]
|
||||
return stem
|
||||
if word.endswith("s") and len(word) > 3:
|
||||
return word[:-1]
|
||||
return word
|
||||
|
||||
|
||||
def _tokenize(text: str) -> set[str]:
    """Turn already-normalized text into a set of stemmed content tokens.

    The text is split on whitespace and each chunk has punctuation
    trimmed from both ends. Chunks that end up shorter than three
    characters, or that are stop words, are discarded; everything else is
    stemmed and collected into the returned set.
    """
    # Only edge punctuation is trimmed; characters inside a word (for
    # example the hyphen in "rebase-based") are left untouched.
    edge_punctuation = ".,;:!?\"'()[]{}-/"
    trimmed_words = (chunk.strip(edge_punctuation) for chunk in text.split())
    return {
        _stem(candidate)
        for candidate in trimmed_words
        if len(candidate) >= 3 and candidate not in _STOP_WORDS
    }
|
||||
|
||||
|
||||
def _memory_matches(memory_content: str, normalized_response: str) -> bool:
|
||||
"""Return True if the memory content appears in the response."""
|
||||
"""Return True if enough of the memory's tokens appear in the response.
|
||||
|
||||
Uses token-overlap: tokenize both sides (lowercase, stem, drop stop
|
||||
words), then check whether >= 70 % of the memory's content tokens
|
||||
appear in the response token set.
|
||||
"""
|
||||
if not memory_content:
|
||||
return False
|
||||
normalized_memory = _normalize(memory_content)
|
||||
if len(normalized_memory) < _MIN_MEMORY_CONTENT_LENGTH:
|
||||
return False
|
||||
window = normalized_memory[:_MATCH_WINDOW_CHARS]
|
||||
return window in normalized_response
|
||||
memory_tokens = _tokenize(normalized_memory)
|
||||
if not memory_tokens:
|
||||
return False
|
||||
response_tokens = _tokenize(normalized_response)
|
||||
overlap = memory_tokens & response_tokens
|
||||
return len(overlap) / len(memory_tokens) >= _MATCH_THRESHOLD
|
||||
|
||||
@@ -6,6 +6,8 @@ from atocore.interactions.service import record_interaction
|
||||
from atocore.main import app
|
||||
from atocore.memory.reinforcement import (
|
||||
DEFAULT_CONFIDENCE_DELTA,
|
||||
_stem,
|
||||
_tokenize,
|
||||
reinforce_from_interaction,
|
||||
)
|
||||
from atocore.memory.service import (
|
||||
@@ -373,3 +375,118 @@ def test_get_memories_filter_by_alias(project_registry):
|
||||
assert len(via_alias) == 2
|
||||
assert len(via_canonical) == 2
|
||||
assert {m.content for m in via_alias} == {"m1", "m2"}
|
||||
|
||||
|
||||
# --- token-overlap matcher: unit tests -------------------------------------
|
||||
|
||||
|
||||
def test_stem_folds_s_ed_ing():
    """Inflected forms fold to their stem; short words are left intact."""
    expected_stems = {
        # trailing "s", "ed" (with doubled consonant) and "ing"
        "prefers": "prefer",
        "preferred": "prefer",
        "services": "service",
        "processing": "process",
        # Short words must not be over-stripped
        "red": "red",  # 3 chars, don't strip "ed"
        "bus": "bus",  # 3 chars, don't strip "s"
        "sing": "sing",  # 4 chars, don't strip "ing"
        "being": "being",  # 5 chars, stripping "ing" would leave only "be"
    }
    for word, stem in expected_stems.items():
        assert _stem(word) == stem
|
||||
|
||||
|
||||
def test_tokenize_removes_stop_words():
    """Stop words are dropped while ordinary content words survive."""
    token_set = _tokenize("the quick brown fox jumps over the lazy dog")
    assert "the" not in token_set
    # "over" is four characters long and not a stop word, so it is kept
    # (and stemming leaves it as "over").
    for kept_word in ("quick", "brown", "fox", "dog", "over"):
        assert kept_word in token_set
|
||||
|
||||
|
||||
# --- token-overlap matcher: paraphrase matching ----------------------------
|
||||
|
||||
|
||||
def test_reinforce_matches_paraphrase_prefers_vs_prefer(tmp_data_dir):
    """The canonical rebase case from phase9-first-real-use.md."""
    init_db()
    mem = create_memory(
        memory_type="preference",
        content="prefers rebase-based workflows because history stays linear",
        confidence=0.5,
    )
    # Same statement, but with "prefer" for "prefers" and an extra "the".
    paraphrased_response = (
        "I prefer rebase-based workflows because the history stays "
        "linear and reviewers have an easier time."
    )
    interaction = _make_interaction(response=paraphrased_response)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids
|
||||
|
||||
|
||||
def test_reinforce_matches_paraphrase_with_articles_and_ed(tmp_data_dir):
    """An "-ed" form plus inserted articles still reinforces the memory."""
    init_db()
    mem = create_memory(
        memory_type="preference",
        content="preferred structured logging across all backend services",
        confidence=0.5,
    )
    # The response says "prefers" instead of "preferred" and adds "the".
    paraphrased_response = (
        "I set up structured logging across all the backend services, "
        "which the team prefers for consistency."
    )
    interaction = _make_interaction(response=paraphrased_response)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids
|
||||
|
||||
|
||||
def test_reinforce_rejects_low_overlap(tmp_data_dir):
    """A response on an unrelated topic must not reinforce the memory."""
    init_db()
    mem = create_memory(
        memory_type="preference",
        content="always uses Python for data processing scripts",
        confidence=0.5,
    )
    # Shares no content words with the memory above.
    unrelated_response = (
        "The CI pipeline runs on Node.js and deploys to Kubernetes "
        "using Helm charts."
    )
    interaction = _make_interaction(response=unrelated_response)

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id not in reinforced_ids
|
||||
|
||||
|
||||
def test_reinforce_matches_at_70_percent_threshold(tmp_data_dir):
    """Exactly 7 of 10 content tokens present → should match."""
    init_db()
    # Ten words, none of them a stop word and none changed by stemming, so
    # the memory contributes exactly ten tokens:
    # alpha, bravo, charlie, delta, echo, foxtrot, golf, hotel, india, juliet
    mem = create_memory(
        memory_type="preference",
        content="alpha bravo charlie delta echo foxtrot golf hotel india juliet",
        confidence=0.5,
    )
    # Seven of the ten tokens (70%) followed by unrelated filler.
    interaction = _make_interaction(
        response="alpha bravo charlie delta echo foxtrot golf noise words here",
    )

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id in reinforced_ids
|
||||
|
||||
|
||||
def test_reinforce_rejects_below_70_percent(tmp_data_dir):
    """Only 6 of 10 content tokens present (60%) → should NOT match."""
    init_db()
    mem = create_memory(
        memory_type="preference",
        content="alpha bravo charlie delta echo foxtrot golf hotel india juliet",
        confidence=0.5,
    )
    # Six of the ten tokens (60%) followed by unrelated filler.
    interaction = _make_interaction(
        response="alpha bravo charlie delta echo foxtrot noise words here only",
    )

    results = reinforce_from_interaction(interaction)

    reinforced_ids = [result.memory_id for result in results]
    assert mem.id not in reinforced_ids
|
||||
|
||||
Reference in New Issue
Block a user