fix(projects): close Codex Wave 1.5 P2/P3 — stronger negatives + lowercase tokens

Codex's audit of e8ac8bb returned GO with two cheap improvements worth
folding in:

P2: the "short token does not match" test was trivially true because
apm and drill share no tokens at all. Replaced with a 4-label setup
that exercises both directions: apm + apm-fpga must NOT cluster (only
shared token is the 3-char 'apm'); foo-fpga + bar-fpga + apm-fpga MUST
cluster via the 4-char 'fpga'. Now a regression that lets <4-char
tokens through would fail.

P3: token comparison was case-sensitive. Lowercased before the length
check so 'HydroTech-Mining' clusters with 'hydrotech-split-tank' the
same way the all-lowercase variants do. Added a regression test.

Also added the registered-token-leak test Codex specifically called
out: p04-gigabit registered, gigabit-other unregistered — gigabit-other
must NOT surface p04-gigabit as a suggested alias (filter happens
before clustering).

Test count: 594 -> 596.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-28 22:16:00 -04:00
parent e8ac8bb536
commit f70fa6bb9a
2 changed files with 57 additions and 12 deletions

View File

@@ -495,7 +495,7 @@ def propose_emerging_projects(min_active: int = 10) -> list[dict]:
# they share a non-trivial token (length >= 4 after splitting on # they share a non-trivial token (length >= 4 after splitting on
# '-' and '_'). Cheap, defensible, and the operator gets to veto. # '-' and '_'). Cheap, defensible, and the operator gets to veto.
def _tokens(label: str) -> set[str]: def _tokens(label: str) -> set[str]:
parts = label.replace("_", "-").split("-") parts = label.lower().replace("_", "-").split("-")
return {p for p in parts if len(p) >= 4} return {p for p in parts if len(p) >= 4}
label_tokens = {label: _tokens(label) for label, _a, _c in unregistered} label_tokens = {label: _tokens(label) for label, _a, _c in unregistered}

View File

@@ -103,20 +103,65 @@ def test_proposals_suggest_sibling_aliases_via_shared_tokens(env):
def test_proposals_short_token_does_not_match(env): def test_proposals_short_token_does_not_match(env):
"""Two-or-three-letter tokens are too noisy to suggest aliases on. """Per Codex Wave 1.5 P2: previously this test only asserted apm
'apm' (3 chars) and 'apm-fpga' (3 + 4) share 'apm' (3 chars) — the and drill have empty siblings, which is trivially true because they
'apm' token alone is too short, but 'fpga' (4) is long enough so share no tokens at all. The real risk is an accidental relaxation
the match comes from the longer token. We test the negative: 'apm' that lets <4-char tokens trigger clustering. Construct a setup where
and 'drill' must NOT be siblings.""" that would matter:
- 'apm' and 'apm-fpga': only the 3-char 'apm' is shared. They must
NOT cluster, because 'apm' is too short.
- 'foo-fpga' and 'bar-fpga': the 4-char 'fpga' is shared. They
MUST cluster.
"""
for label in ("apm", "apm-fpga", "foo-fpga", "bar-fpga"):
for i in range(11): for i in range(11):
create_memory("knowledge", f"apm fact {i}", project="apm") create_memory("knowledge", f"{label} fact {i}", project=label)
for i in range(11):
create_memory("knowledge", f"drill fact {i}", project="drill")
client = TestClient(app) client = TestClient(app)
proposals = {p["project_id"]: p for p in client.get("/admin/projects/proposals").json()["proposals"]} proposals = {p["project_id"]: p for p in client.get("/admin/projects/proposals").json()["proposals"]}
assert proposals["apm"]["suggested_aliases"] == []
assert proposals["drill"]["suggested_aliases"] == [] # Negative: short-token match must not happen
assert "apm-fpga" not in proposals["apm"]["suggested_aliases"], (
"'apm' (3 chars) is below the 4-char minimum; 'apm' and 'apm-fpga' "
"must not cluster via the 'apm' token."
)
# Positive: long-token match must happen — both directions
assert "bar-fpga" in proposals["foo-fpga"]["suggested_aliases"]
assert "foo-fpga" in proposals["bar-fpga"]["suggested_aliases"]
# And 'apm-fpga' clusters with the others via 'fpga'
assert "apm-fpga" in proposals["foo-fpga"]["suggested_aliases"]
def test_proposals_clustering_is_case_insensitive(env):
    """Labels that share a token differing only by case must cluster.

    Regression test for Codex Wave 1.5 P3. 'HydroTech-Mining' and
    'hydrotech-split-tank' share the token 'hydrotech' once casing is
    ignored, so each label is expected to appear in the other's
    ``suggested_aliases``.
    """
    mixed_case = "HydroTech-Mining"
    lower_case = "hydrotech-split-tank"

    # Seed 11 memories per label. NOTE(review): presumably this is what
    # qualifies a label for a proposal (propose_emerging_projects defaults
    # to min_active=10) -- confirm against the endpoint.
    for project_label in (mixed_case, lower_case):
        for n in range(11):
            create_memory(
                "knowledge",
                f"{project_label} fact {n}",
                project=project_label,
            )

    response = TestClient(app).get("/admin/projects/proposals")
    by_id = {entry["project_id"]: entry for entry in response.json()["proposals"]}

    # The suggestion must hold in both directions.
    assert lower_case in by_id[mixed_case]["suggested_aliases"]
    assert mixed_case in by_id[lower_case]["suggested_aliases"]
def test_proposals_registered_token_does_not_leak_into_sibling_set(env, monkeypatch):
    """A registered project id must never be suggested as an alias.

    Registered ids are expected to be filtered out BEFORE clustering, so
    an unregistered label that shares a token with a registered one
    ('gigabit-other' vs. 'p04-gigabit', sharing 'gigabit') must not list
    the registered id among its ``suggested_aliases``.

    NOTE(review): this relies on the ``env`` fixture registering
    'p04-gigabit'; the fixture is not visible here -- confirm.
    ``monkeypatch`` is requested but not used in this body.
    """
    registered = "p04-gigabit"
    unregistered = "gigabit-other"

    for n in range(15):
        create_memory("knowledge", f"p04 fact {n}", project=registered)
    for n in range(11):
        create_memory("knowledge", f"gigabit-other fact {n}", project=unregistered)

    response = TestClient(app).get("/admin/projects/proposals")
    by_id = {entry["project_id"]: entry for entry in response.json()["proposals"]}

    # The registered project is not proposed; the unregistered one is.
    assert registered not in by_id
    assert unregistered in by_id
    # And the registered name must not surface as a sibling.
    assert registered not in by_id[unregistered]["suggested_aliases"]
def test_proposals_include_sample_memories_and_guessed_root(env): def test_proposals_include_sample_memories_and_guessed_root(env):