feat(projects): live emerging-project registration proposals
Adds GET /admin/projects/proposals?min_active=N — an on-demand companion
to the nightly scripts/detect_emerging.py cache. Reads SQL + the registry
directly so the result is always current.
Each proposal is operator-ready:
- project_id (the literal label as captured)
- active_count / candidate_count from current SQL
- sample_memories: 3 most recent active memories with content preview
- suggested_aliases: sibling labels sharing a >=4-char token
(e.g. lead-space + lead-space-exploration-ltd + space-exploration-ltd
cluster naturally; apm and drill stay independent)
- guessed_ingest_root: vault:incoming/projects/<id>/
Workflow: operator hits /admin/projects/proposals to see the live "what
should I register?" view, picks aliases from the suggestions, then POSTs
to the existing /admin/projects/register-emerging.
Closes Codex's Wave 1.5 ask: "promote-to-registered-project proposal
with suggested aliases, sample memories, and guessed ingest root;
require one click." For apm at 165 active memories on prod, this is
overdue.
8 regression tests covering: registered-name (canonical + alias)
exclusion, threshold filtering, sibling clustering, short-token negative,
sample/root shape, candidate counting, param validation, sort order.
Test count: 586 -> 594.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
172
tests/test_emerging_project_proposals.py
Normal file
172
tests/test_emerging_project_proposals.py
Normal file
@@ -0,0 +1,172 @@
|
||||
"""Wave 1.5 — live emerging-project registration proposals.
|
||||
|
||||
The nightly `scripts/detect_emerging.py` writes a stale cache to
|
||||
`project_state.proposals.unregistered_projects`. This endpoint provides
|
||||
the on-demand alternative that operators can hit before deciding which
|
||||
unregistered project to register via `/admin/projects/register-emerging`.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import atocore.config as config
|
||||
from atocore.main import app
|
||||
from atocore.memory.service import create_memory
|
||||
from atocore.models.database import init_db
|
||||
|
||||
|
||||
@pytest.fixture
def env(tmp_data_dir, tmp_path, monkeypatch):
    """Provide a fresh DB and a registry containing one registered project.

    The registry file written here lists only ``p04-gigabit`` (with aliases
    ``p04`` and ``gigabit``), so tests can check that registered names are
    left out of the proposals endpoint's response.

    Yields:
        The ``tmp_data_dir`` fixture value, unchanged.
    """
    registered_project = {
        "id": "p04-gigabit",
        "aliases": ["p04", "gigabit"],
        "description": "test",
        "ingest_roots": [
            {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
        ],
    }
    registry_file = tmp_path / "registry.json"
    registry_file.write_text(
        json.dumps({"projects": [registered_project]}),
        encoding="utf-8",
    )

    # Point the registry env var at the temp file, then rebuild the settings
    # object (presumably Settings() re-reads the environment -- confirm in
    # atocore.config) before initialising the database.
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_file))
    config.settings = config.Settings()
    init_db()
    yield tmp_data_dir
|
||||
|
||||
|
||||
def test_proposals_excludes_registered_project_and_its_aliases(env):
    """Registered canonical ids and their aliases are never proposed.

    Seeds memories under the registered id ``p04-gigabit``, under its alias
    ``p04``, and under the unregistered label ``apm``; only ``apm`` may show
    up in the proposals.
    """
    # (project label, content prefix, number of memories), in insertion order.
    seed_plan = (
        ("p04-gigabit", "p04 fact", 15),  # registered canonical id
        ("p04", "alias fact", 15),  # registered alias
        ("apm", "apm fact", 12),  # unregistered, above the requested threshold
    )
    for project, prefix, count in seed_plan:
        for n in range(count):
            create_memory("knowledge", f"{prefix} {n}", project=project)

    response = TestClient(app).get("/admin/projects/proposals?min_active=10")
    proposed = {entry["project_id"] for entry in response.json()["proposals"]}

    assert "apm" in proposed
    assert proposed.isdisjoint({"p04-gigabit", "p04", "gigabit"})
|
||||
|
||||
|
||||
def test_proposals_threshold_filters_low_count_labels(env):
    """Labels with fewer memories than ``min_active`` are not proposed.

    With ``min_active=10``, a label holding 11 memories is expected in the
    response while labels holding 1 and 3 memories are not.
    """
    create_memory("knowledge", "single one-off", project="discrawl")
    for n in range(3):
        create_memory("knowledge", f"low-volume {n}", project="drill")
    for n in range(11):
        create_memory("knowledge", f"high-volume {n}", project="apm")

    response = TestClient(app).get("/admin/projects/proposals?min_active=10")
    proposed = {entry["project_id"] for entry in response.json()["proposals"]}

    assert "apm" in proposed
    assert proposed.isdisjoint({"drill", "discrawl"})
|
||||
|
||||
|
||||
def test_proposals_suggest_sibling_aliases_via_shared_tokens(env):
    """Labels with a common token suggest one another as aliases.

    ``lead-space``, ``lead-space-exploration-ltd`` and
    ``space-exploration-ltd`` all contain the token ``space``. The test
    requires every one of them to be proposed and to list at least one of
    the other two in ``suggested_aliases``.
    """
    cluster = ("lead-space", "lead-space-exploration-ltd", "space-exploration-ltd")
    for label in cluster:
        for n in range(11):
            create_memory("knowledge", f"{label} content {n}", project=label)

    payload = TestClient(app).get("/admin/projects/proposals").json()
    by_id = {entry["project_id"]: entry for entry in payload["proposals"]}

    for label in cluster:
        assert label in by_id
        # A label must never count as its own sibling, so drop it first.
        other_labels = set(cluster) - {label}
        suggested = set(by_id[label]["suggested_aliases"])
        assert suggested & other_labels
|
||||
|
||||
|
||||
def test_proposals_short_token_does_not_match(env):
    """Unrelated labels must not be suggested as each other's aliases.

    Negative case: ``apm`` and ``drill`` have no token in common, so both
    proposals are expected to carry an empty ``suggested_aliases`` list.
    """
    for label in ("apm", "drill"):
        for n in range(11):
            create_memory("knowledge", f"{label} fact {n}", project=label)

    payload = TestClient(app).get("/admin/projects/proposals").json()
    by_id = {entry["project_id"]: entry for entry in payload["proposals"]}

    assert by_id["apm"]["suggested_aliases"] == []
    assert by_id["drill"]["suggested_aliases"] == []
|
||||
|
||||
|
||||
def test_proposals_include_sample_memories_and_guessed_root(env):
    """A proposal carries counts, a guessed ingest root and three samples.

    After seeding 11 memories under ``apm``, the ``apm`` proposal must report
    11 active / 0 candidate memories, a vault ingest root under
    ``incoming/projects/apm/``, and exactly three sample memories that each
    have an id and a preview of the seeded content.
    """
    for n in range(11):
        create_memory("knowledge", f"sample content {n}", project="apm")

    payload = TestClient(app).get("/admin/projects/proposals").json()
    matching = [entry for entry in payload["proposals"] if entry["project_id"] == "apm"]
    proposal = matching[0]

    assert proposal["active_count"] == 11
    assert proposal["candidate_count"] == 0

    expected_root = {"source": "vault", "subpath": "incoming/projects/apm/"}
    assert proposal["guessed_ingest_root"] == expected_root

    samples = proposal["sample_memories"]
    assert len(samples) == 3
    assert all(sample["id"] for sample in samples)
    assert all("sample content" in sample["content_preview"] for sample in samples)
|
||||
|
||||
|
||||
def test_proposals_count_candidates_separately(env):
    """Candidate-status memories are counted apart from active ones.

    Seeds 11 memories with the default status and 4 with
    ``status="candidate"`` under ``apm``, then checks that the proposal
    reports them as ``active_count`` 11 and ``candidate_count`` 4.
    """
    for n in range(11):
        create_memory("knowledge", f"active {n}", project="apm")
    for n in range(4):
        create_memory("knowledge", f"candidate {n}", project="apm", status="candidate")

    payload = TestClient(app).get("/admin/projects/proposals").json()
    matching = [entry for entry in payload["proposals"] if entry["project_id"] == "apm"]
    proposal = matching[0]

    assert proposal["active_count"] == 11
    assert proposal["candidate_count"] == 4
|
||||
|
||||
|
||||
def test_proposals_min_active_param_validation(env):
    """A ``min_active`` of zero is rejected with HTTP 400."""
    response = TestClient(app).get("/admin/projects/proposals?min_active=0")
    assert response.status_code == 400
|
||||
|
||||
|
||||
def test_proposals_sorted_by_active_count_desc(env):
    """Proposals come back with the largest active count first.

    ``apm`` is seeded with 20 memories and ``openclaw`` with 11, so ``apm``
    must be listed before ``openclaw``.
    """
    # (project label, content prefix, number of memories), in insertion order.
    seed_plan = (("apm", "big", 20), ("openclaw", "small", 11))
    for project, prefix, count in seed_plan:
        for n in range(count):
            create_memory("knowledge", f"{prefix} {n}", project=project)

    payload = TestClient(app).get("/admin/projects/proposals").json()
    ordered_ids = [entry["project_id"] for entry in payload["proposals"]]

    assert ordered_ids[:2] == ["apm", "openclaw"]
|
||||
Reference in New Issue
Block a user