diff --git a/scripts/batch_llm_extract_live.py b/scripts/batch_llm_extract_live.py
index 2397b69..311364c 100644
--- a/scripts/batch_llm_extract_live.py
+++ b/scripts/batch_llm_extract_live.py
@@ -100,6 +100,33 @@ def set_last_run(base_url, timestamp):
         pass
 
 
+_known_projects: set[str] = set()
+
+
+def _load_known_projects(base_url):
+    """Fetch registered project IDs and aliases from the API for R9 validation.
+
+    Best-effort: if the request fails or the payload is malformed, a warning
+    is printed to stderr and the known set is left as it was (empty on the
+    first call). parse_candidates then falls back to the interaction's
+    project for any model-supplied project that differs from it.
+    """
+    import sys
+
+    global _known_projects
+    try:
+        data = api_get(base_url, "/projects")
+        known: set[str] = set()
+        for p in data.get("projects") or []:
+            known.add(p["id"])
+            known.update(p.get("aliases") or [])
+    except Exception as exc:
+        # Deliberately broad: validation must never abort the batch run.
+        print(f"WARN: could not load known projects: {exc}", file=sys.stderr)
+        return
+    _known_projects = known
+
+
 def extract_one(prompt, response, project, model, timeout_s):
     """Run claude -p on one interaction, return parsed candidates."""
     if not shutil.which("claude"):
@@ -178,6 +205,12 @@ def parse_candidates(raw, interaction_project):
         project = str(item.get("project") or "").strip()
         if not project and interaction_project:
             project = interaction_project
+        elif project and interaction_project and project != interaction_project:
+            # R9: model hallucinated an unrecognized project — fall back.
+            # The host-side script can't import the registry, so we
+            # check against a known set fetched from the API.
+            if project not in _known_projects:
+                project = interaction_project
         conf = item.get("confidence", 0.5)
         if mem_type not in MEMORY_TYPES or not content:
             continue
@@ -202,8 +235,9 @@ def main():
     parser.add_argument("--model", default=DEFAULT_MODEL)
     args = parser.parse_args()
 
+    _load_known_projects(args.base_url)
     since = args.since or get_last_run(args.base_url)
-    print(f"since={since or '(first run)'} limit={args.limit} model={args.model}")
+    print(f"since={since or '(first run)'} limit={args.limit} model={args.model} known_projects={len(_known_projects)}")
 
     params = [f"limit={args.limit}"]
     if since:
diff --git a/src/atocore/memory/extractor_llm.py b/src/atocore/memory/extractor_llm.py
index 1a1fd88..986222b 100644
--- a/src/atocore/memory/extractor_llm.py
+++ b/src/atocore/memory/extractor_llm.py
@@ -257,6 +257,27 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC
         project = str(item.get("project") or "").strip()
         if not project and interaction.project:
             project = interaction.project
+        elif project and interaction.project and project != interaction.project:
+            # R9: model returned a different project than the interaction's
+            # known scope. Trust the model's project only if it resolves
+            # to a known registered project (the registry normalizes
+            # aliases and returns the canonical id). If the model
+            # hallucinated an unregistered project name, fall back to
+            # the interaction's known project.
+            try:
+                from atocore.projects.registry import (
+                    load_project_registry,
+                    resolve_project_name,
+                )
+
+                registered_ids = {p.project_id for p in load_project_registry()}
+                resolved = resolve_project_name(project)
+                if resolved not in registered_ids:
+                    project = interaction.project
+                else:
+                    project = resolved
+            except Exception:
+                project = interaction.project
         confidence_raw = item.get("confidence", 0.5)
         if mem_type not in MEMORY_TYPES:
             continue
diff --git a/src/atocore/memory/service.py b/src/atocore/memory/service.py
index 34241e3..0e9a119 100644
--- a/src/atocore/memory/service.py
+++ b/src/atocore/memory/service.py
@@ -446,20 +446,27 @@ def _rank_memories_for_query(
 ) -> list["Memory"]:
     """Rerank a memory list by lexical overlap with a pre-tokenized query.
 
-    Ordering key: (overlap_count DESC, confidence DESC). When a query
-    shares no tokens with a memory, overlap is zero and confidence
-    acts as the sole tiebreaker — which matches the pre-query
-    behaviour and keeps no-query calls stable.
+    Primary key: overlap_density (overlap_count / memory_token_count),
+    which rewards short focused memories that match the query precisely
+    over long overview memories that incidentally share a few tokens.
+    Secondary: absolute overlap count. Tertiary: confidence.
+
+    R7 fix: previously overlap_count alone was the primary key, so a
+    40-token overview memory with 3 overlapping tokens tied a 5-token
+    memory with 3 overlapping tokens, and the overview won on
+    confidence. Now the short memory's density (0.6) beats the
+    overview's density (0.075).
     """
     from atocore.memory.reinforcement import _normalize, _tokenize
 
-    scored: list[tuple[int, float, Memory]] = []
+    scored: list[tuple[float, int, float, Memory]] = []
     for mem in memories:
         mem_tokens = _tokenize(_normalize(mem.content))
         overlap = len(mem_tokens & query_tokens) if mem_tokens else 0
-        scored.append((overlap, mem.confidence, mem))
-    scored.sort(key=lambda t: (t[0], t[1]), reverse=True)
-    return [mem for _, _, mem in scored]
+        density = overlap / len(mem_tokens) if mem_tokens else 0.0
+        scored.append((density, overlap, mem.confidence, mem))
+    scored.sort(key=lambda t: (t[0], t[1], t[2]), reverse=True)
+    return [mem for _, _, _, mem in scored]
 
 
 def _row_to_memory(row) -> Memory:
diff --git a/tests/test_extractor_llm.py b/tests/test_extractor_llm.py
index 1bbc79a..0f3a85a 100644
--- a/tests/test_extractor_llm.py
+++ b/tests/test_extractor_llm.py
@@ -107,8 +107,11 @@ def test_parser_falls_back_to_interaction_project():
     assert result[0].project == "p06-polisher"
 
 
-def test_parser_keeps_model_project_when_provided():
-    """Model-supplied project takes precedence over interaction."""
+def test_parser_keeps_registered_model_project(tmp_data_dir, project_registry):
+    """R9: model-supplied project is kept when it's a registered project."""
+    from atocore.models.database import init_db
+    init_db()
+    project_registry(("p04-gigabit", ["p04", "gigabit"]), ("p06-polisher", ["p06"]))
     raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]'
     interaction = _make_interaction()
     interaction.project = "p06-polisher"
@@ -116,6 +119,19 @@ def test_parser_keeps_model_project_when_provided():
     assert result[0].project == "p04-gigabit"
 
 
+def test_parser_rejects_hallucinated_project(tmp_data_dir, project_registry):
+    """R9: model-supplied project that is NOT registered falls back
+    to the interaction's known project."""
+    from atocore.models.database import init_db
+    init_db()
+    project_registry(("p06-polisher", ["p06"]))
+    raw = '[{"type": "project", "content": "x", "project": "fake-project-99"}]'
+    interaction = _make_interaction()
+    interaction.project = "p06-polisher"
+    result = _parse_candidates(raw, interaction)
+    assert result[0].project == "p06-polisher"
+
+
 def test_missing_cli_returns_empty(monkeypatch):
     """If ``claude`` is not on PATH the extractor returns empty, never raises."""
     monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)