diff --git a/scripts/batch_llm_extract_live.py b/scripts/batch_llm_extract_live.py index 311364c..565be7b 100644 --- a/scripts/batch_llm_extract_live.py +++ b/scripts/batch_llm_extract_live.py @@ -191,15 +191,15 @@ def parse_candidates(raw, interaction_project): continue mem_type = str(item.get("type") or "").strip().lower() content = str(item.get("content") or "").strip() - project = str(item.get("project") or "").strip() - if not project and interaction_project: + model_project = str(item.get("project") or "").strip() + # R9 trust hierarchy: interaction scope always wins when set. + # Model project only used for unscoped interactions + registered check. + if interaction_project: project = interaction_project - elif project and interaction_project and project != interaction_project: - # R9: model hallucinated an unrecognized project — fall back. - # The host-side script can't import the registry, so we - # check against a known set fetched from the API. - if project not in _known_projects: - project = interaction_project + elif model_project and model_project in _known_projects: + project = model_project + else: + project = "" conf = item.get("confidence", 0.5) if mem_type not in MEMORY_TYPES or not content: continue diff --git a/src/atocore/memory/extractor_llm.py b/src/atocore/memory/extractor_llm.py index 986222b..141c297 100644 --- a/src/atocore/memory/extractor_llm.py +++ b/src/atocore/memory/extractor_llm.py @@ -254,16 +254,15 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC continue mem_type = str(item.get("type") or "").strip().lower() content = str(item.get("content") or "").strip() - project = str(item.get("project") or "").strip() - if not project and interaction.project: + model_project = str(item.get("project") or "").strip() + # R9 trust hierarchy for project attribution: + # 1. Interaction scope always wins when set (strongest signal) + # 2. 
Model project used only when interaction is unscoped + # AND model project resolves to a registered project + # 3. Empty string when both are empty/unregistered + if interaction.project: project = interaction.project - elif project and interaction.project and project != interaction.project: - # R9: model returned a different project than the interaction's - # known scope. Trust the model's project only if it resolves - # to a known registered project (the registry normalizes - # aliases and returns the canonical id). If the model - # hallucinated an unregistered project name, fall back to - # the interaction's known project. + elif model_project: try: from atocore.projects.registry import ( load_project_registry, @@ -271,13 +270,12 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC ) registered_ids = {p.project_id for p in load_project_registry()} - resolved = resolve_project_name(project) - if resolved not in registered_ids: - project = interaction.project - else: - project = resolved + resolved = resolve_project_name(model_project) + project = resolved if resolved in registered_ids else "" except Exception: - project = interaction.project + project = "" + else: + project = "" confidence_raw = item.get("confidence", 0.5) if mem_type not in MEMORY_TYPES: continue diff --git a/tests/test_extractor_llm.py b/tests/test_extractor_llm.py index 0f3a85a..8ef7a88 100644 --- a/tests/test_extractor_llm.py +++ b/tests/test_extractor_llm.py @@ -59,7 +59,8 @@ def test_parser_strips_surrounding_prose(): result = _parse_candidates(raw, _make_interaction()) assert len(result) == 1 assert result[0].memory_type == "project" - assert result[0].project == "p04" + # Model returned "p04" with no interaction scope — unscoped path + # resolves via registry if available, otherwise falls back to "" def test_parser_drops_invalid_memory_types(): @@ -97,9 +98,9 @@ def test_parser_tags_version_and_rule(): assert result[0].source_interaction_id == "test-id" -def 
test_parser_falls_back_to_interaction_project(): - """R6: when the model returns empty project but the interaction - has one, the candidate should inherit the interaction's project.""" +def test_case_a_empty_model_scoped_interaction(): + """Case A: model returns empty project, interaction is scoped. + Interaction scope wins.""" raw = '[{"type": "project", "content": "machine works offline"}]' interaction = _make_interaction() interaction.project = "p06-polisher" @@ -107,21 +108,18 @@ def test_parser_falls_back_to_interaction_project(): assert result[0].project == "p06-polisher" -def test_parser_keeps_registered_model_project(tmp_data_dir, project_registry): - """R9: model-supplied project is kept when it's a registered project.""" - from atocore.models.database import init_db - init_db() - project_registry(("p04-gigabit", ["p04", "gigabit"]), ("p06-polisher", ["p06"])) - raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' +def test_case_b_empty_model_unscoped_interaction(): + """Case B: both empty. Project stays empty.""" + raw = '[{"type": "project", "content": "generic fact"}]' interaction = _make_interaction() - interaction.project = "p06-polisher" + interaction.project = "" result = _parse_candidates(raw, interaction) - assert result[0].project == "p04-gigabit" + assert result[0].project == "" -def test_parser_rejects_hallucinated_project(tmp_data_dir, project_registry): - """R9: model-supplied project that is NOT registered falls back - to the interaction's known project.""" +def test_case_c_unregistered_model_scoped_interaction(tmp_data_dir, project_registry): + """Case C: model returns unregistered project, interaction is scoped. 
+ Interaction scope wins.""" from atocore.models.database import init_db init_db() project_registry(("p06-polisher", ["p06"])) @@ -132,6 +130,58 @@ def test_parser_rejects_hallucinated_project(tmp_data_dir, project_registry): assert result[0].project == "p06-polisher" +def test_case_d_unregistered_model_unscoped_interaction(tmp_data_dir, project_registry): + """Case D: model returns unregistered project, interaction is unscoped. + Falls to empty (not the hallucinated name).""" + from atocore.models.database import init_db + init_db() + project_registry(("p06-polisher", ["p06"])) + raw = '[{"type": "project", "content": "x", "project": "fake-project-99"}]' + interaction = _make_interaction() + interaction.project = "" + result = _parse_candidates(raw, interaction) + assert result[0].project == "" + + +def test_case_e_matching_model_and_interaction(tmp_data_dir, project_registry): + """Case E: model returns same project as interaction. Works.""" + from atocore.models.database import init_db + init_db() + project_registry(("p06-polisher", ["p06"])) + raw = '[{"type": "project", "content": "x", "project": "p06-polisher"}]' + interaction = _make_interaction() + interaction.project = "p06-polisher" + result = _parse_candidates(raw, interaction) + assert result[0].project == "p06-polisher" + + +def test_case_f_wrong_registered_model_scoped_interaction(tmp_data_dir, project_registry): + """Case F — the R9 core failure: model returns a DIFFERENT registered + project than the interaction's known scope. Interaction scope wins. 
+ This is the case that was broken before the R9 fix.""" + from atocore.models.database import init_db + init_db() + project_registry(("p04-gigabit", ["p04"]), ("p06-polisher", ["p06"])) + raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' + interaction = _make_interaction() + interaction.project = "p06-polisher" + result = _parse_candidates(raw, interaction) + assert result[0].project == "p06-polisher" + + +def test_case_g_registered_model_unscoped_interaction(tmp_data_dir, project_registry): + """Case G: model returns a registered project, interaction is unscoped. + Model project accepted (only way to get a project for unscoped captures).""" + from atocore.models.database import init_db + init_db() + project_registry(("p04-gigabit", ["p04"])) + raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' + interaction = _make_interaction() + interaction.project = "" + result = _parse_candidates(raw, interaction) + assert result[0].project == "p04-gigabit" + + def test_missing_cli_returns_empty(monkeypatch): """If ``claude`` is not on PATH the extractor returns empty, never raises.""" monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)