diff --git a/scripts/batch_llm_extract_live.py b/scripts/batch_llm_extract_live.py index f0de24e..f177796 100644 --- a/scripts/batch_llm_extract_live.py +++ b/scripts/batch_llm_extract_live.py @@ -35,7 +35,7 @@ SYSTEM_PROMPT = """You extract durable memory candidates from LLM conversation t AtoCore stores two kinds of knowledge: -A. PROJECT-SPECIFIC: applied decisions, constraints, and architecture for a named project (p04-gigabit, p05-interferometer, p06-polisher, atomizer-v2, atocore). These stay scoped to one project. +A. PROJECT-SPECIFIC: applied decisions, constraints, and architecture for a named project. Known projects include p04-gigabit, p05-interferometer, p06-polisher, atomizer-v2, atocore, abb-space. If the conversation discusses a project NOT in this list, still tag it with the project name you identify — the system will auto-detect it as a new project or lead. B. DOMAIN KNOWLEDGE: generalizable engineering insight that was EARNED through project work and is reusable across projects. Tag these with a domain instead of a project. @@ -213,9 +213,11 @@ def parse_candidates(raw, interaction_project): model_project = str(item.get("project") or "").strip() domain = str(item.get("domain") or "").strip().lower() # R9 trust hierarchy: interaction scope always wins when set. + # For unscoped interactions, keep model's project tag even if + # unregistered — the system will detect new projects/leads. if interaction_project: project = interaction_project - elif model_project and model_project in _known_projects: + elif model_project: project = model_project else: project = "" diff --git a/src/atocore/memory/extractor_llm.py b/src/atocore/memory/extractor_llm.py index c8728c8..19417cf 100644 --- a/src/atocore/memory/extractor_llm.py +++ b/src/atocore/memory/extractor_llm.py @@ -74,7 +74,7 @@ _SYSTEM_PROMPT = """You extract durable memory candidates from LLM conversation AtoCore stores two kinds of knowledge: -A. PROJECT-SPECIFIC: applied decisions, constraints, and architecture for a named project (p04-gigabit, p05-interferometer, p06-polisher, atomizer-v2, atocore). These stay scoped to one project. +A. PROJECT-SPECIFIC: applied decisions, constraints, and architecture for a named project. Known projects include p04-gigabit, p05-interferometer, p06-polisher, atomizer-v2, atocore, abb-space. If the conversation discusses a project NOT in this list, still tag it with the project name you identify — the system will auto-detect it as a new project or lead. B. DOMAIN KNOWLEDGE: generalizable engineering insight that was EARNED through project work and is reusable across projects. Tag these with a domain instead of a project. @@ -291,9 +291,20 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC registered_ids = {p.project_id for p in load_project_registry()} resolved = resolve_project_name(model_project) - project = resolved if resolved in registered_ids else "" + if resolved in registered_ids: + project = resolved + else: + # Unregistered project — keep the model's tag so + # auto-triage / the operator can see it and decide + # whether to register it as a new project or lead. + project = model_project + log.info( + "unregistered_project_detected", + model_project=model_project, + interaction_id=interaction.id, + ) except Exception: - project = "" + project = model_project if model_project else "" else: project = "" domain = str(item.get("domain") or "").strip().lower() diff --git a/tests/test_extractor_llm.py b/tests/test_extractor_llm.py index 8ef7a88..f5da568 100644 --- a/tests/test_extractor_llm.py +++ b/tests/test_extractor_llm.py @@ -130,17 +130,17 @@ def test_case_c_unregistered_model_scoped_interaction(tmp_data_dir, project_regi assert result[0].project == "p06-polisher" -def test_case_d_unregistered_model_unscoped_interaction(tmp_data_dir, project_registry): +def test_case_d_unregistered_model_unscoped_keeps_tag(tmp_data_dir, project_registry): """Case D: model returns unregistered project, interaction is unscoped. - Falls to empty (not the hallucinated name).""" + Keeps the model's tag for auto-project-detection (new behavior).""" from atocore.models.database import init_db init_db() project_registry(("p06-polisher", ["p06"])) - raw = '[{"type": "project", "content": "x", "project": "fake-project-99"}]' + raw = '[{"type": "project", "content": "x", "project": "new-lead-project"}]' interaction = _make_interaction() interaction.project = "" result = _parse_candidates(raw, interaction) - assert result[0].project == "" + assert result[0].project == "new-lead-project" def test_case_e_matching_model_and_interaction(tmp_data_dir, project_registry):