From 39d73e91b4961e1e5ea4ee621616f1beb91a30af Mon Sep 17 00:00:00 2001 From: Anto01 Date: Sun, 12 Apr 2026 07:37:14 -0400 Subject: [PATCH] fix(R6): fall back to interaction.project when LLM returns empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex R6: the LLM extractor accepted the model's project field verbatim. When the model returned an empty string, memories that clearly belonged to p06 were promoted as project='', making them invisible to the p06 project-memory band and explaining the p06-offline-design harness failure. Fix: if the model returns an empty project but interaction.project is set, inherit the interaction's project. Model-supplied project still takes precedence when non-empty. Two new tests lock the fallback and precedence behaviors. R5 acknowledged (LLM extractor not yet wired into API — next task). Test count: 278 -> 280. Harness re-run pending after deploy. Co-Authored-By: Claude Opus 4.6 (1M context) --- DEV-LEDGER.md | 4 ++-- src/atocore/memory/extractor_llm.py | 2 ++ tests/test_extractor_llm.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/DEV-LEDGER.md b/DEV-LEDGER.md index b754ae3..792765b 100644 --- a/DEV-LEDGER.md +++ b/DEV-LEDGER.md @@ -124,8 +124,8 @@ One branch `codex/extractor-eval-loop` for Day 1-5, a second `codex/retrieval-ha | R2 | Codex | P1 | src/atocore/context/builder.py | Project memories excluded from pack | fixed | Claude | 2026-04-11 | 8ea53f4 | | R3 | Claude | P2 | src/atocore/memory/extractor.py | Rule cues (`## Decision:`) never fire on conversational LLM text | open | Claude | 2026-04-11 | | | R4 | Codex | P2 | DEV-LEDGER.md:11 | Orientation `main_tip` was stale versus `HEAD` / `origin/main` | fixed | Codex | 2026-04-11 | 81307ce | -| R5 | Codex | P1 | src/atocore/interactions/service.py:157-174 | The deployed extraction path still calls only the rule extractor; the new LLM extractor is eval/script-only, so Day 4 "gate cleared" is true as a benchmark 
result but not as an operational extraction path | open | Claude | 2026-04-12 | | -| R6 | Codex | P1 | src/atocore/memory/extractor_llm.py:258-276 | LLM extraction accepts model-supplied `project` verbatim with no fallback to `interaction.project`; live triage promoted a clearly p06 memory (offline/network rule) as project=`""`, which explains the p06-offline-design harness miss and falsifies the current "all 3 failures are budget-contention" claim | open | Claude | 2026-04-12 | | +| R5 | Codex | P1 | src/atocore/interactions/service.py:157-174 | The deployed extraction path still calls only the rule extractor; the new LLM extractor is eval/script-only, so Day 4 "gate cleared" is true as a benchmark result but not as an operational extraction path | acknowledged | Claude | 2026-04-12 | | +| R6 | Codex | P1 | src/atocore/memory/extractor_llm.py:258-276 | LLM extraction accepts model-supplied `project` verbatim with no fallback to `interaction.project`; live triage promoted a clearly p06 memory (offline/network rule) as project=`""`, which explains the p06-offline-design harness miss and falsifies the current "all 3 failures are budget-contention" claim | fixed | Claude | 2026-04-12 | this commit | | R7 | Codex | P2 | src/atocore/memory/service.py:448-459 | Query ranking is overlap-count only, so broad overview memories can tie exact low-confidence memories and win on confidence; p06-firmware-interface is not just budget pressure, it also exposes a weak lexical scorer | open | Claude | 2026-04-12 | | | R8 | Codex | P2 | tests/test_extractor_llm.py:1-7 | LLM extractor tests stop at parser/failure contracts; there is no automated coverage for the script-only persistence/review path that produced the 16 promoted memories, including project-scope preservation | open | Claude | 2026-04-12 | | diff --git a/src/atocore/memory/extractor_llm.py b/src/atocore/memory/extractor_llm.py index 58a7312..b7b57bb 100644 --- a/src/atocore/memory/extractor_llm.py +++ 
b/src/atocore/memory/extractor_llm.py @@ -256,6 +256,8 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC mem_type = str(item.get("type") or "").strip().lower() content = str(item.get("content") or "").strip() project = str(item.get("project") or "").strip() + if not project and interaction.project: + project = interaction.project confidence_raw = item.get("confidence", 0.5) if mem_type not in MEMORY_TYPES: continue diff --git a/tests/test_extractor_llm.py b/tests/test_extractor_llm.py index 3883787..1bbc79a 100644 --- a/tests/test_extractor_llm.py +++ b/tests/test_extractor_llm.py @@ -97,6 +97,25 @@ def test_parser_tags_version_and_rule(): assert result[0].source_interaction_id == "test-id" +def test_parser_falls_back_to_interaction_project(): + """R6: when the model returns empty project but the interaction + has one, the candidate should inherit the interaction's project.""" + raw = '[{"type": "project", "content": "machine works offline"}]' + interaction = _make_interaction() + interaction.project = "p06-polisher" + result = _parse_candidates(raw, interaction) + assert result[0].project == "p06-polisher" + + +def test_parser_keeps_model_project_when_provided(): + """Model-supplied project takes precedence over interaction.""" + raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]' + interaction = _make_interaction() + interaction.project = "p06-polisher" + result = _parse_candidates(raw, interaction) + assert result[0].project == "p04-gigabit" + + def test_missing_cli_returns_empty(monkeypatch): """If ``claude`` is not on PATH the extractor returns empty, never raises.""" monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)