Files
ATOCore/tests/test_extractor_llm.py
Anto01 39d73e91b4 fix(R6): fall back to interaction.project when LLM returns empty
Codex R6: the LLM extractor accepted the model's project field
verbatim. When the model returned empty string, clearly p06 memories
got promoted as project='', making them invisible to the p06
project-memory band and explaining the p06-offline-design harness
failure.

Fix: if model returns empty project but interaction.project is set,
inherit the interaction's project. Model-supplied project still takes
precedence when non-empty.

Two new tests lock the fallback and precedence behaviors.
R5 acknowledged (LLM extractor not yet wired into API — next task).

Test count: 278 -> 280. Harness re-run pending after deploy.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 07:37:14 -04:00

178 lines
6.4 KiB
Python

"""Tests for the LLM-assisted extractor path.
Focused on the parser and failure-mode contracts — the actual network
call is exercised out of band by running
``python scripts/extractor_eval.py --mode llm`` against the frozen
labeled corpus with ``ANTHROPIC_API_KEY`` set. These tests only
exercise the pieces that don't need network.
"""
from __future__ import annotations
import os
from unittest.mock import patch
import pytest
from atocore.interactions.service import Interaction
from atocore.memory.extractor_llm import (
LLM_EXTRACTOR_VERSION,
_parse_candidates,
extract_candidates_llm,
extract_candidates_llm_verbose,
)
import atocore.memory.extractor_llm as extractor_llm
def _make_interaction(prompt: str = "p", response: str = "r") -> Interaction:
return Interaction(
id="test-id",
prompt=prompt,
response=response,
response_summary="",
project="",
client="test",
session_id="",
)
def test_parser_handles_empty_array():
result = _parse_candidates("[]", _make_interaction())
assert result == []
def test_parser_handles_malformed_json():
result = _parse_candidates("{ not valid json", _make_interaction())
assert result == []
def test_parser_strips_markdown_fences():
raw = "```json\n[{\"type\": \"knowledge\", \"content\": \"x is y\", \"project\": \"\", \"confidence\": 0.5}]\n```"
result = _parse_candidates(raw, _make_interaction())
assert len(result) == 1
assert result[0].memory_type == "knowledge"
assert result[0].content == "x is y"
def test_parser_strips_surrounding_prose():
raw = "Here are the candidates:\n[{\"type\": \"project\", \"content\": \"foo\", \"project\": \"p04\", \"confidence\": 0.6}]\nThat's it."
result = _parse_candidates(raw, _make_interaction())
assert len(result) == 1
assert result[0].memory_type == "project"
assert result[0].project == "p04"
def test_parser_drops_invalid_memory_types():
raw = '[{"type": "nonsense", "content": "x"}, {"type": "project", "content": "y"}]'
result = _parse_candidates(raw, _make_interaction())
assert len(result) == 1
assert result[0].memory_type == "project"
def test_parser_drops_empty_content():
raw = '[{"type": "knowledge", "content": " "}, {"type": "knowledge", "content": "real"}]'
result = _parse_candidates(raw, _make_interaction())
assert len(result) == 1
assert result[0].content == "real"
def test_parser_clamps_confidence_to_unit_interval():
raw = '[{"type": "knowledge", "content": "c1", "confidence": 2.5}, {"type": "knowledge", "content": "c2", "confidence": -0.4}]'
result = _parse_candidates(raw, _make_interaction())
assert result[0].confidence == 1.0
assert result[1].confidence == 0.0
def test_parser_defaults_confidence_on_missing_field():
raw = '[{"type": "knowledge", "content": "c1"}]'
result = _parse_candidates(raw, _make_interaction())
assert result[0].confidence == 0.5
def test_parser_tags_version_and_rule():
raw = '[{"type": "project", "content": "c1"}]'
result = _parse_candidates(raw, _make_interaction())
assert result[0].rule == "llm_extraction"
assert result[0].extractor_version == LLM_EXTRACTOR_VERSION
assert result[0].source_interaction_id == "test-id"
def test_parser_falls_back_to_interaction_project():
"""R6: when the model returns empty project but the interaction
has one, the candidate should inherit the interaction's project."""
raw = '[{"type": "project", "content": "machine works offline"}]'
interaction = _make_interaction()
interaction.project = "p06-polisher"
result = _parse_candidates(raw, interaction)
assert result[0].project == "p06-polisher"
def test_parser_keeps_model_project_when_provided():
"""Model-supplied project takes precedence over interaction."""
raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]'
interaction = _make_interaction()
interaction.project = "p06-polisher"
result = _parse_candidates(raw, interaction)
assert result[0].project == "p04-gigabit"
def test_missing_cli_returns_empty(monkeypatch):
"""If ``claude`` is not on PATH the extractor returns empty, never raises."""
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)
result = extract_candidates_llm_verbose(_make_interaction("p", "some real response"))
assert result.candidates == []
assert result.error == "claude_cli_missing"
def test_empty_response_returns_empty(monkeypatch):
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
result = extract_candidates_llm_verbose(_make_interaction("p", ""))
assert result.candidates == []
assert result.error == "empty_response"
def test_subprocess_timeout_returns_empty(monkeypatch):
"""A subprocess timeout must not raise into the caller."""
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
import subprocess as _sp
def _boom(*a, **kw):
raise _sp.TimeoutExpired(cmd=a[0] if a else "claude", timeout=1)
monkeypatch.setattr(extractor_llm.subprocess, "run", _boom)
result = extract_candidates_llm_verbose(_make_interaction("p", "real response"))
assert result.candidates == []
assert result.error == "timeout"
def test_subprocess_nonzero_exit_returns_empty(monkeypatch):
"""A non-zero CLI exit (auth failure, etc.) must not raise."""
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
class _Completed:
returncode = 1
stdout = ""
stderr = "auth failed"
monkeypatch.setattr(extractor_llm.subprocess, "run", lambda *a, **kw: _Completed())
result = extract_candidates_llm_verbose(_make_interaction("p", "real response"))
assert result.candidates == []
assert result.error == "exit_1"
def test_happy_path_parses_stdout(monkeypatch):
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: True)
class _Completed:
returncode = 0
stdout = '[{"type": "project", "content": "p04 selected Option B", "project": "p04-gigabit", "confidence": 0.6}]'
stderr = ""
monkeypatch.setattr(extractor_llm.subprocess, "run", lambda *a, **kw: _Completed())
result = extract_candidates_llm_verbose(_make_interaction("p", "r"))
assert len(result.candidates) == 1
assert result.candidates[0].memory_type == "project"
assert result.candidates[0].project == "p04-gigabit"
assert abs(result.candidates[0].confidence - 0.6) < 1e-9