Compare commits
3 Commits
codex/audi
...
39d73e91b4
| Author | SHA1 | Date | |
|---|---|---|---|
| 39d73e91b4 | |||
| 7ddf0e38ee | |||
| b0fde3ee60 |
@@ -124,8 +124,8 @@ One branch `codex/extractor-eval-loop` for Day 1-5, a second `codex/retrieval-ha
|
|||||||
| R2 | Codex | P1 | src/atocore/context/builder.py | Project memories excluded from pack | fixed | Claude | 2026-04-11 | 8ea53f4 |
|
| R2 | Codex | P1 | src/atocore/context/builder.py | Project memories excluded from pack | fixed | Claude | 2026-04-11 | 8ea53f4 |
|
||||||
| R3 | Claude | P2 | src/atocore/memory/extractor.py | Rule cues (`## Decision:`) never fire on conversational LLM text | open | Claude | 2026-04-11 | |
|
| R3 | Claude | P2 | src/atocore/memory/extractor.py | Rule cues (`## Decision:`) never fire on conversational LLM text | open | Claude | 2026-04-11 | |
|
||||||
| R4 | Codex | P2 | DEV-LEDGER.md:11 | Orientation `main_tip` was stale versus `HEAD` / `origin/main` | fixed | Codex | 2026-04-11 | 81307ce |
|
| R4 | Codex | P2 | DEV-LEDGER.md:11 | Orientation `main_tip` was stale versus `HEAD` / `origin/main` | fixed | Codex | 2026-04-11 | 81307ce |
|
||||||
| R5 | Codex | P1 | src/atocore/interactions/service.py:157-174 | The deployed extraction path still calls only the rule extractor; the new LLM extractor is eval/script-only, so Day 4 "gate cleared" is true as a benchmark result but not as an operational extraction path | open | Claude | 2026-04-12 | |
|
| R5 | Codex | P1 | src/atocore/interactions/service.py:157-174 | The deployed extraction path still calls only the rule extractor; the new LLM extractor is eval/script-only, so Day 4 "gate cleared" is true as a benchmark result but not as an operational extraction path | acknowledged | Claude | 2026-04-12 | |
|
||||||
| R6 | Codex | P1 | src/atocore/memory/extractor_llm.py:258-276 | LLM extraction accepts model-supplied `project` verbatim with no fallback to `interaction.project`; live triage promoted a clearly p06 memory (offline/network rule) as project=`""`, which explains the p06-offline-design harness miss and falsifies the current "all 3 failures are budget-contention" claim | open | Claude | 2026-04-12 | |
|
| R6 | Codex | P1 | src/atocore/memory/extractor_llm.py:258-276 | LLM extraction accepts model-supplied `project` verbatim with no fallback to `interaction.project`; live triage promoted a clearly p06 memory (offline/network rule) as project=`""`, which explains the p06-offline-design harness miss and falsifies the current "all 3 failures are budget-contention" claim | fixed | Claude | 2026-04-12 | this commit |
|
||||||
| R7 | Codex | P2 | src/atocore/memory/service.py:448-459 | Query ranking is overlap-count only, so broad overview memories can tie exact low-confidence memories and win on confidence; p06-firmware-interface is not just budget pressure, it also exposes a weak lexical scorer | open | Claude | 2026-04-12 | |
|
| R7 | Codex | P2 | src/atocore/memory/service.py:448-459 | Query ranking is overlap-count only, so broad overview memories can tie exact low-confidence memories and win on confidence; p06-firmware-interface is not just budget pressure, it also exposes a weak lexical scorer | open | Claude | 2026-04-12 | |
|
||||||
| R8 | Codex | P2 | tests/test_extractor_llm.py:1-7 | LLM extractor tests stop at parser/failure contracts; there is no automated coverage for the script-only persistence/review path that produced the 16 promoted memories, including project-scope preservation | open | Claude | 2026-04-12 | |
|
| R8 | Codex | P2 | tests/test_extractor_llm.py:1-7 | LLM extractor tests stop at parser/failure contracts; there is no automated coverage for the script-only persistence/review path that produced the 16 promoted memories, including project-scope preservation | open | Claude | 2026-04-12 | |
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ Configuration:
|
|||||||
|
|
||||||
- Requires the ``claude`` CLI on PATH (``claude --version`` should work).
|
- Requires the ``claude`` CLI on PATH (``claude --version`` should work).
|
||||||
- ``ATOCORE_LLM_EXTRACTOR_MODEL`` overrides the model alias (default
|
- ``ATOCORE_LLM_EXTRACTOR_MODEL`` overrides the model alias (default
|
||||||
``haiku``).
|
``sonnet``).
|
||||||
- ``ATOCORE_LLM_EXTRACTOR_TIMEOUT_S`` overrides the per-call timeout
|
- ``ATOCORE_LLM_EXTRACTOR_TIMEOUT_S`` overrides the per-call timeout
|
||||||
(default 90 seconds — first invocation is slow because Node.js
|
(default 90 seconds — first invocation is slow because Node.js
|
||||||
startup plus OAuth check is non-trivial).
|
startup plus OAuth check is non-trivial).
|
||||||
@@ -65,7 +65,7 @@ from atocore.observability.logger import get_logger
|
|||||||
log = get_logger("extractor_llm")
|
log = get_logger("extractor_llm")
|
||||||
|
|
||||||
LLM_EXTRACTOR_VERSION = "llm-0.2.0"
|
LLM_EXTRACTOR_VERSION = "llm-0.2.0"
|
||||||
DEFAULT_MODEL = os.environ.get("ATOCORE_LLM_EXTRACTOR_MODEL", "haiku")
|
DEFAULT_MODEL = os.environ.get("ATOCORE_LLM_EXTRACTOR_MODEL", "sonnet")
|
||||||
DEFAULT_TIMEOUT_S = float(os.environ.get("ATOCORE_LLM_EXTRACTOR_TIMEOUT_S", "90"))
|
DEFAULT_TIMEOUT_S = float(os.environ.get("ATOCORE_LLM_EXTRACTOR_TIMEOUT_S", "90"))
|
||||||
MAX_RESPONSE_CHARS = 8000
|
MAX_RESPONSE_CHARS = 8000
|
||||||
MAX_PROMPT_CHARS = 2000
|
MAX_PROMPT_CHARS = 2000
|
||||||
@@ -256,6 +256,8 @@ def _parse_candidates(raw_output: str, interaction: Interaction) -> list[MemoryC
|
|||||||
mem_type = str(item.get("type") or "").strip().lower()
|
mem_type = str(item.get("type") or "").strip().lower()
|
||||||
content = str(item.get("content") or "").strip()
|
content = str(item.get("content") or "").strip()
|
||||||
project = str(item.get("project") or "").strip()
|
project = str(item.get("project") or "").strip()
|
||||||
|
if not project and interaction.project:
|
||||||
|
project = interaction.project
|
||||||
confidence_raw = item.get("confidence", 0.5)
|
confidence_raw = item.get("confidence", 0.5)
|
||||||
if mem_type not in MEMORY_TYPES:
|
if mem_type not in MEMORY_TYPES:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -97,6 +97,25 @@ def test_parser_tags_version_and_rule():
|
|||||||
assert result[0].source_interaction_id == "test-id"
|
assert result[0].source_interaction_id == "test-id"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parser_falls_back_to_interaction_project():
|
||||||
|
"""R6: when the model returns empty project but the interaction
|
||||||
|
has one, the candidate should inherit the interaction's project."""
|
||||||
|
raw = '[{"type": "project", "content": "machine works offline"}]'
|
||||||
|
interaction = _make_interaction()
|
||||||
|
interaction.project = "p06-polisher"
|
||||||
|
result = _parse_candidates(raw, interaction)
|
||||||
|
assert result[0].project == "p06-polisher"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parser_keeps_model_project_when_provided():
|
||||||
|
"""Model-supplied project takes precedence over interaction."""
|
||||||
|
raw = '[{"type": "project", "content": "x", "project": "p04-gigabit"}]'
|
||||||
|
interaction = _make_interaction()
|
||||||
|
interaction.project = "p06-polisher"
|
||||||
|
result = _parse_candidates(raw, interaction)
|
||||||
|
assert result[0].project == "p04-gigabit"
|
||||||
|
|
||||||
|
|
||||||
def test_missing_cli_returns_empty(monkeypatch):
|
def test_missing_cli_returns_empty(monkeypatch):
|
||||||
"""If ``claude`` is not on PATH the extractor returns empty, never raises."""
|
"""If ``claude`` is not on PATH the extractor returns empty, never raises."""
|
||||||
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)
|
monkeypatch.setattr(extractor_llm, "_cli_available", lambda: False)
|
||||||
|
|||||||
Reference in New Issue
Block a user