Stabilize core correctness and sync project plan state
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""Tests for the retrieval system."""
|
||||
|
||||
from atocore.ingestion.pipeline import ingest_file
|
||||
from atocore.models.database import init_db
|
||||
from atocore.models.database import get_connection, init_db
|
||||
from atocore.retrieval.retriever import retrieve
|
||||
from atocore.retrieval.vector_store import get_vector_store
|
||||
|
||||
@@ -39,3 +39,31 @@ def test_vector_store_count(tmp_data_dir, sample_markdown):
|
||||
ingest_file(sample_markdown)
|
||||
store = get_vector_store()
|
||||
assert store.count > 0
|
||||
|
||||
|
||||
def test_retrieve_skips_stale_vector_entries(tmp_data_dir, sample_markdown, monkeypatch):
    """Retriever should ignore vector hits whose chunk rows no longer exist.

    The retriever's vector store is replaced with a fake whose ``query``
    returns two hits: the id of a real ``source_chunks`` row, followed by
    ``"missing-chunk"``, an id that is not expected to be present in the
    table. Only the first hit should be returned by ``retrieve``.
    """
    init_db()
    ingest_file(sample_markdown)

    with get_connection() as conn:
        chunk_ids = [row["id"] for row in conn.execute("SELECT id FROM source_chunks").fetchall()]

    # Fail here with a clear message instead of an IndexError raised from
    # inside the fake store while ``retrieve`` is running.
    assert chunk_ids, "ingest_file produced no source_chunks rows"
    valid_chunk_id = chunk_ids[0]

    class FakeStore:
        """Stand-in vector store that returns one valid and one stale hit."""

        def query(self, query_embedding, top_k=10, where=None):
            # Arguments are ignored; the canned result is always the same.
            return {
                "ids": [[valid_chunk_id, "missing-chunk"]],
                "documents": [["valid doc", "stale doc"]],
                "metadatas": [[
                    {"heading_path": "Overview", "source_file": "valid.md", "tags": "[]", "title": "Valid", "document_id": "doc-1"},
                    {"heading_path": "Ghost", "source_file": "ghost.md", "tags": "[]", "title": "Ghost", "document_id": "doc-2"},
                ]],
                "distances": [[0.1, 0.2]],
            }

    # Patch the names as looked up by the retriever module so that
    # ``retrieve`` uses the fake store and a fixed query embedding.
    monkeypatch.setattr("atocore.retrieval.retriever.get_vector_store", lambda: FakeStore())
    monkeypatch.setattr("atocore.retrieval.retriever.embed_query", lambda query: [0.0, 0.1])

    results = retrieve("overview", top_k=2)

    assert len(results) == 1
    assert results[0].chunk_id == valid_chunk_id
|
||||
|
||||
Reference in New Issue
Block a user