"""Tests for the retrieval system."""
|
||
|
|
|
||
|
|
from atocore.ingestion.pipeline import ingest_file
|
||
|
|
from atocore.models.database import init_db
|
||
|
|
from atocore.retrieval.retriever import retrieve
|
||
|
|
from atocore.retrieval.vector_store import get_vector_store
|
||
|
|
|
||
|
|
|
||
|
|
def test_retrieve_returns_results(tmp_data_dir, sample_markdown):
    """Check that querying an ingested document yields usable hits.

    After ``init_db()`` and ingesting ``sample_markdown``, a query must
    return at least one result, and every result must carry a positive
    ``score`` and truthy ``content``.

    ``tmp_data_dir`` is requested only as a fixture; it is not referenced in
    the body (presumably it isolates on-disk state -- confirm in conftest).
    """
    init_db()
    ingest_file(sample_markdown)

    hits = retrieve("What are the memory types?", top_k=5)

    assert len(hits) > 0
    # Scores are checked for every hit before any content is inspected.
    for hit in hits:
        assert hit.score > 0
    for hit in hits:
        assert hit.content
|
||
|
|
|
||
|
|
|
||
|
|
def test_retrieve_scores_ranked(tmp_data_dir, sample_markdown):
    """Check that retrieved results come back in descending score order.

    After ``init_db()`` and ingesting ``sample_markdown``, the scores of the
    returned results must already be sorted from highest to lowest.

    NOTE: when fewer than two results are returned the ordering check is
    skipped and the test passes without asserting anything.
    """
    init_db()
    ingest_file(sample_markdown)

    hits = retrieve("architecture layers", top_k=5)
    if len(hits) < 2:
        # Ordering cannot be observed with zero or one hit.
        return

    ranked_scores = [hit.score for hit in hits]
    assert ranked_scores == sorted(ranked_scores, reverse=True)
|
||
|
|
|
||
|
|
|
||
|
|
def test_vector_store_count(tmp_data_dir, sample_markdown):
    """Check that the vector store reports a non-zero count after ingestion.

    The module-level ``_store`` in ``atocore.retrieval.vector_store`` is set
    to ``None`` before ingesting, then the store obtained from
    ``get_vector_store()`` must report ``count > 0``.
    """
    init_db()

    # Clear the cached singleton so this test starts from a clean store
    # (presumably get_vector_store() rebuilds it when it is None -- verify).
    import atocore.retrieval.vector_store as vector_store_module

    vector_store_module._store = None

    ingest_file(sample_markdown)
    assert get_vector_store().count > 0
|