tests/test_retrieval.py

"""Tests for the retrieval system."""

from atocore.ingestion.pipeline import ingest_file
from atocore.models.database import init_db
from atocore.retrieval.retriever import retrieve
from atocore.retrieval.vector_store import get_vector_store


def test_retrieve_returns_results(tmp_data_dir, sample_markdown):
    """Check that a query over an ingested document yields usable hits.

    Calls ``init_db()``, ingests the ``sample_markdown`` fixture, then queries
    the retriever with ``top_k=5``. The test requires at least one hit, and
    every hit must carry a positive ``score`` and truthy ``content``.

    ``tmp_data_dir`` is not referenced in the body; presumably it is requested
    for its setup side effects (isolated data directory) -- confirm in conftest.
    """
    init_db()
    ingest_file(sample_markdown)

    hits = retrieve("What are the memory types?", top_k=5)

    # An empty result would make the per-hit checks below pass vacuously.
    assert len(hits) >= 1
    for hit in hits:
        assert hit.score > 0
    for hit in hits:
        assert hit.content


def test_retrieve_scores_ranked(tmp_data_dir, sample_markdown):
    """Check that hits come back ordered from highest score to lowest.

    Calls ``init_db()``, ingests the ``sample_markdown`` fixture and queries
    the retriever with ``top_k=5``. The ordering comparison only runs when two
    or more hits are returned; with fewer hits the test passes without
    asserting anything.

    ``tmp_data_dir`` is not referenced in the body; presumably it is requested
    for its setup side effects -- confirm in conftest.
    """
    init_db()
    ingest_file(sample_markdown)

    hits = retrieve("architecture layers", top_k=5)
    if len(hits) < 2:
        # Nothing to compare against; ordering is trivially satisfied.
        return

    observed = [hit.score for hit in hits]
    expected = sorted(observed, reverse=True)
    assert observed == expected


def test_vector_store_count(tmp_data_dir, sample_markdown):
    """Check that the vector store reports a positive count after ingestion.

    Calls ``init_db()``, clears the module-level ``_store`` in
    ``atocore.retrieval.vector_store``, ingests the ``sample_markdown``
    fixture, and then reads ``count`` from the store returned by
    ``get_vector_store()``.

    ``tmp_data_dir`` is not referenced in the body; presumably it is requested
    for its setup side effects -- confirm in conftest.
    """
    init_db()

    # Clear the cached store object on the module. Presumably this makes
    # get_vector_store() build a fresh instance for this test rather than
    # reusing one left over from an earlier test -- confirm in vector_store.
    import atocore.retrieval.vector_store as vector_store_module
    vector_store_module._store = None

    ingest_file(sample_markdown)
    assert get_vector_store().count > 0
feat: implement AtoCore Phase 0 + Phase 0.5 (foundation + PoC)

Complete implementation of the personal context engine foundation:

- FastAPI server with 5 endpoints (ingest, query, context/build, health, debug)
- SQLite database with 5 tables (documents, chunks, memories, projects, interactions)
- Heading-aware markdown chunker (800 char max, recursive splitting)
- Multilingual embeddings via sentence-transformers (EN/FR)
- ChromaDB vector store with cosine similarity retrieval
- Context builder with project boosting, dedup, and budget enforcement
- CLI scripts for batch ingestion and test prompt evaluation
- 19 unit tests passing, 79% coverage
- Validated on 482 real project files (8383 chunks, 0 errors)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

2026-04-05 09:21:27 -04:00			`"""Tests for the retrieval system."""`

			`from atocore.ingestion.pipeline import ingest_file`
			`from atocore.models.database import init_db`
			`from atocore.retrieval.retriever import retrieve`
			`from atocore.retrieval.vector_store import get_vector_store`


			`def test_retrieve_returns_results(tmp_data_dir, sample_markdown):`
			`"""Test that retrieval returns relevant chunks."""`
			`init_db()`
			`ingest_file(sample_markdown)`

			`results = retrieve("What are the memory types?", top_k=5)`
			`assert len(results) > 0`
			`assert all(r.score > 0 for r in results)`
			`assert all(r.content for r in results)`


			`def test_retrieve_scores_ranked(tmp_data_dir, sample_markdown):`
			`"""Test that results are ranked by score."""`
			`init_db()`
			`ingest_file(sample_markdown)`

			`results = retrieve("architecture layers", top_k=5)`
			`if len(results) >= 2:`
			`scores = [r.score for r in results]`
			`assert scores == sorted(scores, reverse=True)`


			`def test_vector_store_count(tmp_data_dir, sample_markdown):`
			`"""Test that vector store tracks chunk count."""`
			`init_db()`

			`# Reset singleton for clean test`
			`import atocore.retrieval.vector_store as vs`
			`vs._store = None`

			`ingest_file(sample_markdown)`
			`store = get_vector_store()`
			`assert store.count > 0`