"""Tests for the ingestion pipeline.""" from pathlib import Path from atocore.ingestion.parser import parse_markdown from atocore.models.database import get_connection, init_db from atocore.ingestion.pipeline import ingest_file def test_parse_markdown(sample_markdown): """Test markdown parsing with frontmatter.""" parsed = parse_markdown(sample_markdown) assert parsed.title == "AtoCore Architecture" assert "atocore" in parsed.tags assert "architecture" in parsed.tags assert len(parsed.body) > 0 assert len(parsed.headings) > 0 def test_parse_extracts_headings(sample_markdown): """Test that headings are extracted correctly.""" parsed = parse_markdown(sample_markdown) heading_texts = [h[1] for h in parsed.headings] assert "AtoCore Architecture" in heading_texts assert "Overview" in heading_texts def test_ingest_file(tmp_data_dir, sample_markdown): """Test ingesting a single file.""" init_db() result = ingest_file(sample_markdown) assert result["status"] == "ingested" assert result["chunks"] > 0 # Verify the file was stored in DB with get_connection() as conn: doc = conn.execute( "SELECT COUNT(*) as c FROM source_documents WHERE file_path = ?", (str(sample_markdown.resolve()),), ).fetchone() assert doc["c"] == 1 chunks = conn.execute( "SELECT COUNT(*) as c FROM source_chunks sc " "JOIN source_documents sd ON sc.document_id = sd.id " "WHERE sd.file_path = ?", (str(sample_markdown.resolve()),), ).fetchone() assert chunks["c"] > 0 def test_ingest_skips_unchanged(tmp_data_dir, sample_markdown): """Test that re-ingesting unchanged file is skipped.""" init_db() ingest_file(sample_markdown) result = ingest_file(sample_markdown) assert result["status"] == "skipped" def test_ingest_updates_changed(tmp_data_dir, sample_markdown): """Test that changed files are re-ingested.""" init_db() ingest_file(sample_markdown) # Modify the file sample_markdown.write_text( sample_markdown.read_text(encoding="utf-8") + "\n\n## New Section\n\nNew content added.", encoding="utf-8", ) result = ingest_file(sample_markdown) assert result["status"] == "ingested"