Stabilize core correctness and sync project plan state

2026-04-05 17:53:23 -04:00
parent b48f0c95ab
commit b0889b3925
20 changed files with 551 additions and 168 deletions

View File

@@ -1,11 +1,14 @@
"""pytest configuration and shared fixtures."""
import os
import sys
import tempfile
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
# Default test data directory — overridden per-test by fixtures
_default_test_dir = tempfile.mkdtemp(prefix="atocore_test_")
os.environ["ATOCORE_DATA_DIR"] = _default_test_dir

View File

@@ -27,6 +27,7 @@ def test_context_respects_budget(tmp_data_dir, sample_markdown):
pack = build_context("What is AtoCore?", budget=500)
assert pack.total_chars <= 500
assert len(pack.formatted_context) <= 500
def test_context_with_project_hint(tmp_data_dir, sample_markdown):
@@ -82,6 +83,18 @@ def test_project_state_included_in_context(tmp_data_dir, sample_markdown):
assert pack.project_state_chars > 0
def test_trusted_state_precedence_is_restated_in_retrieved_context(tmp_data_dir, sample_markdown):
"""When trusted state and retrieval coexist, the context should restate precedence explicitly."""
init_db()
init_project_state_schema()
ingest_file(sample_markdown)
set_state("atocore", "status", "phase", "Phase 2")
pack = build_context("What is AtoCore?", project_hint="atocore")
assert "If retrieved context conflicts with Trusted Project State above" in pack.formatted_context
def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown):
"""Test that project state is included even with tight budget."""
init_db()
@@ -95,6 +108,32 @@ def test_project_state_takes_priority_budget(tmp_data_dir, sample_markdown):
assert "Phase 1 in progress" in pack.formatted_context
def test_project_state_respects_total_budget(tmp_data_dir, sample_markdown):
"""Trusted state should still fit within the total context budget."""
init_db()
init_project_state_schema()
ingest_file(sample_markdown)
set_state("atocore", "status", "notes", "x" * 400)
set_state("atocore", "decision", "details", "y" * 400)
pack = build_context("status?", project_hint="atocore", budget=120)
assert pack.total_chars <= 120
assert pack.budget_remaining >= 0
assert len(pack.formatted_context) <= 120
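
Taken together, the budget tests pin down an ordering: trusted state is serialized first, retrieval fills whatever remains, and the final string must not exceed the budget. A character-counting sketch of that accounting; assemble and its truncation strategy are illustrative, not the real implementation:

def assemble(state_text: str, retrieved_text: str, budget: int) -> str:
    # Trusted state gets first claim on the budget, hard-truncated
    # here for brevity; the real code likely trims entry-by-entry.
    state_part = state_text[:budget]
    remaining = budget - len(state_part)
    retrieved_part = retrieved_text[:max(remaining, 0)]
    combined = state_part + retrieved_part
    assert len(combined) <= budget
    return combined
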
def test_project_hint_matches_state_case_insensitively(tmp_data_dir, sample_markdown):
"""Project state lookup should not depend on exact casing."""
init_db()
init_project_state_schema()
ingest_file(sample_markdown)
set_state("AtoCore", "status", "phase", "Phase 2")
pack = build_context("status?", project_hint="atocore")
assert "Phase 2" in pack.formatted_context
def test_no_project_state_without_hint(tmp_data_dir, sample_markdown):
"""Test that project state is not included without project hint."""
init_db()

View File

@@ -1,10 +1,8 @@
"""Tests for the ingestion pipeline."""
from pathlib import Path
from atocore.ingestion.parser import parse_markdown
from atocore.models.database import get_connection, init_db
from atocore.ingestion.pipeline import ingest_file
from atocore.ingestion.pipeline import ingest_file, ingest_folder
def test_parse_markdown(sample_markdown):
@@ -69,3 +67,104 @@ def test_ingest_updates_changed(tmp_data_dir, sample_markdown):
)
result = ingest_file(sample_markdown)
assert result["status"] == "ingested"
def test_parse_markdown_uses_supplied_text(sample_markdown):
"""Parsing should be able to reuse pre-read content from ingestion."""
latin_text = """---\ntags: parser\n---\n# Parser Title\n\nBody text."""
parsed = parse_markdown(sample_markdown, text=latin_text)
assert parsed.title == "Parser Title"
assert "parser" in parsed.tags
def test_reingest_empty_replaces_stale_chunks(tmp_data_dir, sample_markdown, monkeypatch):
"""Re-ingesting a file with no chunks should clear stale DB/vector state."""
init_db()
class FakeVectorStore:
def __init__(self):
self.deleted_ids = []
def add(self, ids, documents, metadatas):
return None
def delete(self, ids):
self.deleted_ids.extend(ids)
fake_store = FakeVectorStore()
monkeypatch.setattr("atocore.ingestion.pipeline.get_vector_store", lambda: fake_store)
first = ingest_file(sample_markdown)
assert first["status"] == "ingested"
sample_markdown.write_text("# Changed\n\nThis update should now produce no chunks after monkeypatching.", encoding="utf-8")
monkeypatch.setattr("atocore.ingestion.pipeline.chunk_markdown", lambda *args, **kwargs: [])
second = ingest_file(sample_markdown)
assert second["status"] == "empty"
with get_connection() as conn:
chunk_count = conn.execute("SELECT COUNT(*) AS c FROM source_chunks").fetchone()
assert chunk_count["c"] == 0
assert fake_store.deleted_ids
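
For this path the pipeline presumably clears the document's old chunk rows and vector entries before noticing there is nothing new to write. A sketch of that branch, with SQL and store calls shaped after what the test exercises rather than copied from the module:

def _replace_chunks(conn, store, document_id, new_chunks):
    # Collect stale ids first so the vector store and the relational
    # rows are cleared together, never one without the other.
    stale = [row["id"] for row in conn.execute(
        "SELECT id FROM source_chunks WHERE document_id = ?",
        (document_id,),
    ).fetchall()]
    if stale:
        store.delete(stale)
        conn.execute(
            "DELETE FROM source_chunks WHERE document_id = ?",
            (document_id,),
        )
    if not new_chunks:
        return "empty"
    ...  # insert and embed the new chunks as usual
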
def test_ingest_folder_includes_markdown_extension(tmp_data_dir, sample_folder, monkeypatch):
"""Folder ingestion should include both .md and .markdown files."""
init_db()
markdown_file = sample_folder / "third_note.markdown"
markdown_file.write_text("# Third Note\n\nThis file should be discovered during folder ingestion.", encoding="utf-8")
class FakeVectorStore:
def add(self, ids, documents, metadatas):
return None
def delete(self, ids):
return None
@property
def count(self):
return 0
monkeypatch.setattr("atocore.ingestion.pipeline.get_vector_store", lambda: FakeVectorStore())
results = ingest_folder(sample_folder)
files = {result["file"] for result in results if "file" in result}
assert str(markdown_file.resolve()) in files
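
Discovery only needs to union two globs to cover both extensions. A minimal sketch, assuming recursive traversal and deterministic ordering:

from pathlib import Path

def discover_markdown(folder: Path) -> list[Path]:
    # Both conventional markdown extensions count; sort so ingestion
    # order is stable across runs.
    files = set(folder.rglob("*.md")) | set(folder.rglob("*.markdown"))
    return sorted(files)
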
def test_purge_deleted_files_does_not_match_sibling_prefix(tmp_data_dir, sample_folder, monkeypatch):
"""Purging one folder should not delete entries from a sibling folder with the same prefix."""
init_db()
class FakeVectorStore:
def add(self, ids, documents, metadatas):
return None
def delete(self, ids):
return None
@property
def count(self):
return 0
monkeypatch.setattr("atocore.ingestion.pipeline.get_vector_store", lambda: FakeVectorStore())
kept_folder = tmp_data_dir / "notes"
kept_folder.mkdir()
kept_file = kept_folder / "keep.md"
kept_file.write_text("# Keep\n\nThis document should survive purge.", encoding="utf-8")
ingest_file(kept_file)
purge_folder = tmp_data_dir / "notes-project"
purge_folder.mkdir()
purge_file = purge_folder / "gone.md"
purge_file.write_text("# Gone\n\nThis document will be purged.", encoding="utf-8")
ingest_file(purge_file)
purge_file.unlink()
ingest_folder(purge_folder, purge_deleted=True)
with get_connection() as conn:
rows = conn.execute("SELECT file_path FROM source_documents").fetchall()
remaining_paths = {row["file_path"] for row in rows}
assert str(kept_file.resolve()) in remaining_paths
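
This test guards a classic bug: matching stored paths with a bare prefix (LIKE 'notes%') also catches notes-project/... entries. Requiring the path separator after the folder prefix rules the sibling out. A sketch, assuming absolute paths are stored:

import os

def _belongs_to(file_path: str, folder: str) -> bool:
    # "notes" must not match "notes-project"; the trailing separator
    # makes the prefix unambiguous.
    prefix = folder.rstrip(os.sep) + os.sep
    return file_path.startswith(prefix)

The SQL equivalent is LIKE folder || '/%' with the same trailing separator.
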

View File

@@ -47,6 +47,23 @@ def test_create_memory_dedup(isolated_db):
assert m1.id == m2.id
def test_create_memory_dedup_is_project_scoped(isolated_db):
from atocore.memory.service import create_memory
m1 = create_memory("project", "Uses SQLite for local state", project="atocore")
m2 = create_memory("project", "Uses SQLite for local state", project="openclaw")
assert m1.id != m2.id
def test_project_is_persisted_and_filterable(isolated_db):
from atocore.memory.service import create_memory, get_memories
create_memory("project", "Uses SQLite for local state", project="atocore")
create_memory("project", "Uses Postgres in production", project="openclaw")
atocore_memories = get_memories(memory_type="project", project="atocore")
assert len(atocore_memories) == 1
assert atocore_memories[0].project == "atocore"
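
Project-scoped dedup means the uniqueness probe has to include the project column, with NULL treated as its own scope. One way create_memory could check; the memories table and invalidated_at column are assumptions based on the surrounding tests:

def find_active_duplicate(conn, memory_type, content, project):
    # "IS ?" instead of "= ?" so a NULL project scope matches itself.
    return conn.execute(
        "SELECT id FROM memories "
        "WHERE memory_type = ? AND content = ? AND project IS ? "
        "AND invalidated_at IS NULL",
        (memory_type, content, project),
    ).fetchone()
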
def test_get_memories_all(isolated_db):
from atocore.memory.service import create_memory, get_memories
create_memory("identity", "User is an engineer")
@@ -97,6 +114,25 @@ def test_update_memory(isolated_db):
assert mems[0].confidence == 0.8
def test_update_memory_rejects_duplicate_active_memory(isolated_db):
from atocore.memory.service import create_memory, update_memory
import pytest
first = create_memory("knowledge", "Canonical fact", project="atocore")
second = create_memory("knowledge", "Different fact", project="atocore")
with pytest.raises(ValueError, match="duplicate active memory"):
update_memory(second.id, content="Canonical fact")
def test_create_memory_validates_confidence(isolated_db):
from atocore.memory.service import create_memory
import pytest
with pytest.raises(ValueError, match="Confidence must be between 0.0 and 1.0"):
create_memory("knowledge", "Out of range", confidence=1.5)
def test_invalidate_memory(isolated_db):
from atocore.memory.service import create_memory, get_memories, invalidate_memory
mem = create_memory("knowledge", "Wrong fact")
@@ -126,6 +162,25 @@ def test_memories_for_context(isolated_db):
assert chars > 0
def test_memories_for_context_reserves_room_for_each_type(isolated_db):
from atocore.memory.service import create_memory, get_memories_for_context
create_memory("identity", "Identity entry that is intentionally long so it could consume the whole budget on its own")
create_memory("preference", "Preference entry that should still appear")
text, _ = get_memories_for_context(memory_types=["identity", "preference"], budget=120)
assert "[preference]" in text
def test_memories_for_context_respects_actual_serialized_budget(isolated_db):
from atocore.memory.service import create_memory, get_memories_for_context
create_memory("identity", "Identity text that should fit the wrapper-aware memory budget calculation")
create_memory("preference", "Preference text that should also fit")
text, chars = get_memories_for_context(memory_types=["identity", "preference"], budget=140)
assert chars == len(text)
assert chars <= 140
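
The chars == len(text) assertion implies the service counts the serialized lines, type tags and joins included, rather than raw content lengths. A sketch of a wrapper-aware reservation loop; the [type] prefix comes from the assertions above, while the even per-type split is an assumption:

def memories_for_context(entries_by_type, budget):
    # Reserve an even share per requested type so one long entry
    # cannot starve the others, then measure the real serialized text.
    share = budget // max(len(entries_by_type), 1)
    lines = []
    for mtype, entries in entries_by_type.items():
        used = 0
        for content in entries:
            line = f"[{mtype}] {content}"
            if used + len(line) + 1 > share:
                break  # +1 accounts for the joining newline
            lines.append(line)
            used += len(line) + 1
    text = "\n".join(lines)
    return text, len(text)
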
def test_memories_for_context_empty(isolated_db):
from atocore.memory.service import get_memories_for_context
text, chars = get_memories_for_context()

View File

@@ -57,6 +57,12 @@ def test_set_state_invalid_category():
set_state("myproject", "invalid_category", "key", "value")
def test_set_state_validates_confidence():
"""Project-state confidence should stay within the documented range."""
with pytest.raises(ValueError, match="Confidence must be between 0.0 and 1.0"):
set_state("myproject", "status", "phase", "Phase 1", confidence=1.2)
def test_get_state_all():
"""Test getting all state entries for a project."""
set_state("proj", "status", "phase", "Phase 1")

View File

@@ -1,7 +1,7 @@
"""Tests for the retrieval system."""
from atocore.ingestion.pipeline import ingest_file
from atocore.models.database import init_db
from atocore.models.database import get_connection, init_db
from atocore.retrieval.retriever import retrieve
from atocore.retrieval.vector_store import get_vector_store
@@ -39,3 +39,31 @@ def test_vector_store_count(tmp_data_dir, sample_markdown):
ingest_file(sample_markdown)
store = get_vector_store()
assert store.count > 0
def test_retrieve_skips_stale_vector_entries(tmp_data_dir, sample_markdown, monkeypatch):
"""Retriever should ignore vector hits whose chunk rows no longer exist."""
init_db()
ingest_file(sample_markdown)
with get_connection() as conn:
chunk_ids = [row["id"] for row in conn.execute("SELECT id FROM source_chunks").fetchall()]
class FakeStore:
def query(self, query_embedding, top_k=10, where=None):
return {
"ids": [[chunk_ids[0], "missing-chunk"]],
"documents": [["valid doc", "stale doc"]],
"metadatas": [[
{"heading_path": "Overview", "source_file": "valid.md", "tags": "[]", "title": "Valid", "document_id": "doc-1"},
{"heading_path": "Ghost", "source_file": "ghost.md", "tags": "[]", "title": "Ghost", "document_id": "doc-2"},
]],
"distances": [[0.1, 0.2]],
}
monkeypatch.setattr("atocore.retrieval.retriever.get_vector_store", lambda: FakeStore())
monkeypatch.setattr("atocore.retrieval.retriever.embed_query", lambda query: [0.0, 0.1])
results = retrieve("overview", top_k=2)
assert len(results) == 1
assert results[0].chunk_id == chunk_ids[0]
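
The test pins down the filtering contract: every vector hit is re-checked against source_chunks and silently dropped when its row is gone. A sketch of that guard inside retrieve, with result construction elided and the helper name assumed:

def _filter_live_hits(conn, ids, documents, metadatas, distances):
    live = []
    for chunk_id, doc, meta, dist in zip(ids, documents, metadatas, distances):
        row = conn.execute(
            "SELECT id FROM source_chunks WHERE id = ?", (chunk_id,)
        ).fetchone()
        if row is None:
            # The vector store lagged behind a purge; skip the ghost.
            continue
        live.append((chunk_id, doc, meta, dist))
    return live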