189 lines
5.0 KiB
Python
189 lines
5.0 KiB
Python
"""pytest configuration and shared fixtures."""
|
|
|
|
import atexit
import json
import os
import shutil
import sys
import tempfile
from pathlib import Path

import pytest
|
|
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
|
|
|
# Default test data directory — overridden per-test by fixtures
_default_test_dir = tempfile.mkdtemp(prefix="atocore_test_")
# mkdtemp() never cleans up after itself; remove the directory when the
# interpreter exits so repeated test runs don't pile up temp directories.
# ignore_errors=True keeps a still-open file from turning exit into a crash.
atexit.register(shutil.rmtree, _default_test_dir, ignore_errors=True)
os.environ["ATOCORE_DATA_DIR"] = _default_test_dir
os.environ["ATOCORE_DEBUG"] = "true"
|
|
|
|
|
|
# V1-0: every entity created in a test is "hand authored" by the test
# author — fixture data, not extracted content. Rather than rewrite 100+
# existing test call sites, wrap create_entity so that tests which don't
# provide source_refs get hand_authored=True automatically. Tests that
# explicitly pass source_refs or hand_authored are unaffected. This keeps
# the F-8 invariant enforced in production (the API, the wiki form, and
# graduation scripts all go through the unwrapped function) while leaving
# the existing test corpus intact.
def _patch_create_entity_for_tests():
    """Replace ``service.create_entity`` with a test-friendly wrapper.

    The wrapper adds ``hand_authored=True`` to any call that passes neither
    a truthy ``source_refs`` keyword nor an explicit ``hand_authored``
    keyword, then delegates to the original function. Only keyword
    arguments are inspected; positional arguments are forwarded untouched.
    """
    import functools

    from atocore.engineering import service as _svc

    _original = _svc.create_entity

    # functools.wraps keeps the original's name, docstring and signature
    # visible on the wrapper and exposes the real function as __wrapped__.
    @functools.wraps(_original)
    def _create_entity_test(*args, **kwargs):
        # Only auto-flag when hand_authored isn't explicitly specified.
        # Tests that want to exercise the F-8 raise path pass
        # hand_authored=False explicitly and should hit the error.
        if not kwargs.get("source_refs") and "hand_authored" not in kwargs:
            kwargs["hand_authored"] = True
        return _original(*args, **kwargs)

    _svc.create_entity = _create_entity_test


_patch_create_entity_for_tests()
|
|
|
|
|
|
@pytest.fixture
def tmp_data_dir(tmp_path, monkeypatch):
    """Provide a temporary data directory for tests.

    Points ``ATOCORE_DATA_DIR`` at ``tmp_path``, replaces the
    ``atocore.config.settings`` singleton with a freshly constructed
    ``Settings()`` (presumably so it picks up the new environment value),
    and clears the cached ``vector_store._store``.

    All three changes go through ``monkeypatch`` so they are rolled back at
    teardown instead of leaking into whichever test runs next.

    Returns:
        The ``tmp_path`` directory now configured as the data directory.
    """
    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path))

    # Reset singletons. The settings object must be rebuilt *after* the
    # environment variable has been changed.
    from atocore import config

    # raising=False mirrors the original plain assignment, which did not
    # require the attribute to exist beforehand.
    monkeypatch.setattr(config, "settings", config.Settings(), raising=False)

    import atocore.retrieval.vector_store as vs

    monkeypatch.setattr(vs, "_store", None, raising=False)

    return tmp_path
|
|
|
|
|
|
@pytest.fixture
def project_registry(tmp_path, monkeypatch):
    """Stand up an isolated project registry pointing at a temp file.

    Returns a callable that takes one or more projects and writes them into
    the registry, then forces the in-process settings singleton to
    re-resolve. Each project is either a bare ``project_id`` string (no
    aliases) or a ``(project_id, [aliases])`` pair. The callable returns the
    path of the registry file it wrote.

    Use this when a test needs the canonicalization helpers
    (resolve_project_name, get_registered_project) to recognize aliases.

    Both the ``ATOCORE_PROJECT_REGISTRY_PATH`` environment variable and the
    replaced ``config.settings`` object are installed through
    ``monkeypatch``, so they are undone together at teardown.
    """
    registry_path = tmp_path / "test-project-registry.json"

    def _set(*projects):
        payload = {"projects": []}
        for entry in projects:
            # A bare string is shorthand for "project with no aliases".
            if isinstance(entry, str):
                project_id, aliases = entry, []
            else:
                project_id, aliases = entry
            payload["projects"].append(
                {
                    "id": project_id,
                    "aliases": list(aliases),
                    "description": f"test project {project_id}",
                    "ingest_roots": [
                        {"source": "vault", "subpath": f"incoming/projects/{project_id}"}
                    ],
                }
            )
        registry_path.write_text(json.dumps(payload), encoding="utf-8")
        monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))

        from atocore import config

        # Rebuild settings after the env var is set. Using monkeypatch (not a
        # plain assignment) restores the previous settings object at
        # teardown, so it cannot outlive the env var it was built from.
        monkeypatch.setattr(config, "settings", config.Settings(), raising=False)
        return registry_path

    return _set
|
|
|
|
|
|
@pytest.fixture
def sample_markdown(tmp_path) -> Path:
    """Write a sample AtoCore note into ``tmp_path`` and return its path.

    The note is ``test_note.md``: a front-matter block (tags and a date)
    followed by a title and several H2 sections containing prose, numbered
    lists and bullet lists.
    """
    note_text = """---
tags:
- atocore
- architecture
date: 2026-04-05
---
# AtoCore Architecture

## Overview

AtoCore is a personal context engine that enriches LLM interactions
with durable memory, structured context, and project knowledge.

## Layers

The system has these layers:

1. Main PKM (human, messy, exploratory)
2. AtoVault (system mirror)
3. AtoDrive (trusted project truth)
4. Structured Memory (DB)
5. Semantic Retrieval (vector DB)

## Memory Types

AtoCore supports these memory types:

- Identity
- Preferences
- Project Memory
- Episodic Memory
- Knowledge Objects
- Adaptation Memory
- Trusted Project State

## Trust Precedence

When sources conflict:

1. Trusted Project State wins
2. AtoDrive overrides PKM
3. Most recent confirmed wins
4. Higher confidence wins
5. Equal → flag conflict

No silent merging.
"""
    note_path = tmp_path / "test_note.md"
    note_path.write_text(note_text, encoding="utf-8")
    return note_path
|
|
|
|
|
|
@pytest.fixture
def sample_folder(tmp_path, sample_markdown) -> Path:
    """Return a directory that holds two markdown notes.

    Requesting ``sample_markdown`` has already placed ``test_note.md`` in
    ``tmp_path``; this fixture writes ``second_note.md`` beside it and
    returns the directory itself.
    """
    chunking_note = """---
tags:
- chunking
---
# Chunking Strategy

## Approach

Heading-aware recursive splitting:

1. Split on H2 boundaries first
2. If section > 800 chars, split on H3
3. If still > 800 chars, split on paragraphs
4. Hard split at 800 chars with 100 char overlap

## Parameters

- max_chunk_size: 800 characters
- overlap: 100 characters
- min_chunk_size: 50 characters
"""
    (tmp_path / "second_note.md").write_text(chunking_note, encoding="utf-8")
    return tmp_path
|