# ATOCore/tests/conftest.py

"""pytest configuration and shared fixtures."""

import json
import os
import sys
import tempfile
from pathlib import Path

import pytest

# Put "<parent of this file's directory>/src" at the front of sys.path so
# imports resolve against that source tree first.
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))

# Default test data directory — overridden per-test by fixtures.
# The environment is populated at import time of this module; presumably
# this must happen before the application config is first imported so it
# picks these values up — verify against atocore.config.
# NOTE(review): nothing visible in this file removes the mkdtemp()
# directory, so it appears to outlive the test run — confirm whether
# cleanup is wanted.
_default_test_dir = tempfile.mkdtemp(prefix="atocore_test_")
os.environ["ATOCORE_DATA_DIR"] = _default_test_dir
os.environ["ATOCORE_DEBUG"] = "true"


# V1-0: every entity created in a test is "hand authored" by the test
# author — fixture data, not extracted content. Rather than rewrite 100+
# existing test call sites, wrap create_entity so that tests which don't
# provide source_refs get hand_authored=True automatically. Tests that
# explicitly pass source_refs or hand_authored are unaffected. This keeps
# the F-8 invariant enforced in production (the API, the wiki form, and
# graduation scripts all go through the unwrapped function) while leaving
# the existing test corpus intact.
def _patch_create_entity_for_tests():
    """Replace ``service.create_entity`` with a test-friendly wrapper.

    The wrapper adds ``hand_authored=True`` to a call only when the caller
    supplied neither a truthy ``source_refs`` nor any ``hand_authored``
    value. Both keyword and positional arguments are taken into account,
    so a test that passes either value positionally is left untouched
    instead of triggering a "multiple values for argument" ``TypeError``.

    Calling this function more than once is harmless: an already-installed
    wrapper is detected and not wrapped again.
    """
    import functools
    import inspect

    from atocore.engineering import service as _svc

    _original = _svc.create_entity

    # Idempotence guard: never stack a second wrapper on top of the first.
    if getattr(_original, "_atocore_test_wrapper", False):
        return

    # Resolve the signature once so positional arguments can be mapped to
    # parameter names at call time. Fall back to keyword-only detection
    # if the callable cannot be introspected.
    try:
        _signature = inspect.signature(_original)
    except (TypeError, ValueError):
        _signature = None

    @functools.wraps(_original)
    def _create_entity_test(*args, **kwargs):
        # Only auto-flag when hand_authored isn't explicitly specified.
        # Tests that want to exercise the F-8 raise path pass
        # hand_authored=False explicitly and should hit the error.
        positional = {}
        if args and _signature is not None:
            try:
                positional = _signature.bind_partial(*args).arguments
            except TypeError:
                # Malformed call (e.g. too many positionals): leave it to
                # the real function to raise the appropriate error.
                positional = {}
        supplied = {**positional, **kwargs}
        if not supplied.get("source_refs") and "hand_authored" not in supplied:
            kwargs["hand_authored"] = True
        return _original(*args, **kwargs)

    _create_entity_test._atocore_test_wrapper = True
    _svc.create_entity = _create_entity_test


# Install the create_entity wrapper as a side effect of importing this
# conftest module, so the patch is in place without any fixture opt-in.
_patch_create_entity_for_tests()


@pytest.fixture
def tmp_data_dir(tmp_path, monkeypatch):
    """Provide a temporary data directory for tests.

    Points ``ATOCORE_DATA_DIR`` at pytest's per-test ``tmp_path``, rebuilds
    the ``config.settings`` singleton so it is constructed with that
    environment, and clears the cached vector store (``vs._store``).

    All three changes go through ``monkeypatch`` so they are undone at
    teardown and do not leak into tests that do not request this fixture.

    Returns:
        The ``tmp_path`` directory now acting as the data directory.
    """
    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path))

    # Reset singletons (the environment must be set before Settings() runs).
    from atocore import config

    monkeypatch.setattr(config, "settings", config.Settings(), raising=False)

    import atocore.retrieval.vector_store as vs

    monkeypatch.setattr(vs, "_store", None, raising=False)

    return tmp_path


@pytest.fixture
def project_registry(tmp_path, monkeypatch):
    """Stand up an isolated project registry pointing at a temp file.

    Returns a callable that takes one or more project entries and writes
    them into the registry, then forces the in-process settings singleton
    to re-resolve. Each entry is either a bare ``project_id`` string (no
    aliases) or a ``(project_id, aliases)`` pair, where ``aliases`` is an
    iterable of strings, a single string, or ``None``. Use this when a
    test needs the canonicalization helpers (resolve_project_name,
    get_registered_project) to recognize aliases.

    The callable returns the path of the registry file it wrote. The
    environment variable and the rebuilt settings singleton are both
    applied through ``monkeypatch`` so they are reverted at teardown.
    """
    registry_path = tmp_path / "test-project-registry.json"

    def _set(*projects):
        payload = {"projects": []}
        for entry in projects:
            if isinstance(entry, str):
                project_id, aliases = entry, []
            else:
                project_id, aliases = entry
            if isinstance(aliases, str):
                # A lone alias string must not be split into characters.
                aliases = [aliases]
            payload["projects"].append(
                {
                    "id": project_id,
                    "aliases": list(aliases or []),
                    "description": f"test project {project_id}",
                    "ingest_roots": [
                        {"source": "vault", "subpath": f"incoming/projects/{project_id}"}
                    ],
                }
            )
        registry_path.write_text(json.dumps(payload), encoding="utf-8")
        monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
        from atocore import config

        # Rebuild settings after the env var is set; monkeypatch restores
        # the previous singleton so no stale registry path outlives the test.
        monkeypatch.setattr(config, "settings", config.Settings(), raising=False)
        return registry_path

    return _set


@pytest.fixture
def sample_markdown(tmp_path) -> Path:
    """Write ``test_note.md`` into ``tmp_path`` and return its path.

    The note carries YAML front matter (tags and a date) followed by a
    multi-section markdown body, and is written as UTF-8.
    """
    note_text = """---
tags:
  - atocore
  - architecture
date: 2026-04-05
---
# AtoCore Architecture

## Overview

AtoCore is a personal context engine that enriches LLM interactions
with durable memory, structured context, and project knowledge.

## Layers

The system has these layers:

1. Main PKM (human, messy, exploratory)
2. AtoVault (system mirror)
3. AtoDrive (trusted project truth)
4. Structured Memory (DB)
5. Semantic Retrieval (vector DB)

## Memory Types

AtoCore supports these memory types:

- Identity
- Preferences
- Project Memory
- Episodic Memory
- Knowledge Objects
- Adaptation Memory
- Trusted Project State

## Trust Precedence

When sources conflict:

1. Trusted Project State wins
2. AtoDrive overrides PKM
3. Most recent confirmed wins
4. Higher confidence wins
5. Equal → flag conflict

No silent merging.
"""
    note_path = tmp_path / "test_note.md"
    note_path.write_text(note_text, encoding="utf-8")
    return note_path


@pytest.fixture
def sample_folder(tmp_path, sample_markdown) -> Path:
    """Return ``tmp_path`` populated with two markdown notes.

    ``test_note.md`` is already present because this fixture depends on
    ``sample_markdown``; this fixture adds ``second_note.md`` next to it.
    """
    chunking_note = """---
tags:
  - chunking
---
# Chunking Strategy

## Approach

Heading-aware recursive splitting:

1. Split on H2 boundaries first
2. If section > 800 chars, split on H3
3. If still > 800 chars, split on paragraphs
4. Hard split at 800 chars with 100 char overlap

## Parameters

- max_chunk_size: 800 characters
- overlap: 100 characters
- min_chunk_size: 50 characters
"""
    (tmp_path / "second_note.md").write_text(chunking_note, encoding="utf-8")
    return tmp_path