# ATOCore/tests/conftest.py

"""pytest configuration and shared fixtures."""

import os
import sys
import tempfile
from pathlib import Path

import pytest

# Put the sibling ``src`` directory (one level above this file's folder) at
# the front of the import path.
sys.path.insert(0, os.fspath(Path(__file__).resolve().parents[1] / "src"))

# Session-wide fallback data directory; the ``tmp_data_dir`` fixture replaces
# it with a per-test location.
_default_test_dir = tempfile.mkdtemp(prefix="atocore_test_")
os.environ.update(
    ATOCORE_DATA_DIR=_default_test_dir,
    ATOCORE_DEBUG="true",
)


@pytest.fixture
def tmp_data_dir(tmp_path, monkeypatch):
    """Point AtoCore at a fresh, per-test data directory.

    Sets ``ATOCORE_DATA_DIR`` to ``tmp_path``, replaces ``config.settings``
    with a newly constructed ``Settings`` instance, and clears the cached
    ``vector_store._store``.

    Every change goes through ``monkeypatch`` so that the previous
    environment value and module attributes are restored when the test
    finishes; otherwise the per-test directory and the objects built for it
    would leak into later tests that do not request this fixture.

    Returns:
        The ``tmp_path`` directory that was installed as the data directory.
    """
    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path))

    # Imported lazily so the package is only loaded once the environment
    # variable above is already in place.
    from atocore import config

    # Rebuild the settings singleton after the environment change
    # (presumably Settings reads ATOCORE_DATA_DIR on construction — confirm).
    # raising=False mirrors plain attribute assignment.
    monkeypatch.setattr(config, "settings", config.Settings(), raising=False)

    import atocore.retrieval.vector_store as vs

    # Drop any cached store so one created earlier is not reused in this test.
    monkeypatch.setattr(vs, "_store", None, raising=False)

    return tmp_path


@pytest.fixture
def sample_markdown(tmp_path) -> Path:
    """Write ``test_note.md`` into ``tmp_path`` and return its path.

    The note has YAML front matter (two tags and a date) followed by an H1
    title and several H2 sections containing paragraphs and lists.
    """
    note_body = """---
tags:
  - atocore
  - architecture
date: 2026-04-05
---
# AtoCore Architecture

## Overview

AtoCore is a personal context engine that enriches LLM interactions
with durable memory, structured context, and project knowledge.

## Layers

The system has these layers:

1. Main PKM (human, messy, exploratory)
2. AtoVault (system mirror)
3. AtoDrive (trusted project truth)
4. Structured Memory (DB)
5. Semantic Retrieval (vector DB)

## Memory Types

AtoCore supports these memory types:

- Identity
- Preferences
- Project Memory
- Episodic Memory
- Knowledge Objects
- Adaptation Memory
- Trusted Project State

## Trust Precedence

When sources conflict:

1. Trusted Project State wins
2. AtoDrive overrides PKM
3. Most recent confirmed wins
4. Higher confidence wins
5. Equal → flag conflict

No silent merging.
"""
    note_path = tmp_path / "test_note.md"
    # Explicit UTF-8: the body contains a non-ASCII arrow character.
    note_path.write_text(note_body, encoding="utf-8")
    return note_path


@pytest.fixture
def sample_folder(tmp_path, sample_markdown) -> Path:
    """Return ``tmp_path`` populated with two markdown notes.

    ``sample_markdown`` is requested only for its side effect of creating
    ``test_note.md``; this fixture adds ``second_note.md`` alongside it.
    """
    chunking_note = """---
tags:
  - chunking
---
# Chunking Strategy

## Approach

Heading-aware recursive splitting:

1. Split on H2 boundaries first
2. If section > 800 chars, split on H3
3. If still > 800 chars, split on paragraphs
4. Hard split at 800 chars with 100 char overlap

## Parameters

- max_chunk_size: 800 characters
- overlap: 100 characters
- min_chunk_size: 50 characters
"""
    (tmp_path / "second_note.md").write_text(chunking_note, encoding="utf-8")
    return tmp_path