From 6bfa1fcc37bbff0b334697bc2207c076e25527c3 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Sun, 5 Apr 2026 18:33:52 -0400 Subject: [PATCH] Add Dalidou storage foundation and deployment prep --- .dockerignore | 9 +++ .env.example | 13 ++++ .gitignore | 1 + AGENTS.md | 37 ++++++++++ Dockerfile | 20 ++++++ deploy/dalidou/.env.example | 19 +++++ deploy/dalidou/docker-compose.yml | 27 +++++++ docs/dalidou-deployment.md | 77 ++++++++++++++++++++ docs/dalidou-storage-migration.md | 61 ++++++++++++++++ src/atocore/api/routes.py | 48 ++++++++++++- src/atocore/config.py | 106 +++++++++++++++++++++++++++- src/atocore/ingestion/pipeline.py | 47 ++++++++++++ src/atocore/main.py | 12 +++- src/atocore/models/database.py | 2 +- src/atocore/observability/logger.py | 9 ++- tests/test_api_storage.py | 48 +++++++++++++ tests/test_config.py | 63 +++++++++++++++++ tests/test_logging.py | 18 +++++ tests/test_sources.py | 70 ++++++++++++++++++ 19 files changed, 679 insertions(+), 8 deletions(-) create mode 100644 .dockerignore create mode 100644 AGENTS.md create mode 100644 Dockerfile create mode 100644 deploy/dalidou/.env.example create mode 100644 deploy/dalidou/docker-compose.yml create mode 100644 docs/dalidou-deployment.md create mode 100644 docs/dalidou-storage-migration.md create mode 100644 tests/test_api_storage.py create mode 100644 tests/test_config.py create mode 100644 tests/test_logging.py create mode 100644 tests/test_sources.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..27ba98b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +.git +.pytest_cache +.coverage +.claude +data +__pycache__ +*.pyc +tests +docs diff --git a/.env.example b/.env.example index 9cdb6a1..88c72c3 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,18 @@ +ATOCORE_ENV=development ATOCORE_DEBUG=false +ATOCORE_LOG_LEVEL=INFO ATOCORE_DATA_DIR=./data +ATOCORE_DB_DIR= +ATOCORE_CHROMA_DIR= +ATOCORE_CACHE_DIR= +ATOCORE_TMP_DIR= +ATOCORE_VAULT_SOURCE_DIR=./sources/vault 
+ATOCORE_DRIVE_SOURCE_DIR=./sources/drive +ATOCORE_SOURCE_VAULT_ENABLED=true +ATOCORE_SOURCE_DRIVE_ENABLED=true +ATOCORE_LOG_DIR=./logs +ATOCORE_BACKUP_DIR=./backups +ATOCORE_RUN_DIR=./run ATOCORE_HOST=127.0.0.1 ATOCORE_PORT=8100 ATOCORE_EMBEDDING_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 diff --git a/.gitignore b/.gitignore index 178704b..07957c6 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ htmlcov/ .coverage venv/ .venv/ +.claude/ diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..d0a24c6 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,37 @@ +# AGENTS.md + +## Project role +This repository is AtoCore, the runtime and machine-memory layer of the Ato ecosystem. + +## Ecosystem definitions +- AtoCore = app/runtime/API/ingestion/retrieval/context builder/machine DB logic +- AtoMind = future intelligence layer for promotion, reflection, conflict handling, trust decisions +- AtoVault = human-readable memory source, intended for Obsidian +- AtoDrive = trusted operational project source, higher trust than general vault notes + +## Storage principles +- Human-readable source layers and machine operational storage must remain separate +- AtoVault is not the live vector database location +- AtoDrive is not the live vector database location +- Machine operational storage includes SQLite, vector store, indexes, embeddings, and runtime metadata +- The machine DB is derived operational state, not the primary human source of truth + +## Deployment principles +- Dalidou is the canonical host for AtoCore service and machine database +- OpenClaw on the T420 should consume AtoCore over API/network/Tailscale +- Do not design around Syncthing for the live SQLite/vector DB +- Prefer one canonical running service over multi-node live DB replication + +## Coding guidance +- Keep path handling explicit and configurable via environment variables +- Do not hard-code machine-specific absolute paths +- Keep implementation small, testable, and 
reversible +- Preserve current working behavior unless a change is necessary +- Add or update tests when changing config, storage, or path logic + +## Change policy +Before large refactors: +1. explain the architectural reason +2. propose the smallest safe batch +3. implement incrementally +4. summarize changed files and migration impact diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..34a3ac5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.12-slim + +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential curl git \ + && rm -rf /var/lib/apt/lists/* + +COPY pyproject.toml README.md requirements.txt requirements-dev.txt ./ +COPY src ./src + +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir . + +EXPOSE 8100 + +CMD ["python", "-m", "uvicorn", "atocore.main:app", "--host", "0.0.0.0", "--port", "8100"] diff --git a/deploy/dalidou/.env.example b/deploy/dalidou/.env.example new file mode 100644 index 0000000..7982ae8 --- /dev/null +++ b/deploy/dalidou/.env.example @@ -0,0 +1,19 @@ +ATOCORE_ENV=production +ATOCORE_DEBUG=false +ATOCORE_LOG_LEVEL=INFO +ATOCORE_HOST=0.0.0.0 +ATOCORE_PORT=8100 + +ATOCORE_DATA_DIR=/srv/storage/atocore/data +ATOCORE_DB_DIR=/srv/storage/atocore/data/db +ATOCORE_CHROMA_DIR=/srv/storage/atocore/data/chroma +ATOCORE_CACHE_DIR=/srv/storage/atocore/data/cache +ATOCORE_TMP_DIR=/srv/storage/atocore/data/tmp +ATOCORE_LOG_DIR=/srv/storage/atocore/logs +ATOCORE_BACKUP_DIR=/srv/storage/atocore/backups +ATOCORE_RUN_DIR=/srv/storage/atocore/run + +ATOCORE_VAULT_SOURCE_DIR=/srv/storage/atocore/sources/vault +ATOCORE_DRIVE_SOURCE_DIR=/srv/storage/atocore/sources/drive +ATOCORE_SOURCE_VAULT_ENABLED=true +ATOCORE_SOURCE_DRIVE_ENABLED=true diff --git a/deploy/dalidou/docker-compose.yml b/deploy/dalidou/docker-compose.yml new file mode 100644 index 0000000..e00e9fa --- /dev/null +++ 
b/deploy/dalidou/docker-compose.yml @@ -0,0 +1,27 @@ +services: + atocore: + build: + context: ../../ + dockerfile: Dockerfile + container_name: atocore + restart: unless-stopped + ports: + - "${ATOCORE_PORT:-8100}:8100" + env_file: + - .env + volumes: + - ${ATOCORE_DB_DIR}:${ATOCORE_DB_DIR} + - ${ATOCORE_CHROMA_DIR}:${ATOCORE_CHROMA_DIR} + - ${ATOCORE_CACHE_DIR}:${ATOCORE_CACHE_DIR} + - ${ATOCORE_TMP_DIR}:${ATOCORE_TMP_DIR} + - ${ATOCORE_LOG_DIR}:${ATOCORE_LOG_DIR} + - ${ATOCORE_BACKUP_DIR}:${ATOCORE_BACKUP_DIR} + - ${ATOCORE_RUN_DIR}:${ATOCORE_RUN_DIR} + - ${ATOCORE_VAULT_SOURCE_DIR}:${ATOCORE_VAULT_SOURCE_DIR}:ro + - ${ATOCORE_DRIVE_SOURCE_DIR}:${ATOCORE_DRIVE_SOURCE_DIR}:ro + healthcheck: + test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8100/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 20s diff --git a/docs/dalidou-deployment.md b/docs/dalidou-deployment.md new file mode 100644 index 0000000..55e985c --- /dev/null +++ b/docs/dalidou-deployment.md @@ -0,0 +1,77 @@ +# Dalidou Deployment + +## Purpose +Deploy AtoCore on Dalidou as the canonical runtime and machine-memory host. + +## Model + +- Dalidou hosts the canonical AtoCore service. +- OpenClaw on the T420 consumes AtoCore over network/Tailscale API. +- `sources/vault` and `sources/drive` are read-only inputs by convention. +- SQLite/Chroma machine state stays on Dalidou and is not treated as a sync peer. + +## Directory layout + +```text +/srv/storage/atocore/ + app/ # deployed repo checkout + data/ + db/ + chroma/ + cache/ + tmp/ + sources/ + vault/ + drive/ + logs/ + backups/ + run/ +``` + +## Compose workflow + +The compose definition lives in: + +```text +deploy/dalidou/docker-compose.yml +``` + +The Dalidou environment file should be copied to: + +```text +deploy/dalidou/.env +``` + +starting from: + +```text +deploy/dalidou/.env.example +``` + +## Deployment steps + +1. Place the repository under `/srv/storage/atocore/app`. +2. Create the canonical directories listed above. +3. 
Copy `deploy/dalidou/.env.example` to `deploy/dalidou/.env`. +4. Adjust the source paths if your AtoVault/AtoDrive mirrors live elsewhere. +5. Run: + +```bash +cd /srv/storage/atocore/app/deploy/dalidou +docker compose up -d --build +``` + +6. Validate: + +```bash +curl http://127.0.0.1:8100/health +curl http://127.0.0.1:8100/sources +``` + +## Deferred + +- backup automation +- restore/snapshot tooling +- reverse proxy / TLS exposure +- automated source ingestion job +- OpenClaw client wiring diff --git a/docs/dalidou-storage-migration.md b/docs/dalidou-storage-migration.md new file mode 100644 index 0000000..856b778 --- /dev/null +++ b/docs/dalidou-storage-migration.md @@ -0,0 +1,61 @@ +# Dalidou Storage Migration + +## Goal +Establish Dalidou as the canonical AtoCore host while keeping human-readable +source layers separate from machine operational storage. + +## Canonical layout + +```text +/srv/storage/atocore/ + app/ # git checkout of this repository + data/ # machine operational state + db/ + atocore.db + chroma/ + cache/ + tmp/ + sources/ + vault/ # AtoVault input, read-only by convention + drive/ # AtoDrive input, read-only by convention + logs/ + backups/ + run/ + config/ + .env +``` + +## Environment variables + +Suggested Dalidou values (matching `deploy/dalidou/.env.example`): + +```bash +ATOCORE_ENV=production +ATOCORE_DATA_DIR=/srv/storage/atocore/data +ATOCORE_DB_DIR=/srv/storage/atocore/data/db +ATOCORE_CHROMA_DIR=/srv/storage/atocore/data/chroma +ATOCORE_CACHE_DIR=/srv/storage/atocore/data/cache +ATOCORE_TMP_DIR=/srv/storage/atocore/data/tmp +ATOCORE_VAULT_SOURCE_DIR=/srv/storage/atocore/sources/vault +ATOCORE_DRIVE_SOURCE_DIR=/srv/storage/atocore/sources/drive +ATOCORE_LOG_DIR=/srv/storage/atocore/logs +ATOCORE_BACKUP_DIR=/srv/storage/atocore/backups +ATOCORE_RUN_DIR=/srv/storage/atocore/run +``` + +## Migration notes + +- Existing local installs remain backward-compatible. +- If `data/atocore.db` already exists, AtoCore continues using it. +- Fresh installs default to `data/db/atocore.db`. 
+- Source directories are inputs only; AtoCore should ingest from them but not + treat them as writable runtime state. +- Avoid syncing live SQLite/Chroma state between Dalidou and other machines. + Prefer one canonical running service and API access from OpenClaw. + +## Deferred work + +- service manager wiring +- backup/snapshot procedures +- automated source registration jobs +- OpenClaw integration diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index 350987f..b7b7d8d 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -5,6 +5,7 @@ from pathlib import Path from fastapi import APIRouter, HTTPException from pydantic import BaseModel +import atocore.config as _config from atocore.context.builder import ( build_context, get_last_context_pack, @@ -16,7 +17,13 @@ from atocore.context.project_state import ( invalidate_state, set_state, ) -from atocore.ingestion.pipeline import ingest_file, ingest_folder, get_ingestion_stats +from atocore.ingestion.pipeline import ( + get_ingestion_stats, + get_source_status, + ingest_configured_sources, + ingest_file, + ingest_folder, +) from atocore.memory.service import ( MEMORY_TYPES, create_memory, @@ -44,6 +51,10 @@ class IngestResponse(BaseModel): results: list[dict] +class IngestSourcesResponse(BaseModel): + results: list[dict] + + class QueryRequest(BaseModel): prompt: str top_k: int = 10 @@ -126,6 +137,17 @@ def api_ingest(req: IngestRequest) -> IngestResponse: return IngestResponse(results=results) +@router.post("/ingest/sources", response_model=IngestSourcesResponse) +def api_ingest_sources() -> IngestSourcesResponse: + """Ingest enabled configured source directories.""" + try: + results = ingest_configured_sources() + except Exception as e: + log.error("ingest_sources_failed", error=str(e)) + raise HTTPException(status_code=500, detail=f"Configured source ingestion failed: {e}") + return IngestSourcesResponse(results=results) + + @router.post("/query", response_model=QueryResponse) 
def api_query(req: QueryRequest) -> QueryResponse: """Retrieve relevant chunks for a prompt.""" @@ -304,10 +326,34 @@ def api_invalidate_project_state(req: ProjectStateInvalidateRequest) -> dict: def api_health() -> dict: """Health check.""" store = get_vector_store() + source_status = get_source_status() return { "status": "ok", "version": "0.1.0", "vectors_count": store.count, + "env": _config.settings.env, + "machine_paths": { + "db_path": str(_config.settings.db_path), + "chroma_path": str(_config.settings.chroma_path), + "log_dir": str(_config.settings.resolved_log_dir), + "backup_dir": str(_config.settings.resolved_backup_dir), + "run_dir": str(_config.settings.resolved_run_dir), + }, + "sources_ready": all( + (not source["enabled"]) or (source["exists"] and source["is_dir"]) + for source in source_status + ), + "source_status": source_status, + } + + +@router.get("/sources") +def api_sources() -> dict: + """Return configured ingestion source directories and readiness.""" + return { + "sources": get_source_status(), + "vault_enabled": _config.settings.source_vault_enabled, + "drive_enabled": _config.settings.source_drive_enabled, } diff --git a/src/atocore/config.py b/src/atocore/config.py index c487615..5811d2c 100644 --- a/src/atocore/config.py +++ b/src/atocore/config.py @@ -6,8 +6,21 @@ from pydantic_settings import BaseSettings class Settings(BaseSettings): + env: str = "development" debug: bool = False + log_level: str = "INFO" data_dir: Path = Path("./data") + db_dir: Path | None = None + chroma_dir: Path | None = None + cache_dir: Path | None = None + tmp_dir: Path | None = None + vault_source_dir: Path = Path("./sources/vault") + drive_source_dir: Path = Path("./sources/drive") + source_vault_enabled: bool = True + source_drive_enabled: bool = True + log_dir: Path = Path("./logs") + backup_dir: Path = Path("./backups") + run_dir: Path = Path("./run") host: str = "127.0.0.1" port: int = 8100 @@ -29,11 +42,100 @@ class Settings(BaseSettings): @property 
def db_path(self) -> Path: - return self.data_dir / "atocore.db" + legacy_path = self.resolved_data_dir / "atocore.db" + if self.db_dir is not None: + return self.resolved_db_dir / "atocore.db" + if legacy_path.exists(): + return legacy_path + return self.resolved_db_dir / "atocore.db" @property def chroma_path(self) -> Path: - return self.data_dir / "chroma" + return self._resolve_path(self.chroma_dir or (self.resolved_data_dir / "chroma")) + + @property + def cache_path(self) -> Path: + return self._resolve_path(self.cache_dir or (self.resolved_data_dir / "cache")) + + @property + def tmp_path(self) -> Path: + return self._resolve_path(self.tmp_dir or (self.resolved_data_dir / "tmp")) + + @property + def resolved_data_dir(self) -> Path: + return self._resolve_path(self.data_dir) + + @property + def resolved_db_dir(self) -> Path: + return self._resolve_path(self.db_dir or (self.resolved_data_dir / "db")) + + @property + def resolved_vault_source_dir(self) -> Path: + return self._resolve_path(self.vault_source_dir) + + @property + def resolved_drive_source_dir(self) -> Path: + return self._resolve_path(self.drive_source_dir) + + @property + def resolved_log_dir(self) -> Path: + return self._resolve_path(self.log_dir) + + @property + def resolved_backup_dir(self) -> Path: + return self._resolve_path(self.backup_dir) + + @property + def resolved_run_dir(self) -> Path: + if self.run_dir == Path("./run"): + return self._resolve_path(self.resolved_data_dir.parent / "run") + return self._resolve_path(self.run_dir) + + @property + def machine_dirs(self) -> list[Path]: + return [ + self.db_path.parent, + self.chroma_path, + self.cache_path, + self.tmp_path, + self.resolved_log_dir, + self.resolved_backup_dir, + self.resolved_run_dir, + ] + + @property + def source_specs(self) -> list[dict[str, object]]: + return [ + { + "name": "vault", + "enabled": self.source_vault_enabled, + "path": self.resolved_vault_source_dir, + "read_only": True, + }, + { + "name": "drive", + 
"enabled": self.source_drive_enabled, + "path": self.resolved_drive_source_dir, + "read_only": True, + }, + ] + + @property + def source_dirs(self) -> list[Path]: + return [spec["path"] for spec in self.source_specs if spec["enabled"]] + + def _resolve_path(self, path: Path) -> Path: + return path.expanduser().resolve(strict=False) settings = Settings() + + +def ensure_runtime_dirs() -> None: + """Create writable runtime directories for machine state and logs. + + Source directories are intentionally excluded because they are treated as + read-only ingestion inputs by convention. + """ + for directory in settings.machine_dirs: + directory.mkdir(parents=True, exist_ok=True) diff --git a/src/atocore/ingestion/pipeline.py b/src/atocore/ingestion/pipeline.py index d93fb63..ae2470b 100644 --- a/src/atocore/ingestion/pipeline.py +++ b/src/atocore/ingestion/pipeline.py @@ -6,6 +6,7 @@ import time import uuid from pathlib import Path +import atocore.config as _config from atocore.ingestion.chunker import chunk_markdown from atocore.ingestion.parser import parse_markdown from atocore.models.database import get_connection @@ -189,6 +190,52 @@ def ingest_folder(folder_path: Path, purge_deleted: bool = True) -> list[dict]: return results +def get_source_status() -> list[dict]: + """Describe configured source directories and their readiness.""" + sources = [] + for spec in _config.settings.source_specs: + path = spec["path"] + assert isinstance(path, Path) + sources.append( + { + "name": spec["name"], + "enabled": spec["enabled"], + "path": str(path), + "exists": path.exists(), + "is_dir": path.is_dir(), + "read_only": spec["read_only"], + } + ) + return sources + + +def ingest_configured_sources(purge_deleted: bool = False) -> list[dict]: + """Ingest enabled source directories declared in config. + + Purge is disabled by default here because sources are intended to be + read-only inputs and should not be treated as the primary writable state. 
+ """ + results = [] + for source in get_source_status(): + if not source["enabled"]: + results.append({"source": source["name"], "status": "disabled", "path": source["path"]}) + continue + if not source["exists"] or not source["is_dir"]: + results.append({"source": source["name"], "status": "missing", "path": source["path"]}) + continue + + folder_results = ingest_folder(Path(source["path"]), purge_deleted=purge_deleted) + results.append( + { + "source": source["name"], + "status": "ingested", + "path": source["path"], + "results": folder_results, + } + ) + return results + + def get_ingestion_stats() -> dict: """Return ingestion statistics.""" with get_connection() as conn: diff --git a/src/atocore/main.py b/src/atocore/main.py index 7ee360c..0314b0c 100644 --- a/src/atocore/main.py +++ b/src/atocore/main.py @@ -5,8 +5,9 @@ from fastapi import FastAPI from atocore.api.routes import router import atocore.config as _config from atocore.context.project_state import init_project_state_schema +from atocore.ingestion.pipeline import get_source_status from atocore.models.database import init_db -from atocore.observability.logger import setup_logging +from atocore.observability.logger import get_logger, setup_logging app = FastAPI( title="AtoCore", @@ -15,13 +16,22 @@ app = FastAPI( ) app.include_router(router) +log = get_logger("main") @app.on_event("startup") def startup(): setup_logging() + _config.ensure_runtime_dirs() init_db() init_project_state_schema() + log.info( + "startup_ready", + env=_config.settings.env, + db_path=str(_config.settings.db_path), + chroma_path=str(_config.settings.chroma_path), + source_status=get_source_status(), + ) if __name__ == "__main__": diff --git a/src/atocore/models/database.py b/src/atocore/models/database.py index 2a57964..618d0fc 100644 --- a/src/atocore/models/database.py +++ b/src/atocore/models/database.py @@ -72,7 +72,7 @@ CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id); def _ensure_data_dir() 
-> None: - _config.settings.data_dir.mkdir(parents=True, exist_ok=True) + _config.ensure_runtime_dirs() def init_db() -> None: diff --git a/src/atocore/observability/logger.py b/src/atocore/observability/logger.py index 1f9fd4a..7bfab05 100644 --- a/src/atocore/observability/logger.py +++ b/src/atocore/observability/logger.py @@ -16,15 +16,18 @@ _LOG_LEVELS = { def setup_logging() -> None: """Configure structlog with JSON output.""" log_level = "DEBUG" if _config.settings.debug else "INFO" + renderer = ( + structlog.dev.ConsoleRenderer() + if _config.settings.debug + else structlog.processors.JSONRenderer() + ) structlog.configure( processors=[ structlog.contextvars.merge_contextvars, structlog.processors.add_log_level, structlog.processors.TimeStamper(fmt="iso"), - structlog.dev.ConsoleRenderer() - if settings.debug - else structlog.processors.JSONRenderer(), + renderer, ], wrapper_class=structlog.make_filtering_bound_logger( _LOG_LEVELS.get(log_level, logging.INFO) diff --git a/tests/test_api_storage.py b/tests/test_api_storage.py new file mode 100644 index 0000000..025cf65 --- /dev/null +++ b/tests/test_api_storage.py @@ -0,0 +1,48 @@ +"""Tests for storage-related API readiness endpoints.""" + +from fastapi.testclient import TestClient + +import atocore.config as config +from atocore.main import app + + +def test_sources_endpoint_reports_configured_sources(tmp_data_dir, monkeypatch): + vault_dir = tmp_data_dir / "vault-source" + drive_dir = tmp_data_dir / "drive-source" + vault_dir.mkdir() + drive_dir.mkdir() + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + config.settings = config.Settings() + + client = TestClient(app) + response = client.get("/sources") + + assert response.status_code == 200 + body = response.json() + assert body["vault_enabled"] is True + assert body["drive_enabled"] is True + assert len(body["sources"]) == 2 + assert all(source["read_only"] for source in 
body["sources"]) + + +def test_health_endpoint_exposes_machine_paths_and_source_readiness(tmp_data_dir, monkeypatch): + vault_dir = tmp_data_dir / "vault-source" + drive_dir = tmp_data_dir / "drive-source" + vault_dir.mkdir() + drive_dir.mkdir() + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + config.settings = config.Settings() + + client = TestClient(app) + response = client.get("/health") + + assert response.status_code == 200 + body = response.json() + assert body["status"] == "ok" + assert body["sources_ready"] is True + assert "db_path" in body["machine_paths"] + assert "run_dir" in body["machine_paths"] diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..3afde64 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,63 @@ +"""Tests for configuration and canonical path boundaries.""" + +import os +from pathlib import Path + +import atocore.config as config + + +def test_settings_resolve_canonical_directories(tmp_path, monkeypatch): + monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path / "data")) + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault-source")) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive-source")) + monkeypatch.setenv("ATOCORE_LOG_DIR", str(tmp_path / "logs")) + monkeypatch.setenv("ATOCORE_BACKUP_DIR", str(tmp_path / "backups")) + + settings = config.Settings() + + assert settings.db_path == (tmp_path / "data" / "db" / "atocore.db").resolve() + assert settings.chroma_path == (tmp_path / "data" / "chroma").resolve() + assert settings.cache_path == (tmp_path / "data" / "cache").resolve() + assert settings.tmp_path == (tmp_path / "data" / "tmp").resolve() + assert settings.resolved_vault_source_dir == (tmp_path / "vault-source").resolve() + assert settings.resolved_drive_source_dir == (tmp_path / "drive-source").resolve() + assert settings.resolved_log_dir == (tmp_path / 
"logs").resolve() + assert settings.resolved_backup_dir == (tmp_path / "backups").resolve() + assert settings.resolved_run_dir == (tmp_path / "run").resolve() + + +def test_settings_keep_legacy_db_path_when_present(tmp_path, monkeypatch): + data_dir = tmp_path / "data" + data_dir.mkdir() + legacy_db = data_dir / "atocore.db" + legacy_db.write_text("", encoding="utf-8") + monkeypatch.setenv("ATOCORE_DATA_DIR", str(data_dir)) + + settings = config.Settings() + + assert settings.db_path == legacy_db.resolve() + + +def test_ensure_runtime_dirs_creates_machine_dirs_only(tmp_path, monkeypatch): + monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path / "data")) + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault-source")) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive-source")) + monkeypatch.setenv("ATOCORE_LOG_DIR", str(tmp_path / "logs")) + monkeypatch.setenv("ATOCORE_BACKUP_DIR", str(tmp_path / "backups")) + + original_settings = config.settings + try: + config.settings = config.Settings() + config.ensure_runtime_dirs() + + assert config.settings.db_path.parent.exists() + assert config.settings.chroma_path.exists() + assert config.settings.cache_path.exists() + assert config.settings.tmp_path.exists() + assert config.settings.resolved_log_dir.exists() + assert config.settings.resolved_backup_dir.exists() + assert config.settings.resolved_run_dir.exists() + assert not config.settings.resolved_vault_source_dir.exists() + assert not config.settings.resolved_drive_source_dir.exists() + finally: + config.settings = original_settings diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 0000000..623f03d --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,18 @@ +"""Tests for logging configuration.""" + +from types import SimpleNamespace + +import atocore.config as config +from atocore.observability.logger import setup_logging + + +def test_setup_logging_uses_dynamic_settings_without_name_error(): + 
original_settings = config.settings + try: + config.settings = SimpleNamespace(debug=False) + setup_logging() + + config.settings = SimpleNamespace(debug=True) + setup_logging() + finally: + config.settings = original_settings diff --git a/tests/test_sources.py b/tests/test_sources.py new file mode 100644 index 0000000..e461462 --- /dev/null +++ b/tests/test_sources.py @@ -0,0 +1,70 @@ +"""Tests for configured source registration and readiness.""" + +import atocore.config as config +from atocore.ingestion.pipeline import get_source_status, ingest_configured_sources + + +def test_get_source_status_reports_read_only_inputs(tmp_path, monkeypatch): + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault")) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive")) + monkeypatch.setenv("ATOCORE_SOURCE_DRIVE_ENABLED", "false") + + original_settings = config.settings + try: + config.settings = config.Settings() + status = get_source_status() + finally: + config.settings = original_settings + + assert status[0]["name"] == "vault" + assert status[0]["enabled"] is True + assert status[0]["read_only"] is True + assert status[0]["exists"] is False + assert status[1]["name"] == "drive" + assert status[1]["enabled"] is False + + +def test_ingest_configured_sources_reports_missing_and_disabled(tmp_path, monkeypatch): + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault")) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive")) + monkeypatch.setenv("ATOCORE_SOURCE_DRIVE_ENABLED", "false") + + original_settings = config.settings + try: + config.settings = config.Settings() + results = ingest_configured_sources() + finally: + config.settings = original_settings + + assert results[0]["source"] == "vault" + assert results[0]["status"] == "missing" + assert results[1]["source"] == "drive" + assert results[1]["status"] == "disabled" + + +def test_ingest_configured_sources_uses_ingest_folder(tmp_path, monkeypatch): + 
vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + vault_dir.mkdir() + drive_dir.mkdir() + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + + calls = [] + + def fake_ingest_folder(path, purge_deleted=True): + calls.append((str(path), purge_deleted)) + return [{"file": str(path / "note.md"), "status": "ingested"}] + + original_settings = config.settings + try: + config.settings = config.Settings() + monkeypatch.setattr("atocore.ingestion.pipeline.ingest_folder", fake_ingest_folder) + results = ingest_configured_sources() + finally: + config.settings = original_settings + + assert len(calls) == 2 + assert all(purge_deleted is False for _, purge_deleted in calls) + assert results[0]["status"] == "ingested" + assert "results" in results[0]