Add Dalidou storage foundation and deployment prep
This commit is contained in:
9
.dockerignore
Normal file
9
.dockerignore
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
.git
|
||||||
|
.pytest_cache
|
||||||
|
.coverage
|
||||||
|
.claude
|
||||||
|
data
|
||||||
|
__pycache__
|
||||||
|
*.pyc
|
||||||
|
tests
|
||||||
|
docs
|
||||||
13
.env.example
13
.env.example
@@ -1,5 +1,18 @@
|
|||||||
|
ATOCORE_ENV=development
|
||||||
ATOCORE_DEBUG=false
|
ATOCORE_DEBUG=false
|
||||||
|
ATOCORE_LOG_LEVEL=INFO
|
||||||
ATOCORE_DATA_DIR=./data
|
ATOCORE_DATA_DIR=./data
|
||||||
|
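# ATOCORE_DB_DIR=      (optional; leave unset to use <ATOCORE_DATA_DIR>/db -- an empty value may be read as the current directory)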
|
||||||
|
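# ATOCORE_CHROMA_DIR=  (optional; leave unset to use <ATOCORE_DATA_DIR>/chroma -- an empty value may be read as the current directory)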
|
||||||
|
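# ATOCORE_CACHE_DIR=   (optional; leave unset to use <ATOCORE_DATA_DIR>/cache -- an empty value may be read as the current directory)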
|
||||||
|
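# ATOCORE_TMP_DIR=     (optional; leave unset to use <ATOCORE_DATA_DIR>/tmp -- an empty value may be read as the current directory)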
|
||||||
|
ATOCORE_VAULT_SOURCE_DIR=./sources/vault
|
||||||
|
ATOCORE_DRIVE_SOURCE_DIR=./sources/drive
|
||||||
|
ATOCORE_SOURCE_VAULT_ENABLED=true
|
||||||
|
ATOCORE_SOURCE_DRIVE_ENABLED=true
|
||||||
|
ATOCORE_LOG_DIR=./logs
|
||||||
|
ATOCORE_BACKUP_DIR=./backups
|
||||||
|
ATOCORE_RUN_DIR=./run
|
||||||
ATOCORE_HOST=127.0.0.1
|
ATOCORE_HOST=127.0.0.1
|
||||||
ATOCORE_PORT=8100
|
ATOCORE_PORT=8100
|
||||||
ATOCORE_EMBEDDING_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
ATOCORE_EMBEDDING_MODEL=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ htmlcov/
|
|||||||
.coverage
|
.coverage
|
||||||
venv/
|
venv/
|
||||||
.venv/
|
.venv/
|
||||||
|
.claude/
|
||||||
|
|||||||
37
AGENTS.md
Normal file
37
AGENTS.md
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
# AGENTS.md
|
||||||
|
|
||||||
|
## Project role
|
||||||
|
This repository is AtoCore, the runtime and machine-memory layer of the Ato ecosystem.
|
||||||
|
|
||||||
|
## Ecosystem definitions
|
||||||
|
- AtoCore = app/runtime/API/ingestion/retrieval/context builder/machine DB logic
|
||||||
|
- AtoMind = future intelligence layer for promotion, reflection, conflict handling, trust decisions
|
||||||
|
- AtoVault = human-readable memory source, intended for Obsidian
|
||||||
|
- AtoDrive = trusted operational project source, higher trust than general vault notes
|
||||||
|
|
||||||
|
## Storage principles
|
||||||
|
- Human-readable source layers and machine operational storage must remain separate
|
||||||
|
- AtoVault is not the live vector database location
|
||||||
|
- AtoDrive is not the live vector database location
|
||||||
|
- Machine operational storage includes SQLite, vector store, indexes, embeddings, and runtime metadata
|
||||||
|
- The machine DB is derived operational state, not the primary human source of truth
|
||||||
|
|
||||||
|
## Deployment principles
|
||||||
|
- Dalidou is the canonical host for AtoCore service and machine database
|
||||||
|
- OpenClaw on the T420 should consume AtoCore over API/network/Tailscale
|
||||||
|
- Do not design around Syncthing for the live SQLite/vector DB
|
||||||
|
- Prefer one canonical running service over multi-node live DB replication
|
||||||
|
|
||||||
|
## Coding guidance
|
||||||
|
- Keep path handling explicit and configurable via environment variables
|
||||||
|
- Do not hard-code machine-specific absolute paths
|
||||||
|
- Keep implementation small, testable, and reversible
|
||||||
|
- Preserve current working behavior unless a change is necessary
|
||||||
|
- Add or update tests when changing config, storage, or path logic
|
||||||
|
|
||||||
|
## Change policy
|
||||||
|
Before large refactors:
|
||||||
|
1. explain the architectural reason
|
||||||
|
2. propose the smallest safe batch
|
||||||
|
3. implement incrementally
|
||||||
|
4. summarize changed files and migration impact
|
||||||
20
Dockerfile
Normal file
20
Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends build-essential curl git \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY pyproject.toml README.md requirements.txt requirements-dev.txt ./
|
||||||
|
COPY src ./src
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir --upgrade pip \
|
||||||
|
&& pip install --no-cache-dir .
|
||||||
|
|
||||||
|
EXPOSE 8100
|
||||||
|
|
||||||
|
CMD ["python", "-m", "uvicorn", "atocore.main:app", "--host", "0.0.0.0", "--port", "8100"]
|
||||||
19
deploy/dalidou/.env.example
Normal file
19
deploy/dalidou/.env.example
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
ATOCORE_ENV=production
|
||||||
|
ATOCORE_DEBUG=false
|
||||||
|
ATOCORE_LOG_LEVEL=INFO
|
||||||
|
ATOCORE_HOST=0.0.0.0
|
||||||
|
ATOCORE_PORT=8100
|
||||||
|
|
||||||
|
ATOCORE_DATA_DIR=/srv/storage/atocore/data
|
||||||
|
ATOCORE_DB_DIR=/srv/storage/atocore/data/db
|
||||||
|
ATOCORE_CHROMA_DIR=/srv/storage/atocore/data/chroma
|
||||||
|
ATOCORE_CACHE_DIR=/srv/storage/atocore/data/cache
|
||||||
|
ATOCORE_TMP_DIR=/srv/storage/atocore/data/tmp
|
||||||
|
ATOCORE_LOG_DIR=/srv/storage/atocore/logs
|
||||||
|
ATOCORE_BACKUP_DIR=/srv/storage/atocore/backups
|
||||||
|
ATOCORE_RUN_DIR=/srv/storage/atocore/run
|
||||||
|
|
||||||
|
ATOCORE_VAULT_SOURCE_DIR=/srv/storage/atocore/sources/vault
|
||||||
|
ATOCORE_DRIVE_SOURCE_DIR=/srv/storage/atocore/sources/drive
|
||||||
|
ATOCORE_SOURCE_VAULT_ENABLED=true
|
||||||
|
ATOCORE_SOURCE_DRIVE_ENABLED=true
|
||||||
27
deploy/dalidou/docker-compose.yml
Normal file
27
deploy/dalidou/docker-compose.yml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
services:
|
||||||
|
atocore:
|
||||||
|
build:
|
||||||
|
context: ../../
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: atocore
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "${ATOCORE_PORT:-8100}:8100"
|
||||||
|
env_file:
|
||||||
|
- .env
|
||||||
|
volumes:
|
||||||
|
- ${ATOCORE_DB_DIR}:${ATOCORE_DB_DIR}
|
||||||
|
- ${ATOCORE_CHROMA_DIR}:${ATOCORE_CHROMA_DIR}
|
||||||
|
- ${ATOCORE_CACHE_DIR}:${ATOCORE_CACHE_DIR}
|
||||||
|
- ${ATOCORE_TMP_DIR}:${ATOCORE_TMP_DIR}
|
||||||
|
- ${ATOCORE_LOG_DIR}:${ATOCORE_LOG_DIR}
|
||||||
|
- ${ATOCORE_BACKUP_DIR}:${ATOCORE_BACKUP_DIR}
|
||||||
|
- ${ATOCORE_RUN_DIR}:${ATOCORE_RUN_DIR}
|
||||||
|
- ${ATOCORE_VAULT_SOURCE_DIR}:${ATOCORE_VAULT_SOURCE_DIR}:ro
|
||||||
|
- ${ATOCORE_DRIVE_SOURCE_DIR}:${ATOCORE_DRIVE_SOURCE_DIR}:ro
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8100/health"]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 5
|
||||||
|
start_period: 20s
|
||||||
77
docs/dalidou-deployment.md
Normal file
77
docs/dalidou-deployment.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# Dalidou Deployment
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
Deploy AtoCore on Dalidou as the canonical runtime and machine-memory host.
|
||||||
|
|
||||||
|
## Model
|
||||||
|
|
||||||
|
- Dalidou hosts the canonical AtoCore service.
|
||||||
|
- OpenClaw on the T420 consumes AtoCore over network/Tailscale API.
|
||||||
|
- `sources/vault` and `sources/drive` are read-only inputs by convention; the compose file also mounts them `:ro`.
|
||||||
|
- SQLite/Chroma machine state stays on Dalidou and is not treated as a sync peer.
|
||||||
|
|
||||||
|
## Directory layout
|
||||||
|
|
||||||
|
```text
|
||||||
|
/srv/storage/atocore/
|
||||||
|
app/ # deployed repo checkout
|
||||||
|
data/
|
||||||
|
db/
|
||||||
|
chroma/
|
||||||
|
cache/
|
||||||
|
tmp/
|
||||||
|
sources/
|
||||||
|
vault/
|
||||||
|
drive/
|
||||||
|
logs/
|
||||||
|
backups/
|
||||||
|
run/
|
||||||
|
```
|
||||||
|
|
||||||
|
## Compose workflow
|
||||||
|
|
||||||
|
The compose definition lives in:
|
||||||
|
|
||||||
|
```text
|
||||||
|
deploy/dalidou/docker-compose.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
The Dalidou environment file should be copied to:
|
||||||
|
|
||||||
|
```text
|
||||||
|
deploy/dalidou/.env
|
||||||
|
```
|
||||||
|
|
||||||
|
starting from:
|
||||||
|
|
||||||
|
```text
|
||||||
|
deploy/dalidou/.env.example
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment steps
|
||||||
|
|
||||||
|
1. Place the repository under `/srv/storage/atocore/app`.
|
||||||
|
2. Create the canonical directories listed above.
|
||||||
|
3. Copy `deploy/dalidou/.env.example` to `deploy/dalidou/.env`.
|
||||||
|
4. Adjust the source paths if your AtoVault/AtoDrive mirrors live elsewhere.
|
||||||
|
5. Run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /srv/storage/atocore/app/deploy/dalidou
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Validate:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://127.0.0.1:8100/health
|
||||||
|
curl http://127.0.0.1:8100/sources
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deferred
|
||||||
|
|
||||||
|
- backup automation
|
||||||
|
- restore/snapshot tooling
|
||||||
|
- reverse proxy / TLS exposure
|
||||||
|
- automated source ingestion job
|
||||||
|
- OpenClaw client wiring
|
||||||
61
docs/dalidou-storage-migration.md
Normal file
61
docs/dalidou-storage-migration.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Dalidou Storage Migration
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
Establish Dalidou as the canonical AtoCore host while keeping human-readable
|
||||||
|
source layers separate from machine operational storage.
|
||||||
|
|
||||||
|
## Canonical layout
|
||||||
|
|
||||||
|
```text
|
||||||
|
/srv/atocore/
|
||||||
|
app/ # git checkout of this repository
|
||||||
|
data/ # machine operational state
|
||||||
|
db/
|
||||||
|
atocore.db
|
||||||
|
chroma/
|
||||||
|
cache/
|
||||||
|
tmp/
|
||||||
|
sources/
|
||||||
|
vault/ # AtoVault input, read-only by convention
|
||||||
|
drive/ # AtoDrive input, read-only by convention
|
||||||
|
logs/
|
||||||
|
backups/
|
||||||
|
run/
|
||||||
|
config/
|
||||||
|
.env
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
|
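Suggested Dalidou values (adjust the root to match the deployment; `deploy/dalidou/.env.example` uses `/srv/storage/atocore` rather than `/srv/atocore`):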
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ATOCORE_ENV=production
|
||||||
|
ATOCORE_DATA_DIR=/srv/atocore/data
|
||||||
|
ATOCORE_DB_DIR=/srv/atocore/data/db
|
||||||
|
ATOCORE_CHROMA_DIR=/srv/atocore/data/chroma
|
||||||
|
ATOCORE_CACHE_DIR=/srv/atocore/data/cache
|
||||||
|
ATOCORE_TMP_DIR=/srv/atocore/data/tmp
|
||||||
|
ATOCORE_VAULT_SOURCE_DIR=/srv/atocore/sources/vault
|
||||||
|
ATOCORE_DRIVE_SOURCE_DIR=/srv/atocore/sources/drive
|
||||||
|
ATOCORE_LOG_DIR=/srv/atocore/logs
|
||||||
|
ATOCORE_BACKUP_DIR=/srv/atocore/backups
|
||||||
|
ATOCORE_RUN_DIR=/srv/atocore/run
|
||||||
|
```
|
||||||
|
|
||||||
|
## Migration notes
|
||||||
|
|
||||||
|
- Existing local installs remain backward-compatible.
|
||||||
|
- If `data/atocore.db` already exists and `ATOCORE_DB_DIR` is not set, AtoCore continues using it.
|
||||||
|
- Fresh installs default to `data/db/atocore.db`.
|
||||||
|
- Source directories are inputs only; AtoCore should ingest from them but not
|
||||||
|
treat them as writable runtime state.
|
||||||
|
- Avoid syncing live SQLite/Chroma state between Dalidou and other machines.
|
||||||
|
Prefer one canonical running service and API access from OpenClaw.
|
||||||
|
|
||||||
|
## Deferred work
|
||||||
|
|
||||||
|
- service manager wiring
|
||||||
|
- backup/snapshot procedures
|
||||||
|
- automated source registration jobs
|
||||||
|
- OpenClaw integration
|
||||||
@@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
import atocore.config as _config
|
||||||
from atocore.context.builder import (
|
from atocore.context.builder import (
|
||||||
build_context,
|
build_context,
|
||||||
get_last_context_pack,
|
get_last_context_pack,
|
||||||
@@ -16,7 +17,13 @@ from atocore.context.project_state import (
|
|||||||
invalidate_state,
|
invalidate_state,
|
||||||
set_state,
|
set_state,
|
||||||
)
|
)
|
||||||
from atocore.ingestion.pipeline import ingest_file, ingest_folder, get_ingestion_stats
|
from atocore.ingestion.pipeline import (
|
||||||
|
get_ingestion_stats,
|
||||||
|
get_source_status,
|
||||||
|
ingest_configured_sources,
|
||||||
|
ingest_file,
|
||||||
|
ingest_folder,
|
||||||
|
)
|
||||||
from atocore.memory.service import (
|
from atocore.memory.service import (
|
||||||
MEMORY_TYPES,
|
MEMORY_TYPES,
|
||||||
create_memory,
|
create_memory,
|
||||||
@@ -44,6 +51,10 @@ class IngestResponse(BaseModel):
|
|||||||
results: list[dict]
|
results: list[dict]
|
||||||
|
|
||||||
|
|
||||||
|
# Response envelope returned by POST /ingest/sources.
class IngestSourcesResponse(BaseModel):
    # One dict per configured source, as produced by ingest_configured_sources().
    results: list[dict]
|
||||||
|
|
||||||
|
|
||||||
class QueryRequest(BaseModel):
|
class QueryRequest(BaseModel):
|
||||||
prompt: str
|
prompt: str
|
||||||
top_k: int = 10
|
top_k: int = 10
|
||||||
@@ -126,6 +137,17 @@ def api_ingest(req: IngestRequest) -> IngestResponse:
|
|||||||
return IngestResponse(results=results)
|
return IngestResponse(results=results)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/ingest/sources", response_model=IngestSourcesResponse)
def api_ingest_sources() -> IngestSourcesResponse:
    """Ingest every enabled, configured source directory.

    Returns:
        The per-source result list produced by ``ingest_configured_sources``.

    Raises:
        HTTPException: status 500 when ingestion raises; the original
            exception is chained as the cause so tracebacks keep the root
            error.
    """
    try:
        results = ingest_configured_sources()
    except Exception as e:
        # API boundary: log the failure, then surface it as an HTTP 500.
        log.error("ingest_sources_failed", error=str(e))
        raise HTTPException(
            status_code=500,
            detail=f"Configured source ingestion failed: {e}",
        ) from e
    return IngestSourcesResponse(results=results)
|
||||||
|
|
||||||
|
|
||||||
@router.post("/query", response_model=QueryResponse)
|
@router.post("/query", response_model=QueryResponse)
|
||||||
def api_query(req: QueryRequest) -> QueryResponse:
|
def api_query(req: QueryRequest) -> QueryResponse:
|
||||||
"""Retrieve relevant chunks for a prompt."""
|
"""Retrieve relevant chunks for a prompt."""
|
||||||
@@ -304,10 +326,34 @@ def api_invalidate_project_state(req: ProjectStateInvalidateRequest) -> dict:
|
|||||||
def api_health() -> dict:
|
def api_health() -> dict:
|
||||||
"""Health check."""
|
"""Health check."""
|
||||||
store = get_vector_store()
|
store = get_vector_store()
|
||||||
|
source_status = get_source_status()
|
||||||
return {
|
return {
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"vectors_count": store.count,
|
"vectors_count": store.count,
|
||||||
|
"env": _config.settings.env,
|
||||||
|
"machine_paths": {
|
||||||
|
"db_path": str(_config.settings.db_path),
|
||||||
|
"chroma_path": str(_config.settings.chroma_path),
|
||||||
|
"log_dir": str(_config.settings.resolved_log_dir),
|
||||||
|
"backup_dir": str(_config.settings.resolved_backup_dir),
|
||||||
|
"run_dir": str(_config.settings.resolved_run_dir),
|
||||||
|
},
|
||||||
|
"sources_ready": all(
|
||||||
|
(not source["enabled"]) or (source["exists"] and source["is_dir"])
|
||||||
|
for source in source_status
|
||||||
|
),
|
||||||
|
"source_status": source_status,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/sources")
def api_sources() -> dict:
    """Report the configured ingestion sources and the per-source enable flags."""
    statuses = get_source_status()
    current = _config.settings
    payload = {
        "sources": statuses,
        "vault_enabled": current.source_vault_enabled,
        "drive_enabled": current.source_drive_enabled,
    }
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,21 @@ from pydantic_settings import BaseSettings
|
|||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
|
env: str = "development"
|
||||||
debug: bool = False
|
debug: bool = False
|
||||||
|
log_level: str = "INFO"
|
||||||
data_dir: Path = Path("./data")
|
data_dir: Path = Path("./data")
|
||||||
|
db_dir: Path | None = None
|
||||||
|
chroma_dir: Path | None = None
|
||||||
|
cache_dir: Path | None = None
|
||||||
|
tmp_dir: Path | None = None
|
||||||
|
vault_source_dir: Path = Path("./sources/vault")
|
||||||
|
drive_source_dir: Path = Path("./sources/drive")
|
||||||
|
source_vault_enabled: bool = True
|
||||||
|
source_drive_enabled: bool = True
|
||||||
|
log_dir: Path = Path("./logs")
|
||||||
|
backup_dir: Path = Path("./backups")
|
||||||
|
run_dir: Path = Path("./run")
|
||||||
host: str = "127.0.0.1"
|
host: str = "127.0.0.1"
|
||||||
port: int = 8100
|
port: int = 8100
|
||||||
|
|
||||||
@@ -29,11 +42,100 @@ class Settings(BaseSettings):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def db_path(self) -> Path:
|
def db_path(self) -> Path:
|
||||||
return self.data_dir / "atocore.db"
|
legacy_path = self.resolved_data_dir / "atocore.db"
|
||||||
|
if self.db_dir is not None:
|
||||||
|
return self.resolved_db_dir / "atocore.db"
|
||||||
|
if legacy_path.exists():
|
||||||
|
return legacy_path
|
||||||
|
return self.resolved_db_dir / "atocore.db"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def chroma_path(self) -> Path:
|
def chroma_path(self) -> Path:
|
||||||
return self.data_dir / "chroma"
|
return self._resolve_path(self.chroma_dir or (self.resolved_data_dir / "chroma"))
|
||||||
|
|
||||||
|
@property
def cache_path(self) -> Path:
    """Resolved cache directory: ``cache_dir`` when set, else ``<data dir>/cache``."""
    chosen = self.cache_dir
    if not chosen:
        chosen = self.resolved_data_dir / "cache"
    return self._resolve_path(chosen)
|
||||||
|
|
||||||
|
@property
def tmp_path(self) -> Path:
    """Resolved scratch directory: ``tmp_dir`` when set, else ``<data dir>/tmp``."""
    chosen = self.tmp_dir
    if not chosen:
        chosen = self.resolved_data_dir / "tmp"
    return self._resolve_path(chosen)
|
||||||
|
|
||||||
|
@property
def resolved_data_dir(self) -> Path:
    """``data_dir`` passed through ``_resolve_path``."""
    configured = self.data_dir
    return self._resolve_path(configured)
|
||||||
|
|
||||||
|
@property
def resolved_db_dir(self) -> Path:
    """Resolved database directory: ``db_dir`` when set, else ``<data dir>/db``."""
    chosen = self.db_dir
    if not chosen:
        chosen = self.resolved_data_dir / "db"
    return self._resolve_path(chosen)
|
||||||
|
|
||||||
|
@property
def resolved_vault_source_dir(self) -> Path:
    """``vault_source_dir`` passed through ``_resolve_path``."""
    configured = self.vault_source_dir
    return self._resolve_path(configured)
|
||||||
|
|
||||||
|
@property
def resolved_drive_source_dir(self) -> Path:
    """``drive_source_dir`` passed through ``_resolve_path``."""
    configured = self.drive_source_dir
    return self._resolve_path(configured)
|
||||||
|
|
||||||
|
@property
def resolved_log_dir(self) -> Path:
    """``log_dir`` passed through ``_resolve_path``."""
    configured = self.log_dir
    return self._resolve_path(configured)
|
||||||
|
|
||||||
|
@property
def resolved_backup_dir(self) -> Path:
    """``backup_dir`` passed through ``_resolve_path``."""
    configured = self.backup_dir
    return self._resolve_path(configured)
|
||||||
|
|
||||||
|
@property
def resolved_run_dir(self) -> Path:
    """Resolved runtime directory.

    When ``run_dir`` equals ``Path("./run")`` the directory is placed next
    to the data directory (``<data dir parent>/run``); any other value is
    resolved as given. NOTE(review): an explicitly configured ``./run``
    compares equal to that value and therefore also lands beside the data
    directory.
    """
    uses_default = self.run_dir == Path("./run")
    target = self.resolved_data_dir.parent / "run" if uses_default else self.run_dir
    return self._resolve_path(target)
|
||||||
|
|
||||||
|
@property
def machine_dirs(self) -> list[Path]:
    """Directories holding machine state: DB parent, chroma, cache, tmp, logs, backups, run."""
    ordered = (
        self.db_path.parent,
        self.chroma_path,
        self.cache_path,
        self.tmp_path,
        self.resolved_log_dir,
        self.resolved_backup_dir,
        self.resolved_run_dir,
    )
    return list(ordered)
|
||||||
|
|
||||||
|
@property
def source_specs(self) -> list[dict[str, object]]:
    """Describe the vault and drive sources, in that order.

    Each entry carries ``name``, ``enabled``, the resolved ``path`` and a
    ``read_only`` flag that is always ``True``.
    """

    def _spec(name: str, enabled: bool, path: Path) -> dict[str, object]:
        return {"name": name, "enabled": enabled, "path": path, "read_only": True}

    vault = _spec("vault", self.source_vault_enabled, self.resolved_vault_source_dir)
    drive = _spec("drive", self.source_drive_enabled, self.resolved_drive_source_dir)
    return [vault, drive]
|
||||||
|
|
||||||
|
@property
def source_dirs(self) -> list[Path]:
    """Paths of the sources whose ``enabled`` flag is truthy, in spec order."""
    enabled_paths = []
    for entry in self.source_specs:
        if entry["enabled"]:
            enabled_paths.append(entry["path"])
    return enabled_paths
|
||||||
|
|
||||||
|
def _resolve_path(self, path: Path) -> Path:
|
||||||
|
return path.expanduser().resolve(strict=False)
|
||||||
|
|
||||||
|
|
||||||
settings = Settings()
|
settings = Settings()
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_runtime_dirs() -> None:
    """Make sure every machine-state directory exists on disk.

    Walks ``settings.machine_dirs`` and creates each entry together with
    any missing parents; directories that already exist are left alone.
    Source directories are not in that list, so they are never created
    here -- they are treated as read-only ingestion inputs.
    """
    for machine_dir in settings.machine_dirs:
        machine_dir.mkdir(exist_ok=True, parents=True)
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import time
|
|||||||
import uuid
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import atocore.config as _config
|
||||||
from atocore.ingestion.chunker import chunk_markdown
|
from atocore.ingestion.chunker import chunk_markdown
|
||||||
from atocore.ingestion.parser import parse_markdown
|
from atocore.ingestion.parser import parse_markdown
|
||||||
from atocore.models.database import get_connection
|
from atocore.models.database import get_connection
|
||||||
@@ -189,6 +190,52 @@ def ingest_folder(folder_path: Path, purge_deleted: bool = True) -> list[dict]:
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def get_source_status() -> list[dict]:
    """Report each configured source with its path and on-disk readiness.

    Returns:
        One dict per entry of ``settings.source_specs`` with the keys
        ``name``, ``enabled``, ``path`` (as a string), ``exists``,
        ``is_dir`` and ``read_only``.
    """

    def _describe(spec: dict) -> dict:
        location = spec["path"]
        # Narrow the loosely typed spec value before using Path methods.
        assert isinstance(location, Path)
        return {
            "name": spec["name"],
            "enabled": spec["enabled"],
            "path": str(location),
            "exists": location.exists(),
            "is_dir": location.is_dir(),
            "read_only": spec["read_only"],
        }

    return [_describe(spec) for spec in _config.settings.source_specs]
|
||||||
|
|
||||||
|
|
||||||
|
def ingest_configured_sources(purge_deleted: bool = False) -> list[dict]:
    """Run folder ingestion for each enabled source that is present on disk.

    Args:
        purge_deleted: Forwarded to ``ingest_folder``. Off by default
            because sources are read-only inputs, not the primary
            writable state.

    Returns:
        One entry per configured source with ``source``, ``status``
        (``"disabled"``, ``"missing"`` or ``"ingested"``) and ``path``;
        ingested entries also carry the folder ``results``.
    """
    outcomes = []
    for entry in get_source_status():
        name, location = entry["name"], entry["path"]
        if not entry["enabled"]:
            outcome = {"source": name, "status": "disabled", "path": location}
        elif not (entry["exists"] and entry["is_dir"]):
            # A path that is absent or is not a directory is reported the same way.
            outcome = {"source": name, "status": "missing", "path": location}
        else:
            outcome = {
                "source": name,
                "status": "ingested",
                "path": location,
                "results": ingest_folder(Path(location), purge_deleted=purge_deleted),
            }
        outcomes.append(outcome)
    return outcomes
|
||||||
|
|
||||||
|
|
||||||
def get_ingestion_stats() -> dict:
|
def get_ingestion_stats() -> dict:
|
||||||
"""Return ingestion statistics."""
|
"""Return ingestion statistics."""
|
||||||
with get_connection() as conn:
|
with get_connection() as conn:
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ from fastapi import FastAPI
|
|||||||
from atocore.api.routes import router
|
from atocore.api.routes import router
|
||||||
import atocore.config as _config
|
import atocore.config as _config
|
||||||
from atocore.context.project_state import init_project_state_schema
|
from atocore.context.project_state import init_project_state_schema
|
||||||
|
from atocore.ingestion.pipeline import get_source_status
|
||||||
from atocore.models.database import init_db
|
from atocore.models.database import init_db
|
||||||
from atocore.observability.logger import setup_logging
|
from atocore.observability.logger import get_logger, setup_logging
|
||||||
|
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
title="AtoCore",
|
title="AtoCore",
|
||||||
@@ -15,13 +16,22 @@ app = FastAPI(
|
|||||||
)
|
)
|
||||||
|
|
||||||
app.include_router(router)
|
app.include_router(router)
|
||||||
|
log = get_logger("main")
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
def startup():
|
def startup():
|
||||||
setup_logging()
|
setup_logging()
|
||||||
|
_config.ensure_runtime_dirs()
|
||||||
init_db()
|
init_db()
|
||||||
init_project_state_schema()
|
init_project_state_schema()
|
||||||
|
log.info(
|
||||||
|
"startup_ready",
|
||||||
|
env=_config.settings.env,
|
||||||
|
db_path=str(_config.settings.db_path),
|
||||||
|
chroma_path=str(_config.settings.chroma_path),
|
||||||
|
source_status=get_source_status(),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
|
|||||||
|
|
||||||
|
|
||||||
def _ensure_data_dir() -> None:
|
def _ensure_data_dir() -> None:
|
||||||
_config.settings.data_dir.mkdir(parents=True, exist_ok=True)
|
_config.ensure_runtime_dirs()
|
||||||
|
|
||||||
|
|
||||||
def init_db() -> None:
|
def init_db() -> None:
|
||||||
|
|||||||
@@ -16,15 +16,18 @@ _LOG_LEVELS = {
|
|||||||
def setup_logging() -> None:
|
def setup_logging() -> None:
|
||||||
"""Configure structlog with JSON output."""
|
"""Configure structlog with JSON output."""
|
||||||
log_level = "DEBUG" if _config.settings.debug else "INFO"
|
log_level = "DEBUG" if _config.settings.debug else "INFO"
|
||||||
|
renderer = (
|
||||||
|
structlog.dev.ConsoleRenderer()
|
||||||
|
if _config.settings.debug
|
||||||
|
else structlog.processors.JSONRenderer()
|
||||||
|
)
|
||||||
|
|
||||||
structlog.configure(
|
structlog.configure(
|
||||||
processors=[
|
processors=[
|
||||||
structlog.contextvars.merge_contextvars,
|
structlog.contextvars.merge_contextvars,
|
||||||
structlog.processors.add_log_level,
|
structlog.processors.add_log_level,
|
||||||
structlog.processors.TimeStamper(fmt="iso"),
|
structlog.processors.TimeStamper(fmt="iso"),
|
||||||
structlog.dev.ConsoleRenderer()
|
renderer,
|
||||||
if settings.debug
|
|
||||||
else structlog.processors.JSONRenderer(),
|
|
||||||
],
|
],
|
||||||
wrapper_class=structlog.make_filtering_bound_logger(
|
wrapper_class=structlog.make_filtering_bound_logger(
|
||||||
_LOG_LEVELS.get(log_level, logging.INFO)
|
_LOG_LEVELS.get(log_level, logging.INFO)
|
||||||
|
|||||||
48
tests/test_api_storage.py
Normal file
48
tests/test_api_storage.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""Tests for storage-related API readiness endpoints."""
|
||||||
|
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
import atocore.config as config
|
||||||
|
from atocore.main import app
|
||||||
|
|
||||||
|
|
||||||
|
def test_sources_endpoint_reports_configured_sources(tmp_data_dir, monkeypatch):
    """GET /sources reports both configured sources as enabled and read-only."""
    vault_dir = tmp_data_dir / "vault-source"
    drive_dir = tmp_data_dir / "drive-source"
    vault_dir.mkdir()
    drive_dir.mkdir()

    monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir))
    monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir))

    # Swap in settings built from the patched environment and always put the
    # previous object back, so the global does not leak into later tests.
    original_settings = config.settings
    try:
        config.settings = config.Settings()
        client = TestClient(app)
        response = client.get("/sources")
    finally:
        config.settings = original_settings

    assert response.status_code == 200
    body = response.json()
    assert body["vault_enabled"] is True
    assert body["drive_enabled"] is True
    assert len(body["sources"]) == 2
    assert all(source["read_only"] for source in body["sources"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_health_endpoint_exposes_machine_paths_and_source_readiness(tmp_data_dir, monkeypatch):
    """GET /health reports ok, ready sources, and the machine path map."""
    vault_dir = tmp_data_dir / "vault-source"
    drive_dir = tmp_data_dir / "drive-source"
    vault_dir.mkdir()
    drive_dir.mkdir()

    monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir))
    monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir))

    # Swap in settings built from the patched environment and always put the
    # previous object back, so the global does not leak into later tests.
    original_settings = config.settings
    try:
        config.settings = config.Settings()
        client = TestClient(app)
        response = client.get("/health")
    finally:
        config.settings = original_settings

    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "ok"
    assert body["sources_ready"] is True
    assert "db_path" in body["machine_paths"]
    assert "run_dir" in body["machine_paths"]
|
||||||
63
tests/test_config.py
Normal file
63
tests/test_config.py
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
"""Tests for configuration and canonical path boundaries."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import atocore.config as config
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_resolve_canonical_directories(tmp_path, monkeypatch):
    """Derived paths resolve under the configured data, source, log and backup roots."""
    env = {
        "ATOCORE_DATA_DIR": tmp_path / "data",
        "ATOCORE_VAULT_SOURCE_DIR": tmp_path / "vault-source",
        "ATOCORE_DRIVE_SOURCE_DIR": tmp_path / "drive-source",
        "ATOCORE_LOG_DIR": tmp_path / "logs",
        "ATOCORE_BACKUP_DIR": tmp_path / "backups",
    }
    for variable, location in env.items():
        monkeypatch.setenv(variable, str(location))

    settings = config.Settings()

    expected = {
        "db_path": tmp_path / "data" / "db" / "atocore.db",
        "chroma_path": tmp_path / "data" / "chroma",
        "cache_path": tmp_path / "data" / "cache",
        "tmp_path": tmp_path / "data" / "tmp",
        "resolved_vault_source_dir": tmp_path / "vault-source",
        "resolved_drive_source_dir": tmp_path / "drive-source",
        "resolved_log_dir": tmp_path / "logs",
        "resolved_backup_dir": tmp_path / "backups",
        # run_dir is left at its default, so it lands beside the data dir.
        "resolved_run_dir": tmp_path / "run",
    }
    for attribute, location in expected.items():
        assert getattr(settings, attribute) == location.resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def test_settings_keep_legacy_db_path_when_present(tmp_path, monkeypatch):
    """An existing ``<data dir>/atocore.db`` keeps being used as the DB path."""
    legacy_db = tmp_path / "data" / "atocore.db"
    legacy_db.parent.mkdir()
    legacy_db.write_text("", encoding="utf-8")
    monkeypatch.setenv("ATOCORE_DATA_DIR", str(legacy_db.parent))

    resolved = config.Settings().db_path

    assert resolved == legacy_db.resolve()
|
||||||
|
|
||||||
|
|
||||||
|
def test_ensure_runtime_dirs_creates_machine_dirs_only(tmp_path, monkeypatch):
    """``ensure_runtime_dirs`` creates machine directories but never source directories."""
    for variable, leaf in (
        ("ATOCORE_DATA_DIR", "data"),
        ("ATOCORE_VAULT_SOURCE_DIR", "vault-source"),
        ("ATOCORE_DRIVE_SOURCE_DIR", "drive-source"),
        ("ATOCORE_LOG_DIR", "logs"),
        ("ATOCORE_BACKUP_DIR", "backups"),
    ):
        monkeypatch.setenv(variable, str(tmp_path / leaf))

    saved = config.settings
    try:
        config.settings = config.Settings()
        config.ensure_runtime_dirs()

        active = config.settings
        for created in (
            active.db_path.parent,
            active.chroma_path,
            active.cache_path,
            active.tmp_path,
            active.resolved_log_dir,
            active.resolved_backup_dir,
            active.resolved_run_dir,
        ):
            assert created.exists()
        for untouched in (
            active.resolved_vault_source_dir,
            active.resolved_drive_source_dir,
        ):
            assert not untouched.exists()
    finally:
        config.settings = saved
|
||||||
18
tests/test_logging.py
Normal file
18
tests/test_logging.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""Tests for logging configuration."""
|
||||||
|
|
||||||
|
from types import SimpleNamespace
|
||||||
|
|
||||||
|
import atocore.config as config
|
||||||
|
from atocore.observability.logger import setup_logging
|
||||||
|
|
||||||
|
|
||||||
|
def test_setup_logging_uses_dynamic_settings_without_name_error():
    """``setup_logging`` runs with ``debug`` off and then on, reading ``config.settings`` each time."""
    saved = config.settings
    try:
        for debug_flag in (False, True):
            config.settings = SimpleNamespace(debug=debug_flag)
            setup_logging()
    finally:
        config.settings = saved
|
||||||
70
tests/test_sources.py
Normal file
70
tests/test_sources.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
"""Tests for configured source registration and readiness."""
|
||||||
|
|
||||||
|
import atocore.config as config
|
||||||
|
from atocore.ingestion.pipeline import get_source_status, ingest_configured_sources
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_source_status_reports_read_only_inputs(tmp_path, monkeypatch):
    """Status lists vault first (enabled, read-only, absent) and drive second (disabled)."""
    for variable, value in (
        ("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault")),
        ("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive")),
        ("ATOCORE_SOURCE_DRIVE_ENABLED", "false"),
    ):
        monkeypatch.setenv(variable, value)

    saved = config.settings
    try:
        config.settings = config.Settings()
        status = get_source_status()
    finally:
        config.settings = saved

    vault, drive = status[0], status[1]
    assert vault["name"] == "vault"
    assert vault["enabled"] is True
    assert vault["read_only"] is True
    assert vault["exists"] is False
    assert drive["name"] == "drive"
    assert drive["enabled"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_configured_sources_reports_missing_and_disabled(tmp_path, monkeypatch):
    """An absent vault is reported as missing and a switched-off drive as disabled."""
    for variable, value in (
        ("ATOCORE_VAULT_SOURCE_DIR", str(tmp_path / "vault")),
        ("ATOCORE_DRIVE_SOURCE_DIR", str(tmp_path / "drive")),
        ("ATOCORE_SOURCE_DRIVE_ENABLED", "false"),
    ):
        monkeypatch.setenv(variable, value)

    saved = config.settings
    try:
        config.settings = config.Settings()
        results = ingest_configured_sources()
    finally:
        config.settings = saved

    vault, drive = results[0], results[1]
    assert vault["source"] == "vault"
    assert vault["status"] == "missing"
    assert drive["source"] == "drive"
    assert drive["status"] == "disabled"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ingest_configured_sources_uses_ingest_folder(tmp_path, monkeypatch):
    """Both existing sources go through ``ingest_folder`` with purging switched off."""
    source_dirs = {
        "ATOCORE_VAULT_SOURCE_DIR": tmp_path / "vault",
        "ATOCORE_DRIVE_SOURCE_DIR": tmp_path / "drive",
    }
    for directory in source_dirs.values():
        directory.mkdir()
    for variable, directory in source_dirs.items():
        monkeypatch.setenv(variable, str(directory))

    calls = []

    def fake_ingest_folder(path, purge_deleted=True):
        # Record the call instead of touching the real pipeline.
        calls.append((str(path), purge_deleted))
        return [{"file": str(path / "note.md"), "status": "ingested"}]

    saved = config.settings
    try:
        config.settings = config.Settings()
        monkeypatch.setattr("atocore.ingestion.pipeline.ingest_folder", fake_ingest_folder)
        results = ingest_configured_sources()
    finally:
        config.settings = saved

    assert len(calls) == 2
    assert all(purge_flag is False for _, purge_flag in calls)
    first = results[0]
    assert first["status"] == "ingested"
    assert "results" in first
|
||||||
Reference in New Issue
Block a user