Add Dalidou storage foundation and deployment prep

This commit is contained in:
2026-04-05 18:33:52 -04:00
parent b0889b3925
commit 6bfa1fcc37
19 changed files with 679 additions and 8 deletions

View File

@@ -5,6 +5,7 @@ from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
import atocore.config as _config
from atocore.context.builder import (
build_context,
get_last_context_pack,
@@ -16,7 +17,13 @@ from atocore.context.project_state import (
invalidate_state,
set_state,
)
from atocore.ingestion.pipeline import ingest_file, ingest_folder, get_ingestion_stats
from atocore.ingestion.pipeline import (
get_ingestion_stats,
get_source_status,
ingest_configured_sources,
ingest_file,
ingest_folder,
)
from atocore.memory.service import (
MEMORY_TYPES,
create_memory,
@@ -44,6 +51,10 @@ class IngestResponse(BaseModel):
results: list[dict]
class IngestSourcesResponse(BaseModel):
results: list[dict]
class QueryRequest(BaseModel):
prompt: str
top_k: int = 10
@@ -126,6 +137,17 @@ def api_ingest(req: IngestRequest) -> IngestResponse:
return IngestResponse(results=results)
@router.post("/ingest/sources", response_model=IngestSourcesResponse)
def api_ingest_sources() -> IngestSourcesResponse:
    """Ingest enabled configured source directories.

    Returns:
        IngestSourcesResponse: per-source ingestion results.

    Raises:
        HTTPException: 500 when configured-source ingestion fails.
    """
    try:
        results = ingest_configured_sources()
    except Exception as e:
        log.error("ingest_sources_failed", error=str(e))
        # Chain the original exception so the root cause survives in tracebacks.
        raise HTTPException(
            status_code=500,
            detail=f"Configured source ingestion failed: {e}",
        ) from e
    return IngestSourcesResponse(results=results)
@router.post("/query", response_model=QueryResponse)
def api_query(req: QueryRequest) -> QueryResponse:
"""Retrieve relevant chunks for a prompt."""
@@ -304,10 +326,34 @@ def api_invalidate_project_state(req: ProjectStateInvalidateRequest) -> dict:
def api_health() -> dict:
    """Health check: service status, resolved machine paths, and source readiness."""
    store = get_vector_store()
    source_status = get_source_status()
    cfg = _config.settings
    machine_paths = {
        "db_path": str(cfg.db_path),
        "chroma_path": str(cfg.chroma_path),
        "log_dir": str(cfg.resolved_log_dir),
        "backup_dir": str(cfg.resolved_backup_dir),
        "run_dir": str(cfg.resolved_run_dir),
    }
    # A source is "ready" when it is disabled, or exists as a directory.
    sources_ready = True
    for source in source_status:
        if source["enabled"] and not (source["exists"] and source["is_dir"]):
            sources_ready = False
            break
    return {
        "status": "ok",
        "version": "0.1.0",
        "vectors_count": store.count,
        "env": cfg.env,
        "machine_paths": machine_paths,
        "sources_ready": sources_ready,
        "source_status": source_status,
    }
@router.get("/sources")
def api_sources() -> dict:
    """Return configured ingestion source directories and readiness."""
    cfg = _config.settings
    payload = {
        "sources": get_source_status(),
        "vault_enabled": cfg.source_vault_enabled,
        "drive_enabled": cfg.source_drive_enabled,
    }
    return payload

View File

@@ -6,8 +6,21 @@ from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    # Deployment environment name; surfaced via /health.
    env: str = "development"
    # When True, logging uses DEBUG level and console rendering.
    debug: bool = False
    log_level: str = "INFO"
    # Root for writable machine state; sub-directories default beneath it.
    data_dir: Path = Path("./data")
    # Optional per-directory overrides (None -> derived from data_dir).
    db_dir: Path | None = None
    chroma_dir: Path | None = None
    cache_dir: Path | None = None
    tmp_dir: Path | None = None
    # Ingestion source roots; treated as read-only inputs (see source_specs).
    vault_source_dir: Path = Path("./sources/vault")
    drive_source_dir: Path = Path("./sources/drive")
    # Per-source ingestion toggles.
    source_vault_enabled: bool = True
    source_drive_enabled: bool = True
    # Operational directories for logs, backups, and runtime files.
    log_dir: Path = Path("./logs")
    backup_dir: Path = Path("./backups")
    run_dir: Path = Path("./run")
    # API bind address and port.
    host: str = "127.0.0.1"
    port: int = 8100
@@ -29,11 +42,100 @@ class Settings(BaseSettings):
@property
def db_path(self) -> Path:
    """Resolve the SQLite database file path.

    An explicit ``db_dir`` override wins; otherwise a pre-existing legacy
    file directly under ``data_dir`` is reused before defaulting to the
    dedicated db directory.
    """
    # NOTE(review): a stale pre-refactor `return self.data_dir / "atocore.db"`
    # sat above this logic, making everything below unreachable — removed.
    legacy_path = self.resolved_data_dir / "atocore.db"
    if self.db_dir is not None:
        return self.resolved_db_dir / "atocore.db"
    # Keep using the legacy file if it exists so upgrades do not abandon data.
    if legacy_path.exists():
        return legacy_path
    return self.resolved_db_dir / "atocore.db"
@property
def chroma_path(self) -> Path:
    """Resolve the Chroma vector-store directory (override or ``data_dir/chroma``)."""
    # NOTE(review): removed the stale duplicate `return self.data_dir / "chroma"`
    # left over from the diff; it shadowed the resolved implementation.
    return self._resolve_path(self.chroma_dir or (self.resolved_data_dir / "chroma"))
@property
def cache_path(self) -> Path:
    """Resolved cache directory; defaults to ``data_dir/cache`` when unset."""
    target = self.cache_dir
    if target is None:
        target = self.resolved_data_dir / "cache"
    return self._resolve_path(target)
@property
def tmp_path(self) -> Path:
    """Resolved temp directory; defaults to ``data_dir/tmp`` when unset."""
    target = self.tmp_dir
    if target is None:
        target = self.resolved_data_dir / "tmp"
    return self._resolve_path(target)
@property
def resolved_data_dir(self) -> Path:
    """Absolute, user-expanded form of ``data_dir``."""
    raw_dir = self.data_dir
    return self._resolve_path(raw_dir)
@property
def resolved_db_dir(self) -> Path:
    """Resolved database directory; defaults to ``data_dir/db`` when unset."""
    chosen = self.db_dir if self.db_dir is not None else self.resolved_data_dir / "db"
    return self._resolve_path(chosen)
@property
def resolved_vault_source_dir(self) -> Path:
    """Absolute, user-expanded path of the vault source directory."""
    raw_dir = self.vault_source_dir
    return self._resolve_path(raw_dir)
@property
def resolved_drive_source_dir(self) -> Path:
    """Absolute, user-expanded path of the drive source directory."""
    raw_dir = self.drive_source_dir
    return self._resolve_path(raw_dir)
@property
def resolved_log_dir(self) -> Path:
    """Absolute, user-expanded path of the log directory."""
    raw_dir = self.log_dir
    return self._resolve_path(raw_dir)
@property
def resolved_backup_dir(self) -> Path:
    """Absolute, user-expanded path of the backup directory."""
    raw_dir = self.backup_dir
    return self._resolve_path(raw_dir)
@property
def resolved_run_dir(self) -> Path:
    """Resolved run directory; the default value redirects next to ``data_dir``."""
    if self.run_dir != Path("./run"):
        return self._resolve_path(self.run_dir)
    # The unmodified default acts as a sentinel: place run/ beside the data dir.
    return self._resolve_path(self.resolved_data_dir.parent / "run")
@property
def machine_dirs(self) -> list[Path]:
    """Every writable directory the service must be able to create."""
    dirs: list[Path] = []
    dirs.extend((self.db_path.parent, self.chroma_path, self.cache_path, self.tmp_path))
    dirs.extend((self.resolved_log_dir, self.resolved_backup_dir, self.resolved_run_dir))
    return dirs
@property
def source_specs(self) -> list[dict[str, object]]:
    """Declarative description of every configured ingestion source."""
    entries = [
        ("vault", self.source_vault_enabled, self.resolved_vault_source_dir),
        ("drive", self.source_drive_enabled, self.resolved_drive_source_dir),
    ]
    # All sources are read-only ingestion inputs by convention.
    return [
        {"name": name, "enabled": enabled, "path": path, "read_only": True}
        for name, enabled, path in entries
    ]
@property
def source_dirs(self) -> list[Path]:
    """Paths of the sources that are currently enabled."""
    enabled_paths: list[Path] = []
    for spec in self.source_specs:
        if spec["enabled"]:
            enabled_paths.append(spec["path"])
    return enabled_paths
def _resolve_path(self, path: Path) -> Path:
    """Expand ``~`` and normalize to an absolute path without requiring existence."""
    expanded = path.expanduser()
    return expanded.resolve(strict=False)
# Singleton settings instance shared across the application.
settings = Settings()


def ensure_runtime_dirs() -> None:
    """Create writable runtime directories for machine state and logs.

    Source directories are deliberately excluded: they are read-only
    ingestion inputs by convention and are never created here.
    """
    for runtime_dir in settings.machine_dirs:
        runtime_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -6,6 +6,7 @@ import time
import uuid
from pathlib import Path
import atocore.config as _config
from atocore.ingestion.chunker import chunk_markdown
from atocore.ingestion.parser import parse_markdown
from atocore.models.database import get_connection
@@ -189,6 +190,52 @@ def ingest_folder(folder_path: Path, purge_deleted: bool = True) -> list[dict]:
return results
def get_source_status() -> list[dict]:
    """Describe configured source directories and their readiness.

    Returns:
        list[dict]: one entry per configured source with name, enabled flag,
        path string, existence/directory checks, and read-only marker.
    """
    sources = []
    for spec in _config.settings.source_specs:
        # source_specs supplies Path objects; coerce defensively instead of
        # relying on `assert isinstance`, which is stripped under -O.
        path = Path(spec["path"])
        sources.append(
            {
                "name": spec["name"],
                "enabled": spec["enabled"],
                "path": str(path),
                "exists": path.exists(),
                "is_dir": path.is_dir(),
                "read_only": spec["read_only"],
            }
        )
    return sources
def ingest_configured_sources(purge_deleted: bool = False) -> list[dict]:
    """Ingest enabled source directories declared in config.

    Purge is disabled by default here because sources are intended to be
    read-only inputs and should not be treated as the primary writable state.
    """
    results = []
    for source in get_source_status():
        name, path = source["name"], source["path"]
        if not source["enabled"]:
            results.append({"source": name, "status": "disabled", "path": path})
        elif not (source["exists"] and source["is_dir"]):
            results.append({"source": name, "status": "missing", "path": path})
        else:
            folder_results = ingest_folder(Path(path), purge_deleted=purge_deleted)
            results.append(
                {
                    "source": name,
                    "status": "ingested",
                    "path": path,
                    "results": folder_results,
                }
            )
    return results
def get_ingestion_stats() -> dict:
"""Return ingestion statistics."""
with get_connection() as conn:

View File

@@ -5,8 +5,9 @@ from fastapi import FastAPI
from atocore.api.routes import router
import atocore.config as _config
from atocore.context.project_state import init_project_state_schema
from atocore.ingestion.pipeline import get_source_status
from atocore.models.database import init_db
from atocore.observability.logger import setup_logging
from atocore.observability.logger import get_logger, setup_logging
app = FastAPI(
title="AtoCore",
@@ -15,13 +16,22 @@ app = FastAPI(
)
app.include_router(router)
log = get_logger("main")
@app.on_event("startup")
def startup():
    """Initialize logging, runtime directories, and database schemas, then log readiness."""
    setup_logging()
    _config.ensure_runtime_dirs()
    init_db()
    init_project_state_schema()
    cfg = _config.settings
    log.info(
        "startup_ready",
        env=cfg.env,
        db_path=str(cfg.db_path),
        chroma_path=str(cfg.chroma_path),
        source_status=get_source_status(),
    )
if __name__ == "__main__":

View File

@@ -72,7 +72,7 @@ CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
def _ensure_data_dir() -> None:
    """Create all writable runtime directories before opening the database.

    Delegates to the central helper so database setup and app startup agree
    on which directories must exist. The direct ``data_dir.mkdir`` call that
    preceded this refactor was stale residue and has been removed.
    """
    _config.ensure_runtime_dirs()
def init_db() -> None:

View File

@@ -16,15 +16,18 @@ _LOG_LEVELS = {
def setup_logging() -> None:
"""Configure structlog with JSON output."""
log_level = "DEBUG" if _config.settings.debug else "INFO"
renderer = (
structlog.dev.ConsoleRenderer()
if _config.settings.debug
else structlog.processors.JSONRenderer()
)
structlog.configure(
processors=[
structlog.contextvars.merge_contextvars,
structlog.processors.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.dev.ConsoleRenderer()
if settings.debug
else structlog.processors.JSONRenderer(),
renderer,
],
wrapper_class=structlog.make_filtering_bound_logger(
_LOG_LEVELS.get(log_level, logging.INFO)