Add Dalidou storage foundation and deployment prep
This commit is contained in:
@@ -5,6 +5,7 @@ from pathlib import Path
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
import atocore.config as _config
|
||||
from atocore.context.builder import (
|
||||
build_context,
|
||||
get_last_context_pack,
|
||||
@@ -16,7 +17,13 @@ from atocore.context.project_state import (
|
||||
invalidate_state,
|
||||
set_state,
|
||||
)
|
||||
from atocore.ingestion.pipeline import ingest_file, ingest_folder, get_ingestion_stats
|
||||
from atocore.ingestion.pipeline import (
|
||||
get_ingestion_stats,
|
||||
get_source_status,
|
||||
ingest_configured_sources,
|
||||
ingest_file,
|
||||
ingest_folder,
|
||||
)
|
||||
from atocore.memory.service import (
|
||||
MEMORY_TYPES,
|
||||
create_memory,
|
||||
@@ -44,6 +51,10 @@ class IngestResponse(BaseModel):
|
||||
results: list[dict]
|
||||
|
||||
|
||||
class IngestSourcesResponse(BaseModel):
|
||||
results: list[dict]
|
||||
|
||||
|
||||
class QueryRequest(BaseModel):
|
||||
prompt: str
|
||||
top_k: int = 10
|
||||
@@ -126,6 +137,17 @@ def api_ingest(req: IngestRequest) -> IngestResponse:
|
||||
return IngestResponse(results=results)
|
||||
|
||||
|
||||
@router.post("/ingest/sources", response_model=IngestSourcesResponse)
|
||||
def api_ingest_sources() -> IngestSourcesResponse:
|
||||
"""Ingest enabled configured source directories."""
|
||||
try:
|
||||
results = ingest_configured_sources()
|
||||
except Exception as e:
|
||||
log.error("ingest_sources_failed", error=str(e))
|
||||
raise HTTPException(status_code=500, detail=f"Configured source ingestion failed: {e}")
|
||||
return IngestSourcesResponse(results=results)
|
||||
|
||||
|
||||
@router.post("/query", response_model=QueryResponse)
|
||||
def api_query(req: QueryRequest) -> QueryResponse:
|
||||
"""Retrieve relevant chunks for a prompt."""
|
||||
@@ -304,10 +326,34 @@ def api_invalidate_project_state(req: ProjectStateInvalidateRequest) -> dict:
|
||||
def api_health() -> dict:
|
||||
"""Health check."""
|
||||
store = get_vector_store()
|
||||
source_status = get_source_status()
|
||||
return {
|
||||
"status": "ok",
|
||||
"version": "0.1.0",
|
||||
"vectors_count": store.count,
|
||||
"env": _config.settings.env,
|
||||
"machine_paths": {
|
||||
"db_path": str(_config.settings.db_path),
|
||||
"chroma_path": str(_config.settings.chroma_path),
|
||||
"log_dir": str(_config.settings.resolved_log_dir),
|
||||
"backup_dir": str(_config.settings.resolved_backup_dir),
|
||||
"run_dir": str(_config.settings.resolved_run_dir),
|
||||
},
|
||||
"sources_ready": all(
|
||||
(not source["enabled"]) or (source["exists"] and source["is_dir"])
|
||||
for source in source_status
|
||||
),
|
||||
"source_status": source_status,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/sources")
|
||||
def api_sources() -> dict:
|
||||
"""Return configured ingestion source directories and readiness."""
|
||||
return {
|
||||
"sources": get_source_status(),
|
||||
"vault_enabled": _config.settings.source_vault_enabled,
|
||||
"drive_enabled": _config.settings.source_drive_enabled,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -6,8 +6,21 @@ from pydantic_settings import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
env: str = "development"
|
||||
debug: bool = False
|
||||
log_level: str = "INFO"
|
||||
data_dir: Path = Path("./data")
|
||||
db_dir: Path | None = None
|
||||
chroma_dir: Path | None = None
|
||||
cache_dir: Path | None = None
|
||||
tmp_dir: Path | None = None
|
||||
vault_source_dir: Path = Path("./sources/vault")
|
||||
drive_source_dir: Path = Path("./sources/drive")
|
||||
source_vault_enabled: bool = True
|
||||
source_drive_enabled: bool = True
|
||||
log_dir: Path = Path("./logs")
|
||||
backup_dir: Path = Path("./backups")
|
||||
run_dir: Path = Path("./run")
|
||||
host: str = "127.0.0.1"
|
||||
port: int = 8100
|
||||
|
||||
@@ -29,11 +42,100 @@ class Settings(BaseSettings):
|
||||
|
||||
@property
|
||||
def db_path(self) -> Path:
|
||||
return self.data_dir / "atocore.db"
|
||||
legacy_path = self.resolved_data_dir / "atocore.db"
|
||||
if self.db_dir is not None:
|
||||
return self.resolved_db_dir / "atocore.db"
|
||||
if legacy_path.exists():
|
||||
return legacy_path
|
||||
return self.resolved_db_dir / "atocore.db"
|
||||
|
||||
@property
|
||||
def chroma_path(self) -> Path:
|
||||
return self.data_dir / "chroma"
|
||||
return self._resolve_path(self.chroma_dir or (self.resolved_data_dir / "chroma"))
|
||||
|
||||
@property
|
||||
def cache_path(self) -> Path:
|
||||
return self._resolve_path(self.cache_dir or (self.resolved_data_dir / "cache"))
|
||||
|
||||
@property
|
||||
def tmp_path(self) -> Path:
|
||||
return self._resolve_path(self.tmp_dir or (self.resolved_data_dir / "tmp"))
|
||||
|
||||
@property
|
||||
def resolved_data_dir(self) -> Path:
|
||||
return self._resolve_path(self.data_dir)
|
||||
|
||||
@property
|
||||
def resolved_db_dir(self) -> Path:
|
||||
return self._resolve_path(self.db_dir or (self.resolved_data_dir / "db"))
|
||||
|
||||
@property
|
||||
def resolved_vault_source_dir(self) -> Path:
|
||||
return self._resolve_path(self.vault_source_dir)
|
||||
|
||||
@property
|
||||
def resolved_drive_source_dir(self) -> Path:
|
||||
return self._resolve_path(self.drive_source_dir)
|
||||
|
||||
@property
|
||||
def resolved_log_dir(self) -> Path:
|
||||
return self._resolve_path(self.log_dir)
|
||||
|
||||
@property
|
||||
def resolved_backup_dir(self) -> Path:
|
||||
return self._resolve_path(self.backup_dir)
|
||||
|
||||
@property
|
||||
def resolved_run_dir(self) -> Path:
|
||||
if self.run_dir == Path("./run"):
|
||||
return self._resolve_path(self.resolved_data_dir.parent / "run")
|
||||
return self._resolve_path(self.run_dir)
|
||||
|
||||
@property
|
||||
def machine_dirs(self) -> list[Path]:
|
||||
return [
|
||||
self.db_path.parent,
|
||||
self.chroma_path,
|
||||
self.cache_path,
|
||||
self.tmp_path,
|
||||
self.resolved_log_dir,
|
||||
self.resolved_backup_dir,
|
||||
self.resolved_run_dir,
|
||||
]
|
||||
|
||||
@property
|
||||
def source_specs(self) -> list[dict[str, object]]:
|
||||
return [
|
||||
{
|
||||
"name": "vault",
|
||||
"enabled": self.source_vault_enabled,
|
||||
"path": self.resolved_vault_source_dir,
|
||||
"read_only": True,
|
||||
},
|
||||
{
|
||||
"name": "drive",
|
||||
"enabled": self.source_drive_enabled,
|
||||
"path": self.resolved_drive_source_dir,
|
||||
"read_only": True,
|
||||
},
|
||||
]
|
||||
|
||||
@property
|
||||
def source_dirs(self) -> list[Path]:
|
||||
return [spec["path"] for spec in self.source_specs if spec["enabled"]]
|
||||
|
||||
def _resolve_path(self, path: Path) -> Path:
|
||||
return path.expanduser().resolve(strict=False)
|
||||
|
||||
|
||||
settings = Settings()
|
||||
|
||||
|
||||
def ensure_runtime_dirs() -> None:
|
||||
"""Create writable runtime directories for machine state and logs.
|
||||
|
||||
Source directories are intentionally excluded because they are treated as
|
||||
read-only ingestion inputs by convention.
|
||||
"""
|
||||
for directory in settings.machine_dirs:
|
||||
directory.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -6,6 +6,7 @@ import time
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
|
||||
import atocore.config as _config
|
||||
from atocore.ingestion.chunker import chunk_markdown
|
||||
from atocore.ingestion.parser import parse_markdown
|
||||
from atocore.models.database import get_connection
|
||||
@@ -189,6 +190,52 @@ def ingest_folder(folder_path: Path, purge_deleted: bool = True) -> list[dict]:
|
||||
return results
|
||||
|
||||
|
||||
def get_source_status() -> list[dict]:
|
||||
"""Describe configured source directories and their readiness."""
|
||||
sources = []
|
||||
for spec in _config.settings.source_specs:
|
||||
path = spec["path"]
|
||||
assert isinstance(path, Path)
|
||||
sources.append(
|
||||
{
|
||||
"name": spec["name"],
|
||||
"enabled": spec["enabled"],
|
||||
"path": str(path),
|
||||
"exists": path.exists(),
|
||||
"is_dir": path.is_dir(),
|
||||
"read_only": spec["read_only"],
|
||||
}
|
||||
)
|
||||
return sources
|
||||
|
||||
|
||||
def ingest_configured_sources(purge_deleted: bool = False) -> list[dict]:
|
||||
"""Ingest enabled source directories declared in config.
|
||||
|
||||
Purge is disabled by default here because sources are intended to be
|
||||
read-only inputs and should not be treated as the primary writable state.
|
||||
"""
|
||||
results = []
|
||||
for source in get_source_status():
|
||||
if not source["enabled"]:
|
||||
results.append({"source": source["name"], "status": "disabled", "path": source["path"]})
|
||||
continue
|
||||
if not source["exists"] or not source["is_dir"]:
|
||||
results.append({"source": source["name"], "status": "missing", "path": source["path"]})
|
||||
continue
|
||||
|
||||
folder_results = ingest_folder(Path(source["path"]), purge_deleted=purge_deleted)
|
||||
results.append(
|
||||
{
|
||||
"source": source["name"],
|
||||
"status": "ingested",
|
||||
"path": source["path"],
|
||||
"results": folder_results,
|
||||
}
|
||||
)
|
||||
return results
|
||||
|
||||
|
||||
def get_ingestion_stats() -> dict:
|
||||
"""Return ingestion statistics."""
|
||||
with get_connection() as conn:
|
||||
|
||||
@@ -5,8 +5,9 @@ from fastapi import FastAPI
|
||||
from atocore.api.routes import router
|
||||
import atocore.config as _config
|
||||
from atocore.context.project_state import init_project_state_schema
|
||||
from atocore.ingestion.pipeline import get_source_status
|
||||
from atocore.models.database import init_db
|
||||
from atocore.observability.logger import setup_logging
|
||||
from atocore.observability.logger import get_logger, setup_logging
|
||||
|
||||
app = FastAPI(
|
||||
title="AtoCore",
|
||||
@@ -15,13 +16,22 @@ app = FastAPI(
|
||||
)
|
||||
|
||||
app.include_router(router)
|
||||
log = get_logger("main")
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
def startup():
|
||||
setup_logging()
|
||||
_config.ensure_runtime_dirs()
|
||||
init_db()
|
||||
init_project_state_schema()
|
||||
log.info(
|
||||
"startup_ready",
|
||||
env=_config.settings.env,
|
||||
db_path=str(_config.settings.db_path),
|
||||
chroma_path=str(_config.settings.chroma_path),
|
||||
source_status=get_source_status(),
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -72,7 +72,7 @@ CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
|
||||
|
||||
|
||||
def _ensure_data_dir() -> None:
|
||||
_config.settings.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
_config.ensure_runtime_dirs()
|
||||
|
||||
|
||||
def init_db() -> None:
|
||||
|
||||
@@ -16,15 +16,18 @@ _LOG_LEVELS = {
|
||||
def setup_logging() -> None:
|
||||
"""Configure structlog with JSON output."""
|
||||
log_level = "DEBUG" if _config.settings.debug else "INFO"
|
||||
renderer = (
|
||||
structlog.dev.ConsoleRenderer()
|
||||
if _config.settings.debug
|
||||
else structlog.processors.JSONRenderer()
|
||||
)
|
||||
|
||||
structlog.configure(
|
||||
processors=[
|
||||
structlog.contextvars.merge_contextvars,
|
||||
structlog.processors.add_log_level,
|
||||
structlog.processors.TimeStamper(fmt="iso"),
|
||||
structlog.dev.ConsoleRenderer()
|
||||
if settings.debug
|
||||
else structlog.processors.JSONRenderer(),
|
||||
renderer,
|
||||
],
|
||||
wrapper_class=structlog.make_filtering_bound_logger(
|
||||
_LOG_LEVELS.get(log_level, logging.INFO)
|
||||
|
||||
Reference in New Issue
Block a user