"""AtoCore configuration via environment variables."""

import os
from pathlib import Path

from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    """Runtime configuration for AtoCore.

    Fields are populated by pydantic-settings from environment variables
    carrying the ``ATOCORE_`` prefix (see ``model_config``); the values
    below are the defaults used when a variable is absent.

    Raw path fields may be relative or contain ``~``. Consumers should use
    the ``resolved_*`` / ``*_path`` properties, which return expanded,
    absolute paths.
    """

    env: str = "development"
    debug: bool = False
    log_level: str = "INFO"

    # Machine-state locations. The optional directories fall back to
    # sub-directories of ``data_dir`` when left unset (see the properties).
    data_dir: Path = Path("./data")
    db_dir: Path | None = None
    chroma_dir: Path | None = None
    cache_dir: Path | None = None
    tmp_dir: Path | None = None

    # Ingestion sources and their on/off switches.
    vault_source_dir: Path = Path("./sources/vault")
    drive_source_dir: Path = Path("./sources/drive")
    source_vault_enabled: bool = True
    source_drive_enabled: bool = True

    log_dir: Path = Path("./logs")
    backup_dir: Path = Path("./backups")
    run_dir: Path = Path("./run")
    project_registry_path: Path = Path("./config/project-registry.json")

    host: str = "127.0.0.1"
    port: int = 8100
    db_busy_timeout_ms: int = 5000

    # Embedding
    embedding_model: str = (
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )

    # Chunking
    chunk_max_size: int = 800
    chunk_overlap: int = 100
    chunk_min_size: int = 50

    # Context
    context_budget: int = 3000
    context_top_k: int = 15

    # Retrieval ranking weights (tunable per environment).
    # All multipliers default to the values used since Wave 1; tighten or
    # loosen them via ATOCORE_* env vars without touching code.
    rank_project_match_boost: float = 2.0
    rank_query_token_step: float = 0.08
    rank_query_token_cap: float = 1.32
    rank_path_high_signal_boost: float = 1.18
    rank_path_low_signal_penalty: float = 0.72

    model_config = {"env_prefix": "ATOCORE_"}

    @property
    def db_path(self) -> Path:
        """Path to the ``atocore.db`` database file.

        An explicitly configured ``db_dir`` always wins. Otherwise a
        database that already exists directly under the data directory (the
        legacy location) is reused; failing that, the file lives under
        ``<data_dir>/db``.
        """
        if self.db_dir is None:
            legacy_path = self.resolved_data_dir / "atocore.db"
            if legacy_path.exists():
                return legacy_path
        return self.resolved_db_dir / "atocore.db"

    @property
    def chroma_path(self) -> Path:
        """Resolved ``chroma_dir``, defaulting to ``<data_dir>/chroma``."""
        return self._resolve_path(
            self.chroma_dir or (self.resolved_data_dir / "chroma")
        )

    @property
    def cache_path(self) -> Path:
        """Resolved ``cache_dir``, defaulting to ``<data_dir>/cache``."""
        return self._resolve_path(
            self.cache_dir or (self.resolved_data_dir / "cache")
        )

    @property
    def tmp_path(self) -> Path:
        """Resolved ``tmp_dir``, defaulting to ``<data_dir>/tmp``."""
        return self._resolve_path(
            self.tmp_dir or (self.resolved_data_dir / "tmp")
        )

    @property
    def resolved_data_dir(self) -> Path:
        """Expanded, absolute form of ``data_dir``."""
        return self._resolve_path(self.data_dir)

    @property
    def resolved_db_dir(self) -> Path:
        """Resolved ``db_dir``, defaulting to ``<data_dir>/db``."""
        return self._resolve_path(
            self.db_dir or (self.resolved_data_dir / "db")
        )

    @property
    def resolved_vault_source_dir(self) -> Path:
        """Expanded, absolute form of ``vault_source_dir``."""
        return self._resolve_path(self.vault_source_dir)

    @property
    def resolved_drive_source_dir(self) -> Path:
        """Expanded, absolute form of ``drive_source_dir``."""
        return self._resolve_path(self.drive_source_dir)

    @property
    def resolved_log_dir(self) -> Path:
        """Expanded, absolute form of ``log_dir``."""
        return self._resolve_path(self.log_dir)

    @property
    def resolved_backup_dir(self) -> Path:
        """Expanded, absolute form of ``backup_dir``."""
        return self._resolve_path(self.backup_dir)

    @property
    def resolved_run_dir(self) -> Path:
        """Resolved run directory.

        While ``run_dir`` still equals its default (``./run``), the
        directory is placed next to the data directory, i.e.
        ``<data_dir>/../run``; any other value is resolved as given.
        """
        if self.run_dir == Path("./run"):
            return self._resolve_path(self.resolved_data_dir.parent / "run")
        return self._resolve_path(self.run_dir)

    @property
    def resolved_project_registry_path(self) -> Path:
        """Path to the project registry JSON file.

        If the ``ATOCORE_PROJECT_REGISTRY_DIR`` env var is set (and not
        blank), the registry lives at
        ``<registry dir>/project-registry.json``. Otherwise falls back to
        the configured ``project_registry_path`` field.

        This lets Docker deployments point at a mounted volume via env var
        without the ephemeral in-image ``/app/config/`` getting wiped on
        every rebuild.
        """
        registry_dir = os.environ.get("ATOCORE_PROJECT_REGISTRY_DIR", "").strip()
        if registry_dir:
            # Resolve like every other path so "~" and relative values
            # behave consistently (machine_dirs mkdir()s the parent).
            return self._resolve_path(
                Path(registry_dir) / "project-registry.json"
            )
        return self._resolve_path(self.project_registry_path)

    @property
    def machine_dirs(self) -> list[Path]:
        """Directories holding machine state, as created by ``ensure_runtime_dirs``."""
        return [
            self.db_path.parent,
            self.chroma_path,
            self.cache_path,
            self.tmp_path,
            self.resolved_log_dir,
            self.resolved_backup_dir,
            self.resolved_run_dir,
            self.resolved_project_registry_path.parent,
        ]

    @property
    def source_specs(self) -> list[dict[str, object]]:
        """Descriptors for every known source, enabled or not.

        Each entry carries the keys ``name``, ``enabled``, ``path`` and
        ``read_only``.
        """
        return [
            {
                "name": "vault",
                "enabled": self.source_vault_enabled,
                "path": self.resolved_vault_source_dir,
                "read_only": True,
            },
            {
                "name": "drive",
                "enabled": self.source_drive_enabled,
                "path": self.resolved_drive_source_dir,
                "read_only": True,
            },
        ]

    @property
    def source_dirs(self) -> list[Path]:
        """Resolved paths of the sources whose ``enabled`` flag is true."""
        return [spec["path"] for spec in self.source_specs if spec["enabled"]]

    def _resolve_path(self, path: Path) -> Path:
        """Expand ``~`` and make *path* absolute; it need not exist."""
        return path.expanduser().resolve(strict=False)


settings = Settings()


def ensure_runtime_dirs() -> None:
    """Create writable runtime directories for machine state and logs.

    Source directories are intentionally excluded because they are treated
    as read-only ingestion inputs by convention.
    """
    for directory in settings.machine_dirs:
        directory.mkdir(parents=True, exist_ok=True)