From 1f1e6b5749f1dde634ca76ae1c2b6f95f2fdde99 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Mon, 6 Apr 2026 09:11:11 -0400 Subject: [PATCH] Add project registration proposal preview --- src/atocore/api/routes.py | 22 ++++++++ src/atocore/projects/registry.py | 96 ++++++++++++++++++++++++++++++++ tests/test_api_storage.py | 41 ++++++++++++++ tests/test_project_registry.py | 85 ++++++++++++++++++++++++++++ 4 files changed, 244 insertions(+) diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index e9f7594..8231786 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -34,6 +34,7 @@ from atocore.memory.service import ( ) from atocore.observability.logger import get_logger from atocore.projects.registry import ( + build_project_registration_proposal, get_project_registry_template, list_registered_projects, refresh_registered_project, @@ -68,6 +69,13 @@ class ProjectRefreshResponse(BaseModel): roots: list[dict] +class ProjectRegistrationProposalRequest(BaseModel): + project_id: str + aliases: list[str] = [] + description: str = "" + ingest_roots: list[dict] + + class QueryRequest(BaseModel): prompt: str top_k: int = 10 @@ -180,6 +188,20 @@ def api_projects_template() -> dict: } +@router.post("/projects/proposal") +def api_project_registration_proposal(req: ProjectRegistrationProposalRequest) -> dict: + """Return a normalized project registration proposal without writing it.""" + try: + return build_project_registration_proposal( + project_id=req.project_id, + aliases=req.aliases, + description=req.description, + ingest_roots=req.ingest_roots, + ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + @router.post("/projects/{project_name}/refresh", response_model=ProjectRefreshResponse) def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse: """Refresh one registered project from its configured ingest roots.""" diff --git a/src/atocore/projects/registry.py b/src/atocore/projects/registry.py index 08dc192..f3e2256 100644 --- a/src/atocore/projects/registry.py +++ b/src/atocore/projects/registry.py @@ -45,6 +45,54 @@ def get_project_registry_template() -> dict: } +def build_project_registration_proposal( + project_id: str, + aliases: list[str] | tuple[str, ...] | None = None, + description: str = "", + ingest_roots: list[dict] | tuple[dict, ...] | None = None, +) -> dict: + """Build a normalized project registration proposal without mutating state.""" + normalized_id = project_id.strip() + if not normalized_id: + raise ValueError("Project id must be non-empty") + + normalized_aliases = _normalize_aliases(aliases or []) + normalized_roots = _normalize_ingest_roots(ingest_roots or []) + if not normalized_roots: + raise ValueError("At least one ingest root is required") + + collisions = _find_name_collisions(normalized_id, normalized_aliases) + resolved_roots = [] + for root in normalized_roots: + source_ref = ProjectSourceRef( + source=root["source"], + subpath=root["subpath"], + label=root.get("label", ""), + ) + resolved_path = _resolve_ingest_root(source_ref) + resolved_roots.append( + { + **root, + "path": str(resolved_path), + "exists": resolved_path.exists(), + "is_dir": resolved_path.is_dir(), + } + ) + + return { + "project": { + "id": normalized_id, + "aliases": normalized_aliases, + "description": description.strip(), + "ingest_roots": normalized_roots, + }, + "resolved_ingest_roots": resolved_roots, + "collisions": collisions, + "registry_path": str(_config.settings.resolved_project_registry_path), + "valid": not collisions, + } + + def load_project_registry() -> list[RegisteredProject]: """Load project registry entries from JSON config.""" registry_path = _config.settings.resolved_project_registry_path @@ -147,6 +195,35 @@ def refresh_registered_project(project_name: str, purge_deleted: bool = False) - } +def _normalize_aliases(aliases: list[str] | tuple[str, ...]) -> list[str]: + deduped: list[str] = [] + seen: set[str] = set() + for alias in aliases: + candidate = alias.strip() + if not candidate: + continue + key = candidate.lower() + if key in seen: + continue + seen.add(key) + deduped.append(candidate) + return deduped + + +def _normalize_ingest_roots(ingest_roots: list[dict] | tuple[dict, ...]) -> list[dict]: + normalized: list[dict] = [] + for root in ingest_roots: + source = str(root.get("source", "")).strip() + subpath = str(root.get("subpath", "")).strip() + label = str(root.get("label", "")).strip() + if not source or not subpath: + continue + if source not in {"vault", "drive"}: + raise ValueError(f"Unsupported source root: {source}") + normalized.append({"source": source, "subpath": subpath, "label": label}) + return normalized + + def _project_to_dict(project: RegisteredProject) -> dict: return { "id": project.project_id, @@ -189,3 +266,22 @@ def _validate_unique_project_names(projects: list[RegisteredProject]) -> None: f"'{seen[key]}' and '{project.project_id}'" ) seen[key] = project.project_id + + +def _find_name_collisions(project_id: str, aliases: list[str]) -> list[dict]: + collisions: list[dict] = [] + existing = load_project_registry() + requested_names = [project_id, *aliases] + for requested in requested_names: + requested_key = requested.lower() + for project in existing: + project_names = [project.project_id, *project.aliases] + if requested_key in {name.lower() for name in project_names}: + collisions.append( + { + "name": requested, + "existing_project": project.project_id, + } + ) + break + return collisions diff --git a/tests/test_api_storage.py b/tests/test_api_storage.py index 8a252d5..b3e468e 100644 --- a/tests/test_api_storage.py +++ b/tests/test_api_storage.py @@ -162,3 +162,44 @@ def test_projects_template_endpoint_returns_template(tmp_data_dir, monkeypatch): body = response.json() assert body["allowed_sources"] == ["vault", "drive"] assert body["template"]["projects"][0]["id"] == "p07-example" + + +def test_project_proposal_endpoint_returns_normalized_preview(tmp_data_dir, monkeypatch): + vault_dir = tmp_data_dir / "vault-source" + drive_dir = tmp_data_dir / "drive-source" + config_dir = tmp_data_dir / "config" + staged = vault_dir / "incoming" / "projects" / "p07-example" + staged.mkdir(parents=True) + drive_dir.mkdir() + config_dir.mkdir() + + registry_path = config_dir / "project-registry.json" + registry_path.write_text('{"projects": []}', encoding="utf-8") + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + config.settings = config.Settings() + + client = TestClient(app) + response = client.post( + "/projects/proposal", + json={ + "project_id": "p07-example", + "aliases": ["p07", "example-project", "p07"], + "description": "Example project", + "ingest_roots": [ + { + "source": "vault", + "subpath": "incoming/projects/p07-example", + "label": "Primary docs", + } + ], + }, + ) + + assert response.status_code == 200 + body = response.json() + assert body["project"]["aliases"] == ["p07", "example-project"] + assert body["resolved_ingest_roots"][0]["exists"] is True + assert body["valid"] is True diff --git a/tests/test_project_registry.py b/tests/test_project_registry.py index 8cf620f..8f6f55c 100644 --- a/tests/test_project_registry.py +++ b/tests/test_project_registry.py @@ -4,6 +4,7 @@ import json import atocore.config as config from atocore.projects.registry import ( + build_project_registration_proposal, get_registered_project, get_project_registry_template, list_registered_projects, @@ -208,3 +209,87 @@ def test_project_registry_rejects_alias_collision(tmp_path, monkeypatch): raise AssertionError("Expected project registry collision to raise") finally: config.settings = original_settings + + +def test_project_registration_proposal_normalizes_and_resolves_paths(tmp_path, monkeypatch): + vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + config_dir = tmp_path / "config" + staged = vault_dir / "incoming" / "projects" / "p07-example" + staged.mkdir(parents=True) + drive_dir.mkdir() + config_dir.mkdir() + registry_path = config_dir / "project-registry.json" + registry_path.write_text(json.dumps({"projects": []}), encoding="utf-8") + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + + original_settings = config.settings + try: + config.settings = config.Settings() + proposal = build_project_registration_proposal( + project_id="p07-example", + aliases=["p07", "example-project", "p07"], + description="Example project", + ingest_roots=[ + { + "source": "vault", + "subpath": "incoming/projects/p07-example", + "label": "Primary docs", + } + ], + ) + finally: + config.settings = original_settings + + assert proposal["project"]["aliases"] == ["p07", "example-project"] + assert proposal["resolved_ingest_roots"][0]["exists"] is True + assert proposal["valid"] is True + + +def test_project_registration_proposal_reports_collisions(tmp_path, monkeypatch): + vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + config_dir = tmp_path / "config" + vault_dir.mkdir() + drive_dir.mkdir() + config_dir.mkdir() + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + json.dumps( + { + "projects": [ + { + "id": "p05-interferometer", + "aliases": ["p05", "interferometer"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p05-interferometer"} + ], + } + ] + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + + original_settings = config.settings + try: + config.settings = config.Settings() + proposal = build_project_registration_proposal( + project_id="p08-example", + aliases=["interferometer"], + ingest_roots=[ + {"source": "vault", "subpath": "incoming/projects/p08-example"} + ], + ) + finally: + config.settings = original_settings + + assert proposal["valid"] is False + assert proposal["collisions"][0]["existing_project"] == "p05-interferometer"