From 827dcf2cd1de3055e94589d05eba11a918f0866c Mon Sep 17 00:00:00 2001 From: Anto01 Date: Mon, 6 Apr 2026 08:46:37 -0400 Subject: [PATCH] Add project registration policy and template --- config/project-registry.example.json | 21 ++++++ docs/next-steps.md | 1 + docs/project-registration-policy.md | 95 ++++++++++++++++++++++++++++ docs/source-refresh-model.md | 1 + src/atocore/api/routes.py | 11 ++++ src/atocore/projects/registry.py | 39 ++++++++++++ tests/test_api_storage.py | 12 ++++ tests/test_project_registry.py | 58 +++++++++++++++++ 8 files changed, 238 insertions(+) create mode 100644 config/project-registry.example.json create mode 100644 docs/project-registration-policy.md diff --git a/config/project-registry.example.json b/config/project-registry.example.json new file mode 100644 index 0000000..0df06c4 --- /dev/null +++ b/config/project-registry.example.json @@ -0,0 +1,21 @@ +{ + "projects": [ + { + "id": "p07-example", + "aliases": ["p07", "example-project"], + "description": "Short description of the project and the staged source set.", + "ingest_roots": [ + { + "source": "vault", + "subpath": "incoming/projects/p07-example", + "label": "Primary staged project docs" + }, + { + "source": "drive", + "subpath": "projects/p07-example", + "label": "Trusted operational docs" + } + ] + } + ] +} diff --git a/docs/next-steps.md b/docs/next-steps.md index c08fac2..24a1212 100644 --- a/docs/next-steps.md +++ b/docs/next-steps.md @@ -31,6 +31,7 @@ AtoCore now has: explicit - move toward a project source registry and refresh workflow - foundation now exists via project registry + per-project refresh API + - registration policy + template are now the next normal path for new projects 5. 
Define backup and export procedures for Dalidou - SQLite snapshot/backup strategy - Chroma backup or rebuild policy diff --git a/docs/project-registration-policy.md b/docs/project-registration-policy.md new file mode 100644 index 0000000..5a5fa69 --- /dev/null +++ b/docs/project-registration-policy.md @@ -0,0 +1,95 @@ +# AtoCore Project Registration Policy + +## Purpose + +This document defines the normal path for adding a new project to AtoCore. + +The goal is to make `register + refresh` the standard workflow instead of +relying on long custom ingestion prompts every time. + +## What Registration Means + +Registering a project does not ingest it by itself. + +Registration means: + +- the project gets a canonical AtoCore id +- known aliases are recorded +- the staged source roots for that project are defined +- AtoCore and OpenClaw can later refresh that project consistently + +## Required Fields + +Each project registry entry must include: + +- `id` + - stable canonical project id + - prefer lowercase kebab-case + - examples: + - `p04-gigabit` + - `p05-interferometer` + - `p06-polisher` +- `aliases` + - short common names or abbreviations + - examples: + - `p05` + - `interferometer` +- `description` + - short explanation of what the registered source set represents +- `ingest_roots` + - one or more staged roots under configured source layers + +## Allowed Source Roots + +Current allowed `source` values are: + +- `vault` +- `drive` + +These map to the configured Dalidou source boundaries. + +## Recommended Registration Rules + +1. Prefer one canonical project id +2. Keep aliases short and practical +3. Start with the smallest useful staged roots +4. Prefer curated high-signal docs before broad corpora +5. Keep repo context selective at first +6. Avoid registering noisy or generated trees +7. Use `drive` for trusted operational material when available +8. Use `vault` for curated staged PKM and repo-doc snapshots + +## Normal Workflow + +For a new project: + +1. 
stage the initial source docs on Dalidou +2. add the project entry to the registry +3. verify the entry with: + - `GET /projects` + - or the T420 helper `atocore.sh projects` +4. refresh it with: + - `POST /projects/{id}/refresh` + - or `atocore.sh refresh-project <id>` +5. verify retrieval and context quality +6. only later promote stable facts into Trusted Project State + +## What Not To Do + +Do not: + +- register giant noisy trees blindly +- treat registration as equivalent to trusted state +- dump the full PKM by default +- rely on aliases that collide across projects +- use the live machine DB as a source root + +## Template + +Use: + +- [project-registry.example.json](../config/project-registry.example.json) + +And the API template endpoint: + +- `GET /projects/template` diff --git a/docs/source-refresh-model.md b/docs/source-refresh-model.md index 2f9c55d..dcdadf4 100644 --- a/docs/source-refresh-model.md +++ b/docs/source-refresh-model.md @@ -84,6 +84,7 @@ The first concrete foundation for this now exists in AtoCore: - a project registry file records known project ids, aliases, and ingest roots - the API can list those registered projects +- the API can return a registration template for new projects - the API can refresh a single registered project from its configured roots This is not full source automation yet, but it gives the refresh model a real diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index 8fef637..e9f7594 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -34,6 +34,7 @@ from atocore.memory.service import ( ) from atocore.observability.logger import get_logger from atocore.projects.registry import ( + get_project_registry_template, list_registered_projects, refresh_registered_project, ) @@ -169,6 +170,16 @@ def api_projects() -> dict: } +@router.get("/projects/template") +def api_projects_template() -> dict: + """Return a starter template for project registry entries.""" + return { +
"template": get_project_registry_template(), + "registry_path": str(_config.settings.resolved_project_registry_path), + "allowed_sources": ["vault", "drive"], + } + + @router.post("/projects/{project_name}/refresh", response_model=ProjectRefreshResponse) def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse: """Refresh one registered project from its configured ingest roots.""" diff --git a/src/atocore/projects/registry.py b/src/atocore/projects/registry.py index 31c72f4..08dc192 100644 --- a/src/atocore/projects/registry.py +++ b/src/atocore/projects/registry.py @@ -25,6 +25,26 @@ class RegisteredProject: ingest_roots: tuple[ProjectSourceRef, ...] +def get_project_registry_template() -> dict: + """Return a minimal template for registering a new project.""" + return { + "projects": [ + { + "id": "p07-example", + "aliases": ["p07", "example-project"], + "description": "Short description of the project and staged corpus.", + "ingest_roots": [ + { + "source": "vault", + "subpath": "incoming/projects/p07-example", + "label": "Primary staged project docs", + } + ], + } + ] + } + + def load_project_registry() -> list[RegisteredProject]: """Load project registry entries from JSON config.""" registry_path = _config.settings.resolved_project_registry_path @@ -37,6 +57,8 @@ def load_project_registry() -> list[RegisteredProject]: for entry in entries: project_id = str(entry["id"]).strip() + if not project_id: + raise ValueError("Project registry entry is missing a non-empty id") aliases = tuple( alias.strip() for alias in entry.get("aliases", []) @@ -53,6 +75,8 @@ def load_project_registry() -> list[RegisteredProject]: if str(root.get("source", "")).strip() and str(root.get("subpath", "")).strip() ) + if not ingest_roots: + raise ValueError(f"Project registry entry '{project_id}' has no ingest_roots") projects.append( RegisteredProject( project_id=project_id, @@ -62,6 +86,7 @@ def load_project_registry() -> list[RegisteredProject]: 
) ) + _validate_unique_project_names(projects) return projects @@ -150,3 +175,17 @@ def _resolve_ingest_root(source_ref: ProjectSourceRef) -> Path: raise ValueError(f"Unsupported source root: {source_ref.source}") from exc return (base_dir / source_ref.subpath).resolve(strict=False) + + +def _validate_unique_project_names(projects: list[RegisteredProject]) -> None: + seen: dict[str, str] = {} + for project in projects: + names = [project.project_id, *project.aliases] + for name in names: + key = name.lower() + if key in seen and seen[key] != project.project_id: + raise ValueError( + f"Project registry name collision: '{name}' is used by both " + f"'{seen[key]}' and '{project.project_id}'" + ) + seen[key] = project.project_id diff --git a/tests/test_api_storage.py b/tests/test_api_storage.py index f824583..8a252d5 100644 --- a/tests/test_api_storage.py +++ b/tests/test_api_storage.py @@ -150,3 +150,15 @@ def test_project_refresh_endpoint_uses_registered_roots(tmp_data_dir, monkeypatc assert response.status_code == 200 assert calls == [("p05", False)] assert response.json()["project"] == "p05-interferometer" + + +def test_projects_template_endpoint_returns_template(tmp_data_dir, monkeypatch): + config.settings = config.Settings() + + client = TestClient(app) + response = client.get("/projects/template") + + assert response.status_code == 200 + body = response.json() + assert body["allowed_sources"] == ["vault", "drive"] + assert body["template"]["projects"][0]["id"] == "p07-example" diff --git a/tests/test_project_registry.py b/tests/test_project_registry.py index b273566..8cf620f 100644 --- a/tests/test_project_registry.py +++ b/tests/test_project_registry.py @@ -5,6 +5,7 @@ import json import atocore.config as config from atocore.projects.registry import ( get_registered_project, + get_project_registry_template, list_registered_projects, refresh_registered_project, ) @@ -150,3 +151,60 @@ def test_refresh_registered_project_ingests_registered_roots(tmp_path, 
monkeypat assert calls[0][0].endswith("p06-polisher") assert calls[0][1] is False assert result["roots"][0]["status"] == "ingested" + + +def test_project_registry_template_has_expected_shape(): + template = get_project_registry_template() + assert "projects" in template + assert template["projects"][0]["id"] == "p07-example" + assert template["projects"][0]["ingest_roots"][0]["source"] == "vault" + + +def test_project_registry_rejects_alias_collision(tmp_path, monkeypatch): + vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + config_dir = tmp_path / "config" + vault_dir.mkdir() + drive_dir.mkdir() + config_dir.mkdir() + + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + json.dumps( + { + "projects": [ + { + "id": "p04-gigabit", + "aliases": ["shared"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p04-gigabit"} + ], + }, + { + "id": "p05-interferometer", + "aliases": ["shared"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p05-interferometer"} + ], + }, + ] + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + + original_settings = config.settings + try: + config.settings = config.Settings() + try: + list_registered_projects() + except ValueError as exc: + assert "collision" in str(exc) + else: + raise AssertionError("Expected project registry collision to raise") + finally: + config.settings = original_settings