Add project registration policy and template

This commit is contained in:
2026-04-06 08:46:37 -04:00
parent d8028f406e
commit 827dcf2cd1
8 changed files with 238 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
{
"projects": [
{
"id": "p07-example",
"aliases": ["p07", "example-project"],
"description": "Short description of the project and the staged source set.",
"ingest_roots": [
{
"source": "vault",
"subpath": "incoming/projects/p07-example",
"label": "Primary staged project docs"
},
{
"source": "drive",
"subpath": "projects/p07-example",
"label": "Trusted operational docs"
}
]
}
]
}

View File

@@ -31,6 +31,7 @@ AtoCore now has:
explicit
- move toward a project source registry and refresh workflow
- foundation now exists via project registry + per-project refresh API
- registration policy + template are now the next normal path for new projects
5. Define backup and export procedures for Dalidou
- SQLite snapshot/backup strategy
- Chroma backup or rebuild policy

View File

@@ -0,0 +1,95 @@
# AtoCore Project Registration Policy
## Purpose
This document defines the normal path for adding a new project to AtoCore.
The goal is to make `register + refresh` the standard workflow instead of
relying on long custom ingestion prompts every time.
## What Registration Means
Registering a project does not ingest it by itself.
Registration means:
- the project gets a canonical AtoCore id
- known aliases are recorded
- the staged source roots for that project are defined
- AtoCore and OpenClaw can later refresh that project consistently
## Required Fields
Each project registry entry must include:
- `id`
- stable canonical project id
- prefer lowercase kebab-case
- examples:
- `p04-gigabit`
- `p05-interferometer`
- `p06-polisher`
- `aliases`
- short common names or abbreviations
- examples:
- `p05`
- `interferometer`
- `description`
- short explanation of what the registered source set represents
- `ingest_roots`
- one or more staged roots under configured source layers
## Allowed Source Roots
Current allowed `source` values are:
- `vault`
- `drive`
These map to the configured Dalidou source boundaries.
## Recommended Registration Rules
1. Prefer one canonical project id
2. Keep aliases short and practical
3. Start with the smallest useful staged roots
4. Prefer curated high-signal docs before broad corpora
5. Keep repo context selective at first
6. Avoid registering noisy or generated trees
7. Use `drive` for trusted operational material when available
8. Use `vault` for curated staged PKM and repo-doc snapshots
## Normal Workflow
For a new project:
1. stage the initial source docs on Dalidou
2. add the project entry to the registry
3. verify the entry with:
- `GET /projects`
- or the T420 helper `atocore.sh projects`
4. refresh it with:
- `POST /projects/{id}/refresh`
- or `atocore.sh refresh-project <id>`
5. verify retrieval and context quality
6. only later promote stable facts into Trusted Project State
## What Not To Do
Do not:
- register giant noisy trees blindly
- treat registration as equivalent to trusted state
- dump the full PKM by default
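- rely on aliases that collide across projects (ids and aliases are compared case-insensitively, and the registry fails to load when two projects share a name)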
- use the live machine DB as a source root
## Template
Use:
- `config/project-registry.example.json` (path relative to the repository root)
And the API template endpoint:
- `GET /projects/template`

View File

@@ -84,6 +84,7 @@ The first concrete foundation for this now exists in AtoCore:
- a project registry file records known project ids, aliases, and ingest roots
- the API can list those registered projects
- the API can return a registration template for new projects
- the API can refresh a single registered project from its configured roots
This is not full source automation yet, but it gives the refresh model a real

View File

@@ -34,6 +34,7 @@ from atocore.memory.service import (
)
from atocore.observability.logger import get_logger
from atocore.projects.registry import (
get_project_registry_template,
list_registered_projects,
refresh_registered_project,
)
@@ -169,6 +170,16 @@ def api_projects() -> dict:
}
@router.get("/projects/template")
def api_projects_template() -> dict:
    """Serve the starter payload for registering a new project.

    Returns:
        A dict with three keys: ``template`` (the starter registry
        document), ``registry_path`` (the resolved registry file path,
        rendered as a string) and ``allowed_sources`` (the accepted
        ``source`` values for ingest roots).
    """
    starter_template = get_project_registry_template()
    registry_location = str(_config.settings.resolved_project_registry_path)
    payload = {
        "template": starter_template,
        "registry_path": registry_location,
        "allowed_sources": ["vault", "drive"],
    }
    return payload
@router.post("/projects/{project_name}/refresh", response_model=ProjectRefreshResponse)
def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse:
"""Refresh one registered project from its configured ingest roots."""

View File

@@ -25,6 +25,26 @@ class RegisteredProject:
ingest_roots: tuple[ProjectSourceRef, ...]
def get_project_registry_template() -> dict:
    """Build the starter registry document for registering a new project.

    Returns:
        A newly constructed dict holding a single placeholder project
        (``p07-example``) with two aliases, a description and one ingest
        root under the ``vault`` source.
    """
    example_root = {
        "source": "vault",
        "subpath": "incoming/projects/p07-example",
        "label": "Primary staged project docs",
    }
    example_project = {
        "id": "p07-example",
        "aliases": ["p07", "example-project"],
        "description": "Short description of the project and staged corpus.",
        "ingest_roots": [example_root],
    }
    return {"projects": [example_project]}
def load_project_registry() -> list[RegisteredProject]:
"""Load project registry entries from JSON config."""
registry_path = _config.settings.resolved_project_registry_path
@@ -37,6 +57,8 @@ def load_project_registry() -> list[RegisteredProject]:
for entry in entries:
project_id = str(entry["id"]).strip()
if not project_id:
raise ValueError("Project registry entry is missing a non-empty id")
aliases = tuple(
alias.strip()
for alias in entry.get("aliases", [])
@@ -53,6 +75,8 @@ def load_project_registry() -> list[RegisteredProject]:
if str(root.get("source", "")).strip()
and str(root.get("subpath", "")).strip()
)
if not ingest_roots:
raise ValueError(f"Project registry entry '{project_id}' has no ingest_roots")
projects.append(
RegisteredProject(
project_id=project_id,
@@ -62,6 +86,7 @@ def load_project_registry() -> list[RegisteredProject]:
)
)
_validate_unique_project_names(projects)
return projects
@@ -150,3 +175,17 @@ def _resolve_ingest_root(source_ref: ProjectSourceRef) -> Path:
raise ValueError(f"Unsupported source root: {source_ref.source}") from exc
return (base_dir / source_ref.subpath).resolve(strict=False)
def _validate_unique_project_names(projects: list[RegisteredProject]) -> None:
    """Ensure no id or alias is claimed by more than one registry entry.

    Names are compared case-insensitively (via ``str.lower``). An entry may
    repeat one of its own names (for example an alias equal to its id), but
    a name may not appear in two different entries. That includes two
    entries that declare the same id.

    Args:
        projects: Registry entries to check, in registry order.

    Raises:
        ValueError: If two distinct entries share an id or alias.
    """
    # Map each lowercased name to the entry that first claimed it. Ownership
    # is tracked by entry object rather than by project_id: comparing ids
    # would treat two entries with the same id as one and let the duplicate
    # through unnoticed.
    owners: dict[str, RegisteredProject] = {}
    for project in projects:
        for name in (project.project_id, *project.aliases):
            owner = owners.setdefault(name.lower(), project)
            if owner is project:
                continue
            if owner.project_id == project.project_id:
                raise ValueError(
                    "Project registry name collision: project id "
                    f"'{project.project_id}' is defined more than once"
                )
            raise ValueError(
                f"Project registry name collision: '{name}' is used by both "
                f"'{owner.project_id}' and '{project.project_id}'"
            )

View File

@@ -150,3 +150,15 @@ def test_project_refresh_endpoint_uses_registered_roots(tmp_data_dir, monkeypatc
assert response.status_code == 200
assert calls == [("p05", False)]
assert response.json()["project"] == "p05-interferometer"
def test_projects_template_endpoint_returns_template(tmp_data_dir, monkeypatch):
    """GET /projects/template returns the starter template and allowed sources."""
    # config.settings is module-global state. Remember the object in place on
    # entry and put it back afterwards so this override cannot leak into
    # tests that run later.
    original_settings = config.settings
    try:
        # Presumably rebuilt so Settings reflects the environment prepared by
        # the tmp_data_dir fixture -- TODO confirm against the fixture.
        config.settings = config.Settings()
        client = TestClient(app)

        response = client.get("/projects/template")

        assert response.status_code == 200
        body = response.json()
        assert body["allowed_sources"] == ["vault", "drive"]
        assert body["template"]["projects"][0]["id"] == "p07-example"
    finally:
        config.settings = original_settings

View File

@@ -5,6 +5,7 @@ import json
import atocore.config as config
from atocore.projects.registry import (
get_registered_project,
get_project_registry_template,
list_registered_projects,
refresh_registered_project,
)
@@ -150,3 +151,60 @@ def test_refresh_registered_project_ingests_registered_roots(tmp_path, monkeypat
assert calls[0][0].endswith("p06-polisher")
assert calls[0][1] is False
assert result["roots"][0]["status"] == "ingested"
def test_project_registry_template_has_expected_shape():
    """The starter template exposes one example project rooted in the vault."""
    registry_template = get_project_registry_template()

    assert "projects" in registry_template
    first_project = registry_template["projects"][0]
    assert first_project["id"] == "p07-example"
    first_root = first_project["ingest_roots"][0]
    assert first_root["source"] == "vault"
def test_project_registry_rejects_alias_collision(tmp_path, monkeypatch):
    """Listing projects fails when two registry entries share an alias."""
    # Lay out the source and config directories the settings will point at.
    vault_dir = tmp_path / "vault"
    drive_dir = tmp_path / "drive"
    config_dir = tmp_path / "config"
    for directory in (vault_dir, drive_dir, config_dir):
        directory.mkdir()

    # Two distinct projects that both claim the alias "shared".
    first_project = {
        "id": "p04-gigabit",
        "aliases": ["shared"],
        "ingest_roots": [
            {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
        ],
    }
    second_project = {
        "id": "p05-interferometer",
        "aliases": ["shared"],
        "ingest_roots": [
            {"source": "vault", "subpath": "incoming/projects/p05-interferometer"}
        ],
    }
    registry_path = config_dir / "project-registry.json"
    registry_document = json.dumps({"projects": [first_project, second_project]})
    registry_path.write_text(registry_document, encoding="utf-8")

    environment = (
        ("ATOCORE_VAULT_SOURCE_DIR", vault_dir),
        ("ATOCORE_DRIVE_SOURCE_DIR", drive_dir),
        ("ATOCORE_PROJECT_REGISTRY_PATH", registry_path),
    )
    for variable, location in environment:
        monkeypatch.setenv(variable, str(location))

    # Swap in settings built from the patched environment, and always put the
    # previous settings object back, whatever the outcome.
    previous_settings = config.settings
    collision_error = None
    try:
        config.settings = config.Settings()
        try:
            list_registered_projects()
        except ValueError as exc:
            collision_error = exc
    finally:
        config.settings = previous_settings

    if collision_error is None:
        raise AssertionError("Expected project registry collision to raise")
    assert "collision" in str(collision_error)