diff --git a/config/project-registry.json b/config/project-registry.json index a93a473..d987eda 100644 --- a/config/project-registry.json +++ b/config/project-registry.json @@ -15,7 +15,7 @@ { "id": "p04-gigabit", "aliases": ["p04", "gigabit", "gigaBIT"], - "description": "Curated staged docs for the P04 GigaBIT project.", + "description": "Curated staged docs for the P04 GigaBIT mirror architecture and OTA optics project.", "ingest_roots": [ { "source": "vault", diff --git a/docs/current-state.md b/docs/current-state.md index f1a9d8c..3196942 100644 --- a/docs/current-state.md +++ b/docs/current-state.md @@ -43,6 +43,7 @@ now includes a first curated ingestion batch for the active projects. - template - proposal preview - approved registration + - safe update of existing project registrations - refresh - env-driven storage and deployment paths - Dalidou Docker deployment foundation @@ -98,9 +99,9 @@ The Dalidou instance already contains: Current live stats after the latest documentation sync and active-project ingest passes: -- `source_documents`: 35 -- `source_chunks`: 560 -- `vectors`: 560 +- `source_documents`: 36 +- `source_chunks`: 568 +- `vectors`: 568 The broader long-term corpus is still not fully populated yet. Wider project and vault ingestion remains a deliberate next step rather than something already @@ -162,10 +163,19 @@ The source refresh model now has a concrete foundation in code: - the API can return a registration template - the API can preview a registration without mutating state - the API can persist an approved registration +- the API can update an existing registered project without changing its canonical id - the API can refresh one registered project at a time This lifecycle is now coherent end to end for normal use. +The first live update pass on an existing registered project has also now been +verified against `p04-gigabit`: + +- the registration description can be updated safely +- the canonical project id remains unchanged +- refresh still behaves cleanly after the update +- `context/build` still returns useful project-specific context afterward + ## Reliability Baseline The runtime has now been hardened in a few practical ways: diff --git a/docs/next-steps.md b/docs/next-steps.md index 5ef68ae..d0ef1b9 100644 --- a/docs/next-steps.md +++ b/docs/next-steps.md @@ -104,6 +104,8 @@ The next batch is successful if: - OpenClaw can use AtoCore naturally when context is needed - OpenClaw can also register a new project cleanly before refreshing it +- existing project registrations can be refined safely before refresh when the + staged source set evolves - AtoCore answers correctly for the active project set - retrieval surfaces the seeded project docs instead of mostly AtoCore meta-docs - trusted project state remains concise and high confidence diff --git a/docs/project-registration-policy.md b/docs/project-registration-policy.md index f94e4ca..6ecabcb 100644 --- a/docs/project-registration-policy.md +++ b/docs/project-registration-policy.md @@ -2,7 +2,8 @@ ## Purpose -This document defines the normal path for adding a new project to AtoCore. +This document defines the normal path for adding a new project to AtoCore and +for safely updating an existing registration later. The goal is to make `register + refresh` the standard workflow instead of relying on long custom ingestion prompts every time. @@ -18,6 +19,13 @@ Registration means: - the staged source roots for that project are defined - AtoCore and OpenClaw can later refresh that project consistently +Updating a project means: + +- aliases can be corrected or expanded +- the short registry description can be improved +- ingest roots can be adjusted deliberately +- the canonical project id remains stable + ## Required Fields Each project registry entry must include: @@ -82,6 +90,17 @@ For a new project: 7. verify retrieval and context quality 8. only later promote stable facts into Trusted Project State +For an existing registered project: + +1. inspect the current entry with: + - `GET /projects` + - or `atocore.sh projects` +2. update the registration if aliases, description, or roots need refinement: + - `PUT /projects/{id}` +3. verify the updated entry +4. refresh the project again +5. verify retrieval and context quality did not regress + ## What Not To Do Do not: @@ -106,4 +125,5 @@ Other lifecycle endpoints: - `POST /projects/proposal` - `POST /projects/register` +- `PUT /projects/{id}` - `POST /projects/{id}/refresh` diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index 11f86c2..d723eaf 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -39,6 +39,7 @@ from atocore.projects.registry import ( list_registered_projects, register_project, refresh_registered_project, + update_project, ) from atocore.retrieval.retriever import retrieve from atocore.retrieval.vector_store import get_vector_store @@ -77,6 +78,12 @@ class ProjectRegistrationProposalRequest(BaseModel): ingest_roots: list[dict] +class ProjectUpdateRequest(BaseModel): + aliases: list[str] | None = None + description: str | None = None + ingest_roots: list[dict] | None = None + + class QueryRequest(BaseModel): prompt: str top_k: int = 10 @@ -217,6 +224,23 @@ def api_project_registration(req: ProjectRegistrationProposalRequest) -> dict: raise HTTPException(status_code=400, detail=str(e)) +@router.put("/projects/{project_name}") +def api_project_update(project_name: str, req: ProjectUpdateRequest) -> dict: + """Update an existing project registration.""" + try: + return update_project( + project_name=project_name, + aliases=req.aliases, + description=req.description, + ingest_roots=req.ingest_roots, + ) + except ValueError as e: + detail = str(e) + if detail.startswith("Unknown project"): + raise HTTPException(status_code=404, detail=detail) + raise HTTPException(status_code=400, detail=detail) + + @router.post("/projects/{project_name}/refresh", response_model=ProjectRefreshResponse) def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse: """Refresh one registered project from its configured ingest roots.""" diff --git a/src/atocore/projects/registry.py b/src/atocore/projects/registry.py index 613b8a1..021cd2f 100644 --- a/src/atocore/projects/registry.py +++ b/src/atocore/projects/registry.py @@ -122,6 +122,78 @@ def register_project( } +def update_project( + project_name: str, + aliases: list[str] | tuple[str, ...] | None = None, + description: str | None = None, + ingest_roots: list[dict] | tuple[dict, ...] | None = None, +) -> dict: + """Update an existing project registration in the registry file.""" + existing = get_registered_project(project_name) + if existing is None: + raise ValueError(f"Unknown project: {project_name}") + + final_aliases = _normalize_aliases(aliases) if aliases is not None else list(existing.aliases) + final_description = description.strip() if description is not None else existing.description + final_roots = ( + _normalize_ingest_roots(ingest_roots) + if ingest_roots is not None + else [asdict(root) for root in existing.ingest_roots] + ) + if not final_roots: + raise ValueError("At least one ingest root is required") + + collisions = _find_name_collisions( + existing.project_id, + final_aliases, + exclude_project_id=existing.project_id, + ) + if collisions: + collision_names = ", ".join(collision["name"] for collision in collisions) + raise ValueError(f"Project update has collisions: {collision_names}") + + updated_entry = { + "id": existing.project_id, + "aliases": final_aliases, + "description": final_description, + "ingest_roots": final_roots, + } + + resolved_roots = [] + for root in final_roots: + source_ref = ProjectSourceRef( + source=root["source"], + subpath=root["subpath"], + label=root.get("label", ""), + ) + resolved_path = _resolve_ingest_root(source_ref) + resolved_roots.append( + { + **root, + "path": str(resolved_path), + "exists": resolved_path.exists(), + "is_dir": resolved_path.is_dir(), + } + ) + + registry_path = _config.settings.resolved_project_registry_path + payload = _load_registry_payload(registry_path) + payload["projects"] = [ + updated_entry if str(entry.get("id", "")).strip() == existing.project_id else entry + for entry in payload.get("projects", []) + ] + _write_registry_payload(registry_path, payload) + + return { + "project": updated_entry, + "resolved_ingest_roots": resolved_roots, + "collisions": [], + "registry_path": str(registry_path), + "valid": True, + "status": "updated", + } + + def load_project_registry() -> list[RegisteredProject]: """Load project registry entries from JSON config.""" registry_path = _config.settings.resolved_project_registry_path @@ -294,13 +366,19 @@ def _validate_unique_project_names(projects: list[RegisteredProject]) -> None: seen[key] = project.project_id -def _find_name_collisions(project_id: str, aliases: list[str]) -> list[dict]: +def _find_name_collisions( + project_id: str, + aliases: list[str], + exclude_project_id: str | None = None, +) -> list[dict]: collisions: list[dict] = [] existing = load_project_registry() requested_names = [project_id, *aliases] for requested in requested_names: requested_key = requested.lower() for project in existing: + if exclude_project_id is not None and project.project_id == exclude_project_id: + continue project_names = [project.project_id, *project.aliases] if requested_key in {name.lower() for name in project_names}: collisions.append( diff --git a/tests/test_api_storage.py b/tests/test_api_storage.py index 67cafe1..7492bf3 100644 --- a/tests/test_api_storage.py +++ b/tests/test_api_storage.py @@ -294,3 +294,102 @@ def test_project_register_endpoint_rejects_collisions(tmp_data_dir, monkeypatch) assert response.status_code == 400 assert "collisions" in response.json()["detail"] + + +def test_project_update_endpoint_persists_changes(tmp_data_dir, monkeypatch): + vault_dir = tmp_data_dir / "vault-source" + drive_dir = tmp_data_dir / "drive-source" + config_dir = tmp_data_dir / "config" + project_dir = vault_dir / "incoming" / "projects" / "p04-gigabit" + project_dir.mkdir(parents=True) + drive_dir.mkdir() + config_dir.mkdir() + + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + """ +{ + "projects": [ + { + "id": "p04-gigabit", + "aliases": ["p04", "gigabit"], + "description": "Old description", + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p04-gigabit"} + ] + } + ] +} +""".strip(), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + config.settings = config.Settings() + + client = TestClient(app) + response = client.put( + "/projects/p04", + json={ + "aliases": ["p04", "gigabit", "gigabit-project"], + "description": "Updated P04 docs", + }, + ) + + assert response.status_code == 200 + body = response.json() + assert body["status"] == "updated" + assert body["project"]["aliases"] == ["p04", "gigabit", "gigabit-project"] + assert body["project"]["description"] == "Updated P04 docs" + + +def test_project_update_endpoint_rejects_collisions(tmp_data_dir, monkeypatch): + vault_dir = tmp_data_dir / "vault-source" + drive_dir = tmp_data_dir / "drive-source" + config_dir = tmp_data_dir / "config" + vault_dir.mkdir() + drive_dir.mkdir() + config_dir.mkdir() + + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + """ +{ + "projects": [ + { + "id": "p04-gigabit", + "aliases": ["p04", "gigabit"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p04-gigabit"} + ] + }, + { + "id": "p05-interferometer", + "aliases": ["p05", "interferometer"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p05-interferometer"} + ] + } + ] +} +""".strip(), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + config.settings = config.Settings() + + client = TestClient(app) + response = client.put( + "/projects/p04", + json={ + "aliases": ["p04", "interferometer"], + }, + ) + + assert response.status_code == 400 + assert "collisions" in response.json()["detail"] diff --git a/tests/test_project_registry.py b/tests/test_project_registry.py index d03b2b1..a922ee6 100644 --- a/tests/test_project_registry.py +++ b/tests/test_project_registry.py @@ -10,6 +10,7 @@ from atocore.projects.registry import ( list_registered_projects, register_project, refresh_registered_project, + update_project, ) @@ -381,3 +382,111 @@ def test_register_project_rejects_collisions(tmp_path, monkeypatch): raise AssertionError("Expected collision to prevent project registration") finally: config.settings = original_settings + + +def test_update_project_persists_description_and_aliases(tmp_path, monkeypatch): + vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + config_dir = tmp_path / "config" + staged = vault_dir / "incoming" / "projects" / "p04-gigabit" + staged.mkdir(parents=True) + drive_dir.mkdir() + config_dir.mkdir() + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + json.dumps( + { + "projects": [ + { + "id": "p04-gigabit", + "aliases": ["p04", "gigabit"], + "description": "Old description", + "ingest_roots": [ + { + "source": "vault", + "subpath": "incoming/projects/p04-gigabit", + "label": "Primary docs", + } + ], + } + ] + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + + original_settings = config.settings + try: + config.settings = config.Settings() + result = update_project( + "p04", + aliases=["p04", "gigabit", "gigabit-project"], + description="Updated P04 project docs", + ) + finally: + config.settings = original_settings + + assert result["status"] == "updated" + assert result["project"]["id"] == "p04-gigabit" + assert result["project"]["aliases"] == ["p04", "gigabit", "gigabit-project"] + assert result["project"]["description"] == "Updated P04 project docs" + + payload = json.loads(registry_path.read_text(encoding="utf-8")) + assert payload["projects"][0]["aliases"] == ["p04", "gigabit", "gigabit-project"] + assert payload["projects"][0]["description"] == "Updated P04 project docs" + + +def test_update_project_rejects_colliding_aliases(tmp_path, monkeypatch): + vault_dir = tmp_path / "vault" + drive_dir = tmp_path / "drive" + config_dir = tmp_path / "config" + vault_dir.mkdir() + drive_dir.mkdir() + config_dir.mkdir() + registry_path = config_dir / "project-registry.json" + registry_path.write_text( + json.dumps( + { + "projects": [ + { + "id": "p04-gigabit", + "aliases": ["p04", "gigabit"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p04-gigabit"} + ], + }, + { + "id": "p05-interferometer", + "aliases": ["p05", "interferometer"], + "ingest_roots": [ + {"source": "vault", "subpath": "incoming/projects/p05-interferometer"} + ], + }, + ] + } + ), + encoding="utf-8", + ) + + monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir)) + monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir)) + monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path)) + + original_settings = config.settings + try: + config.settings = config.Settings() + try: + update_project( + "p04-gigabit", + aliases=["p04", "interferometer"], + ) + except ValueError as exc: + assert "collisions" in str(exc) + else: + raise AssertionError("Expected collision to prevent project update") + finally: + config.settings = original_settings