Add project registry update flow

This commit is contained in:
2026-04-06 12:31:24 -04:00
parent c9757e313a
commit 06aa931273
8 changed files with 348 additions and 6 deletions

View File

@@ -15,7 +15,7 @@
{
"id": "p04-gigabit",
"aliases": ["p04", "gigabit", "gigaBIT"],
"description": "Curated staged docs for the P04 GigaBIT project.",
"description": "Curated staged docs for the P04 GigaBIT mirror architecture and OTA optics project.",
"ingest_roots": [
{
"source": "vault",

View File

@@ -43,6 +43,7 @@ now includes a first curated ingestion batch for the active projects.
- template
- proposal preview
- approved registration
- safe update of existing project registrations
- refresh
- env-driven storage and deployment paths
- Dalidou Docker deployment foundation
@@ -98,9 +99,9 @@ The Dalidou instance already contains:
Current live stats after the latest documentation sync and active-project ingest
passes:
- `source_documents`: 35
- `source_chunks`: 560
- `vectors`: 560
- `source_documents`: 36
- `source_chunks`: 568
- `vectors`: 568
The broader long-term corpus is not yet fully populated. Wider project and
vault ingestion remains a deliberate next step rather than something already
@@ -162,10 +163,19 @@ The source refresh model now has a concrete foundation in code:
- the API can return a registration template
- the API can preview a registration without mutating state
- the API can persist an approved registration
- the API can update an existing registered project without changing its canonical id
- the API can refresh one registered project at a time
This lifecycle is now coherent end to end for normal use.
The first live update pass on an existing registered project has also now been
verified against `p04-gigabit`:
- the registration description can be updated safely
- the canonical project id remains unchanged
- refresh still behaves cleanly after the update
- `context/build` still returns useful project-specific context afterward
## Reliability Baseline
The runtime has now been hardened in a few practical ways:

View File

@@ -104,6 +104,8 @@ The next batch is successful if:
- OpenClaw can use AtoCore naturally when context is needed
- OpenClaw can also register a new project cleanly before refreshing it
- existing project registrations can be refined safely before refresh when the
staged source set evolves
- AtoCore answers correctly for the active project set
- retrieval surfaces the seeded project docs instead of mostly AtoCore meta-docs
- trusted project state remains concise and high confidence

View File

@@ -2,7 +2,8 @@
## Purpose
This document defines the normal path for adding a new project to AtoCore.
This document defines the normal path for adding a new project to AtoCore and
for safely updating an existing registration later.
The goal is to make `register + refresh` the standard workflow instead of
relying on long custom ingestion prompts every time.
@@ -18,6 +19,13 @@ Registration means:
- the staged source roots for that project are defined
- AtoCore and OpenClaw can later refresh that project consistently
Updating a project means:
- aliases can be corrected or expanded
- the short registry description can be improved
- ingest roots can be adjusted deliberately
- the canonical project id remains stable
## Required Fields
Each project registry entry must include:
@@ -82,6 +90,17 @@ For a new project:
7. verify retrieval and context quality
8. only later promote stable facts into Trusted Project State
For an existing registered project:
1. inspect the current entry with:
- `GET /projects`
- or `atocore.sh projects`
2. update the registration if aliases, description, or roots need refinement:
- `PUT /projects/{id}`
3. verify the updated entry
4. refresh the project again
5. verify retrieval and context quality did not regress
## What Not To Do
Do not:
@@ -106,4 +125,5 @@ Other lifecycle endpoints:
- `POST /projects/proposal`
- `POST /projects/register`
- `PUT /projects/{id}`
- `POST /projects/{id}/refresh`

View File

@@ -39,6 +39,7 @@ from atocore.projects.registry import (
list_registered_projects,
register_project,
refresh_registered_project,
update_project,
)
from atocore.retrieval.retriever import retrieve
from atocore.retrieval.vector_store import get_vector_store
@@ -77,6 +78,12 @@ class ProjectRegistrationProposalRequest(BaseModel):
ingest_roots: list[dict]
class ProjectUpdateRequest(BaseModel):
    """Partial-update payload for PUT /projects/{id}.

    Every field is optional; a field left as None keeps the currently
    registered value unchanged (see update_project in the registry module).
    """
    aliases: list[str] | None = None
    description: str | None = None
    ingest_roots: list[dict] | None = None
class QueryRequest(BaseModel):
prompt: str
top_k: int = 10
@@ -217,6 +224,23 @@ def api_project_registration(req: ProjectRegistrationProposalRequest) -> dict:
raise HTTPException(status_code=400, detail=str(e))
@router.put("/projects/{project_name}")
def api_project_update(project_name: str, req: ProjectUpdateRequest) -> dict:
    """Update an existing project registration.

    Fields left as None in the request body keep their current values.
    Registry validation failures are mapped onto HTTP errors: 404 when the
    project is unknown, 400 for any other invalid update (e.g. collisions).
    """
    try:
        return update_project(
            project_name=project_name,
            aliases=req.aliases,
            description=req.description,
            ingest_roots=req.ingest_roots,
        )
    except ValueError as exc:
        message = str(exc)
        # The registry signals "not found" via its error text; everything
        # else is treated as a bad request.
        status = 404 if message.startswith("Unknown project") else 400
        raise HTTPException(status_code=status, detail=message)
@router.post("/projects/{project_name}/refresh", response_model=ProjectRefreshResponse)
def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse:
"""Refresh one registered project from its configured ingest roots."""

View File

@@ -122,6 +122,78 @@ def register_project(
}
def update_project(
    project_name: str,
    aliases: list[str] | tuple[str, ...] | None = None,
    description: str | None = None,
    ingest_roots: list[dict] | tuple[dict, ...] | None = None,
) -> dict:
    """Update an existing project registration in the registry file.

    Any argument left as None keeps the currently registered value. The
    canonical project id is never changed. Raises ValueError when the
    project is unknown, when the resulting ingest-root list is empty, or
    when the new names collide with another registered project.
    """
    current = get_registered_project(project_name)
    if current is None:
        raise ValueError(f"Unknown project: {project_name}")

    # Merge the requested changes over the existing registration.
    if aliases is None:
        new_aliases = list(current.aliases)
    else:
        new_aliases = _normalize_aliases(aliases)
    if description is None:
        new_description = current.description
    else:
        new_description = description.strip()
    if ingest_roots is None:
        new_roots = [asdict(root) for root in current.ingest_roots]
    else:
        new_roots = _normalize_ingest_roots(ingest_roots)
    if not new_roots:
        raise ValueError("At least one ingest root is required")

    # Names must stay unique across the registry; exclude this project's
    # own entry so re-submitting its current names is not a collision.
    found = _find_name_collisions(
        current.project_id,
        new_aliases,
        exclude_project_id=current.project_id,
    )
    if found:
        collision_names = ", ".join(item["name"] for item in found)
        raise ValueError(f"Project update has collisions: {collision_names}")

    entry = {
        "id": current.project_id,
        "aliases": new_aliases,
        "description": new_description,
        "ingest_roots": new_roots,
    }

    # Resolve each root to an absolute path so the caller can see whether
    # the staged directories actually exist on disk.
    resolved = []
    for root in new_roots:
        ref = ProjectSourceRef(
            source=root["source"],
            subpath=root["subpath"],
            label=root.get("label", ""),
        )
        path = _resolve_ingest_root(ref)
        info = dict(root)
        info["path"] = str(path)
        info["exists"] = path.exists()
        info["is_dir"] = path.is_dir()
        resolved.append(info)

    # Rewrite the registry file, swapping in the updated entry while
    # leaving every other project untouched.
    registry_path = _config.settings.resolved_project_registry_path
    payload = _load_registry_payload(registry_path)
    replaced = []
    for item in payload.get("projects", []):
        if str(item.get("id", "")).strip() == current.project_id:
            replaced.append(entry)
        else:
            replaced.append(item)
    payload["projects"] = replaced
    _write_registry_payload(registry_path, payload)

    return {
        "project": entry,
        "resolved_ingest_roots": resolved,
        "collisions": [],
        "registry_path": str(registry_path),
        "valid": True,
        "status": "updated",
    }
def load_project_registry() -> list[RegisteredProject]:
"""Load project registry entries from JSON config."""
registry_path = _config.settings.resolved_project_registry_path
@@ -294,13 +366,19 @@ def _validate_unique_project_names(projects: list[RegisteredProject]) -> None:
seen[key] = project.project_id
def _find_name_collisions(project_id: str, aliases: list[str]) -> list[dict]:
def _find_name_collisions(
project_id: str,
aliases: list[str],
exclude_project_id: str | None = None,
) -> list[dict]:
collisions: list[dict] = []
existing = load_project_registry()
requested_names = [project_id, *aliases]
for requested in requested_names:
requested_key = requested.lower()
for project in existing:
if exclude_project_id is not None and project.project_id == exclude_project_id:
continue
project_names = [project.project_id, *project.aliases]
if requested_key in {name.lower() for name in project_names}:
collisions.append(

View File

@@ -294,3 +294,102 @@ def test_project_register_endpoint_rejects_collisions(tmp_data_dir, monkeypatch)
assert response.status_code == 400
assert "collisions" in response.json()["detail"]
def test_project_update_endpoint_persists_changes(tmp_data_dir, monkeypatch):
    """PUT /projects/{name} should accept an alias and persist the changes."""
    vault_root = tmp_data_dir / "vault-source"
    drive_root = tmp_data_dir / "drive-source"
    cfg_root = tmp_data_dir / "config"
    (vault_root / "incoming" / "projects" / "p04-gigabit").mkdir(parents=True)
    drive_root.mkdir()
    cfg_root.mkdir()

    registry_file = cfg_root / "project-registry.json"
    registry_file.write_text(
        """
{
    "projects": [
        {
            "id": "p04-gigabit",
            "aliases": ["p04", "gigabit"],
            "description": "Old description",
            "ingest_roots": [
                {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
            ]
        }
    ]
}
""".strip(),
        encoding="utf-8",
    )

    env = {
        "ATOCORE_VAULT_SOURCE_DIR": str(vault_root),
        "ATOCORE_DRIVE_SOURCE_DIR": str(drive_root),
        "ATOCORE_PROJECT_REGISTRY_PATH": str(registry_file),
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)
    config.settings = config.Settings()

    # Update via an alias rather than the canonical id.
    reply = TestClient(app).put(
        "/projects/p04",
        json={
            "aliases": ["p04", "gigabit", "gigabit-project"],
            "description": "Updated P04 docs",
        },
    )

    assert reply.status_code == 200
    payload = reply.json()
    assert payload["status"] == "updated"
    assert payload["project"]["aliases"] == ["p04", "gigabit", "gigabit-project"]
    assert payload["project"]["description"] == "Updated P04 docs"
def test_project_update_endpoint_rejects_collisions(tmp_data_dir, monkeypatch):
    """PUT /projects/{name} must 400 when an alias belongs to another project."""
    vault_root = tmp_data_dir / "vault-source"
    drive_root = tmp_data_dir / "drive-source"
    cfg_root = tmp_data_dir / "config"
    vault_root.mkdir()
    drive_root.mkdir()
    cfg_root.mkdir()

    registry_file = cfg_root / "project-registry.json"
    registry_file.write_text(
        """
{
    "projects": [
        {
            "id": "p04-gigabit",
            "aliases": ["p04", "gigabit"],
            "ingest_roots": [
                {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
            ]
        },
        {
            "id": "p05-interferometer",
            "aliases": ["p05", "interferometer"],
            "ingest_roots": [
                {"source": "vault", "subpath": "incoming/projects/p05-interferometer"}
            ]
        }
    ]
}
""".strip(),
        encoding="utf-8",
    )

    env = {
        "ATOCORE_VAULT_SOURCE_DIR": str(vault_root),
        "ATOCORE_DRIVE_SOURCE_DIR": str(drive_root),
        "ATOCORE_PROJECT_REGISTRY_PATH": str(registry_file),
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)
    config.settings = config.Settings()

    # "interferometer" is already an alias of p05, so this must be rejected.
    reply = TestClient(app).put(
        "/projects/p04",
        json={
            "aliases": ["p04", "interferometer"],
        },
    )

    assert reply.status_code == 400
    assert "collisions" in reply.json()["detail"]

View File

@@ -10,6 +10,7 @@ from atocore.projects.registry import (
list_registered_projects,
register_project,
refresh_registered_project,
update_project,
)
@@ -381,3 +382,111 @@ def test_register_project_rejects_collisions(tmp_path, monkeypatch):
raise AssertionError("Expected collision to prevent project registration")
finally:
config.settings = original_settings
def test_update_project_persists_description_and_aliases(tmp_path, monkeypatch):
    """update_project should rewrite the registry file with the new values."""
    vault_root = tmp_path / "vault"
    drive_root = tmp_path / "drive"
    cfg_root = tmp_path / "config"
    (vault_root / "incoming" / "projects" / "p04-gigabit").mkdir(parents=True)
    drive_root.mkdir()
    cfg_root.mkdir()

    registry_file = cfg_root / "project-registry.json"
    seed = {
        "projects": [
            {
                "id": "p04-gigabit",
                "aliases": ["p04", "gigabit"],
                "description": "Old description",
                "ingest_roots": [
                    {
                        "source": "vault",
                        "subpath": "incoming/projects/p04-gigabit",
                        "label": "Primary docs",
                    }
                ],
            }
        ]
    }
    registry_file.write_text(json.dumps(seed), encoding="utf-8")

    env = {
        "ATOCORE_VAULT_SOURCE_DIR": str(vault_root),
        "ATOCORE_DRIVE_SOURCE_DIR": str(drive_root),
        "ATOCORE_PROJECT_REGISTRY_PATH": str(registry_file),
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)

    saved_settings = config.settings
    try:
        config.settings = config.Settings()
        # Resolve via alias; the canonical id must survive unchanged.
        result = update_project(
            "p04",
            aliases=["p04", "gigabit", "gigabit-project"],
            description="Updated P04 project docs",
        )
    finally:
        config.settings = saved_settings

    assert result["status"] == "updated"
    assert result["project"]["id"] == "p04-gigabit"
    assert result["project"]["aliases"] == ["p04", "gigabit", "gigabit-project"]
    assert result["project"]["description"] == "Updated P04 project docs"

    on_disk = json.loads(registry_file.read_text(encoding="utf-8"))
    assert on_disk["projects"][0]["aliases"] == ["p04", "gigabit", "gigabit-project"]
    assert on_disk["projects"][0]["description"] == "Updated P04 project docs"
def test_update_project_rejects_colliding_aliases(tmp_path, monkeypatch):
    """update_project must refuse an alias already owned by another project."""
    vault_root = tmp_path / "vault"
    drive_root = tmp_path / "drive"
    cfg_root = tmp_path / "config"
    vault_root.mkdir()
    drive_root.mkdir()
    cfg_root.mkdir()

    registry_file = cfg_root / "project-registry.json"
    seed = {
        "projects": [
            {
                "id": "p04-gigabit",
                "aliases": ["p04", "gigabit"],
                "ingest_roots": [
                    {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
                ],
            },
            {
                "id": "p05-interferometer",
                "aliases": ["p05", "interferometer"],
                "ingest_roots": [
                    {"source": "vault", "subpath": "incoming/projects/p05-interferometer"}
                ],
            },
        ]
    }
    registry_file.write_text(json.dumps(seed), encoding="utf-8")

    env = {
        "ATOCORE_VAULT_SOURCE_DIR": str(vault_root),
        "ATOCORE_DRIVE_SOURCE_DIR": str(drive_root),
        "ATOCORE_PROJECT_REGISTRY_PATH": str(registry_file),
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)

    saved_settings = config.settings
    try:
        config.settings = config.Settings()
        raised = False
        try:
            # "interferometer" already belongs to p05-interferometer.
            update_project(
                "p04-gigabit",
                aliases=["p04", "interferometer"],
            )
        except ValueError as exc:
            raised = True
            assert "collisions" in str(exc)
        if not raised:
            raise AssertionError("Expected collision to prevent project update")
    finally:
        config.settings = saved_settings