feat: tunable ranking, refresh status, chroma backup + admin endpoints
Three small improvements that move the operational baseline forward
without changing the existing trust model.
1. Tunable retrieval ranking weights
- rank_project_match_boost, rank_query_token_step,
rank_query_token_cap, rank_path_high_signal_boost,
rank_path_low_signal_penalty are now Settings fields
- all overridable via ATOCORE_* env vars
- retriever no longer hard-codes 2.0 / 1.18 / 0.72 / 0.08 / 1.32
- lets ranking be tuned per environment as Wave 1 is exercised
without code changes
2. /projects/{name}/refresh status
- refresh_registered_project now returns an overall status field
("ingested", "partial", "nothing_to_ingest") plus roots_ingested
and roots_skipped counters
- ProjectRefreshResponse advertises the new fields so callers can
rely on them
- covers the case where every configured root is missing on disk
3. Chroma cold snapshot + admin backup endpoints
- create_runtime_backup now accepts include_chroma and writes a
cold directory copy of the chroma persistence path
- new list_runtime_backups() and validate_backup() helpers
- new endpoints:
- POST /admin/backup create snapshot (optional chroma)
- GET /admin/backup list snapshots
- GET /admin/backup/{stamp}/validate structural validation
- chroma snapshots are taken under exclusive_ingestion() so a refresh
or ingest cannot race with the cold copy
- backup metadata records what was actually included and how big
Tests:
- 8 new tests covering tunable weights, refresh status branches
(ingested / partial / nothing_to_ingest), chroma snapshot, list,
validate, and the API endpoints (including the lock-acquisition path)
- existing fake refresh stubs in test_api_storage.py updated for the
expanded ProjectRefreshResponse model
- full suite: 105 passing (was 97)
next-steps doc updated to reflect that the chroma snapshot + restore
validation gap from current-state.md is now closed in code; only the
operational retention policy remains.
This commit is contained in:
@@ -154,6 +154,110 @@ def test_refresh_registered_project_ingests_registered_roots(tmp_path, monkeypat
|
||||
assert calls[0][0].endswith("p06-polisher")
|
||||
assert calls[0][1] is False
|
||||
assert result["roots"][0]["status"] == "ingested"
|
||||
assert result["status"] == "ingested"
|
||||
assert result["roots_ingested"] == 1
|
||||
assert result["roots_skipped"] == 0
|
||||
|
||||
|
||||
def test_refresh_registered_project_reports_nothing_to_ingest_when_all_missing(
|
||||
tmp_path, monkeypatch
|
||||
):
|
||||
vault_dir = tmp_path / "vault"
|
||||
drive_dir = tmp_path / "drive"
|
||||
config_dir = tmp_path / "config"
|
||||
vault_dir.mkdir()
|
||||
drive_dir.mkdir()
|
||||
config_dir.mkdir()
|
||||
|
||||
registry_path = config_dir / "project-registry.json"
|
||||
registry_path.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"projects": [
|
||||
{
|
||||
"id": "p07-ghost",
|
||||
"aliases": ["ghost"],
|
||||
"description": "Project whose roots do not exist on disk",
|
||||
"ingest_roots": [
|
||||
{"source": "vault", "subpath": "incoming/projects/p07-ghost"}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def fail_ingest_folder(path, purge_deleted=True):
|
||||
raise AssertionError(f"ingest_folder should not be called for missing root: {path}")
|
||||
|
||||
monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir))
|
||||
monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir))
|
||||
monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
|
||||
|
||||
original_settings = config.settings
|
||||
try:
|
||||
config.settings = config.Settings()
|
||||
monkeypatch.setattr("atocore.projects.registry.ingest_folder", fail_ingest_folder)
|
||||
result = refresh_registered_project("ghost")
|
||||
finally:
|
||||
config.settings = original_settings
|
||||
|
||||
assert result["status"] == "nothing_to_ingest"
|
||||
assert result["roots_ingested"] == 0
|
||||
assert result["roots_skipped"] == 1
|
||||
assert result["roots"][0]["status"] == "missing"
|
||||
|
||||
|
||||
def test_refresh_registered_project_reports_partial_status(tmp_path, monkeypatch):
|
||||
vault_dir = tmp_path / "vault"
|
||||
drive_dir = tmp_path / "drive"
|
||||
config_dir = tmp_path / "config"
|
||||
real_root = vault_dir / "incoming" / "projects" / "p08-mixed"
|
||||
real_root.mkdir(parents=True)
|
||||
drive_dir.mkdir()
|
||||
config_dir.mkdir()
|
||||
|
||||
registry_path = config_dir / "project-registry.json"
|
||||
registry_path.write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"projects": [
|
||||
{
|
||||
"id": "p08-mixed",
|
||||
"aliases": ["mixed"],
|
||||
"description": "One root present, one missing",
|
||||
"ingest_roots": [
|
||||
{"source": "vault", "subpath": "incoming/projects/p08-mixed"},
|
||||
{"source": "vault", "subpath": "incoming/projects/p08-mixed-missing"},
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
def fake_ingest_folder(path, purge_deleted=True):
|
||||
return [{"file": str(path / "README.md"), "status": "ingested"}]
|
||||
|
||||
monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir))
|
||||
monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir))
|
||||
monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
|
||||
|
||||
original_settings = config.settings
|
||||
try:
|
||||
config.settings = config.Settings()
|
||||
monkeypatch.setattr("atocore.projects.registry.ingest_folder", fake_ingest_folder)
|
||||
result = refresh_registered_project("mixed")
|
||||
finally:
|
||||
config.settings = original_settings
|
||||
|
||||
assert result["status"] == "partial"
|
||||
assert result["roots_ingested"] == 1
|
||||
assert result["roots_skipped"] == 1
|
||||
statuses = sorted(root["status"] for root in result["roots"])
|
||||
assert statuses == ["ingested", "missing"]
|
||||
|
||||
|
||||
def test_project_registry_template_has_expected_shape():
|
||||
|
||||
Reference in New Issue
Block a user