2026-04-06 10:15:00 -04:00
|
|
|
"""Tests for runtime backup creation."""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import sqlite3
|
|
|
|
|
from datetime import UTC, datetime
|
|
|
|
|
|
|
|
|
|
import atocore.config as config
|
|
|
|
|
from atocore.models.database import init_db
|
feat: tunable ranking, refresh status, chroma backup + admin endpoints
Three small improvements that move the operational baseline forward
without changing the existing trust model.
1. Tunable retrieval ranking weights
- rank_project_match_boost, rank_query_token_step,
rank_query_token_cap, rank_path_high_signal_boost,
rank_path_low_signal_penalty are now Settings fields
- all overridable via ATOCORE_* env vars
- retriever no longer hard-codes 2.0 / 1.18 / 0.72 / 0.08 / 1.32
- lets ranking be tuned per environment as Wave 1 is exercised
without code changes
2. /projects/{name}/refresh status
- refresh_registered_project now returns an overall status field
("ingested", "partial", "nothing_to_ingest") plus roots_ingested
and roots_skipped counters
- ProjectRefreshResponse advertises the new fields so callers can
rely on them
- covers the case where every configured root is missing on disk
3. Chroma cold snapshot + admin backup endpoints
- create_runtime_backup now accepts include_chroma and writes a
cold directory copy of the chroma persistence path
- new list_runtime_backups() and validate_backup() helpers
- new endpoints:
- POST /admin/backup create snapshot (optional chroma)
- GET /admin/backup list snapshots
- GET /admin/backup/{stamp}/validate structural validation
- chroma snapshots are taken under exclusive_ingestion() so a refresh
or ingest cannot race with the cold copy
- backup metadata records what was actually included and how big
Tests:
- 8 new tests covering tunable weights, refresh status branches
(ingested / partial / nothing_to_ingest), chroma snapshot, list,
validate, and the API endpoints (including the lock-acquisition path)
- existing fake refresh stubs in test_api_storage.py updated for the
expanded ProjectRefreshResponse model
- full suite: 105 passing (was 97)
next-steps doc updated to reflect that the chroma snapshot + restore
validation gap from current-state.md is now closed in code; only the
operational retention policy remains.
2026-04-06 18:42:19 -04:00
|
|
|
from atocore.ops.backup import (
|
|
|
|
|
create_runtime_backup,
|
|
|
|
|
list_runtime_backups,
|
|
|
|
|
validate_backup,
|
|
|
|
|
)
|
2026-04-06 10:15:00 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_create_runtime_backup_copies_db_and_registry(tmp_path, monkeypatch):
    """Check that a runtime backup snapshots the database and the registry.

    The test points the data dir, backup dir and project registry path at
    ``tmp_path`` via environment variables, writes a registry file, inserts one
    project row into the freshly initialised database, and then takes a backup
    with a fixed timestamp. It asserts that the database copy, the registry
    copy and the metadata file all exist under the snapshot directory, that the
    copied database contains the inserted row, and that the metadata records
    the registry snapshot path.
    """
    registry_path = tmp_path / "config" / "project-registry.json"
    snapshot_dir = tmp_path / "backups" / "snapshots" / "20260406T180000Z"

    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.setenv("ATOCORE_BACKUP_DIR", str(tmp_path / "backups"))
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))

    registry_path.parent.mkdir(parents=True)
    registry_path.write_text(
        '{"projects":[{"id":"p01-example","aliases":[],"ingest_roots":[{"source":"vault","subpath":"incoming/projects/p01-example"}]}]}\n',
        encoding="utf-8",
    )

    original_settings = config.settings
    try:
        # Rebuild settings so the environment overrides above take effect.
        config.settings = config.Settings()
        init_db()

        # sqlite3's connection context manager only commits / rolls back; it
        # does not close the connection. Close explicitly so no handle to the
        # live database outlives this block.
        conn = sqlite3.connect(str(config.settings.db_path))
        try:
            conn.execute(
                "INSERT INTO projects (id, name) VALUES (?, ?)",
                ("p01", "P01 Example"),
            )
            conn.commit()
        finally:
            conn.close()

        result = create_runtime_backup(datetime(2026, 4, 6, 18, 0, 0, tzinfo=UTC))
    finally:
        config.settings = original_settings

    db_snapshot = snapshot_dir / "db" / "atocore.db"
    registry_snapshot = snapshot_dir / "config" / "project-registry.json"
    metadata_path = snapshot_dir / "backup-metadata.json"

    assert result["db_snapshot_path"] == str(db_snapshot)
    assert db_snapshot.exists()
    assert registry_snapshot.exists()
    assert metadata_path.exists()

    # Same explicit close for the snapshot copy (see the note above).
    snapshot_conn = sqlite3.connect(str(db_snapshot))
    try:
        row = snapshot_conn.execute(
            "SELECT name FROM projects WHERE id = ?", ("p01",)
        ).fetchone()
    finally:
        snapshot_conn.close()

    # Fail with a clear assertion, not a TypeError, if the row is missing.
    assert row is not None
    assert row[0] == "P01 Example"

    metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
    assert metadata["registry_snapshot_path"] == str(registry_snapshot)
|
|
|
|
|
|
|
|
|
|
|
feat: tunable ranking, refresh status, chroma backup + admin endpoints
Three small improvements that move the operational baseline forward
without changing the existing trust model.
1. Tunable retrieval ranking weights
- rank_project_match_boost, rank_query_token_step,
rank_query_token_cap, rank_path_high_signal_boost,
rank_path_low_signal_penalty are now Settings fields
- all overridable via ATOCORE_* env vars
- retriever no longer hard-codes 2.0 / 1.18 / 0.72 / 0.08 / 1.32
- lets ranking be tuned per environment as Wave 1 is exercised
without code changes
2. /projects/{name}/refresh status
- refresh_registered_project now returns an overall status field
("ingested", "partial", "nothing_to_ingest") plus roots_ingested
and roots_skipped counters
- ProjectRefreshResponse advertises the new fields so callers can
rely on them
- covers the case where every configured root is missing on disk
3. Chroma cold snapshot + admin backup endpoints
- create_runtime_backup now accepts include_chroma and writes a
cold directory copy of the chroma persistence path
- new list_runtime_backups() and validate_backup() helpers
- new endpoints:
- POST /admin/backup create snapshot (optional chroma)
- GET /admin/backup list snapshots
- GET /admin/backup/{stamp}/validate structural validation
- chroma snapshots are taken under exclusive_ingestion() so a refresh
or ingest cannot race with the cold copy
- backup metadata records what was actually included and how big
Tests:
- 8 new tests covering tunable weights, refresh status branches
(ingested / partial / nothing_to_ingest), chroma snapshot, list,
validate, and the API endpoints (including the lock-acquisition path)
- existing fake refresh stubs in test_api_storage.py updated for the
expanded ProjectRefreshResponse model
- full suite: 105 passing (was 97)
next-steps doc updated to reflect that the chroma snapshot + restore
validation gap from current-state.md is now closed in code; only the
operational retention policy remains.
2026-04-06 18:42:19 -04:00
|
|
|
def test_create_runtime_backup_includes_chroma_when_requested(tmp_path, monkeypatch):
    """Check that ``include_chroma=True`` copies the chroma directory tree.

    A small fake chroma tree (one nested binary file and one top-level JSON
    file) is written under ``config.settings.chroma_path`` before the backup is
    taken. The test then asserts on the chroma-related fields of the returned
    result and on the presence of both files under the snapshot's ``chroma``
    directory.
    """
    backups_root = tmp_path / "backups"
    registry_file = tmp_path / "config" / "project-registry.json"

    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.setenv("ATOCORE_BACKUP_DIR", str(backups_root))
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_file))

    saved_settings = config.settings
    try:
        # Rebuild settings so the environment overrides above take effect.
        config.settings = config.Settings()
        init_db()

        # Populate a fake chroma directory tree with a couple of files.
        chroma_root = config.settings.chroma_path
        collection_dir = chroma_root / "collection-a"
        collection_dir.mkdir(parents=True, exist_ok=True)
        (collection_dir / "data.bin").write_bytes(b"\x00\x01\x02\x03")
        (chroma_root / "metadata.json").write_text('{"ok":true}', encoding="utf-8")

        taken_at = datetime(2026, 4, 6, 20, 0, 0, tzinfo=UTC)
        result = create_runtime_backup(taken_at, include_chroma=True)
    finally:
        config.settings = saved_settings

    chroma_snapshot_root = backups_root / "snapshots" / "20260406T200000Z" / "chroma"

    assert result["chroma_snapshot_included"] is True
    assert result["chroma_snapshot_path"] == str(chroma_snapshot_root)
    assert result["chroma_snapshot_files"] >= 2
    assert result["chroma_snapshot_bytes"] > 0

    copied_data = chroma_snapshot_root / "collection-a" / "data.bin"
    copied_metadata = chroma_snapshot_root / "metadata.json"
    assert copied_data.exists()
    assert copied_metadata.exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_list_and_validate_runtime_backups(tmp_path, monkeypatch):
    """Check listing and validation of runtime backups.

    Two backups are created with fixed timestamps. The test asserts that
    ``list_runtime_backups`` reports both stamps with metadata, that
    ``validate_backup`` accepts both, and that validating a stamp that was
    never created reports ``snapshot_directory_missing``.
    """
    stamp_one = "20260406T210000Z"
    stamp_two = "20260406T220000Z"
    snapshots_root = tmp_path / "backups" / "snapshots"

    monkeypatch.setenv("ATOCORE_DATA_DIR", str(tmp_path / "data"))
    monkeypatch.setenv("ATOCORE_BACKUP_DIR", str(tmp_path / "backups"))
    monkeypatch.setenv(
        "ATOCORE_PROJECT_REGISTRY_PATH",
        str(tmp_path / "config" / "project-registry.json"),
    )

    saved_settings = config.settings
    try:
        # Rebuild settings so the environment overrides above take effect.
        config.settings = config.Settings()
        init_db()

        first = create_runtime_backup(datetime(2026, 4, 6, 21, 0, 0, tzinfo=UTC))
        second = create_runtime_backup(datetime(2026, 4, 6, 22, 0, 0, tzinfo=UTC))

        listing = list_runtime_backups()
        first_validation = validate_backup(stamp_one)
        second_validation = validate_backup(stamp_two)
        # No backup was taken with this stamp.
        missing_validation = validate_backup("20260101T000000Z")
    finally:
        config.settings = saved_settings

    assert len(listing) == 2
    listed_stamps = {entry["stamp"] for entry in listing}
    assert listed_stamps == {stamp_one, stamp_two}
    for entry in listing:
        assert entry["has_metadata"] is True
        assert entry["metadata"]["db_snapshot_path"]

    assert first_validation["valid"] is True
    assert first_validation["db_ok"] is True
    assert first_validation["errors"] == []

    assert second_validation["valid"] is True

    assert missing_validation["exists"] is False
    assert "snapshot_directory_missing" in missing_validation["errors"]

    # The metadata written to disk agrees with what the first call returned.
    first_metadata_file = snapshots_root / stamp_one / "backup-metadata.json"
    first_metadata = json.loads(first_metadata_file.read_text(encoding="utf-8"))
    assert first_metadata["db_snapshot_path"] == first["db_snapshot_path"]
    assert second["db_snapshot_path"].endswith("atocore.db")
|
|
|
|
|
|
|
|
|
|
|
2026-04-06 10:15:00 -04:00
|
|
|
def test_create_runtime_backup_handles_missing_registry(tmp_path, monkeypatch):
    """Check the backup result when no project registry file exists.

    The registry path is configured but this test never creates the file. The
    backup result is expected to carry an empty ``registry_snapshot_path``.
    """
    env_overrides = {
        "ATOCORE_DATA_DIR": tmp_path / "data",
        "ATOCORE_BACKUP_DIR": tmp_path / "backups",
        "ATOCORE_PROJECT_REGISTRY_PATH": tmp_path / "config" / "project-registry.json",
    }
    for variable, location in env_overrides.items():
        monkeypatch.setenv(variable, str(location))

    saved_settings = config.settings
    try:
        # Rebuild settings so the environment overrides above take effect.
        config.settings = config.Settings()
        init_db()
        taken_at = datetime(2026, 4, 6, 19, 0, 0, tzinfo=UTC)
        result = create_runtime_backup(taken_at)
    finally:
        config.settings = saved_settings

    assert result["registry_snapshot_path"] == ""
|