feat: post-backup validation + retention cleanup (Tasks B & C)
- create_runtime_backup() now auto-validates its output and includes validated/validation_errors fields in the returned metadata
- New cleanup_old_backups() with retention policy: 7 daily, 4 weekly (Sundays), 6 monthly (1st of month); dry-run by default
- CLI `cleanup` subcommand added to the backup module
- 9 new tests (2 validation + 7 retention), 259 total passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -103,12 +103,27 @@ def create_runtime_backup(
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Automatic post-backup validation. Failures log a warning but do
|
||||
# not raise — the backup files are still on disk and may be useful.
|
||||
validation = validate_backup(stamp)
|
||||
validated = validation.get("valid", False)
|
||||
validation_errors = validation.get("errors", [])
|
||||
if not validated:
|
||||
log.warning(
|
||||
"post_backup_validation_failed",
|
||||
backup_root=str(backup_root),
|
||||
errors=validation_errors,
|
||||
)
|
||||
metadata["validated"] = validated
|
||||
metadata["validation_errors"] = validation_errors
|
||||
|
||||
log.info(
|
||||
"runtime_backup_created",
|
||||
backup_root=str(backup_root),
|
||||
db_snapshot=str(db_snapshot_path),
|
||||
chroma_included=include_chroma,
|
||||
chroma_bytes=chroma_bytes_copied,
|
||||
validated=validated,
|
||||
)
|
||||
return metadata
|
||||
|
||||
@@ -389,6 +404,113 @@ def restore_runtime_backup(
|
||||
return result
|
||||
|
||||
|
||||
def _newest_per_bucket(
    stamps: list[tuple[datetime, Path]],
    bucket_key,
    limit: int,
) -> set[Path]:
    """Return the newest snapshot path for each of the first ``limit`` buckets.

    ``stamps`` must already be sorted newest first, so the first snapshot
    seen for a bucket is the most recent one in it. ``bucket_key`` maps a
    snapshot datetime to a bucket label, or to ``None`` when the snapshot is
    not eligible for this retention tier. A ``limit`` of zero or less keeps
    nothing.
    """
    if limit <= 0:
        return set()
    chosen: dict[str, Path] = {}
    for dt, path in stamps:
        label = bucket_key(dt)
        if label is None or label in chosen:
            continue
        chosen[label] = path
        if len(chosen) >= limit:
            break
    return set(chosen.values())


def cleanup_old_backups(
    *,
    confirm: bool = False,
    keep_daily: int = 7,
    keep_weekly: int = 4,
    keep_monthly: int = 6,
) -> dict:
    """Apply the retention policy and remove old snapshots.

    Snapshot directories live under ``<resolved_backup_dir>/snapshots`` and
    are named with a UTC stamp of the form ``%Y%m%dT%H%M%SZ``. Directories
    whose name does not parse are never deleted; they are reported under
    ``unparseable``.

    Retention keeps the union of:
      - the most recent snapshot of each of the last ``keep_daily``
        calendar days that have a snapshot,
      - the most recent snapshot of each of the last ``keep_weekly``
        Sundays that have a snapshot,
      - the most recent snapshot of each of the last ``keep_monthly``
        first-of-month days that have a snapshot.

    All other snapshots are candidates for deletion. Runs as a dry-run by
    default; pass ``confirm=True`` to actually delete.

    Args:
        confirm: When True, delete the candidates; otherwise only count them.
        keep_daily: Number of daily buckets to retain (default 7).
        keep_weekly: Number of Sunday buckets to retain (default 4).
        keep_monthly: Number of first-of-month buckets to retain (default 6).
            A value of zero or less disables the corresponding tier.

    Returns:
        A dict that always contains the same keys, whichever path is taken:
        ``kept``, ``deleted`` (0 on dry-run), ``would_delete`` (0 when
        ``confirm`` is True), ``dry_run``, ``errors`` (one
        ``"<name>: <exception>"`` string per failed removal) and
        ``unparseable`` (directory names that are not valid stamps).
    """
    dry_run = not confirm

    def _report(
        kept: int,
        deleted: int,
        would_delete: int,
        errors: list[str],
        unparseable: list[str],
    ) -> dict:
        # Single place that defines the result shape so every return path
        # (missing root, no snapshots, normal run) exposes identical keys.
        return {
            "kept": kept,
            "deleted": deleted,
            "would_delete": would_delete,
            "dry_run": dry_run,
            "errors": errors,
            "unparseable": unparseable,
        }

    snapshots_root = _config.settings.resolved_backup_dir / "snapshots"
    if not snapshots_root.exists() or not snapshots_root.is_dir():
        return _report(0, 0, 0, [], [])

    # Parse all stamp directories into (datetime, dir_path) pairs. Iterating
    # in sorted order keeps the ``unparseable`` list deterministic.
    stamps: list[tuple[datetime, Path]] = []
    unparseable: list[str] = []
    for entry in sorted(snapshots_root.iterdir()):
        if not entry.is_dir():
            continue
        try:
            dt = datetime.strptime(entry.name, "%Y%m%dT%H%M%SZ").replace(tzinfo=UTC)
        except ValueError:
            unparseable.append(entry.name)
        else:
            stamps.append((dt, entry))

    if not stamps:
        return _report(0, 0, 0, [], unparseable)

    # Sort newest first so "most recent per bucket" is a simple first-seen.
    stamps.sort(key=lambda t: t[0], reverse=True)

    keep_set: set[Path] = set()
    # Daily tier: every snapshot is eligible, bucketed by calendar day.
    keep_set |= _newest_per_bucket(
        stamps,
        lambda dt: dt.strftime("%Y-%m-%d"),
        keep_daily,
    )
    # Weekly tier: only snapshots taken on a Sunday (weekday() == 6).
    keep_set |= _newest_per_bucket(
        stamps,
        lambda dt: dt.strftime("%Y-W%W") if dt.weekday() == 6 else None,
        keep_weekly,
    )
    # Monthly tier: only snapshots taken on the 1st of a month.
    keep_set |= _newest_per_bucket(
        stamps,
        lambda dt: dt.strftime("%Y-%m") if dt.day == 1 else None,
        keep_monthly,
    )

    to_delete = [path for _, path in stamps if path not in keep_set]

    errors: list[str] = []
    deleted_count = 0
    if confirm:
        for path in to_delete:
            try:
                shutil.rmtree(path)
            except OSError as exc:
                # Best effort: record the failure and keep going so one bad
                # directory does not block the rest of the cleanup.
                errors.append(f"{path.name}: {exc}")
            else:
                deleted_count += 1

    would_delete = 0 if confirm else len(to_delete)
    result = _report(len(keep_set), deleted_count, would_delete, errors, unparseable)

    log.info(
        "cleanup_old_backups",
        kept=len(keep_set),
        deleted=deleted_count,
        would_delete=would_delete,
        dry_run=dry_run,
    )
    return result
|
||||
|
||||
|
||||
def _backup_sqlite_db(source_path: Path, dest_path: Path) -> None:
|
||||
source_conn = sqlite3.connect(str(source_path))
|
||||
dest_conn = sqlite3.connect(str(dest_path))
|
||||
@@ -448,6 +570,13 @@ def main() -> None:
|
||||
p_validate = sub.add_parser("validate", help="validate a snapshot by stamp")
|
||||
p_validate.add_argument("stamp", help="snapshot stamp (e.g. 20260409T010203Z)")
|
||||
|
||||
p_cleanup = sub.add_parser("cleanup", help="remove old snapshots per retention policy")
|
||||
p_cleanup.add_argument(
|
||||
"--confirm",
|
||||
action="store_true",
|
||||
help="actually delete (default is dry-run)",
|
||||
)
|
||||
|
||||
p_restore = sub.add_parser(
|
||||
"restore",
|
||||
help="restore a snapshot by stamp (service must be stopped)",
|
||||
@@ -488,6 +617,8 @@ def main() -> None:
|
||||
result = {"backups": list_runtime_backups()}
|
||||
elif command == "validate":
|
||||
result = validate_backup(args.stamp)
|
||||
elif command == "cleanup":
|
||||
result = cleanup_old_backups(confirm=getattr(args, "confirm", False))
|
||||
elif command == "restore":
|
||||
result = restore_runtime_backup(
|
||||
args.stamp,
|
||||
|
||||
Reference in New Issue
Block a user