fix: integrity check runs in container (host lacks deps)

scripts/integrity_check.py now POSTs to /admin/integrity-check
instead of importing atocore directly. The actual scan lives in
the container where DB access + deps are available. Host-side
cron just triggers and logs the result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 22:01:43 -04:00
parent 88f2f7c4e1
commit bb46e21c9b
2 changed files with 106 additions and 110 deletions

View File

@@ -1,17 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Nightly integrity check for AtoCore (Phase 4 Robustness V1). """Trigger the integrity check inside the AtoCore container.
Scans the database for drift conditions that indicate something is The scan itself lives in the container (needs direct DB access via the
silently broken: already-loaded sqlite connection). This host-side wrapper just POSTs to
- Memories referencing a non-existent source chunk /admin/integrity-check so the nightly cron can kick it off from bash
- Active memories with duplicate content within the same project+type without needing the container's Python deps on the host.
- Project-state entries with invalid project_id
- Orphaned source chunks whose parent document was deleted
- Memory count vs vector count parity drift (Chroma vs SQLite)
Findings are written to project state
(atocore/status/integrity_check_result) and surfaced on the dashboard.
Any non-empty finding emits a warning alert via the alerts framework.
Usage: Usage:
python3 scripts/integrity_check.py [--base-url URL] [--dry-run] python3 scripts/integrity_check.py [--base-url URL] [--dry-run]
@@ -23,114 +16,33 @@ import argparse
import json import json
import os import os
import sys import sys
import urllib.parse
# Make src/ importable when run from repo root import urllib.request
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))
def main() -> None:
    """Trigger the container-side integrity check and report the result.

    POSTs to <base-url>/admin/integrity-check and pretty-prints the JSON
    response. Exit codes: 0 = check ran and found nothing, 1 = endpoint
    unreachable, 2 = drift findings reported (so cron logs flag it).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--base-url",
        default=os.environ.get("ATOCORE_BASE_URL", "http://127.0.0.1:8100"),
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Report without persisting findings to state",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=30.0,
        help="Seconds to wait for the integrity-check endpoint (default: 30)",
    )
    args = parser.parse_args()

    url = args.base_url.rstrip("/") + "/admin/integrity-check"
    if args.dry_run:
        # The endpoint persists by default; persist=false makes it read-only.
        url += "?" + urllib.parse.urlencode({"persist": "false"})

    req = urllib.request.Request(url, method="POST")
    try:
        with urllib.request.urlopen(req, timeout=args.timeout) as resp:
            result = json.loads(resp.read().decode("utf-8"))
    except Exception as e:  # top-level boundary: report and exit non-zero for cron
        print(f"ERROR: could not reach {url}: {e}", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(result, indent=2))
    if not result.get("ok", True):
        # Non-zero exit so cron logs flag it
        sys.exit(2)
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -565,6 +565,90 @@ def api_recent_audit(limit: int = 50) -> dict:
return {"entries": entries, "count": len(entries)} return {"entries": entries, "count": len(entries)}
@router.post("/admin/integrity-check")
def api_integrity_check(persist: bool = True) -> dict:
    """Run the integrity scan inside the container (where DB + deps live).

    Args:
        persist: when True (the default), the result is written to project
            state under atocore/status/integrity_check_result.

    Returns:
        {"findings": <counters>, "details": [human-readable strings],
         "ok": bool} — ok is True iff no drift was detected.
    """
    from atocore.models.database import get_connection

    with get_connection() as conn:
        findings, details = _scan_integrity(conn)

    result = {"findings": findings, "details": details, "ok": not details}

    if persist:
        _persist_integrity_result(result)
    if details:
        _alert_integrity_drift(findings, details)
    return result


def _scan_integrity(conn) -> tuple:
    """Run the drift queries; return (findings counters, detail strings)."""
    findings = {
        "orphan_chunk_refs": 0, "duplicate_active": 0,
        "orphan_project_state": 0, "orphan_chunks": 0,
        "memory_count": 0, "active_memory_count": 0,
    }
    details: list[str] = []

    # 1) Memories referencing a non-existent source chunk.
    row = conn.execute(
        "SELECT COUNT(*) FROM memories m "
        "WHERE m.source_chunk_id IS NOT NULL AND m.source_chunk_id != '' "
        "AND NOT EXISTS (SELECT 1 FROM source_chunks c WHERE c.id = m.source_chunk_id)"
    ).fetchone()
    findings["orphan_chunk_refs"] = int(row[0] or 0)
    if findings["orphan_chunk_refs"]:
        details.append(f"{findings['orphan_chunk_refs']} memory(ies) reference missing source_chunk_id")

    # 2) Duplicate active memories (same content within project + type).
    dup_rows = conn.execute(
        "SELECT memory_type, project, content, COUNT(*) AS n FROM memories "
        "WHERE status = 'active' GROUP BY memory_type, project, content HAVING n > 1"
    ).fetchall()
    # Each group of n identical rows contributes n-1 duplicates.
    findings["duplicate_active"] = sum(int(r[3]) - 1 for r in dup_rows)
    if findings["duplicate_active"]:
        details.append(f"{findings['duplicate_active']} duplicate active row(s) across {len(dup_rows)} group(s)")

    # 3) Project-state entries pointing at a missing project.
    row = conn.execute(
        "SELECT COUNT(*) FROM project_state ps "
        "WHERE NOT EXISTS (SELECT 1 FROM projects p WHERE p.id = ps.project_id)"
    ).fetchone()
    findings["orphan_project_state"] = int(row[0] or 0)
    if findings["orphan_project_state"]:
        details.append(f"{findings['orphan_project_state']} project_state row(s) reference missing project")

    # 4) Source chunks whose parent document was deleted.
    row = conn.execute(
        "SELECT COUNT(*) FROM source_chunks c "
        "WHERE NOT EXISTS (SELECT 1 FROM source_documents d WHERE d.id = c.document_id)"
    ).fetchone()
    findings["orphan_chunks"] = int(row[0] or 0)
    if findings["orphan_chunks"]:
        details.append(f"{findings['orphan_chunks']} chunk(s) have no parent document")

    # 5) Context counters for the dashboard (not drift by themselves).
    findings["memory_count"] = int(conn.execute("SELECT COUNT(*) FROM memories").fetchone()[0])
    findings["active_memory_count"] = int(
        conn.execute("SELECT COUNT(*) FROM memories WHERE status = 'active'").fetchone()[0]
    )
    return findings, details


def _persist_integrity_result(result: dict) -> None:
    """Best-effort write of the scan result to project state; never raises."""
    import json as _json
    from atocore.context.project_state import set_state
    try:
        set_state(
            project_name="atocore", category="status",
            key="integrity_check_result",
            value=_json.dumps(result),
            source="integrity check endpoint",
        )
    except Exception as e:
        log.warning("integrity_check_state_write_failed", error=str(e))


def _alert_integrity_drift(findings: dict, details: list) -> None:
    """Best-effort warning alert when drift was found; never raises."""
    try:
        from atocore.observability.alerts import emit_alert
        emit_alert(
            severity="warning",
            title="Integrity drift detected",
            message="; ".join(details),
            # Counters like memory_count are context, not drift — omit them.
            context={k: v for k, v in findings.items() if not k.endswith("_count")},
        )
    except Exception:
        # Alerting must never break the check itself.
        pass
@router.post("/memory/{memory_id}/promote") @router.post("/memory/{memory_id}/promote")
def api_promote_memory(memory_id: str) -> dict: def api_promote_memory(memory_id: str) -> dict:
"""Promote a candidate memory to active (Phase 9 Commit C).""" """Promote a candidate memory to active (Phase 9 Commit C)."""