fix: integrity check runs in container (host lacks deps)

scripts/integrity_check.py now POSTs to /admin/integrity-check
instead of importing atocore directly. The actual scan lives in
the container where DB access + deps are available. Host-side
cron just triggers and logs the result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-16 22:01:43 -04:00
parent 88f2f7c4e1
commit bb46e21c9b
2 changed files with 106 additions and 110 deletions

View File

@@ -565,6 +565,90 @@ def api_recent_audit(limit: int = 50) -> dict:
return {"entries": entries, "count": len(entries)}
@router.post("/admin/integrity-check")
def api_integrity_check(persist: bool = True) -> dict:
"""Run the integrity scan inside the container (where DB + deps live).
Returns findings and persists them to project state when persist=True.
"""
from atocore.models.database import get_connection
from atocore.context.project_state import set_state
import json as _json
findings = {
"orphan_chunk_refs": 0, "duplicate_active": 0,
"orphan_project_state": 0, "orphan_chunks": 0,
"memory_count": 0, "active_memory_count": 0,
}
details: list[str] = []
with get_connection() as conn:
r = conn.execute(
"SELECT COUNT(*) FROM memories m "
"WHERE m.source_chunk_id IS NOT NULL AND m.source_chunk_id != '' "
"AND NOT EXISTS (SELECT 1 FROM source_chunks c WHERE c.id = m.source_chunk_id)"
).fetchone()
findings["orphan_chunk_refs"] = int(r[0] or 0)
if findings["orphan_chunk_refs"]:
details.append(f"{findings['orphan_chunk_refs']} memory(ies) reference missing source_chunk_id")
dup_rows = conn.execute(
"SELECT memory_type, project, content, COUNT(*) AS n FROM memories "
"WHERE status = 'active' GROUP BY memory_type, project, content HAVING n > 1"
).fetchall()
findings["duplicate_active"] = sum(int(r[3]) - 1 for r in dup_rows)
if findings["duplicate_active"]:
details.append(f"{findings['duplicate_active']} duplicate active row(s) across {len(dup_rows)} group(s)")
r = conn.execute(
"SELECT COUNT(*) FROM project_state ps "
"WHERE NOT EXISTS (SELECT 1 FROM projects p WHERE p.id = ps.project_id)"
).fetchone()
findings["orphan_project_state"] = int(r[0] or 0)
if findings["orphan_project_state"]:
details.append(f"{findings['orphan_project_state']} project_state row(s) reference missing project")
r = conn.execute(
"SELECT COUNT(*) FROM source_chunks c "
"WHERE NOT EXISTS (SELECT 1 FROM source_documents d WHERE d.id = c.document_id)"
).fetchone()
findings["orphan_chunks"] = int(r[0] or 0)
if findings["orphan_chunks"]:
details.append(f"{findings['orphan_chunks']} chunk(s) have no parent document")
findings["memory_count"] = int(conn.execute("SELECT COUNT(*) FROM memories").fetchone()[0])
findings["active_memory_count"] = int(
conn.execute("SELECT COUNT(*) FROM memories WHERE status = 'active'").fetchone()[0]
)
result = {"findings": findings, "details": details, "ok": not details}
if persist:
try:
set_state(
project_name="atocore", category="status",
key="integrity_check_result",
value=_json.dumps(result),
source="integrity check endpoint",
)
except Exception as e:
log.warning("integrity_check_state_write_failed", error=str(e))
if details:
try:
from atocore.observability.alerts import emit_alert
emit_alert(
severity="warning",
title="Integrity drift detected",
message="; ".join(details),
context={k: v for k, v in findings.items() if not k.endswith("_count")},
)
except Exception:
pass
return result
@router.post("/memory/{memory_id}/promote")
def api_promote_memory(memory_id: str) -> dict:
"""Promote a candidate memory to active (Phase 9 Commit C)."""