ATOCore/scripts/integrity_check.py

feat: Phase 4 V1 — Robustness Hardening

Adds the observability + safety layer that turns AtoCore from "works
until something silently breaks" into "every mutation is traceable,
drift is detected, failures raise alerts."

1. Audit log (memory_audit table):
   - New table with id, memory_id, action, actor, before/after JSON,
     note, timestamp; 3 indexes for memory_id/timestamp/action
   - _audit_memory() helper called from every mutation: create_memory,
     update_memory, promote_memory, reject_candidate_memory,
     invalidate_memory, supersede_memory, reinforce_memory,
     auto_promote_reinforced, expire_stale_candidates
   - Action verb auto-selected: promoted/rejected/invalidated/
     superseded/updated based on state transition
   - "actor" threaded through: api-http, human-triage,
     phase10-auto-promote, candidate-expiry, reinforcement, etc.
   - Fail-open: audit write failure logs but never breaks the mutation
   - GET /memory/{id}/audit: full history for one memory
   - GET /admin/audit/recent: last 50 mutations across the system

2. Alerts framework (src/atocore/observability/alerts.py):
   - emit_alert(severity, title, message, context) fans out to:
     - structlog logger (always)
     - ~/atocore-logs/alerts.log append (configurable via
       ATOCORE_ALERT_LOG)
     - project_state atocore/alert/last_{severity} (dashboard surface)
     - ATOCORE_ALERT_WEBHOOK POST if set (auto-detects Discord webhook
       format for nice embeds; generic JSON otherwise)
   - Every sink is fail-open — one failure doesn't prevent the others
   - Pipeline alert step in nightly cron: harness < 85% → warning;
     candidate queue > 200 → warning

3. Integrity checks (scripts/integrity_check.py):
   - Nightly scan for drift:
     - Memories → missing source_chunk_id references
     - Duplicate active memories (same type+content+project)
     - project_state → missing projects
     - Orphaned source_chunks (no parent document)
   - Results persisted to atocore/status/integrity_check_result
   - Any finding emits a warning alert
   - Added as Step G in deploy/dalidou/batch-extract.sh nightly cron

4. Dashboard surfaces it all:
   - integrity (findings + details)
   - alerts (last info/warning/critical per severity)
   - recent_audit (last 10 mutations with actor + action + preview)

Tests: 308 → 317 (9 new):
- test_audit_create_logs_entry
- test_audit_promote_logs_entry
- test_audit_reject_logs_entry
- test_audit_update_captures_before_after
- test_audit_reinforce_logs_entry
- test_recent_audit_returns_cross_memory_entries
- test_emit_alert_writes_log_file
- test_emit_alert_invalid_severity_falls_back_to_info
- test_emit_alert_fails_open_on_log_write_error

Deferred: formal migration framework with rollback (current additive
pattern is fine for V1); memory detail wiki page with audit view
(quick follow-up).

To enable Discord alerts: set ATOCORE_ALERT_WEBHOOK to a Discord
webhook URL in Dalidou's environment. Default = log-only.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 21:54:10 -04:00
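Before the script itself, the fail-open fan-out from section 2 of the commit message is worth a sketch. This is a minimal illustration under stated assumptions, not the shipped alerts.py: the sink helper names (_log_file_sink, _webhook_sink) and VALID_SEVERITIES are hypothetical, and the real implementation also fans out to structlog and project_state, plus Discord embed formatting when the webhook looks like a Discord URL.

import json
import os
import urllib.request

VALID_SEVERITIES = {"info", "warning", "critical"}

def emit_alert(severity, title, message, context=None):
    if severity not in VALID_SEVERITIES:
        severity = "info"  # unknown severities fall back to info
    payload = {"severity": severity, "title": title,
               "message": message, "context": context or {}}
    # Each sink runs in its own try/except: one broken sink (full disk,
    # dead webhook) must never suppress the others or the caller.
    for sink in (_log_file_sink, _webhook_sink):
        try:
            sink(payload)
        except Exception:
            pass  # fail-open

def _log_file_sink(payload):
    # Append one JSON line to the alert log (path overridable via env).
    path = os.environ.get(
        "ATOCORE_ALERT_LOG",
        os.path.expanduser("~/atocore-logs/alerts.log"),
    )
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(payload) + "\n")

def _webhook_sink(payload):
    # POST the alert to ATOCORE_ALERT_WEBHOOK if configured; no-op otherwise.
    url = os.environ.get("ATOCORE_ALERT_WEBHOOK")
    if not url:
        return
    req = urllib.request.Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    urllib.request.urlopen(req, timeout=5)

The key design choice is that emit_alert is called from the mutation path, so it must be cheap to fail: every sink swallows its own exceptions rather than letting an observability problem break a write.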
#!/usr/bin/env python3
"""Nightly integrity check for AtoCore (Phase 4 Robustness V1).

Scans the database for drift conditions that indicate something is
silently broken:

- Memories referencing a non-existent source chunk
- Active memories with duplicate content within the same project+type
- Project-state entries with an invalid project_id
- Orphaned source chunks whose parent document was deleted

Findings are written to project state
(atocore/status/integrity_check_result) and surfaced on the dashboard.
Any non-empty finding emits a warning alert via the alerts framework.

Usage:
    python3 scripts/integrity_check.py [--base-url URL] [--dry-run]
"""
from __future__ import annotations

import argparse
import json
import os
import sys

# Make src/ importable when run from repo root
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src"))


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--base-url", default=os.environ.get("ATOCORE_BASE_URL", "http://127.0.0.1:8100"))
    parser.add_argument("--dry-run", action="store_true", help="Report without writing findings to state")
    args = parser.parse_args()

    from atocore.models.database import get_connection
    from atocore.observability.alerts import emit_alert

    findings: dict = {
        "orphan_chunk_refs": 0,
        "duplicate_active": 0,
        "orphan_project_state": 0,
        "orphan_chunks": 0,
        "memory_count": 0,
        "active_memory_count": 0,
    }
    details: list[str] = []

    with get_connection() as conn:
        # 1) Memories referencing a non-existent source_chunk_id
        row = conn.execute(
            "SELECT COUNT(*) FROM memories m "
            "WHERE m.source_chunk_id IS NOT NULL "
            "AND m.source_chunk_id != '' "
            "AND NOT EXISTS (SELECT 1 FROM source_chunks c WHERE c.id = m.source_chunk_id)"
        ).fetchone()
        findings["orphan_chunk_refs"] = int(row[0] or 0)
        if findings["orphan_chunk_refs"]:
            details.append(f"{findings['orphan_chunk_refs']} memory(ies) reference a missing source_chunk_id")

        # 2) Duplicate active memories (same content + project + type)
        dup_rows = conn.execute(
            "SELECT memory_type, project, content, COUNT(*) AS n "
            "FROM memories WHERE status = 'active' "
            "GROUP BY memory_type, project, content HAVING n > 1"
        ).fetchall()
        findings["duplicate_active"] = sum(int(r[3]) - 1 for r in dup_rows)
        if findings["duplicate_active"]:
            details.append(f"{findings['duplicate_active']} duplicate active memory row(s) across {len(dup_rows)} group(s)")

        # 3) Project-state entries with invalid project_id
        row = conn.execute(
            "SELECT COUNT(*) FROM project_state ps "
            "WHERE NOT EXISTS (SELECT 1 FROM projects p WHERE p.id = ps.project_id)"
        ).fetchone()
        findings["orphan_project_state"] = int(row[0] or 0)
        if findings["orphan_project_state"]:
            details.append(f"{findings['orphan_project_state']} project_state row(s) reference a missing project")

        # 4) Orphaned source chunks
        row = conn.execute(
            "SELECT COUNT(*) FROM source_chunks c "
            "WHERE NOT EXISTS (SELECT 1 FROM source_documents d WHERE d.id = c.document_id)"
        ).fetchone()
        findings["orphan_chunks"] = int(row[0] or 0)
        if findings["orphan_chunks"]:
            details.append(f"{findings['orphan_chunks']} source chunk(s) have no parent document")

        # 5) Memory counts (context for dashboard)
        findings["memory_count"] = int(conn.execute("SELECT COUNT(*) FROM memories").fetchone()[0])
        findings["active_memory_count"] = int(
            conn.execute("SELECT COUNT(*) FROM memories WHERE status = 'active'").fetchone()[0]
        )

    # Compose result
    result = {
        "findings": findings,
        "details": details,
        "ok": not details,
    }
    print(json.dumps(result, indent=2))

    # Write to project state unless dry-run
    if not args.dry_run:
        try:
            import urllib.request

            body = json.dumps({
                "project": "atocore",
                "category": "status",
                "key": "integrity_check_result",
                "value": json.dumps(result),
                "source": "integrity check",
            }).encode("utf-8")
            req = urllib.request.Request(
                f"{args.base_url}/project/state",
                data=body,
                method="POST",
                headers={"Content-Type": "application/json"},
            )
            urllib.request.urlopen(req, timeout=10)
        except Exception as e:
            print(f"WARN: state write failed: {e}", file=sys.stderr)

    # Raise an alert if anything drifted (this fires even under --dry-run;
    # only the project-state write above is skipped)
    if details:
        emit_alert(
            severity="warning",
            title="Integrity drift detected",
            message="; ".join(details),
            context={k: v for k, v in findings.items() if not k.endswith("_count")},
        )


if __name__ == "__main__":
    main()
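For reference, a sketch of what a run looks like. With --dry-run the script prints the composed result but skips the project-state POST; the warning alert still fires when details is non-empty, since emit_alert sits outside the dry-run guard. On an empty database the output would be:

$ python3 scripts/integrity_check.py --dry-run
{
  "findings": {
    "orphan_chunk_refs": 0,
    "duplicate_active": 0,
    "orphan_project_state": 0,
    "orphan_chunks": 0,
    "memory_count": 0,
    "active_memory_count": 0
  },
  "details": [],
  "ok": true
}

The nightly cron (Step G in deploy/dalidou/batch-extract.sh, per the commit message) runs it without --dry-run, so findings land in atocore/status/integrity_check_result for the dashboard.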