Phase V1-0 of the Engineering V1 Completion Plan. Establishes the
write-time invariants every later phase depends on so no later phase
can leak invalid state into the entity store.
F-1 shared-header fields per engineering-v1-acceptance.md:45:
- entities.extractor_version (default "", EXTRACTOR_VERSION="v1.0.0"
written by service.create_entity)
- entities.canonical_home (default "entity")
- entities.hand_authored (default 0, INTEGER boolean)
Idempotent ALTERs in both _apply_migrations (database.py) and
init_engineering_schema (service.py). CREATE TABLE also carries the
columns for fresh DBs. _row_to_entity tolerates old rows without
them so tests that predate V1-0 keep passing.
F-8 provenance enforcement per promotion-rules.md:243:
create_entity raises ValueError when source_refs is empty and
hand_authored is False. New kwargs hand_authored and
extractor_version threaded through the API (EntityCreateRequest)
and the /wiki/new form body (human wiki writes set hand_authored
true by definition). The non-negotiable invariant: every row either
carries provenance or is explicitly flagged as hand-authored.
F-5 synchronous conflict-detection hook on active create per
engineering-v1-acceptance.md:99:
create_entity(status="active") now runs detect_conflicts_for_entity
with fail-open per conflict-model.md:256. Detector errors log a
warning but never 4xx-block the write (Q-3 "flag, never block").
Doc note added to engineering-ontology-v1.md recording that `project`
IS the `project_id` per "fields equivalent to" wording. No storage
rename.
Backfill script scripts/v1_0_backfill_provenance.py reports and
optionally flags existing active entities that lack provenance.
Idempotent. Supports --dry-run and --invalidate-instead.
Tests: 10 new in test_v1_0_write_invariants.py covering F-1 fields,
F-8 raise + bypass, F-5 hook on active + no-hook on candidate, Q-3
fail-open, Q-4 partial scope_only=active excludes candidates.
Three pre-existing conflict tests adapted to read list_open_conflicts
rather than re-run the detector (which now dedups because the hook
already fired at create-time). One API test adds hand_authored=true
since its fixture has no source_refs.
conftest.py wraps create_entity so tests that don't pass source_refs
or hand_authored default to hand_authored=True (tests author their
own fixture data — reasonable default). Production paths (API route,
wiki form, graduation scripts) all pass explicit values and are
unaffected.
Test count: 533 -> 543 (+10). Full suite green in 77.86s.
Pending: Codex review on the branch before squash-merge to main.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
152 lines
5.2 KiB
Python
152 lines
5.2 KiB
Python
"""V1-0 one-time backfill: flag existing active entities that have no
|
|
provenance (empty source_refs) as hand_authored=1 so they stop failing
|
|
the F-8 invariant.
|
|
|
|
Runs against the live AtoCore DB. Idempotent: a second run after the
|
|
first touches nothing because the flagged rows already have
|
|
hand_authored=1.
|
|
|
|
Per the Engineering V1 Completion Plan (V1-0 scope), the three options
|
|
for an existing active entity without provenance are:
|
|
|
|
1. Attach provenance — impossible without human review, not automatable
|
|
2. Flag hand-authored — safe, additive, this script's default
|
|
3. Invalidate — destructive, requires operator sign-off
|
|
|
|
This script picks option (2) by default. Add --dry-run to see what
|
|
would change without writing. Add --invalidate-instead to pick option
|
|
(3) for all rows (not recommended for first run).
|
|
|
|
Usage:
|
|
python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db --dry-run
|
|
python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _lacks_provenance(refs_raw) -> bool:
    """Return True when a stored ``source_refs`` value holds no references.

    NULL / empty values are treated as an empty JSON list. A value that
    cannot be decoded as JSON is also counted as "no provenance", matching
    the best-effort behaviour of the backfill.
    """
    try:
        refs = json.loads(refs_raw or "[]")
    except (ValueError, TypeError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses; TypeError covers a column holding a non-text value.
        return True
    return not refs


def _backfill(conn: sqlite3.Connection, dry_run: bool, invalidate_instead: bool) -> int:
    """Report and optionally fix no-provenance entities on an open connection.

    Returns the process exit code: 2 when the ``hand_authored`` column is
    missing, 0 otherwise.
    """
    # Verify the V1-0 migration ran: if hand_authored column is missing
    # the operator hasn't deployed V1-0 yet, and running this script
    # would crash. Fail loud rather than attempt the ALTER here.
    cols = {r["name"] for r in conn.execute("PRAGMA table_info(entities)").fetchall()}
    if "hand_authored" not in cols:
        print(
            "ERROR: entities table lacks the hand_authored column. "
            "Deploy V1-0 migrations first (init_db + init_engineering_schema).",
            file=sys.stderr,
        )
        return 2

    rows = conn.execute(
        "SELECT id, entity_type, name, project, status, source_refs, hand_authored "
        "FROM entities WHERE status IN ('active', 'superseded') "
        "AND hand_authored = 0"
    ).fetchall()
    needs_fix = [row for row in rows if _lacks_provenance(row["source_refs"])]

    print(f"found {len(needs_fix)} active/superseded entities with no provenance")
    for row in needs_fix:
        print(
            f"  - {row['id'][:8]} [{row['entity_type']}] "
            f"{row['name']!r} project={row['project']!r} status={row['status']}"
        )

    if dry_run:
        print("--dry-run: no changes written")
        return 0

    if not needs_fix:
        print("nothing to do")
        return 0

    action = "invalidate" if invalidate_instead else "flag hand_authored=1"
    print(f"applying: {action}")

    if invalidate_instead:
        update_sql = (
            "UPDATE entities SET status = 'invalid', "
            "updated_at = CURRENT_TIMESTAMP WHERE id = ?"
        )
        audit_action = "invalidated"
        id_suffix = "inv"
        note = "V1-0 backfill: invalidated, no provenance"
    else:
        update_sql = (
            "UPDATE entities SET hand_authored = 1, "
            "updated_at = CURRENT_TIMESTAMP WHERE id = ?"
        )
        audit_action = "hand_authored_flagged"
        id_suffix = "ha"
        note = "V1-0 backfill: flagged hand_authored since source_refs empty"

    audit_sql = (
        "INSERT INTO memory_audit "
        "(id, memory_id, action, actor, before_json, after_json, note, entity_kind) "
        "VALUES (?, ?, ?, 'v1_0_backfill', ?, ?, ?, 'entity')"
    )

    # `with conn` commits when the block succeeds and rolls back if any
    # statement raises, so a failed run never leaves a half-applied backfill.
    with conn:
        for row in needs_fix:
            if invalidate_instead:
                before = {"status": row["status"]}
                after = {"status": "invalid"}
            else:
                before = {"hand_authored": False}
                after = {"hand_authored": True}
            conn.execute(update_sql, (row["id"],))
            # NOTE(review): the audit id uses only the first 8 characters of
            # the entity id; if memory_audit.id is unique, two entities
            # sharing that prefix would collide — confirm against the schema.
            conn.execute(
                audit_sql,
                (
                    f"v10bf-{row['id'][:8]}-{id_suffix}",
                    row["id"],
                    audit_action,
                    json.dumps(before),
                    json.dumps(after),
                    note,
                ),
            )

    print(f"done: updated {len(needs_fix)} entities")
    return 0


def run(db_path: Path, dry_run: bool, invalidate_instead: bool) -> int:
    """Flag (or invalidate) active/superseded entities that have no provenance.

    Selects rows of ``entities`` whose status is ``active`` or ``superseded``,
    whose ``hand_authored`` is 0, and whose ``source_refs`` is empty, NULL or
    not decodable as JSON. Each match is printed. Unless ``dry_run`` is set,
    every match is then updated and one ``memory_audit`` row is inserted per
    entity, all inside a single transaction. Updated rows no longer match the
    selection, so a second run finds nothing to change.

    Args:
        db_path: Path to the SQLite database file.
        dry_run: When true, only report the matching rows; write nothing.
        invalidate_instead: When true, set ``status = 'invalid'`` on each
            match instead of setting ``hand_authored = 1``.

    Returns:
        0 on success (including dry runs and runs with nothing to do),
        2 when the database file does not exist or the ``entities`` table
        has no ``hand_authored`` column.
    """
    if not db_path.exists():
        print(f"ERROR: db not found: {db_path}", file=sys.stderr)
        return 2

    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        return _backfill(conn, dry_run, invalidate_instead)
    finally:
        # Release the database handle on every exit path, error or not.
        conn.close()
|
|
|
|
|
|
def main() -> int:
    """Command-line entry point: parse the options and delegate to ``run``.

    Recognised options are ``--db`` (path to the SQLite file),
    ``--dry-run`` and ``--invalidate-instead``. The module docstring is
    reused as the help description.

    Returns:
        The exit code produced by ``run``.
    """
    cli = argparse.ArgumentParser(description=__doc__)

    default_db = Path("data/db/atocore.db")
    cli.add_argument(
        "--db",
        type=Path,
        default=default_db,
        help="Path to the SQLite database (default: data/db/atocore.db)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Report only; no writes",
    )
    cli.add_argument(
        "--invalidate-instead",
        action="store_true",
        help="Invalidate no-provenance rows instead of flagging hand_authored",
    )

    opts = cli.parse_args()
    return run(
        db_path=opts.db,
        dry_run=opts.dry_run,
        invalidate_instead=opts.invalidate_instead,
    )
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|