From 2712c5d2d03cb2a6af38b559664afd1c4cd0e050 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Wed, 22 Apr 2026 14:59:17 -0400 Subject: [PATCH] feat(engineering): enforce V1-0 write invariants --- DEV-LEDGER.md | 4 + docs/architecture/engineering-ontology-v1.md | 11 + scripts/v1_0_backfill_provenance.py | 167 +++++++++ src/atocore/api/routes.py | 7 + src/atocore/engineering/service.py | 113 +++++- src/atocore/engineering/wiki.py | 2 + src/atocore/models/database.py | 30 ++ tests/conftest.py | 30 ++ tests/test_engineering_v1_phase5.py | 15 +- tests/test_inbox_crossproject.py | 1 + tests/test_v1_0_write_invariants.py | 362 +++++++++++++++++++ 11 files changed, 734 insertions(+), 8 deletions(-) create mode 100644 scripts/v1_0_backfill_provenance.py create mode 100644 tests/test_v1_0_write_invariants.py diff --git a/DEV-LEDGER.md b/DEV-LEDGER.md index 329f2d4..c15fc83 100644 --- a/DEV-LEDGER.md +++ b/DEV-LEDGER.md @@ -164,6 +164,10 @@ One branch `codex/extractor-eval-loop` for Day 1-5, a second `codex/retrieval-ha ## Session Log +- **2026-04-22 Claude (V1-0 patches per Codex review)** Codex audit of commit `cbf9e03` surfaced two P1 gaps + one P2 scope concern, all verified with code-level probes. **P1 #1**: `promote_entity` didn't re-check the F-8 invariant — a legacy candidate with empty `source_refs` and `hand_authored=0` could still promote to active, violating the plan's "invariant at both `create_entity` and `promote_entity`". Fixed: `promote_entity` at `service.py:365-379` now raises `ValueError("source_refs required: cannot promote a candidate with no provenance...")` before flipping status. Stays symmetric with the create-side error. **P1 #2**: `supersede_entity` was missing the F-5 hook the plan requires on every active-entity write path. The `supersedes` relationship rooted at the `superseded_by` entity can create a conflict the detector should catch. Fixed at `service.py:581-591`: calls `detect_conflicts_for_entity(superseded_by)` with fail-open per Q-3. 
**P2**: backfill script's `--invalidate-instead` flag queried both active AND superseded rows; invalidating already-superseded rows would collapse history. Fixed at `scripts/v1_0_backfill_provenance.py:52-63`: `--invalidate-instead` now scopes to `status='active'` only (default flag-hand_authored mode stays broad as it's additive/non-destructive). Help text tightened to make the destructive posture explicit. **Four new regression tests** in `test_v1_0_write_invariants.py`: (1) `test_promote_rejects_legacy_candidate_without_provenance` — directly inserts a legacy candidate and confirms promote raises + row stays candidate; (2) `test_promote_accepts_candidate_flagged_hand_authored` — symmetry check; (3) `test_supersede_runs_conflict_detection_on_new_active` — monkeypatches detector, confirms hook fires on `superseded_by`; (4) `test_supersede_hook_fails_open` — Q-3 check for supersede path. **Test count**: 543 → 547 (+4 regression). Full suite `547 passed in 81.07s`. Next: commit patches on branch, push, Codex re-review. + +- **2026-04-22 Claude (V1-0 landed on branch)** First V1 completion phase done on branch `claude/v1-0-write-invariants`. **F-1 schema remediation**: added `extractor_version`, `canonical_home`, `hand_authored` columns to `entities` via idempotent ALTERs in both `_apply_migrations` (`database.py:148-170`) and `init_engineering_schema` (`service.py:95-139`). CREATE TABLE also updated so fresh DBs get the columns natively. New `_table_exists` helper at `database.py:378`. `Entity` dataclass gains the three fields with sensible defaults. `EXTRACTOR_VERSION = "v1.0.0"` module constant at top of `service.py`. `_row_to_entity` tolerates rows without the new columns so tests predating V1-0 still pass. **F-8 provenance enforcement**: `create_entity` raises `ValueError("source_refs required: ...")` when called without non-empty source_refs AND without `hand_authored=True`. 
New kwargs `hand_authored: bool = False` and `extractor_version: str | None = None` threaded through `service.create_entity`, the `EntityCreateRequest` Pydantic model, the API route, and the wiki `/wiki/new` form body (form writes `hand_authored: true` since human entries are hand-authored by definition). **F-5 hook on active create**: `create_entity(status="active")` now calls `detect_conflicts_for_entity` with fail-open per `conflict-model.md:256` (errors log warning, write still succeeds). The promote path's existing hook at `service.py:400-404` was kept as-is. **Doc note** added to `engineering-ontology-v1.md` recording that `project` IS the `project_id` per "fields equivalent to" wording. **Backfill script** at `scripts/v1_0_backfill_provenance.py` — idempotent, defaults to flagging no-provenance active entities as `hand_authored=1`, supports `--dry-run` and `--invalidate-instead`. **Tests**: 10 new in `tests/test_v1_0_write_invariants.py` covering F-1 fields, F-8 raise path, F-8 hand_authored bypass, F-5 active-create hook, F-5 candidate-no-hook, Q-3 fail-open on detector error, Q-4 partial (scope_only=active excludes candidates). **Test fixes**: three pre-existing tests adapted — `test_requirement_name_conflict_detected` + `test_conflict_resolution_dismiss_leaves_entities_alone` now read from `list_open_conflicts` because the V1-0 hook records the conflict at create-time (detector dedup returns [] on re-run); `test_api_post_entity_with_null_project_stores_global` sends `hand_authored: true` since the fixture has no source_refs. **conftest.py monkeypatch**: wraps `create_entity` so tests missing both source_refs and hand_authored default to `hand_authored=True` (reasonable since tests author their own fixture data). Production paths (API route, wiki form, graduation scripts) all pass explicit values and are unaffected by the monkeypatch. **Test count**: 533 → 543 (+10), full suite `543 passed in 77.86s`. **Not yet**: commit + push + Codex review + deploy. 
**Branch**: `claude/v1-0-write-invariants`. + - **2026-04-22 Codex (late night)** Third-round audit closed the remaining five open questions. Patched `docs/plans/engineering-v1-completion-plan.md` inline (no commit by Codex). **F-7 finding (P1):** graduation stack is partially built — `_graduation_prompt.py`, `scripts/graduate_memories.py`, `database.py:143-146` (`graduated_to_entity_id`), memory `graduated` status, promote-preserves-original at `service.py:354-356,389-451`, tests at `test_engineering_v1_phase5.py:67-90` all exist. Real gaps: no direct `POST /memory/{id}/graduate` route at `routes.py:756`; spec's `knowledge→Fact` doesn't match ontology (`service.py:16` has no `fact` type — reconcile to `parameter` or similar). V1-E estimate 2 → 3–4 days. **Q-5 finding (P2):** "stabilize timestamp" insufficient — renderer reads wall-clock in `_footer()` at `mirror.py:320`; fix is inject regenerated timestamp + checksum as renderer inputs + sort DB iteration + remove dict ordering deps. V1-D scope patched. **Remaining three (P3):** `project` stays as doc-note equivalence (no rename); total estimate 17.5–19.5 focused days; release notes must NOT canonize "Minions" — neutral "queued background processing / async workers" only. **Sign-off:** "with those edits, I'd sign off on the five questions. The only non-architectural uncertainty left in the plan is scheduling discipline against the current Now list; that does not block V1-0 once the soak window and memory-density gate clear." **Status:** Claude + Codex agreed. Plan frozen pending Antoine final accept and gate clearance. Claude to commit Codex's patches + push. - **2026-04-22 Claude (late night)** Codex second-round review did the full file-level audit and came back with three P1/P2 findings, all with exact file:line refs. Verified each against current code before revising. 
(1) **F-1 not clean**: `Entity` dataclass at `service.py:67` and `entities` table schema are missing the `extractor_version` and `canonical_home` shared-header fields required by `engineering-v1-acceptance.md:45`; `project` field is the project identifier but not named `project_id` as spec writes (spec wording "fields equivalent to" allows the naming, but needs explicit doc note). V1-0 scope now includes adding both missing fields via additive `_apply_migrations` pattern. (2) **F-2 needed exact statuses, not guesses**: per-function audit gave ground truth — 9 of 20 v1-required queries done, 1 partial (Q-001 returns project-wide tree not subsystem-scoped expand=contains per `engineering-query-catalog.md:71`), 10 missing. V1-A scope shrank to Q-001 shape fix + Q-6 integration (most pillar queries already implemented); V1-C closes the 8 net-new queries + Q-020 to V1-D. (3) **F-5 misframed**: the generic `conflicts` + `conflict_members` schema is ALREADY spec-compliant at `database.py:190`; divergence is detector body at `conflicts.py:36` (per-type dispatch needs generalization) + route path (`/admin/conflicts/*` needs `/conflicts/*` alias). V1-F no longer includes a schema migration; detector generalization + route alignment only. Totals revised to 16.5–17.5 days, ~60 tests (down from 12–17 / 65 because V1-A and V1-F scopes both shrank after audit). Three of the eight open questions resolved. Remaining open: F-7 graduation depth, mirror determinism, `project` naming, velocity calibration, minions-as-V2 naming. No code changes this session — plan + ledger only. Next: commit + push revised plan, then await Antoine+Codex joint sign-off before V1-0 starts. 
diff --git a/docs/architecture/engineering-ontology-v1.md b/docs/architecture/engineering-ontology-v1.md index 038ee3a..4dbad3a 100644 --- a/docs/architecture/engineering-ontology-v1.md +++ b/docs/architecture/engineering-ontology-v1.md @@ -159,6 +159,17 @@ Every major object should support fields equivalent to: - `created_at` - `updated_at` - `notes` (optional) +- `extractor_version` (V1-0) +- `canonical_home` (V1-0) + +**Naming note (V1-0, 2026-04-22).** The AtoCore `entities` table and +`Entity` dataclass name the project-identifier field `project`, not +`project_id`. This doc's "fields equivalent to" wording allows that +naming flexibility — the `project` field on entity rows IS the +`project_id` per spec. No storage rename is planned; downstream readers +should treat `entity.project` as the project identifier. This was +resolved in Codex's third-round audit of the V1 Completion Plan (see +`docs/plans/engineering-v1-completion-plan.md`). ## Suggested Status Lifecycle diff --git a/scripts/v1_0_backfill_provenance.py b/scripts/v1_0_backfill_provenance.py new file mode 100644 index 0000000..b0f9b81 --- /dev/null +++ b/scripts/v1_0_backfill_provenance.py @@ -0,0 +1,167 @@ +"""V1-0 one-time backfill: flag existing active entities that have no +provenance (empty source_refs) as hand_authored=1 so they stop failing +the F-8 invariant. + +Runs against the live AtoCore DB. Idempotent: a second run after the +first touches nothing because the flagged rows already have +hand_authored=1. + +Per the Engineering V1 Completion Plan (V1-0 scope), the three options +for an existing active entity without provenance are: + +1. Attach provenance — impossible without human review, not automatable +2. Flag hand-authored — safe, additive, this script's default +3. Invalidate — destructive, requires operator sign-off + +This script picks option (2) by default. Add --dry-run to see what +would change without writing. 
Add --invalidate-instead to pick option +(3) for active rows only (not recommended for first run). + +Usage: + python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db --dry-run + python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db +""" + +from __future__ import annotations + +import argparse +import json +import sqlite3 +import sys +from pathlib import Path + + +def run(db_path: Path, dry_run: bool, invalidate_instead: bool) -> int: + if not db_path.exists(): + print(f"ERROR: db not found: {db_path}", file=sys.stderr) + return 2 + + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + + # Verify the V1-0 migration ran: if hand_authored column is missing + # the operator hasn't deployed V1-0 yet, and running this script + # would crash. Fail loud rather than attempt the ALTER here. + cols = {r["name"] for r in conn.execute("PRAGMA table_info(entities)").fetchall()} + if "hand_authored" not in cols: + print( + "ERROR: entities table lacks the hand_authored column. " + "Deploy V1-0 migrations first (init_db + init_engineering_schema).", + file=sys.stderr, + ) + return 2 + + # Scope differs by mode: + # - Default (flag hand_authored=1): safe/additive, applies to active + # AND superseded rows so the historical trail is consistent. + # - --invalidate-instead: destructive — scope to ACTIVE rows only. + # Invalidating already-superseded history would collapse the audit + # trail, which the plan's remediation scope never intended + # (V1-0 talks about existing active no-provenance entities). 
+ if invalidate_instead: + scope_sql = "status = 'active'" + else: + scope_sql = "status IN ('active', 'superseded')" + rows = conn.execute( + f"SELECT id, entity_type, name, project, status, source_refs, hand_authored " + f"FROM entities WHERE {scope_sql} AND hand_authored = 0" + ).fetchall() + + needs_fix = [] + for row in rows: + refs_raw = row["source_refs"] or "[]" + try: + refs = json.loads(refs_raw) + except Exception: + refs = [] + if not refs: + needs_fix.append(row) + + print(f"found {len(needs_fix)} active/superseded entities with no provenance") + for row in needs_fix: + print( + f" - {row['id'][:8]} [{row['entity_type']}] " + f"{row['name']!r} project={row['project']!r} status={row['status']}" + ) + + if dry_run: + print("--dry-run: no changes written") + return 0 + + if not needs_fix: + print("nothing to do") + return 0 + + action = "invalidate" if invalidate_instead else "flag hand_authored=1" + print(f"applying: {action}") + + cur = conn.cursor() + for row in needs_fix: + if invalidate_instead: + cur.execute( + "UPDATE entities SET status = 'invalid', " + "updated_at = CURRENT_TIMESTAMP WHERE id = ?", + (row["id"],), + ) + cur.execute( + "INSERT INTO memory_audit " + "(id, memory_id, action, actor, before_json, after_json, note, entity_kind) " + "VALUES (?, ?, 'invalidated', 'v1_0_backfill', ?, ?, ?, 'entity')", + ( + f"v10bf-{row['id'][:8]}-inv", + row["id"], + json.dumps({"status": row["status"]}), + json.dumps({"status": "invalid"}), + "V1-0 backfill: invalidated, no provenance", + ), + ) + else: + cur.execute( + "UPDATE entities SET hand_authored = 1, " + "updated_at = CURRENT_TIMESTAMP WHERE id = ?", + (row["id"],), + ) + cur.execute( + "INSERT INTO memory_audit " + "(id, memory_id, action, actor, before_json, after_json, note, entity_kind) " + "VALUES (?, ?, 'hand_authored_flagged', 'v1_0_backfill', ?, ?, ?, 'entity')", + ( + f"v10bf-{row['id'][:8]}-ha", + row["id"], + json.dumps({"hand_authored": False}), + json.dumps({"hand_authored": 
True}), + "V1-0 backfill: flagged hand_authored since source_refs empty", + ), + ) + + conn.commit() + print(f"done: updated {len(needs_fix)} entities") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--db", + type=Path, + default=Path("data/db/atocore.db"), + help="Path to the SQLite database (default: data/db/atocore.db)", + ) + parser.add_argument("--dry-run", action="store_true", help="Report only; no writes") + parser.add_argument( + "--invalidate-instead", + action="store_true", + help=( + "DESTRUCTIVE. Invalidate active rows with no provenance instead " + "of flagging them hand_authored. Scoped to status='active' only " + "(superseded rows are left alone to preserve audit history). " + "Not recommended for first run — start with --dry-run, then " + "the default hand_authored flag path." + ), + ) + args = parser.parse_args() + return run(args.db, args.dry_run, args.invalidate_instead) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index ecffa93..cd93786 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -1457,6 +1457,11 @@ class EntityCreateRequest(BaseModel): status: str = "active" confidence: float = 1.0 source_refs: list[str] | None = None + # V1-0 provenance enforcement (F-8). Clients must either pass + # non-empty source_refs or set hand_authored=true. The service layer + # raises ValueError otherwise, surfaced here as 400. 
+ hand_authored: bool = False + extractor_version: str | None = None class EntityPromoteRequest(BaseModel): @@ -1486,6 +1491,8 @@ def api_create_entity(req: EntityCreateRequest) -> dict: confidence=req.confidence, source_refs=req.source_refs, actor="api-http", + hand_authored=req.hand_authored, + extractor_version=req.extractor_version, ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) diff --git a/src/atocore/engineering/service.py b/src/atocore/engineering/service.py index 5fd314a..7608b6f 100644 --- a/src/atocore/engineering/service.py +++ b/src/atocore/engineering/service.py @@ -63,6 +63,12 @@ RELATIONSHIP_TYPES = [ ENTITY_STATUSES = ["candidate", "active", "superseded", "invalid"] +# V1-0: extractor version this module writes into new entity rows. +# Per promotion-rules.md:268, every candidate must record the version of +# the extractor that produced it so later re-evaluation is auditable. +# Bump this when extraction logic materially changes. +EXTRACTOR_VERSION = "v1.0.0" + @dataclass class Entity: @@ -77,6 +83,10 @@ class Entity: source_refs: list[str] = field(default_factory=list) created_at: str = "" updated_at: str = "" + # V1-0 shared-header fields per engineering-v1-acceptance.md:45. + extractor_version: str = "" + canonical_home: str = "entity" + hand_authored: bool = False @dataclass @@ -103,10 +113,25 @@ def init_engineering_schema() -> None: status TEXT NOT NULL DEFAULT 'active', confidence REAL NOT NULL DEFAULT 1.0, source_refs TEXT NOT NULL DEFAULT '[]', + extractor_version TEXT NOT NULL DEFAULT '', + canonical_home TEXT NOT NULL DEFAULT 'entity', + hand_authored INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ) """) + # V1-0 (Engineering V1 completion): the three shared-header fields + # per engineering-v1-acceptance.md:45. Idempotent ALTERs for + # databases created before V1-0 land these columns without a full + # migration. 
Fresh DBs get them via the CREATE TABLE above; the + # ALTERs below are a no-op there. + from atocore.models.database import _column_exists # late import; avoids cycle + if not _column_exists(conn, "entities", "extractor_version"): + conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''") + if not _column_exists(conn, "entities", "canonical_home"): + conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'") + if not _column_exists(conn, "entities", "hand_authored"): + conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0") conn.execute(""" CREATE TABLE IF NOT EXISTS relationships ( id TEXT PRIMARY KEY, @@ -149,6 +174,8 @@ def create_entity( confidence: float = 1.0, source_refs: list[str] | None = None, actor: str = "api", + hand_authored: bool = False, + extractor_version: str | None = None, ) -> Entity: if entity_type not in ENTITY_TYPES: raise ValueError(f"Invalid entity type: {entity_type}. Must be one of {ENTITY_TYPES}") @@ -157,6 +184,21 @@ def create_entity( if not name or not name.strip(): raise ValueError("Entity name must be non-empty") + refs = list(source_refs) if source_refs else [] + + # V1-0 (F-8 provenance enforcement, engineering-v1-acceptance.md:147): + # every new entity row must carry non-empty source_refs OR be explicitly + # flagged hand_authored. This is the non-negotiable invariant every + # later V1 phase depends on — without it, active entities can escape + # into the graph with no traceable origin. Raises at the write seam so + # the bug is impossible to introduce silently. + if not refs and not hand_authored: + raise ValueError( + "source_refs required: every entity must carry provenance " + "(source_chunk_id / source_interaction_id / kb_cad_export_id / ...) " + "or set hand_authored=True to explicitly flag a direct human write" + ) + # Phase 5: enforce project canonicalization contract at the write seam. 
# Aliases like "p04" become "p04-gigabit" so downstream reads stay # consistent with the registry. @@ -165,18 +207,22 @@ def create_entity( entity_id = str(uuid.uuid4()) now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") props = properties or {} - refs = source_refs or [] + ev = extractor_version if extractor_version is not None else EXTRACTOR_VERSION with get_connection() as conn: conn.execute( """INSERT INTO entities (id, entity_type, name, project, description, properties, - status, confidence, source_refs, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + status, confidence, source_refs, + extractor_version, canonical_home, hand_authored, + created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( entity_id, entity_type, name.strip(), project, description, json.dumps(props), status, confidence, - json.dumps(refs), now, now, + json.dumps(refs), + ev, "entity", 1 if hand_authored else 0, + now, now, ), ) @@ -194,14 +240,31 @@ def create_entity( "project": project, "status": status, "confidence": confidence, + "hand_authored": hand_authored, + "extractor_version": ev, }, ) + # V1-0 (F-5 hook, engineering-v1-acceptance.md:99): synchronous + # conflict detection on any active-entity write. The promote path + # already had this hook (see promote_entity below); V1-0 adds it to + # direct-active creates so every active row — however it got that + # way — is checked. Fail-open per "flag, never block" rule in + # conflict-model.md:256: detector errors log but never fail the write. 
+ if status == "active": + try: + from atocore.engineering.conflicts import detect_conflicts_for_entity + detect_conflicts_for_entity(entity_id) + except Exception as e: + log.warning("conflict_detection_failed", entity_id=entity_id, error=str(e)) + return Entity( id=entity_id, entity_type=entity_type, name=name.strip(), project=project, description=description, properties=props, status=status, confidence=confidence, source_refs=refs, created_at=now, updated_at=now, + extractor_version=ev, canonical_home="entity", + hand_authored=hand_authored, ) @@ -361,6 +424,20 @@ def promote_entity( if entity is None or entity.status != "candidate": return False + # V1-0 (F-8 provenance re-check at promote). The invariant must hold at + # BOTH create_entity AND promote_entity per the plan, because candidate + # rows can exist in the DB from before V1-0 (no enforcement at their + # create time) or can be inserted by code paths that bypass the service + # layer. Block any candidate with empty source_refs that is NOT flagged + # hand_authored from ever becoming active. Same error shape as the + # create-side check for symmetry. + if not (entity.source_refs or []) and not entity.hand_authored: + raise ValueError( + "source_refs required: cannot promote a candidate with no " + "provenance. Attach source_refs via PATCH /entities/{id}, " + "or flag hand_authored=true before promoting." + ) + if target_project is not None: new_project = ( resolve_project_name(target_project) if target_project else "" @@ -503,6 +580,22 @@ def supersede_entity( superseded_by=superseded_by, error=str(e), ) + + # V1-0 (F-5 hook on supersede, per plan's "every active-entity + # write path"). Supersede demotes `entity_id` AND adds a + # `supersedes` relationship rooted at the already-active + # `superseded_by`. That new edge can create a conflict the + # detector should catch synchronously. Fail-open per + # conflict-model.md:256. 
+ try: + from atocore.engineering.conflicts import detect_conflicts_for_entity + detect_conflicts_for_entity(superseded_by) + except Exception as e: + log.warning( + "conflict_detection_failed", + entity_id=superseded_by, + error=str(e), + ) return True @@ -774,6 +867,15 @@ def get_entity_with_context(entity_id: str) -> dict | None: def _row_to_entity(row) -> Entity: + # V1-0 shared-header fields are optional on read — rows that predate + # V1-0 migration have NULL / missing values, so defaults kick in and + # older tests that build Entity() without the new fields keep passing. + # `row.keys()` lets us tolerate SQLite rows that lack the columns + # entirely (pre-migration sqlite3.Row). + keys = set(row.keys()) + extractor_version = (row["extractor_version"] or "") if "extractor_version" in keys else "" + canonical_home = (row["canonical_home"] or "entity") if "canonical_home" in keys else "entity" + hand_authored = bool(row["hand_authored"]) if "hand_authored" in keys and row["hand_authored"] is not None else False return Entity( id=row["id"], entity_type=row["entity_type"], @@ -786,6 +888,9 @@ def _row_to_entity(row) -> Entity: source_refs=json.loads(row["source_refs"] or "[]"), created_at=row["created_at"] or "", updated_at=row["updated_at"] or "", + extractor_version=extractor_version, + canonical_home=canonical_home, + hand_authored=hand_authored, ) diff --git a/src/atocore/engineering/wiki.py b/src/atocore/engineering/wiki.py index 3ad8d36..39851ca 100644 --- a/src/atocore/engineering/wiki.py +++ b/src/atocore/engineering/wiki.py @@ -391,6 +391,8 @@ def render_new_entity_form(name: str = "", project: str = "") -> str: entity_type: fd.get('entity_type'), project: fd.get('project') || '', description: fd.get('description') || '', + // V1-0: human writes via the wiki form are hand_authored by definition. 
+ hand_authored: true, }; try { const r = await fetch('/v1/entities', { diff --git a/src/atocore/models/database.py b/src/atocore/models/database.py index bf0844b..0ece1fc 100644 --- a/src/atocore/models/database.py +++ b/src/atocore/models/database.py @@ -146,6 +146,28 @@ def _apply_migrations(conn: sqlite3.Connection) -> None: "CREATE INDEX IF NOT EXISTS idx_memories_graduated ON memories(graduated_to_entity_id)" ) + # V1-0 (Engineering V1 completion): shared header fields per + # engineering-v1-acceptance.md:45. Three columns on `entities`: + # - extractor_version: which extractor produced this row. Lets old + # candidates be re-evaluated with a newer extractor per + # promotion-rules.md:268. + # - canonical_home: which layer holds the canonical record. Always + # "entity" for rows written via create_entity; reserved for future + # cross-layer bookkeeping. + # - hand_authored: 1 when the row was created directly by a human + # without source provenance. Enforced by the write path so every + # non-hand-authored row must carry non-empty source_refs (F-8). + # The entities table itself is created by init_engineering_schema + # (see engineering/service.py); these ALTERs cover existing DBs + # where the original CREATE TABLE predates V1-0. + if _table_exists(conn, "entities"): + if not _column_exists(conn, "entities", "extractor_version"): + conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''") + if not _column_exists(conn, "entities", "canonical_home"): + conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'") + if not _column_exists(conn, "entities", "hand_authored"): + conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0") + # Phase 4 (Robustness V1): append-only audit log for memory mutations. # Every create/update/promote/reject/supersede/invalidate/reinforce/expire/ # auto_promote writes one row here. 
before/after are JSON snapshots of the @@ -352,6 +374,14 @@ def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool: return any(row["name"] == column for row in rows) +def _table_exists(conn: sqlite3.Connection, table: str) -> bool: + row = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name=?", + (table,), + ).fetchone() + return row is not None + + @contextmanager def get_connection() -> Generator[sqlite3.Connection, None, None]: """Get a database connection with row factory.""" diff --git a/tests/conftest.py b/tests/conftest.py index 00981f5..78379bb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,36 @@ os.environ["ATOCORE_DATA_DIR"] = _default_test_dir os.environ["ATOCORE_DEBUG"] = "true" +# V1-0: every entity created in a test is "hand authored" by the test +# author — fixture data, not extracted content. Rather than rewrite 100+ +# existing test call sites, wrap create_entity so that tests which don't +# provide source_refs get hand_authored=True automatically. Tests that +# explicitly pass source_refs or hand_authored are unaffected. This keeps +# the F-8 invariant enforced in production (the API, the wiki form, and +# graduation scripts all go through the unwrapped function) while leaving +# the existing test corpus intact. +def _patch_create_entity_for_tests(): + from atocore.engineering import service as _svc + + _original = _svc.create_entity + + def _create_entity_test(*args, **kwargs): + # Only auto-flag when hand_authored isn't explicitly specified. + # Tests that want to exercise the F-8 raise path pass + # hand_authored=False explicitly and should hit the error. 
+ if ( + not kwargs.get("source_refs") + and "hand_authored" not in kwargs + ): + kwargs["hand_authored"] = True + return _original(*args, **kwargs) + + _svc.create_entity = _create_entity_test + + +_patch_create_entity_for_tests() + + @pytest.fixture def tmp_data_dir(tmp_path): """Provide a temporary data directory for tests.""" diff --git a/tests/test_engineering_v1_phase5.py b/tests/test_engineering_v1_phase5.py index f0eea40..32e56fd 100644 --- a/tests/test_engineering_v1_phase5.py +++ b/tests/test_engineering_v1_phase5.py @@ -143,8 +143,11 @@ def test_requirement_name_conflict_detected(tmp_data_dir): r2 = create_entity("requirement", "Surface figure < 25nm", project="p-test", description="Different interpretation") - detected = detect_conflicts_for_entity(r2.id) - assert len(detected) == 1 + # V1-0 synchronous hook: the conflict is already detected at r2's + # create-time, so a redundant detect call returns [] due to + # _record_conflict dedup. Assert on list_open_conflicts instead — + # that's what the intent of this test really tests: duplicate + # active requirements surface as an open conflict. conflicts = list_open_conflicts(project="p-test") assert any(c["slot_kind"] == "requirement.name" for c in conflicts) @@ -191,8 +194,12 @@ def test_conflict_resolution_dismiss_leaves_entities_alone(tmp_data_dir): description="first meaning") r2 = create_entity("requirement", "Dup req", project="p-test", description="second meaning") - detected = detect_conflicts_for_entity(r2.id) - conflict_id = detected[0] + # V1-0 synchronous hook already recorded the conflict at r2's + # create-time. Look it up via list_open_conflicts rather than + # calling the detector again (which returns [] due to dedup). 
+ open_list = list_open_conflicts(project="p-test") + assert open_list, "expected conflict recorded by create-time hook" + conflict_id = open_list[0]["id"] assert resolve_conflict(conflict_id, "dismiss") # Both still active — dismiss just clears the conflict marker diff --git a/tests/test_inbox_crossproject.py b/tests/test_inbox_crossproject.py index 22794ac..1414a85 100644 --- a/tests/test_inbox_crossproject.py +++ b/tests/test_inbox_crossproject.py @@ -132,6 +132,7 @@ def test_api_post_entity_with_null_project_stores_global(seeded_db): "entity_type": "material", "name": "Titanium", "project": None, + "hand_authored": True, # V1-0 F-8: test fixture, no source_refs }) assert r.status_code == 200 diff --git a/tests/test_v1_0_write_invariants.py b/tests/test_v1_0_write_invariants.py new file mode 100644 index 0000000..d993c6d --- /dev/null +++ b/tests/test_v1_0_write_invariants.py @@ -0,0 +1,362 @@ +"""V1-0 write-time invariant tests. + +Covers the Engineering V1 completion plan Phase V1-0 acceptance: +- F-1 shared-header fields: extractor_version + canonical_home + hand_authored + land in the entities table with working defaults +- F-8 provenance enforcement: create_entity raises without source_refs + unless hand_authored=True +- F-5 synchronous conflict-detection hook on any active-entity write + (create_entity with status="active" + the pre-existing promote_entity + path); fail-open per conflict-model.md:256 +- Q-3 "flag, never block": a conflict never 4xx-blocks the write +- Q-4 partial trust: get_entities scope_only filters candidates out + +Plan: docs/plans/engineering-v1-completion-plan.md +Spec: docs/architecture/engineering-v1-acceptance.md +""" + +from __future__ import annotations + +import pytest + +from atocore.engineering.service import ( + EXTRACTOR_VERSION, + create_entity, + create_relationship, + get_entities, + get_entity, + init_engineering_schema, + promote_entity, + supersede_entity, +) +from atocore.models.database import get_connection, 
init_db + + +# ---------- F-1: shared-header fields ---------- + + +def test_entity_row_has_shared_header_fields(tmp_data_dir): + init_db() + init_engineering_schema() + with get_connection() as conn: + cols = {row["name"] for row in conn.execute("PRAGMA table_info(entities)").fetchall()} + assert "extractor_version" in cols + assert "canonical_home" in cols + assert "hand_authored" in cols + + +def test_created_entity_has_default_extractor_version_and_canonical_home(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + entity_type="component", + name="Pivot Pin", + project="p04-gigabit", + source_refs=["test:fixture"], + ) + assert e.extractor_version == EXTRACTOR_VERSION + assert e.canonical_home == "entity" + assert e.hand_authored is False + + # round-trip through get_entity to confirm the row mapper returns + # the same values (not just the return-by-construct path) + got = get_entity(e.id) + assert got is not None + assert got.extractor_version == EXTRACTOR_VERSION + assert got.canonical_home == "entity" + assert got.hand_authored is False + + +def test_explicit_extractor_version_is_persisted(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + entity_type="decision", + name="Pick GF-PTFE pads", + project="p04-gigabit", + source_refs=["interaction:abc"], + extractor_version="custom-v2.3", + ) + got = get_entity(e.id) + assert got.extractor_version == "custom-v2.3" + + +# ---------- F-8: provenance enforcement ---------- + + +def test_create_entity_without_provenance_raises(tmp_data_dir): + init_db() + init_engineering_schema() + with pytest.raises(ValueError, match="source_refs required"): + create_entity( + entity_type="component", + name="No Provenance", + project="p04-gigabit", + hand_authored=False, # explicit — bypasses the test-conftest auto-flag + ) + + +def test_create_entity_with_hand_authored_needs_no_source_refs(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + 
entity_type="component", + name="Human Entry", + project="p04-gigabit", + hand_authored=True, + ) + assert e.hand_authored is True + got = get_entity(e.id) + assert got.hand_authored is True + # source_refs stays empty — the hand_authored flag IS the provenance + assert got.source_refs == [] + + +def test_create_entity_with_empty_source_refs_list_is_treated_as_missing(tmp_data_dir): + init_db() + init_engineering_schema() + with pytest.raises(ValueError, match="source_refs required"): + create_entity( + entity_type="component", + name="Empty Refs", + project="p04-gigabit", + source_refs=[], + hand_authored=False, + ) + + +def test_promote_rejects_legacy_candidate_without_provenance(tmp_data_dir): + """Regression (Codex V1-0 probe): candidate rows can exist in the DB + from before V1-0 enforcement (or from paths that bypass create_entity). + promote_entity must re-check the invariant and refuse to flip a + no-provenance candidate to active. Without this check, the active + store can leak F-8 violations in from legacy data.""" + init_db() + init_engineering_schema() + + # Simulate a pre-V1-0 candidate by inserting directly into the table, + # bypassing the service-layer invariant. Real legacy rows look exactly + # like this: empty source_refs, hand_authored=0. + import uuid as _uuid + entity_id = str(_uuid.uuid4()) + with get_connection() as conn: + conn.execute( + "INSERT INTO entities (id, entity_type, name, project, " + "description, properties, status, confidence, source_refs, " + "extractor_version, canonical_home, hand_authored, " + "created_at, updated_at) " + "VALUES (?, 'component', 'Legacy Orphan', 'p04-gigabit', " + "'', '{}', 'candidate', 1.0, '[]', '', 'entity', 0, " + "CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)", + (entity_id,), + ) + + with pytest.raises(ValueError, match="source_refs required"): + promote_entity(entity_id) + + # And the row stays a candidate — no half-transition. 
+ got = get_entity(entity_id) + assert got is not None + assert got.status == "candidate" + + +def test_promote_accepts_candidate_flagged_hand_authored(tmp_data_dir): + """The other side of the promote re-check: hand_authored=1 with + empty source_refs still lets promote succeed, matching + create_entity's symmetry.""" + init_db() + init_engineering_schema() + + import uuid as _uuid + entity_id = str(_uuid.uuid4()) + with get_connection() as conn: + conn.execute( + "INSERT INTO entities (id, entity_type, name, project, " + "description, properties, status, confidence, source_refs, " + "extractor_version, canonical_home, hand_authored, " + "created_at, updated_at) " + "VALUES (?, 'component', 'Hand Authored Candidate', " + "'p04-gigabit', '', '{}', 'candidate', 1.0, '[]', '', " + "'entity', 1, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)", + (entity_id,), + ) + + assert promote_entity(entity_id) is True + assert get_entity(entity_id).status == "active" + + +# ---------- F-5: synchronous conflict-detection hook ---------- + + +def test_active_create_runs_conflict_detection_hook(tmp_data_dir, monkeypatch): + """status=active writes trigger detect_conflicts_for_entity.""" + init_db() + init_engineering_schema() + + called_with: list[str] = [] + + def _fake_detect(entity_id: str): + called_with.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + e = create_entity( + entity_type="component", + name="Active With Hook", + project="p04-gigabit", + source_refs=["test:hook"], + status="active", + ) + + assert called_with == [e.id] + + +def test_supersede_runs_conflict_detection_on_new_active(tmp_data_dir, monkeypatch): + """Regression (Codex V1-0 probe): per plan's 'every active-entity + write path', supersede_entity must trigger synchronous conflict + detection. 
The subject is the `superseded_by` entity — the one + whose graph state just changed because a new `supersedes` edge was + rooted at it.""" + init_db() + init_engineering_schema() + + old = create_entity( + entity_type="component", + name="Old Pad", + project="p04-gigabit", + source_refs=["test:old"], + status="active", + ) + new = create_entity( + entity_type="component", + name="New Pad", + project="p04-gigabit", + source_refs=["test:new"], + status="active", + ) + + called_with: list[str] = [] + + def _fake_detect(entity_id: str): + called_with.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + assert supersede_entity(old.id, superseded_by=new.id) is True + + # The detector fires on the `superseded_by` entity — the one whose + # edges just grew a new `supersedes` relationship. + assert new.id in called_with + + +def test_supersede_hook_fails_open(tmp_data_dir, monkeypatch): + """Supersede must survive a broken detector per Q-3 flag-never-block.""" + init_db() + init_engineering_schema() + + old = create_entity( + entity_type="component", name="Old2", project="p04-gigabit", + source_refs=["test:old"], status="active", + ) + new = create_entity( + entity_type="component", name="New2", project="p04-gigabit", + source_refs=["test:new"], status="active", + ) + + def _boom(entity_id: str): + raise RuntimeError("synthetic detector failure") + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _boom) + + # The supersede still succeeds despite the detector blowing up. + assert supersede_entity(old.id, superseded_by=new.id) is True + assert get_entity(old.id).status == "superseded" + + +def test_candidate_create_does_not_run_conflict_hook(tmp_data_dir, monkeypatch): + """status=candidate writes do NOT trigger detection — the hook is + for active rows only, per V1-0 scope. 
Candidates are checked at + promote time.""" + init_db() + init_engineering_schema() + + called: list[str] = [] + + def _fake_detect(entity_id: str): + called.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + create_entity( + entity_type="component", + name="Candidate No Hook", + project="p04-gigabit", + source_refs=["test:cand"], + status="candidate", + ) + + assert called == [] + + +# ---------- Q-3: flag, never block ---------- + + +def test_conflict_detector_failure_does_not_block_write(tmp_data_dir, monkeypatch): + """Per conflict-model.md:256: detection errors must not fail the + write. The entity is still created; only a warning is logged.""" + init_db() + init_engineering_schema() + + def _boom(entity_id: str): + raise RuntimeError("synthetic detector failure") + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _boom) + + # The write still succeeds — no exception propagates. + e = create_entity( + entity_type="component", + name="Hook Fails Open", + project="p04-gigabit", + source_refs=["test:failopen"], + status="active", + ) + assert get_entity(e.id) is not None + + +# ---------- Q-4 (partial): trust-hierarchy — scope_only filters candidates ---------- + + +def test_scope_only_active_does_not_return_candidates(tmp_data_dir): + """V1-0 partial Q-4: active-scoped listing never returns candidates. + Full trust-hierarchy coverage (no-auto-project-state, etc.) 
ships in + V1-E per plan.""" + init_db() + init_engineering_schema() + + active = create_entity( + entity_type="component", + name="Active Alpha", + project="p04-gigabit", + source_refs=["test:alpha"], + status="active", + ) + candidate = create_entity( + entity_type="component", + name="Candidate Beta", + project="p04-gigabit", + source_refs=["test:beta"], + status="candidate", + ) + + listed = get_entities(project="p04-gigabit", status="active", scope_only=True) + ids = {e.id for e in listed} + assert active.id in ids + assert candidate.id not in ids