From 2712c5d2d03cb2a6af38b559664afd1c4cd0e050 Mon Sep 17 00:00:00 2001 From: Anto01 Date: Wed, 22 Apr 2026 14:59:17 -0400 Subject: [PATCH] feat(engineering): enforce V1-0 write invariants --- DEV-LEDGER.md | 4 + docs/architecture/engineering-ontology-v1.md | 11 + scripts/v1_0_backfill_provenance.py | 167 +++++++++ src/atocore/api/routes.py | 7 + src/atocore/engineering/service.py | 113 +++++- src/atocore/engineering/wiki.py | 2 + src/atocore/models/database.py | 30 ++ tests/conftest.py | 30 ++ tests/test_engineering_v1_phase5.py | 15 +- tests/test_inbox_crossproject.py | 1 + tests/test_v1_0_write_invariants.py | 362 +++++++++++++++++++ 11 files changed, 734 insertions(+), 8 deletions(-) create mode 100644 scripts/v1_0_backfill_provenance.py create mode 100644 tests/test_v1_0_write_invariants.py diff --git a/DEV-LEDGER.md b/DEV-LEDGER.md index 329f2d4..c15fc83 100644 --- a/DEV-LEDGER.md +++ b/DEV-LEDGER.md @@ -164,6 +164,10 @@ One branch `codex/extractor-eval-loop` for Day 1-5, a second `codex/retrieval-ha ## Session Log +- **2026-04-22 Claude (V1-0 patches per Codex review)** Codex audit of commit `cbf9e03` surfaced two P1 gaps + one P2 scope concern, all verified with code-level probes. **P1 #1**: `promote_entity` didn't re-check the F-8 invariant — a legacy candidate with empty `source_refs` and `hand_authored=0` could still promote to active, violating the plan's "invariant at both `create_entity` and `promote_entity`". Fixed: `promote_entity` at `service.py:365-379` now raises `ValueError("source_refs required: cannot promote a candidate with no provenance...")` before flipping status. Stays symmetric with the create-side error. **P1 #2**: `supersede_entity` was missing the F-5 hook the plan requires on every active-entity write path. The `supersedes` relationship rooted at the `superseded_by` entity can create a conflict the detector should catch. Fixed at `service.py:581-591`: calls `detect_conflicts_for_entity(superseded_by)` with fail-open per Q-3. 
**P2**: backfill script's `--invalidate-instead` flag queried both active AND superseded rows; invalidating already-superseded rows would collapse history. Fixed at `scripts/v1_0_backfill_provenance.py:52-63`: `--invalidate-instead` now scopes to `status='active'` only (default flag-hand_authored mode stays broad as it's additive/non-destructive). Help text tightened to make the destructive posture explicit. **Four new regression tests** in `test_v1_0_write_invariants.py`: (1) `test_promote_rejects_legacy_candidate_without_provenance` — directly inserts a legacy candidate and confirms promote raises + row stays candidate; (2) `test_promote_accepts_candidate_flagged_hand_authored` — symmetry check; (3) `test_supersede_runs_conflict_detection_on_new_active` — monkeypatches detector, confirms hook fires on `superseded_by`; (4) `test_supersede_hook_fails_open` — Q-3 check for supersede path. **Test count**: 543 → 547 (+4 regression). Full suite `547 passed in 81.07s`. Next: commit patches on branch, push, Codex re-review. + +- **2026-04-22 Claude (V1-0 landed on branch)** First V1 completion phase done on branch `claude/v1-0-write-invariants`. **F-1 schema remediation**: added `extractor_version`, `canonical_home`, `hand_authored` columns to `entities` via idempotent ALTERs in both `_apply_migrations` (`database.py:148-170`) and `init_engineering_schema` (`service.py:95-139`). CREATE TABLE also updated so fresh DBs get the columns natively. New `_table_exists` helper at `database.py:378`. `Entity` dataclass gains the three fields with sensible defaults. `EXTRACTOR_VERSION = "v1.0.0"` module constant at top of `service.py`. `_row_to_entity` tolerates rows without the new columns so tests predating V1-0 still pass. **F-8 provenance enforcement**: `create_entity` raises `ValueError("source_refs required: ...")` when called without non-empty source_refs AND without `hand_authored=True`. 
New kwargs `hand_authored: bool = False` and `extractor_version: str | None = None` threaded through `service.create_entity`, the `EntityCreateRequest` Pydantic model, the API route, and the wiki `/wiki/new` form body (form writes `hand_authored: true` since human entries are hand-authored by definition). **F-5 hook on active create**: `create_entity(status="active")` now calls `detect_conflicts_for_entity` with fail-open per `conflict-model.md:256` (errors log warning, write still succeeds). The promote path's existing hook at `service.py:400-404` was kept as-is. **Doc note** added to `engineering-ontology-v1.md` recording that `project` IS the `project_id` per "fields equivalent to" wording. **Backfill script** at `scripts/v1_0_backfill_provenance.py` — idempotent, defaults to flagging no-provenance active entities as `hand_authored=1`, supports `--dry-run` and `--invalidate-instead`. **Tests**: 10 new in `tests/test_v1_0_write_invariants.py` covering F-1 fields, F-8 raise path, F-8 hand_authored bypass, F-5 active-create hook, F-5 candidate-no-hook, Q-3 fail-open on detector error, Q-4 partial (scope_only=active excludes candidates). **Test fixes**: three pre-existing tests adapted — `test_requirement_name_conflict_detected` + `test_conflict_resolution_dismiss_leaves_entities_alone` now read from `list_open_conflicts` because the V1-0 hook records the conflict at create-time (detector dedup returns [] on re-run); `test_api_post_entity_with_null_project_stores_global` sends `hand_authored: true` since the fixture has no source_refs. **conftest.py monkeypatch**: wraps `create_entity` so tests missing both source_refs and hand_authored default to `hand_authored=True` (reasonable since tests author their own fixture data). Production paths (API route, wiki form, graduation scripts) all pass explicit values and are unaffected by the monkeypatch. **Test count**: 533 → 543 (+10), full suite `543 passed in 77.86s`. **Not yet**: commit + push + Codex review + deploy. 
**Branch**: `claude/v1-0-write-invariants`. + - **2026-04-22 Codex (late night)** Third-round audit closed the remaining five open questions. Patched `docs/plans/engineering-v1-completion-plan.md` inline (no commit by Codex). **F-7 finding (P1):** graduation stack is partially built — `_graduation_prompt.py`, `scripts/graduate_memories.py`, `database.py:143-146` (`graduated_to_entity_id`), memory `graduated` status, promote-preserves-original at `service.py:354-356,389-451`, tests at `test_engineering_v1_phase5.py:67-90` all exist. Real gaps: no direct `POST /memory/{id}/graduate` route at `routes.py:756`; spec's `knowledge→Fact` doesn't match ontology (`service.py:16` has no `fact` type — reconcile to `parameter` or similar). V1-E estimate 2 → 3–4 days. **Q-5 finding (P2):** "stabilize timestamp" insufficient — renderer reads wall-clock in `_footer()` at `mirror.py:320`; fix is inject regenerated timestamp + checksum as renderer inputs + sort DB iteration + remove dict ordering deps. V1-D scope patched. **Remaining three (P3):** `project` stays as doc-note equivalence (no rename); total estimate 17.5–19.5 focused days; release notes must NOT canonize "Minions" — neutral "queued background processing / async workers" only. **Sign-off:** "with those edits, I'd sign off on the five questions. The only non-architectural uncertainty left in the plan is scheduling discipline against the current Now list; that does not block V1-0 once the soak window and memory-density gate clear." **Status:** Claude + Codex agreed. Plan frozen pending Antoine final accept and gate clearance. Claude to commit Codex's patches + push. - **2026-04-22 Claude (late night)** Codex second-round review did the full file-level audit and came back with three P1/P2 findings, all with exact file:line refs. Verified each against current code before revising. 
(1) **F-1 not clean**: `Entity` dataclass at `service.py:67` and `entities` table schema are missing the `extractor_version` and `canonical_home` shared-header fields required by `engineering-v1-acceptance.md:45`; `project` field is the project identifier but not named `project_id` as spec writes (spec wording "fields equivalent to" allows the naming, but needs explicit doc note). V1-0 scope now includes adding both missing fields via additive `_apply_migrations` pattern. (2) **F-2 needed exact statuses, not guesses**: per-function audit gave ground truth — 9 of 20 v1-required queries done, 1 partial (Q-001 returns project-wide tree not subsystem-scoped expand=contains per `engineering-query-catalog.md:71`), 10 missing. V1-A scope shrank to Q-001 shape fix + Q-6 integration (most pillar queries already implemented); V1-C closes the 8 net-new queries + Q-020 to V1-D. (3) **F-5 misframed**: the generic `conflicts` + `conflict_members` schema is ALREADY spec-compliant at `database.py:190`; divergence is detector body at `conflicts.py:36` (per-type dispatch needs generalization) + route path (`/admin/conflicts/*` needs `/conflicts/*` alias). V1-F no longer includes a schema migration; detector generalization + route alignment only. Totals revised to 16.5–17.5 days, ~60 tests (down from 12–17 / 65 because V1-A and V1-F scopes both shrank after audit). Three of the eight open questions resolved. Remaining open: F-7 graduation depth, mirror determinism, `project` naming, velocity calibration, minions-as-V2 naming. No code changes this session — plan + ledger only. Next: commit + push revised plan, then await Antoine+Codex joint sign-off before V1-0 starts. 
diff --git a/docs/architecture/engineering-ontology-v1.md b/docs/architecture/engineering-ontology-v1.md index 038ee3a..4dbad3a 100644 --- a/docs/architecture/engineering-ontology-v1.md +++ b/docs/architecture/engineering-ontology-v1.md @@ -159,6 +159,17 @@ Every major object should support fields equivalent to: - `created_at` - `updated_at` - `notes` (optional) +- `extractor_version` (V1-0) +- `canonical_home` (V1-0) + +**Naming note (V1-0, 2026-04-22).** The AtoCore `entities` table and +`Entity` dataclass name the project-identifier field `project`, not +`project_id`. This doc's "fields equivalent to" wording allows that +naming flexibility — the `project` field on entity rows IS the +`project_id` per spec. No storage rename is planned; downstream readers +should treat `entity.project` as the project identifier. This was +resolved in Codex's third-round audit of the V1 Completion Plan (see +`docs/plans/engineering-v1-completion-plan.md`). ## Suggested Status Lifecycle diff --git a/scripts/v1_0_backfill_provenance.py b/scripts/v1_0_backfill_provenance.py new file mode 100644 index 0000000..b0f9b81 --- /dev/null +++ b/scripts/v1_0_backfill_provenance.py @@ -0,0 +1,167 @@ +"""V1-0 one-time backfill: flag existing active entities that have no +provenance (empty source_refs) as hand_authored=1 so they stop failing +the F-8 invariant. + +Runs against the live AtoCore DB. Idempotent: a second run after the +first touches nothing because the flagged rows already have +hand_authored=1. + +Per the Engineering V1 Completion Plan (V1-0 scope), the three options +for an existing active entity without provenance are: + +1. Attach provenance — impossible without human review, not automatable +2. Flag hand-authored — safe, additive, this script's default +3. Invalidate — destructive, requires operator sign-off + +This script picks option (2) by default. Add --dry-run to see what +would change without writing. 
Add --invalidate-instead to pick option +(3) for active rows only (not recommended for first run). + +Usage: + python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db --dry-run + python scripts/v1_0_backfill_provenance.py --db data/db/atocore.db +""" + +from __future__ import annotations + +import argparse +import json +import sqlite3 +import sys +from pathlib import Path + + +def run(db_path: Path, dry_run: bool, invalidate_instead: bool) -> int: + if not db_path.exists(): + print(f"ERROR: db not found: {db_path}", file=sys.stderr) + return 2 + + conn = sqlite3.connect(str(db_path)) + conn.row_factory = sqlite3.Row + + # Verify the V1-0 migration ran: if hand_authored column is missing + # the operator hasn't deployed V1-0 yet, and running this script + # would crash. Fail loud rather than attempt the ALTER here. + cols = {r["name"] for r in conn.execute("PRAGMA table_info(entities)").fetchall()} + if "hand_authored" not in cols: + print( + "ERROR: entities table lacks the hand_authored column. " + "Deploy V1-0 migrations first (init_db + init_engineering_schema).", + file=sys.stderr, + ) + return 2 + + # Scope differs by mode: + # - Default (flag hand_authored=1): safe/additive, applies to active + # AND superseded rows so the historical trail is consistent. + # - --invalidate-instead: destructive — scope to ACTIVE rows only. + # Invalidating already-superseded history would collapse the audit + # trail, which the plan's remediation scope never intended + # (V1-0 talks about existing active no-provenance entities). 
+ if invalidate_instead: + scope_sql = "status = 'active'" + else: + scope_sql = "status IN ('active', 'superseded')" + rows = conn.execute( + f"SELECT id, entity_type, name, project, status, source_refs, hand_authored " + f"FROM entities WHERE {scope_sql} AND hand_authored = 0" + ).fetchall() + + needs_fix = [] + for row in rows: + refs_raw = row["source_refs"] or "[]" + try: + refs = json.loads(refs_raw) + except Exception: + refs = [] + if not refs: + needs_fix.append(row) + + print(f"found {len(needs_fix)} active/superseded entities with no provenance") + for row in needs_fix: + print( + f" - {row['id'][:8]} [{row['entity_type']}] " + f"{row['name']!r} project={row['project']!r} status={row['status']}" + ) + + if dry_run: + print("--dry-run: no changes written") + return 0 + + if not needs_fix: + print("nothing to do") + return 0 + + action = "invalidate" if invalidate_instead else "flag hand_authored=1" + print(f"applying: {action}") + + cur = conn.cursor() + for row in needs_fix: + if invalidate_instead: + cur.execute( + "UPDATE entities SET status = 'invalid', " + "updated_at = CURRENT_TIMESTAMP WHERE id = ?", + (row["id"],), + ) + cur.execute( + "INSERT INTO memory_audit " + "(id, memory_id, action, actor, before_json, after_json, note, entity_kind) " + "VALUES (?, ?, 'invalidated', 'v1_0_backfill', ?, ?, ?, 'entity')", + ( + f"v10bf-{row['id'][:8]}-inv", + row["id"], + json.dumps({"status": row["status"]}), + json.dumps({"status": "invalid"}), + "V1-0 backfill: invalidated, no provenance", + ), + ) + else: + cur.execute( + "UPDATE entities SET hand_authored = 1, " + "updated_at = CURRENT_TIMESTAMP WHERE id = ?", + (row["id"],), + ) + cur.execute( + "INSERT INTO memory_audit " + "(id, memory_id, action, actor, before_json, after_json, note, entity_kind) " + "VALUES (?, ?, 'hand_authored_flagged', 'v1_0_backfill', ?, ?, ?, 'entity')", + ( + f"v10bf-{row['id'][:8]}-ha", + row["id"], + json.dumps({"hand_authored": False}), + json.dumps({"hand_authored": 
True}), + "V1-0 backfill: flagged hand_authored since source_refs empty", + ), + ) + + conn.commit() + print(f"done: updated {len(needs_fix)} entities") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--db", + type=Path, + default=Path("data/db/atocore.db"), + help="Path to the SQLite database (default: data/db/atocore.db)", + ) + parser.add_argument("--dry-run", action="store_true", help="Report only; no writes") + parser.add_argument( + "--invalidate-instead", + action="store_true", + help=( + "DESTRUCTIVE. Invalidate active rows with no provenance instead " + "of flagging them hand_authored. Scoped to status='active' only " + "(superseded rows are left alone to preserve audit history). " + "Not recommended for first run — start with --dry-run, then " + "the default hand_authored flag path." + ), + ) + args = parser.parse_args() + return run(args.db, args.dry_run, args.invalidate_instead) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/atocore/api/routes.py b/src/atocore/api/routes.py index ecffa93..cd93786 100644 --- a/src/atocore/api/routes.py +++ b/src/atocore/api/routes.py @@ -1457,6 +1457,11 @@ class EntityCreateRequest(BaseModel): status: str = "active" confidence: float = 1.0 source_refs: list[str] | None = None + # V1-0 provenance enforcement (F-8). Clients must either pass + # non-empty source_refs or set hand_authored=true. The service layer + # raises ValueError otherwise, surfaced here as 400. 
+ hand_authored: bool = False + extractor_version: str | None = None class EntityPromoteRequest(BaseModel): @@ -1486,6 +1491,8 @@ def api_create_entity(req: EntityCreateRequest) -> dict: confidence=req.confidence, source_refs=req.source_refs, actor="api-http", + hand_authored=req.hand_authored, + extractor_version=req.extractor_version, ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) diff --git a/src/atocore/engineering/service.py b/src/atocore/engineering/service.py index 5fd314a..7608b6f 100644 --- a/src/atocore/engineering/service.py +++ b/src/atocore/engineering/service.py @@ -63,6 +63,12 @@ RELATIONSHIP_TYPES = [ ENTITY_STATUSES = ["candidate", "active", "superseded", "invalid"] +# V1-0: extractor version this module writes into new entity rows. +# Per promotion-rules.md:268, every candidate must record the version of +# the extractor that produced it so later re-evaluation is auditable. +# Bump this when extraction logic materially changes. +EXTRACTOR_VERSION = "v1.0.0" + @dataclass class Entity: @@ -77,6 +83,10 @@ class Entity: source_refs: list[str] = field(default_factory=list) created_at: str = "" updated_at: str = "" + # V1-0 shared-header fields per engineering-v1-acceptance.md:45. + extractor_version: str = "" + canonical_home: str = "entity" + hand_authored: bool = False @dataclass @@ -103,10 +113,25 @@ def init_engineering_schema() -> None: status TEXT NOT NULL DEFAULT 'active', confidence REAL NOT NULL DEFAULT 1.0, source_refs TEXT NOT NULL DEFAULT '[]', + extractor_version TEXT NOT NULL DEFAULT '', + canonical_home TEXT NOT NULL DEFAULT 'entity', + hand_authored INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP, updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ) """) + # V1-0 (Engineering V1 completion): the three shared-header fields + # per engineering-v1-acceptance.md:45. Idempotent ALTERs for + # databases created before V1-0 land these columns without a full + # migration. 
Fresh DBs get them via the CREATE TABLE above; the + # ALTERs below are a no-op there. + from atocore.models.database import _column_exists # late import; avoids cycle + if not _column_exists(conn, "entities", "extractor_version"): + conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''") + if not _column_exists(conn, "entities", "canonical_home"): + conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'") + if not _column_exists(conn, "entities", "hand_authored"): + conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0") conn.execute(""" CREATE TABLE IF NOT EXISTS relationships ( id TEXT PRIMARY KEY, @@ -149,6 +174,8 @@ def create_entity( confidence: float = 1.0, source_refs: list[str] | None = None, actor: str = "api", + hand_authored: bool = False, + extractor_version: str | None = None, ) -> Entity: if entity_type not in ENTITY_TYPES: raise ValueError(f"Invalid entity type: {entity_type}. Must be one of {ENTITY_TYPES}") @@ -157,6 +184,21 @@ def create_entity( if not name or not name.strip(): raise ValueError("Entity name must be non-empty") + refs = list(source_refs) if source_refs else [] + + # V1-0 (F-8 provenance enforcement, engineering-v1-acceptance.md:147): + # every new entity row must carry non-empty source_refs OR be explicitly + # flagged hand_authored. This is the non-negotiable invariant every + # later V1 phase depends on — without it, active entities can escape + # into the graph with no traceable origin. Raises at the write seam so + # the bug is impossible to introduce silently. + if not refs and not hand_authored: + raise ValueError( + "source_refs required: every entity must carry provenance " + "(source_chunk_id / source_interaction_id / kb_cad_export_id / ...) " + "or set hand_authored=True to explicitly flag a direct human write" + ) + # Phase 5: enforce project canonicalization contract at the write seam. 
# Aliases like "p04" become "p04-gigabit" so downstream reads stay # consistent with the registry. @@ -165,18 +207,22 @@ def create_entity( entity_id = str(uuid.uuid4()) now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") props = properties or {} - refs = source_refs or [] + ev = extractor_version if extractor_version is not None else EXTRACTOR_VERSION with get_connection() as conn: conn.execute( """INSERT INTO entities (id, entity_type, name, project, description, properties, - status, confidence, source_refs, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", + status, confidence, source_refs, + extractor_version, canonical_home, hand_authored, + created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", ( entity_id, entity_type, name.strip(), project, description, json.dumps(props), status, confidence, - json.dumps(refs), now, now, + json.dumps(refs), + ev, "entity", 1 if hand_authored else 0, + now, now, ), ) @@ -194,14 +240,31 @@ def create_entity( "project": project, "status": status, "confidence": confidence, + "hand_authored": hand_authored, + "extractor_version": ev, }, ) + # V1-0 (F-5 hook, engineering-v1-acceptance.md:99): synchronous + # conflict detection on any active-entity write. The promote path + # already had this hook (see promote_entity below); V1-0 adds it to + # direct-active creates so every active row — however it got that + # way — is checked. Fail-open per "flag, never block" rule in + # conflict-model.md:256: detector errors log but never fail the write. 
+ if status == "active": + try: + from atocore.engineering.conflicts import detect_conflicts_for_entity + detect_conflicts_for_entity(entity_id) + except Exception as e: + log.warning("conflict_detection_failed", entity_id=entity_id, error=str(e)) + return Entity( id=entity_id, entity_type=entity_type, name=name.strip(), project=project, description=description, properties=props, status=status, confidence=confidence, source_refs=refs, created_at=now, updated_at=now, + extractor_version=ev, canonical_home="entity", + hand_authored=hand_authored, ) @@ -361,6 +424,20 @@ def promote_entity( if entity is None or entity.status != "candidate": return False + # V1-0 (F-8 provenance re-check at promote). The invariant must hold at + # BOTH create_entity AND promote_entity per the plan, because candidate + # rows can exist in the DB from before V1-0 (no enforcement at their + # create time) or can be inserted by code paths that bypass the service + # layer. Block any candidate with empty source_refs that is NOT flagged + # hand_authored from ever becoming active. Same error shape as the + # create-side check for symmetry. + if not (entity.source_refs or []) and not entity.hand_authored: + raise ValueError( + "source_refs required: cannot promote a candidate with no " + "provenance. Attach source_refs via PATCH /entities/{id}, " + "or flag hand_authored=true before promoting." + ) + if target_project is not None: new_project = ( resolve_project_name(target_project) if target_project else "" @@ -503,6 +580,22 @@ def supersede_entity( superseded_by=superseded_by, error=str(e), ) + + # V1-0 (F-5 hook on supersede, per plan's "every active-entity + # write path"). Supersede demotes `entity_id` AND adds a + # `supersedes` relationship rooted at the already-active + # `superseded_by`. That new edge can create a conflict the + # detector should catch synchronously. Fail-open per + # conflict-model.md:256. 
+ try: + from atocore.engineering.conflicts import detect_conflicts_for_entity + detect_conflicts_for_entity(superseded_by) + except Exception as e: + log.warning( + "conflict_detection_failed", + entity_id=superseded_by, + error=str(e), + ) return True @@ -774,6 +867,15 @@ def get_entity_with_context(entity_id: str) -> dict | None: def _row_to_entity(row) -> Entity: + # V1-0 shared-header fields are optional on read — rows that predate + # V1-0 migration have NULL / missing values, so defaults kick in and + # older tests that build Entity() without the new fields keep passing. + # `row.keys()` lets us tolerate SQLite rows that lack the columns + # entirely (pre-migration sqlite3.Row). + keys = set(row.keys()) + extractor_version = (row["extractor_version"] or "") if "extractor_version" in keys else "" + canonical_home = (row["canonical_home"] or "entity") if "canonical_home" in keys else "entity" + hand_authored = bool(row["hand_authored"]) if "hand_authored" in keys and row["hand_authored"] is not None else False return Entity( id=row["id"], entity_type=row["entity_type"], @@ -786,6 +888,9 @@ def _row_to_entity(row) -> Entity: source_refs=json.loads(row["source_refs"] or "[]"), created_at=row["created_at"] or "", updated_at=row["updated_at"] or "", + extractor_version=extractor_version, + canonical_home=canonical_home, + hand_authored=hand_authored, ) diff --git a/src/atocore/engineering/wiki.py b/src/atocore/engineering/wiki.py index 3ad8d36..39851ca 100644 --- a/src/atocore/engineering/wiki.py +++ b/src/atocore/engineering/wiki.py @@ -391,6 +391,8 @@ def render_new_entity_form(name: str = "", project: str = "") -> str: entity_type: fd.get('entity_type'), project: fd.get('project') || '', description: fd.get('description') || '', + // V1-0: human writes via the wiki form are hand_authored by definition. 
+ hand_authored: true, }; try { const r = await fetch('/v1/entities', { diff --git a/src/atocore/models/database.py b/src/atocore/models/database.py index bf0844b..0ece1fc 100644 --- a/src/atocore/models/database.py +++ b/src/atocore/models/database.py @@ -146,6 +146,28 @@ def _apply_migrations(conn: sqlite3.Connection) -> None: "CREATE INDEX IF NOT EXISTS idx_memories_graduated ON memories(graduated_to_entity_id)" ) + # V1-0 (Engineering V1 completion): shared header fields per + # engineering-v1-acceptance.md:45. Three columns on `entities`: + # - extractor_version: which extractor produced this row. Lets old + # candidates be re-evaluated with a newer extractor per + # promotion-rules.md:268. + # - canonical_home: which layer holds the canonical record. Always + # "entity" for rows written via create_entity; reserved for future + # cross-layer bookkeeping. + # - hand_authored: 1 when the row was created directly by a human + # without source provenance. Enforced by the write path so every + # non-hand-authored row must carry non-empty source_refs (F-8). + # The entities table itself is created by init_engineering_schema + # (see engineering/service.py); these ALTERs cover existing DBs + # where the original CREATE TABLE predates V1-0. + if _table_exists(conn, "entities"): + if not _column_exists(conn, "entities", "extractor_version"): + conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''") + if not _column_exists(conn, "entities", "canonical_home"): + conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'") + if not _column_exists(conn, "entities", "hand_authored"): + conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0") + # Phase 4 (Robustness V1): append-only audit log for memory mutations. # Every create/update/promote/reject/supersede/invalidate/reinforce/expire/ # auto_promote writes one row here. 
before/after are JSON snapshots of the @@ -352,6 +374,14 @@ def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool: return any(row["name"] == column for row in rows) +def _table_exists(conn: sqlite3.Connection, table: str) -> bool: + row = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name=?", + (table,), + ).fetchone() + return row is not None + + @contextmanager def get_connection() -> Generator[sqlite3.Connection, None, None]: """Get a database connection with row factory.""" diff --git a/tests/conftest.py b/tests/conftest.py index 00981f5..78379bb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,36 @@ os.environ["ATOCORE_DATA_DIR"] = _default_test_dir os.environ["ATOCORE_DEBUG"] = "true" +# V1-0: every entity created in a test is "hand authored" by the test +# author — fixture data, not extracted content. Rather than rewrite 100+ +# existing test call sites, wrap create_entity so that tests which don't +# provide source_refs get hand_authored=True automatically. Tests that +# explicitly pass source_refs or hand_authored are unaffected. This keeps +# the F-8 invariant enforced in production (the API, the wiki form, and +# graduation scripts all go through the unwrapped function) while leaving +# the existing test corpus intact. +def _patch_create_entity_for_tests(): + from atocore.engineering import service as _svc + + _original = _svc.create_entity + + def _create_entity_test(*args, **kwargs): + # Only auto-flag when hand_authored isn't explicitly specified. + # Tests that want to exercise the F-8 raise path pass + # hand_authored=False explicitly and should hit the error. 
+ if ( + not kwargs.get("source_refs") + and "hand_authored" not in kwargs + ): + kwargs["hand_authored"] = True + return _original(*args, **kwargs) + + _svc.create_entity = _create_entity_test + + +_patch_create_entity_for_tests() + + @pytest.fixture def tmp_data_dir(tmp_path): """Provide a temporary data directory for tests.""" diff --git a/tests/test_engineering_v1_phase5.py b/tests/test_engineering_v1_phase5.py index f0eea40..32e56fd 100644 --- a/tests/test_engineering_v1_phase5.py +++ b/tests/test_engineering_v1_phase5.py @@ -143,8 +143,11 @@ def test_requirement_name_conflict_detected(tmp_data_dir): r2 = create_entity("requirement", "Surface figure < 25nm", project="p-test", description="Different interpretation") - detected = detect_conflicts_for_entity(r2.id) - assert len(detected) == 1 + # V1-0 synchronous hook: the conflict is already detected at r2's + # create-time, so a redundant detect call returns [] due to + # _record_conflict dedup. Assert on list_open_conflicts instead — + # that's what the intent of this test really tests: duplicate + # active requirements surface as an open conflict. conflicts = list_open_conflicts(project="p-test") assert any(c["slot_kind"] == "requirement.name" for c in conflicts) @@ -191,8 +194,12 @@ def test_conflict_resolution_dismiss_leaves_entities_alone(tmp_data_dir): description="first meaning") r2 = create_entity("requirement", "Dup req", project="p-test", description="second meaning") - detected = detect_conflicts_for_entity(r2.id) - conflict_id = detected[0] + # V1-0 synchronous hook already recorded the conflict at r2's + # create-time. Look it up via list_open_conflicts rather than + # calling the detector again (which returns [] due to dedup). 
+ open_list = list_open_conflicts(project="p-test") + assert open_list, "expected conflict recorded by create-time hook" + conflict_id = open_list[0]["id"] assert resolve_conflict(conflict_id, "dismiss") # Both still active — dismiss just clears the conflict marker diff --git a/tests/test_inbox_crossproject.py b/tests/test_inbox_crossproject.py index 22794ac..1414a85 100644 --- a/tests/test_inbox_crossproject.py +++ b/tests/test_inbox_crossproject.py @@ -132,6 +132,7 @@ def test_api_post_entity_with_null_project_stores_global(seeded_db): "entity_type": "material", "name": "Titanium", "project": None, + "hand_authored": True, # V1-0 F-8: test fixture, no source_refs }) assert r.status_code == 200 diff --git a/tests/test_v1_0_write_invariants.py b/tests/test_v1_0_write_invariants.py new file mode 100644 index 0000000..d993c6d --- /dev/null +++ b/tests/test_v1_0_write_invariants.py @@ -0,0 +1,362 @@ +"""V1-0 write-time invariant tests. + +Covers the Engineering V1 completion plan Phase V1-0 acceptance: +- F-1 shared-header fields: extractor_version + canonical_home + hand_authored + land in the entities table with working defaults +- F-8 provenance enforcement: create_entity raises without source_refs + unless hand_authored=True +- F-5 synchronous conflict-detection hook on any active-entity write + (create_entity with status="active" + the pre-existing promote_entity + path); fail-open per conflict-model.md:256 +- Q-3 "flag, never block": a conflict never 4xx-blocks the write +- Q-4 partial trust: get_entities scope_only filters candidates out + +Plan: docs/plans/engineering-v1-completion-plan.md +Spec: docs/architecture/engineering-v1-acceptance.md +""" + +from __future__ import annotations + +import pytest + +from atocore.engineering.service import ( + EXTRACTOR_VERSION, + create_entity, + create_relationship, + get_entities, + get_entity, + init_engineering_schema, + promote_entity, + supersede_entity, +) +from atocore.models.database import get_connection, 
init_db + + +# ---------- F-1: shared-header fields ---------- + + +def test_entity_row_has_shared_header_fields(tmp_data_dir): + init_db() + init_engineering_schema() + with get_connection() as conn: + cols = {row["name"] for row in conn.execute("PRAGMA table_info(entities)").fetchall()} + assert "extractor_version" in cols + assert "canonical_home" in cols + assert "hand_authored" in cols + + +def test_created_entity_has_default_extractor_version_and_canonical_home(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + entity_type="component", + name="Pivot Pin", + project="p04-gigabit", + source_refs=["test:fixture"], + ) + assert e.extractor_version == EXTRACTOR_VERSION + assert e.canonical_home == "entity" + assert e.hand_authored is False + + # round-trip through get_entity to confirm the row mapper returns + # the same values (not just the return-by-construct path) + got = get_entity(e.id) + assert got is not None + assert got.extractor_version == EXTRACTOR_VERSION + assert got.canonical_home == "entity" + assert got.hand_authored is False + + +def test_explicit_extractor_version_is_persisted(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + entity_type="decision", + name="Pick GF-PTFE pads", + project="p04-gigabit", + source_refs=["interaction:abc"], + extractor_version="custom-v2.3", + ) + got = get_entity(e.id) + assert got.extractor_version == "custom-v2.3" + + +# ---------- F-8: provenance enforcement ---------- + + +def test_create_entity_without_provenance_raises(tmp_data_dir): + init_db() + init_engineering_schema() + with pytest.raises(ValueError, match="source_refs required"): + create_entity( + entity_type="component", + name="No Provenance", + project="p04-gigabit", + hand_authored=False, # explicit — bypasses the test-conftest auto-flag + ) + + +def test_create_entity_with_hand_authored_needs_no_source_refs(tmp_data_dir): + init_db() + init_engineering_schema() + e = create_entity( + 
entity_type="component", + name="Human Entry", + project="p04-gigabit", + hand_authored=True, + ) + assert e.hand_authored is True + got = get_entity(e.id) + assert got.hand_authored is True + # source_refs stays empty — the hand_authored flag IS the provenance + assert got.source_refs == [] + + +def test_create_entity_with_empty_source_refs_list_is_treated_as_missing(tmp_data_dir): + init_db() + init_engineering_schema() + with pytest.raises(ValueError, match="source_refs required"): + create_entity( + entity_type="component", + name="Empty Refs", + project="p04-gigabit", + source_refs=[], + hand_authored=False, + ) + + +def test_promote_rejects_legacy_candidate_without_provenance(tmp_data_dir): + """Regression (Codex V1-0 probe): candidate rows can exist in the DB + from before V1-0 enforcement (or from paths that bypass create_entity). + promote_entity must re-check the invariant and refuse to flip a + no-provenance candidate to active. Without this check, the active + store can leak F-8 violations in from legacy data.""" + init_db() + init_engineering_schema() + + # Simulate a pre-V1-0 candidate by inserting directly into the table, + # bypassing the service-layer invariant. Real legacy rows look exactly + # like this: empty source_refs, hand_authored=0. + import uuid as _uuid + entity_id = str(_uuid.uuid4()) + with get_connection() as conn: + conn.execute( + "INSERT INTO entities (id, entity_type, name, project, " + "description, properties, status, confidence, source_refs, " + "extractor_version, canonical_home, hand_authored, " + "created_at, updated_at) " + "VALUES (?, 'component', 'Legacy Orphan', 'p04-gigabit', " + "'', '{}', 'candidate', 1.0, '[]', '', 'entity', 0, " + "CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)", + (entity_id,), + ) + + with pytest.raises(ValueError, match="source_refs required"): + promote_entity(entity_id) + + # And the row stays a candidate — no half-transition. 
+ got = get_entity(entity_id) + assert got is not None + assert got.status == "candidate" + + +def test_promote_accepts_candidate_flagged_hand_authored(tmp_data_dir): + """The other side of the promote re-check: hand_authored=1 with + empty source_refs still lets promote succeed, matching + create_entity's symmetry.""" + init_db() + init_engineering_schema() + + import uuid as _uuid + entity_id = str(_uuid.uuid4()) + with get_connection() as conn: + conn.execute( + "INSERT INTO entities (id, entity_type, name, project, " + "description, properties, status, confidence, source_refs, " + "extractor_version, canonical_home, hand_authored, " + "created_at, updated_at) " + "VALUES (?, 'component', 'Hand Authored Candidate', " + "'p04-gigabit', '', '{}', 'candidate', 1.0, '[]', '', " + "'entity', 1, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)", + (entity_id,), + ) + + assert promote_entity(entity_id) is True + assert get_entity(entity_id).status == "active" + + +# ---------- F-5: synchronous conflict-detection hook ---------- + + +def test_active_create_runs_conflict_detection_hook(tmp_data_dir, monkeypatch): + """status=active writes trigger detect_conflicts_for_entity.""" + init_db() + init_engineering_schema() + + called_with: list[str] = [] + + def _fake_detect(entity_id: str): + called_with.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + e = create_entity( + entity_type="component", + name="Active With Hook", + project="p04-gigabit", + source_refs=["test:hook"], + status="active", + ) + + assert called_with == [e.id] + + +def test_supersede_runs_conflict_detection_on_new_active(tmp_data_dir, monkeypatch): + """Regression (Codex V1-0 probe): per plan's 'every active-entity + write path', supersede_entity must trigger synchronous conflict + detection. 
The subject is the `superseded_by` entity — the one + whose graph state just changed because a new `supersedes` edge was + rooted at it.""" + init_db() + init_engineering_schema() + + old = create_entity( + entity_type="component", + name="Old Pad", + project="p04-gigabit", + source_refs=["test:old"], + status="active", + ) + new = create_entity( + entity_type="component", + name="New Pad", + project="p04-gigabit", + source_refs=["test:new"], + status="active", + ) + + called_with: list[str] = [] + + def _fake_detect(entity_id: str): + called_with.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + assert supersede_entity(old.id, superseded_by=new.id) is True + + # The detector fires on the `superseded_by` entity — the one whose + # edges just grew a new `supersedes` relationship. + assert new.id in called_with + + +def test_supersede_hook_fails_open(tmp_data_dir, monkeypatch): + """Supersede must survive a broken detector per Q-3 flag-never-block.""" + init_db() + init_engineering_schema() + + old = create_entity( + entity_type="component", name="Old2", project="p04-gigabit", + source_refs=["test:old"], status="active", + ) + new = create_entity( + entity_type="component", name="New2", project="p04-gigabit", + source_refs=["test:new"], status="active", + ) + + def _boom(entity_id: str): + raise RuntimeError("synthetic detector failure") + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _boom) + + # The supersede still succeeds despite the detector blowing up. + assert supersede_entity(old.id, superseded_by=new.id) is True + assert get_entity(old.id).status == "superseded" + + +def test_candidate_create_does_not_run_conflict_hook(tmp_data_dir, monkeypatch): + """status=candidate writes do NOT trigger detection — the hook is + for active rows only, per V1-0 scope. 
Candidates are checked at + promote time.""" + init_db() + init_engineering_schema() + + called: list[str] = [] + + def _fake_detect(entity_id: str): + called.append(entity_id) + return [] + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _fake_detect) + + create_entity( + entity_type="component", + name="Candidate No Hook", + project="p04-gigabit", + source_refs=["test:cand"], + status="candidate", + ) + + assert called == [] + + +# ---------- Q-3: flag, never block ---------- + + +def test_conflict_detector_failure_does_not_block_write(tmp_data_dir, monkeypatch): + """Per conflict-model.md:256: detection errors must not fail the + write. The entity is still created; only a warning is logged.""" + init_db() + init_engineering_schema() + + def _boom(entity_id: str): + raise RuntimeError("synthetic detector failure") + + import atocore.engineering.conflicts as conflicts_mod + monkeypatch.setattr(conflicts_mod, "detect_conflicts_for_entity", _boom) + + # The write still succeeds — no exception propagates. + e = create_entity( + entity_type="component", + name="Hook Fails Open", + project="p04-gigabit", + source_refs=["test:failopen"], + status="active", + ) + assert get_entity(e.id) is not None + + +# ---------- Q-4 (partial): trust-hierarchy — scope_only filters candidates ---------- + + +def test_scope_only_active_does_not_return_candidates(tmp_data_dir): + """V1-0 partial Q-4: active-scoped listing never returns candidates. + Full trust-hierarchy coverage (no-auto-project-state, etc.) 
ships in + V1-E per plan.""" + init_db() + init_engineering_schema() + + active = create_entity( + entity_type="component", + name="Active Alpha", + project="p04-gigabit", + source_refs=["test:alpha"], + status="active", + ) + candidate = create_entity( + entity_type="component", + name="Candidate Beta", + project="p04-gigabit", + source_refs=["test:beta"], + status="candidate", + ) + + listed = get_entities(project="p04-gigabit", status="active", scope_only=True) + ids = {e.id for e in listed} + assert active.id in ids + assert candidate.id not in ids