feat(engineering): enforce V1-0 write invariants
This commit is contained in:
@@ -1457,6 +1457,11 @@ class EntityCreateRequest(BaseModel):
|
||||
status: str = "active"
|
||||
confidence: float = 1.0
|
||||
source_refs: list[str] | None = None
|
||||
# V1-0 provenance enforcement (F-8). Clients must either pass
|
||||
# non-empty source_refs or set hand_authored=true. The service layer
|
||||
# raises ValueError otherwise, surfaced here as 400.
|
||||
hand_authored: bool = False
|
||||
extractor_version: str | None = None
|
||||
|
||||
|
||||
class EntityPromoteRequest(BaseModel):
|
||||
@@ -1486,6 +1491,8 @@ def api_create_entity(req: EntityCreateRequest) -> dict:
|
||||
confidence=req.confidence,
|
||||
source_refs=req.source_refs,
|
||||
actor="api-http",
|
||||
hand_authored=req.hand_authored,
|
||||
extractor_version=req.extractor_version,
|
||||
)
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
@@ -63,6 +63,12 @@ RELATIONSHIP_TYPES = [
|
||||
|
||||
ENTITY_STATUSES = ["candidate", "active", "superseded", "invalid"]
|
||||
|
||||
# V1-0: extractor version this module writes into new entity rows.
|
||||
# Per promotion-rules.md:268, every candidate must record the version of
|
||||
# the extractor that produced it so later re-evaluation is auditable.
|
||||
# Bump this when extraction logic materially changes.
|
||||
EXTRACTOR_VERSION = "v1.0.0"
|
||||
|
||||
|
||||
@dataclass
|
||||
class Entity:
|
||||
@@ -77,6 +83,10 @@ class Entity:
|
||||
source_refs: list[str] = field(default_factory=list)
|
||||
created_at: str = ""
|
||||
updated_at: str = ""
|
||||
# V1-0 shared-header fields per engineering-v1-acceptance.md:45.
|
||||
extractor_version: str = ""
|
||||
canonical_home: str = "entity"
|
||||
hand_authored: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -103,10 +113,25 @@ def init_engineering_schema() -> None:
|
||||
status TEXT NOT NULL DEFAULT 'active',
|
||||
confidence REAL NOT NULL DEFAULT 1.0,
|
||||
source_refs TEXT NOT NULL DEFAULT '[]',
|
||||
extractor_version TEXT NOT NULL DEFAULT '',
|
||||
canonical_home TEXT NOT NULL DEFAULT 'entity',
|
||||
hand_authored INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
""")
|
||||
# V1-0 (Engineering V1 completion): the three shared-header fields
|
||||
# per engineering-v1-acceptance.md:45. The idempotent ALTERs below add
|
||||
# these columns to databases created before V1-0, without a full
|
||||
# migration. Fresh DBs get them via the CREATE TABLE above; the
|
||||
# ALTERs below are a no-op there.
|
||||
from atocore.models.database import _column_exists # late import; avoids cycle
|
||||
if not _column_exists(conn, "entities", "extractor_version"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''")
|
||||
if not _column_exists(conn, "entities", "canonical_home"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'")
|
||||
if not _column_exists(conn, "entities", "hand_authored"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0")
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS relationships (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -149,6 +174,8 @@ def create_entity(
|
||||
confidence: float = 1.0,
|
||||
source_refs: list[str] | None = None,
|
||||
actor: str = "api",
|
||||
hand_authored: bool = False,
|
||||
extractor_version: str | None = None,
|
||||
) -> Entity:
|
||||
if entity_type not in ENTITY_TYPES:
|
||||
raise ValueError(f"Invalid entity type: {entity_type}. Must be one of {ENTITY_TYPES}")
|
||||
@@ -157,6 +184,21 @@ def create_entity(
|
||||
if not name or not name.strip():
|
||||
raise ValueError("Entity name must be non-empty")
|
||||
|
||||
refs = list(source_refs) if source_refs else []
|
||||
|
||||
# V1-0 (F-8 provenance enforcement, engineering-v1-acceptance.md:147):
|
||||
# every new entity row must carry non-empty source_refs OR be explicitly
|
||||
# flagged hand_authored. This is the non-negotiable invariant every
|
||||
# later V1 phase depends on — without it, active entities can escape
|
||||
# into the graph with no traceable origin. Raises at the write seam so
|
||||
# the bug is impossible to introduce silently.
|
||||
if not refs and not hand_authored:
|
||||
raise ValueError(
|
||||
"source_refs required: every entity must carry provenance "
|
||||
"(source_chunk_id / source_interaction_id / kb_cad_export_id / ...) "
|
||||
"or set hand_authored=True to explicitly flag a direct human write"
|
||||
)
|
||||
|
||||
# Phase 5: enforce project canonicalization contract at the write seam.
|
||||
# Aliases like "p04" become "p04-gigabit" so downstream reads stay
|
||||
# consistent with the registry.
|
||||
@@ -165,18 +207,22 @@ def create_entity(
|
||||
entity_id = str(uuid.uuid4())
|
||||
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
props = properties or {}
|
||||
refs = source_refs or []
|
||||
ev = extractor_version if extractor_version is not None else EXTRACTOR_VERSION
|
||||
|
||||
with get_connection() as conn:
|
||||
conn.execute(
|
||||
"""INSERT INTO entities
|
||||
(id, entity_type, name, project, description, properties,
|
||||
status, confidence, source_refs, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
status, confidence, source_refs,
|
||||
extractor_version, canonical_home, hand_authored,
|
||||
created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
entity_id, entity_type, name.strip(), project,
|
||||
description, json.dumps(props), status, confidence,
|
||||
json.dumps(refs), now, now,
|
||||
json.dumps(refs),
|
||||
ev, "entity", 1 if hand_authored else 0,
|
||||
now, now,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -194,14 +240,31 @@ def create_entity(
|
||||
"project": project,
|
||||
"status": status,
|
||||
"confidence": confidence,
|
||||
"hand_authored": hand_authored,
|
||||
"extractor_version": ev,
|
||||
},
|
||||
)
|
||||
|
||||
# V1-0 (F-5 hook, engineering-v1-acceptance.md:99): synchronous
|
||||
# conflict detection on any active-entity write. The promote path
|
||||
# already had this hook (see promote_entity below); V1-0 adds it to
|
||||
# direct-active creates so every active row — however it got that
|
||||
# way — is checked. Fail-open per "flag, never block" rule in
|
||||
# conflict-model.md:256: detector errors log but never fail the write.
|
||||
if status == "active":
|
||||
try:
|
||||
from atocore.engineering.conflicts import detect_conflicts_for_entity
|
||||
detect_conflicts_for_entity(entity_id)
|
||||
except Exception as e:
|
||||
log.warning("conflict_detection_failed", entity_id=entity_id, error=str(e))
|
||||
|
||||
return Entity(
|
||||
id=entity_id, entity_type=entity_type, name=name.strip(),
|
||||
project=project, description=description, properties=props,
|
||||
status=status, confidence=confidence, source_refs=refs,
|
||||
created_at=now, updated_at=now,
|
||||
extractor_version=ev, canonical_home="entity",
|
||||
hand_authored=hand_authored,
|
||||
)
|
||||
|
||||
|
||||
@@ -361,6 +424,20 @@ def promote_entity(
|
||||
if entity is None or entity.status != "candidate":
|
||||
return False
|
||||
|
||||
# V1-0 (F-8 provenance re-check at promote). The invariant must hold at
|
||||
# BOTH create_entity AND promote_entity per the plan, because candidate
|
||||
# rows can exist in the DB from before V1-0 (no enforcement at their
|
||||
# create time) or can be inserted by code paths that bypass the service
|
||||
# layer. Block any candidate with empty source_refs that is NOT flagged
|
||||
# hand_authored from ever becoming active. Same error shape as the
|
||||
# create-side check for symmetry.
|
||||
if not (entity.source_refs or []) and not entity.hand_authored:
|
||||
raise ValueError(
|
||||
"source_refs required: cannot promote a candidate with no "
|
||||
"provenance. Attach source_refs via PATCH /entities/{id}, "
|
||||
"or flag hand_authored=true before promoting."
|
||||
)
|
||||
|
||||
if target_project is not None:
|
||||
new_project = (
|
||||
resolve_project_name(target_project) if target_project else ""
|
||||
@@ -503,6 +580,22 @@ def supersede_entity(
|
||||
superseded_by=superseded_by,
|
||||
error=str(e),
|
||||
)
|
||||
|
||||
# V1-0 (F-5 hook on supersede, per plan's "every active-entity
|
||||
# write path"). Supersede demotes `entity_id` AND adds a
|
||||
# `supersedes` relationship rooted at the already-active
|
||||
# `superseded_by`. That new edge can create a conflict the
|
||||
# detector should catch synchronously. Fail-open per
|
||||
# conflict-model.md:256.
|
||||
try:
|
||||
from atocore.engineering.conflicts import detect_conflicts_for_entity
|
||||
detect_conflicts_for_entity(superseded_by)
|
||||
except Exception as e:
|
||||
log.warning(
|
||||
"conflict_detection_failed",
|
||||
entity_id=superseded_by,
|
||||
error=str(e),
|
||||
)
|
||||
return True
|
||||
|
||||
|
||||
@@ -774,6 +867,15 @@ def get_entity_with_context(entity_id: str) -> dict | None:
|
||||
|
||||
|
||||
def _row_to_entity(row) -> Entity:
|
||||
# V1-0 shared-header fields are optional on read — rows that predate
|
||||
# V1-0 migration have NULL / missing values, so defaults kick in and
|
||||
# older tests that build Entity() without the new fields keep passing.
|
||||
# `row.keys()` lets us tolerate SQLite rows that lack the columns
|
||||
# entirely (pre-migration sqlite3.Row).
|
||||
keys = set(row.keys())
|
||||
extractor_version = (row["extractor_version"] or "") if "extractor_version" in keys else ""
|
||||
canonical_home = (row["canonical_home"] or "entity") if "canonical_home" in keys else "entity"
|
||||
hand_authored = bool(row["hand_authored"]) if "hand_authored" in keys and row["hand_authored"] is not None else False
|
||||
return Entity(
|
||||
id=row["id"],
|
||||
entity_type=row["entity_type"],
|
||||
@@ -786,6 +888,9 @@ def _row_to_entity(row) -> Entity:
|
||||
source_refs=json.loads(row["source_refs"] or "[]"),
|
||||
created_at=row["created_at"] or "",
|
||||
updated_at=row["updated_at"] or "",
|
||||
extractor_version=extractor_version,
|
||||
canonical_home=canonical_home,
|
||||
hand_authored=hand_authored,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -391,6 +391,8 @@ def render_new_entity_form(name: str = "", project: str = "") -> str:
|
||||
entity_type: fd.get('entity_type'),
|
||||
project: fd.get('project') || '',
|
||||
description: fd.get('description') || '',
|
||||
// V1-0: human writes via the wiki form are hand_authored by definition.
|
||||
hand_authored: true,
|
||||
};
|
||||
try {
|
||||
const r = await fetch('/v1/entities', {
|
||||
|
||||
@@ -146,6 +146,28 @@ def _apply_migrations(conn: sqlite3.Connection) -> None:
|
||||
"CREATE INDEX IF NOT EXISTS idx_memories_graduated ON memories(graduated_to_entity_id)"
|
||||
)
|
||||
|
||||
# V1-0 (Engineering V1 completion): shared header fields per
|
||||
# engineering-v1-acceptance.md:45. Three columns on `entities`:
|
||||
# - extractor_version: which extractor produced this row. Lets old
|
||||
# candidates be re-evaluated with a newer extractor per
|
||||
# promotion-rules.md:268.
|
||||
# - canonical_home: which layer holds the canonical record. Always
|
||||
# "entity" for rows written via create_entity; reserved for future
|
||||
# cross-layer bookkeeping.
|
||||
# - hand_authored: 1 when the row was created directly by a human
|
||||
# without source provenance. Enforced by the write path so every
|
||||
# non-hand-authored row must carry non-empty source_refs (F-8).
|
||||
# The entities table itself is created by init_engineering_schema
|
||||
# (see engineering/service.py); these ALTERs cover existing DBs
|
||||
# where the original CREATE TABLE predates V1-0.
|
||||
if _table_exists(conn, "entities"):
|
||||
if not _column_exists(conn, "entities", "extractor_version"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN extractor_version TEXT DEFAULT ''")
|
||||
if not _column_exists(conn, "entities", "canonical_home"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN canonical_home TEXT DEFAULT 'entity'")
|
||||
if not _column_exists(conn, "entities", "hand_authored"):
|
||||
conn.execute("ALTER TABLE entities ADD COLUMN hand_authored INTEGER DEFAULT 0")
|
||||
|
||||
# Phase 4 (Robustness V1): append-only audit log for memory mutations.
|
||||
# Every create/update/promote/reject/supersede/invalidate/reinforce/expire/
|
||||
# auto_promote writes one row here. before/after are JSON snapshots of the
|
||||
@@ -352,6 +374,14 @@ def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool:
|
||||
return any(row["name"] == column for row in rows)
|
||||
|
||||
|
||||
def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
    """Return True when a table named *table* exists in the database.

    The name is looked up in ``sqlite_master`` restricted to
    ``type='table'``, so views and indexes with the same name never match.

    The comparison uses ``COLLATE NOCASE`` because SQLite resolves table
    names case-insensitively in statements such as ``ALTER TABLE``, while
    ``sqlite_master`` stores the name exactly as it was spelled at
    creation time. A plain ``name = ?`` would report a table created as
    ``Entities`` as missing when asked about ``entities``, causing any
    migration guarded by this check to be skipped silently.

    Args:
        conn: Open SQLite connection to inspect.
        table: Table name to look for (bound as a parameter, never
            interpolated into the SQL text).

    Returns:
        True if a matching table row exists, False otherwise.
    """
    row = conn.execute(
        "SELECT name FROM sqlite_master "
        "WHERE type='table' AND name = ? COLLATE NOCASE",
        (table,),
    ).fetchone()
    return row is not None
|
||||
|
||||
|
||||
@contextmanager
|
||||
def get_connection() -> Generator[sqlite3.Connection, None, None]:
|
||||
"""Get a database connection with row factory."""
|
||||
|
||||
Reference in New Issue
Block a user