First slice of the Engineering V1 sprint. Lays the schema + lifecycle
plumbing so the 10 canonical queries, memory graduation, and conflict
detection can land cleanly on top.
Schema (src/atocore/models/database.py):
- conflicts + conflict_members tables per conflict-model.md (with 5
indexes on status/project/slot/members)
- memory_audit.entity_kind discriminator — same audit table serves
both memories ("memory") and entities ("entity"); unified history
without duplicating infrastructure
- memories.graduated_to_entity_id forward pointer for graduated
memories (M → E transition preserves the memory as historical
pointer)
Memory (src/atocore/memory/service.py):
- MEMORY_STATUSES gains "graduated" — memory-entity graduation flow
ready to wire in Phase 5F
Engineering service (src/atocore/engineering/service.py):
- RELATIONSHIP_TYPES organized into 4 families per ontology-v1.md:
+ Structural: contains, part_of, interfaces_with
+ Intent: satisfies, constrained_by, affected_by_decision,
based_on_assumption (new), supersedes
+ Validation: analyzed_by, validated_by, supports (new),
conflicts_with (new), depends_on
+ Provenance: described_by, updated_by_session (new),
evidenced_by (new), summarized_in (new)
- create_entity + create_relationship now call resolve_project_name()
on write (canonicalization contract per doc)
- Both accept actor= parameter for audit provenance
- _audit_entity() helper uses shared memory_audit table with
entity_kind="entity" — one observability layer for everything
- promote_entity / reject_entity_candidate / supersede_entity —
mirror the memory lifecycle exactly (same pattern, same naming)
- get_entity_audit() reads from the shared table filtered by
entity_kind
API (src/atocore/api/routes.py):
- POST /entities/{id}/promote (candidate → active)
- POST /entities/{id}/reject (candidate → invalid)
- GET /entities/{id}/audit (full history for one entity)
- POST /entities passes actor="api-http" through
Tests: 317 → 326 (9 new):
- test_entity_project_canonicalization (p04 → p04-gigabit)
- test_promote_entity_candidate_to_active
- test_reject_entity_candidate
- test_promote_active_entity_noop (only candidates promote)
- test_entity_audit_log_captures_lifecycle (before/after snapshots)
- test_new_relationship_types_available (6 new types present)
- test_conflicts_tables_exist
- test_memory_audit_has_entity_kind
- test_graduated_status_accepted
What's next (5B-5I, deferred): entity triage UI tab, core structure
queries, the 3 killer queries, memory graduation script, conflict
detection, MCP + context pack integration. See plan file.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
281 lines
12 KiB
Python
"""SQLite database schema and connection management."""
|
|
|
|
import sqlite3
|
|
from contextlib import contextmanager
|
|
from pathlib import Path
|
|
from typing import Generator
|
|
|
|
import atocore.config as _config
|
|
from atocore.observability.logger import get_logger
|
|
|
|
log = get_logger("database")
|
|
|
|
SCHEMA_SQL = """
|
|
CREATE TABLE IF NOT EXISTS source_documents (
|
|
id TEXT PRIMARY KEY,
|
|
file_path TEXT UNIQUE NOT NULL,
|
|
file_hash TEXT NOT NULL,
|
|
title TEXT,
|
|
doc_type TEXT DEFAULT 'markdown',
|
|
tags TEXT DEFAULT '[]',
|
|
ingested_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS source_chunks (
|
|
id TEXT PRIMARY KEY,
|
|
document_id TEXT NOT NULL REFERENCES source_documents(id) ON DELETE CASCADE,
|
|
chunk_index INTEGER NOT NULL,
|
|
content TEXT NOT NULL,
|
|
heading_path TEXT DEFAULT '',
|
|
char_count INTEGER NOT NULL,
|
|
metadata TEXT DEFAULT '{}',
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS memories (
|
|
id TEXT PRIMARY KEY,
|
|
memory_type TEXT NOT NULL,
|
|
content TEXT NOT NULL,
|
|
project TEXT DEFAULT '',
|
|
source_chunk_id TEXT REFERENCES source_chunks(id),
|
|
confidence REAL DEFAULT 1.0,
|
|
status TEXT DEFAULT 'active',
|
|
last_referenced_at DATETIME,
|
|
reference_count INTEGER DEFAULT 0,
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS projects (
|
|
id TEXT PRIMARY KEY,
|
|
name TEXT UNIQUE NOT NULL,
|
|
description TEXT DEFAULT '',
|
|
status TEXT DEFAULT 'active',
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
CREATE TABLE IF NOT EXISTS interactions (
|
|
id TEXT PRIMARY KEY,
|
|
prompt TEXT NOT NULL,
|
|
context_pack TEXT DEFAULT '{}',
|
|
response_summary TEXT DEFAULT '',
|
|
response TEXT DEFAULT '',
|
|
memories_used TEXT DEFAULT '[]',
|
|
chunks_used TEXT DEFAULT '[]',
|
|
client TEXT DEFAULT '',
|
|
session_id TEXT DEFAULT '',
|
|
project TEXT DEFAULT '',
|
|
project_id TEXT REFERENCES projects(id),
|
|
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
);
|
|
|
|
-- Indexes that reference columns guaranteed to exist since the first
|
|
-- release ship here. Indexes that reference columns added by later
|
|
-- migrations (memories.project, interactions.project,
|
|
-- interactions.session_id) are created inside _apply_migrations AFTER
|
|
-- the corresponding ALTER TABLE, NOT here. Creating them here would
|
|
-- fail on upgrade from a pre-migration schema because CREATE TABLE
|
|
-- IF NOT EXISTS is a no-op on an existing table, so the new columns
|
|
-- wouldn't be added before the CREATE INDEX runs.
|
|
CREATE INDEX IF NOT EXISTS idx_chunks_document ON source_chunks(document_id);
|
|
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type);
|
|
CREATE INDEX IF NOT EXISTS idx_memories_status ON memories(status);
|
|
CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
|
|
"""
|
|
|
|
|
|
def _ensure_data_dir() -> None:
    """Ensure the runtime directories exist before touching the DB file."""
    _config.ensure_runtime_dirs()
|
|
|
|
|
|
def init_db() -> None:
    """Initialize the database with schema.

    Runs the base DDL from SCHEMA_SQL (idempotent — everything is
    IF NOT EXISTS) and then applies column/table migrations for databases
    created by earlier releases. Both steps share one connection from
    get_connection(), which commits on clean exit of the with-block.
    """
    _ensure_data_dir()
    with get_connection() as conn:
        conn.executescript(SCHEMA_SQL)
        _apply_migrations(conn)
    log.info("database_initialized", path=str(_config.settings.db_path))
|
|
|
|
|
|
def _apply_migrations(conn: sqlite3.Connection) -> None:
    """Apply lightweight schema migrations for existing local databases.

    Every step is idempotent: ALTER TABLE runs only when _column_exists
    reports the column missing, and all CREATE TABLE / CREATE INDEX
    statements use IF NOT EXISTS. init_db() calls this on every startup,
    so new migrations are appended here (editing SCHEMA_SQL's CREATE TABLE
    statements would be a no-op on databases whose tables already exist).

    Index creation is deliberately unconditional, outside the column
    guards: fresh databases already get the migrated columns from
    SCHEMA_SQL (so the guards never fire there), and IF NOT EXISTS makes
    the repeated CREATE INDEX free on subsequent runs.
    """
    # memories.project: per-project scoping for memories.
    if not _column_exists(conn, "memories", "project"):
        conn.execute("ALTER TABLE memories ADD COLUMN project TEXT DEFAULT ''")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_project ON memories(project)")

    # Phase 9 Commit B: reinforcement columns.
    # last_referenced_at records when a memory was most recently referenced
    # in a captured interaction; reference_count is a monotonically
    # increasing counter bumped on every reference. Together they let
    # Reflection (Commit C) and decay (deferred) reason about which
    # memories are actually being used versus which have gone cold.
    if not _column_exists(conn, "memories", "last_referenced_at"):
        conn.execute("ALTER TABLE memories ADD COLUMN last_referenced_at DATETIME")
    if not _column_exists(conn, "memories", "reference_count"):
        conn.execute("ALTER TABLE memories ADD COLUMN reference_count INTEGER DEFAULT 0")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_memories_last_referenced ON memories(last_referenced_at)"
    )

    # Phase 3 (Auto-Organization V1): domain tags + expiry.
    # domain_tags is a JSON array of lowercase strings (optics, mechanics,
    # firmware, business, etc.) inferred by the LLM during triage. Used for
    # cross-project retrieval: a query about "optics" can surface matches from
    # p04 + p05 + p06 without knowing all the project names.
    # valid_until is an ISO UTC timestamp beyond which the memory is
    # considered stale. get_memories_for_context filters these out of context
    # packs automatically so ephemeral facts (status snapshots, weekly counts)
    # don't pollute grounding once they've aged out.
    if not _column_exists(conn, "memories", "domain_tags"):
        conn.execute("ALTER TABLE memories ADD COLUMN domain_tags TEXT DEFAULT '[]'")
    if not _column_exists(conn, "memories", "valid_until"):
        conn.execute("ALTER TABLE memories ADD COLUMN valid_until DATETIME")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_memories_valid_until ON memories(valid_until)"
    )

    # Phase 5 (Engineering V1): when a memory graduates to an entity, we
    # keep the memory row as an immutable historical pointer. The forward
    # pointer lets downstream code follow "what did this memory become?"
    # without having to join through source_refs.
    if not _column_exists(conn, "memories", "graduated_to_entity_id"):
        conn.execute("ALTER TABLE memories ADD COLUMN graduated_to_entity_id TEXT")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_memories_graduated ON memories(graduated_to_entity_id)"
    )

    # Phase 4 (Robustness V1): append-only audit log for memory mutations.
    # Every create/update/promote/reject/supersede/invalidate/reinforce/expire/
    # auto_promote writes one row here. before/after are JSON snapshots of the
    # relevant fields. actor lets us distinguish auto-triage vs human-triage vs
    # api vs cron. This is the "how did this memory get to its current state"
    # trail — essential once the brain starts auto-organizing itself.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS memory_audit (
            id TEXT PRIMARY KEY,
            memory_id TEXT NOT NULL,
            action TEXT NOT NULL,
            actor TEXT DEFAULT 'api',
            before_json TEXT DEFAULT '{}',
            after_json TEXT DEFAULT '{}',
            note TEXT DEFAULT '',
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
        """
    )
    conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_audit_memory ON memory_audit(memory_id)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_audit_timestamp ON memory_audit(timestamp)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_audit_action ON memory_audit(action)")

    # Phase 5 (Engineering V1): entity_kind discriminator lets one audit
    # table serve both memories AND entities. Default "memory" keeps existing
    # rows correct; entity mutations write entity_kind="entity".
    if not _column_exists(conn, "memory_audit", "entity_kind"):
        conn.execute("ALTER TABLE memory_audit ADD COLUMN entity_kind TEXT DEFAULT 'memory'")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_memory_audit_entity_kind ON memory_audit(entity_kind)"
    )

    # Phase 5: conflicts + conflict_members tables per conflict-model.md.
    # A conflict is "two or more active rows claiming the same slot with
    # incompatible values". slot_kind + slot_key identify the logical slot
    # (e.g., "component.material" for some component id). Members point
    # back to the conflicting rows (memory or entity) with layer trust so
    # resolution can pick the highest-trust winner.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS conflicts (
            id TEXT PRIMARY KEY,
            slot_kind TEXT NOT NULL,
            slot_key TEXT NOT NULL,
            project TEXT DEFAULT '',
            status TEXT DEFAULT 'open',
            resolution TEXT DEFAULT '',
            resolved_at DATETIME,
            detected_at DATETIME DEFAULT CURRENT_TIMESTAMP,
            note TEXT DEFAULT ''
        )
        """
    )
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS conflict_members (
            id TEXT PRIMARY KEY,
            conflict_id TEXT NOT NULL REFERENCES conflicts(id) ON DELETE CASCADE,
            member_kind TEXT NOT NULL,
            member_id TEXT NOT NULL,
            member_layer_trust INTEGER DEFAULT 0,
            value_snapshot TEXT DEFAULT ''
        )
        """
    )
    conn.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_status ON conflicts(status)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_conflicts_project ON conflicts(project)")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_conflicts_slot ON conflicts(slot_kind, slot_key)"
    )
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_conflict_members_conflict ON conflict_members(conflict_id)"
    )
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_conflict_members_member ON conflict_members(member_kind, member_id)"
    )

    # Phase 9 Commit A: capture loop columns on the interactions table.
    # The original schema only carried prompt + project_id + a context_pack
    # JSON blob. To make interactions a real audit trail of what AtoCore fed
    # the LLM and what came back, we record the full response, the chunk
    # and memory ids that were actually used, plus client + session metadata.
    if not _column_exists(conn, "interactions", "response"):
        conn.execute("ALTER TABLE interactions ADD COLUMN response TEXT DEFAULT ''")
    if not _column_exists(conn, "interactions", "memories_used"):
        conn.execute("ALTER TABLE interactions ADD COLUMN memories_used TEXT DEFAULT '[]'")
    if not _column_exists(conn, "interactions", "chunks_used"):
        conn.execute("ALTER TABLE interactions ADD COLUMN chunks_used TEXT DEFAULT '[]'")
    if not _column_exists(conn, "interactions", "client"):
        conn.execute("ALTER TABLE interactions ADD COLUMN client TEXT DEFAULT ''")
    if not _column_exists(conn, "interactions", "session_id"):
        conn.execute("ALTER TABLE interactions ADD COLUMN session_id TEXT DEFAULT ''")
    if not _column_exists(conn, "interactions", "project"):
        conn.execute("ALTER TABLE interactions ADD COLUMN project TEXT DEFAULT ''")
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_interactions_session ON interactions(session_id)"
    )
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_interactions_project_name ON interactions(project)"
    )
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_interactions_created_at ON interactions(created_at)"
    )
|
|
|
|
|
|
def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool:
|
|
rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
|
|
return any(row["name"] == column for row in rows)
|
|
|
|
|
|
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a configured SQLite connection.

    Commits when the with-block exits cleanly; rolls back and re-raises
    when the block (or the commit itself) raises. The connection is
    always closed. Rows come back as sqlite3.Row, so columns are
    addressable by name.
    """
    _ensure_data_dir()
    cfg = _config.settings
    conn = sqlite3.connect(
        str(cfg.db_path),
        timeout=cfg.db_busy_timeout_ms / 1000,
    )
    conn.row_factory = sqlite3.Row
    # Session pragmas: enforce FK constraints, wait on locks instead of
    # failing immediately, and use WAL + NORMAL sync for local concurrency.
    for pragma in (
        "foreign_keys = ON",
        f"busy_timeout = {cfg.db_busy_timeout_ms}",
        "journal_mode = WAL",
        "synchronous = NORMAL",
    ):
        conn.execute(f"PRAGMA {pragma}")
    try:
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|