Files
ATOCore/src/atocore/models/database.py
Anto01 b4afbbb53a feat: implement AtoCore Phase 0 + Phase 0.5 (foundation + PoC)
Complete implementation of the personal context engine foundation:
- FastAPI server with 5 endpoints (ingest, query, context/build, health, debug)
- SQLite database with 5 tables (documents, chunks, memories, projects, interactions)
- Heading-aware markdown chunker (800 char max, recursive splitting)
- Multilingual embeddings via sentence-transformers (EN/FR)
- ChromaDB vector store with cosine similarity retrieval
- Context builder with project boosting, dedup, and budget enforcement
- CLI scripts for batch ingestion and test prompt evaluation
- 19 unit tests passing, 79% coverage
- Validated on 482 real project files (8383 chunks, 0 errors)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 09:21:27 -04:00

99 lines
2.9 KiB
Python

"""SQLite database schema and connection management."""
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Generator
from atocore.config import settings
from atocore.observability.logger import get_logger
log = get_logger("database")
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS source_documents (
id TEXT PRIMARY KEY,
file_path TEXT UNIQUE NOT NULL,
file_hash TEXT NOT NULL,
title TEXT,
doc_type TEXT DEFAULT 'markdown',
tags TEXT DEFAULT '[]',
ingested_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS source_chunks (
id TEXT PRIMARY KEY,
document_id TEXT NOT NULL REFERENCES source_documents(id) ON DELETE CASCADE,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
heading_path TEXT DEFAULT '',
char_count INTEGER NOT NULL,
metadata TEXT DEFAULT '{}',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
memory_type TEXT NOT NULL,
content TEXT NOT NULL,
source_chunk_id TEXT REFERENCES source_chunks(id),
confidence REAL DEFAULT 1.0,
status TEXT DEFAULT 'active',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS projects (
id TEXT PRIMARY KEY,
name TEXT UNIQUE NOT NULL,
description TEXT DEFAULT '',
status TEXT DEFAULT 'active',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS interactions (
id TEXT PRIMARY KEY,
prompt TEXT NOT NULL,
context_pack TEXT DEFAULT '{}',
response_summary TEXT DEFAULT '',
project_id TEXT REFERENCES projects(id),
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_chunks_document ON source_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type);
CREATE INDEX IF NOT EXISTS idx_memories_status ON memories(status);
CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
"""
def _ensure_data_dir() -> None:
settings.data_dir.mkdir(parents=True, exist_ok=True)
def init_db() -> None:
"""Initialize the database with schema."""
_ensure_data_dir()
with get_connection() as conn:
conn.executescript(SCHEMA_SQL)
log.info("database_initialized", path=str(settings.db_path))
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
"""Get a database connection with row factory."""
_ensure_data_dir()
conn = sqlite3.connect(str(settings.db_path))
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA foreign_keys = ON")
try:
yield conn
conn.commit()
except Exception:
conn.rollback()
raise
finally:
conn.close()