feat: implement AtoCore Phase 0 + Phase 0.5 (foundation + PoC)

Complete implementation of the personal context engine foundation:
- FastAPI server with 5 endpoints (ingest, query, context/build, health, debug)
- SQLite database with 5 tables (documents, chunks, memories, projects, interactions)
- Heading-aware markdown chunker (800 char max, recursive splitting)
- Multilingual embeddings via sentence-transformers (EN/FR)
- ChromaDB vector store with cosine similarity retrieval
- Context builder with project boosting, dedup, and budget enforcement
- CLI scripts for batch ingestion and test prompt evaluation
- 19 unit tests passing, 79% coverage
- Validated on 482 real project files (8383 chunks, 0 errors)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 09:21:27 -04:00
parent 32ce409a7b
commit b4afbbb53a
34 changed files with 1756 additions and 0 deletions

View File

@@ -0,0 +1,98 @@
"""SQLite database schema and connection management."""
import sqlite3
from contextlib import contextmanager
from pathlib import Path
from typing import Generator
from atocore.config import settings
from atocore.observability.logger import get_logger
# Module-level logger for database events.
log = get_logger("database")

# Full DDL for the database: five tables plus their secondary indexes.
# Every statement uses IF NOT EXISTS, so the script can be run repeatedly
# against an existing database; note that this also means an existing table
# is left as-is if its definition here later changes.
#
# Tables:
#   source_documents - one row per ingested file, unique on file_path.
#   source_chunks    - pieces of a document; removed automatically with their
#                      parent document (ON DELETE CASCADE) when foreign-key
#                      enforcement is enabled on the connection.
#   memories         - typed content entries, optionally linked to a chunk.
#   projects         - named projects, unique on name.
#   interactions     - recorded prompts, optionally linked to a project.
#
# The '[]' / '{}' defaults on tags, metadata and context_pack are presumably
# JSON-encoded text -- confirm against the code that reads these columns.
#
# NOTE(review): memories.source_chunk_id and interactions.project_id declare
# no ON DELETE action. With foreign keys enforced, deleting a referenced
# chunk (including via the document cascade) or project would be rejected
# while dependent rows exist -- confirm this is the intended behavior.
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS source_documents (
id TEXT PRIMARY KEY,
file_path TEXT UNIQUE NOT NULL,
file_hash TEXT NOT NULL,
title TEXT,
doc_type TEXT DEFAULT 'markdown',
tags TEXT DEFAULT '[]',
ingested_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS source_chunks (
id TEXT PRIMARY KEY,
document_id TEXT NOT NULL REFERENCES source_documents(id) ON DELETE CASCADE,
chunk_index INTEGER NOT NULL,
content TEXT NOT NULL,
heading_path TEXT DEFAULT '',
char_count INTEGER NOT NULL,
metadata TEXT DEFAULT '{}',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
memory_type TEXT NOT NULL,
content TEXT NOT NULL,
source_chunk_id TEXT REFERENCES source_chunks(id),
confidence REAL DEFAULT 1.0,
status TEXT DEFAULT 'active',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS projects (
id TEXT PRIMARY KEY,
name TEXT UNIQUE NOT NULL,
description TEXT DEFAULT '',
status TEXT DEFAULT 'active',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS interactions (
id TEXT PRIMARY KEY,
prompt TEXT NOT NULL,
context_pack TEXT DEFAULT '{}',
response_summary TEXT DEFAULT '',
project_id TEXT REFERENCES projects(id),
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_chunks_document ON source_chunks(document_id);
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type);
CREATE INDEX IF NOT EXISTS idx_memories_status ON memories(status);
CREATE INDEX IF NOT EXISTS idx_interactions_project ON interactions(project_id);
"""
def _ensure_data_dir() -> None:
    """Create ``settings.data_dir`` (and any missing parents) if absent.

    Does nothing when the directory already exists.
    """
    data_dir = settings.data_dir
    data_dir.mkdir(exist_ok=True, parents=True)
def init_db() -> None:
    """Create the tables and indexes defined in ``SCHEMA_SQL``.

    Makes sure the data directory exists, runs the whole schema script on a
    connection obtained from ``get_connection()``, and then emits a
    ``database_initialized`` log event carrying the database path.
    """
    _ensure_data_dir()

    with get_connection() as connection:
        # executescript runs every statement in the multi-statement DDL.
        connection.executescript(SCHEMA_SQL)

    db_location = str(settings.db_path)
    log.info("database_initialized", path=db_location)
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a configured SQLite connection to ``settings.db_path``.

    The data directory is created first if needed. The connection returns
    rows as ``sqlite3.Row`` objects and has foreign-key enforcement switched
    on.

    On normal exit from the ``with`` body the transaction is committed; if
    the body (or connection setup) raises an ``Exception`` the transaction
    is rolled back and the exception re-raised. The connection is closed in
    every case.

    Yields:
        sqlite3.Connection: an open connection, valid only inside the
        ``with`` block.
    """
    _ensure_data_dir()
    conn = sqlite3.connect(str(settings.db_path))
    try:
        # Setup lives inside the try so that a failure here (e.g. a locked
        # or unreadable database file) still reaches conn.close() below
        # instead of leaking the open connection.
        conn.row_factory = sqlite3.Row
        # SQLite applies this setting per connection, so it is issued on
        # every open.
        conn.execute("PRAGMA foreign_keys = ON")
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()