feat: implement AtoCore Phase 0 + Phase 0.5 (foundation + PoC)

Complete implementation of the personal context engine foundation:
- FastAPI server with 5 endpoints (ingest, query, context/build, health, debug)
- SQLite database with 5 tables (documents, chunks, memories, projects, interactions)
- Heading-aware markdown chunker (800 char max, recursive splitting)
- Multilingual embeddings via sentence-transformers (EN/FR)
- ChromaDB vector store with cosine similarity retrieval
- Context builder with project boosting, dedup, and budget enforcement
- CLI scripts for batch ingestion and test prompt evaluation
- 19 unit tests passing, 79% coverage
- Validated on 482 real project files (8383 chunks, 0 errors)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 09:21:27 -04:00
parent 32ce409a7b
commit b4afbbb53a
34 changed files with 1756 additions and 0 deletions

132
src/atocore/api/routes.py Normal file
View File

@@ -0,0 +1,132 @@
"""FastAPI route definitions."""
from pathlib import Path
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from atocore.context.builder import (
ContextPack,
build_context,
get_last_context_pack,
_pack_to_dict,
)
from atocore.ingestion.pipeline import ingest_file, ingest_folder
from atocore.retrieval.retriever import retrieve
from atocore.retrieval.vector_store import get_vector_store
router = APIRouter()
# --- Request/Response models ---
class IngestRequest(BaseModel):
    """Request body accepted by ``POST /ingest``."""

    # Filesystem path of either a single file or a folder to ingest.
    path: str
class IngestResponse(BaseModel):
    """Response body returned by ``POST /ingest``."""

    # For a file: a one-element list holding the ``ingest_file`` result.
    # For a folder: the list returned by ``ingest_folder``.
    results: list[dict]
class QueryRequest(BaseModel):
    """Request body accepted by ``POST /query``."""

    # Text forwarded to ``retrieve`` as the search prompt.
    prompt: str
    # Forwarded to ``retrieve`` as ``top_k``; defaults to 10.
    top_k: int = 10
    # Optional tag filter forwarded to ``retrieve``; ``None`` when omitted.
    filter_tags: list[str] | None = None
class QueryResponse(BaseModel):
    """Response body returned by ``POST /query``."""

    # One dict per retrieved chunk with the keys ``chunk_id``, ``content``,
    # ``score``, ``heading_path``, ``source_file`` and ``title``.
    results: list[dict]
class ContextBuildRequest(BaseModel):
    """Request body accepted by ``POST /context/build``."""

    # Passed to ``build_context`` as ``user_prompt``.
    prompt: str
    # Passed to ``build_context`` as ``project_hint``; ``None`` when omitted.
    project: str | None = None
    # Passed to ``build_context`` as ``budget``; ``None`` is forwarded as-is
    # (presumably the builder then applies its own default -- TODO confirm).
    budget: int | None = None
class ContextBuildResponse(BaseModel):
    """Response body returned by ``POST /context/build``.

    Apart from ``chunks_used`` and ``chunks``, every field is copied from the
    attribute of the same name on the context pack built for the request.
    """

    formatted_context: str
    full_prompt: str
    # Number of entries in the pack's ``chunks_used`` collection.
    chunks_used: int
    total_chars: int
    budget: int
    budget_remaining: int
    # Taken from the pack; the name suggests milliseconds -- TODO confirm.
    duration_ms: int
    # The ``"chunks"`` entry of the pack's dict serialisation.
    chunks: list[dict]
# --- Endpoints ---
@router.post("/ingest", response_model=IngestResponse)
def api_ingest(req: IngestRequest):
    """Ingest a markdown file or folder.

    Args:
        req: Request body whose ``path`` names a file or a folder.

    Returns:
        An ``IngestResponse`` wrapping the ``ingest_file`` result (as a
        one-element list) or the list returned by ``ingest_folder``.

    Raises:
        HTTPException: 400 if ``path`` is blank; 404 if it is neither an
            existing file nor an existing directory.
    """
    # Path("") is the same as Path("."), and is_dir() is true for it, so a
    # blank path would otherwise ingest the server's working directory.
    if not req.path.strip():
        raise HTTPException(status_code=400, detail="Path must not be empty")

    target = Path(req.path)
    if target.is_file():
        results = [ingest_file(target)]
    elif target.is_dir():
        results = ingest_folder(target)
    else:
        raise HTTPException(status_code=404, detail=f"Path not found: {req.path}")
    return IngestResponse(results=results)
@router.post("/query", response_model=QueryResponse)
def api_query(req: QueryRequest):
    """Retrieve relevant chunks for a prompt.

    Forwards ``prompt``, ``top_k`` and ``filter_tags`` to ``retrieve`` and
    projects each returned chunk onto a plain dict.

    Args:
        req: Query parameters.

    Returns:
        A ``QueryResponse`` with one dict per retrieved chunk, in the order
        ``retrieve`` returned them.
    """
    # Attributes copied from each chunk; the order fixes the dict key order.
    exported_fields = (
        "chunk_id",
        "content",
        "score",
        "heading_path",
        "source_file",
        "title",
    )
    hits = retrieve(req.prompt, top_k=req.top_k, filter_tags=req.filter_tags)
    payload = []
    for hit in hits:
        payload.append({field: getattr(hit, field) for field in exported_fields})
    return QueryResponse(results=payload)
@router.post("/context/build", response_model=ContextBuildResponse)
def api_build_context(req: ContextBuildRequest):
    """Build a full context pack for a prompt.

    Args:
        req: Prompt plus optional project hint and budget, all forwarded to
            ``build_context``.

    Returns:
        A ``ContextBuildResponse`` populated from the pack's attributes, with
        ``chunks`` taken from the pack's dict serialisation.
    """
    built = build_context(
        user_prompt=req.prompt,
        project_hint=req.project,
        budget=req.budget,
    )
    # Only the "chunks" entry of the serialised pack is needed; the remaining
    # fields are read straight from the pack object.
    serialised = _pack_to_dict(built)
    response_fields = {
        "formatted_context": built.formatted_context,
        "full_prompt": built.full_prompt,
        "chunks_used": len(built.chunks_used),
        "total_chars": built.total_chars,
        "budget": built.budget,
        "budget_remaining": built.budget_remaining,
        "duration_ms": built.duration_ms,
        "chunks": serialised["chunks"],
    }
    return ContextBuildResponse(**response_fields)
@router.get("/health")
def api_health():
    """Health check.

    Returns:
        A dict with a fixed ``status`` and ``version`` plus ``vectors_count``,
        read from the vector store's ``count`` attribute.
    """
    vector_store = get_vector_store()
    report = {"status": "ok", "version": "0.1.0"}
    report["vectors_count"] = vector_store.count
    return report
@router.get("/debug/context")
def api_debug_context():
    """Inspect the last assembled context pack.

    Returns:
        The dict serialisation of the most recent pack, or a dict with a
        single ``message`` key when no pack has been built yet.
    """
    latest = get_last_context_pack()
    if latest is not None:
        return _pack_to_dict(latest)
    return {"message": "No context pack built yet."}