fix: critical bugs and hardening from validation audit

- Fix infinite loop in chunker _hard_split when overlap >= max_size
- Fix tag filter false positives by quoting tag values in ChromaDB query
- Fix score boost semantics (additive → multiplicative) so boosted scores stay within the 0-1 range
- Add error handling to the ingest, query, and context-build API routes, and return type hints to all API routes
- Update README with proper project documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 09:35:37 -04:00
parent b4afbbb53a
commit 6081462058
5 changed files with 117 additions and 25 deletions

View File

@@ -6,23 +6,24 @@ from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from atocore.context.builder import (
ContextPack,
build_context,
get_last_context_pack,
_pack_to_dict,
)
from atocore.ingestion.pipeline import ingest_file, ingest_folder
from atocore.observability.logger import get_logger
from atocore.retrieval.retriever import retrieve
from atocore.retrieval.vector_store import get_vector_store
router = APIRouter()
log = get_logger("api")
# --- Request/Response models ---
class IngestRequest(BaseModel):
path: str # file or folder path
path: str
class IngestResponse(BaseModel):
@@ -60,22 +61,32 @@ class ContextBuildResponse(BaseModel):
@router.post("/ingest", response_model=IngestResponse)
def api_ingest(req: IngestRequest):
def api_ingest(req: IngestRequest) -> IngestResponse:
"""Ingest a markdown file or folder."""
target = Path(req.path)
if target.is_file():
results = [ingest_file(target)]
elif target.is_dir():
results = ingest_folder(target)
else:
raise HTTPException(status_code=404, detail=f"Path not found: {req.path}")
try:
if target.is_file():
results = [ingest_file(target)]
elif target.is_dir():
results = ingest_folder(target)
else:
raise HTTPException(status_code=404, detail=f"Path not found: {req.path}")
except HTTPException:
raise
except Exception as e:
log.error("ingest_failed", path=req.path, error=str(e))
raise HTTPException(status_code=500, detail=f"Ingestion failed: {e}")
return IngestResponse(results=results)
@router.post("/query", response_model=QueryResponse)
def api_query(req: QueryRequest):
def api_query(req: QueryRequest) -> QueryResponse:
"""Retrieve relevant chunks for a prompt."""
chunks = retrieve(req.prompt, top_k=req.top_k, filter_tags=req.filter_tags)
try:
chunks = retrieve(req.prompt, top_k=req.top_k, filter_tags=req.filter_tags)
except Exception as e:
log.error("query_failed", prompt=req.prompt[:100], error=str(e))
raise HTTPException(status_code=500, detail=f"Query failed: {e}")
return QueryResponse(
results=[
{
@@ -92,13 +103,17 @@ def api_query(req: QueryRequest):
@router.post("/context/build", response_model=ContextBuildResponse)
def api_build_context(req: ContextBuildRequest):
def api_build_context(req: ContextBuildRequest) -> ContextBuildResponse:
"""Build a full context pack for a prompt."""
pack = build_context(
user_prompt=req.prompt,
project_hint=req.project,
budget=req.budget,
)
try:
pack = build_context(
user_prompt=req.prompt,
project_hint=req.project,
budget=req.budget,
)
except Exception as e:
log.error("context_build_failed", prompt=req.prompt[:100], error=str(e))
raise HTTPException(status_code=500, detail=f"Context build failed: {e}")
pack_dict = _pack_to_dict(pack)
return ContextBuildResponse(
formatted_context=pack.formatted_context,
@@ -113,7 +128,7 @@ def api_build_context(req: ContextBuildRequest):
@router.get("/health")
def api_health():
def api_health() -> dict:
"""Health check."""
store = get_vector_store()
return {
@@ -124,7 +139,7 @@ def api_health():
@router.get("/debug/context")
def api_debug_context():
def api_debug_context() -> dict:
"""Inspect the last assembled context pack."""
pack = get_last_context_pack()
if pack is None: