Stabilize core correctness and sync project plan state
This commit is contained in:
@@ -3,7 +3,8 @@
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from atocore.config import settings
|
||||
import atocore.config as _config
|
||||
from atocore.models.database import get_connection
|
||||
from atocore.observability.logger import get_logger
|
||||
from atocore.retrieval.embeddings import embed_query
|
||||
from atocore.retrieval.vector_store import get_vector_store
|
||||
@@ -29,7 +30,7 @@ def retrieve(
|
||||
filter_tags: list[str] | None = None,
|
||||
) -> list[ChunkResult]:
|
||||
"""Retrieve the most relevant chunks for a query."""
|
||||
top_k = top_k or settings.context_top_k
|
||||
top_k = top_k or _config.settings.context_top_k
|
||||
start = time.time()
|
||||
|
||||
query_embedding = embed_query(query)
|
||||
@@ -59,7 +60,10 @@ def retrieve(
|
||||
|
||||
chunks = []
|
||||
if results and results["ids"] and results["ids"][0]:
|
||||
existing_ids = _existing_chunk_ids(results["ids"][0])
|
||||
for i, chunk_id in enumerate(results["ids"][0]):
|
||||
if chunk_id not in existing_ids:
|
||||
continue
|
||||
# ChromaDB returns distances (lower = more similar for cosine)
|
||||
# Convert to similarity score (1 - distance)
|
||||
distance = results["distances"][0][i] if results["distances"] else 0
|
||||
@@ -90,3 +94,17 @@ def retrieve(
|
||||
)
|
||||
|
||||
return chunks
|
||||
|
||||
|
||||
def _existing_chunk_ids(chunk_ids: list[str]) -> set[str]:
|
||||
"""Filter out stale vector entries whose chunk rows no longer exist."""
|
||||
if not chunk_ids:
|
||||
return set()
|
||||
|
||||
placeholders = ", ".join("?" for _ in chunk_ids)
|
||||
with get_connection() as conn:
|
||||
rows = conn.execute(
|
||||
f"SELECT id FROM source_chunks WHERE id IN ({placeholders})",
|
||||
chunk_ids,
|
||||
).fetchall()
|
||||
return {row["id"] for row in rows}
|
||||
|
||||
Reference in New Issue
Block a user