fix: critical bugs and hardening from validation audit
- Fix infinite loop in chunker _hard_split when overlap >= max_size
- Fix tag filter false positives by quoting tag values in ChromaDB query
- Fix score boost semantics (additive → multiplicative) to stay within 0-1 range
- Add error handling and type hints to all API routes
- Update README with proper project documentation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -36,11 +36,20 @@ def retrieve(
|
||||
store = get_vector_store()
|
||||
|
||||
# Build filter
|
||||
# Tags are stored as JSON strings like '["tag1", "tag2"]'.
|
||||
# We use $contains with quoted tag to avoid substring false positives
|
||||
# (e.g. searching "prod" won't match "production" because we search '"prod"').
|
||||
where = None
|
||||
if filter_tags:
|
||||
# ChromaDB where filter for tags (stored as JSON string)
|
||||
# Simple contains check — works for single-tag filtering
|
||||
where = {"tags": {"$contains": filter_tags[0]}}
|
||||
if len(filter_tags) == 1:
|
||||
where = {"tags": {"$contains": f'"{filter_tags[0]}"'}}
|
||||
else:
|
||||
where = {
|
||||
"$and": [
|
||||
{"tags": {"$contains": f'"{tag}"'}}
|
||||
for tag in filter_tags
|
||||
]
|
||||
}
|
||||
|
||||
results = store.query(
|
||||
query_embedding=query_embedding,
|
||||
|
||||
Reference in New Issue
Block a user