Expand active project wave and serialize refreshes

This commit is contained in:
2026-04-06 14:58:14 -04:00
parent 46a5d5887a
commit bdb42dba05
8 changed files with 243 additions and 45 deletions

View File

@@ -18,6 +18,7 @@ from atocore.context.project_state import (
set_state,
)
from atocore.ingestion.pipeline import (
exclusive_ingestion,
get_ingestion_stats,
get_source_status,
ingest_configured_sources,
@@ -153,12 +154,13 @@ def api_ingest(req: IngestRequest) -> IngestResponse:
"""Ingest a markdown file or folder."""
target = Path(req.path)
try:
if target.is_file():
results = [ingest_file(target)]
elif target.is_dir():
results = ingest_folder(target)
else:
raise HTTPException(status_code=404, detail=f"Path not found: {req.path}")
with exclusive_ingestion():
if target.is_file():
results = [ingest_file(target)]
elif target.is_dir():
results = ingest_folder(target)
else:
raise HTTPException(status_code=404, detail=f"Path not found: {req.path}")
except HTTPException:
raise
except Exception as e:
@@ -171,7 +173,8 @@ def api_ingest(req: IngestRequest) -> IngestResponse:
def api_ingest_sources() -> IngestSourcesResponse:
"""Ingest enabled configured source directories."""
try:
results = ingest_configured_sources()
with exclusive_ingestion():
results = ingest_configured_sources()
except Exception as e:
log.error("ingest_sources_failed", error=str(e))
raise HTTPException(status_code=500, detail=f"Configured source ingestion failed: {e}")
@@ -246,7 +249,8 @@ def api_project_update(project_name: str, req: ProjectUpdateRequest) -> dict:
def api_refresh_project(project_name: str, purge_deleted: bool = False) -> ProjectRefreshResponse:
"""Refresh one registered project from its configured ingest roots."""
try:
result = refresh_registered_project(project_name, purge_deleted=purge_deleted)
with exclusive_ingestion():
result = refresh_registered_project(project_name, purge_deleted=purge_deleted)
except ValueError as e:
raise HTTPException(status_code=404, detail=str(e))
except Exception as e:

View File

@@ -2,8 +2,10 @@
import hashlib
import json
import threading
import time
import uuid
from contextlib import contextmanager
from pathlib import Path
import atocore.config as _config
@@ -17,6 +19,17 @@ log = get_logger("ingestion")
# Encodings to try when reading markdown files
_ENCODINGS = ["utf-8", "utf-8-sig", "latin-1", "cp1252"]
_INGESTION_LOCK = threading.Lock()
@contextmanager
def exclusive_ingestion():
"""Serialize long-running ingestion operations across API requests."""
_INGESTION_LOCK.acquire()
try:
yield
finally:
_INGESTION_LOCK.release()
def ingest_file(file_path: Path) -> dict: