fix(retrieval): preserve project ids across unscoped ingest
This commit is contained in:
@@ -103,6 +103,66 @@ def test_ingest_file_records_project_id_metadata(tmp_data_dir, sample_markdown,
|
||||
)
|
||||
|
||||
|
||||
def test_ingest_file_derives_project_id_from_registry_root(tmp_data_dir, tmp_path, monkeypatch):
    """Unscoped ingest should preserve ownership for files under registered roots.

    Creates a temporary vault holding one note under
    ``incoming/projects/p04-gigabit``, writes a project registry that lists
    that subpath as an ingest root of project ``p04-gigabit``, and ingests
    the note without passing a project id. Every metadata dict handed to the
    vector store must carry ``project_id == "p04-gigabit"``.
    """
    import atocore.config as config

    # Source roots and registry location, all isolated under tmp_path.
    vault_dir = tmp_path / "vault"
    drive_dir = tmp_path / "drive"
    config_dir = tmp_path / "config"
    project_dir = vault_dir / "incoming" / "projects" / "p04-gigabit"
    project_dir.mkdir(parents=True)
    drive_dir.mkdir()
    config_dir.mkdir()

    note = project_dir / "status.md"
    note.write_text(
        "# Status\n\nCurrent project status with enough detail to create "
        "a retrievable chunk for the ingestion pipeline test.",
        encoding="utf-8",
    )

    # Registry entry whose ingest root is exactly the directory holding the note.
    registry_path = config_dir / "project-registry.json"
    registry_path.write_text(
        json.dumps(
            {
                "projects": [
                    {
                        "id": "p04-gigabit",
                        "aliases": ["p04"],
                        "ingest_roots": [
                            {"source": "vault", "subpath": "incoming/projects/p04-gigabit"}
                        ],
                    }
                ]
            }
        ),
        encoding="utf-8",
    )

    class FakeVectorStore:
        """In-memory stand-in that records the metadata passed to ``add``."""

        def __init__(self):
            self.metadatas = []

        def add(self, ids, documents, metadatas):
            self.metadatas.extend(metadatas)

        def delete(self, ids):
            return None

    fake_store = FakeVectorStore()
    monkeypatch.setenv("ATOCORE_VAULT_SOURCE_DIR", str(vault_dir))
    monkeypatch.setenv("ATOCORE_DRIVE_SOURCE_DIR", str(drive_dir))
    monkeypatch.setenv("ATOCORE_PROJECT_REGISTRY_PATH", str(registry_path))
    # Rebuild settings once the env overrides are in place (presumably
    # Settings reads them at construction time -- confirm against
    # atocore.config). Installing the new object through monkeypatch restores
    # the previous settings at teardown, so the temporary paths do not leak
    # into later tests the way a plain module-attribute assignment would.
    monkeypatch.setattr(config, "settings", config.Settings())
    monkeypatch.setattr("atocore.ingestion.pipeline.get_vector_store", lambda: fake_store)

    init_db()
    # No project id is supplied here; ownership must be derived by the pipeline.
    result = ingest_file(note)

    assert result["status"] == "ingested"
    # Guard against a vacuous pass: all() over an empty list is True.
    assert fake_store.metadatas
    assert all(meta["project_id"] == "p04-gigabit" for meta in fake_store.metadatas)
|
||||
|
||||
|
||||
def test_ingest_project_folder_passes_project_id_to_files(tmp_data_dir, sample_folder, monkeypatch):
|
||||
seen = []
|
||||
|
||||
|
||||
Reference in New Issue
Block a user