55 lines
1.5 KiB
Python
55 lines
1.5 KiB
Python
|
|
"""CLI script to ingest a folder of markdown files."""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
# Add src to path
|
||
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
||
|
|
|
||
|
|
from atocore.ingestion.pipeline import ingest_folder
|
||
|
|
from atocore.models.database import init_db
|
||
|
|
from atocore.observability.logger import setup_logging
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Ingest a folder of markdown files into AtoCore and print a summary.

    Parses ``--path`` from the command line, checks that it names an existing
    directory, then sets up logging, initialises the database and passes the
    folder to ``ingest_folder``. A summary of the returned result records is
    printed to stdout, followed by one line per record whose status is
    ``"error"``.

    Raises:
        SystemExit: with status 1 when ``--path`` is not a directory.
            ``argparse`` itself exits with status 2 on a usage error.
    """
    parser = argparse.ArgumentParser(description="Ingest markdown files into AtoCore")
    parser.add_argument("--path", required=True, help="Path to folder with markdown files")
    args = parser.parse_args()

    # Validate the input before any logging/database setup so that a bad
    # path fails fast without ever calling init_db().
    folder = Path(args.path)
    if not folder.is_dir():
        # Diagnostics go to stderr so they are not mixed into piped output.
        print(f"Error: {folder} is not a directory", file=sys.stderr)
        sys.exit(1)

    setup_logging()
    init_db()

    results = ingest_folder(folder)

    # Summary
    statuses = [r["status"] for r in results]
    # Keep the failed records so the report below needs no second scan.
    failures = [r for r in results if r["status"] == "error"]
    ingested = statuses.count("ingested")
    skipped = statuses.count("skipped")
    errors = len(failures)
    # "chunks" is optional on a record; a missing key counts as zero.
    total_chunks = sum(r.get("chunks", 0) for r in results)

    separator = "=" * 50
    print(f"\n{separator}")
    print("Ingestion complete:")
    print(f" Files processed: {len(results)}")
    print(f" Ingested: {ingested}")
    print(f" Skipped (unchanged): {skipped}")
    print(f" Errors: {errors}")
    print(f" Total chunks created: {total_chunks}")
    print(separator)

    if failures:
        print("\nErrors:")
        for r in failures:
            print(f" {r['file']}: {r['error']}")
|
||
|
|
|
||
|
|
|
||
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|