scripts/query_test.py

"""CLI script to run test prompts and compare baseline vs enriched."""

import argparse
import sys
from pathlib import Path

import yaml

# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from atocore.context.builder import build_context
from atocore.models.database import init_db
from atocore.observability.logger import setup_logging


def main():
    """Run every prompt in a YAML file through ``build_context`` and print a report.

    Command-line options:
        --prompts: Path to the prompts YAML file. Defaults to
            ``tests/test_prompts/prompts.yaml`` under the directory that
            contains this script's parent directory.

    The YAML document must be a mapping with a ``prompts`` list. Each entry
    needs ``id`` and ``prompt`` keys; ``project`` and ``expected`` are
    optional.

    For each prompt the function prints the prompt header, the number of
    chunks used, character totals, duration, a preview of up to five chunks,
    and the length of the full assembled prompt.

    Exits with status 1 if the prompts file is not a regular file or if its
    top-level YAML value is not a mapping.
    """
    parser = argparse.ArgumentParser(description="Run test prompts against AtoCore")
    parser.add_argument(
        "--prompts",
        default=str(Path(__file__).parent.parent / "tests" / "test_prompts" / "prompts.yaml"),
        help="Path to prompts YAML file",
    )
    args = parser.parse_args()

    setup_logging()
    init_db()

    prompts_path = Path(args.prompts)
    # is_file() rather than exists(): a directory would pass exists() and then
    # fail later inside open() with a less helpful traceback.
    if not prompts_path.is_file():
        print(f"Error: {prompts_path} not found", file=sys.stderr)
        sys.exit(1)

    # Explicit UTF-8 so non-ASCII prompts are read correctly regardless of the
    # platform's default encoding.
    with open(prompts_path, encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat that as "no prompts".
        data = yaml.safe_load(f) or {}

    if not isinstance(data, dict):
        print(
            f"Error: {prompts_path} must contain a mapping with a 'prompts' list",
            file=sys.stderr,
        )
        sys.exit(1)

    # A bare "prompts:" key parses to None, so fall back to an empty list.
    prompts = data.get("prompts") or []
    print(f"Running {len(prompts)} test prompts...\n")

    for p in prompts:
        prompt_id = p["id"]
        prompt_text = p["prompt"]
        project = p.get("project")
        expected = p.get("expected", "")

        print(f"{'='*60}")
        print(f"[{prompt_id}] {prompt_text}")
        print(f"Project: {project or 'none'}")
        print(f"Expected: {expected}")
        print("-" * 60)

        pack = build_context(
            user_prompt=prompt_text,
            project_hint=project,
        )

        print(f"Chunks retrieved: {len(pack.chunks_used)}")
        print(f"Total chars: {pack.total_chars} / {pack.budget}")
        print(f"Duration: {pack.duration_ms}ms")
        print()

        # Only the first five chunks are previewed to keep the report readable.
        for i, chunk in enumerate(pack.chunks_used[:5]):
            print(f"  [{i+1}] Score: {chunk.score:.2f} | {chunk.source_file}")
            print(f"      Section: {chunk.heading_path}")
            print(f"      Preview: {chunk.content[:120]}...")
            print()

        print(f"Full prompt length: {len(pack.full_prompt)} chars")
        print()

    print(f"{'='*60}")
    print("Done. Review output above to assess retrieval quality.")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
feat: implement AtoCore Phase 0 + Phase 0.5 (foundation + PoC) Complete implementation of the personal context engine foundation: - FastAPI server with 5 endpoints (ingest, query, context/build, health, debug) - SQLite database with 5 tables (documents, chunks, memories, projects, interactions) - Heading-aware markdown chunker (800 char max, recursive splitting) - Multilingual embeddings via sentence-transformers (EN/FR) - ChromaDB vector store with cosine similarity retrieval - Context builder with project boosting, dedup, and budget enforcement - CLI scripts for batch ingestion and test prompt evaluation - 19 unit tests passing, 79% coverage - Validated on 482 real project files (8383 chunks, 0 errors) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-04-05 09:21:27 -04:00			`"""CLI script to run test prompts and compare baseline vs enriched."""`

			`import argparse`
			`import sys`
			`from pathlib import Path`

			`import yaml`

			`# Add src to path`
			`sys.path.insert(0, str(Path(__file__).parent.parent / "src"))`

			`from atocore.context.builder import build_context`
			`from atocore.models.database import init_db`
			`from atocore.observability.logger import setup_logging`


			`def main():`
			`parser = argparse.ArgumentParser(description="Run test prompts against AtoCore")`
			`parser.add_argument(`
			`"--prompts",`
			`default=str(Path(__file__).parent.parent / "tests" / "test_prompts" / "prompts.yaml"),`
			`help="Path to prompts YAML file",`
			`)`
			`args = parser.parse_args()`

			`setup_logging()`
			`init_db()`

			`prompts_path = Path(args.prompts)`
			`if not prompts_path.exists():`
			`print(f"Error: {prompts_path} not found")`
			`sys.exit(1)`

			`with open(prompts_path) as f:`
			`data = yaml.safe_load(f)`

			`prompts = data.get("prompts", [])`
			`print(f"Running {len(prompts)} test prompts...\n")`

			`for p in prompts:`
			`prompt_id = p["id"]`
			`prompt_text = p["prompt"]`
			`project = p.get("project")`
			`expected = p.get("expected", "")`

			`print(f"{'='*60}")`
			`print(f"[{prompt_id}] {prompt_text}")`
			`print(f"Project: {project or 'none'}")`
			`print(f"Expected: {expected}")`
			`print(f"-" * 60)`

			`pack = build_context(`
			`user_prompt=prompt_text,`
			`project_hint=project,`
			`)`

			`print(f"Chunks retrieved: {len(pack.chunks_used)}")`
			`print(f"Total chars: {pack.total_chars} / {pack.budget}")`
			`print(f"Duration: {pack.duration_ms}ms")`
			`print()`

			`for i, chunk in enumerate(pack.chunks_used[:5]):`
			`print(f" [{i+1}] Score: {chunk.score:.2f} \| {chunk.source_file}")`
			`print(f" Section: {chunk.heading_path}")`
			`print(f" Preview: {chunk.content[:120]}...")`
			`print()`

			`print(f"Full prompt length: {len(pack.full_prompt)} chars")`
			`print()`

			`print(f"{'='*60}")`
			`print("Done. Review output above to assess retrieval quality.")`


			`if __name__ == "__main__":`
			`main()`