2026-04-05 09:41:59 -04:00
|
|
|
"""Tests for Trusted Project State."""
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
from atocore.context.project_state import (
|
|
|
|
|
CATEGORIES,
|
|
|
|
|
ensure_project,
|
|
|
|
|
format_project_state,
|
|
|
|
|
get_state,
|
|
|
|
|
init_project_state_schema,
|
|
|
|
|
invalidate_state,
|
|
|
|
|
set_state,
|
|
|
|
|
)
|
|
|
|
|
from atocore.models.database import init_db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def setup_db(tmp_data_dir):
    """Initialize DB and project state schema for every test."""
    # tmp_data_dir is presumably a conftest fixture that redirects the
    # data directory to a per-test temp location — confirm in conftest.py.
    # Order matters: the base DB must exist before the project-state
    # schema is layered on top of it.
    init_db()
    init_project_state_schema()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_ensure_project_creates():
    """Creating a project yields an id; re-creating returns the same id."""
    first_id = ensure_project("test-project", "A test project")
    assert first_id
    # Idempotent: a second call with the same name must not create a new row.
    assert ensure_project("test-project") == first_id
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_state_creates_entry():
    """A fresh set_state call returns a fully-populated active entry."""
    entry = set_state("myproject", "status", "phase", "Phase 0.5 — PoC complete")
    assert (entry.category, entry.key) == ("status", "phase")
    assert entry.value == "Phase 0.5 — PoC complete"
    assert entry.status == "active"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_state_upserts():
    """Setting the same key twice updates in place instead of duplicating."""
    set_state("myproject", "status", "phase", "Phase 0")
    updated = set_state("myproject", "status", "phase", "Phase 1")
    assert updated.value == "Phase 1"

    # The upsert must leave exactly one row behind, carrying the new value.
    status_entries = get_state("myproject", category="status")
    assert len(status_entries) == 1
    assert status_entries[0].value == "Phase 1"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_state_invalid_category():
    """An unknown category is rejected with ValueError."""
    expected = "Invalid category"
    with pytest.raises(ValueError, match=expected):
        set_state("myproject", "invalid_category", "key", "value")
|
|
|
|
|
|
|
|
|
|
|
2026-04-05 17:53:23 -04:00
|
|
|
def test_set_state_validates_confidence():
    """Project-state confidence should stay within the documented range."""
    too_high = 1.2
    with pytest.raises(ValueError, match="Confidence must be between 0.0 and 1.0"):
        set_state("myproject", "status", "phase", "Phase 1", confidence=too_high)
|
|
|
|
|
|
|
|
|
|
|
2026-04-05 09:41:59 -04:00
|
|
|
def test_get_state_all():
    """An unfiltered read returns entries from every category."""
    seed = [
        ("status", "phase", "Phase 1"),
        ("decision", "database", "SQLite for v1"),
        ("requirement", "latency", "<2 seconds"),
    ]
    for category, key, value in seed:
        set_state("proj", category, key, value)

    entries = get_state("proj")
    assert len(entries) == 3
    assert {e.category for e in entries} == {"status", "decision", "requirement"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_get_state_by_category():
    """The category= filter returns only matching entries."""
    set_state("proj", "status", "phase", "Phase 1")
    for key, value in (("database", "SQLite"), ("vectordb", "ChromaDB")):
        set_state("proj", "decision", key, value)

    decisions = get_state("proj", category="decision")
    assert len(decisions) == 2
    # Nothing outside the requested category leaks through.
    assert {e.category for e in decisions} == {"decision"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_get_state_nonexistent_project():
    """An unknown project yields an empty list rather than an error."""
    assert get_state("nonexistent") == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_invalidate_state():
    """Invalidation hides an entry from active reads but keeps the row."""
    set_state("invalidate-test", "decision", "approach", "monolith")
    assert invalidate_state("invalidate-test", "decision", "approach")

    # Gone from the active-only view...
    assert get_state("invalidate-test", active_only=True) == []

    # ...yet still stored, now marked superseded, when inactive rows
    # are included.
    all_entries = get_state("invalidate-test", active_only=False)
    assert len(all_entries) == 1
    assert all_entries[0].status == "superseded"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_invalidate_nonexistent():
    """Invalidating a key that was never set reports failure."""
    assert not invalidate_state("proj", "decision", "nonexistent")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_format_project_state():
    """Formatted output carries delimiters, key/value pairs, and sources."""
    set_state("proj", "status", "phase", "Phase 1")
    set_state("proj", "decision", "database", "SQLite", source="Build Spec V1")

    formatted = format_project_state(get_state("proj"))
    for fragment in (
        "--- Trusted Project State ---",
        "--- End Project State ---",
        "phase: Phase 1",
        "database: SQLite",
        "(source: Build Spec V1)",
    ):
        assert fragment in formatted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_format_empty():
    """No entries means no context block at all."""
    rendered = format_project_state([])
    assert rendered == ""
|
fix(P1+P2): canonicalize project names at every trust boundary
Three findings from codex's review of the previous P1+P2 fix. The
earlier commit (f2372ef) only fixed alias resolution at the context
builder. Codex correctly pointed out that the same fragmentation
applies at every other place a project name crosses a boundary —
project_state writes/reads, interaction capture/listing/filtering,
memory create/queries, and reinforcement's downstream queries. Plus
a real bug in the interaction `since` filter where the storage
format and the documented ISO format don't compare cleanly.
The fix is one helper used at every boundary instead of duplicating
the resolution inline.
New helper: src/atocore/projects/registry.py::resolve_project_name
---------------------------------------------------------------
- Single canonicalization boundary for project names
- Returns the canonical project_id when the input matches any
registered id or alias
- Returns the input unchanged for empty/None and for unregistered
names (preserves backwards compat with hand-curated state that
predates the registry)
- Documented as the contract that every read/write at the trust
boundary should pass through
P1 — Trusted Project State endpoints
------------------------------------
src/atocore/context/project_state.py: set_state, get_state, and
invalidate_state now all canonicalize project_name through
resolve_project_name BEFORE looking up or creating the project row.
Before this fix:
- POST /project/state with project="p05" called ensure_project("p05")
which created a separate row in the projects table
- The state row was attached to that alias project_id
- Later context builds canonicalized "p05" -> "p05-interferometer"
via the builder fix from f2372ef and never found the state
- Result: trusted state silently fragmented across alias rows
After this fix:
- The alias is resolved to the canonical id at every entry point
- Two captures (one via "p05", one via "p05-interferometer") write
to the same row
- get_state via either alias or the canonical id finds the same row
Fixes the highest-priority gap codex flagged because Trusted Project
State is supposed to be the most dependable layer in the AtoCore
trust hierarchy.
P2.a — Interaction capture project canonicalization
----------------------------------------------------
src/atocore/interactions/service.py: record_interaction now
canonicalizes project before storing, so interaction.project is
always the canonical id regardless of what the client passed.
Downstream effects:
- reinforce_from_interaction queries memories by interaction.project
-> previously missed memories stored under canonical id
-> now consistent because interaction.project IS the canonical id
- the extractor stamps candidates with interaction.project
-> previously created candidates in alias buckets
-> now creates candidates in the canonical bucket
- list_interactions(project=alias) was already broken, now fixed by
canonicalizing the filter input on the read side too
Memory service applied the same fix:
- src/atocore/memory/service.py: create_memory and get_memories
both canonicalize project through resolve_project_name
- This keeps stored memory.project consistent with the
reinforcement query path
P2.b — Interaction `since` filter format normalization
------------------------------------------------------
src/atocore/interactions/service.py: new _normalize_since helper.
The bug:
- created_at is stored as 'YYYY-MM-DD HH:MM:SS' (no timezone, UTC by
convention) so it sorts lexically and compares cleanly with the
SQLite CURRENT_TIMESTAMP default
- The `since` parameter was documented as ISO 8601 but compared as
a raw string against the storage format
- The lexically-greater 'T' separator means an ISO timestamp like
'2026-04-07T12:00:00Z' is GREATER than the storage form
'2026-04-07 12:00:00' for the same instant
- Result: a client passing ISO `since` got an empty result for any
row from the same day, even though those rows existed and were
technically "after" the cutoff in real-world time
The fix:
- _normalize_since accepts ISO 8601 with T, optional Z suffix,
optional fractional seconds, optional +HH:MM offsets
- Uses datetime.fromisoformat for parsing (Python 3.11+)
- Converts to UTC and reformats as the storage format before the
SQL comparison
- The bare storage format still works (backwards compat path is a
regex match that returns the input unchanged)
- Unparseable input is returned as-is so the comparison degrades
gracefully (rows just don't match) instead of raising and
breaking the listing endpoint
builder.py refactor
-------------------
The previous P1 fix had inline canonicalization. Now it uses the
shared helper for consistency:
- import changed from get_registered_project to resolve_project_name
- the inline lookup is replaced with a single helper call
- the comment block now points at representation-authority.md for
the canonicalization contract
New shared test fixture: tests/conftest.py::project_registry
------------------------------------------------------------
- Standardizes the registry-setup pattern that was duplicated
across test_context_builder.py, test_project_state.py,
test_interactions.py, and test_reinforcement.py
- Returns a callable that takes (project_id, [aliases]) tuples
and writes them into a temp registry file with the env var
pointed at it and config.settings reloaded
- Used by all 12 new regression tests in this commit
Tests (12 new, all green on first run)
--------------------------------------
test_project_state.py:
- test_set_state_canonicalizes_alias: write via alias, read via
every alias and the canonical id, verify same row id
- test_get_state_canonicalizes_alias_after_canonical_write
- test_invalidate_state_canonicalizes_alias
- test_unregistered_project_state_still_works (backwards compat)
test_interactions.py:
- test_record_interaction_canonicalizes_project
- test_list_interactions_canonicalizes_project_filter
- test_list_interactions_since_accepts_iso_with_t_separator
- test_list_interactions_since_accepts_z_suffix
- test_list_interactions_since_accepts_offset
- test_list_interactions_since_storage_format_still_works
test_reinforcement.py:
- test_reinforcement_works_when_capture_uses_alias (end-to-end:
capture under alias, seed memory under canonical, verify
reinforcement matches)
- test_get_memories_filter_by_alias
Full suite: 174 passing (was 162), 1 warning. The +12 is the
new regression tests; no existing tests regressed.
What's still NOT canonicalized (and why)
----------------------------------------
- _rank_chunks's secondary substring boost in builder.py — the
retriever already does the right thing via its own
_project_match_boost which calls get_registered_project. The
redundant secondary boost still uses the raw hint but it's a
multiplicative factor on top of correct retrieval, not a
filter, so it can't drop relevant chunks. Tracked as a future
cleanup but not a P1.
- update_memory's project field (you can't change a memory's
project after creation in the API anyway).
- The retriever's project_hint parameter on direct /query calls
— same reasoning as the builder boost, plus the retriever's
own get_registered_project call already handles aliases there.
2026-04-07 08:29:33 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Alias canonicalization regression tests --------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_state_canonicalizes_alias(project_registry):
    """Writing state via an alias should land under the canonical project id.

    Regression for codex's P1 finding: previously /project/state with
    project="p05" created a separate alias row that later context builds
    (which canonicalize the hint) would never see.
    """
    project_registry(("p05-interferometer", ["p05", "interferometer"]))

    set_state("p05", "status", "next_focus", "Wave 2 ingestion")

    # Every name for the project — alias or canonical id — must resolve
    # to the same single stored row (no fragmented duplicates).
    reads = [
        get_state(name)
        for name in ("p05", "p05-interferometer", "interferometer")
    ]
    assert all(len(entries) == 1 for entries in reads)
    assert len({entries[0].id for entries in reads}) == 1
    assert reads[1][0].value == "Wave 2 ingestion"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_get_state_canonicalizes_alias_after_canonical_write(project_registry):
    """Reading via an alias should find state written under the canonical id."""
    project_registry(("p04-gigabit", ["p04", "gigabit"]))

    set_state("p04-gigabit", "status", "phase", "Phase 1 baseline")

    alias_entries = get_state("gigabit")
    assert len(alias_entries) == 1
    assert alias_entries[0].value == "Phase 1 baseline"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_invalidate_state_canonicalizes_alias(project_registry):
    """Invalidating via an alias should hit the canonical row."""
    project_registry(("p06-polisher", ["p06", "polisher"]))
    set_state("p06-polisher", "decision", "frame", "kinematic mounts")

    assert invalidate_state("polisher", "decision", "frame") is True
    # The active view under the canonical id must now be empty.
    assert get_state("p06-polisher") == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_unregistered_project_state_still_works(project_registry):
    """Hand-curated state for an unregistered project must still round-trip.

    Backwards compatibility with state created before the project
    registry existed: resolve_project_name returns its input unchanged
    when the registry has no record, so the raw name is used as-is.
    """
    project_registry()  # empty registry

    set_state("orphan-project", "status", "phase", "Standalone")

    entries = get_state("orphan-project")
    assert len(entries) == 1
    assert entries[0].value == "Standalone"
|
docs+test: clarify legacy alias compatibility gap, add gap regression test
Codex caught a real documentation accuracy bug in the previous
canonicalization doc commit (f521aab). The doc claimed that rows
written under aliases before fb6298a "still work via the
unregistered-name fallback path" — that is wrong for REGISTERED
aliases, which is exactly the case that matters.
The unregistered-name fallback only saves you when the project was
never in the registry: a row stored under "orphan-project" is read
back via "orphan-project", both pass through resolve_project_name
unchanged, and the strings line up. For a registered alias like
"p05", the helper rewrites the read key to "p05-interferometer"
but does NOT rewrite the storage key, so the legacy row becomes
silently invisible.
This commit corrects the doc and locks the gap behavior in with
a regression test, so the issue cannot be lost again.
docs/architecture/project-identity-canonicalization.md
------------------------------------------------------
- Removed the misleading claim from the "What this rule does NOT
cover" section. Replaced with a pointer to the new gap section
and an explicit statement that the migration is required before
engineering V1 ships.
- New "Compatibility gap: legacy alias-keyed rows" section between
"Why this is the trust hierarchy in action" and "The rule for
new entry points". This is the natural insertion point because
the gap is exactly the trust hierarchy failing for legacy data.
The section covers:
* a worked T0/T1 timeline showing the exact failure mode
* what is at risk on the live Dalidou DB, ranked by trust tier:
projects table (shadow rows), project_state (highest risk
because Layer 3 is most-authoritative), memories, interactions
* inspection SQL queries for measuring the actual blast radius
on the live DB before running any migration
* the spec for the migration script: walk projects, find shadow
rows, merge dependent state via the conflict model when there
are collisions, dry-run mode, idempotent
* explicit statement that this is required pre-V1 because V1
will add new project-keyed tables and the killer correctness
queries from engineering-query-catalog.md would report wrong
results against any project that has shadow rows
- "Open follow-ups" item 1 promoted from "tracked optional" to
"REQUIRED before engineering V1 ships, NOT optional" with a
more honest cost estimate (~150 LOC migration + ~50 LOC tests
+ supervised live run, not the previous optimistic ~30 LOC)
- TL;DR rewritten to mention the gap explicitly and re-order
the open follow-ups so the migration is the top priority
tests/test_project_state.py
---------------------------
- New test_legacy_alias_keyed_state_is_invisible_until_migrated
- Inserts a "p05" project row + a project_state row pointing at
it via raw SQL (bypassing set_state which now canonicalizes),
simulating a pre-fix legacy row
- Verifies the canonicalized get_state path can NOT see the row
via either the alias or the canonical id — this is the bug
- Verifies the row is still in the database (just unreachable),
so the migration script has something to find
- The docstring explicitly says: "When the legacy alias migration
script lands, this test must be inverted." Future readers will
know exactly when and how to update it.
Full suite: 175 passing (was 174), 1 warning. The +1 is the new
gap regression test.
What this commit does NOT do
----------------------------
- The migration script itself is NOT in this commit. Codex's
finding was a doc accuracy issue, and the right scope is fix
the doc + lock the gap behavior in. Writing the migration is
the next concrete step but is bigger (~200 LOC + dry-run mode
+ collision handling via the conflict model + supervised run
on the live Dalidou DB), warrants its own commit, and probably
warrants a "draft + review the dry-run output before applying"
workflow rather than a single shot.
- Existing tests are unchanged. The new test stands alone as a
documented gap; the 12 canonicalization tests from fb6298a
still pass without modification.
2026-04-07 20:14:19 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_legacy_alias_keyed_state_is_invisible_until_migrated(project_registry):
    """Documents the compatibility gap from project-identity-canonicalization.md.

    Rows written under a registered alias BEFORE canonicalization landed
    in fb6298a live in the projects table under the alias name, not the
    canonical id. Every read path now canonicalizes to the canonical id,
    so those legacy rows become invisible.

    The legacy state is simulated here with raw SQL — bypassing
    set_state(), which now canonicalizes — by inserting a shadow project
    row and a state row pointing at it. The canonicalized get_state()
    must NOT find that row: that is the gap being documented.

    When the legacy alias migration script lands (see the open
    follow-ups in docs/architecture/project-identity-canonicalization.md),
    this test must be inverted: after running the migration the legacy
    state should be reachable via the canonical project, not invisible.
    The migration is required before engineering V1 ships.
    """
    import uuid

    from atocore.models.database import get_connection

    project_registry(("p05-interferometer", ["p05", "interferometer"]))

    # What the OLD set_state would have stored before fb6298a: a project
    # row keyed by the alias, with a state row hanging off it.
    shadow_project_id = str(uuid.uuid4())
    shadow_state_id = str(uuid.uuid4())
    with get_connection() as conn:
        conn.execute(
            "INSERT INTO projects (id, name, description) VALUES (?, ?, ?)",
            (shadow_project_id, "p05", "shadow row created before canonicalization"),
        )
        conn.execute(
            "INSERT INTO project_state "
            "(id, project_id, category, key, value, source, confidence) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                shadow_state_id,
                shadow_project_id,
                "status",
                "legacy_focus",
                "Wave 1 ingestion",
                "pre-canonicalization",
                1.0,
            ),
        )

    # Both read paths canonicalize to "p05-interferometer" and therefore
    # miss the shadow row. THIS IS THE GAP.
    for lookup in ("p05", "p05-interferometer"):
        values = {entry.value for entry in get_state(lookup)}
        assert "Wave 1 ingestion" not in values

    # The legacy row itself survives in the database — merely unreachable
    # from the canonicalized read path. The migration script (open
    # follow-up) is what closes the gap.
    with get_connection() as conn:
        row = conn.execute(
            "SELECT value FROM project_state WHERE id = ?", (shadow_state_id,)
        ).fetchone()
        assert row is not None
        assert row["value"] == "Wave 1 ingestion"
|