# src/atocore/context/project_state.py

"""Trusted Project State — the highest-priority context source.

Per the Master Plan trust precedence:
  1. Trusted Project State (this module)
  2. AtoDrive artifacts
  3. Recent validated memory
  4. AtoVault summaries
  5. PKM chunks
  6. Historical / low-confidence

Project state consists of manually curated or explicitly confirmed facts about a project.
It always wins over retrieval-based context when there's a conflict.
"""

import uuid
from dataclasses import dataclass
from datetime import datetime, timezone

from atocore.models.database import get_connection
from atocore.observability.logger import get_logger
from atocore.projects.registry import resolve_project_name

# Module-level logger used by every function in this file; events are
# emitted as a short event name plus keyword fields.
log = get_logger("project_state")

# DB schema extension for project state
#
# Executed as a script by init_project_state_schema(). Every statement uses
# IF NOT EXISTS, so running the script again against an existing database
# does not fail on already-present objects.
#
# Notes on the table definition:
#   * project_id references projects(id) and is declared ON DELETE CASCADE.
#   * UNIQUE(project_id, category, key) allows at most one row per
#     (project, category, key) triple; set_state() updates that row in place
#     rather than inserting a second one.
#   * status defaults to 'active'; invalidate_state() flips it to
#     'superseded' and set_state() resets it to 'active' on update.
#   * created_at / updated_at default to CURRENT_TIMESTAMP.
PROJECT_STATE_SCHEMA = """
CREATE TABLE IF NOT EXISTS project_state (
    id TEXT PRIMARY KEY,
    project_id TEXT NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    category TEXT NOT NULL,
    key TEXT NOT NULL,
    value TEXT NOT NULL,
    source TEXT DEFAULT '',
    confidence REAL DEFAULT 1.0,
    status TEXT DEFAULT 'active',
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(project_id, category, key)
);

CREATE INDEX IF NOT EXISTS idx_project_state_project ON project_state(project_id);
CREATE INDEX IF NOT EXISTS idx_project_state_category ON project_state(category);
CREATE INDEX IF NOT EXISTS idx_project_state_status ON project_state(status);
"""

# Valid categories for project state entries
#
# set_state() raises ValueError for any category that is not in this list,
# and the list itself is interpolated into that error message. get_state()
# and invalidate_state() do not check against it; they simply use the given
# category as a filter value.
CATEGORIES = [
    "status",       # current project status, phase, blockers
    "decision",     # confirmed design/engineering decisions
    "requirement",  # key requirements and constraints
    "contact",      # key people, vendors, stakeholders
    "milestone",    # dates, deadlines, deliverables
    "fact",         # verified technical facts
    "config",       # project configuration, parameters
]


@dataclass
class ProjectStateEntry:
    """In-memory representation of one row of the ``project_state`` table.

    The field names match the table's column names one-to-one, and the
    defaults for ``source``, ``confidence`` and ``status`` match the column
    defaults declared in ``PROJECT_STATE_SCHEMA``. Instances are returned by
    ``set_state`` and ``get_state`` and consumed by ``format_project_state``.
    """

    # Row identifier (set_state generates a uuid4 string for new rows).
    id: str
    # Id of the owning row in the ``projects`` table.
    project_id: str
    # Grouping bucket; set_state only accepts values listed in CATEGORIES.
    category: str
    # Name of the fact within its category.
    key: str
    # The fact itself, stored as text.
    value: str
    # Optional provenance note; format_project_state prints it when non-empty.
    source: str = ""
    # set_state restricts this to the closed range 0.0-1.0.
    confidence: float = 1.0
    # 'active' for live entries; invalidate_state writes 'superseded'.
    status: str = "active"
    # Timestamps carried as strings; empty when not supplied by the producer.
    created_at: str = ""
    updated_at: str = ""


def init_project_state_schema() -> None:
    """Apply ``PROJECT_STATE_SCHEMA`` to the database.

    Runs the whole schema script (table plus its indexes) on a connection
    obtained from ``get_connection`` and logs one event once the connection
    block has exited.
    """
    with get_connection() as db:
        db.executescript(PROJECT_STATE_SCHEMA)

    log.info("project_state_schema_initialized")


def ensure_project(name: str, description: str = "") -> str:
    """Return the id of the project called ``name``, creating it if needed.

    The lookup compares names case-insensitively (``lower(name)`` on both
    sides). When no row matches, a new ``projects`` row is inserted with a
    freshly generated uuid4 id and the given ``description``.

    Args:
        name: Project name to look up or create.
        description: Stored only when a new project row is inserted.

    Returns:
        The id of the existing or newly created project.
    """
    find_sql = "SELECT id FROM projects WHERE lower(name) = lower(?)"
    create_sql = "INSERT INTO projects (id, name, description) VALUES (?, ?, ?)"

    with get_connection() as db:
        found = db.execute(find_sql, (name,)).fetchone()
        if not found:
            # No project with that name yet: create one and hand back its id.
            new_id = str(uuid.uuid4())
            db.execute(create_sql, (new_id, name, description))
            log.info("project_created", name=name, project_id=new_id)
            return new_id

        return found["id"]


def set_state(
    project_name: str,
    category: str,
    key: str,
    value: str,
    source: str = "",
    confidence: float = 1.0,
) -> ProjectStateEntry:
    """Set or update a project state entry. Upsert semantics.

    The ``project_name`` is canonicalized through the registry so a
    caller passing an alias (``p05``) ends up writing into the same
    row as the canonical id (``p05-interferometer``). Without this
    step, alias and canonical names would create two parallel
    project rows and fragmented state.

    If a row already exists for ``(project, category, key)`` its value,
    source and confidence are overwritten and its status is reset to
    ``'active'``; otherwise a new row is inserted with a fresh uuid4 id.

    Args:
        project_name: Project name or alias; the project row is created
            if it does not exist yet.
        category: One of ``CATEGORIES``.
        key: Name of the entry within the category.
        value: Text to store.
        source: Optional provenance note.
        confidence: Number in the closed range 0.0-1.0.

    Returns:
        The entry as persisted. ``created_at`` and ``updated_at`` are read
        back from the stored row, so updating an existing entry reports its
        original creation time and the values match what ``get_state``
        returns for the same row.

    Raises:
        ValueError: If ``category`` is not in ``CATEGORIES`` or
            ``confidence`` is outside 0.0-1.0.
    """
    if category not in CATEGORIES:
        raise ValueError(f"Invalid category '{category}'. Must be one of: {CATEGORIES}")
    _validate_confidence(confidence)

    project_name = resolve_project_name(project_name)
    project_id = ensure_project(project_name)

    with get_connection() as conn:
        # Check if entry exists
        existing = conn.execute(
            "SELECT id FROM project_state WHERE project_id = ? AND category = ? AND key = ?",
            (project_id, category, key),
        ).fetchone()

        if existing:
            entry_id = existing["id"]
            conn.execute(
                "UPDATE project_state SET value = ?, source = ?, confidence = ?, "
                "status = 'active', updated_at = CURRENT_TIMESTAMP "
                "WHERE id = ?",
                (value, source, confidence, entry_id),
            )
            log.info("project_state_updated", project=project_name, category=category, key=key)
        else:
            # Only mint a new id when a row is actually being created.
            entry_id = str(uuid.uuid4())
            conn.execute(
                "INSERT INTO project_state (id, project_id, category, key, value, source, confidence) "
                "VALUES (?, ?, ?, ?, ?, ?, ?)",
                (entry_id, project_id, category, key, value, source, confidence),
            )
            log.info("project_state_created", project=project_name, category=category, key=key)

        # Read the timestamps the database actually stored rather than
        # fabricating them locally: on the update path created_at must stay
        # the original creation time, and both values should be in the same
        # textual form that get_state() hands out. The read happens on the
        # same connection, so it sees the write made just above.
        stamps = conn.execute(
            "SELECT created_at, updated_at FROM project_state WHERE id = ?",
            (entry_id,),
        ).fetchone()

    return ProjectStateEntry(
        id=entry_id,
        project_id=project_id,
        category=category,
        key=key,
        value=value,
        source=source,
        confidence=confidence,
        status="active",
        created_at=stamps["created_at"],
        updated_at=stamps["updated_at"],
    )


def get_state(
    project_name: str,
    category: str | None = None,
    active_only: bool = True,
) -> list[ProjectStateEntry]:
    """Return the stored state entries for a project.

    The project name is first resolved through the registry, so an alias
    and the canonical name read the same rows. The project row is then
    matched case-insensitively by name.

    Args:
        project_name: Project name or alias.
        category: When truthy, only entries of this category are returned.
        active_only: When true, only rows whose status is ``'active'``
            are returned.

    Returns:
        Entries ordered by category, then key. An empty list when the
        project does not exist.
    """
    canonical = resolve_project_name(project_name)

    with get_connection() as db:
        owner = db.execute(
            "SELECT id FROM projects WHERE lower(name) = lower(?)", (canonical,)
        ).fetchone()
        if not owner:
            return []

        # Collect the WHERE conditions and their bound values side by side.
        conditions = ["project_id = ?"]
        args: list = [owner["id"]]
        if category:
            conditions.append("category = ?")
            args.append(category)
        if active_only:
            conditions.append("status = 'active'")

        sql = (
            "SELECT * FROM project_state WHERE "
            + " AND ".join(conditions)
            + " ORDER BY category, key"
        )
        records = db.execute(sql, args).fetchall()

    # Each dataclass field is filled from the column of the same name.
    columns = (
        "id",
        "project_id",
        "category",
        "key",
        "value",
        "source",
        "confidence",
        "status",
        "created_at",
        "updated_at",
    )
    return [
        ProjectStateEntry(**{column: record[column] for column in columns})
        for record in records
    ]


def invalidate_state(project_name: str, category: str, key: str) -> bool:
    """Flag an active project state entry as superseded.

    The project name is resolved through the registry first, so an alias
    targets the same project as its canonical name. Only a row that is
    currently ``'active'`` is changed; its ``updated_at`` is refreshed at
    the same time.

    Args:
        project_name: Project name or alias.
        category: Category of the entry to invalidate.
        key: Key of the entry to invalidate.

    Returns:
        True when a row was changed. False when the project does not exist
        or no active entry matched.
    """
    canonical = resolve_project_name(project_name)

    with get_connection() as db:
        owner = db.execute(
            "SELECT id FROM projects WHERE lower(name) = lower(?)", (canonical,)
        ).fetchone()
        if not owner:
            return False

        outcome = db.execute(
            "UPDATE project_state SET status = 'superseded', updated_at = CURRENT_TIMESTAMP "
            "WHERE project_id = ? AND category = ? AND key = ? AND status = 'active'",
            (owner["id"], category, key),
        )

        # rowcount tells us whether the UPDATE matched an active entry.
        touched = outcome.rowcount > 0
        if touched:
            log.info("project_state_invalidated", project=canonical, category=category, key=key)
        return touched


def format_project_state(entries: list[ProjectStateEntry]) -> str:
    """Render state entries as a text block for context injection.

    The block opens and closes with fixed marker lines. A bracketed,
    upper-cased category heading is written whenever the category differs
    from that of the preceding entry, so callers should pass entries that
    are already grouped by category. Each entry becomes a ``key: value``
    line, followed by a ``(source: ...)`` line when a source is set.

    Args:
        entries: Entries to render, in display order.

    Returns:
        The formatted block, or an empty string when there are no entries.
    """
    if not entries:
        return ""

    def _pieces():
        # Yield the output fragments in order; they are newline-joined below.
        yield "--- Trusted Project State ---"
        heading = ""
        for item in entries:
            if heading != item.category:
                heading = item.category
                yield f"\n[{heading.upper()}]"
            yield f"  {item.key}: {item.value}"
            if item.source:
                yield f"    (source: {item.source})"
        yield "\n--- End Project State ---"

    return "\n".join(_pieces())


def _validate_confidence(confidence: float) -> None:
    """Reject confidence values outside the closed range 0.0-1.0.

    Raises:
        ValueError: If ``confidence`` is below 0.0, above 1.0, or does not
            compare as inside the range (for example NaN).
    """
    within_bounds = 0.0 <= confidence <= 1.0
    if within_bounds:
        return
    raise ValueError("Confidence must be between 0.0 and 1.0")