"""
Atomizer Playbook - Structured Knowledge Store

Part of the ACE (Agentic Context Engineering) implementation for Atomizer.
Based on ACE framework principles:
- Incremental delta updates (never rewrite wholesale)
- Helpful/harmful tracking for each insight
- Semantic deduplication
- Category-based organization

This module provides the core data structures for accumulating optimization
knowledge across sessions.
"""

from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
from enum import Enum
import json
from pathlib import Path
from datetime import datetime
import hashlib


class InsightCategory(Enum):
    """
    Closed set of categories a playbook insight can belong to.

    Each member's value is a short code; the playbook uses it as the
    prefix of generated item IDs and as the serialized form of the
    category in saved JSON.
    """

    # Optimization strategies
    STRATEGY = "str"

    # Formulas and calculations
    CALCULATION = "cal"

    # Common mistakes to avoid
    MISTAKE = "mis"

    # Tool usage patterns
    TOOL = "tool"

    # Domain-specific knowledge (FEA, NX)
    DOMAIN = "dom"

    # Workflow patterns
    WORKFLOW = "wf"


@dataclass
class PlaybookItem:
    """
    Single insight in the playbook with helpful/harmful tracking.

    Each item accumulates feedback over time:
    - helpful_count: Times this insight led to success
    - harmful_count: Times this insight led to failure
    - net_score: helpful - harmful (used for ranking)
    - confidence: helpful / (helpful + harmful)

    Other fields:
    - id: Unique identifier of the item within its playbook
    - category: InsightCategory the insight belongs to
    - content: The insight text itself
    - created_at: ISO-8601 timestamp taken when the item is constructed
    - last_used: ISO-8601 timestamp of the last recorded use, or None
    - source_trials: Trial numbers associated with this insight
    - tags: Free-form tags used for filtering
    """
    id: str
    category: InsightCategory
    content: str
    helpful_count: int = 0
    harmful_count: int = 0
    created_at: str = field(default_factory=lambda: datetime.now().isoformat())
    last_used: Optional[str] = None
    source_trials: List[int] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)

    @property
    def net_score(self) -> int:
        """Net helpfulness score (helpful - harmful)."""
        return self.helpful_count - self.harmful_count

    @property
    def confidence(self) -> float:
        """
        Confidence score (0.0-1.0) based on outcome ratio.

        Returns 0.5 (neutral) when no outcome has been recorded yet, which
        also avoids a division by zero.
        """
        total = self.helpful_count + self.harmful_count
        if total == 0:
            return 0.5  # Neutral confidence for untested items
        return self.helpful_count / total

    def to_context_string(self) -> str:
        """Format for injection into LLM context."""
        return f"[{self.id}] helpful={self.helpful_count} harmful={self.harmful_count} :: {self.content}"

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to dictionary for serialization.

        The category is stored as its enum value. The list fields are
        copied so that mutating the returned dictionary cannot alter
        this item.

        Returns:
            A dictionary holding every field of the item
        """
        return {
            "id": self.id,
            "category": self.category.value,
            "content": self.content,
            "helpful_count": self.helpful_count,
            "harmful_count": self.harmful_count,
            "created_at": self.created_at,
            "last_used": self.last_used,
            # Copies, not references: the snapshot must not alias live state.
            "source_trials": list(self.source_trials),
            "tags": list(self.tags)
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "PlaybookItem":
        """
        Create from dictionary.

        "id", "category" and "content" are required; every other key is
        optional. A missing "created_at" is loaded as an empty string
        rather than the current time.

        Args:
            data: Dictionary in the shape produced by to_dict()

        Returns:
            A new PlaybookItem owning its own copies of the list fields

        Raises:
            KeyError: If a required key is missing
            ValueError: If "category" is not a valid InsightCategory value
        """
        return cls(
            id=data["id"],
            category=InsightCategory(data["category"]),
            content=data["content"],
            helpful_count=data.get("helpful_count", 0),
            harmful_count=data.get("harmful_count", 0),
            created_at=data.get("created_at", ""),
            last_used=data.get("last_used"),
            # `or []` tolerates an explicit null in the stored data; list()
            # detaches the item from the caller's dictionary.
            source_trials=list(data.get("source_trials") or []),
            tags=list(data.get("tags") or [])
        )


@dataclass
class AtomizerPlaybook:
    """
    Evolving playbook that accumulates optimization knowledge.

    Based on ACE framework principles:
    - Incremental delta updates (never rewrite wholesale)
    - Helpful/harmful tracking for each insight
    - Deduplication (currently exact match on lowercased, stripped content)
    - Category-based organization

    Attributes:
        items: Insights keyed by their item ID
        version: Counter bumped whenever an item is added or items are pruned
        last_updated: ISO-8601 timestamp of the most recent modification

    Usage:
        playbook = AtomizerPlaybook.load(path)
        item = playbook.add_insight(InsightCategory.STRATEGY, "Use shell elements for thin walls")
        playbook.record_outcome(item.id, helpful=True)
        playbook.save(path)
    """
    items: Dict[str, PlaybookItem] = field(default_factory=dict)
    version: int = 1
    last_updated: str = field(default_factory=lambda: datetime.now().isoformat())

    def _generate_id(self, category: InsightCategory) -> str:
        """
        Generate unique ID for new item.

        The ID is "<category value>-<5-digit number>", where the number is
        one above the highest number currently used in that category.
        Counting the existing items instead would hand out an ID that is
        still in use once a lower-numbered item has been pruned.

        Args:
            category: Category whose value forms the ID prefix

        Returns:
            An ID that is not a key of self.items
        """
        prefix = f"{category.value}-"
        highest = 0
        for key in self.items:
            if not key.startswith(prefix):
                continue
            suffix = key[len(prefix):]
            # Ignore keys that do not follow the numeric naming scheme.
            if suffix.isdecimal():
                highest = max(highest, int(suffix))
        return f"{prefix}{highest + 1:05d}"

    def _content_hash(self, content: str) -> str:
        """
        Generate hash for content deduplication.

        The content is lowercased and stripped of surrounding whitespace
        first, so texts differing only in those respects hash the same.
        """
        normalized = content.lower().strip()
        return hashlib.md5(normalized.encode()).hexdigest()[:12]

    def add_insight(
        self,
        category: InsightCategory,
        content: str,
        source_trial: Optional[int] = None,
        tags: Optional[List[str]] = None
    ) -> PlaybookItem:
        """
        Add new insight with delta update (ACE principle).

        Checks for duplicates (same content hash, in any category) before
        adding. If a duplicate is found, its helpful_count is incremented
        and the trial and tags are merged into it instead.

        Args:
            category: Type of insight
            content: The insight text
            source_trial: Trial number that generated this insight
                (0 is a valid trial number)
            tags: Optional tags for filtering

        Returns:
            The created or updated PlaybookItem
        """
        content_hash = self._content_hash(content)

        # Check for duplicates
        for item in self.items.values():
            if self._content_hash(item.content) != content_hash:
                continue

            # Update existing instead of adding duplicate
            item.helpful_count += 1
            # Compare against None explicitly: trial 0 is falsy but valid.
            if source_trial is not None and source_trial not in item.source_trials:
                item.source_trials.append(source_trial)
            if tags:
                # dict.fromkeys de-duplicates while keeping first-seen order,
                # so the stored tag order is deterministic across runs.
                item.tags = list(dict.fromkeys([*item.tags, *tags]))
            self.last_updated = datetime.now().isoformat()
            return item

        # Create new item
        item_id = self._generate_id(category)
        item = PlaybookItem(
            id=item_id,
            category=category,
            content=content,
            source_trials=[] if source_trial is None else [source_trial],
            # Copy so later changes to the caller's list do not leak in.
            tags=list(tags) if tags else []
        )
        self.items[item_id] = item
        self.last_updated = datetime.now().isoformat()
        self.version += 1
        return item

    def record_outcome(self, item_id: str, helpful: bool) -> bool:
        """
        Record whether using this insight was helpful or harmful.

        Args:
            item_id: The playbook item ID
            helpful: True if outcome was positive, False if negative

        Returns:
            True if item was found and updated, False otherwise
        """
        item = self.items.get(item_id)
        if item is None:
            return False

        if helpful:
            item.helpful_count += 1
        else:
            item.harmful_count += 1
        item.last_used = datetime.now().isoformat()
        self.last_updated = datetime.now().isoformat()
        return True

    def get_context_for_task(
        self,
        task_type: str,
        max_items: int = 20,
        min_confidence: float = 0.5,
        tags: Optional[List[str]] = None
    ) -> str:
        """
        Generate context string for LLM consumption.

        Filters by confidence (and tags, if given), sorts by net score,
        keeps the top max_items and groups them under category headings.

        Args:
            task_type: Type of task (accepted for API compatibility;
                not currently used for filtering)
            max_items: Maximum items to include
            min_confidence: Minimum confidence threshold
            tags: Optional tags to filter by (an item matches if it has
                at least one of them)

        Returns:
            Formatted context string for LLM
        """
        relevant_items = [
            item for item in self.items.values()
            if item.confidence >= min_confidence
        ]

        # Filter by tags if provided
        if tags:
            relevant_items = [
                item for item in relevant_items
                if any(tag in item.tags for tag in tags)
            ]

        # Sort by net score (most helpful first)
        relevant_items.sort(key=lambda x: x.net_score, reverse=True)

        # Group by category; sections appear in order of their best item
        sections: Dict[str, List[str]] = {}
        for item in relevant_items[:max_items]:
            sections.setdefault(item.category.name, []).append(
                item.to_context_string()
            )

        # Build context string
        lines = ["## Atomizer Knowledge Playbook", ""]
        for cat_name, entries in sections.items():
            lines.append(f"### {cat_name}")
            lines.extend(entries)
            lines.append("")

        return "\n".join(lines)

    def search_by_content(
        self,
        query: str,
        category: Optional[InsightCategory] = None,
        limit: int = 5
    ) -> List[PlaybookItem]:
        """
        Search playbook items by content similarity.

        Simple keyword matching - could be enhanced with embeddings.
        The score of an item is the number of whitespace-separated words
        it shares with the query, plus one if the whole query occurs as
        a substring of the content. Matching is case-insensitive.

        Args:
            query: Search query
            category: Optional category filter
            limit: Maximum results

        Returns:
            List of matching items sorted by relevance, ties broken by
            higher net score
        """
        query_lower = query.lower()
        query_words = set(query_lower.split())

        scored_items = []
        for item in self.items.values():
            if category and item.category != category:
                continue

            content_lower = item.content.lower()
            phrase_hit = query_lower in content_lower

            # Simple word overlap scoring
            overlap = len(query_words & set(content_lower.split()))
            if overlap > 0 or phrase_hit:
                scored_items.append((overlap + (1 if phrase_hit else 0), item))

        scored_items.sort(key=lambda x: (-x[0], -x[1].net_score))
        return [item for _, item in scored_items[:limit]]

    def get_by_category(
        self,
        category: InsightCategory,
        min_score: int = 0
    ) -> List[PlaybookItem]:
        """Get all items in a category with minimum net score."""
        return [
            item for item in self.items.values()
            if item.category == category and item.net_score >= min_score
        ]

    def prune_harmful(self, threshold: int = -3) -> int:
        """
        Remove items that have proven consistently harmful.

        Args:
            threshold: Net score threshold (items at or below are removed)

        Returns:
            Number of items removed
        """
        # Collect first: a dict must not change size while being iterated.
        to_remove = [
            item_id for item_id, item in self.items.items()
            if item.net_score <= threshold
        ]
        for item_id in to_remove:
            del self.items[item_id]

        if to_remove:
            self.last_updated = datetime.now().isoformat()
            self.version += 1

        return len(to_remove)

    def get_stats(self) -> Dict[str, Any]:
        """
        Get playbook statistics.

        Returns:
            Dictionary with the item count, a per-category count keyed by
            category name, the version, the last-updated timestamp, and
            the average/max/min net score (all 0 for an empty playbook)
        """
        by_category: Dict[str, int] = {}
        for item in self.items.values():
            cat = item.category.name
            by_category[cat] = by_category.get(cat, 0) + 1

        scores = [item.net_score for item in self.items.values()]

        return {
            "total_items": len(self.items),
            "by_category": by_category,
            "version": self.version,
            "last_updated": self.last_updated,
            "avg_score": sum(scores) / len(scores) if scores else 0,
            "max_score": max(scores) if scores else 0,
            "min_score": min(scores) if scores else 0
        }

    def save(self, path: Path) -> None:
        """
        Persist playbook to JSON.

        Parent directories are created as needed. The data is written to
        a sibling "<name>.tmp" file and then renamed over the target, so
        a failure part-way through leaves any existing playbook intact.

        Args:
            path: File path to save to
        """
        path = Path(path)
        data = {
            "version": self.version,
            "last_updated": self.last_updated,
            "items": {k: v.to_dict() for k, v in self.items.items()}
        }
        path.parent.mkdir(parents=True, exist_ok=True)

        tmp_path = path.with_name(path.name + ".tmp")
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            tmp_path.replace(path)
        finally:
            # Only still present if writing or renaming failed.
            if tmp_path.exists():
                tmp_path.unlink()

    @classmethod
    def load(cls, path: Path) -> "AtomizerPlaybook":
        """
        Load playbook from JSON.

        Items are re-keyed by their own "id" field rather than by the key
        they were stored under.

        Args:
            path: File path to load from

        Returns:
            Loaded playbook (or new empty playbook if file doesn't exist)
        """
        path = Path(path)
        if not path.exists():
            return cls()

        with open(path, encoding='utf-8') as f:
            data = json.load(f)

        playbook = cls(
            version=data.get("version", 1),
            last_updated=data.get("last_updated", datetime.now().isoformat())
        )

        for item_data in data.get("items", {}).values():
            item = PlaybookItem.from_dict(item_data)
            playbook.items[item.id] = item

        return playbook


# Module-level state backing get_playbook() and save_playbook().
# _global_playbook holds the most recently loaded playbook instance and
# _global_playbook_path the path it was loaded from; both stay None until
# get_playbook() is first called.
_global_playbook: Optional[AtomizerPlaybook] = None
_global_playbook_path: Optional[Path] = None


def get_playbook(path: Optional[Path] = None) -> AtomizerPlaybook:
    """
    Return the shared playbook, loading it on demand.

    The playbook is (re)loaded from disk when nothing has been loaded yet
    or when the requested path differs from the one the current instance
    was loaded from; otherwise the existing instance is returned as is.

    Args:
        path: Location of the playbook file. When omitted, falls back to
            "knowledge_base/playbook.json" under the directory two levels
            above the one containing this module.

    Returns:
        The global AtomizerPlaybook instance
    """
    global _global_playbook, _global_playbook_path

    target = path
    if target is None:
        # Fall back to the default location
        target = Path(__file__).parents[2] / "knowledge_base" / "playbook.json"

    stale = _global_playbook is None or _global_playbook_path != target
    if stale:
        _global_playbook = AtomizerPlaybook.load(target)
        _global_playbook_path = target

    return _global_playbook


def save_playbook() -> None:
    """
    Write the global playbook back to the path it was loaded from.

    Does nothing when no global playbook has been loaded yet (either the
    instance or its path is still None).
    """
    playbook, target = _global_playbook, _global_playbook_path
    if playbook is None or target is None:
        return
    playbook.save(target)