# Atomizer/optimization_engine/plugins/post_solve/error_tracker.py

"""
Error Tracker Hook - Context Engineering Integration

Preserves solver errors and failures in context for learning.
Based on Manus insight: "leave the wrong turns in the context"

This hook:
1. Captures solver errors and failures
2. Classifies error types for playbook categorization
3. Extracts relevant F06 content for analysis
4. Records errors to session state and LAC

Hook Point: post_solve
Priority: 100 (run early to capture before cleanup)
"""

from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional
import json
import re


def classify_error(error_msg: str) -> str:
    """
    Map a solver error message onto a playbook category.

    Matching is a case-insensitive substring search. Categories are tested
    in a fixed priority order and the first category with any keyword
    present in the message wins.

    Args:
        error_msg: Error message text

    Returns:
        Category label, or "unknown_error" when no keyword matches
    """
    # Ordered (category, keywords) pairs; earlier entries take precedence.
    categories = (
        ("convergence_failure", ('convergence', 'did not converge', 'diverge')),
        ("mesh_error", ('mesh', 'element', 'distorted', 'jacobian')),
        ("singularity", ('singular', 'matrix', 'pivot', 'ill-conditioned')),
        ("memory_error", ('memory', 'allocation', 'out of memory')),
        ("license_error", ('license', 'checkout')),
        ("boundary_condition_error", ('boundary', 'constraint', 'spc', 'rigid body')),
        ("timeout_error", ('timeout', 'time limit')),
        ("file_error", ('file', 'not found', 'missing')),
    )

    text = error_msg.lower()
    for label, keywords in categories:
        for keyword in keywords:
            if keyword in text:
                return label

    return "unknown_error"


def extract_f06_error(f06_path: Optional[str], max_chars: int = 500) -> str:
    """
    Pull the error section out of an F06 file.

    The file is searched first for literal fatal/warning markers (in the
    listed priority order, not in file order) and then, case-insensitively,
    for convergence-failure phrases. The returned text is a window around
    the first hit: a short lead-in before it and up to ``max_chars`` after.

    Args:
        f06_path: Path to F06 file
        max_chars: Maximum characters to extract after the hit

    Returns:
        Stripped error excerpt; empty string when the path is missing or
        nothing matches; "Error reading F06: ..." when reading fails
    """
    if not f06_path:
        return ""

    f06_file = Path(f06_path)
    if not f06_file.exists():
        return ""

    def _window(text, hit, lead):
        # Excerpt from `lead` chars before the hit to `max_chars` after it.
        lo = max(0, hit - lead)
        hi = min(len(text), hit + max_chars)
        return text[lo:hi].strip()

    try:
        with open(f06_file, 'r', encoding='utf-8', errors='ignore') as handle:
            text = handle.read()

        # Explicit error markers, highest priority first
        for marker in (
            "*** USER FATAL",
            "*** SYSTEM FATAL",
            "*** USER WARNING",
            "*** SYSTEM WARNING",
            "FATAL ERROR",
            "ERROR MESSAGE",
        ):
            hit = text.find(marker)
            if hit != -1:
                return _window(text, hit, 100)

        # No explicit marker: fall back to convergence messages
        for phrase in (
            r"CONVERGENCE NOT ACHIEVED",
            r"SOLUTION DID NOT CONVERGE",
            r"DIVERGENCE DETECTED",
        ):
            found = re.search(phrase, text, re.IGNORECASE)
            if found is not None:
                return _window(text, found.start(), 50)

        return ""

    except Exception as e:
        return f"Error reading F06: {str(e)}"


def find_f06_file(working_dir: str, sim_file: str = "") -> Optional[Path]:
    """
    Find the most relevant F06 file in the working directory.

    All ``*.f06`` files directly inside ``working_dir`` are candidates.
    When ``sim_file`` is given, candidates whose file name starts with the
    simulation file's stem (case-insensitive) are preferred; if none match,
    every candidate is considered. Among the remaining candidates the most
    recently modified one is returned.

    Args:
        working_dir: Working directory path
        sim_file: Simulation file name or path; only its stem is used, to
            prefer F06 files named after the simulation. May be empty.

    Returns:
        Path to F06 file or None if the directory holds no ``*.f06`` file
    """
    def _mtime(candidate: Path) -> float:
        # A file can vanish between glob() and stat(); rank it last
        # instead of letting the lookup raise.
        try:
            return candidate.stat().st_mtime
        except OSError:
            return float("-inf")

    # A single "*.f06" glob is sufficient: narrower patterns such as
    # "*-solution*.f06" or "*_sim*.f06" can only match a subset of it.
    candidates = list(Path(working_dir).glob("*.f06"))
    if not candidates:
        return None

    sim_stem = Path(sim_file).stem.lower() if sim_file else ""
    if sim_stem:
        related = [p for p in candidates if p.name.lower().startswith(sim_stem)]
        if related:
            candidates = related

    # Return most recently modified
    return max(candidates, key=_mtime)


def track_error(context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Hook that preserves solver errors for context learning.

    Called at post_solve after the solver completes. A trial counts as
    failed when ``solver_returncode`` is non-zero or the ``error`` /
    ``solver_failed`` flags are truthy. For a failed trial the hook:

    1. Locates the F06 file (``f06_path`` from the context, else a search
       of ``working_dir``) and extracts an error excerpt from it.
    2. Derives an error message (context ``error_message``, else the most
       telling line of the excerpt, else "Unknown solver error").
    3. Classifies the error and appends a JSON record to
       ``<output_dir>/error_history.jsonl``.
    4. Forwards the error to the session state and to LAC when those
       modules can be imported.

    Steps 3 and 4 are best-effort: failures there are reported with a
    printed warning and never raised. Successful trials are not recorded.

    Args:
        context: Hook context with trial information. Keys read:
            ``trial_number``, ``working_dir``, ``output_dir``,
            ``solver_returncode``, ``error``, ``solver_failed``,
            ``f06_path``, ``sim_file``, ``error_message``,
            ``design_variables``.

    Returns:
        ``{"error_tracked": False, "trial_success": True}`` when no failure
        is signalled; otherwise a dictionary with ``error_tracked`` (True),
        ``error_type``, ``error_message`` (at most 200 characters) and
        ``f06_extracted``.
    """
    trial_number = context.get('trial_number', -1)
    # `or` also covers keys that are present but None/empty.
    working_dir = context.get('working_dir') or '.'
    output_dir = context.get('output_dir') or working_dir
    solver_returncode = context.get('solver_returncode', 0)

    # Determine if this is an error case
    # (solver returncode non-zero, or explicit error flag)
    is_error = (
        solver_returncode != 0 or
        context.get('error', False) or
        context.get('solver_failed', False)
    )

    if not is_error:
        # Nothing to track for a successful trial
        return {"error_tracked": False, "trial_success": True}

    # Find and extract F06 error info
    f06_path = context.get('f06_path')
    if not f06_path:
        f06_file = find_f06_file(working_dir, context.get('sim_file', ''))
        if f06_file:
            f06_path = str(f06_file)

    f06_snippet = extract_f06_error(f06_path)

    # Get error message from context or F06. Coerce to str so that None or
    # an exception object in the context cannot break slicing/classifying.
    raw_message = context.get('error_message')
    error_message = str(raw_message) if raw_message else ''
    if not error_message and f06_snippet:
        # The excerpt starts a little before the error marker, so its first
        # line is often the tail of an unrelated line. Prefer the first line
        # that actually looks like an error; otherwise use the first line.
        snippet_lines = [ln.strip() for ln in f06_snippet.splitlines() if ln.strip()]
        keywords = ('FATAL', 'ERROR', 'WARNING', 'CONVERGE', 'DIVERGENCE')
        headline = next(
            (ln for ln in snippet_lines if any(k in ln.upper() for k in keywords)),
            snippet_lines[0] if snippet_lines else '',
        )
        error_message = headline[:200]
    if not error_message:
        error_message = "Unknown solver error"

    # Classify error: the message decides; the fuller F06 excerpt is only
    # consulted when the message alone is inconclusive.
    error_type = classify_error(error_message)
    if error_type == "unknown_error" and f06_snippet:
        error_type = classify_error(f06_snippet)

    # Build error record
    error_info = {
        "trial": trial_number,
        "timestamp": datetime.now().isoformat(),
        "solver_returncode": solver_returncode,
        "error_type": error_type,
        "error_message": error_message,
        "f06_snippet": f06_snippet[:1000] if f06_snippet else "",
        "design_variables": context.get('design_variables', {}),
        "working_dir": working_dir
    }

    # Save to error log (append mode - accumulate errors)
    try:
        error_log_path = Path(output_dir) / "error_history.jsonl"
        error_log_path.parent.mkdir(parents=True, exist_ok=True)
        with open(error_log_path, 'a', encoding='utf-8') as f:
            # default=str is deliberate: a stringified Path or numeric
            # wrapper in the log beats losing the whole record.
            f.write(json.dumps(error_info, default=str) + "\n")
    except Exception as e:
        print(f"Warning: Could not write error log: {e}")

    # Try to update session state if context engineering is active
    try:
        from optimization_engine.context.session_state import get_session
    except ImportError:
        get_session = None  # Context module not available
    if get_session is not None:
        try:
            get_session().add_error(
                f"Trial {trial_number}: {error_type} - {error_message[:100]}",
                error_type=error_type
            )
        except Exception as e:
            # Best-effort: tracking must not break the optimization run
            print(f"Warning: Could not update session state: {e}")

    # Try to record to LAC if available
    try:
        from knowledge_base.lac import get_lac
    except ImportError:
        get_lac = None  # LAC not available
    if get_lac is not None:
        try:
            get_lac().record_insight(
                category="failure",
                context=f"Trial {trial_number} solver error",
                insight=f"{error_type}: {error_message[:200]}",
                confidence=0.7,
                tags=["solver", error_type, "automatic"]
            )
        except Exception as e:
            # Best-effort: tracking must not break the optimization run
            print(f"Warning: Could not record insight to LAC: {e}")

    return {
        "error_tracked": True,
        "error_type": error_type,
        "error_message": error_message[:200],
        "f06_extracted": bool(f06_snippet)
    }


# Registration metadata for this plugin: hook name, the hook point it
# attaches to, its priority, whether it is enabled, and a description.
HOOK_CONFIG = dict(
    name="error_tracker",
    hook_point="post_solve",
    priority=100,  # Run early to capture before cleanup
    enabled=True,
    description="Preserves solver errors for context learning",
)


# Accessor that lets a hook manager obtain the callable to register
def get_hook():
    """Provide the post_solve hook callable (``track_error``) for registration."""
    hook = track_error
    return hook


# Public names exported for direct plugin discovery
__all__ = [
    'track_error',
    'HOOK_CONFIG',
    'get_hook',
]