# atomizer-dashboard/backend/api/routes/insights.py

"""
Study Insights API endpoints
Provides physics-focused visualizations for completed optimization trials

Key Features:
- Config-driven insights from optimization_config.json
- AI recommendations based on objectives
- Report generation with PDF export
- Objective-linked and standalone insights
"""

from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse, HTMLResponse
from pydantic import BaseModel
from pathlib import Path
from typing import List, Dict, Optional, Any
import json
import sys

# Add project root to path
# (the directory five levels above this file). The route handlers below import
# `optimization_engine` lazily; presumably that package lives at the project
# root, which is why the root is appended here - confirm against repo layout.
sys.path.append(str(Path(__file__).parent.parent.parent.parent.parent))

# Router on which every endpoint in this module is registered via decorators.
router = APIRouter()

# Base studies directory
# ("studies" folder under the same root); resolve_study_path() searches it.
STUDIES_DIR = Path(__file__).parent.parent.parent.parent.parent / "studies"


class InsightSpecRequest(BaseModel):
    """Request model for insight specification.

    Each instance is copied field-for-field into an ``InsightSpec`` by the
    ``generate_report`` endpoint when the caller supplies explicit specs.
    """
    # Insight type identifier (passed through as InsightSpec.type)
    type: str
    # Display name for the insight
    name: str
    # Whether the insight is enabled; defaults to True
    enabled: bool = True
    # Optional name of the objective this insight is linked to
    linked_objective: Optional[str] = None
    # Free-form, insight-specific options; defaults to an empty dict
    config: Dict[str, Any] = {}
    # Whether the insight should be included in the report; defaults to True
    include_in_report: bool = True


class GenerateReportRequest(BaseModel):
    """Request model for report generation.

    Consumed by the ``generate_report`` endpoint.
    """
    # Explicit insight specs; when None or empty, generate_report falls back to
    # the study's configured insights and then to recommendations.
    specs: Optional[List[InsightSpecRequest]] = None
    # Forwarded to InsightReport.generate_report_html(include_appendix=...)
    include_appendix: bool = True


def _looks_like_study(candidate: Path) -> bool:
    """Return True if *candidate* is a directory carrying a study marker.

    A study folder is recognised by a ``1_setup`` entry or an
    ``optimization_config.json`` file directly inside it.
    """
    return candidate.is_dir() and (
        (candidate / "1_setup").exists()
        or (candidate / "optimization_config.json").exists()
    )


def resolve_study_path(study_id: str) -> Path:
    """Find study folder by scanning all topic directories.

    Lookup order:
      1. ``STUDIES_DIR / study_id`` (flat layout, backwards compatibility)
      2. ``STUDIES_DIR / <topic> / study_id`` for every non-hidden topic
         folder, visited in sorted order so the result is deterministic when
         the same id exists under several topics.

    Args:
        study_id: Study folder name. Must be a single plain path component;
            empty values, ``.``/``..`` and values containing path separators
            are treated as "not found" so a request cannot escape
            ``STUDIES_DIR``.

    Returns:
        Path to the study folder.

    Raises:
        HTTPException: 404 if no matching study folder exists (including the
            case where ``STUDIES_DIR`` itself is missing).
    """
    # study_id originates from the URL; only accept a bare folder name.
    is_plain_name = (
        bool(study_id)
        and study_id not in (".", "..")
        and Path(study_id).name == study_id
    )
    if not is_plain_name:
        raise HTTPException(status_code=404, detail=f"Study not found: {study_id}")

    # First check direct path (backwards compatibility)
    direct_path = STUDIES_DIR / study_id
    if _looks_like_study(direct_path):
        return direct_path

    # Scan topic folders for nested structure. Guard against a missing studies
    # directory so the caller gets a 404 instead of an unhandled OSError.
    if STUDIES_DIR.is_dir():
        for topic_dir in sorted(STUDIES_DIR.iterdir()):
            if topic_dir.name.startswith('.') or not topic_dir.is_dir():
                continue
            study_dir = topic_dir / study_id
            if _looks_like_study(study_dir):
                return study_dir

    raise HTTPException(status_code=404, detail=f"Study not found: {study_id}")


@router.get("/studies/{study_id}/available")
async def list_available_insights(study_id: str):
    """Report which insight types can currently be produced for a study.

    Returns a dict with ``study_id`` and ``insights``. If the insights package
    cannot be imported, the list is empty and an ``error`` message is added
    instead of failing the request. Any other unexpected failure becomes a
    500; HTTP errors from study resolution pass through unchanged.
    """
    try:
        study_dir = resolve_study_path(study_id)

        # Imported lazily so a missing engine only affects this request
        from optimization_engine.insights import (
            list_available_insights as _discover_insights,
        )

        payload = {"study_id": study_id}
        payload["insights"] = _discover_insights(study_dir)
        return payload
    except HTTPException:
        raise
    except ImportError as exc:
        reason = str(exc)
        return {
            "study_id": study_id,
            "insights": [],
            "error": f"Insights module not available: {reason}",
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.get("/studies/{study_id}/all")
async def list_all_insights():
    """Return every insight type known to the insights registry.

    The route path carries a ``study_id`` segment, but the handler does not
    take it as a parameter, so the listing does not depend on any study.
    If the insights package cannot be imported, an empty list plus an
    ``error`` message is returned; other failures become a 500.
    """
    try:
        from optimization_engine.insights import list_insights as _registered_insights

        registry_listing = _registered_insights()
        return {"insights": registry_listing}
    except ImportError as exc:
        reason = str(exc)
        return {
            "insights": [],
            "error": f"Insights module not available: {reason}",
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


class GenerateInsightRequest(BaseModel):
    """Request model for insight generation.

    Consumed by the ``generate_insight`` endpoint.
    """
    # Iteration to read results from: "best_design_archive" maps to
    # 3_results/best_design_archive, anything else to 2_iterations/<value>.
    iteration: Optional[str] = None  # e.g., "iter5", "best_design_archive"
    # Passed to InsightConfig(trial_id=...)
    trial_id: Optional[int] = None
    # Passed to InsightConfig(extra=...); defaults to an empty dict
    config: Dict[str, Any] = {}


def _is_valid_op2(op2_path: Path) -> bool:
    """Quick check if OP2 file is valid (not from a failed solve)."""
    try:
        # Check file size - failed OP2s are often very small
        if op2_path.stat().st_size < 10000:  # Less than 10KB is suspicious
            return False

        # Try to read header to verify it's a valid OP2
        with open(op2_path, 'rb') as f:
            header = f.read(100)
            # Valid OP2 files have specific markers
            if b'NASTRAN' not in header and b'XXXXXXXX' not in header:
                # Check for common valid patterns
                pass  # Size check is usually enough

        return True
    except Exception:
        return False


@router.get("/studies/{study_id}/iterations")
async def list_iterations(study_id: str):
    """Enumerate result folders of a study that contain a usable OP2 file.

    Folders directly under ``2_iterations`` are reported as type
    ``"iteration"`` and ordered newest-first by the modification time of
    their newest valid OP2. If ``3_results/best_design_archive`` holds a
    valid OP2 (searched recursively), it is listed first as type ``"best"``.
    OP2 files rejected by ``_is_valid_op2`` are ignored.
    """

    def _newest_valid_op2(candidates):
        """Return the most recently modified valid OP2 in *candidates*, or None."""
        usable = [c for c in candidates if _is_valid_op2(c)]
        if not usable:
            return None
        return max(usable, key=lambda p: p.stat().st_mtime)

    try:
        study_root = resolve_study_path(study_id)

        # Regular iteration folders
        regular_entries = []
        iterations_root = study_root / "2_iterations"
        if iterations_root.exists():
            for folder in sorted(iterations_root.iterdir(), reverse=True):
                if not folder.is_dir():
                    continue
                latest = _newest_valid_op2(folder.glob("*.op2"))
                if latest is None:
                    continue
                regular_entries.append({
                    "id": folder.name,
                    "path": str(folder.relative_to(study_root)),
                    "op2_file": latest.name,
                    "modified": latest.stat().st_mtime,
                    "type": "iteration"
                })

        # Archived best design (at most one entry)
        best_entries = []
        archive_root = study_root / "3_results" / "best_design_archive"
        if archive_root.exists():
            latest = _newest_valid_op2(archive_root.glob("**/*.op2"))
            if latest is not None:
                best_entries.append({
                    "id": "best_design_archive",
                    "path": "3_results/best_design_archive",
                    "op2_file": latest.name,
                    "modified": latest.stat().st_mtime,
                    "type": "best",
                    "label": "Best Design (Recommended)"
                })

        # Newest first; the stable sort keeps the name-descending order on ties
        regular_entries.sort(key=lambda entry: entry["modified"], reverse=True)

        return {
            "study_id": study_id,
            "iterations": best_entries + regular_entries,
            "count": len(best_entries) + len(regular_entries)
        }

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.post("/studies/{study_id}/generate/{insight_type}")
async def generate_insight(
    study_id: str,
    insight_type: str,
    request: Optional[GenerateInsightRequest] = None
):
    """Generate a specific insight visualization.

    Args:
        study_id: Study identifier
        insight_type: Type of insight (e.g., 'zernike_wfe', 'stress_field', 'design_space')
        request: Optional generation config with iteration selection

    Returns:
        JSON with plotly_figure data and summary statistics

    Raises:
        HTTPException: 404 for an unknown study or insight type; 400 if the
            requested iteration would resolve outside the study folder or the
            insight's required data is missing; 500 if generation fails.
    """
    try:
        study_path = resolve_study_path(study_id)

        from optimization_engine.insights import get_insight, InsightConfig

        insight = get_insight(insight_type, study_path)
        if insight is None:
            raise HTTPException(status_code=404, detail=f"Unknown insight type: {insight_type}")

        # If iteration specified, override the OP2 path
        if request and request.iteration:
            iteration_id = request.iteration
            if iteration_id == "best_design_archive":
                iter_path = study_path / "3_results" / "best_design_archive"
            else:
                # The iteration id comes from the request body: refuse absolute
                # or drive-anchored values (which would replace the study
                # prefix when joined) and any ".." component.
                requested = Path(iteration_id)
                if requested.anchor or ".." in requested.parts:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Invalid iteration: {iteration_id}"
                    )
                iter_path = study_path / "2_iterations" / iteration_id

            # A missing folder or a folder without OP2 files leaves the
            # insight's default data source untouched (best effort).
            if iter_path.exists():
                op2_files = list(iter_path.glob("**/*.op2"))
                if op2_files:
                    # Override the insight's OP2 path
                    insight.op2_path = max(op2_files, key=lambda p: p.stat().st_mtime)
                    # Re-find geometry
                    try:
                        insight.geo_path = insight._find_geometry_file(insight.op2_path)
                    except (FileNotFoundError, AttributeError):
                        pass  # Use default

        if not insight.can_generate():
            raise HTTPException(
                status_code=400,
                detail=f"Cannot generate {insight_type}: required data not found"
            )

        # Configure insight
        trial_id = request.trial_id if request else None
        extra_config = request.config if request else {}
        config = InsightConfig(trial_id=trial_id, extra=extra_config)

        # Generate
        result = insight.generate(config)

        if not result.success:
            raise HTTPException(status_code=500, detail=result.error or "Generation failed")

        return {
            "success": True,
            "insight_type": insight_type,
            "study_id": study_id,
            "iteration": request.iteration if request else None,
            "html_path": str(result.html_path) if result.html_path else None,
            "plotly_figure": result.plotly_figure,
            "summary": result.summary
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/studies/{study_id}/view/{insight_type}")
async def view_insight_html(study_id: str, insight_type: str):
    """Serve an insight's HTML page (for iframe embedding).

    If ``3_insights`` already contains files matching
    ``<insight_type>_*.html``, the most recently modified one is returned.
    Otherwise the insight is generated with a default ``InsightConfig`` and
    the freshly written HTML is returned.
    """
    try:
        study_root = resolve_study_path(study_id)
        cache_dir = study_root / "3_insights"

        # Prefer a previously generated file when one is available
        if cache_dir.exists():
            candidates = list(cache_dir.glob(f"{insight_type}_*.html"))
            if candidates:
                latest = max(candidates, key=lambda p: p.stat().st_mtime)
                return HTMLResponse(content=latest.read_text(encoding='utf-8'))

        # Nothing cached: build the insight on demand
        from optimization_engine.insights import get_insight, InsightConfig

        insight = get_insight(insight_type, study_root)
        if insight is None:
            raise HTTPException(status_code=404, detail=f"Unknown insight type: {insight_type}")

        if not insight.can_generate():
            raise HTTPException(
                status_code=400,
                detail=f"Cannot generate {insight_type}: required data not found"
            )

        outcome = insight.generate(InsightConfig())

        if not (outcome.success and outcome.html_path):
            raise HTTPException(status_code=500, detail=outcome.error or "Generation failed")

        return HTMLResponse(content=outcome.html_path.read_text(encoding='utf-8'))

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


def _parse_insight_filename(stem: str):
    """Split an insight file stem into ``(insight_type, timestamp)``.

    Generated files are expected to be named ``<type>_<date>_<time>`` with
    all-digit date and time parts (e.g. ``zernike_wfe_20251220_143022``).
    Stems that do not end in two all-digit parts (for example
    ``STUDY_INSIGHTS_REPORT``) are returned whole with a ``None`` timestamp
    instead of being mis-split.
    """
    parts = stem.rsplit('_', 2)  # Split from right to get type and timestamp
    if len(parts) == 3 and parts[0] and parts[1].isdigit() and parts[2].isdigit():
        return parts[0], f"{parts[1]}_{parts[2]}"
    return stem, None


@router.get("/studies/{study_id}/generated")
async def list_generated_insights(study_id: str):
    """List all previously generated insight HTML files for a study.

    Returns:
        Dict with ``study_id`` and ``files``; each file entry holds
        ``filename``, ``insight_type``, ``timestamp`` (``None`` when the name
        carries no numeric timestamp), ``size_kb`` and ``modified``. Entries
        are sorted newest first. ``files`` is empty when the study has no
        ``3_insights`` folder.

    Raises:
        HTTPException: 404 for an unknown study, 500 on unexpected errors.
    """
    try:
        study_path = resolve_study_path(study_id)
        insights_dir = study_path / "3_insights"

        if not insights_dir.exists():
            return {"study_id": study_id, "files": []}

        files = []
        for html_file in insights_dir.glob("*.html"):
            # Parse insight type from filename (e.g., "zernike_wfe_20251220_143022.html")
            insight_type, timestamp = _parse_insight_filename(html_file.stem)

            # Single stat() call so size and mtime describe the same snapshot
            file_stat = html_file.stat()

            files.append({
                "filename": html_file.name,
                "insight_type": insight_type,
                "timestamp": timestamp,
                "size_kb": round(file_stat.st_size / 1024, 1),
                "modified": file_stat.st_mtime
            })

        # Sort by modification time (newest first)
        files.sort(key=lambda x: x['modified'], reverse=True)

        return {
            "study_id": study_id,
            "files": files
        }

    except HTTPException:
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/studies/{study_id}/configured")
async def get_configured_insights_endpoint(study_id: str):
    """Return the insight specs that the insights package reports as configured.

    Each spec is flattened into a plain dict. If the insights package cannot
    be imported, an empty list plus an ``error`` message is returned instead
    of failing the request.
    """
    try:
        study_root = resolve_study_path(study_id)

        from optimization_engine.insights import get_configured_insights

        configured = []
        for item in get_configured_insights(study_root):
            configured.append({
                "type": item.type,
                "name": item.name,
                "enabled": item.enabled,
                "linked_objective": item.linked_objective,
                "config": item.config,
                "include_in_report": item.include_in_report
            })

        return {"study_id": study_id, "configured": configured}

    except HTTPException:
        raise
    except ImportError as exc:
        reason = str(exc)
        return {
            "study_id": study_id,
            "configured": [],
            "error": f"Insights module not available: {reason}"
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.get("/studies/{study_id}/recommend")
async def recommend_insights_endpoint(study_id: str):
    """Return the insights package's recommendations for a study.

    The result of ``recommend_insights_for_study`` is passed through as-is.
    If the insights package cannot be imported, an empty list plus an
    ``error`` message is returned instead of failing the request.
    """
    try:
        study_root = resolve_study_path(study_id)

        from optimization_engine.insights import recommend_insights_for_study

        suggested = recommend_insights_for_study(study_root)
        return {"study_id": study_id, "recommendations": suggested}

    except HTTPException:
        raise
    except ImportError as exc:
        reason = str(exc)
        return {
            "study_id": study_id,
            "recommendations": [],
            "error": f"Insights module not available: {reason}"
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.post("/studies/{study_id}/report")
async def generate_report(study_id: str, request: GenerateReportRequest):
    """Build the study's HTML insights report.

    Spec selection, in order of precedence:
      1. specs supplied in the request body,
      2. insights configured for the study,
      3. recommendations from the insights package.

    Args:
        study_id: Study identifier
        request: Report options and optional explicit insight specs

    Returns:
        JSON with the report path, one result entry per insight and a
        total/successful/failed summary. Responds with 400 when no spec can
        be determined.
    """
    try:
        study_root = resolve_study_path(study_id)

        from optimization_engine.insights import (
            InsightReport, InsightSpec, get_configured_insights,
            recommend_insights_for_study
        )

        if request.specs:
            # Caller-provided specs take priority
            selected_specs = []
            for item in request.specs:
                selected_specs.append(
                    InsightSpec(
                        type=item.type,
                        name=item.name,
                        enabled=item.enabled,
                        linked_objective=item.linked_objective,
                        config=item.config,
                        include_in_report=item.include_in_report
                    )
                )
        else:
            # Study configuration first, recommendations as a last resort
            selected_specs = get_configured_insights(study_root)
            if not selected_specs:
                selected_specs = [
                    InsightSpec(
                        type=hint['type'],
                        name=hint['name'],
                        linked_objective=hint.get('linked_objective'),
                        config=hint.get('config', {})
                    )
                    for hint in recommend_insights_for_study(study_root)
                ]

        if not selected_specs:
            raise HTTPException(
                status_code=400,
                detail="No insights configured or recommended for this study"
            )

        # Run every insight, then render the combined HTML
        builder = InsightReport(study_root)
        outcomes = builder.generate_all(selected_specs)
        html_location = builder.generate_report_html(include_appendix=request.include_appendix)

        per_insight = []
        for outcome in outcomes:
            per_insight.append({
                "type": outcome.insight_type,
                "name": outcome.insight_name,
                "success": outcome.success,
                "linked_objective": outcome.linked_objective,
                "error": outcome.error
            })

        succeeded = len([o for o in outcomes if o.success])
        failed = len([o for o in outcomes if not o.success])

        return {
            "success": True,
            "study_id": study_id,
            "report_path": str(html_location),
            "results": per_insight,
            "summary": {
                "total": len(outcomes),
                "successful": succeeded,
                "failed": failed
            }
        }

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.get("/studies/{study_id}/report/view")
async def view_report(study_id: str):
    """Serve ``3_insights/STUDY_INSIGHTS_REPORT.html`` for embedding.

    Responds with 404 when the report file does not exist yet.
    """
    try:
        report_file = resolve_study_path(study_id) / "3_insights" / "STUDY_INSIGHTS_REPORT.html"

        if report_file.exists():
            return HTMLResponse(content=report_file.read_text(encoding='utf-8'))

        raise HTTPException(
            status_code=404,
            detail="No report generated yet. Use POST /insights/report to generate."
        )

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.get("/studies/{study_id}/summary")
async def get_insights_summary(study_id: str):
    """Return the contents of ``3_insights/insights_summary.json``.

    The loaded JSON gets its ``study_id`` key set to the requested id. When
    the file is absent, an empty placeholder summary is returned instead.
    """
    try:
        summary_file = resolve_study_path(study_id) / "3_insights" / "insights_summary.json"

        if summary_file.exists():
            with open(summary_file) as handle:
                payload = json.load(handle)
            payload["study_id"] = study_id
            return payload

        # No summary written yet
        return {
            "study_id": study_id,
            "generated_at": None,
            "insights": []
        }

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.get("/studies/{study_id}/new-best")
async def check_new_best(study_id: str):
    """Report whether ``3_results/new_best.json`` flags pending insight work.

    ``has_new_best`` mirrors the file's ``needs_insights`` value (False when
    the key is missing) and ``new_best`` carries the file's full contents.
    When the file does not exist, ``has_new_best`` is False and ``new_best``
    is None.
    """
    try:
        marker_file = resolve_study_path(study_id) / "3_results" / "new_best.json"

        # Start from the "nothing new" answer and fill it in if the file exists
        response = {
            "study_id": study_id,
            "has_new_best": False,
            "new_best": None
        }

        if marker_file.exists():
            with open(marker_file) as handle:
                record = json.load(handle)
            response["has_new_best"] = record.get("needs_insights", False)
            response["new_best"] = record

        return response

    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@router.post("/studies/{study_id}/auto-insights")
async def generate_auto_insights(study_id: str):
    """Generate insights for the current best solution based on config.

    Reads the ``insights`` list from optimization_config.json and generates
    every enabled insight for the iteration named in
    ``3_results/new_best.json`` (``iteration_folder``), or - when that is
    unavailable - for the most recently modified folder under
    ``2_iterations`` that contains an OP2 file. Clears the ``needs_insights``
    flag in new_best.json after generation.

    Returns:
        Dict with ``study_id``, ``iteration``, one ``results`` entry per
        enabled insight (``type``, ``name``, ``success``, ``html_path``,
        ``error``) and a total/successful/failed ``summary``. Unknown insight
        types are reported as failed entries. When no insights are
        configured, a dict with ``study_id``, ``message`` and empty
        ``results`` is returned instead.

    Raises:
        HTTPException: 404 for an unknown study; 400 when no iteration,
            config file or OP2 file can be found; 500 on unexpected errors.
    """
    try:
        study_path = resolve_study_path(study_id)

        # Find the iteration to generate insights for
        new_best_file = study_path / "3_results" / "new_best.json"
        iteration_id = None

        if new_best_file.exists():
            with open(new_best_file) as f:
                new_best = json.load(f)
            iteration_id = new_best.get("iteration_folder")

        if not iteration_id:
            # Fall back to the most recently modified iteration with an OP2
            iter_dir = study_path / "2_iterations"
            if iter_dir.exists():
                candidates = [
                    d for d in iter_dir.iterdir()
                    if d.is_dir() and any(d.glob("*.op2"))
                ]
                if candidates:
                    iteration_id = max(candidates, key=lambda p: p.stat().st_mtime).name

        if not iteration_id:
            raise HTTPException(status_code=400, detail="No iteration found for insight generation")

        # Load insights config
        config_path = study_path / "1_setup" / "optimization_config.json"
        if not config_path.exists():
            config_path = study_path / "optimization_config.json"

        if not config_path.exists():
            raise HTTPException(status_code=400, detail="No optimization_config.json found")

        with open(config_path) as f:
            config = json.load(f)

        insights_config = config.get("insights", [])
        if not insights_config:
            return {
                "study_id": study_id,
                "message": "No insights configured in optimization_config.json",
                "results": []
            }

        # Generate insights
        from optimization_engine.insights import get_insight, InsightConfig

        iter_path = study_path / "2_iterations" / iteration_id
        op2_files = list(iter_path.glob("*.op2"))

        if not op2_files:
            raise HTTPException(status_code=400, detail=f"No OP2 file in {iteration_id}")

        # Pick the newest OP2 (glob order is arbitrary), matching the other
        # endpoints in this module.
        op2_path = max(op2_files, key=lambda p: p.stat().st_mtime)
        results = []

        for insight_spec in insights_config:
            if not insight_spec.get("enabled", True):
                continue

            insight_type = insight_spec.get("type")
            insight_name = insight_spec.get("name", insight_type)

            try:
                insight = get_insight(insight_type, study_path)
                if not insight:
                    # Surface misconfigured types instead of silently skipping
                    results.append({
                        "type": insight_type,
                        "name": insight_name,
                        "success": False,
                        "html_path": None,
                        "error": f"Unknown insight type: {insight_type}"
                    })
                    continue

                insight.op2_path = op2_path
                # Re-find geometry for the overridden OP2, as generate_insight
                # does; insights without this hook keep their default.
                try:
                    insight.geo_path = insight._find_geometry_file(insight.op2_path)
                except (FileNotFoundError, AttributeError):
                    pass  # Use default

                config_obj = InsightConfig(extra=insight_spec.get("config", {}))
                result = insight.generate(config_obj)

                results.append({
                    "type": insight_type,
                    "name": insight_name,
                    "success": result.success,
                    "html_path": str(result.html_path) if result.html_path else None,
                    "error": result.error
                })
            except Exception as e:
                # One failing insight must not abort the remaining ones
                results.append({
                    "type": insight_type,
                    "name": insight_name,
                    "success": False,
                    "html_path": None,
                    "error": str(e)
                })

        # Clear the needs_insights flag (re-read so concurrent edits to other
        # keys made during generation are not overwritten with stale data)
        if new_best_file.exists():
            with open(new_best_file) as f:
                new_best = json.load(f)
            new_best["needs_insights"] = False
            with open(new_best_file, 'w') as f:
                json.dump(new_best, f, indent=2)

        return {
            "study_id": study_id,
            "iteration": iteration_id,
            "results": results,
            "summary": {
                "total": len(results),
                "successful": sum(1 for r in results if r["success"]),
                "failed": sum(1 for r in results if not r["success"])
            }
        }

    except HTTPException:
        raise
    except ImportError as e:
        raise HTTPException(status_code=500, detail=f"Insights module not available: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e