Files
Atomizer/optimization_engine/future/report_generator.py
Anto01 73a7b9d9f1 feat: Add dashboard chat integration and MCP server
Major changes:
- Dashboard: WebSocket-based chat with session management
- Dashboard: New chat components (ChatPane, ChatInput, ModeToggle)
- Dashboard: Enhanced UI with parallel coordinates chart
- MCP Server: New atomizer-tools server for Claude integration
- Extractors: Enhanced Zernike OPD extractor
- Reports: Improved report generator

New studies (configs and scripts only):
- M1 Mirror: Cost reduction campaign studies
- Simple Beam, Simple Bracket, UAV Arm studies

Note: Large iteration data (2_iterations/, best_design_archive/)
excluded via .gitignore - kept on local Gitea only.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 15:53:55 -05:00

379 lines
12 KiB
Python

"""
Report Generator Utility
Generates Markdown/HTML/PDF reports for optimization studies
Supports multiple data sources:
- optimization_config.json (primary config)
- study.db (Optuna SQLite, preferred)
- optimization_history_incremental.json (legacy)
"""
import json
import sqlite3
from pathlib import Path
from typing import Optional, List, Dict, Any
try:
import markdown
except ImportError:
markdown = None
from datetime import datetime
def _load_trials_from_db(db_path: Path) -> List[Dict[str, Any]]:
"""Load trial data from Optuna SQLite database."""
trials = []
try:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
# Get completed trials
cursor.execute("""
SELECT t.trial_id, t.number as trial_number, t.state
FROM trials t
WHERE t.state = 'COMPLETE'
ORDER BY t.number
""")
for row in cursor.fetchall():
trial = {
'trial_number': row['trial_number'],
'design_variables': {},
'results': {},
'objective': None
}
# Get params
cursor.execute("""
SELECT param_name, param_value FROM trial_params
WHERE trial_id = ?
""", (row['trial_id'],))
for p in cursor.fetchall():
try:
trial['design_variables'][p['param_name']] = float(p['param_value'])
except (ValueError, TypeError):
trial['design_variables'][p['param_name']] = p['param_value']
# Get objective values
cursor.execute("""
SELECT objective, value FROM trial_values
WHERE trial_id = ?
""", (row['trial_id'],))
values = cursor.fetchall()
if values:
trial['objective'] = float(values[0]['value'])
for v in values:
trial['results'][f'objective_{v["objective"]}'] = float(v['value'])
# Get user attributes (like weighted_sum, mass_kg, etc.)
cursor.execute("""
SELECT key, value_json FROM trial_user_attributes
WHERE trial_id = ?
""", (row['trial_id'],))
for attr in cursor.fetchall():
try:
val = json.loads(attr['value_json'])
trial['results'][attr['key']] = val
except:
pass
if trial['objective'] is not None:
trials.append(trial)
conn.close()
except Exception as e:
print(f"Error loading trials from DB: {e}")
return trials
def _fmt(value: Any, spec: str = '.4f') -> str:
    """Format *value* with *spec* when it is numeric; return 'N/A' otherwise.

    Replaces the original inline constructs of the form
    ``{x:.4f if isinstance(x, (int, float)) else 'N/A'}`` — a conditional
    expression is not a valid f-string format specifier, so those lines
    raised ``ValueError: Invalid format specifier`` for every numeric value.
    """
    return format(value, spec) if isinstance(value, (int, float)) else 'N/A'


def _objective_sort_key(trial: Dict[str, Any]) -> float:
    """Sort key for trials: the numeric objective, or +inf when missing.

    Keeps ``min``/``sorted`` from raising TypeError when a trial's
    'objective' is None or absent.
    """
    value = trial.get('objective')
    return value if isinstance(value, (int, float)) else float('inf')


def generate_study_report(
    study_dir: Path,
    output_format: str = "markdown",
    include_llm_summary: bool = False
) -> Optional[Path]:
    """
    Generate a report for the study.

    Args:
        study_dir: Path to the study directory
        output_format: 'markdown'/'md', 'html', or 'pdf' (unknown formats
            fall back to markdown)
        include_llm_summary: Whether to include AI-generated summary

    Returns:
        Path to the generated report file, or None when no trial data is
        found or generation fails.
    """
    try:
        study_dir = Path(study_dir)

        # Load config - try the canonical location first, then the study root.
        config_path = study_dir / "1_setup" / "optimization_config.json"
        if not config_path.exists():
            config_path = study_dir / "optimization_config.json"
        config: Dict[str, Any] = {}
        if config_path.exists():
            with open(config_path) as f:
                config = json.load(f)

        # Load history - Optuna DB first (preferred), then legacy JSON.
        history: List[Dict[str, Any]] = []
        db_path = study_dir / "3_results" / "study.db"
        if not db_path.exists():
            db_path = study_dir / "2_results" / "study.db"
        if db_path.exists():
            history = _load_trials_from_db(db_path)
        if not history:
            for hist_dir in ["2_results", "3_results", "2_iterations"]:
                history_path = study_dir / hist_dir / "optimization_history_incremental.json"
                if history_path.exists():
                    with open(history_path) as f:
                        history = json.load(f)
                    break
        if not history:
            print(f"No trial data found in {study_dir}")
            return None

        # Best trial = lowest objective (minimization).
        best_trial = min(history, key=_objective_sort_key)

        study_name = config.get('study_name', study_dir.name)

        # Objective names from config, with a generic fallback.
        objectives = config.get('objectives', [])
        obj_names = [o.get('name', f'Objective {i}') for i, o in enumerate(objectives)]
        if not obj_names:
            obj_names = ['Objective']

        # Count design variables from config, else from the best trial.
        design_vars = config.get('design_variables', [])
        n_vars = len(design_vars) if design_vars else len(best_trial.get('design_variables', {}))

        n_trials = len(history)
        n_target = config.get('optimization_settings', {}).get('n_trials', n_trials)
        status = 'Completed' if n_trials >= n_target else 'In Progress'

        # Improvement relative to the first trial; guarded so a missing or
        # non-numeric objective can neither divide by zero nor raise.
        first_obj = history[0].get('objective', 0)
        best_obj = best_trial.get('objective', 0)
        if (isinstance(first_obj, (int, float)) and isinstance(best_obj, (int, float))
                and first_obj != 0):
            improvement = (first_obj - best_obj) / first_obj * 100
        else:
            improvement = 0

        # Generate Markdown content
        md_content = f"""# Optimization Report: {study_name}

**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M')}
**Status**: {status}

---

## Executive Summary

{_generate_summary(history, best_trial, include_llm_summary)}

---

## Study Configuration

| Setting | Value |
|---------|-------|
| **Objectives** | {', '.join(obj_names)} |
| **Design Variables** | {n_vars} parameters |
| **Target Trials** | {n_target} |
| **Completed Trials** | {n_trials} |

---

## Best Result

**Trial #{best_trial.get('trial_number', 'N/A')}** achieved the optimal design:

| Metric | Value |
|--------|-------|
| **Objective Value** | {_fmt(best_trial.get('objective'))} |
| **Improvement** | {improvement:.1f}% |

### Design Parameters

| Parameter | Value |
|-----------|-------|
"""
        # Add best design variables (numeric values formatted, others verbatim).
        for k, v in best_trial.get('design_variables', {}).items():
            formatted = format(v, '.4f') if isinstance(v, (int, float)) else v
            md_content += f"| {k} | {formatted} |\n"

        # Add result metrics if available (None values are skipped).
        results = best_trial.get('results', {})
        if results:
            md_content += "\n### Result Metrics\n\n| Metric | Value |\n|--------|-------|\n"
            for k, v in results.items():
                if isinstance(v, (int, float)):
                    md_content += f"| {k} | {v:.4f} |\n"
                elif v is not None:
                    md_content += f"| {k} | {v} |\n"

        md_content += f"""
---

## Optimization Progress

The optimization process completed **{n_trials} trials** exploring the design space.

### Statistics

| Metric | Value |
|--------|-------|
| First Trial Objective | {_fmt(first_obj)} |
| Best Trial Objective | {_fmt(best_obj)} |
| Total Improvement | {improvement:.1f}% |
| Best Trial Found | #{best_trial.get('trial_number', 'N/A')} |

---

## Top 5 Designs

| Rank | Trial | Objective |
|------|-------|-----------|
"""
        # Add top 5 trials by ascending objective.
        for i, t in enumerate(sorted(history, key=_objective_sort_key)[:5], 1):
            md_content += f"| {i} | #{t.get('trial_number', 'N/A')} | {_fmt(t.get('objective'))} |\n"

        md_content += f"""
---

## Files

- **Configuration**: `1_setup/optimization_config.json`
- **Database**: `3_results/study.db`
- **This Report**: `STUDY_REPORT.md`

---

*Report generated by Atomizer Dashboard*
*{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
"""

        # Determine output directory: prefer 3_results, then 2_results,
        # else the study root.
        output_dir = study_dir / "3_results"
        if not output_dir.exists():
            output_dir = study_dir / "2_results"
        if not output_dir.exists():
            output_dir = study_dir
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        if output_format in ['markdown', 'md']:
            output_path = output_dir / f"STUDY_REPORT_{timestamp}.md"
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(md_content)
        elif output_format == 'html':
            output_path = output_dir / f"STUDY_REPORT_{timestamp}.html"
            if markdown:
                html_content = markdown.markdown(md_content, extensions=['tables'])
            else:
                # markdown package unavailable: ship the raw text readably.
                html_content = f"<pre>{md_content}</pre>"
            styled_html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Optimization Report - {study_name}</title>
<style>
body {{
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    max-width: 900px;
    margin: 0 auto;
    padding: 40px 20px;
    background: #0a0f1a;
    color: #e2e8f0;
}}
h1 {{ color: #3b82f6; border-bottom: 2px solid #1e3a5f; padding-bottom: 10px; }}
h2 {{ color: #60a5fa; margin-top: 40px; }}
h3 {{ color: #93c5fd; }}
table {{ border-collapse: collapse; width: 100%; margin: 20px 0; }}
th, td {{ border: 1px solid #334155; padding: 12px; text-align: left; }}
th {{ background: #1e293b; color: #94a3b8; }}
tr:nth-child(even) {{ background: #0f172a; }}
code {{ background: #1e293b; padding: 2px 6px; border-radius: 4px; }}
hr {{ border: none; border-top: 1px solid #334155; margin: 30px 0; }}
em {{ color: #64748b; }}
</style>
</head>
<body>
{html_content}
</body>
</html>"""
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(styled_html)
        elif output_format == 'pdf':
            try:
                from weasyprint import HTML
                output_path = output_dir / f"STUDY_REPORT_{timestamp}.pdf"
                if markdown:
                    html_content = markdown.markdown(md_content, extensions=['tables'])
                else:
                    html_content = f"<pre>{md_content}</pre>"
                HTML(string=html_content).write_pdf(str(output_path))
            except ImportError:
                # WeasyPrint is an optional heavy dependency; degrade to HTML.
                print("WeasyPrint not installed, falling back to HTML")
                return generate_study_report(study_dir, 'html', include_llm_summary)
        else:
            # Unknown format requested: default to markdown.
            output_path = output_dir / f"STUDY_REPORT_{timestamp}.md"
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(md_content)

        return output_path
    except Exception as e:
        # Top-level boundary: report generation is best-effort; log and
        # signal failure with None rather than crashing the caller.
        print(f"Report generation error: {e}")
        import traceback
        traceback.print_exc()
        return None
def _generate_summary(history, best_trial, use_llm):
"""Generate executive summary text."""
if use_llm:
return "[AI Summary Placeholder] The optimization successfully identified a design that minimizes the objective while satisfying all constraints."
if not history:
return "No trials completed yet."
n_trials = len(history)
best_obj = best_trial.get('objective', 0)
best_num = best_trial.get('trial_number', 'N/A')
first_obj = history[0].get('objective', 0)
improvement = ((first_obj - best_obj) / first_obj * 100) if first_obj != 0 else 0
# Determine convergence efficiency
efficiency = "excellent" if best_num < n_trials * 0.3 else "good" if best_num < n_trials * 0.6 else "moderate"
summary = f"""This optimization study completed **{n_trials} trials** and achieved a **{improvement:.1f}% improvement** from the initial design.
The best solution was found at **Trial #{best_num}** with an objective value of **{best_obj:.4f}**, demonstrating {efficiency} convergence efficiency.
"""
return summary