Files
Atomizer/optimization_engine/generate_report_markdown.py
Anto01 e3bdb08a22 feat: Major update with validators, skills, dashboard, and docs reorganization
- Add validation framework (config, model, results, study validators)
- Add Claude Code skills (create-study, run-optimization, generate-report,
  troubleshoot, analyze-model)
- Add Atomizer Dashboard (React frontend + FastAPI backend)
- Reorganize docs into structured directories (00-09)
- Add neural surrogate modules and training infrastructure
- Add multi-objective optimization support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 19:23:58 -05:00

570 lines
23 KiB
Python

"""
Generate comprehensive markdown optimization reports with graphs.
Uses Optuna's built-in visualization library for professional-quality plots.
"""
import json
import sys
from pathlib import Path
from typing import Dict, Any, List, Optional
import numpy as np
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import optuna
from optuna.visualization import (
plot_optimization_history,
plot_parallel_coordinate,
plot_param_importances,
plot_slice,
plot_contour
)
def create_confidence_progression_plot(confidence_history: List[Dict], phase_transitions: List[Dict], output_dir: Path) -> Optional[str]:
    """Plot how the surrogate confidence metrics evolved across trials.

    Args:
        confidence_history: Per-trial records, each carrying 'trial_number'
            and a 'confidence_metrics' dict with keys 'overall_confidence',
            'convergence_score', 'exploration_coverage', 'prediction_stability'.
        phase_transitions: Records of exploration->exploitation switches;
            each carries a 'trial_number' used to place a vertical marker.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('confidence_progression.png'), or None when
        there is no confidence history to plot.
    """
    if not confidence_history:
        return None

    def metric(key: str) -> List[float]:
        # Extract one named confidence metric from every history record.
        return [record['confidence_metrics'][key] for record in confidence_history]

    trials = [record['trial_number'] for record in confidence_history]

    plt.figure(figsize=(12, 7))
    plt.plot(trials, metric('overall_confidence'), 'b-', linewidth=2.5, label='Overall Confidence')
    plt.plot(trials, metric('convergence_score'), 'g--', alpha=0.7, label='Convergence Score')
    plt.plot(trials, metric('exploration_coverage'), 'orange', linestyle='--', alpha=0.7, label='Exploration Coverage')
    plt.plot(trials, metric('prediction_stability'), 'purple', linestyle='--', alpha=0.7, label='Prediction Stability')

    # Vertical red line + label wherever the optimizer switched to exploitation.
    for switch in phase_transitions:
        switch_trial = switch['trial_number']
        plt.axvline(x=switch_trial, color='red', linestyle='-', linewidth=2, alpha=0.8)
        plt.text(switch_trial, 0.95, f' Exploitation Phase', rotation=90,
                 verticalalignment='top', fontsize=10, color='red', fontweight='bold')

    # Horizontal guide line at the 65% confidence threshold.
    plt.axhline(y=0.65, color='gray', linestyle=':', linewidth=1.5, alpha=0.6, label='Confidence Threshold (65%)')

    plt.xlabel('Trial Number', fontsize=11)
    plt.ylabel('Confidence Score (0-1)', fontsize=11)
    plt.title('Surrogate Confidence Progression', fontsize=13, fontweight='bold')
    plt.legend(loc='lower right', fontsize=9)
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 1.05)
    plt.tight_layout()

    plot_file = output_dir / 'confidence_progression.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_convergence_plot(history: List[Dict], target: Optional[float], output_dir: Path) -> str:
    """Plot each trial's objective alongside the running best objective.

    Args:
        history: Trial records, each with 'trial_number' and 'objective'.
        target: Target value; when provided, a horizontal goal line is drawn
            at zero (the objective is an error-from-target measure).
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('convergence_plot.png').
    """
    xs = [entry['trial_number'] for entry in history]
    ys = [entry['objective'] for entry in history]
    # Running minimum: best objective seen up to and including each trial.
    best_so_far = np.minimum.accumulate(ys)

    plt.figure(figsize=(10, 6))
    plt.plot(xs, ys, 'o-', alpha=0.5, label='Trial objective')
    plt.plot(xs, best_so_far, 'r-', linewidth=2, label='Best so far')
    if target is not None:
        # Objective is error from target, so the goal line sits at zero.
        plt.axhline(y=0, color='g', linestyle='--', linewidth=2, label=f'Target (error = 0)')

    plt.xlabel('Trial Number')
    plt.ylabel('Objective Value (Error from Target)')
    plt.title('Optimization Convergence')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_file = output_dir / 'convergence_plot.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_design_space_plot(history: List[Dict], output_dir: Path) -> Optional[str]:
    """Create a 2D scatter of the explored design points, colored by objective.

    Args:
        history: Trial records, each with a 'design_variables' dict and an
            'objective' value.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('design_space_plot.png'), or None when the
        problem does not have exactly two design variables (the plot is
        only meaningful in 2D) or the history is empty.
    """
    # Guard: previously an empty history raised IndexError on history[0].
    if not history:
        return None
    first_trial = history[0]
    var_names = list(first_trial['design_variables'].keys())
    if len(var_names) != 2:
        return None  # Only works for 2D problems

    var1_name, var2_name = var_names
    var1_values = [t['design_variables'][var1_name] for t in history]
    var2_values = [t['design_variables'][var2_name] for t in history]
    objectives = [t['objective'] for t in history]

    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(var1_values, var2_values, c=objectives, s=100,
                          cmap='viridis', alpha=0.6, edgecolors='black')
    plt.colorbar(scatter, label='Objective Value')
    plt.xlabel(var1_name.replace('_', ' ').title())
    plt.ylabel(var2_name.replace('_', ' ').title())
    plt.title('Design Space Exploration')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_file = output_dir / 'design_space_plot.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_parameter_sensitivity_plot(history: List[Dict], output_dir: Path) -> str:
    """Render one scatter panel per design variable (value vs. objective).

    Args:
        history: Trial records, each with a 'design_variables' dict and an
            'objective' value.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('parameter_sensitivity.png').
    """
    var_names = list(history[0]['design_variables'].keys())
    objectives = [trial['objective'] for trial in history]
    n_vars = len(var_names)

    fig, axes = plt.subplots(1, n_vars, figsize=(6 * n_vars, 5))
    if n_vars == 1:
        # subplots() returns a bare Axes (not an array) for a single panel.
        axes = [axes]

    for ax, name in zip(axes, var_names):
        values = [trial['design_variables'][name] for trial in history]
        ax.scatter(values, objectives, alpha=0.6, s=50)
        ax.set_xlabel(name.replace('_', ' ').title())
        ax.set_ylabel('Objective Value')
        ax.set_title(f'Sensitivity to {name.replace("_", " ").title()}')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plot_file = output_dir / 'parameter_sensitivity.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_optuna_plots(study: optuna.Study, output_dir: Path) -> Dict[str, str]:
    """
    Create professional Optuna visualization plots.

    Each plot is attempted independently: a failure in one (e.g. too few
    trials for an importance analysis) prints a warning and skips only
    that plot.

    Args:
        study: Optuna study object
        output_dir: Directory to save plots
    Returns:
        Dictionary mapping plot names to filenames
    """
    # (key, output filename, (width, height), plot function,
    #  label used in the warning message, optional precondition on the study)
    plot_specs = [
        ('parallel_coordinate', 'optuna_parallel_coordinate.png', (1200, 600),
         plot_parallel_coordinate, 'parallel coordinate plot', None),
        ('optimization_history', 'optuna_optimization_history.png', (1000, 600),
         plot_optimization_history, 'optimization history plot', None),
        ('param_importances', 'optuna_param_importances.png', (800, 500),
         plot_param_importances, 'parameter importance plot', None),
        ('slice', 'optuna_slice.png', (1000, 600),
         plot_slice, 'slice plot', None),
        # Contour heatmap only makes sense for exactly two parameters.
        ('contour', 'optuna_contour.png', (800, 800),
         plot_contour, 'contour plot', lambda s: len(s.best_params) == 2),
    ]

    plots: Dict[str, str] = {}
    for key, filename, (width, height), plot_fn, label, precondition in plot_specs:
        try:
            # The precondition is evaluated inside the try because
            # study.best_params itself can raise (e.g. no completed trials).
            if precondition is not None and not precondition(study):
                continue
            fig = plot_fn(study)
            if fig is not None:
                plot_file = output_dir / filename
                fig.write_image(str(plot_file), width=width, height=height)
                plots[key] = plot_file.name
        except Exception as e:
            print(f"Warning: Could not create {label}: {e}")
    return plots
def generate_markdown_report(history_file: Path, target_value: Optional[float] = None,
                             tolerance: float = 0.1, reports_dir: Optional[Path] = None,
                             study: Optional[optuna.Study] = None) -> str:
    """Generate comprehensive markdown optimization report with graphs.

    Args:
        history_file: JSON file holding the trial history (a list of dicts,
            each with 'trial_number', 'objective', 'design_variables' and
            'results'). Expected to live under <study_dir>/<subdir>/.
        target_value: Optional target used for the success assessment and
            per-metric error reporting.
        tolerance: Objective error below which the target counts as achieved.
        reports_dir: Where plots (and typically the report) go; defaults to
            <study_dir>/3_reports.
        study: Optional Optuna study; when given, the advanced Optuna
            visualizations are generated and embedded.

    Returns:
        The full markdown report as a single string.
    """
    # Load history
    with open(history_file) as f:
        history = json.load(f)
    if not history:
        return "# Optimization Report\n\nNo optimization history found."

    # Graphs should be saved to 3_reports/ folder (same as markdown file).
    # The study directory is two levels above the history file.
    study_dir = history_file.parent.parent
    study_name = study_dir.name
    if reports_dir is None:
        reports_dir = study_dir / "3_reports"
    reports_dir.mkdir(parents=True, exist_ok=True)

    # Load phase transition and confidence history if available.
    # Both are optional inputs produced by the adaptive optimizer; any
    # read/parse error simply leaves them empty (best-effort).
    results_dir = study_dir / "2_results"
    phase_transitions = []
    confidence_history = []
    phase_transition_file = results_dir / "phase_transitions.json"
    confidence_history_file = results_dir / "confidence_history.json"
    if phase_transition_file.exists():
        try:
            with open(phase_transition_file) as f:
                phase_transitions = json.load(f)
        except Exception:
            pass
    if confidence_history_file.exists():
        try:
            with open(confidence_history_file) as f:
                confidence_history = json.load(f)
        except Exception:
            pass

    # Generate plots in reports folder
    convergence_plot = create_convergence_plot(history, target_value, reports_dir)
    design_space_plot = create_design_space_plot(history, reports_dir)
    sensitivity_plot = create_parameter_sensitivity_plot(history, reports_dir)

    # Generate confidence progression plot if data available
    confidence_plot = None
    if confidence_history:
        print(" Generating confidence progression plot...")
        confidence_plot = create_confidence_progression_plot(confidence_history, phase_transitions, reports_dir)

    # Generate Optuna plots if study object provided
    optuna_plots = {}
    if study is not None:
        print(" Generating Optuna visualization plots...")
        optuna_plots = create_optuna_plots(study, reports_dir)
        print(f" Generated {len(optuna_plots)} Optuna plots")

    # Build markdown report, line by line; joined once at the end.
    lines = []
    lines.append(f"# {study_name.replace('_', ' ').title()} - Optimization Report")
    lines.append("")
    lines.append(f"**Total Trials**: {len(history)}")
    lines.append("")

    # Study information
    lines.append("## Study Information")
    lines.append("")
    first_trial = history[0]
    design_vars = list(first_trial['design_variables'].keys())
    lines.append(f"- **Design Variables**: {', '.join([v.replace('_', ' ').title() for v in design_vars])}")
    lines.append(f"- **Number of Trials**: {len(history)}")
    lines.append("")

    # Adaptive optimization strategy information (only when the adaptive
    # optimizer produced phase/confidence data).
    if phase_transitions or confidence_history:
        lines.append("## Adaptive Optimization Strategy")
        lines.append("")
        lines.append("This study used adaptive surrogate-based optimization with confidence-driven phase transitions.")
        lines.append("")
        if phase_transitions:
            lines.append("### Phase Transitions")
            lines.append("")
            for transition in phase_transitions:
                trial_num = transition['trial_number']
                conf = transition['confidence_metrics']['overall_confidence']
                lines.append(f"- **Trial #{trial_num}**: EXPLORATION → EXPLOITATION")
                lines.append(f" - Confidence at transition: {conf:.1%}")
                lines.append(f" - Convergence score: {transition['confidence_metrics']['convergence_score']:.1%}")
                lines.append(f" - Exploration coverage: {transition['confidence_metrics']['exploration_coverage']:.1%}")
                lines.append(f" - Prediction stability: {transition['confidence_metrics']['prediction_stability']:.1%}")
            lines.append("")
        else:
            lines.append("### Phase Transitions")
            lines.append("")
            lines.append("No phase transitions occurred - optimization remained in exploration phase.")
            lines.append("This may indicate:")
            lines.append("- Insufficient trials to build surrogate confidence")
            lines.append("- Poor exploration coverage of the design space")
            lines.append("- Unstable convergence behavior")
            lines.append("")
        if confidence_plot:
            lines.append("### Confidence Progression")
            lines.append("")
            lines.append(f"![Confidence Progression]({confidence_plot})")
            lines.append("")
            lines.append("This plot shows how the surrogate model confidence evolved over the optimization.")
            lines.append("The red vertical line (if present) marks the transition to exploitation phase.")
            lines.append("")
        lines.append("")

    # Best result — lowest objective wins (minimization problem).
    objectives = [t['objective'] for t in history]
    best_idx = np.argmin(objectives)
    best_trial = history[best_idx]
    lines.append("## Best Result")
    lines.append("")
    lines.append(f"- **Trial**: #{best_trial['trial_number']}")
    lines.append("")

    # Show actual results FIRST (what the client cares about)
    lines.append("### Achieved Performance")
    for result, value in best_trial['results'].items():
        metric_name = result.replace('_', ' ').title()
        lines.append(f"- **{metric_name}**: {value:.4f}")
        # Show target comparison, but only for frequency-like metrics —
        # the target is assumed to apply to the frequency result.
        if target_value is not None and 'frequency' in result.lower():
            error = abs(value - target_value)
            lines.append(f" - Target: {target_value:.4f}")
            lines.append(f" - Error: {error:.4f} ({(error/target_value*100):.2f}%)")
    lines.append("")

    # Then design parameters that achieved it
    lines.append("### Design Parameters")
    for var, value in best_trial['design_variables'].items():
        lines.append(f"- **{var.replace('_', ' ').title()}**: {value:.4f}")
    lines.append("")

    # Technical objective last (for engineers), collapsed by default.
    lines.append("<details>")
    lines.append("<summary>Technical Details (Objective Function)</summary>")
    lines.append("")
    lines.append(f"- **Objective Value (Error)**: {best_trial['objective']:.6f}")
    lines.append("")
    lines.append("</details>")
    lines.append("")

    # Success assessment (only meaningful when a target was given).
    if target_value is not None:
        lines.append("## Success Assessment")
        lines.append("")
        best_objective = min(objectives)
        if best_objective <= tolerance:
            lines.append(f"### ✅ TARGET ACHIEVED")
            lines.append("")
            lines.append(f"Target value {target_value} was achieved within tolerance {tolerance}!")
            lines.append(f"- **Best Error**: {best_objective:.6f}")
        else:
            lines.append(f"### ⚠️ TARGET NOT YET ACHIEVED")
            lines.append("")
            lines.append(f"Target value {target_value} not achieved within tolerance {tolerance}")
            lines.append(f"- **Best Error**: {best_objective:.6f}")
            lines.append(f"- **Required Improvement**: {best_objective - tolerance:.6f}")
            lines.append(f"- **Recommendation**: Continue optimization with more trials")
        lines.append("")

    # Top 5 trials - show ACTUAL METRICS not just objective
    lines.append("## Top 5 Trials")
    lines.append("")
    sorted_history = sorted(history, key=lambda x: x['objective'])
    # Extract result column names (e.g. "first_frequency")
    result_cols = list(sorted_history[0]['results'].keys())
    result_col_names = [r.replace('_', ' ').title() for r in result_cols]
    # Build header with results AND design vars
    header_cols = ["Rank", "Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
    lines.append("| " + " | ".join(header_cols) + " |")
    lines.append("|" + "|".join(["-"*max(6, len(c)) for c in header_cols]) + "|")
    for i, trial in enumerate(sorted_history[:5], 1):
        result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
        var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
        row_data = [str(i), f"#{trial['trial_number']}"] + result_vals + var_vals
        lines.append("| " + " | ".join(row_data) + " |")
    lines.append("")

    # Statistics over the raw objective values
    lines.append("## Statistics")
    lines.append("")
    lines.append(f"- **Mean Objective**: {np.mean(objectives):.6f}")
    lines.append(f"- **Std Deviation**: {np.std(objectives):.6f}")
    lines.append(f"- **Best Objective**: {np.min(objectives):.6f}")
    lines.append(f"- **Worst Objective**: {np.max(objectives):.6f}")
    lines.append("")

    # Design variable ranges actually explored
    lines.append("### Design Variable Ranges")
    lines.append("")
    for var in design_vars:
        values = [t['design_variables'][var] for t in history]
        lines.append(f"**{var.replace('_', ' ').title()}**:")
        lines.append(f"- Min: {min(values):.6f}")
        lines.append(f"- Max: {max(values):.6f}")
        lines.append(f"- Mean: {np.mean(values):.6f}")
        lines.append("")

    # Convergence plot
    lines.append("## Convergence Plot")
    lines.append("")
    lines.append(f"![Convergence Plot]({convergence_plot})")
    lines.append("")
    lines.append("This plot shows how the optimization converged over time. The blue line shows each trial's objective value, while the red line shows the best objective found so far.")
    lines.append("")

    # Design space plot (only produced for 2D problems)
    if design_space_plot:
        lines.append("## Design Space Exploration")
        lines.append("")
        lines.append(f"![Design Space Plot]({design_space_plot})")
        lines.append("")
        lines.append("This plot shows which regions of the design space were explored. Darker colors indicate better objective values.")
        lines.append("")

    # Sensitivity plot
    lines.append("## Parameter Sensitivity")
    lines.append("")
    lines.append(f"![Parameter Sensitivity]({sensitivity_plot})")
    lines.append("")
    lines.append("These plots show how each design variable affects the objective value. Steeper slopes indicate higher sensitivity.")
    lines.append("")

    # Optuna Advanced Visualizations (only when a study object was passed in)
    if optuna_plots:
        lines.append("## Advanced Optimization Analysis (Optuna)")
        lines.append("")
        lines.append("The following plots leverage Optuna's professional visualization library to provide deeper insights into the optimization process.")
        lines.append("")
        # Parallel Coordinate Plot
        if 'parallel_coordinate' in optuna_plots:
            lines.append("### Parallel Coordinate Plot")
            lines.append("")
            lines.append(f"![Parallel Coordinate]({optuna_plots['parallel_coordinate']})")
            lines.append("")
            lines.append("This interactive plot shows how different parameter combinations lead to different objective values. Each line represents one trial, colored by objective value. You can see parameter interactions and identify promising regions.")
            lines.append("")
        # Optimization History
        if 'optimization_history' in optuna_plots:
            lines.append("### Optimization History")
            lines.append("")
            lines.append(f"![Optimization History]({optuna_plots['optimization_history']})")
            lines.append("")
            lines.append("Professional visualization of convergence over trials, showing both individual trial performance and best value progression.")
            lines.append("")
        # Parameter Importance
        if 'param_importances' in optuna_plots:
            lines.append("### Parameter Importance Analysis")
            lines.append("")
            lines.append(f"![Parameter Importance]({optuna_plots['param_importances']})")
            lines.append("")
            lines.append("This analysis quantifies which design variables have the most impact on the objective. Based on fANOVA (functional ANOVA) or other importance metrics.")
            lines.append("")
        # Slice Plot
        if 'slice' in optuna_plots:
            lines.append("### Parameter Slice Analysis")
            lines.append("")
            lines.append(f"![Slice Plot]({optuna_plots['slice']})")
            lines.append("")
            lines.append("Shows how changing each parameter individually affects the objective value, with other parameters held constant.")
            lines.append("")
        # Contour Plot
        if 'contour' in optuna_plots:
            lines.append("### Parameter Interaction Contour")
            lines.append("")
            lines.append(f"![Contour Plot]({optuna_plots['contour']})")
            lines.append("")
            lines.append("2D heatmap showing how combinations of two parameters affect the objective. Reveals interaction effects and optimal regions.")
            lines.append("")

    # Trial history table - show actual results, collapsed by default.
    lines.append("## Complete Trial History")
    lines.append("")
    lines.append("<details>")
    lines.append("<summary>Click to expand full trial history</summary>")
    lines.append("")
    # Build complete history table with results
    history_header = ["Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
    lines.append("| " + " | ".join(history_header) + " |")
    lines.append("|" + "|".join(["-"*max(6, len(c)) for c in history_header]) + "|")
    for trial in history:
        result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
        var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
        row_data = [f"#{trial['trial_number']}"] + result_vals + var_vals
        lines.append("| " + " | ".join(row_data) + " |")
    lines.append("")
    lines.append("</details>")
    lines.append("")

    # Footer
    lines.append("---")
    lines.append("")
    lines.append(f"*Report generated automatically by Atomizer optimization system*")
    return '\n'.join(lines)
def main():
    """Command-line entry point: parse argv, build the report, write it out.

    Usage: generate_report_markdown.py <history_file> [target_value] [tolerance]
    Exits with status 1 on missing arguments or a nonexistent history file.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python generate_report_markdown.py <history_file> [target_value] [tolerance]")
        sys.exit(1)

    history_file = Path(args[0])
    if not history_file.exists():
        print(f"Error: History file not found: {history_file}")
        sys.exit(1)

    # Optional positionals: target value (default: none) and tolerance (0.1).
    target_value = float(args[1]) if len(args) > 1 else None
    tolerance = float(args[2]) if len(args) > 2 else 0.1

    # Generate report
    report = generate_markdown_report(history_file, target_value, tolerance)

    # Save report next to the history file
    report_file = history_file.parent / 'OPTIMIZATION_REPORT.md'
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"Report saved to: {report_file}")
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == '__main__':
    main()