Files
Anto01 eabcc4c3ca refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 12:30:59 -05:00

556 lines
20 KiB
Python

"""
Optimization Visualization System
Generates publication-quality plots for optimization results:
- Convergence plots
- Design space exploration
- Parallel coordinate plots
- Parameter sensitivity heatmaps
- Constraint violation tracking
"""
from pathlib import Path
from typing import Dict, List, Any, Optional
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.figure import Figure
import pandas as pd
from datetime import datetime
# Configure matplotlib for publication quality
mpl.rcParams['figure.dpi'] = 150
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.size'] = 10
mpl.rcParams['font.family'] = 'sans-serif'
mpl.rcParams['axes.labelsize'] = 10
mpl.rcParams['axes.titlesize'] = 11
mpl.rcParams['xtick.labelsize'] = 9
mpl.rcParams['ytick.labelsize'] = 9
mpl.rcParams['legend.fontsize'] = 9
class OptimizationVisualizer:
    """
    Generate comprehensive visualizations for optimization studies.

    Reads a substudy's trial history (``history.json``), flattens it into a
    DataFrame, and renders plots into a ``plots/`` subdirectory.

    Automatically creates:
    - Convergence plot (objective vs trials)
    - Design space exploration (parameter evolution)
    - Parallel coordinate plot (high-dimensional view)
    - Sensitivity heatmap (correlations)
    - Constraint violation tracking
    """

    def __init__(self, substudy_dir: Path):
        """
        Initialize visualizer for a substudy.

        Args:
            substudy_dir: Path to substudy directory containing history.json

        Raises:
            FileNotFoundError: If history.json is missing from substudy_dir.
        """
        self.substudy_dir = Path(substudy_dir)
        self.plots_dir = self.substudy_dir / 'plots'
        # parents=True: also create missing intermediate directories instead
        # of failing when the substudy tree is incomplete.
        self.plots_dir.mkdir(parents=True, exist_ok=True)
        # Load data
        self.history = self._load_history()
        self.config = self._load_config()
        self.df = self._history_to_dataframe()

    def _load_history(self) -> List[Dict]:
        """Load optimization history from JSON.

        Raises:
            FileNotFoundError: If history.json does not exist.
        """
        history_file = self.substudy_dir / 'history.json'
        if not history_file.exists():
            raise FileNotFoundError(f"History file not found: {history_file}")
        # Explicit encoding so JSON decodes identically across platforms.
        with open(history_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _load_config(self) -> Dict:
        """Load optimization configuration.

        Searches the substudy directory and up to two parent directories for
        the first file matching ``*config.json``; falls back to a minimal
        empty config so plotting still works without one.
        """
        # Try to find config in parent directories
        for parent in [self.substudy_dir, self.substudy_dir.parent, self.substudy_dir.parent.parent]:
            config_files = list(parent.glob('*config.json'))
            if config_files:
                with open(config_files[0], 'r', encoding='utf-8') as f:
                    return json.load(f)
        # Return minimal config if not found
        return {'design_variables': {}, 'objectives': [], 'constraints': []}

    def _history_to_dataframe(self) -> pd.DataFrame:
        """Convert history to flat DataFrame for analysis.

        Design variables, objectives, and constraints become prefixed
        columns (``dv_*``, ``obj_*``, ``const_*``) alongside ``trial``,
        ``timestamp``, and ``total_objective``.
        """
        rows = []
        for entry in self.history:
            row = {
                'trial': entry.get('trial_number'),
                'timestamp': entry.get('timestamp'),
                'total_objective': entry.get('total_objective')
            }
            # Add design variables
            for var, val in entry.get('design_variables', {}).items():
                row[f'dv_{var}'] = val
            # Add objectives
            for obj, val in entry.get('objectives', {}).items():
                row[f'obj_{obj}'] = val
            # Add constraints
            for const, val in entry.get('constraints', {}).items():
                row[f'const_{const}'] = val
            rows.append(row)
        return pd.DataFrame(rows)

    def generate_all_plots(self, save_formats: Optional[List[str]] = None) -> Dict[str, List[Path]]:
        """
        Generate all visualization plots.

        Args:
            save_formats: List of formats to save plots in (png, pdf, svg).
                Defaults to ['png', 'pdf'] when omitted.

        Returns:
            Dictionary mapping plot type to list of saved file paths
        """
        # None sentinel instead of a mutable list default (shared across calls).
        if save_formats is None:
            save_formats = ['png', 'pdf']
        saved_files = {}
        print(f"Generating plots in: {self.plots_dir}")
        # 1. Convergence plot
        print(" - Generating convergence plot...")
        saved_files['convergence'] = self.plot_convergence(save_formats)
        # 2. Design space exploration
        print(" - Generating design space exploration...")
        saved_files['design_space'] = self.plot_design_space(save_formats)
        # 3. Parallel coordinate plot
        print(" - Generating parallel coordinate plot...")
        saved_files['parallel_coords'] = self.plot_parallel_coordinates(save_formats)
        # 4. Sensitivity heatmap
        print(" - Generating sensitivity heatmap...")
        saved_files['sensitivity'] = self.plot_sensitivity_heatmap(save_formats)
        # 5. Constraint violations (if constraints exist).
        # startswith (not substring) so a dv_/obj_ column whose name merely
        # contains 'const_' cannot trigger a spurious constraint plot.
        if any(col.startswith('const_') for col in self.df.columns):
            print(" - Generating constraint violation plot...")
            saved_files['constraints'] = self.plot_constraint_violations(save_formats)
        # 6. Objective breakdown (if multi-objective)
        obj_cols = [col for col in self.df.columns if col.startswith('obj_')]
        if len(obj_cols) > 1:
            print(" - Generating objective breakdown...")
            saved_files['objectives'] = self.plot_objective_breakdown(save_formats)
        print(f"SUCCESS: All plots saved to: {self.plots_dir}")
        return saved_files

    def plot_convergence(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Plot optimization convergence: objective value vs trial number.

        Shows both individual trials and the running best.

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths.
        """
        if save_formats is None:
            save_formats = ['png']
        fig, ax = plt.subplots(figsize=(10, 6))
        trials = self.df['trial'].values
        objectives = self.df['total_objective'].values
        # Calculate running best
        running_best = np.minimum.accumulate(objectives)
        # Plot individual trials
        ax.scatter(trials, objectives, alpha=0.6, s=30, color='steelblue',
                   label='Trial objective', zorder=2)
        # Plot running best
        ax.plot(trials, running_best, color='darkred', linewidth=2,
                label='Running best', zorder=3)
        # Highlight best trial
        best_idx = np.argmin(objectives)
        ax.scatter(trials[best_idx], objectives[best_idx],
                   color='gold', s=200, marker='*', edgecolors='black',
                   linewidths=1.5, label='Best trial', zorder=4)
        ax.set_xlabel('Trial Number')
        ax.set_ylabel('Total Objective Value')
        ax.set_title('Optimization Convergence')
        ax.legend(loc='best')
        ax.grid(True, alpha=0.3)
        # Add improvement annotation; guard the division so a zero initial
        # objective reports 0% instead of a meaningless inf/nan.
        if objectives[0] != 0:
            improvement = (objectives[0] - objectives[best_idx]) / objectives[0] * 100
        else:
            improvement = 0.0
        ax.text(0.02, 0.98, f'Improvement: {improvement:.1f}%\nBest trial: {trials[best_idx]}',
                transform=ax.transAxes, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        plt.tight_layout()
        return self._save_figure(fig, 'convergence', save_formats)

    def plot_design_space(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Plot design variable evolution over trials.

        Shows how parameters change during optimization; each point is
        colored by its trial's objective value (darker = better).

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths (empty if no design variables exist).
        """
        if save_formats is None:
            save_formats = ['png']
        dv_cols = [col for col in self.df.columns if col.startswith('dv_')]
        n_vars = len(dv_cols)
        if n_vars == 0:
            print(" Warning: No design variables found, skipping design space plot")
            return []
        # Create subplots (one row per design variable, shared x-axis)
        fig, axes = plt.subplots(n_vars, 1, figsize=(10, 3*n_vars), sharex=True)
        if n_vars == 1:
            # plt.subplots returns a bare Axes (not an array) for a single row
            axes = [axes]
        trials = self.df['trial'].values
        objectives = self.df['total_objective'].values
        best_idx = np.argmin(objectives)
        for idx, col in enumerate(dv_cols):
            ax = axes[idx]
            var_name = col.replace('dv_', '')
            values = self.df[col].values
            # Color points by objective value (normalized)
            norm = mpl.colors.Normalize(vmin=objectives.min(), vmax=objectives.max())
            colors = plt.cm.viridis_r(norm(objectives))  # reversed so better = darker
            # Plot evolution
            ax.scatter(trials, values, c=colors, s=40, alpha=0.7,
                       edgecolors='black', linewidths=0.5)
            # Highlight best trial
            ax.scatter(trials[best_idx], values[best_idx],
                       color='gold', s=200, marker='*', edgecolors='black',
                       linewidths=1.5, zorder=10)
            # Get units from config (empty string when config lacks this variable)
            units = self.config.get('design_variables', {}).get(var_name, {}).get('units', '')
            ylabel = f'{var_name}'
            if units:
                ylabel += f' [{units}]'
            ax.set_ylabel(ylabel)
            ax.grid(True, alpha=0.3)
            # Add colorbar for first subplot
            if idx == 0:
                cbar = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap='viridis_r'),
                                    ax=ax, orientation='horizontal', pad=0.1)
                cbar.set_label('Objective Value (darker = better)')
        axes[-1].set_xlabel('Trial Number')
        fig.suptitle('Design Space Exploration', fontsize=12, y=1.0)
        plt.tight_layout()
        return self._save_figure(fig, 'design_space_evolution', save_formats)

    def plot_parallel_coordinates(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Parallel coordinate plot showing high-dimensional design space.

        Each line represents one trial, colored by objective value; every
        axis is min-max normalized to [0, 1].

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths (empty if no design variables exist).
        """
        if save_formats is None:
            save_formats = ['png']
        # Get design variables and objective
        dv_cols = [col for col in self.df.columns if col.startswith('dv_')]
        if len(dv_cols) == 0:
            print(" Warning: No design variables found, skipping parallel coordinates plot")
            return []
        # Prepare data: normalize all columns to [0, 1]
        plot_data = self.df[dv_cols + ['total_objective']].copy()
        # Normalize each column
        normalized = pd.DataFrame()
        for col in plot_data.columns:
            col_min = plot_data[col].min()
            col_max = plot_data[col].max()
            if col_max > col_min:
                normalized[col] = (plot_data[col] - col_min) / (col_max - col_min)
            else:
                normalized[col] = 0.5  # If constant, put in middle
        # Create figure
        fig, ax = plt.subplots(figsize=(12, 6))
        # Setup x-axis
        n_vars = len(normalized.columns)
        x_positions = np.arange(n_vars)
        # Color by objective value
        objectives = self.df['total_objective'].values
        norm = mpl.colors.Normalize(vmin=objectives.min(), vmax=objectives.max())
        colormap = plt.cm.viridis_r
        # Plot each trial as a line
        for idx in range(len(normalized)):
            values = normalized.iloc[idx].values
            color = colormap(norm(objectives[idx]))
            ax.plot(x_positions, values, color=color, alpha=0.3, linewidth=1)
        # Highlight best trial
        best_idx = np.argmin(objectives)
        best_values = normalized.iloc[best_idx].values
        ax.plot(x_positions, best_values, color='gold', linewidth=3,
                label='Best trial', zorder=10, marker='o', markersize=8,
                markeredgecolor='black', markeredgewidth=1.5)
        # Setup axes
        ax.set_xticks(x_positions)
        labels = [col.replace('dv_', '').replace('_', '\n') for col in dv_cols] + ['Objective']
        ax.set_xticklabels(labels, rotation=0, ha='center')
        ax.set_ylabel('Normalized Value [0-1]')
        ax.set_title('Parallel Coordinate Plot - Design Space Overview')
        ax.set_ylim(-0.05, 1.05)
        ax.grid(True, alpha=0.3, axis='y')
        ax.legend(loc='best')
        # Add colorbar
        sm = mpl.cm.ScalarMappable(cmap=colormap, norm=norm)
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=ax, orientation='vertical', pad=0.02)
        cbar.set_label('Objective Value (darker = better)')
        plt.tight_layout()
        return self._save_figure(fig, 'parallel_coordinates', save_formats)

    def plot_sensitivity_heatmap(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Correlation heatmap showing sensitivity between design variables and objectives.

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths (empty if there is insufficient data).
        """
        if save_formats is None:
            save_formats = ['png']
        # Get numeric columns
        dv_cols = [col for col in self.df.columns if col.startswith('dv_')]
        obj_cols = [col for col in self.df.columns if col.startswith('obj_')]
        if not dv_cols or not obj_cols:
            print(" Warning: Insufficient data for sensitivity heatmap, skipping")
            return []
        # Calculate correlation matrix
        analysis_cols = dv_cols + obj_cols + ['total_objective']
        corr_matrix = self.df[analysis_cols].corr()
        # Extract DV vs Objective correlations
        sensitivity = corr_matrix.loc[dv_cols, obj_cols + ['total_objective']]
        # Create heatmap (figure height scales with number of design variables)
        fig, ax = plt.subplots(figsize=(10, max(6, len(dv_cols) * 0.6)))
        im = ax.imshow(sensitivity.values, cmap='RdBu_r', vmin=-1, vmax=1, aspect='auto')
        # Set ticks
        ax.set_xticks(np.arange(len(sensitivity.columns)))
        ax.set_yticks(np.arange(len(sensitivity.index)))
        # Labels
        x_labels = [col.replace('obj_', '').replace('_', ' ') for col in sensitivity.columns]
        y_labels = [col.replace('dv_', '').replace('_', ' ') for col in sensitivity.index]
        ax.set_xticklabels(x_labels, rotation=45, ha='right')
        ax.set_yticklabels(y_labels)
        # Add correlation values as text
        for i in range(len(sensitivity.index)):
            for j in range(len(sensitivity.columns)):
                value = sensitivity.values[i, j]
                # White text on strongly colored cells for contrast
                color = 'white' if abs(value) > 0.5 else 'black'
                ax.text(j, i, f'{value:.2f}', ha='center', va='center',
                        color=color, fontsize=9)
        ax.set_title('Parameter Sensitivity Analysis\n(Correlation: Design Variables vs Objectives)')
        # Colorbar
        cbar = plt.colorbar(im, ax=ax)
        cbar.set_label('Correlation Coefficient', rotation=270, labelpad=20)
        plt.tight_layout()
        return self._save_figure(fig, 'sensitivity_heatmap', save_formats)

    def plot_constraint_violations(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Plot constraint violations over trials.

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths (empty if no constraint columns exist).
        """
        if save_formats is None:
            save_formats = ['png']
        const_cols = [col for col in self.df.columns if col.startswith('const_')]
        if not const_cols:
            return []
        fig, ax = plt.subplots(figsize=(10, 6))
        trials = self.df['trial'].values
        for col in const_cols:
            const_name = col.replace('const_', '').replace('_', ' ')
            values = self.df[col].values
            # Plot constraint value
            ax.plot(trials, values, marker='o', markersize=4,
                    label=const_name, alpha=0.7, linewidth=1.5)
        # Feasibility boundary: values below zero satisfy the constraint
        ax.axhline(y=0, color='red', linestyle='--', linewidth=2,
                   label='Feasible threshold', zorder=1)
        ax.set_xlabel('Trial Number')
        ax.set_ylabel('Constraint Value (< 0 = satisfied)')
        ax.set_title('Constraint Violations Over Trials')
        ax.legend(loc='best')
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        return self._save_figure(fig, 'constraint_violations', save_formats)

    def plot_objective_breakdown(self, save_formats: Optional[List[str]] = None) -> List[Path]:
        """
        Stacked area plot showing individual objective contributions.

        Args:
            save_formats: File formats to save; defaults to ['png'].

        Returns:
            List of saved file paths (empty unless there are >= 2 objectives).
        """
        if save_formats is None:
            save_formats = ['png']
        obj_cols = [col for col in self.df.columns if col.startswith('obj_')]
        if len(obj_cols) < 2:
            return []
        fig, ax = plt.subplots(figsize=(10, 6))
        trials = self.df['trial'].values
        # One row per objective for stackplot
        obj_data = self.df[obj_cols].values.T
        ax.stackplot(trials, *obj_data,
                     labels=[col.replace('obj_', '').replace('_', ' ') for col in obj_cols],
                     alpha=0.7)
        # Also plot total
        ax.plot(trials, self.df['total_objective'].values,
                color='black', linewidth=2, linestyle='--',
                label='Total objective', zorder=10)
        ax.set_xlabel('Trial Number')
        ax.set_ylabel('Objective Value')
        ax.set_title('Multi-Objective Breakdown')
        ax.legend(loc='best')
        ax.grid(True, alpha=0.3)
        plt.tight_layout()
        return self._save_figure(fig, 'objective_breakdown', save_formats)

    def _save_figure(self, fig: "Figure", name: str, formats: List[str]) -> List[Path]:
        """
        Save figure in multiple formats, then close it to free memory.

        Args:
            fig: Matplotlib figure
            name: Base filename (without extension)
            formats: List of file formats (png, pdf, svg)

        Returns:
            List of saved file paths
        """
        saved_paths = []
        for fmt in formats:
            filepath = self.plots_dir / f'{name}.{fmt}'
            fig.savefig(filepath, bbox_inches='tight')
            saved_paths.append(filepath)
        # Close to avoid accumulating open figures across many plots
        plt.close(fig)
        return saved_paths

    def generate_plot_summary(self) -> Dict[str, Any]:
        """
        Generate summary statistics for inclusion in reports.

        Also writes the summary to plots/plot_summary.json.

        Returns:
            Dictionary with key statistics and insights
        """
        objectives = self.df['total_objective'].values
        trials = self.df['trial'].values
        best_idx = np.argmin(objectives)
        best_trial = int(trials[best_idx])
        best_value = float(objectives[best_idx])
        initial_value = float(objectives[0])
        # Guard against division by zero for a zero initial objective.
        if initial_value != 0:
            improvement_pct = (initial_value - best_value) / initial_value * 100
        else:
            improvement_pct = 0.0
        # Convergence metrics
        running_best = np.minimum.accumulate(objectives)
        improvements = np.diff(running_best)
        significant_improvements = np.sum(improvements < -0.01 * initial_value)  # >1% improvement
        # Design variable ranges
        dv_cols = [col for col in self.df.columns if col.startswith('dv_')]
        dv_exploration = {}
        for col in dv_cols:
            var_name = col.replace('dv_', '')
            values = self.df[col].values
            dv_exploration[var_name] = {
                'min_explored': float(values.min()),
                'max_explored': float(values.max()),
                'best_value': float(values[best_idx]),
                'range_coverage': float((values.max() - values.min()))
            }
        summary = {
            'total_trials': int(len(trials)),
            'best_trial': best_trial,
            'best_objective': best_value,
            'initial_objective': initial_value,
            'improvement_percent': improvement_pct,
            'significant_improvements': int(significant_improvements),
            'design_variable_exploration': dv_exploration,
            # Mean magnitude of the first (up to 10) running-best deltas.
            # The original guard (> 10) incorrectly reported 0.0 for studies
            # with 1-10 improvements even though the slice handles them.
            'convergence_rate': float(np.mean(np.abs(improvements[:10]))) if len(improvements) > 0 else 0.0,
            'timestamp': datetime.now().isoformat()
        }
        # Save summary
        summary_file = self.plots_dir / 'plot_summary.json'
        with open(summary_file, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2)
        return summary
def generate_plots_for_substudy(substudy_dir: Path, formats: Optional[List[str]] = None):
    """
    Convenience function to generate all plots for a substudy.

    Args:
        substudy_dir: Path to substudy directory
        formats: List of save formats; defaults to ['png', 'pdf'] when omitted.

    Returns:
        OptimizationVisualizer instance
    """
    # None sentinel instead of a mutable list default (shared across calls).
    if formats is None:
        formats = ['png', 'pdf']
    visualizer = OptimizationVisualizer(substudy_dir)
    visualizer.generate_all_plots(save_formats=formats)
    summary = visualizer.generate_plot_summary()
    print(f"\n{'='*60}")
    print("VISUALIZATION SUMMARY")
    print(f"{'='*60}")
    print(f"Total trials: {summary['total_trials']}")
    print(f"Best trial: {summary['best_trial']}")
    print(f"Improvement: {summary['improvement_percent']:.2f}%")
    print(f"Plots saved to: {visualizer.plots_dir}")
    print(f"{'='*60}\n")
    return visualizer
if __name__ == '__main__':
    import sys

    # CLI entry point: first argument is the substudy directory, any
    # remaining arguments are the plot output formats.
    args = sys.argv[1:]
    if not args:
        print("Usage: python visualizer.py <substudy_directory> [formats...]")
        print("Example: python visualizer.py studies/beam/substudies/opt1 png pdf")
        sys.exit(1)
    target_dir = Path(args[0])
    output_formats = args[1:] or ['png', 'pdf']
    generate_plots_for_substudy(target_dir, output_formats)