""" Interactive HTML Report Generator ================================= Generates professional, interactive HTML reports for optimization studies using: - Plotly for interactive charts (zoom, pan, hover) - Tailwind CSS for styling - Self-contained HTML (works offline) Features: - Executive summary with key metrics - Interactive convergence plot - Pareto front visualization (multi-objective) - Parameter importance analysis - Parallel coordinates plot - Design comparison table - Export to PDF option Usage: from optimization_engine.reporting.html_report import HTMLReportGenerator generator = HTMLReportGenerator(study_dir) report_path = generator.generate() """ from __future__ import annotations import json import logging from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from typing import Optional, List, Dict, Any, Tuple import numpy as np logger = logging.getLogger(__name__) # Plotly CDN URL PLOTLY_CDN = "https://cdn.plot.ly/plotly-2.27.0.min.js" # Tailwind CDN URL TAILWIND_CDN = "https://cdn.tailwindcss.com" @dataclass class StudyData: """Loaded study data for report generation.""" study_name: str description: str = "" # Trials data trials: List[Dict[str, Any]] = field(default_factory=list) n_trials: int = 0 n_successful: int = 0 # Design variables design_variables: List[str] = field(default_factory=list) dv_bounds: Dict[str, Tuple[float, float]] = field(default_factory=dict) # Objectives objectives: List[str] = field(default_factory=list) is_multi_objective: bool = False # Constraints constraints: List[str] = field(default_factory=list) # Best results best_trial: Optional[Dict[str, Any]] = None best_values: Dict[str, float] = field(default_factory=dict) baseline_values: Dict[str, float] = field(default_factory=dict) # Pareto front (multi-objective) pareto_trials: List[Dict[str, Any]] = field(default_factory=list) # Timing total_runtime_seconds: float = 0.0 avg_trial_time: float = 0.0 # Metadata algorithm: str = "" created_at: Optional[datetime] = None completed_at: Optional[datetime] = None class HTMLReportGenerator: """ Generates interactive HTML reports for optimization studies. """ def __init__(self, study_dir: Path): """ Initialize the report generator. Args: study_dir: Path to the study directory """ self.study_dir = Path(study_dir) self.study_name = self.study_dir.name self.data: Optional[StudyData] = None def generate( self, output_dir: Optional[Path] = None, include_pdf: bool = False, ) -> Path: """ Generate the HTML report. 
class HTMLReportGenerator:
    """Generates interactive HTML reports for optimization studies."""

    def __init__(self, study_dir: Path):
        """
        Initialize the report generator.

        Args:
            study_dir: Path to the study directory
        """
        self.study_dir = Path(study_dir)
        self.study_name = self.study_dir.name
        self.data: Optional[StudyData] = None

    def generate(
        self,
        output_dir: Optional[Path] = None,
        include_pdf: bool = False,
    ) -> Path:
        """
        Generate the HTML report.

        Args:
            output_dir: Output directory (default: study_dir/4_report)
            include_pdf: Also generate a PDF version

        Returns:
            Path to the generated report
        """
        logger.info(f"Generating report for: {self.study_name}")

        # Load study data
        self.data = self._load_study_data()

        # Set output directory
        if output_dir is None:
            output_dir = self.study_dir / "4_report"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Generate and save HTML
        html_content = self._generate_html()
        report_path = output_dir / "index.html"
        report_path.write_text(html_content, encoding="utf-8")
        logger.info(f"Report saved to: {report_path}")

        # Save figures as separate files
        self._save_figures(output_dir)

        # Save data exports
        self._save_data_exports(output_dir)

        # Generate PDF if requested
        if include_pdf:
            self._generate_pdf(output_dir)

        return report_path
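    # ------------------------------------------------------------------
    # Data loading
    #
    # Study data is assembled from whichever artifacts exist, in order of
    # preference: the JSON spec (atomizer_spec.json / optimization_config.json),
    # then the Optuna SQLite database, then a JSON history dump. Missing
    # pieces degrade gracefully to defaults.
    # ------------------------------------------------------------------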
    def _load_study_data(self) -> StudyData:
        """Load all study data from files."""
        data = StudyData(study_name=self.study_name)

        # Load spec/config
        spec = self._load_spec()
        if spec:
            data.description = spec.get(
                "description", spec.get("meta", {}).get("description", "")
            )
            data.algorithm = spec.get("optimization_settings", {}).get(
                "sampler",
                spec.get("optimization", {}).get("algorithm", {}).get("type", ""),
            )

            # Extract design variables
            for dv in spec.get("design_variables", []):
                name = dv.get("parameter", dv.get("expression_name", dv.get("name", "")))
                data.design_variables.append(name)
                bounds = dv.get("bounds", {})
                if isinstance(bounds, dict):
                    data.dv_bounds[name] = (bounds.get("min", 0), bounds.get("max", 1))
                elif isinstance(bounds, (list, tuple)):
                    data.dv_bounds[name] = tuple(bounds)

            # Extract objectives
            for obj in spec.get("objectives", []):
                data.objectives.append(obj.get("name", "objective"))
            data.is_multi_objective = len(data.objectives) > 1

            # Extract constraints
            for const in spec.get("constraints", []):
                data.constraints.append(const.get("name", "constraint"))

            # Baseline values
            data.baseline_values = spec.get("baseline", {})

        # Load trials from database
        data.trials = self._load_trials_from_db()
        data.n_trials = len(data.trials)
        data.n_successful = len([t for t in data.trials if t.get("success", True)])

        # Find best trial
        if data.trials:
            if data.is_multi_objective:
                # For multi-objective, find the Pareto front
                data.pareto_trials = self._find_pareto_front(data.trials, data.objectives)
                data.best_trial = (
                    data.pareto_trials[0] if data.pareto_trials else data.trials[0]
                )
            else:
                # Single objective - find the minimum
                valid_trials = [t for t in data.trials if t.get("objectives")]
                if valid_trials:
                    obj_name = data.objectives[0] if data.objectives else "objective"
                    data.best_trial = min(
                        valid_trials,
                        key=lambda t: t.get("objectives", {}).get(obj_name, float("inf")),
                    )

            if data.best_trial:
                data.best_values = data.best_trial.get("objectives", {})

        # Calculate timing
        if data.trials:
            times = [t.get("solve_time", 0) for t in data.trials if t.get("solve_time")]
            if times:
                data.total_runtime_seconds = sum(times)
                data.avg_trial_time = float(np.mean(times))

        return data

    def _load_spec(self) -> Optional[Dict[str, Any]]:
        """Load the study specification."""
        spec_paths = [
            self.study_dir / "atomizer_spec.json",
            self.study_dir / "optimization_config.json",
            self.study_dir / "1_setup" / "optimization_config.json",
        ]
        for path in spec_paths:
            if path.exists():
                with open(path) as f:
                    return json.load(f)
        return None

    def _load_trials_from_db(self) -> List[Dict[str, Any]]:
        """Load trials from the Optuna database, falling back to JSON history."""
        db_paths = [
            self.study_dir / "3_results" / "study.db",
            self.study_dir / "2_results" / "study.db",
        ]
        for db_path in db_paths:
            if db_path.exists():
                return self._query_optuna_db(db_path)

        # Fall back to JSON history
        return self._load_trials_from_json()

    def _query_optuna_db(self, db_path: Path) -> List[Dict[str, Any]]:
        """Query trials from an Optuna SQLite database."""
        import sqlite3

        trials: List[Dict[str, Any]] = []
        try:
            conn = sqlite3.connect(str(db_path))
            conn.row_factory = sqlite3.Row
            try:
                cursor = conn.cursor()

                # Get study ID
                cursor.execute("SELECT study_id FROM studies LIMIT 1")
                row = cursor.fetchone()
                if not row:
                    return trials
                study_id = row["study_id"]

                # Get trials
                cursor.execute(
                    """
                    SELECT trial_id, number, state, datetime_start, datetime_complete
                    FROM trials
                    WHERE study_id = ?
                    ORDER BY number
                    """,
                    (study_id,),
                )

                for row in cursor.fetchall():
                    trial = {
                        "trial_id": row["trial_id"],
                        "number": row["number"],
                        "state": row["state"],
                        "success": row["state"] == "COMPLETE",
                        "params": {},
                        "objectives": {},
                    }

                    # Get params
                    cursor.execute(
                        """
                        SELECT param_name, param_value
                        FROM trial_params
                        WHERE trial_id = ?
                        """,
                        (row["trial_id"],),
                    )
                    for param_row in cursor.fetchall():
                        try:
                            trial["params"][param_row["param_name"]] = float(
                                param_row["param_value"]
                            )
                        except (ValueError, TypeError):
                            trial["params"][param_row["param_name"]] = param_row["param_value"]

                    # Get values (objectives). Objectives are stored by index in
                    # the DB; named objectives, if recorded, arrive via the user
                    # attributes below.
                    cursor.execute(
                        """
                        SELECT objective, value
                        FROM trial_values
                        WHERE trial_id = ?
                        """,
                        (row["trial_id"],),
                    )
                    for val_row in cursor.fetchall():
                        obj_idx = val_row["objective"]
                        obj_name = f"objective_{obj_idx}" if obj_idx > 0 else "objective"
                        trial["objectives"][obj_name] = val_row["value"]

                    # Get user attributes (these may overwrite the generic
                    # "objectives" mapping with named values)
                    cursor.execute(
                        """
                        SELECT key, value_json
                        FROM trial_user_attributes
                        WHERE trial_id = ?
                        """,
                        (row["trial_id"],),
                    )
                    for attr_row in cursor.fetchall():
                        try:
                            trial[attr_row["key"]] = json.loads(attr_row["value_json"])
                        except (json.JSONDecodeError, TypeError):
                            trial[attr_row["key"]] = attr_row["value_json"]

                    trials.append(trial)
            finally:
                conn.close()
        except Exception as e:
            logger.warning(f"Failed to query Optuna DB: {e}")

        return trials

    def _load_trials_from_json(self) -> List[Dict[str, Any]]:
        """Load trials from a JSON history file."""
        history_paths = [
            self.study_dir / "3_results" / "optimization_history.json",
            self.study_dir / "2_results" / "optimization_history_incremental.json",
        ]
        for path in history_paths:
            if path.exists():
                with open(path) as f:
                    return json.load(f)
        return []

    def _find_pareto_front(
        self, trials: List[Dict[str, Any]], objectives: List[str]
    ) -> List[Dict[str, Any]]:
        """Find Pareto-optimal trials (all objectives assumed minimized)."""
        if not trials or not objectives:
            return []

        pareto = []
        for trial in trials:
            objs = trial.get("objectives", {})
            if not all(obj in objs for obj in objectives):
                continue

            is_dominated = False
            for other in trials:
                other_objs = other.get("objectives", {})
                if not all(obj in other_objs for obj in objectives):
                    continue

                # Check if "other" dominates "trial": no worse in every
                # objective, strictly better in at least one
                better_in_all = all(
                    other_objs.get(obj, float("inf")) <= objs.get(obj, float("inf"))
                    for obj in objectives
                )
                better_in_one = any(
                    other_objs.get(obj, float("inf")) < objs.get(obj, float("inf"))
                    for obj in objectives
                )
                if better_in_all and better_in_one:
                    is_dominated = True
                    break

            if not is_dominated:
                pareto.append(trial)

        return pareto
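    # Dominance recap for the Pareto filter above (all objectives minimized):
    # a trial is dominated when some other trial is no worse in every objective
    # and strictly better in at least one. With two objectives, e.g. (mass,
    # drag), and illustrative values:
    #     A = (1.0, 2.0), B = (1.0, 1.5)  ->  B dominates A
    #     A = (1.0, 2.0), C = (0.9, 2.5)  ->  neither dominates; both can be Pareto-optimal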
""" def _generate_header(self) -> str: """Generate report header.""" return f"""

{self.data.study_name}

{self.data.description or "Optimization Study Report"}

Generated: {datetime.now().strftime("%Y-%m-%d %H:%M")}

Algorithm: {self.data.algorithm or "N/A"}

Download CSV
""" def _generate_executive_summary(self) -> str: """Generate executive summary section.""" # Calculate improvement improvement = 0.0 if self.data.baseline_values and self.data.best_values and self.data.objectives: obj = self.data.objectives[0] baseline = self.data.baseline_values.get(obj, 0) best = self.data.best_values.get(obj, 0) if baseline > 0: improvement = ((baseline - best) / baseline) * 100 # Format runtime runtime_str = self._format_duration(self.data.total_runtime_seconds) # Best value display best_display = "" if self.data.best_values and self.data.objectives: obj = self.data.objectives[0] val = self.data.best_values.get(obj, 0) best_display = f"{val:.4g}" return f"""

Executive Summary

{self.data.n_trials}
Total Trials
{improvement:.1f}%
Improvement
{best_display}
Best {self.data.objectives[0] if self.data.objectives else "Objective"}
{runtime_str}
Total Runtime
{self._generate_summary_details()}
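    # Worked example for the improvement figure above (minimization, positive
    # baseline): baseline = 10.0, best = 8.5 -> (10.0 - 8.5) / 10.0 * 100 = 15.0%.
    # A best value worse than the baseline comes out negative.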
""" def _generate_summary_details(self) -> str: """Generate detailed summary info.""" dv_list = ", ".join(self.data.design_variables[:5]) if len(self.data.design_variables) > 5: dv_list += f" (+{len(self.data.design_variables) - 5} more)" return f"""

Study Configuration

Performance

""" def _generate_convergence_section(self) -> str: """Generate convergence plot section.""" return """

Convergence History

Hover over points to see trial details. Use mouse to zoom and pan.

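    # Each chart section exposes a placeholder div (convergence-plot above;
    # pareto-plot, importance-plot, distribution-plot, parallel-plot below)
    # whose id must match the Plotly.newPlot target emitted by the
    # corresponding *_plot_js method.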
""" def _generate_pareto_section(self) -> str: """Generate Pareto front section (multi-objective only).""" if not self.data.is_multi_objective: return "" return f"""

Pareto Front

{len(self.data.pareto_trials)} Pareto-optimal solutions found. These represent the best trade-offs between objectives.

""" def _generate_parameters_section(self) -> str: """Generate parameter importance section.""" return """

Parameter Analysis

""" def _generate_best_designs_section(self) -> str: """Generate best designs table.""" if not self.data.trials: return "" # Get top 10 trials valid_trials = [t for t in self.data.trials if t.get("objectives")] if self.data.is_multi_objective: top_trials = self.data.pareto_trials[:10] else: obj = self.data.objectives[0] if self.data.objectives else "objective" top_trials = sorted( valid_trials, key=lambda t: t.get("objectives", {}).get(obj, float("inf")) )[:10] # Build table rows rows = [] for i, trial in enumerate(top_trials, 1): params_cells = [] for dv in self.data.design_variables[:4]: val = trial.get("params", {}).get(dv, "N/A") if isinstance(val, (int, float)): cell_content = f"{val:.3f}" else: cell_content = "N/A" params_cells.append(f'{cell_content}') obj_cells = [] for obj_name in self.data.objectives[:2]: val = trial.get("objectives", {}).get(obj_name, "N/A") if isinstance(val, (int, float)): cell_content = f"{val:.4f}" else: cell_content = "N/A" obj_cells.append( f'{cell_content}' ) row_class = "bg-green-50" if i == 1 else "hover:bg-gray-50" trial_num = trial.get("number", i) rows.append(f""" {i} #{trial_num} {"".join(params_cells)} {"".join(obj_cells)} """) # Build header dv_headers = "".join( [f'{dv[:12]}' for dv in self.data.design_variables[:4]] ) obj_headers = "".join( [f'{obj[:12]}' for obj in self.data.objectives[:2]] ) return f"""

Top Designs

{dv_headers} {obj_headers} {"".join(rows)}
Rank Trial

Best design highlighted in green. Full data available in CSV export.

""" def _generate_parallel_coords_section(self) -> str: """Generate parallel coordinates plot section.""" return """

Design Space Exploration

Each line represents a trial. Color indicates objective value. Drag on axes to filter.

""" def _generate_footer(self) -> str: """Generate report footer.""" return f""" """ def _generate_plotly_scripts(self) -> str: """Generate Plotly JavaScript for all charts.""" return f""" // Convergence Plot {self._generate_convergence_plot_js()} // Pareto Plot (if multi-objective) {self._generate_pareto_plot_js() if self.data.is_multi_objective else ""} // Importance Plot {self._generate_importance_plot_js()} // Distribution Plot {self._generate_distribution_plot_js()} // Parallel Coordinates {self._generate_parallel_plot_js()} """ def _generate_convergence_plot_js(self) -> str: """Generate JavaScript for convergence plot.""" if not self.data.trials or not self.data.objectives: return "// No data for convergence plot" obj = self.data.objectives[0] # Extract data trial_numbers = [] values = [] best_so_far = [] current_best = float("inf") for trial in self.data.trials: num = trial.get("number", len(trial_numbers)) val = trial.get("objectives", {}).get(obj) if val is not None and not np.isinf(val): trial_numbers.append(num) values.append(val) current_best = min(current_best, val) best_so_far.append(current_best) return f""" var convergenceData = [ {{ x: {json.dumps(trial_numbers)}, y: {json.dumps(values)}, mode: 'markers', type: 'scatter', name: 'All Trials', marker: {{ color: 'rgba(99, 102, 241, 0.5)', size: 8 }} }}, {{ x: {json.dumps(trial_numbers)}, y: {json.dumps(best_so_far)}, mode: 'lines', type: 'scatter', name: 'Best So Far', line: {{ color: 'rgb(34, 197, 94)', width: 2 }} }} ]; var convergenceLayout = {{ title: '', xaxis: {{ title: 'Trial Number' }}, yaxis: {{ title: '{obj}' }}, hovermode: 'closest', showlegend: true, legend: {{ x: 1, y: 1, xanchor: 'right' }} }}; Plotly.newPlot('convergence-plot', convergenceData, convergenceLayout, {{responsive: true}}); """ def _generate_pareto_plot_js(self) -> str: """Generate JavaScript for Pareto front plot.""" if not self.data.is_multi_objective or len(self.data.objectives) < 2: return "" obj1, obj2 = self.data.objectives[0], self.data.objectives[1] # All trials all_x = [] all_y = [] for trial in self.data.trials: x = trial.get("objectives", {}).get(obj1) y = trial.get("objectives", {}).get(obj2) if x is not None and y is not None: all_x.append(x) all_y.append(y) # Pareto trials pareto_x = [] pareto_y = [] for trial in self.data.pareto_trials: x = trial.get("objectives", {}).get(obj1) y = trial.get("objectives", {}).get(obj2) if x is not None and y is not None: pareto_x.append(x) pareto_y.append(y) return f""" var paretoData = [ {{ x: {json.dumps(all_x)}, y: {json.dumps(all_y)}, mode: 'markers', type: 'scatter', name: 'All Trials', marker: {{ color: 'rgba(156, 163, 175, 0.5)', size: 6 }} }}, {{ x: {json.dumps(pareto_x)}, y: {json.dumps(pareto_y)}, mode: 'markers', type: 'scatter', name: 'Pareto Front', marker: {{ color: 'rgb(239, 68, 68)', size: 10, symbol: 'diamond' }} }} ]; var paretoLayout = {{ title: '', xaxis: {{ title: '{obj1}' }}, yaxis: {{ title: '{obj2}' }}, hovermode: 'closest', showlegend: true }}; Plotly.newPlot('pareto-plot', paretoData, paretoLayout, {{responsive: true}}); """ def _generate_importance_plot_js(self) -> str: """Generate JavaScript for parameter importance plot.""" if not self.data.trials or not self.data.design_variables: return "// No data for importance plot" # Calculate simple correlation-based importance obj = self.data.objectives[0] if self.data.objectives else "objective" importances = [] for dv in self.data.design_variables: param_values = [] obj_values = [] for trial in self.data.trials: p = 
trial.get("params", {}).get(dv) o = trial.get("objectives", {}).get(obj) if p is not None and o is not None and not np.isinf(o): param_values.append(p) obj_values.append(o) if len(param_values) > 2: corr = abs(np.corrcoef(param_values, obj_values)[0, 1]) importances.append(corr if not np.isnan(corr) else 0) else: importances.append(0) # Sort by importance sorted_indices = np.argsort(importances)[::-1] sorted_dvs = [self.data.design_variables[i] for i in sorted_indices] sorted_imp = [importances[i] for i in sorted_indices] return f""" var importanceData = [{{ x: {json.dumps(sorted_imp)}, y: {json.dumps(sorted_dvs)}, type: 'bar', orientation: 'h', marker: {{ color: 'rgb(99, 102, 241)' }} }}]; var importanceLayout = {{ title: 'Parameter Importance', xaxis: {{ title: 'Correlation with Objective' }}, yaxis: {{ automargin: true }}, margin: {{ l: 120 }} }}; Plotly.newPlot('importance-plot', importanceData, importanceLayout, {{responsive: true}}); """ def _generate_distribution_plot_js(self) -> str: """Generate JavaScript for parameter distribution plot.""" if not self.data.trials or not self.data.design_variables: return "// No data for distribution plot" # Use first design variable dv = self.data.design_variables[0] values = [ t.get("params", {}).get(dv) for t in self.data.trials if t.get("params", {}).get(dv) is not None ] return f""" var distData = [{{ x: {json.dumps(values)}, type: 'histogram', marker: {{ color: 'rgb(34, 197, 94)' }}, nbinsx: 20 }}]; var distLayout = {{ title: 'Distribution: {dv}', xaxis: {{ title: '{dv}' }}, yaxis: {{ title: 'Count' }} }}; Plotly.newPlot('distribution-plot', distData, distLayout, {{responsive: true}}); """ def _generate_parallel_plot_js(self) -> str: """Generate JavaScript for parallel coordinates plot.""" if not self.data.trials or not self.data.design_variables: return "// No data for parallel plot" obj = self.data.objectives[0] if self.data.objectives else "objective" # Build dimensions dimensions = [] for dv in self.data.design_variables[:6]: # Limit to 6 DVs values = [t.get("params", {}).get(dv, 0) for t in self.data.trials] bounds = self.data.dv_bounds.get(dv, (min(values), max(values))) dimensions.append({"label": dv[:15], "values": values, "range": list(bounds)}) # Add objective as color dimension obj_values = [t.get("objectives", {}).get(obj, 0) for t in self.data.trials] return f""" var parallelData = [{{ type: 'parcoords', line: {{ color: {json.dumps(obj_values)}, colorscale: 'Viridis', showscale: true, colorbar: {{ title: '{obj}' }} }}, dimensions: {json.dumps(dimensions)} }}]; var parallelLayout = {{ title: '' }}; Plotly.newPlot('parallel-plot', parallelData, parallelLayout, {{responsive: true}}); """ def _save_figures(self, output_dir: Path) -> None: """Save static figures for PDF export.""" figures_dir = output_dir / "figures" figures_dir.mkdir(exist_ok=True) # Note: Static image export requires kaleido # For now, we just create the directory structure logger.info(f"Figures directory: {figures_dir}") def _save_data_exports(self, output_dir: Path) -> None: """Save data exports (CSV, JSON).""" data_dir = output_dir / "data" data_dir.mkdir(exist_ok=True) # Save all trials as JSON with open(data_dir / "all_trials.json", "w") as f: json.dump(self.data.trials, f, indent=2, default=str) # Save as CSV self._save_trials_csv(data_dir / "all_trials.csv") # Save summary summary = { "study_name": self.data.study_name, "n_trials": self.data.n_trials, "n_successful": self.data.n_successful, "design_variables": self.data.design_variables, "objectives": 
    def _save_trials_csv(self, csv_path: Path) -> None:
        """Save trials as a CSV file."""
        if not self.data.trials:
            return

        lines = []

        # Header
        headers = ["trial", "success"]
        headers.extend(self.data.design_variables)
        headers.extend(self.data.objectives)
        lines.append(",".join(headers))

        # Rows
        for trial in self.data.trials:
            row = [
                str(trial.get("number", "")),
                str(trial.get("success", True)),
            ]
            for dv in self.data.design_variables:
                val = trial.get("params", {}).get(dv, "")
                row.append(f"{val:.6f}" if isinstance(val, (int, float)) else str(val))
            for obj in self.data.objectives:
                val = trial.get("objectives", {}).get(obj, "")
                row.append(f"{val:.6f}" if isinstance(val, (int, float)) else str(val))
            lines.append(",".join(row))

        csv_path.write_text("\n".join(lines))

    def _generate_pdf(self, output_dir: Path) -> Optional[Path]:
        """Generate a PDF version of the report."""
        # This would use playwright or weasyprint.
        # For now, we rely on the browser's print functionality.
        logger.info("PDF export available via browser print (Ctrl+P)")
        return None

    def _format_duration(self, seconds: float) -> str:
        """Format a duration in human-readable form."""
        if seconds < 60:
            return f"{seconds:.0f}s"
        elif seconds < 3600:
            return f"{seconds / 60:.0f}m"
        else:
            return f"{seconds / 3600:.1f}h"


def generate_report(study_dir: Path, output_dir: Optional[Path] = None) -> Path:
    """
    Convenience function to generate a report.

    Args:
        study_dir: Path to the study directory
        output_dir: Output directory (optional)

    Returns:
        Path to the generated report
    """
    generator = HTMLReportGenerator(study_dir)
    return generator.generate(output_dir)
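
if __name__ == "__main__":
    # Minimal CLI sketch (not an official entry point; the argument names are
    # illustrative). Example:
    #   python html_report.py ./studies/my_study --output-dir ./reports
    import argparse

    parser = argparse.ArgumentParser(description="Generate an HTML optimization report")
    parser.add_argument("study_dir", type=Path, help="Path to the study directory")
    parser.add_argument("--output-dir", type=Path, default=None, help="Report output directory")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    print(generate_report(args.study_dir, args.output_dir))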