"""
Report Generator Utility

Generates Markdown/HTML/PDF reports for optimization studies.

Supports multiple data sources:
- optimization_config.json (primary config)
- study.db (Optuna SQLite, preferred)
- optimization_history_incremental.json (legacy)
"""

import html
import json
import sqlite3
import traceback
from contextlib import closing
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    import markdown
except ImportError:
    markdown = None


def _is_number(value: Any) -> bool:
    """Return True for real ints/floats; bools are deliberately excluded."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _format_number(value: Any, spec: str = ".4f") -> str:
    """Format *value* with *spec* when it is numeric, otherwise return 'N/A'."""
    return format(value, spec) if _is_number(value) else "N/A"


def _format_cell(value: Any) -> str:
    """Render a table cell: numbers get four decimals, anything else is str()."""
    return format(value, ".4f") if _is_number(value) else str(value)


def _to_float(value: Any) -> Optional[float]:
    """Convert *value* to float, returning None when it cannot be converted."""
    try:
        return float(value)
    except (ValueError, TypeError):
        return None


def _objective_key(trial: Dict[str, Any]) -> float:
    """Sort key for minimization; missing, non-numeric or NaN objectives rank last."""
    value = trial.get('objective')
    # `value != value` is only true for NaN, which would otherwise poison min().
    if not _is_number(value) or value != value:
        return float('inf')
    return value


def _compute_improvement(first_obj: Any, best_obj: Any) -> float:
    """Return the percentage reduction from *first_obj* to *best_obj*.

    Returns 0 when either value is non-numeric or the baseline is zero. The
    baseline magnitude is used as the denominator so that a decrease of a
    negative objective is still reported as a positive improvement.
    """
    if not (_is_number(first_obj) and _is_number(best_obj)) or first_obj == 0:
        return 0.0
    return (first_obj - best_obj) / abs(first_obj) * 100


def _load_trials_from_db(db_path: Path) -> List[Dict[str, Any]]:
    """Load completed trials from an Optuna SQLite database.

    Args:
        db_path: Path to the ``study.db`` file.

    Returns:
        One dict per COMPLETE trial that has a usable first objective value,
        with keys ``trial_number``, ``design_variables``, ``results`` and
        ``objective``. On a database error the problem is printed and the
        trials collected so far are returned (best effort).
    """
    trials: List[Dict[str, Any]] = []
    try:
        # sqlite3's own context manager only handles transactions, so
        # closing() is what guarantees the handle is released on errors.
        with closing(sqlite3.connect(str(db_path))) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Get completed trials
            cursor.execute("""
                SELECT t.trial_id, t.number as trial_number, t.state
                FROM trials t
                WHERE t.state = 'COMPLETE'
                ORDER BY t.number
            """)

            for row in cursor.fetchall():
                trial: Dict[str, Any] = {
                    'trial_number': row['trial_number'],
                    'design_variables': {},
                    'results': {},
                    'objective': None,
                }

                # Get params; keep the raw value when it is not numeric.
                cursor.execute("""
                    SELECT param_name, param_value
                    FROM trial_params
                    WHERE trial_id = ?
                """, (row['trial_id'],))
                for p in cursor.fetchall():
                    as_float = _to_float(p['param_value'])
                    trial['design_variables'][p['param_name']] = (
                        p['param_value'] if as_float is None else as_float
                    )

                # Get objective values; ordering makes the first row objective 0.
                cursor.execute("""
                    SELECT objective, value
                    FROM trial_values
                    WHERE trial_id = ?
                    ORDER BY objective
                """, (row['trial_id'],))
                values = cursor.fetchall()
                if values:
                    trial['objective'] = _to_float(values[0]['value'])
                    for v in values:
                        as_float = _to_float(v['value'])
                        # A NULL value is skipped instead of aborting the load.
                        if as_float is not None:
                            trial['results'][f'objective_{v["objective"]}'] = as_float

                # Get user attributes (like weighted_sum, mass_kg, etc.)
                cursor.execute("""
                    SELECT key, value_json
                    FROM trial_user_attributes
                    WHERE trial_id = ?
                """, (row['trial_id'],))
                for attr in cursor.fetchall():
                    try:
                        trial['results'][attr['key']] = json.loads(attr['value_json'])
                    except (ValueError, TypeError):
                        # Malformed or NULL JSON: drop this attribute only.
                        continue

                if trial['objective'] is not None:
                    trials.append(trial)
    except Exception as e:
        print(f"Error loading trials from DB: {e}")

    return trials


def _load_config(study_dir: Path) -> Dict[str, Any]:
    """Load optimization_config.json from ``1_setup/`` or the study root."""
    config_path = study_dir / "1_setup" / "optimization_config.json"
    if not config_path.exists():
        config_path = study_dir / "optimization_config.json"
    if not config_path.exists():
        return {}
    with open(config_path, encoding='utf-8') as f:
        loaded = json.load(f)
    # Anything other than a JSON object cannot be queried with .get().
    return loaded if isinstance(loaded, dict) else {}


def _load_history(study_dir: Path) -> List[Dict[str, Any]]:
    """Load trial history, preferring the Optuna DB over the legacy JSON file."""
    db_path = study_dir / "3_results" / "study.db"
    if not db_path.exists():
        db_path = study_dir / "2_results" / "study.db"
    if db_path.exists():
        history = _load_trials_from_db(db_path)
        if history:
            return history

    # Fallback to incremental JSON; the first existing file wins.
    for hist_dir in ("2_results", "3_results", "2_iterations"):
        history_path = study_dir / hist_dir / "optimization_history_incremental.json"
        if history_path.exists():
            with open(history_path, encoding='utf-8') as f:
                loaded = json.load(f)
            if not isinstance(loaded, list):
                return []
            return [t for t in loaded if isinstance(t, dict)]
    return []


def _build_markdown(
    config: Dict[str, Any],
    history: List[Dict[str, Any]],
    study_name: str,
    include_llm_summary: bool,
) -> str:
    """Assemble the full Markdown report text for a non-empty *history*."""
    # Find best trial (minimization; unusable objectives rank last).
    best_trial = min(history, key=_objective_key)

    # Get objectives info
    objectives = config.get('objectives') or []
    obj_names = [
        str(o.get('name', f'Objective {i}')) if isinstance(o, dict) else f'Objective {i}'
        for i, o in enumerate(objectives)
    ]
    if not obj_names:
        obj_names = ['Objective']

    # Get design variables info
    design_vars = config.get('design_variables') or []
    best_vars = best_trial.get('design_variables') or {}
    n_vars = len(design_vars) if design_vars else len(best_vars)

    # Calculate stats
    n_trials = len(history)
    settings = config.get('optimization_settings') or {}
    n_target = settings.get('n_trials', n_trials) if isinstance(settings, dict) else n_trials
    if not _is_number(n_target):
        n_target = n_trials
    status = 'Completed' if n_trials >= n_target else 'In Progress'

    # Calculate improvement
    first_obj = history[0].get('objective')
    best_obj = best_trial.get('objective')
    improvement = _compute_improvement(first_obj, best_obj)
    best_number = best_trial.get('trial_number', 'N/A')

    # Conditional formatting must happen outside the f-string: everything
    # after ':' in a replacement field is passed verbatim as the format spec.
    first_obj_text = _format_number(first_obj)
    best_obj_text = _format_number(best_obj)
    summary = _generate_summary(history, best_trial, include_llm_summary)

    parts = [f"""# Optimization Report: {study_name}

**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M')}
**Status**: {status}

---

## Executive Summary

{summary}

---

## Study Configuration

| Setting | Value |
|---------|-------|
| **Objectives** | {', '.join(obj_names)} |
| **Design Variables** | {n_vars} parameters |
| **Target Trials** | {n_target} |
| **Completed Trials** | {n_trials} |

---

## Best Result

**Trial #{best_number}** achieved the optimal design:

| Metric | Value |
|--------|-------|
| **Objective Value** | {best_obj_text} |
| **Improvement** | {improvement:.1f}% |

### Design Parameters

| Parameter | Value |
|-----------|-------|
"""]

    # Add best design variables
    parts.extend(f"| {k} | {_format_cell(v)} |\n" for k, v in best_vars.items())

    # Add results if available
    results = best_trial.get('results') or {}
    if results:
        parts.append("\n### Result Metrics\n\n| Metric | Value |\n|--------|-------|\n")
        parts.extend(
            f"| {k} | {_format_cell(v)} |\n" for k, v in results.items() if v is not None
        )

    parts.append(f"""
---

## Optimization Progress

The optimization process completed **{n_trials} trials** exploring the design space.

### Statistics

| Metric | Value |
|--------|-------|
| First Trial Objective | {first_obj_text} |
| Best Trial Objective | {best_obj_text} |
| Total Improvement | {improvement:.1f}% |
| Best Trial Found | #{best_number} |

---

## Top 5 Designs

| Rank | Trial | Objective |
|------|-------|-----------|
""")

    # Add top 5 trials
    top_trials = sorted(history, key=_objective_key)[:5]
    parts.extend(
        f"| {rank} | #{t.get('trial_number', 'N/A')} | {_format_number(t.get('objective'))} |\n"
        for rank, t in enumerate(top_trials, 1)
    )

    parts.append(f"""
---

## Files

- **Configuration**: `1_setup/optimization_config.json`
- **Database**: `3_results/study.db`
- **This Report**: `STUDY_REPORT.md`

---

*Report generated by Atomizer Dashboard*
*{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*
""")

    return "".join(parts)


def _markdown_to_html(md_content: str) -> str:
    """Convert Markdown to an HTML fragment, or escaped <pre> text as a fallback."""
    if markdown:
        return markdown.markdown(md_content, extensions=['tables'])
    # Basic fallback: escape so '<', '>' and '&' in the report stay literal.
    return f"<pre>{html.escape(md_content)}</pre>"


def _wrap_html_document(body_html: str, study_name: Any) -> str:
    """Wrap an HTML fragment in a minimal standalone, styled HTML document."""
    style = (
        "body { font-family: sans-serif; max-width: 900px; margin: 2em auto; "
        "padding: 0 1em; line-height: 1.5; }\n"
        "table { border-collapse: collapse; margin: 1em 0; }\n"
        "th, td { border: 1px solid #ccc; padding: 4px 10px; text-align: left; }\n"
        "th { background: #f2f2f2; }\n"
        "code, pre { background: #f6f6f6; }"
    )
    title = html.escape(str(study_name))
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Optimization Report - {title}</title>
<style>
{style}
</style>
</head>
<body>
{body_html}
</body>
</html>
"""


def _write_html_report(
    md_content: str, study_name: Any, output_dir: Path, timestamp: str
) -> Path:
    """Write the HTML flavour of the report and return its path."""
    output_path = output_dir / f"STUDY_REPORT_{timestamp}.html"
    styled_html = _wrap_html_document(_markdown_to_html(md_content), study_name)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(styled_html)
    return output_path


def generate_study_report(
    study_dir: Path,
    output_format: str = "markdown",
    include_llm_summary: bool = False
) -> Optional[Path]:
    """
    Generate a report for the study.

    Args:
        study_dir: Path to the study directory
        output_format: 'markdown' (or 'md'), 'html', or 'pdf'. Unknown values
            produce Markdown. 'pdf' falls back to HTML when WeasyPrint cannot
            be imported.
        include_llm_summary: Whether to include AI-generated summary

    Returns:
        Path to the generated report file, or None when no trial data is
        found or report generation fails (the error is printed).
    """
    try:
        study_dir = Path(study_dir)

        config = _load_config(study_dir)
        history = _load_history(study_dir)
        if not history:
            print(f"No trial data found in {study_dir}")
            return None

        study_name = config.get('study_name', study_dir.name)
        md_content = _build_markdown(config, history, study_name, include_llm_summary)

        # Determine output directory
        output_dir = study_dir / "3_results"
        if not output_dir.exists():
            output_dir = study_dir / "2_results"
            if not output_dir.exists():
                output_dir = study_dir
        output_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        if output_format == 'html':
            return _write_html_report(md_content, study_name, output_dir, timestamp)

        if output_format == 'pdf':
            try:
                from weasyprint import HTML
            except ImportError:
                print("WeasyPrint not installed, falling back to HTML")
                return _write_html_report(md_content, study_name, output_dir, timestamp)
            output_path = output_dir / f"STUDY_REPORT_{timestamp}.pdf"
            HTML(string=_markdown_to_html(md_content)).write_pdf(str(output_path))
            return output_path

        # 'markdown', 'md', and any unrecognised format all produce Markdown.
        output_path = output_dir / f"STUDY_REPORT_{timestamp}.md"
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(md_content)
        return output_path

    except Exception as e:
        # Top-level boundary: callers rely on None rather than an exception.
        print(f"Report generation error: {e}")
        traceback.print_exc()
        return None


def _generate_summary(
    history: List[Dict[str, Any]], best_trial: Dict[str, Any], use_llm: bool
) -> str:
    """Generate executive summary text.

    Args:
        history: Trial dicts in the order they were run.
        best_trial: The trial dict selected as best (an element of *history*).
        use_llm: When true, return the fixed AI-summary placeholder text.

    Returns:
        A short Markdown paragraph describing trial count, improvement and
        how early the best trial was found.
    """
    if use_llm:
        return "[AI Summary Placeholder] The optimization successfully identified a design that minimizes the objective while satisfying all constraints."

    if not history:
        return "No trials completed yet."

    n_trials = len(history)
    best_obj = best_trial.get('objective')
    best_num = best_trial.get('trial_number', 'N/A')
    improvement = _compute_improvement(history[0].get('objective'), best_obj)

    # Determine convergence efficiency. When the trial number is missing,
    # fall back to the trial's position in the history list.
    position = best_num
    if not _is_number(position):
        position = next((i for i, t in enumerate(history) if t is best_trial), None)
    if position is not None and position < n_trials * 0.3:
        efficiency = "excellent"
    elif position is not None and position < n_trials * 0.6:
        efficiency = "good"
    else:
        efficiency = "moderate"

    summary = f"""This optimization study completed **{n_trials} trials** and achieved a **{improvement:.1f}% improvement** from the initial design.

The best solution was found at **Trial #{best_num}** with an objective value of **{_format_number(best_obj)}**, demonstrating {efficiency} convergence efficiency.
"""
    return summary