Files
Atomizer/optimization_engine/generate_report_markdown.py
Anto01 e3bdb08a22 feat: Major update with validators, skills, dashboard, and docs reorganization
- Add validation framework (config, model, results, study validators)
- Add Claude Code skills (create-study, run-optimization, generate-report,
  troubleshoot, analyze-model)
- Add Atomizer Dashboard (React frontend + FastAPI backend)
- Reorganize docs into structured directories (00-09)
- Add neural surrogate modules and training infrastructure
- Add multi-objective optimization support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 19:23:58 -05:00

570 lines
23 KiB
Python

"""
Generate comprehensive markdown optimization reports with graphs.
Uses Optuna's built-in visualization library for professional-quality plots.
"""
import json
import sys
from pathlib import Path
from typing import Dict, Any, List, Optional
import numpy as np
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import optuna
from optuna.visualization import (
plot_optimization_history,
plot_parallel_coordinate,
plot_param_importances,
plot_slice,
plot_contour
)
def create_confidence_progression_plot(confidence_history: List[Dict], phase_transitions: List[Dict], output_dir: Path) -> Optional[str]:
    """Plot how the surrogate confidence metrics evolved across trials.

    Args:
        confidence_history: Per-trial records, each carrying 'trial_number'
            and a 'confidence_metrics' dict with keys 'overall_confidence',
            'convergence_score', 'exploration_coverage', 'prediction_stability'.
        phase_transitions: Records of exploration->exploitation switches;
            each carries a 'trial_number' used to place a vertical marker.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('confidence_progression.png'), or None when
        there is no confidence history to plot.
    """
    if not confidence_history:
        return None

    def metric(key: str) -> List[float]:
        # Extract one named confidence metric from every history record.
        return [record['confidence_metrics'][key] for record in confidence_history]

    trials = [record['trial_number'] for record in confidence_history]

    plt.figure(figsize=(12, 7))
    plt.plot(trials, metric('overall_confidence'), 'b-', linewidth=2.5, label='Overall Confidence')
    plt.plot(trials, metric('convergence_score'), 'g--', alpha=0.7, label='Convergence Score')
    plt.plot(trials, metric('exploration_coverage'), 'orange', linestyle='--', alpha=0.7, label='Exploration Coverage')
    plt.plot(trials, metric('prediction_stability'), 'purple', linestyle='--', alpha=0.7, label='Prediction Stability')

    # Vertical red line + label wherever the optimizer switched to exploitation.
    for switch in phase_transitions:
        switch_trial = switch['trial_number']
        plt.axvline(x=switch_trial, color='red', linestyle='-', linewidth=2, alpha=0.8)
        plt.text(switch_trial, 0.95, f' Exploitation Phase', rotation=90,
                 verticalalignment='top', fontsize=10, color='red', fontweight='bold')

    # Horizontal guide line at the 65% confidence threshold.
    plt.axhline(y=0.65, color='gray', linestyle=':', linewidth=1.5, alpha=0.6, label='Confidence Threshold (65%)')

    plt.xlabel('Trial Number', fontsize=11)
    plt.ylabel('Confidence Score (0-1)', fontsize=11)
    plt.title('Surrogate Confidence Progression', fontsize=13, fontweight='bold')
    plt.legend(loc='lower right', fontsize=9)
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 1.05)
    plt.tight_layout()

    plot_file = output_dir / 'confidence_progression.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_convergence_plot(history: List[Dict], target: Optional[float], output_dir: Path) -> str:
    """Plot each trial's objective alongside the running best objective.

    Args:
        history: Trial records, each with 'trial_number' and 'objective'.
        target: Target value; when provided, a horizontal goal line is drawn
            at zero (the objective is an error-from-target measure).
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('convergence_plot.png').
    """
    xs = [entry['trial_number'] for entry in history]
    ys = [entry['objective'] for entry in history]
    # Running minimum: best objective seen up to and including each trial.
    best_so_far = np.minimum.accumulate(ys)

    plt.figure(figsize=(10, 6))
    plt.plot(xs, ys, 'o-', alpha=0.5, label='Trial objective')
    plt.plot(xs, best_so_far, 'r-', linewidth=2, label='Best so far')
    if target is not None:
        # Objective is error from target, so the goal line sits at zero.
        plt.axhline(y=0, color='g', linestyle='--', linewidth=2, label=f'Target (error = 0)')

    plt.xlabel('Trial Number')
    plt.ylabel('Objective Value (Error from Target)')
    plt.title('Optimization Convergence')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_file = output_dir / 'convergence_plot.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_design_space_plot(history: List[Dict], output_dir: Path) -> Optional[str]:
    """Create a 2D scatter of the explored design points, colored by objective.

    Args:
        history: Trial records, each with a 'design_variables' dict and an
            'objective' value.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('design_space_plot.png'), or None when the
        problem does not have exactly two design variables (the plot is
        only meaningful in 2D) or the history is empty.
    """
    # Guard: previously an empty history raised IndexError on history[0].
    if not history:
        return None
    first_trial = history[0]
    var_names = list(first_trial['design_variables'].keys())
    if len(var_names) != 2:
        return None  # Only works for 2D problems

    var1_name, var2_name = var_names
    var1_values = [t['design_variables'][var1_name] for t in history]
    var2_values = [t['design_variables'][var2_name] for t in history]
    objectives = [t['objective'] for t in history]

    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(var1_values, var2_values, c=objectives, s=100,
                          cmap='viridis', alpha=0.6, edgecolors='black')
    plt.colorbar(scatter, label='Objective Value')
    plt.xlabel(var1_name.replace('_', ' ').title())
    plt.ylabel(var2_name.replace('_', ' ').title())
    plt.title('Design Space Exploration')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    plot_file = output_dir / 'design_space_plot.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_parameter_sensitivity_plot(history: List[Dict], output_dir: Path) -> str:
    """Render one scatter panel per design variable (value vs. objective).

    Args:
        history: Trial records, each with a 'design_variables' dict and an
            'objective' value.
        output_dir: Directory where the PNG is written.

    Returns:
        The saved file name ('parameter_sensitivity.png').
    """
    var_names = list(history[0]['design_variables'].keys())
    objectives = [trial['objective'] for trial in history]
    n_vars = len(var_names)

    fig, axes = plt.subplots(1, n_vars, figsize=(6 * n_vars, 5))
    if n_vars == 1:
        # subplots() returns a bare Axes (not an array) for a single panel.
        axes = [axes]

    for ax, name in zip(axes, var_names):
        values = [trial['design_variables'][name] for trial in history]
        ax.scatter(values, objectives, alpha=0.6, s=50)
        ax.set_xlabel(name.replace('_', ' ').title())
        ax.set_ylabel('Objective Value')
        ax.set_title(f'Sensitivity to {name.replace("_", " ").title()}')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plot_file = output_dir / 'parameter_sensitivity.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_optuna_plots(study: optuna.Study, output_dir: Path) -> Dict[str, str]:
    """
    Create professional Optuna visualization plots.

    Each plot is attempted independently: a failure in one (e.g. too few
    trials for an importance analysis) prints a warning and skips only
    that plot.

    Args:
        study: Optuna study object
        output_dir: Directory to save plots
    Returns:
        Dictionary mapping plot names to filenames
    """
    # (key, output filename, (width, height), plot function,
    #  label used in the warning message, optional precondition on the study)
    plot_specs = [
        ('parallel_coordinate', 'optuna_parallel_coordinate.png', (1200, 600),
         plot_parallel_coordinate, 'parallel coordinate plot', None),
        ('optimization_history', 'optuna_optimization_history.png', (1000, 600),
         plot_optimization_history, 'optimization history plot', None),
        ('param_importances', 'optuna_param_importances.png', (800, 500),
         plot_param_importances, 'parameter importance plot', None),
        ('slice', 'optuna_slice.png', (1000, 600),
         plot_slice, 'slice plot', None),
        # Contour heatmap only makes sense for exactly two parameters.
        ('contour', 'optuna_contour.png', (800, 800),
         plot_contour, 'contour plot', lambda s: len(s.best_params) == 2),
    ]

    plots: Dict[str, str] = {}
    for key, filename, (width, height), plot_fn, label, precondition in plot_specs:
        try:
            # The precondition is evaluated inside the try because
            # study.best_params itself can raise (e.g. no completed trials).
            if precondition is not None and not precondition(study):
                continue
            fig = plot_fn(study)
            if fig is not None:
                plot_file = output_dir / filename
                fig.write_image(str(plot_file), width=width, height=height)
                plots[key] = plot_file.name
        except Exception as e:
            print(f"Warning: Could not create {label}: {e}")
    return plots
def generate_markdown_report(history_file: Path, target_value: Optional[float] = None,
                             tolerance: float = 0.1, reports_dir: Optional[Path] = None,
                             study: Optional[optuna.Study] = None) -> str:
    """Generate comprehensive markdown optimization report with graphs.

    Args:
        history_file: JSON file holding the trial history (a list of dicts,
            each with 'trial_number', 'objective', 'design_variables' and
            'results'). Expected to live under <study_dir>/<subdir>/.
        target_value: Optional target used for the success assessment and
            per-metric error reporting.
        tolerance: Objective error below which the target counts as achieved.
        reports_dir: Where plots (and typically the report) go; defaults to
            <study_dir>/3_reports.
        study: Optional Optuna study; when given, the advanced Optuna
            visualizations are generated and embedded.

    Returns:
        The full markdown report as a single string.
    """
    # Load history
    with open(history_file) as f:
        history = json.load(f)
    if not history:
        return "# Optimization Report\n\nNo optimization history found."

    # Graphs should be saved to 3_reports/ folder (same as markdown file).
    # The study directory is two levels above the history file.
    study_dir = history_file.parent.parent
    study_name = study_dir.name
    if reports_dir is None:
        reports_dir = study_dir / "3_reports"
    reports_dir.mkdir(parents=True, exist_ok=True)

    # Load phase transition and confidence history if available.
    # Both are optional inputs produced by the adaptive optimizer; any
    # read/parse error simply leaves them empty (best-effort).
    results_dir = study_dir / "2_results"
    phase_transitions = []
    confidence_history = []
    phase_transition_file = results_dir / "phase_transitions.json"
    confidence_history_file = results_dir / "confidence_history.json"
    if phase_transition_file.exists():
        try:
            with open(phase_transition_file) as f:
                phase_transitions = json.load(f)
        except Exception:
            pass
    if confidence_history_file.exists():
        try:
            with open(confidence_history_file) as f:
                confidence_history = json.load(f)
        except Exception:
            pass

    # Generate plots in reports folder
    convergence_plot = create_convergence_plot(history, target_value, reports_dir)
    design_space_plot = create_design_space_plot(history, reports_dir)
    sensitivity_plot = create_parameter_sensitivity_plot(history, reports_dir)

    # Generate confidence progression plot if data available
    confidence_plot = None
    if confidence_history:
        print(" Generating confidence progression plot...")
        confidence_plot = create_confidence_progression_plot(confidence_history, phase_transitions, reports_dir)

    # Generate Optuna plots if study object provided
    optuna_plots = {}
    if study is not None:
        print(" Generating Optuna visualization plots...")
        optuna_plots = create_optuna_plots(study, reports_dir)
        print(f" Generated {len(optuna_plots)} Optuna plots")

    # Build markdown report, line by line; joined once at the end.
    lines = []
    lines.append(f"# {study_name.replace('_', ' ').title()} - Optimization Report")
    lines.append("")
    lines.append(f"**Total Trials**: {len(history)}")
    lines.append("")

    # Study information
    lines.append("## Study Information")
    lines.append("")
    first_trial = history[0]
    design_vars = list(first_trial['design_variables'].keys())
    lines.append(f"- **Design Variables**: {', '.join([v.replace('_', ' ').title() for v in design_vars])}")
    lines.append(f"- **Number of Trials**: {len(history)}")
    lines.append("")

    # Adaptive optimization strategy information (only when the adaptive
    # optimizer produced phase/confidence data).
    if phase_transitions or confidence_history:
        lines.append("## Adaptive Optimization Strategy")
        lines.append("")
        lines.append("This study used adaptive surrogate-based optimization with confidence-driven phase transitions.")
        lines.append("")
        if phase_transitions:
            lines.append("### Phase Transitions")
            lines.append("")
            for transition in phase_transitions:
                trial_num = transition['trial_number']
                conf = transition['confidence_metrics']['overall_confidence']
                lines.append(f"- **Trial #{trial_num}**: EXPLORATION → EXPLOITATION")
                lines.append(f" - Confidence at transition: {conf:.1%}")
                lines.append(f" - Convergence score: {transition['confidence_metrics']['convergence_score']:.1%}")
                lines.append(f" - Exploration coverage: {transition['confidence_metrics']['exploration_coverage']:.1%}")
                lines.append(f" - Prediction stability: {transition['confidence_metrics']['prediction_stability']:.1%}")
            lines.append("")
        else:
            lines.append("### Phase Transitions")
            lines.append("")
            lines.append("No phase transitions occurred - optimization remained in exploration phase.")
            lines.append("This may indicate:")
            lines.append("- Insufficient trials to build surrogate confidence")
            lines.append("- Poor exploration coverage of the design space")
            lines.append("- Unstable convergence behavior")
            lines.append("")
        if confidence_plot:
            lines.append("### Confidence Progression")
            lines.append("")
            lines.append(f"![Confidence Progression]({confidence_plot})")
            lines.append("")
            lines.append("This plot shows how the surrogate model confidence evolved over the optimization.")
            lines.append("The red vertical line (if present) marks the transition to exploitation phase.")
            lines.append("")
        lines.append("")

    # Best result — lowest objective wins (minimization problem).
    objectives = [t['objective'] for t in history]
    best_idx = np.argmin(objectives)
    best_trial = history[best_idx]
    lines.append("## Best Result")
    lines.append("")
    lines.append(f"- **Trial**: #{best_trial['trial_number']}")
    lines.append("")

    # Show actual results FIRST (what the client cares about)
    lines.append("### Achieved Performance")
    for result, value in best_trial['results'].items():
        metric_name = result.replace('_', ' ').title()
        lines.append(f"- **{metric_name}**: {value:.4f}")
        # Show target comparison, but only for frequency-like metrics —
        # the target is assumed to apply to the frequency result.
        if target_value is not None and 'frequency' in result.lower():
            error = abs(value - target_value)
            lines.append(f" - Target: {target_value:.4f}")
            lines.append(f" - Error: {error:.4f} ({(error/target_value*100):.2f}%)")
    lines.append("")

    # Then design parameters that achieved it
    lines.append("### Design Parameters")
    for var, value in best_trial['design_variables'].items():
        lines.append(f"- **{var.replace('_', ' ').title()}**: {value:.4f}")
    lines.append("")

    # Technical objective last (for engineers), collapsed by default.
    lines.append("<details>")
    lines.append("<summary>Technical Details (Objective Function)</summary>")
    lines.append("")
    lines.append(f"- **Objective Value (Error)**: {best_trial['objective']:.6f}")
    lines.append("")
    lines.append("</details>")
    lines.append("")

    # Success assessment (only meaningful when a target was given).
    if target_value is not None:
        lines.append("## Success Assessment")
        lines.append("")
        best_objective = min(objectives)
        if best_objective <= tolerance:
            lines.append(f"### ✅ TARGET ACHIEVED")
            lines.append("")
            lines.append(f"Target value {target_value} was achieved within tolerance {tolerance}!")
            lines.append(f"- **Best Error**: {best_objective:.6f}")
        else:
            lines.append(f"### ⚠️ TARGET NOT YET ACHIEVED")
            lines.append("")
            lines.append(f"Target value {target_value} not achieved within tolerance {tolerance}")
            lines.append(f"- **Best Error**: {best_objective:.6f}")
            lines.append(f"- **Required Improvement**: {best_objective - tolerance:.6f}")
            lines.append(f"- **Recommendation**: Continue optimization with more trials")
        lines.append("")

    # Top 5 trials - show ACTUAL METRICS not just objective
    lines.append("## Top 5 Trials")
    lines.append("")
    sorted_history = sorted(history, key=lambda x: x['objective'])
    # Extract result column names (e.g. "first_frequency")
    result_cols = list(sorted_history[0]['results'].keys())
    result_col_names = [r.replace('_', ' ').title() for r in result_cols]
    # Build header with results AND design vars
    header_cols = ["Rank", "Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
    lines.append("| " + " | ".join(header_cols) + " |")
    lines.append("|" + "|".join(["-"*max(6, len(c)) for c in header_cols]) + "|")
    for i, trial in enumerate(sorted_history[:5], 1):
        result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
        var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
        row_data = [str(i), f"#{trial['trial_number']}"] + result_vals + var_vals
        lines.append("| " + " | ".join(row_data) + " |")
    lines.append("")

    # Statistics over the raw objective values
    lines.append("## Statistics")
    lines.append("")
    lines.append(f"- **Mean Objective**: {np.mean(objectives):.6f}")
    lines.append(f"- **Std Deviation**: {np.std(objectives):.6f}")
    lines.append(f"- **Best Objective**: {np.min(objectives):.6f}")
    lines.append(f"- **Worst Objective**: {np.max(objectives):.6f}")
    lines.append("")

    # Design variable ranges actually explored
    lines.append("### Design Variable Ranges")
    lines.append("")
    for var in design_vars:
        values = [t['design_variables'][var] for t in history]
        lines.append(f"**{var.replace('_', ' ').title()}**:")
        lines.append(f"- Min: {min(values):.6f}")
        lines.append(f"- Max: {max(values):.6f}")
        lines.append(f"- Mean: {np.mean(values):.6f}")
        lines.append("")

    # Convergence plot
    lines.append("## Convergence Plot")
    lines.append("")
    lines.append(f"![Convergence Plot]({convergence_plot})")
    lines.append("")
    lines.append("This plot shows how the optimization converged over time. The blue line shows each trial's objective value, while the red line shows the best objective found so far.")
    lines.append("")

    # Design space plot (only produced for 2D problems)
    if design_space_plot:
        lines.append("## Design Space Exploration")
        lines.append("")
        lines.append(f"![Design Space Plot]({design_space_plot})")
        lines.append("")
        lines.append("This plot shows which regions of the design space were explored. Darker colors indicate better objective values.")
        lines.append("")

    # Sensitivity plot
    lines.append("## Parameter Sensitivity")
    lines.append("")
    lines.append(f"![Parameter Sensitivity]({sensitivity_plot})")
    lines.append("")
    lines.append("These plots show how each design variable affects the objective value. Steeper slopes indicate higher sensitivity.")
    lines.append("")

    # Optuna Advanced Visualizations (only when a study object was passed in)
    if optuna_plots:
        lines.append("## Advanced Optimization Analysis (Optuna)")
        lines.append("")
        lines.append("The following plots leverage Optuna's professional visualization library to provide deeper insights into the optimization process.")
        lines.append("")
        # Parallel Coordinate Plot
        if 'parallel_coordinate' in optuna_plots:
            lines.append("### Parallel Coordinate Plot")
            lines.append("")
            lines.append(f"![Parallel Coordinate]({optuna_plots['parallel_coordinate']})")
            lines.append("")
            lines.append("This interactive plot shows how different parameter combinations lead to different objective values. Each line represents one trial, colored by objective value. You can see parameter interactions and identify promising regions.")
            lines.append("")
        # Optimization History
        if 'optimization_history' in optuna_plots:
            lines.append("### Optimization History")
            lines.append("")
            lines.append(f"![Optimization History]({optuna_plots['optimization_history']})")
            lines.append("")
            lines.append("Professional visualization of convergence over trials, showing both individual trial performance and best value progression.")
            lines.append("")
        # Parameter Importance
        if 'param_importances' in optuna_plots:
            lines.append("### Parameter Importance Analysis")
            lines.append("")
            lines.append(f"![Parameter Importance]({optuna_plots['param_importances']})")
            lines.append("")
            lines.append("This analysis quantifies which design variables have the most impact on the objective. Based on fANOVA (functional ANOVA) or other importance metrics.")
            lines.append("")
        # Slice Plot
        if 'slice' in optuna_plots:
            lines.append("### Parameter Slice Analysis")
            lines.append("")
            lines.append(f"![Slice Plot]({optuna_plots['slice']})")
            lines.append("")
            lines.append("Shows how changing each parameter individually affects the objective value, with other parameters held constant.")
            lines.append("")
        # Contour Plot
        if 'contour' in optuna_plots:
            lines.append("### Parameter Interaction Contour")
            lines.append("")
            lines.append(f"![Contour Plot]({optuna_plots['contour']})")
            lines.append("")
            lines.append("2D heatmap showing how combinations of two parameters affect the objective. Reveals interaction effects and optimal regions.")
            lines.append("")

    # Trial history table - show actual results, collapsed by default.
    lines.append("## Complete Trial History")
    lines.append("")
    lines.append("<details>")
    lines.append("<summary>Click to expand full trial history</summary>")
    lines.append("")
    # Build complete history table with results
    history_header = ["Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
    lines.append("| " + " | ".join(history_header) + " |")
    lines.append("|" + "|".join(["-"*max(6, len(c)) for c in history_header]) + "|")
    for trial in history:
        result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
        var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
        row_data = [f"#{trial['trial_number']}"] + result_vals + var_vals
        lines.append("| " + " | ".join(row_data) + " |")
    lines.append("")
    lines.append("</details>")
    lines.append("")

    # Footer
    lines.append("---")
    lines.append("")
    lines.append(f"*Report generated automatically by Atomizer optimization system*")
    return '\n'.join(lines)
def main():
    """Command-line entry point: parse argv, build the report, write it out.

    Usage: generate_report_markdown.py <history_file> [target_value] [tolerance]
    Exits with status 1 on missing arguments or a nonexistent history file.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python generate_report_markdown.py <history_file> [target_value] [tolerance]")
        sys.exit(1)

    history_file = Path(args[0])
    if not history_file.exists():
        print(f"Error: History file not found: {history_file}")
        sys.exit(1)

    # Optional positionals: target value (default: none) and tolerance (0.1).
    target_value = float(args[1]) if len(args) > 1 else None
    tolerance = float(args[2]) if len(args) > 2 else 0.1

    # Generate report
    report = generate_markdown_report(history_file, target_value, tolerance)

    # Save report next to the history file
    report_file = history_file.parent / 'OPTIMIZATION_REPORT.md'
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)
    print(f"Report saved to: {report_file}")
# Script entry point: only run the CLI when executed directly, not on import.
if __name__ == '__main__':
    main()