"""
Generate comprehensive optimization report for V11 adaptive surrogate study.

This script reads the Optuna database and adaptive_state.json to create
a detailed markdown report with results, statistics, and recommendations.

Usage:
    python generate_report.py [--output REPORT.md]
"""

import json
import argparse
import optuna
import numpy as np
from pathlib import Path
from datetime import datetime


def load_config(config_path: Path) -> dict:
    """Load the optimization configuration from a JSON file.

    Args:
        config_path: Location of the JSON configuration file.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If ``config_path`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8: the default for open() is the locale
    # encoding, which breaks on non-ASCII text (e.g. unit symbols) on
    # platforms whose locale is not UTF-8.
    with open(config_path, encoding="utf-8") as f:
        return json.load(f)


def load_adaptive_state(state_path: Path) -> "dict | None":
    """Load the adaptive optimization state from a JSON file.

    Args:
        state_path: Location of the adaptive state JSON file.

    Returns:
        The parsed JSON document, or ``None`` when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Open first and handle absence afterwards: a separate exists() check
    # can race with another process creating or removing the file.
    try:
        with open(state_path, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Same outcome as the former Path.exists() pre-check returning False.
        return None


def get_trials_from_optuna(db_path: Path) -> dict:
    """Get all completed trials from the Optuna database, grouped by source.

    Every study found in the database is loaded. Each completed trial is
    placed in one bucket according to its ``source`` user attribute
    (treated as ``'FEA'`` when absent):

    * ``'V10_FEA'`` -> ``'v10_fea'``
    * ``'FEA'``     -> ``'v11_fea'``
    * ``'NN'``      -> ``'v11_nn'``
    * anything else -> ``'v11_nn'`` if the study name contains "nn"
      (case-insensitive), otherwise ``'v11_fea'``

    Failures are reported as printed warnings rather than raised, so a
    partially readable database still yields a (possibly empty) result.

    Args:
        db_path: Path to the SQLite file used as Optuna storage.

    Returns:
        A dict with keys ``'v10_fea'``, ``'v11_fea'`` and ``'v11_nn'``, each
        a list of dicts with keys ``number``, ``params``, ``value``,
        ``user_attrs`` and ``study_name``.
    """
    trials_data = {
        'v10_fea': [],
        'v11_fea': [],
        'v11_nn': []
    }

    # Connecting to a SQLite URL creates the file when it is missing; a
    # read-only report must not leave an empty database behind.
    if not Path(db_path).exists():
        print(f"Warning: Optuna database not found: {db_path}")
        return trials_data

    storage = f"sqlite:///{db_path}"

    try:
        # Get all study names in the database
        from sqlalchemy import create_engine, text
        engine = create_engine(storage)
        try:
            with engine.connect() as conn:
                result = conn.execute(text("SELECT study_name FROM studies"))
                study_names = [row[0] for row in result]
        finally:
            # Release the connection pool opened only for this query.
            engine.dispose()
    except Exception as e:
        print(f"Warning: Could not enumerate studies: {e}")
        study_names = ['v11_fea']

    for study_name in study_names:
        try:
            study = optuna.load_study(study_name=study_name, storage=storage)
            for trial in study.trials:
                if trial.state != optuna.trial.TrialState.COMPLETE:
                    continue

                # Read `values`, not `value`: Optuna raises RuntimeError from
                # `value` on multi-objective trials, which would make the
                # handler below discard the entire study. For single-objective
                # trials values[0] is the same number.
                values = trial.values
                trial_info = {
                    'number': trial.number,
                    'params': dict(trial.params),
                    'value': values[0] if values else None,
                    'user_attrs': dict(trial.user_attrs),
                    'study_name': study_name
                }

                source = trial.user_attrs.get('source', 'FEA')
                if source == 'V10_FEA':
                    bucket = 'v10_fea'
                elif source == 'NN':
                    bucket = 'v11_nn'
                elif source != 'FEA' and 'nn' in study_name.lower():
                    # Unrecognised source tag: fall back to the study name.
                    bucket = 'v11_nn'
                else:
                    bucket = 'v11_fea'
                trials_data[bucket].append(trial_info)

        except Exception as e:
            # Best effort: one unreadable study must not abort the report.
            print(f"Warning: Could not load study '{study_name}': {e}")

    return trials_data


def calculate_statistics(trials: list, objective_keys: list) -> dict:
    """Summarise objective values recorded in the trials' user attributes.

    Args:
        trials: Trial dicts; values are read from each trial's
            ``'user_attrs'`` mapping.
        objective_keys: Names of the user attributes to summarise.

    Returns:
        ``None`` when ``trials`` is empty. Otherwise a dict mapping each key
        that has at least one finite, non-``None`` value to its ``min``,
        ``max``, ``mean``, ``std`` and ``count``.
    """
    if not trials:
        return None

    summary = {}
    for objective in objective_keys:
        # Keep only values that are present, not None and finite.
        samples = [
            attrs[objective]
            for attrs in (trial.get('user_attrs', {}) for trial in trials)
            if objective in attrs
            and attrs[objective] is not None
            and np.isfinite(attrs[objective])
        ]
        if not samples:
            continue

        summary[objective] = {
            'min': min(samples),
            'max': max(samples),
            'mean': np.mean(samples),
            'std': np.std(samples),
            'count': len(samples),
        }

    return summary


def find_best_trials(trials: list, config: dict) -> dict:
    """Pick, per configured objective, the trial with the best recorded value.

    Args:
        trials: Trial dicts; objective values are read from each trial's
            ``'user_attrs'`` mapping under the objective's name.
        config: Configuration whose ``'objectives'`` entries provide
            ``'name'`` and optionally ``'direction'`` (default
            ``'minimize'``).

    Returns:
        A dict mapping objective name to ``{'trial': ..., 'value': ...}``.
        Objectives for which no trial beats the starting bound are omitted.
    """
    winners = {}

    for objective in config.get('objectives', []):
        name = objective['name']
        direction = objective.get('direction', 'minimize')
        minimizing = direction == 'minimize'
        maximizing = direction == 'maximize'

        # Start from the worst possible bound; comparisons are strict, so
        # the first trial reaching a given value keeps the title.
        incumbent_value = float('inf') if minimizing else float('-inf')
        incumbent = None

        for candidate in trials:
            score = candidate.get('user_attrs', {}).get(name)
            if score is None:
                continue

            beats = (minimizing and score < incumbent_value) or (
                maximizing and score > incumbent_value
            )
            if beats:
                incumbent_value, incumbent = score, candidate

        if incumbent:
            winners[name] = {'trial': incumbent, 'value': incumbent_value}

    return winners


# The report addresses three objectives by their position in
# config['objectives']. Each entry is:
# (adaptive-state key, results-table label, recommendation label,
#  fallback target used by the recommendations section).
_OBJECTIVE_SLOTS = (
    ('best_40_vs_20', '40-20 Operational', '40-20 Tracking', 4.0),
    ('best_60_vs_20', '60-20 Operational', '60-20 Tracking', 10.0),
    ('best_mfg', 'Manufacturing (90-20)', 'Manufacturing', 20.0),
)

# (statistics key, row title) pairs for the statistics tables.
_STAT_ROWS = (
    ('min', 'Minimum'),
    ('max', 'Maximum'),
    ('mean', 'Mean'),
    ('std', 'Std Dev'),
)


def _fmt(value, spec: str, suffix: str = '') -> str:
    """Format ``value`` with ``spec`` plus ``suffix``; never raise.

    Returns ``'-'`` for ``None`` and ``str(value)`` for values that do not
    accept the format spec.
    """
    if value is None:
        return '-'
    try:
        return format(value, spec) + suffix
    except (TypeError, ValueError):
        return str(value)


def _objective_at(config: dict, index: int) -> dict:
    """Return the objective entry at ``index``, or ``{}`` if there is none."""
    objectives = config.get('objectives', [])
    return objectives[index] if index < len(objectives) else {}


def _pass_fail(value, target) -> str:
    """Return 'PASS'/'FAIL' for ``value <= target``, or '-' if not comparable."""
    try:
        return 'PASS' if value <= target else 'FAIL'
    except TypeError:
        return '-'


def _stats_table(stats: dict, obj_keys: list) -> str:
    """Render the min/max/mean/std rows of ``stats`` as a markdown table."""
    table = """
| Metric | 40-20 (nm) | 60-20 (nm) | Mfg (nm) |
|--------|------------|------------|----------|
"""
    for label, name in _STAT_ROWS:
        cells = [
            f"{stats[key].get(label, 0):.2f}" if key in stats else "-"
            for key in obj_keys
        ]
        table += f"| {name} | {' | '.join(cells)} |\n"
    return table


def _rating_line(label: str, best, target) -> str:
    """Render one recommendation bullet comparing ``best`` against ``target``."""
    try:
        if best <= target:
            return f"- **{label}**: EXCELLENT - Within target ({best:.2f} nm <= {target} nm)\n"
        if best <= target * 1.5:
            return f"- **{label}**: GOOD - Close to target ({best:.2f} nm, target: {target} nm)\n"
        return f"- **{label}**: NEEDS IMPROVEMENT - Above target ({best:.2f} nm, target: {target} nm)\n"
    except (TypeError, ValueError):
        # Result or target is null / non-numeric in the input files.
        return f"- **{label}**: NO DATA - result or target not available\n"


def generate_report(config: dict, adaptive_state: dict, trials_data: dict,
                    results_dir: Path = None) -> str:
    """Generate the markdown report.

    Args:
        config: Optimization configuration. Reads ``study_name``,
            ``objectives``, ``design_variables``, ``adaptive_settings`` and
            ``surrogate_settings``.
        adaptive_state: Adaptive optimization state, or ``None`` when no
            state is available; state-dependent sections are then omitted.
        trials_data: Dict with ``'v10_fea'``, ``'v11_fea'`` and ``'v11_nn'``
            lists of trial dicts.
        results_dir: Directory searched for ``surrogate_*.pt`` checkpoints.
            Defaults to ``3_results`` relative to the current working
            directory.

    Returns:
        The complete report as a markdown string.
    """
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Combine all trials for statistics
    all_fea = trials_data['v10_fea'] + trials_data['v11_fea']
    all_trials = all_fea + trials_data['v11_nn']

    # Get objective keys
    obj_keys = [obj['name'] for obj in config.get('objectives', [])]

    # Calculate statistics
    fea_stats = calculate_statistics(all_fea, obj_keys)
    nn_stats = calculate_statistics(trials_data['v11_nn'], obj_keys)

    max_iterations = config.get('adaptive_settings', {}).get('max_iterations', 100)
    if adaptive_state and adaptive_state.get('iteration', 0) < max_iterations:
        status = 'Running'
    else:
        status = 'Completed'

    # Build report
    report = f"""# M1 Mirror Adaptive Surrogate Optimization V11 - Results Report

**Study**: {config.get('study_name', 'Unknown')}
**Generated**: {now}
**Status**: {status}

---

## Executive Summary

| Metric | Value |
|--------|-------|
| V10 FEA Trials (Training Data) | {len(trials_data['v10_fea'])} |
| V11 FEA Trials (Validation) | {len(trials_data['v11_fea'])} |
| V11 NN Trials (Surrogate) | {len(trials_data['v11_nn'])} |
| Total Trials | {len(all_trials)} |
"""

    if adaptive_state:
        # _fmt renders '-' for results that are not recorded yet instead of
        # failing on the numeric format spec.
        report += f"""| Current Iteration | {adaptive_state.get('iteration', 0)} |
| Best 40-20 Operational | {_fmt(adaptive_state.get('best_40_vs_20'), '.2f', ' nm')} |
| Best 60-20 Operational | {_fmt(adaptive_state.get('best_60_vs_20'), '.2f', ' nm')} |
| Best Manufacturing (90-20) | {_fmt(adaptive_state.get('best_mfg'), '.2f', ' nm')} |
| Best Weighted Objective | {_fmt(adaptive_state.get('best_weighted'), '.4f')} |
"""

    report += """
---

## 1. Study Configuration

### Objectives (Relative Filtered RMS)

| Objective | Description | Weight | Target | Units |
|-----------|-------------|--------|--------|-------|
"""
    for obj in config.get('objectives', []):
        report += f"| `{obj['name']}` | {obj.get('description', '-')} | {obj.get('weight', 1.0)} | {obj.get('target', '-')} | {obj.get('units', 'nm')} |\n"

    report += """
### Design Variables

| Parameter | Min | Max | Baseline | Units |
|-----------|-----|-----|----------|-------|
"""
    for dv in config.get('design_variables', []):
        if dv.get('enabled', True):
            report += f"| {dv['name']} | {dv['min']} | {dv['max']} | {dv.get('baseline', '-')} | {dv.get('units', '-')} |\n"

    report += """
---

## 2. Optimization Progress

### Current State
"""
    if adaptive_state:
        report += f"""
- **Iteration**: {adaptive_state.get('iteration', 0)}
- **Total FEA Evaluations**: {adaptive_state.get('total_fea_count', 0)}
- **Total NN Evaluations**: {adaptive_state.get('total_nn_count', 0)}
- **Convergence Patience**: {config.get('adaptive_settings', {}).get('patience', 5)} iterations

### Iteration History

| Iter | FEA Count | NN Count | Best 40-20 | Best 60-20 | Best Mfg | Improved |
|------|-----------|----------|------------|------------|----------|----------|
"""
        for h in adaptive_state.get('history', []):
            improved = "Yes" if h.get('improved', False) else "No"
            report += (
                f"| {h.get('iteration', '-')} | {h.get('fea_count', '-')} | {h.get('nn_count', '-')} "
                f"| {_fmt(h.get('best_40_vs_20', 0), '.2f')} "
                f"| {_fmt(h.get('best_60_vs_20', 0), '.2f')} "
                f"| {_fmt(h.get('best_mfg', 0), '.2f')} | {improved} |\n"
            )

    report += """
---

## 3. Results Summary

### FEA Trials Statistics
"""
    if fea_stats:
        report += _stats_table(fea_stats, obj_keys)

    report += """
### Neural Network Trials Statistics
"""
    if nn_stats:
        # Rendered independently of the FEA table, so NN statistics still
        # appear when no FEA statistics exist.
        report += _stats_table(nn_stats, obj_keys)
    else:
        report += "\n*No NN trials with objective values available yet.*\n"

    report += """
---

## 4. Best Designs Found

### Best Overall Design
"""
    if adaptive_state and adaptive_state.get('best_params'):
        bp = adaptive_state['best_params']
        report += f"""
**Weighted Objective**: {_fmt(adaptive_state.get('best_weighted'), '.4f')}

| Objective | Value | Target | Status |
|-----------|-------|--------|--------|
"""
        for index, (state_key, table_label, _, _) in enumerate(_OBJECTIVE_SLOTS):
            objective = _objective_at(config, index)
            verdict = _pass_fail(
                adaptive_state.get(state_key, float('inf')),
                objective.get('target', float('inf')),
            )
            report += (
                f"| {table_label} | {_fmt(adaptive_state.get(state_key), '.2f', ' nm')} "
                f"| {objective.get('target', '-')} nm | {verdict} |\n"
            )

        report += """
**Design Parameters**:

| Parameter | Value | Unit |
|-----------|-------|------|
"""
        for dv in config.get('design_variables', []):
            if dv.get('enabled', True) and dv['name'] in bp:
                report += f"| {dv['name']} | {_fmt(bp[dv['name']], '.4f')} | {dv.get('units', '-')} |\n"

    report += """
---

## 5. Neural Surrogate Performance

### Training Configuration

| Setting | Value |
|---------|-------|
"""
    ss = config.get('surrogate_settings', {})
    report += f"| Architecture | MLP {ss.get('hidden_dims', [128, 256, 256, 128, 64])} |\n"
    report += f"| Dropout | {ss.get('dropout', 0.1)} |\n"
    report += f"| Batch Size | {ss.get('batch_size', 16)} |\n"
    report += f"| Learning Rate | {ss.get('learning_rate', 0.001)} |\n"
    report += f"| MC Dropout Samples | {ss.get('mc_dropout_samples', 30)} |\n"

    report += """
### Model Checkpoints

| File | Description |
|------|-------------|
"""
    # NOTE: the default is relative to the current working directory, so the
    # listing is empty unless the caller runs from the study directory or
    # passes results_dir explicitly.
    checkpoint_dir = Path('3_results') if results_dir is None else Path(results_dir)
    for pt_file in sorted(checkpoint_dir.glob('surrogate_*.pt')):
        report += f"| `{pt_file.name}` | Surrogate model checkpoint |\n"

    report += """
---

## 6. Trial Source Distribution

| Source | Count | Percentage |
|--------|-------|------------|
"""
    total = len(all_trials) or 1  # avoid dividing by zero on an empty study
    report += f"| V10_FEA (Training) | {len(trials_data['v10_fea'])} | {100*len(trials_data['v10_fea'])/total:.1f}% |\n"
    report += f"| V11_FEA (Validation) | {len(trials_data['v11_fea'])} | {100*len(trials_data['v11_fea'])/total:.1f}% |\n"
    report += f"| V11_NN (Surrogate) | {len(trials_data['v11_nn'])} | {100*len(trials_data['v11_nn'])/total:.1f}% |\n"

    # NN vs FEA comparison; only meaningful when both kinds of trial exist.
    nn_count = len(trials_data['v11_nn'])
    fea_count = len(all_fea)
    if fea_count > 0 and nn_count > 0:
        # The per-trial figures are fixed assumptions (~5 min per FEA,
        # ~10 ms per NN, i.e. ~30,000x), not measurements.
        report += f"""
### Speedup Analysis

| Metric | FEA | Neural | Ratio |
|--------|-----|--------|-------|
| Trial Count | {fea_count} | {nn_count} | {nn_count/fea_count:.0f}x |
| Est. Time per Trial | ~5 min | ~10 ms | ~30,000x |
"""

    report += """
---

## 7. Engineering Recommendations

### Optical Performance Analysis

Based on the optimization results:

"""
    if adaptive_state:
        for index, (state_key, _, rating_label, default_target) in enumerate(_OBJECTIVE_SLOTS):
            best = adaptive_state.get(state_key, float('inf'))
            target = _objective_at(config, index).get('target', default_target)
            report += _rating_line(rating_label, best, target)

    report += """
### Next Steps

1. **If optimization is running**: Monitor convergence in the dashboard
2. **If converged**: Validate best design with detailed FEA analysis
3. **If targets not met**: Consider:
   - Expanding design variable ranges
   - Adding more design variables
   - Increasing FEA validation budget

---

## 8. Files Generated

| File | Description |
|------|-------------|
| `3_results/study.db` | Optuna database with all trials |
| `3_results/adaptive_state.json` | Iteration-by-iteration state |
| `3_results/surrogate_*.pt` | Neural model checkpoints |
| `3_results/optimization.log` | Detailed execution log |

---

## 9. Dashboard Visualization

### Trial Source Differentiation

| Trial Type | Marker | Color |
|------------|--------|-------|
| FEA | Circle | Blue (#2196F3) |
| NN | Cross | Orange (#FF9800) |

### Access Points

| Dashboard | URL | Purpose |
|-----------|-----|---------|
| Atomizer Dashboard | http://localhost:3000 | Real-time monitoring, charts |
| Optuna Dashboard | http://localhost:8081 | Trial history, Pareto analysis |

---

*Report auto-generated by Atomizer V11 Report Generator*
*Last updated: """ + now + "*\n"

    return report


def main():
    """Command-line entry point: read the study inputs and write the report.

    Input locations are resolved relative to this script's directory; the
    report is written to ``--output`` (default ``STUDY_REPORT.md``) joined
    onto that same directory.
    """
    cli = argparse.ArgumentParser(description='Generate V11 optimization report')
    cli.add_argument('--output', '-o', default='STUDY_REPORT.md',
                     help='Output file path (default: STUDY_REPORT.md)')
    options = cli.parse_args()

    # Study layout, anchored at the directory holding this script.
    root = Path(__file__).parent
    results = root / '3_results'
    config_file = root / '1_setup' / 'optimization_config.json'
    state_file = results / 'adaptive_state.json'
    database = results / 'study.db'
    destination = root / options.output

    print(f"Loading configuration from {config_file}...")
    config = load_config(config_file)

    print(f"Loading adaptive state from {state_file}...")
    state = load_adaptive_state(state_file)

    print(f"Loading trials from {database}...")
    trials = get_trials_from_optuna(database)

    print("Generating report...")
    markdown = generate_report(config, state, trials)

    print(f"Writing report to {destination}...")
    destination.write_text(markdown, encoding='utf-8')

    print("\nReport generated successfully!")
    for label, bucket in (('V10 FEA', 'v10_fea'),
                          ('V11 FEA', 'v11_fea'),
                          ('V11 NN', 'v11_nn')):
        print(f"  - {label} trials: {len(trials[bucket])}")
    print(f"  - Output: {destination}")