Atomizer/optimization_engine/method_selector.py

"""
Adaptive Method Selector for Atomizer Optimization

This module provides intelligent method selection based on:
1. Problem characteristics (static analysis from config)
2. Early exploration metrics (dynamic analysis from first N trials)
3. Runtime performance metrics (continuous monitoring)

Classes:
- ProblemProfiler: Analyzes optimization config to extract problem characteristics
- EarlyMetricsCollector: Computes metrics from initial FEA trials
- AdaptiveMethodSelector: Recommends optimization method and parameters
- RuntimeAdvisor: Monitors optimization and suggests pivots

Usage:
    from optimization_engine.method_selector import AdaptiveMethodSelector

    selector = AdaptiveMethodSelector()
    recommendation = selector.recommend(config_path)
    print(recommendation['method'])  # 'turbo', 'hybrid_loop', 'pure_fea', etc.
"""

import json
import numpy as np
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field, asdict
from enum import Enum
import sqlite3
from datetime import datetime


class OptimizationMethod(Enum):
    """Available optimization methods."""
    PURE_FEA = "pure_fea"
    HYBRID_LOOP = "hybrid_loop"
    TURBO = "turbo"
    GNN_FIELD = "gnn_field"


@dataclass
class ProblemProfile:
    """Static problem characteristics extracted from config."""

    # Design space
    n_variables: int = 0
    variable_names: List[str] = field(default_factory=list)
    variable_bounds: Dict[str, Tuple[float, float]] = field(default_factory=dict)
    variable_types: Dict[str, str] = field(default_factory=dict)  # 'continuous', 'discrete', 'categorical'
    design_space_volume: float = 0.0  # Product of all ranges

    # Objectives
    n_objectives: int = 0
    objective_names: List[str] = field(default_factory=list)
    objective_goals: Dict[str, str] = field(default_factory=dict)  # 'minimize', 'maximize'

    # Constraints
    n_constraints: int = 0
    constraint_types: List[str] = field(default_factory=list)  # 'less_than', 'greater_than', 'equal'

    # Budget estimates
    fea_time_estimate: float = 300.0  # seconds per FEA run
    total_budget_hours: float = 8.0
    max_fea_trials: int = 0  # Computed from budget

    # Complexity indicators
    is_multi_objective: bool = False
    has_constraints: bool = False
    expected_nonlinearity: str = "unknown"  # 'low', 'medium', 'high', 'unknown'

    # Neural acceleration hints
    nn_enabled_in_config: bool = False
    min_training_points: int = 50

    def to_dict(self) -> dict:
        return asdict(self)


@dataclass
class EarlyMetrics:
    """Metrics computed from initial FEA exploration."""

    n_trials_analyzed: int = 0

    # Objective statistics
    objective_means: Dict[str, float] = field(default_factory=dict)
    objective_stds: Dict[str, float] = field(default_factory=dict)
    objective_ranges: Dict[str, Tuple[float, float]] = field(default_factory=dict)
    coefficient_of_variation: Dict[str, float] = field(default_factory=dict)  # std/mean

    # Correlation analysis
    objective_correlations: Dict[str, float] = field(default_factory=dict)  # pairwise
    variable_objective_correlations: Dict[str, Dict[str, float]] = field(default_factory=dict)

    # Feasibility
    feasibility_rate: float = 1.0
    n_feasible: int = 0
    n_infeasible: int = 0

    # Pareto analysis (multi-objective)
    pareto_front_size: int = 0
    pareto_growth_rate: float = 0.0  # New Pareto points per trial

    # Response smoothness (NN suitability)
    response_smoothness: float = 0.5  # 0-1, higher = smoother
    lipschitz_estimate: Dict[str, float] = field(default_factory=dict)

    # Variable sensitivity
    variable_sensitivity: Dict[str, float] = field(default_factory=dict)  # Variance-based
    most_sensitive_variable: str = ""

    # Clustering
    design_clustering: str = "unknown"  # 'clustered', 'scattered', 'unknown'

    # NN fit quality (if trained)
    nn_accuracy: Optional[float] = None  # R² or similar
    nn_mean_error: Optional[Dict[str, float]] = None

    def to_dict(self) -> dict:
        return asdict(self)


@dataclass
class RuntimeMetrics:
    """Metrics collected during optimization runtime."""

    timestamp: str = ""
    trials_completed: int = 0

    # Performance
    fea_time_mean: float = 0.0
    fea_time_std: float = 0.0
    fea_failure_rate: float = 0.0

    # Progress
    pareto_size: int = 0
    pareto_growth_rate: float = 0.0
    best_objectives: Dict[str, float] = field(default_factory=dict)
    improvement_rate: float = 0.0  # Best objective improvement per trial

    # NN performance (if using hybrid/turbo)
    nn_accuracy: Optional[float] = None
    nn_accuracy_trend: str = "stable"  # 'improving', 'stable', 'declining'
    nn_predictions_count: int = 0

    # Exploration vs exploitation
    exploration_ratio: float = 0.5  # How much of design space explored

    def to_dict(self) -> dict:
        return asdict(self)


@dataclass
class MethodRecommendation:
    """Output from the method selector."""

    method: str
    confidence: float  # 0-1
    parameters: Dict[str, Any] = field(default_factory=dict)
    reasoning: str = ""
    alternatives: List[Dict[str, Any]] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        return asdict(self)


@dataclass
class NNQualityMetrics:
    """NN surrogate quality metrics with relative thresholds.

    Key insight: NN error should be compared to the coefficient of variation (CV)
    of each objective to determine if the NN is learning the physics properly.

    - If nn_error >> CV → NN is unreliable (not learning, just noise)
    - If nn_error ≈ CV → NN captures the trend (hybrid recommended)
    - If nn_error << CV → NN is excellent (turbo viable)
    """

    has_nn_data: bool = False
    n_validations: int = 0

    # Per-objective metrics
    nn_errors: Dict[str, float] = field(default_factory=dict)  # Absolute % error
    cv_ratios: Dict[str, float] = field(default_factory=dict)  # nn_error / (CV * 100)
    expected_errors: Dict[str, float] = field(default_factory=dict)  # Based on physics type

    # Overall quality scores (0-1, higher = better)
    overall_quality: float = 0.5
    turbo_suitability: float = 0.0
    hybrid_suitability: float = 0.5

    # Physics type classification used
    objective_types: Dict[str, str] = field(default_factory=dict)  # 'linear', 'smooth', 'nonlinear', 'chaotic'

    def to_dict(self) -> dict:
        return asdict(self)


class NNQualityAssessor:
    """Assesses NN surrogate quality relative to problem complexity.

    Uses physics-based expected error thresholds rather than absolute values.
    The key metric is the CV ratio: nn_error / coefficient_of_variation.

    CV Ratio Interpretation:
    - < 0.5  → NN is excellent (captures physics well beyond noise)
    - 0.5-1  → NN is good (adds value for exploration)
    - 1-2    → NN is marginal (use with validation)
    - > 2    → NN is poor (not learning physics, use FEA)
    """

    # Physics-based expected error thresholds
    PHYSICS_THRESHOLDS = {
        'linear': {'max_error': 2.0, 'cv_ratio_max': 0.5},      # mass, volume - deterministic
        'smooth': {'max_error': 5.0, 'cv_ratio_max': 1.0},      # frequency, avg stress
        'nonlinear': {'max_error': 10.0, 'cv_ratio_max': 2.0},  # max stress, stiffness
        'chaotic': {'max_error': 20.0, 'cv_ratio_max': 3.0},    # contact, buckling, fracture
    }

    # Objective name to physics type classification
    OBJECTIVE_CLASSIFICATION = {
        # Linear (deterministic, easy to learn)
        'mass': 'linear',
        'volume': 'linear',
        'weight': 'linear',
        'area': 'linear',

        # Smooth (well-behaved, moderate difficulty)
        'frequency': 'smooth',
        'fundamental_frequency': 'smooth',
        'first_frequency': 'smooth',
        'avg_stress': 'smooth',
        'mean_stress': 'smooth',
        'displacement': 'smooth',
        'avg_displacement': 'smooth',
        'compliance': 'smooth',

        # Nonlinear (sensitive to details, harder to learn)
        'stress': 'nonlinear',
        'max_stress': 'nonlinear',
        'von_mises': 'nonlinear',
        'stiffness': 'nonlinear',
        'max_displacement': 'nonlinear',
        'strain_energy': 'nonlinear',

        # Chaotic (highly nonlinear, very hard to learn)
        'buckling': 'chaotic',
        'contact_force': 'chaotic',
        'fracture': 'chaotic',
        'fatigue': 'chaotic',
    }

    def __init__(self):
        pass

    def collect(self, results_dir: Path, objective_names: List[str],
                early_metrics: EarlyMetrics) -> NNQualityMetrics:
        """Collect NN quality metrics from validation reports and database.

        Args:
            results_dir: Path to 2_results directory
            objective_names: List of objective names from config
            early_metrics: EarlyMetrics with coefficient_of_variation data

        Returns:
            NNQualityMetrics with quality scores and recommendations
        """
        metrics = NNQualityMetrics()

        # 1. Try validation_report.json first (most reliable - has explicit FEA comparison)
        validation_report = results_dir / "validation_report.json"
        if validation_report.exists():
            self._load_from_validation_report(validation_report, metrics, objective_names)

        # 2. Try turbo_report.json (has per-iteration errors)
        turbo_report = results_dir / "turbo_report.json"
        if turbo_report.exists() and not metrics.has_nn_data:
            self._load_from_turbo_report(turbo_report, metrics, objective_names)

        # 3. Query Optuna database for nn_error_percent user attributes
        db_path = results_dir / "study.db"
        if db_path.exists() and not metrics.has_nn_data:
            self._load_from_database(db_path, metrics, objective_names)

        # 4. Compute relative metrics using CV from early_metrics
        if metrics.has_nn_data and early_metrics.coefficient_of_variation:
            self._compute_relative_metrics(metrics, early_metrics, objective_names)

        return metrics

    def _load_from_validation_report(self, report_path: Path, metrics: NNQualityMetrics,
                                      objective_names: List[str]):
        """Load NN error data from validation_report.json."""
        try:
            with open(report_path) as f:
                report = json.load(f)

            metrics.n_validations = report.get('n_validated', 0)

            # Get average errors per objective
            avg_errors = report.get('average_errors_percent', {})
            if avg_errors:
                metrics.has_nn_data = True
                for obj_name in objective_names:
                    # Try exact match or partial match
                    error = avg_errors.get(obj_name)
                    if error is None:
                        # Try partial match (e.g., 'mass' in 'total_mass')
                        for key, val in avg_errors.items():
                            if obj_name.lower() in key.lower() or key.lower() in obj_name.lower():
                                error = val
                                break
                    if error is not None:
                        metrics.nn_errors[obj_name] = float(error)

        except Exception as e:
            pass  # Silently fail, try other sources

    def _load_from_turbo_report(self, report_path: Path, metrics: NNQualityMetrics,
                                 objective_names: List[str]):
        """Load NN error data from turbo_report.json."""
        try:
            with open(report_path) as f:
                report = json.load(f)

            metrics.n_validations = report.get('fea_validations', 0)
            best_solutions = report.get('best_solutions', [])

            if best_solutions:
                metrics.has_nn_data = True

                # Collect errors from all iterations
                all_errors = []
                for sol in best_solutions:
                    nn_error = sol.get('nn_error', [])
                    if nn_error:
                        all_errors.append(nn_error)

                if all_errors:
                    # Average across all validations
                    avg_errors = np.mean(all_errors, axis=0)
                    # Map to objective names (turbo only tracks mass, stress typically)
                    for i, obj_name in enumerate(objective_names[:len(avg_errors)]):
                        metrics.nn_errors[obj_name] = float(avg_errors[i])

        except Exception as e:
            pass

    def _load_from_database(self, db_path: Path, metrics: NNQualityMetrics,
                            objective_names: List[str]):
        """Load NN error data from Optuna database user attributes."""
        try:
            conn = sqlite3.connect(str(db_path))
            cursor = conn.cursor()

            # Query nn_error_percent from trial_user_attributes
            cursor.execute("""
                SELECT value_json FROM trial_user_attributes
                WHERE key = 'nn_error_percent'
            """)

            all_errors = []
            for (value_json,) in cursor.fetchall():
                try:
                    errors = json.loads(value_json)
                    if isinstance(errors, list):
                        all_errors.append(errors)
                except:
                    pass

            conn.close()

            if all_errors:
                metrics.has_nn_data = True
                metrics.n_validations = len(all_errors)

                # Average across all validated trials
                avg_errors = np.mean(all_errors, axis=0)
                for i, obj_name in enumerate(objective_names[:len(avg_errors)]):
                    metrics.nn_errors[obj_name] = float(avg_errors[i])

        except Exception as e:
            pass

    def _classify_objective(self, obj_name: str) -> str:
        """Classify objective by physics type."""
        # Check exact match first
        if obj_name in self.OBJECTIVE_CLASSIFICATION:
            return self.OBJECTIVE_CLASSIFICATION[obj_name]

        # Check partial match
        obj_lower = obj_name.lower()
        for key, obj_type in self.OBJECTIVE_CLASSIFICATION.items():
            if key in obj_lower or obj_lower in key:
                return obj_type

        # Default to 'smooth' if unknown
        return 'smooth'

    def _compute_relative_metrics(self, metrics: NNQualityMetrics,
                                   early_metrics: EarlyMetrics,
                                   objective_names: List[str]):
        """Compute NN error relative to objective variability (CV)."""

        for obj_name in objective_names:
            nn_error = metrics.nn_errors.get(obj_name)
            if nn_error is None:
                continue

            cv = early_metrics.coefficient_of_variation.get(obj_name, 0.1)

            # Compute CV ratio (nn_error is %, cv is fraction)
            # CV ratio = how many times larger is NN error than natural variability
            if cv > 0.001:
                cv_ratio = nn_error / (cv * 100)
            else:
                # Very low CV means linear/deterministic - use absolute error
                cv_ratio = nn_error / 2.0  # Normalize to 2% baseline

            metrics.cv_ratios[obj_name] = cv_ratio

            # Classify and store
            obj_type = self._classify_objective(obj_name)
            metrics.objective_types[obj_name] = obj_type
            metrics.expected_errors[obj_name] = self.PHYSICS_THRESHOLDS[obj_type]['max_error']

        # Compute overall quality scores
        self._compute_quality_scores(metrics)

    def _compute_quality_scores(self, metrics: NNQualityMetrics):
        """Compute overall quality scores based on relative metrics."""

        if not metrics.cv_ratios:
            return

        quality_scores = []
        turbo_scores = []
        hybrid_scores = []

        for obj_name, cv_ratio in metrics.cv_ratios.items():
            obj_type = metrics.objective_types.get(obj_name, 'smooth')
            threshold = self.PHYSICS_THRESHOLDS[obj_type]

            # Quality: how well does NN error compare to expected max?
            nn_error = metrics.nn_errors.get(obj_name, 0)
            expected = threshold['max_error']
            # Use sqrt to be less harsh on errors close to threshold
            quality = max(0, min(1, 1 - (nn_error / expected) ** 0.5)) if expected > 0 else 0.5
            quality_scores.append(quality)

            # Turbo suitability: cv_ratio should be < cv_ratio_max
            # Lower ratio = better (NN captures more than noise)
            cv_max = threshold['cv_ratio_max']
            turbo = max(0, min(1, 1 - cv_ratio / cv_max)) if cv_max > 0 else 0.5
            turbo_scores.append(turbo)

            # Hybrid suitability: more lenient threshold (2x)
            # NN just needs to add some value
            hybrid = max(0, min(1, 1 - cv_ratio / (cv_max * 2))) if cv_max > 0 else 0.5
            hybrid_scores.append(hybrid)

        metrics.overall_quality = float(np.mean(quality_scores)) if quality_scores else 0.5
        metrics.turbo_suitability = float(np.mean(turbo_scores)) if turbo_scores else 0.0
        metrics.hybrid_suitability = float(np.mean(hybrid_scores)) if hybrid_scores else 0.5


class ProblemProfiler:
    """Analyzes optimization config to extract problem characteristics."""

    def __init__(self):
        self.profile = ProblemProfile()

    def analyze(self, config: dict) -> ProblemProfile:
        """
        Analyze optimization config and return problem profile.

        Args:
            config: Loaded optimization_config.json dict

        Returns:
            ProblemProfile with extracted characteristics
        """
        profile = ProblemProfile()

        # Extract design variables
        design_vars = config.get('design_variables', [])
        profile.n_variables = len(design_vars)
        profile.variable_names = [v['parameter'] for v in design_vars]

        volume = 1.0
        for var in design_vars:
            name = var['parameter']
            bounds = var.get('bounds', [0, 1])
            profile.variable_bounds[name] = (bounds[0], bounds[1])
            profile.variable_types[name] = var.get('type', 'continuous')
            volume *= (bounds[1] - bounds[0])
        profile.design_space_volume = volume

        # Extract objectives
        objectives = config.get('objectives', [])
        profile.n_objectives = len(objectives)
        profile.objective_names = [o['name'] for o in objectives]
        profile.objective_goals = {o['name']: o.get('goal', 'minimize') for o in objectives}
        profile.is_multi_objective = profile.n_objectives > 1

        # Extract constraints
        constraints = config.get('constraints', [])
        profile.n_constraints = len(constraints)
        profile.constraint_types = [c.get('type', 'less_than') for c in constraints]
        profile.has_constraints = profile.n_constraints > 0

        # Budget estimates
        opt_settings = config.get('optimization_settings', {})
        profile.fea_time_estimate = opt_settings.get('timeout_per_trial', 300)
        profile.total_budget_hours = opt_settings.get('budget_hours', 8)

        if profile.fea_time_estimate > 0:
            profile.max_fea_trials = int(
                (profile.total_budget_hours * 3600) / profile.fea_time_estimate
            )

        # Neural acceleration config
        nn_config = config.get('neural_acceleration', {})
        profile.nn_enabled_in_config = nn_config.get('enabled', False)
        profile.min_training_points = nn_config.get('min_training_points', 50)

        # Infer nonlinearity from physics type
        sim_config = config.get('simulation', {})
        analysis_types = sim_config.get('analysis_types', [])

        if 'modal' in analysis_types or 'frequency' in str(analysis_types).lower():
            profile.expected_nonlinearity = 'medium'
        elif 'nonlinear' in str(analysis_types).lower():
            profile.expected_nonlinearity = 'high'
        else:
            profile.expected_nonlinearity = 'low'  # Static linear

        self.profile = profile
        return profile

    def analyze_from_file(self, config_path: Path) -> ProblemProfile:
        """Load config from file and analyze."""
        with open(config_path) as f:
            config = json.load(f)
        return self.analyze(config)


class EarlyMetricsCollector:
    """Computes metrics from initial FEA exploration trials."""

    def __init__(self, min_trials: int = 20):
        self.min_trials = min_trials
        self.metrics = EarlyMetrics()

    def collect(self, db_path: Path, objective_names: List[str],
                variable_names: List[str], constraints: List[dict] = None) -> EarlyMetrics:
        """
        Collect metrics from study database.

        Args:
            db_path: Path to study.db
            objective_names: List of objective column names
            variable_names: List of design variable names
            constraints: List of constraint definitions from config

        Returns:
            EarlyMetrics with computed statistics
        """
        metrics = EarlyMetrics()

        if not db_path.exists():
            return metrics

        # Load data from Optuna database
        conn = sqlite3.connect(str(db_path))
        cursor = conn.cursor()

        try:
            # Get completed trials from Optuna database
            # Note: Optuna stores params in trial_params and objectives in trial_values
            cursor.execute("""
                SELECT trial_id FROM trials
                WHERE state = 'COMPLETE'
            """)
            completed_trials = cursor.fetchall()
            metrics.n_trials_analyzed = len(completed_trials)

            if metrics.n_trials_analyzed < self.min_trials:
                conn.close()
                return metrics

            # Extract trial data from trial_params and trial_values tables
            trial_data = []
            for (trial_id,) in completed_trials:
                values = {}

                # Get parameters
                cursor.execute("""
                    SELECT param_name, param_value FROM trial_params
                    WHERE trial_id = ?
                """, (trial_id,))
                for name, value in cursor.fetchall():
                    try:
                        values[name] = float(value) if value is not None else None
                    except:
                        pass

                # Get objectives from trial_values
                cursor.execute("""
                    SELECT objective, value FROM trial_values
                    WHERE trial_id = ?
                """, (trial_id,))
                for idx, value in cursor.fetchall():
                    if idx < len(objective_names):
                        values[objective_names[idx]] = float(value) if value is not None else None

                if values:
                    trial_data.append(values)

            if not trial_data:
                conn.close()
                return metrics

            # Compute objective statistics
            for obj_name in objective_names:
                obj_values = [t.get(obj_name) for t in trial_data if t.get(obj_name) is not None]
                if obj_values:
                    metrics.objective_means[obj_name] = np.mean(obj_values)
                    metrics.objective_stds[obj_name] = np.std(obj_values)
                    metrics.objective_ranges[obj_name] = (min(obj_values), max(obj_values))

                    if metrics.objective_means[obj_name] != 0:
                        metrics.coefficient_of_variation[obj_name] = (
                            metrics.objective_stds[obj_name] /
                            abs(metrics.objective_means[obj_name])
                        )

            # Compute correlations between objectives
            if len(objective_names) >= 2:
                for i, obj1 in enumerate(objective_names):
                    for obj2 in objective_names[i+1:]:
                        vals1 = [t.get(obj1) for t in trial_data]
                        vals2 = [t.get(obj2) for t in trial_data]

                        # Filter out None values
                        paired = [(v1, v2) for v1, v2 in zip(vals1, vals2)
                                 if v1 is not None and v2 is not None]

                        if len(paired) > 5:
                            v1, v2 = zip(*paired)
                            corr = np.corrcoef(v1, v2)[0, 1]
                            metrics.objective_correlations[f"{obj1}_vs_{obj2}"] = corr

            # Compute variable-objective correlations (sensitivity)
            for var_name in variable_names:
                metrics.variable_objective_correlations[var_name] = {}
                var_values = [t.get(var_name) for t in trial_data]

                for obj_name in objective_names:
                    obj_values = [t.get(obj_name) for t in trial_data]

                    paired = [(v, o) for v, o in zip(var_values, obj_values)
                             if v is not None and o is not None]

                    if len(paired) > 5:
                        v, o = zip(*paired)
                        corr = abs(np.corrcoef(v, o)[0, 1])
                        metrics.variable_objective_correlations[var_name][obj_name] = corr

            # Compute overall variable sensitivity (average absolute correlation)
            for var_name in variable_names:
                correlations = list(metrics.variable_objective_correlations.get(var_name, {}).values())
                if correlations:
                    metrics.variable_sensitivity[var_name] = np.mean(correlations)

            if metrics.variable_sensitivity:
                metrics.most_sensitive_variable = max(
                    metrics.variable_sensitivity,
                    key=metrics.variable_sensitivity.get
                )

            # Estimate response smoothness
            # Higher CV suggests rougher landscape
            avg_cv = np.mean(list(metrics.coefficient_of_variation.values())) if metrics.coefficient_of_variation else 0.5
            metrics.response_smoothness = max(0, min(1, 1 - avg_cv))

            # Feasibility analysis
            if constraints:
                n_feasible = 0
                for trial in trial_data:
                    feasible = True
                    for constraint in constraints:
                        c_name = constraint.get('name')
                        c_type = constraint.get('type', 'less_than')
                        threshold = constraint.get('threshold')

                        value = trial.get(c_name)
                        if value is not None and threshold is not None:
                            if c_type == 'less_than' and value > threshold:
                                feasible = False
                            elif c_type == 'greater_than' and value < threshold:
                                feasible = False

                    if feasible:
                        n_feasible += 1

                metrics.n_feasible = n_feasible
                metrics.n_infeasible = len(trial_data) - n_feasible
                metrics.feasibility_rate = n_feasible / len(trial_data) if trial_data else 1.0

            conn.close()

        except Exception as e:
            print(f"Warning: Error collecting metrics: {e}")
            conn.close()

        self.metrics = metrics
        return metrics

    def estimate_nn_suitability(self) -> float:
        """
        Estimate how suitable the problem is for neural network acceleration.

        Returns:
            Score from 0-1, higher = more suitable
        """
        score = 0.5  # Base score

        # Smooth response is good for NN
        score += 0.2 * self.metrics.response_smoothness

        # High feasibility is good
        score += 0.1 * self.metrics.feasibility_rate

        # Enough training data
        if self.metrics.n_trials_analyzed >= 50:
            score += 0.1
        if self.metrics.n_trials_analyzed >= 100:
            score += 0.1

        return min(1.0, max(0.0, score))


class AdaptiveMethodSelector:
    """
    Recommends optimization method based on problem characteristics and metrics.

    The selector uses a scoring system to rank methods:
    - Each method starts with a base score
    - Scores are adjusted based on problem characteristics
    - Early metrics further refine the recommendation
    - NN quality metrics adjust confidence based on actual surrogate performance
    """

    def __init__(self):
        self.profiler = ProblemProfiler()
        self.metrics_collector = EarlyMetricsCollector()
        self.nn_quality_assessor = NNQualityAssessor()

        # Method base scores (can be tuned based on historical performance)
        self.base_scores = {
            OptimizationMethod.PURE_FEA: 0.5,
            OptimizationMethod.HYBRID_LOOP: 0.6,
            OptimizationMethod.TURBO: 0.7,
            OptimizationMethod.GNN_FIELD: 0.4,
        }

        # Store last metrics for reporting
        self.last_nn_quality: Optional[NNQualityMetrics] = None
        self.last_early_metrics: Optional[EarlyMetrics] = None

    def recommend(self, config: dict, db_path: Path = None,
                  early_metrics: EarlyMetrics = None,
                  results_dir: Path = None) -> MethodRecommendation:
        """
        Generate method recommendation.

        Args:
            config: Optimization config dict
            db_path: Optional path to existing study.db for early metrics
            early_metrics: Pre-computed early metrics (optional)
            results_dir: Optional path to 2_results directory for NN quality data

        Returns:
            MethodRecommendation with method, confidence, and parameters
        """
        # Profile the problem
        profile = self.profiler.analyze(config)

        # Collect early metrics if database exists
        if db_path and db_path.exists() and early_metrics is None:
            early_metrics = self.metrics_collector.collect(
                db_path,
                profile.objective_names,
                profile.variable_names,
                config.get('constraints', [])
            )

        # Collect NN quality metrics if results directory exists
        nn_quality = None
        if results_dir is None and db_path:
            results_dir = db_path.parent  # study.db is typically in 2_results

        if results_dir and results_dir.exists() and early_metrics:
            nn_quality = self.nn_quality_assessor.collect(
                results_dir,
                profile.objective_names,
                early_metrics
            )
            self.last_nn_quality = nn_quality

        # Store early_metrics for reporting
        self.last_early_metrics = early_metrics

        # Score each method (now includes NN quality)
        scores = self._score_methods(profile, early_metrics, nn_quality)

        # Sort by score
        ranked = sorted(scores.items(), key=lambda x: x[1]['score'], reverse=True)

        # Build recommendation
        best_method, best_info = ranked[0]

        recommendation = MethodRecommendation(
            method=best_method.value,
            confidence=min(1.0, best_info['score']),
            parameters=self._get_parameters(best_method, profile, early_metrics),
            reasoning=best_info['reason'],
            alternatives=[
                {
                    'method': m.value,
                    'confidence': min(1.0, info['score']),
                    'reason': info['reason']
                }
                for m, info in ranked[1:3]
            ],
            warnings=self._get_warnings(profile, early_metrics, nn_quality)
        )

        return recommendation

    def _score_methods(self, profile: ProblemProfile,
                       metrics: EarlyMetrics = None,
                       nn_quality: NNQualityMetrics = None) -> Dict[OptimizationMethod, Dict]:
        """Score each method based on problem characteristics and NN quality."""

        scores = {}

        for method in OptimizationMethod:
            score = self.base_scores[method]
            reasons = []

            # === TURBO MODE ===
            if method == OptimizationMethod.TURBO:
                # Good for: low-dimensional, smooth, sufficient budget, good NN quality

                if profile.n_variables <= 5:
                    score += 0.15
                    reasons.append("low-dimensional design space")
                elif profile.n_variables > 10:
                    score -= 0.2
                    reasons.append("high-dimensional (may struggle)")

                if profile.max_fea_trials >= 50:
                    score += 0.1
                    reasons.append("sufficient FEA budget")
                else:
                    score -= 0.15
                    reasons.append("limited FEA budget")

                if metrics and metrics.response_smoothness > 0.7:
                    score += 0.15
                    reasons.append(f"smooth landscape ({metrics.response_smoothness:.0%})")
                elif metrics and metrics.response_smoothness < 0.4:
                    score -= 0.2
                    reasons.append(f"rough landscape ({metrics.response_smoothness:.0%})")

                # NEW: NN Quality-based adjustments using relative thresholds
                if nn_quality and nn_quality.has_nn_data:
                    if nn_quality.turbo_suitability > 0.8:
                        score += 0.25
                        reasons.append(f"excellent NN quality ({nn_quality.turbo_suitability:.0%})")
                    elif nn_quality.turbo_suitability > 0.5:
                        score += 0.1
                        reasons.append(f"good NN quality ({nn_quality.turbo_suitability:.0%})")
                    elif nn_quality.turbo_suitability < 0.3:
                        score -= 0.25
                        reasons.append(f"poor NN quality ({nn_quality.turbo_suitability:.0%}) - use hybrid")

                    # Per-objective warnings for high CV ratios
                    for obj, cv_ratio in nn_quality.cv_ratios.items():
                        if cv_ratio > 2.0:
                            score -= 0.1
                            reasons.append(f"{obj}: NN error >> variability")
                elif metrics and metrics.nn_accuracy and metrics.nn_accuracy > 0.9:
                    score += 0.1
                    reasons.append(f"excellent NN fit ({metrics.nn_accuracy:.0%})")

            # === HYBRID LOOP ===
            elif method == OptimizationMethod.HYBRID_LOOP:
                # Good for: moderate complexity, unknown landscape, need safety

                if 3 <= profile.n_variables <= 10:
                    score += 0.1
                    reasons.append("moderate dimensionality")

                if metrics and 0.4 < metrics.response_smoothness < 0.8:
                    score += 0.1
                    reasons.append("uncertain landscape - hybrid adapts")

                if profile.has_constraints and metrics and metrics.feasibility_rate < 0.9:
                    score += 0.1
                    reasons.append("constrained problem - safer approach")

                if profile.max_fea_trials >= 30:
                    score += 0.05
                    reasons.append("adequate budget for iterations")

                # NEW: NN Quality adjustments for hybrid
                if nn_quality and nn_quality.has_nn_data:
                    if nn_quality.hybrid_suitability > 0.5:
                        score += 0.15
                        reasons.append("NN adds value with periodic retraining")
                    if nn_quality.turbo_suitability < 0.5:
                        score += 0.1
                        reasons.append("NN quality suggests hybrid over turbo")

            # === PURE FEA ===
            elif method == OptimizationMethod.PURE_FEA:
                # Good for: small budget, highly nonlinear, rough landscape

                if profile.max_fea_trials < 30:
                    score += 0.2
                    reasons.append("limited budget - no NN overhead")

                if metrics and metrics.response_smoothness < 0.3:
                    score += 0.2
                    reasons.append("rough landscape - NN unreliable")

                if profile.expected_nonlinearity == 'high':
                    score += 0.15
                    reasons.append("highly nonlinear physics")

                if metrics and metrics.feasibility_rate < 0.5:
                    score += 0.1
                    reasons.append("many infeasible designs - need accurate FEA")

                # NEW: NN Quality - if NN is truly poor, favor pure FEA
                if nn_quality and nn_quality.has_nn_data:
                    if nn_quality.hybrid_suitability < 0.3:
                        score += 0.2
                        reasons.append("NN quality too low - prefer FEA")

            # === GNN FIELD ===
            elif method == OptimizationMethod.GNN_FIELD:
                # Good for: high-dimensional, need field visualization

                if profile.n_variables > 10:
                    score += 0.2
                    reasons.append("high-dimensional - GNN handles well")

                # GNN is more advanced, only recommend if specifically needed
                if profile.n_variables <= 5:
                    score -= 0.1
                    reasons.append("simple problem - MLP sufficient")

            # Compile reason string
            reason = "; ".join(reasons) if reasons else "default recommendation"
            scores[method] = {'score': score, 'reason': reason}

        return scores

    def _get_parameters(self, method: OptimizationMethod,
                        profile: ProblemProfile,
                        metrics: EarlyMetrics = None) -> Dict[str, Any]:
        """Generate recommended parameters for the selected method."""

        params = {}

        if method == OptimizationMethod.TURBO:
            # Scale NN trials based on dimensionality
            base_nn_trials = 5000
            if profile.n_variables <= 2:
                nn_trials = base_nn_trials
            elif profile.n_variables <= 5:
                nn_trials = base_nn_trials * 2
            else:
                nn_trials = base_nn_trials * 3

            params = {
                'nn_trials': nn_trials,
                'batch_size': 100,
                'retrain_every': 10,
                'epochs': 150 if metrics and metrics.n_trials_analyzed > 100 else 200
            }

        elif method == OptimizationMethod.HYBRID_LOOP:
            params = {
                'iterations': 5,
                'nn_trials_per_iter': 500,
                'validate_per_iter': 5,
                'epochs': 300
            }

        elif method == OptimizationMethod.PURE_FEA:
            # Choose sampler based on objectives
            if profile.is_multi_objective:
                sampler = 'NSGAIISampler'
            else:
                sampler = 'TPESampler'

            params = {
                'sampler': sampler,
                'n_trials': min(100, profile.max_fea_trials),
                'timeout_per_trial': profile.fea_time_estimate
            }

        elif method == OptimizationMethod.GNN_FIELD:
            params = {
                'model_type': 'parametric_gnn',
                'initial_fea_trials': 100,
                'nn_trials': 10000,
                'epochs': 200
            }

        return params

    def _get_warnings(self, profile: ProblemProfile,
                      metrics: EarlyMetrics = None,
                      nn_quality: NNQualityMetrics = None) -> List[str]:
        """Generate warnings about potential issues."""

        warnings = []

        if profile.n_variables > 10:
            warnings.append(
                f"High-dimensional problem ({profile.n_variables} variables) - "
                "consider dimensionality reduction or Latin Hypercube sampling"
            )

        if profile.max_fea_trials < 20:
            warnings.append(
                f"Very limited FEA budget ({profile.max_fea_trials} trials) - "
                "neural acceleration may not have enough training data"
            )

        if metrics and metrics.feasibility_rate < 0.5:
            warnings.append(
                f"Low feasibility rate ({metrics.feasibility_rate:.0%}) - "
                "consider relaxing constraints or narrowing design space"
            )

        if metrics and metrics.response_smoothness < 0.3:
            warnings.append(
                f"Rough objective landscape detected - "
                "neural surrogate may have high prediction errors"
            )

        # NEW: NN Quality warnings
        if nn_quality and nn_quality.has_nn_data:
            # Per-objective quality warnings
            for obj_name, cv_ratio in nn_quality.cv_ratios.items():
                obj_type = nn_quality.objective_types.get(obj_name, 'smooth')
                nn_error = nn_quality.nn_errors.get(obj_name, 0)
                expected = nn_quality.expected_errors.get(obj_name, 5.0)

                if cv_ratio > 2.0:
                    warnings.append(
                        f"{obj_name}: NN error ({nn_error:.1f}%) >> variability - "
                        f"NN not learning physics well for this {obj_type} objective"
                    )
                elif nn_error > expected * 1.5:
                    warnings.append(
                        f"{obj_name}: NN error ({nn_error:.1f}%) above expected ({expected:.0f}%) - "
                        f"consider retraining or using hybrid mode"
                    )

        return warnings


class RuntimeAdvisor:
    """
    Monitors optimization runtime and suggests method pivots.

    Call check_pivot() periodically during optimization to get
    suggestions for method changes.
    """

    def __init__(self, check_interval: int = 10):
        """
        Args:
            check_interval: Check for pivots every N trials
        """
        self.check_interval = check_interval
        self.history: List[RuntimeMetrics] = []
        self.pivot_suggestions: List[Dict] = []

    def update(self, metrics: RuntimeMetrics):
        """Add new runtime metrics to history."""
        metrics.timestamp = datetime.now().isoformat()
        self.history.append(metrics)

    def check_pivot(self, current_method: str) -> Optional[Dict]:
        """
        Check if a method pivot should be suggested.

        Args:
            current_method: Currently running method

        Returns:
            Pivot suggestion dict or None
        """
        if len(self.history) < 2:
            return None

        latest = self.history[-1]
        previous = self.history[-2]

        suggestion = None

        # Check 1: NN accuracy declining
        if latest.nn_accuracy_trend == 'declining':
            if current_method == 'turbo':
                suggestion = {
                    'suggest_pivot': True,
                    'from': current_method,
                    'to': 'hybrid_loop',
                    'reason': 'NN accuracy declining - switch to hybrid for more frequent retraining',
                    'urgency': 'medium'
                }

        # Check 2: Pareto front stagnating
        if latest.pareto_growth_rate < 0.01 and previous.pareto_growth_rate < 0.01:
            suggestion = {
                'suggest_pivot': True,
                'from': current_method,
                'to': 'increase_exploration',
                'reason': 'Pareto front stagnating - consider increasing exploration',
                'urgency': 'low'
            }

        # Check 3: High FEA failure rate
        if latest.fea_failure_rate > 0.2:
            if current_method in ['turbo', 'hybrid_loop']:
                suggestion = {
                    'suggest_pivot': True,
                    'from': current_method,
                    'to': 'pure_fea',
                    'reason': f'High FEA failure rate ({latest.fea_failure_rate:.0%}) - NN exploring invalid regions',
                    'urgency': 'high'
                }

        # Check 4: Diminishing returns
        if latest.improvement_rate < 0.001 and latest.trials_completed > 100:
            suggestion = {
                'suggest_pivot': True,
                'from': current_method,
                'to': 'stop_early',
                'reason': 'Diminishing returns - consider stopping optimization',
                'urgency': 'low'
            }

        if suggestion:
            self.pivot_suggestions.append(suggestion)

        return suggestion

    def get_summary(self) -> Dict:
        """Get summary of runtime performance."""
        if not self.history:
            return {}

        latest = self.history[-1]

        return {
            'trials_completed': latest.trials_completed,
            'pareto_size': latest.pareto_size,
            'fea_time_mean': latest.fea_time_mean,
            'fea_failure_rate': latest.fea_failure_rate,
            'nn_accuracy': latest.nn_accuracy,
            'pivot_suggestions_count': len(self.pivot_suggestions)
        }


def print_recommendation(rec: MethodRecommendation, profile: ProblemProfile = None,
                         nn_quality: NNQualityMetrics = None, early_metrics: EarlyMetrics = None):
    """Pretty-print a method recommendation with NN quality assessment."""

    print("\n" + "=" * 70)
    print("               OPTIMIZATION METHOD ADVISOR")
    print("=" * 70)

    if profile:
        print("\nProblem Profile:")
        print(f"  Variables: {profile.n_variables} ({', '.join(profile.variable_names)})")
        print(f"  Objectives: {profile.n_objectives} ({', '.join(profile.objective_names)})")
        print(f"  Constraints: {profile.n_constraints}")
        print(f"  Max FEA budget: ~{profile.max_fea_trials} trials")

    # NN Quality Assessment Section
    if nn_quality and nn_quality.has_nn_data:
        print("\nNN Quality Assessment:")
        print(f"  Validations analyzed: {nn_quality.n_validations}")
        print()

        # Build table header
        print("  | Objective     | NN Error | CV     | Ratio | Type       | Quality |")
        print("  |---------------|----------|--------|-------|------------|---------|")

        for obj_name in nn_quality.nn_errors.keys():
            nn_error = nn_quality.nn_errors.get(obj_name, 0)
            cv_ratio = nn_quality.cv_ratios.get(obj_name, 0)
            obj_type = nn_quality.objective_types.get(obj_name, 'smooth')

            # Get CV from early_metrics if available
            cv_pct = 0.0
            if early_metrics and early_metrics.coefficient_of_variation:
                cv = early_metrics.coefficient_of_variation.get(obj_name, 0)
                cv_pct = cv * 100  # Convert to percentage

            # Quality indicator
            if cv_ratio < 0.5:
                quality = "✓ Great"
            elif cv_ratio < 1.0:
                quality = "✓ Good"
            elif cv_ratio < 2.0:
                quality = "~ OK"
            else:
                quality = "✗ Poor"

            # Format row
            print(f"  | {obj_name[:13]:<13} | {nn_error:>6.1f}% | {cv_pct:>5.1f}% | {cv_ratio:>5.2f} | {obj_type:<10} | {quality:<7} |")

        print()
        print(f"  Overall Quality: {nn_quality.overall_quality:.0%}")
        print(f"  Turbo Suitability: {nn_quality.turbo_suitability:.0%}")
        print(f"  Hybrid Suitability: {nn_quality.hybrid_suitability:.0%}")

    print("\n" + "-" * 70)
    print(f"\n  RECOMMENDED: {rec.method.upper()}")
    print(f"  Confidence: {rec.confidence:.0%}")
    print(f"  Reason: {rec.reasoning}")

    print("\n  Suggested parameters:")
    for key, value in rec.parameters.items():
        print(f"    --{key.replace('_', '-')}: {value}")

    if rec.alternatives:
        print("\n  Alternatives:")
        for alt in rec.alternatives:
            print(f"    - {alt['method']} ({alt['confidence']:.0%}): {alt['reason']}")

    if rec.warnings:
        print("\n  Warnings:")
        for warning in rec.warnings:
            print(f"    ! {warning}")

    print("\n" + "=" * 70)


# Convenience function for quick use
def recommend_method(config_path: Path, db_path: Path = None,
                     results_dir: Path = None) -> Tuple[MethodRecommendation, 'AdaptiveMethodSelector']:
    """
    Quick method recommendation from config file.

    Args:
        config_path: Path to optimization_config.json
        db_path: Optional path to existing study.db
        results_dir: Optional path to results directory (for NN quality assessment)

    Returns:
        Tuple of (MethodRecommendation, AdaptiveMethodSelector)
        The selector contains last_nn_quality and last_early_metrics for display
    """
    with open(config_path) as f:
        config = json.load(f)

    selector = AdaptiveMethodSelector()
    rec = selector.recommend(config, db_path, early_metrics=None, results_dir=results_dir)
    return rec, selector


if __name__ == "__main__":
    # Test with a sample config
    import sys

    if len(sys.argv) > 1:
        config_path = Path(sys.argv[1])
        db_path = Path(sys.argv[2]) if len(sys.argv) > 2 else None

        # Infer results_dir from config_path location (typically in 1_setup)
        results_dir = None
        if config_path.parent.name == "1_setup":
            results_dir = config_path.parent.parent / "2_results"
        elif "2_results" in str(config_path):
            results_dir = config_path.parent

        rec, selector = recommend_method(config_path, db_path, results_dir)

        # Also get profile for display
        with open(config_path) as f:
            config = json.load(f)
        profiler = ProblemProfiler()
        profile = profiler.analyze(config)

        # Print with NN quality metrics if available
        print_recommendation(
            rec,
            profile,
            nn_quality=selector.last_nn_quality,
            early_metrics=selector.last_early_metrics
        )
    else:
        print("Usage: python method_selector.py <config_path> [db_path]")