"""
Strategy Selector - Intelligent optimization strategy recommendation.

This module implements decision logic to recommend the best optimization strategy
based on landscape characteristics. Uses expert knowledge and empirical heuristics
to match problem types to appropriate algorithms.

Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""

import json
from pathlib import Path
from typing import Dict, Optional, Tuple

# The decision logic below is pure Python; optuna is only needed by
# create_sampler_from_config(). Guard the import so the selector itself can be
# imported (and unit-tested) without optuna, and remember the original failure
# so it can be re-raised with context when a sampler is actually requested.
try:
    import optuna
except ImportError as _import_error:
    optuna = None  # type: ignore[assignment]
    _OPTUNA_IMPORT_ERROR: Optional[ImportError] = _import_error
else:
    _OPTUNA_IMPORT_ERROR = None


class IntelligentStrategySelector:
    """
    Selects optimal optimization strategy based on problem characteristics.

    Decision tree combines:
    1. Landscape analysis (smoothness, multimodality, noise)
    2. Problem dimensionality
    3. Trial budget and evaluation cost
    4. Historical performance data (if available)

    Note: in the current implementation only the landscape characteristics,
    dimensionality and trial counts influence the decision; the trial budget
    and recorded history are accepted/stored but not consulted.
    """

    def __init__(self, verbose: bool = True):
        """
        Args:
            verbose: Print recommendation explanations
        """
        self.verbose = verbose
        # One dict per logged recommendation (see _log_recommendation).
        self.recommendation_history = []

    def recommend_strategy(
        self,
        landscape: Optional[Dict],
        trials_completed: int = 0,
        trials_budget: Optional[int] = None,
        current_best_value: Optional[float] = None
    ) -> Tuple[str, Dict]:
        """
        Recommend optimization strategy based on problem characteristics.

        Args:
            landscape: Output from LandscapeAnalyzer.analyze(), or None for
                multi-objective studies. Missing keys fall back to defaults.
            trials_completed: Number of trials completed so far
            trials_budget: Total trial budget (if known). Currently unused.
            current_best_value: Current best objective value. Currently unused.

        Returns:
            (strategy_name, recommendation_details)
            strategy_name: One of ['tpe', 'cmaes', 'gp_bo', 'random'].
                ('hybrid_gp_cmaes' is never returned directly; the hybrid
                approach is expressed through a 'transition_plan' entry.)
            recommendation_details: Dict with confidence, reasoning, and sampler config

        Only recommendations produced by the decision tree are logged to
        ``recommendation_history`` and printed; the two early-return paths
        (multi-objective, not-ready landscape) return without logging.
        """
        # Handle None landscape (multi-objective optimization):
        # random exploration first, then multivariate TPE.
        if landscape is None:
            return self._recommend_multiobjective_strategy(trials_completed)

        if not landscape.get('ready', False):
            # Not enough data, use random exploration
            return self._recommend_random_exploration(trials_completed)

        # Extract key characteristics
        landscape_type = landscape.get('landscape_type', 'unknown')
        smoothness = landscape.get('smoothness', 0.5)
        multimodal = landscape.get('multimodal', False)
        noise_level = landscape.get('noise_level', 0.0)
        dimensionality = landscape.get('dimensionality', 2)
        # Read defensively like every other field: a missing (or None)
        # 'parameter_correlation' entry must not raise KeyError.
        correlation_info = landscape.get('parameter_correlation') or {}
        correlation_strength = correlation_info.get('overall_strength', 0.3)

        # Use characterization trial count for strategy decisions (not total trials)
        # This prevents premature algorithm selection when many trials were pruned
        char_trials = landscape.get('total_trials', trials_completed)

        # Decision tree for strategy selection
        strategy, details = self._apply_decision_tree(
            landscape_type=landscape_type,
            smoothness=smoothness,
            multimodal=multimodal,
            noise_level=noise_level,
            dimensionality=dimensionality,
            correlation_strength=correlation_strength,
            trials_completed=char_trials  # Use characterization trials, not total
        )

        # Add landscape info to recommendation
        details['landscape_analysis'] = {
            'type': landscape_type,
            'smoothness': smoothness,
            'multimodal': multimodal,
            'dimensionality': dimensionality
        }

        # Log recommendation (keyed by the caller-supplied trial count)
        self._log_recommendation(strategy, details, trials_completed)

        if self.verbose:
            self._print_recommendation(strategy, details)

        return strategy, details

    def _apply_decision_tree(
        self,
        landscape_type: str,
        smoothness: float,
        multimodal: bool,
        noise_level: float,
        dimensionality: int,
        correlation_strength: float,
        trials_completed: int
    ) -> Tuple[str, Dict]:
        """
        Apply expert decision tree for strategy selection.

        Cases are evaluated top to bottom and the first match wins.
        ``smoothness`` is accepted for interface symmetry but is not consulted;
        smoothness enters only through ``landscape_type``.

        Decision logic based on optimization algorithm strengths:

        CMA-ES:
        - Best for: Smooth unimodal landscapes, correlated parameters
        - Strengths: Fast local convergence, handles parameter correlations
        - Weaknesses: Poor for multimodal, needs reasonable initialization

        GP-BO (Gaussian Process Bayesian Optimization):
        - Best for: Smooth landscapes, expensive evaluations, low-dimensional
        - Strengths: Sample efficient, good uncertainty quantification
        - Weaknesses: Scales poorly >10D, expensive surrogate training

        TPE (Tree-structured Parzen Estimator):
        - Best for: General purpose, multimodal, moderate dimensional
        - Strengths: Handles multimodality, scales to ~50D, robust
        - Weaknesses: Slower convergence than CMA-ES on smooth problems

        Hybrid GP→CMA-ES:
        - Best for: Smooth landscapes needing global+local search
        - Strengths: GP finds basin, CMA-ES refines locally
        - Weaknesses: More complex, needs transition logic

        Returns:
            (strategy_name, details) where details holds 'confidence',
            'reasoning', 'sampler_config' and, for GP-BO, a 'transition_plan'.
        """
        # CASE 1: High noise - use robust methods
        if noise_level > 0.5:
            return 'tpe', {
                'confidence': 0.85,
                'reasoning': 'High noise detected - TPE is more robust to noisy evaluations',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 15,  # More exploration for noisy problems
                        'n_ei_candidates': 24
                    }
                }
            }

        # CASE 2: Smooth unimodal with strong correlation - CMA-ES excels
        if landscape_type == 'smooth_unimodal' and correlation_strength > 0.5:
            return 'cmaes', {
                'confidence': 0.92,
                'reasoning': f'Smooth unimodal landscape with strong parameter correlation ({correlation_strength:.2f}) - CMA-ES will converge quickly',
                'sampler_config': {
                    'type': 'CmaEsSampler',
                    'params': {
                        'restart_strategy': 'ipop',  # Increasing population restart
                        'with_margin': True  # Use margin for constraint handling
                    }
                }
            }

        # CASE 3: Smooth but multimodal - Hybrid GP→CMA-ES or GP-BO
        if landscape_type == 'smooth_multimodal':
            if dimensionality <= 5 and trials_completed < 30:
                # Early stage: GP-BO for exploration
                return 'gp_bo', {
                    'confidence': 0.78,
                    'reasoning': f'Smooth multimodal landscape, {dimensionality}D - GP-BO for intelligent exploration, plan CMA-ES refinement later',
                    'sampler_config': {
                        'type': 'GPSampler',  # Custom implementation needed
                        'params': {
                            'acquisition': 'EI',  # Expected Improvement
                            'n_initial_points': 10
                        }
                    },
                    'transition_plan': {
                        'switch_to': 'cmaes',
                        'when': 'error < 1.0 OR trials > 40'
                    }
                }
            else:
                # Later stage or higher dimensional: TPE
                return 'tpe', {
                    'confidence': 0.75,
                    'reasoning': 'Smooth multimodal landscape - TPE handles multiple modes well',
                    'sampler_config': {
                        'type': 'TPESampler',
                        'params': {
                            'multivariate': True,
                            'n_startup_trials': 10,
                            'n_ei_candidates': 32  # More exploitation
                        }
                    }
                }

        # CASE 4: Smooth unimodal, low-dimensional, expensive - GP-BO then CMA-ES
        if landscape_type == 'smooth_unimodal' and dimensionality <= 5:
            if trials_completed < 25:
                return 'gp_bo', {
                    'confidence': 0.82,
                    'reasoning': f'Smooth {dimensionality}D landscape - GP-BO for sample-efficient exploration',
                    'sampler_config': {
                        'type': 'GPSampler',
                        'params': {
                            'acquisition': 'EI',
                            'n_initial_points': 8
                        }
                    },
                    'transition_plan': {
                        'switch_to': 'cmaes',
                        'when': 'error < 2.0 OR trials > 25'
                    }
                }
            else:
                # Switch to CMA-ES for final refinement
                return 'cmaes', {
                    'confidence': 0.88,
                    'reasoning': 'Switching to CMA-ES for final local refinement',
                    'sampler_config': {
                        'type': 'CmaEsSampler',
                        'params': {
                            'restart_strategy': 'ipop',
                            'with_margin': True
                        }
                    }
                }

        # CASE 5: Rugged multimodal - TPE is most robust
        # (also catches any remaining landscape flagged as multimodal)
        if landscape_type == 'rugged_multimodal' or multimodal:
            return 'tpe', {
                'confidence': 0.80,
                'reasoning': 'Rugged/multimodal landscape - TPE is robust to multiple local optima',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 12,
                        'n_ei_candidates': 24
                    }
                }
            }

        # CASE 6: Rugged unimodal - TPE with more exploration
        if landscape_type == 'rugged_unimodal':
            return 'tpe', {
                'confidence': 0.72,
                'reasoning': 'Rugged landscape - TPE with extended exploration',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 15,
                        'n_ei_candidates': 20
                    }
                }
            }

        # CASE 7: High dimensional (>5D) - TPE scales best
        if dimensionality > 5:
            return 'tpe', {
                'confidence': 0.77,
                'reasoning': f'High dimensionality ({dimensionality}D) - TPE scales well to moderate dimensions',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        # Startup trials grow with dimension, capped at 20
                        'n_startup_trials': min(20, dimensionality * 3),
                        'n_ei_candidates': 24
                    }
                }
            }

        # DEFAULT: TPE as safe general-purpose choice
        return 'tpe', {
            'confidence': 0.65,
            'reasoning': 'Default robust strategy - TPE works well for most problems',
            'sampler_config': {
                'type': 'TPESampler',
                'params': {
                    'multivariate': True,
                    'n_startup_trials': 10,
                    'n_ei_candidates': 24
                }
            }
        }

    def _recommend_random_exploration(self, trials_completed: int) -> Tuple[str, Dict]:
        """Recommend random exploration when insufficient data for analysis."""
        return 'random', {
            'confidence': 1.0,
            'reasoning': f'Insufficient data ({trials_completed} trials) - using random exploration for landscape characterization',
            'sampler_config': {
                'type': 'RandomSampler',
                'params': {}
            }
        }

    def _recommend_multiobjective_strategy(self, trials_completed: int) -> Tuple[str, Dict]:
        """
        Recommend strategy for multi-objective optimization.

        For multi-objective problems, landscape analysis is not applicable.
        Returns random sampling for the first 8 trials, then TPE with
        multivariate support.
        """
        # Start with random for initial exploration
        if trials_completed < 8:
            return 'random', {
                'confidence': 1.0,
                'reasoning': f'Multi-objective: Random exploration for initial {trials_completed}/8 trials',
                'sampler_config': {
                    'type': 'RandomSampler',
                    'params': {}
                }
            }

        # After initial exploration, use TPE with multivariate support
        # (per the original author, an NSGA-II sampler is already used at study
        # creation level - not verifiable from this module)
        return 'tpe', {
            'confidence': 0.95,
            'reasoning': f'Multi-objective: TPE with multivariate support for Pareto front exploration ({trials_completed} trials)',
            'sampler_config': {
                'type': 'TPESampler',
                'params': {
                    'multivariate': True,
                    'n_startup_trials': 8,
                    'n_ei_candidates': 24,
                    'constant_liar': True  # Better for parallel multi-objective
                }
            }
        }

    def _log_recommendation(self, strategy: str, details: Dict, trial_number: int):
        """Log recommendation for learning and transfer."""
        self.recommendation_history.append({
            'trial_number': trial_number,
            'strategy': strategy,
            'confidence': details.get('confidence', 0.0),
            'reasoning': details.get('reasoning', ''),
            'landscape': details.get('landscape_analysis', {})
        })

    def _print_recommendation(self, strategy: str, details: Dict):
        """Print formatted recommendation (strategy, confidence, reasoning, optional transition plan)."""
        print(f"\n{'='*70}")
        print(" STRATEGY RECOMMENDATION")
        print(f"{'='*70}")
        print(f" Recommended: {strategy.upper()}")
        print(f" Confidence: {details['confidence']:.1%}")
        print(f" Reasoning: {details['reasoning']}")

        if 'transition_plan' in details:
            plan = details['transition_plan']
            print("\n TRANSITION PLAN:")
            print(f" Switch to: {plan['switch_to'].upper()}")
            print(f" When: {plan['when']}")

        print(f"{'='*70}\n")

    def save_recommendation_history(self, filepath: Path):
        """
        Save recommendation history to JSON for learning.

        Best-effort: any failure is reported as a warning (when verbose) and
        never raised.

        Args:
            filepath: Destination file (``Path`` or path string).
        """
        try:
            # Serialize first so that a non-serializable entry cannot leave a
            # truncated / half-written file behind.
            payload = json.dumps(self.recommendation_history, indent=2)
            Path(filepath).write_text(payload, encoding='utf-8')
        except Exception as e:
            if self.verbose:
                print(f" Warning: Failed to save recommendation history: {e}")

    def load_recommendation_history(self, filepath: Path):
        """
        Load previous recommendation history.

        Best-effort: a missing file is silently ignored; any other failure is
        reported as a warning (when verbose) and leaves the current history
        untouched.

        Args:
            filepath: Source file (``Path`` or path string).
        """
        try:
            path = Path(filepath)  # accept plain strings as well as Path objects
            if path.exists():
                with open(path, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                # _log_recommendation appends to the history, so it must be a list.
                if not isinstance(loaded, list):
                    raise ValueError(
                        f"expected a JSON list, got {type(loaded).__name__}"
                    )
                self.recommendation_history = loaded
        except Exception as e:
            if self.verbose:
                print(f" Warning: Failed to load recommendation history: {e}")


def create_sampler_from_config(config: Dict) -> "optuna.samplers.BaseSampler":
    """
    Create Optuna sampler from configuration dictionary.

    Args:
        config: Sampler configuration from strategy recommendation
            (keys: 'type' and optional 'params').

    Returns:
        Configured Optuna sampler. 'GPSampler' and unknown types fall back to
        a multivariate TPE sampler with a printed warning.

    Raises:
        ImportError: If optuna could not be imported.
    """
    if optuna is None:
        raise ImportError(
            "optuna is required to create samplers from a strategy recommendation"
        ) from _OPTUNA_IMPORT_ERROR

    sampler_type = config.get('type', 'TPESampler')
    # Tolerate an explicit ``'params': None`` and never hand the caller's own
    # dict to the sampler constructor.
    params = dict(config.get('params') or {})

    if sampler_type == 'TPESampler':
        return optuna.samplers.TPESampler(**params)

    elif sampler_type == 'CmaEsSampler':
        return optuna.samplers.CmaEsSampler(**params)

    elif sampler_type == 'RandomSampler':
        return optuna.samplers.RandomSampler(**params)

    elif sampler_type == 'GPSampler':
        # The recommended GP-BO params ('acquisition', 'n_initial_points') are
        # not wired to a concrete sampler implementation here, so fall back.
        print(" Warning: GP-BO sampler not yet implemented, falling back to TPE")
        return optuna.samplers.TPESampler(multivariate=True, n_startup_trials=10)

    else:
        # Default fallback
        print(f" Warning: Unknown sampler type {sampler_type}, using TPE")
        return optuna.samplers.TPESampler(multivariate=True, n_startup_trials=10)