refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
419
optimization_engine/core/strategy_selector.py
Normal file
419
optimization_engine/core/strategy_selector.py
Normal file
@@ -0,0 +1,419 @@
|
||||
"""
|
||||
Strategy Selector - Intelligent optimization strategy recommendation.
|
||||
|
||||
This module implements decision logic to recommend the best optimization strategy
|
||||
based on landscape characteristics. Uses expert knowledge and empirical heuristics
|
||||
to match problem types to appropriate algorithms.
|
||||
|
||||
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
|
||||
"""
|
||||
|
||||
import optuna
|
||||
from typing import Dict, Optional, Tuple
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class IntelligentStrategySelector:
|
||||
"""
|
||||
Selects optimal optimization strategy based on problem characteristics.
|
||||
|
||||
Decision tree combines:
|
||||
1. Landscape analysis (smoothness, multimodality, noise)
|
||||
2. Problem dimensionality
|
||||
3. Trial budget and evaluation cost
|
||||
4. Historical performance data (if available)
|
||||
"""
|
||||
|
||||
def __init__(self, verbose: bool = True):
|
||||
"""
|
||||
Args:
|
||||
verbose: Print recommendation explanations
|
||||
"""
|
||||
self.verbose = verbose
|
||||
self.recommendation_history = []
|
||||
|
||||
def recommend_strategy(
|
||||
self,
|
||||
landscape: Dict,
|
||||
trials_completed: int = 0,
|
||||
trials_budget: Optional[int] = None,
|
||||
current_best_value: Optional[float] = None
|
||||
) -> Tuple[str, Dict]:
|
||||
"""
|
||||
Recommend optimization strategy based on problem characteristics.
|
||||
|
||||
Args:
|
||||
landscape: Output from LandscapeAnalyzer.analyze()
|
||||
trials_completed: Number of trials completed so far
|
||||
trials_budget: Total trial budget (if known)
|
||||
current_best_value: Current best objective value
|
||||
|
||||
Returns:
|
||||
(strategy_name, recommendation_details)
|
||||
|
||||
strategy_name: One of ['tpe', 'cmaes', 'gp_bo', 'random', 'hybrid_gp_cmaes']
|
||||
recommendation_details: Dict with confidence, reasoning, and sampler config
|
||||
"""
|
||||
# Handle None landscape (multi-objective optimization)
|
||||
if landscape is None:
|
||||
# Multi-objective: Use NSGA-II/NSGA-III based on trial count
|
||||
return self._recommend_multiobjective_strategy(trials_completed)
|
||||
|
||||
if not landscape.get('ready', False):
|
||||
# Not enough data, use random exploration
|
||||
return self._recommend_random_exploration(trials_completed)
|
||||
|
||||
# Extract key characteristics
|
||||
landscape_type = landscape.get('landscape_type', 'unknown')
|
||||
smoothness = landscape.get('smoothness', 0.5)
|
||||
multimodal = landscape.get('multimodal', False)
|
||||
noise_level = landscape.get('noise_level', 0.0)
|
||||
dimensionality = landscape.get('dimensionality', 2)
|
||||
correlation_strength = landscape['parameter_correlation'].get('overall_strength', 0.3)
|
||||
|
||||
# Use characterization trial count for strategy decisions (not total trials)
|
||||
# This prevents premature algorithm selection when many trials were pruned
|
||||
char_trials = landscape.get('total_trials', trials_completed)
|
||||
|
||||
# Decision tree for strategy selection
|
||||
strategy, details = self._apply_decision_tree(
|
||||
landscape_type=landscape_type,
|
||||
smoothness=smoothness,
|
||||
multimodal=multimodal,
|
||||
noise_level=noise_level,
|
||||
dimensionality=dimensionality,
|
||||
correlation_strength=correlation_strength,
|
||||
trials_completed=char_trials # Use characterization trials, not total
|
||||
)
|
||||
|
||||
# Add landscape info to recommendation
|
||||
details['landscape_analysis'] = {
|
||||
'type': landscape_type,
|
||||
'smoothness': smoothness,
|
||||
'multimodal': multimodal,
|
||||
'dimensionality': dimensionality
|
||||
}
|
||||
|
||||
# Log recommendation
|
||||
self._log_recommendation(strategy, details, trials_completed)
|
||||
|
||||
if self.verbose:
|
||||
self._print_recommendation(strategy, details)
|
||||
|
||||
return strategy, details
|
||||
|
||||
def _apply_decision_tree(
|
||||
self,
|
||||
landscape_type: str,
|
||||
smoothness: float,
|
||||
multimodal: bool,
|
||||
noise_level: float,
|
||||
dimensionality: int,
|
||||
correlation_strength: float,
|
||||
trials_completed: int
|
||||
) -> Tuple[str, Dict]:
|
||||
"""
|
||||
Apply expert decision tree for strategy selection.
|
||||
|
||||
Decision logic based on optimization algorithm strengths:
|
||||
|
||||
CMA-ES:
|
||||
- Best for: Smooth unimodal landscapes, correlated parameters
|
||||
- Strengths: Fast local convergence, handles parameter correlations
|
||||
- Weaknesses: Poor for multimodal, needs reasonable initialization
|
||||
|
||||
GP-BO (Gaussian Process Bayesian Optimization):
|
||||
- Best for: Smooth landscapes, expensive evaluations, low-dimensional
|
||||
- Strengths: Sample efficient, good uncertainty quantification
|
||||
- Weaknesses: Scales poorly >10D, expensive surrogate training
|
||||
|
||||
TPE (Tree-structured Parzen Estimator):
|
||||
- Best for: General purpose, multimodal, moderate dimensional
|
||||
- Strengths: Handles multimodality, scales to ~50D, robust
|
||||
- Weaknesses: Slower convergence than CMA-ES on smooth problems
|
||||
|
||||
Hybrid GP→CMA-ES:
|
||||
- Best for: Smooth landscapes needing global+local search
|
||||
- Strengths: GP finds basin, CMA-ES refines locally
|
||||
- Weaknesses: More complex, needs transition logic
|
||||
"""
|
||||
|
||||
# CASE 1: High noise - use robust methods
|
||||
if noise_level > 0.5:
|
||||
return 'tpe', {
|
||||
'confidence': 0.85,
|
||||
'reasoning': 'High noise detected - TPE is more robust to noisy evaluations',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 15, # More exploration for noisy problems
|
||||
'n_ei_candidates': 24
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 2: Smooth unimodal with strong correlation - CMA-ES excels
|
||||
if landscape_type == 'smooth_unimodal' and correlation_strength > 0.5:
|
||||
return 'cmaes', {
|
||||
'confidence': 0.92,
|
||||
'reasoning': f'Smooth unimodal landscape with strong parameter correlation ({correlation_strength:.2f}) - CMA-ES will converge quickly',
|
||||
'sampler_config': {
|
||||
'type': 'CmaEsSampler',
|
||||
'params': {
|
||||
'restart_strategy': 'ipop', # Increasing population restart
|
||||
'with_margin': True # Use margin for constraint handling
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 3: Smooth but multimodal - Hybrid GP→CMA-ES or GP-BO
|
||||
if landscape_type == 'smooth_multimodal':
|
||||
if dimensionality <= 5 and trials_completed < 30:
|
||||
# Early stage: GP-BO for exploration
|
||||
return 'gp_bo', {
|
||||
'confidence': 0.78,
|
||||
'reasoning': f'Smooth multimodal landscape, {dimensionality}D - GP-BO for intelligent exploration, plan CMA-ES refinement later',
|
||||
'sampler_config': {
|
||||
'type': 'GPSampler', # Custom implementation needed
|
||||
'params': {
|
||||
'acquisition': 'EI', # Expected Improvement
|
||||
'n_initial_points': 10
|
||||
}
|
||||
},
|
||||
'transition_plan': {
|
||||
'switch_to': 'cmaes',
|
||||
'when': 'error < 1.0 OR trials > 40'
|
||||
}
|
||||
}
|
||||
else:
|
||||
# Later stage or higher dimensional: TPE
|
||||
return 'tpe', {
|
||||
'confidence': 0.75,
|
||||
'reasoning': f'Smooth multimodal landscape - TPE handles multiple modes well',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 10,
|
||||
'n_ei_candidates': 32 # More exploitation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 4: Smooth unimodal, low-dimensional, expensive - GP-BO then CMA-ES
|
||||
if landscape_type == 'smooth_unimodal' and dimensionality <= 5:
|
||||
if trials_completed < 25:
|
||||
return 'gp_bo', {
|
||||
'confidence': 0.82,
|
||||
'reasoning': f'Smooth {dimensionality}D landscape - GP-BO for sample-efficient exploration',
|
||||
'sampler_config': {
|
||||
'type': 'GPSampler',
|
||||
'params': {
|
||||
'acquisition': 'EI',
|
||||
'n_initial_points': 8
|
||||
}
|
||||
},
|
||||
'transition_plan': {
|
||||
'switch_to': 'cmaes',
|
||||
'when': 'error < 2.0 OR trials > 25'
|
||||
}
|
||||
}
|
||||
else:
|
||||
# Switch to CMA-ES for final refinement
|
||||
return 'cmaes', {
|
||||
'confidence': 0.88,
|
||||
'reasoning': 'Switching to CMA-ES for final local refinement',
|
||||
'sampler_config': {
|
||||
'type': 'CmaEsSampler',
|
||||
'params': {
|
||||
'restart_strategy': 'ipop',
|
||||
'with_margin': True
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 5: Rugged multimodal - TPE is most robust
|
||||
if landscape_type == 'rugged_multimodal' or multimodal:
|
||||
return 'tpe', {
|
||||
'confidence': 0.80,
|
||||
'reasoning': 'Rugged/multimodal landscape - TPE is robust to multiple local optima',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 12,
|
||||
'n_ei_candidates': 24
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 6: Rugged unimodal - TPE with more exploration
|
||||
if landscape_type == 'rugged_unimodal':
|
||||
return 'tpe', {
|
||||
'confidence': 0.72,
|
||||
'reasoning': 'Rugged landscape - TPE with extended exploration',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 15,
|
||||
'n_ei_candidates': 20
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# CASE 7: High dimensional (>5D) - TPE scales best
|
||||
if dimensionality > 5:
|
||||
return 'tpe', {
|
||||
'confidence': 0.77,
|
||||
'reasoning': f'High dimensionality ({dimensionality}D) - TPE scales well to moderate dimensions',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': min(20, dimensionality * 3),
|
||||
'n_ei_candidates': 24
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# DEFAULT: TPE as safe general-purpose choice
|
||||
return 'tpe', {
|
||||
'confidence': 0.65,
|
||||
'reasoning': 'Default robust strategy - TPE works well for most problems',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 10,
|
||||
'n_ei_candidates': 24
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def _recommend_random_exploration(self, trials_completed: int) -> Tuple[str, Dict]:
|
||||
"""Recommend random exploration when insufficient data for analysis."""
|
||||
return 'random', {
|
||||
'confidence': 1.0,
|
||||
'reasoning': f'Insufficient data ({trials_completed} trials) - using random exploration for landscape characterization',
|
||||
'sampler_config': {
|
||||
'type': 'RandomSampler',
|
||||
'params': {}
|
||||
}
|
||||
}
|
||||
|
||||
def _recommend_multiobjective_strategy(self, trials_completed: int) -> Tuple[str, Dict]:
|
||||
"""
|
||||
Recommend strategy for multi-objective optimization.
|
||||
|
||||
For multi-objective problems, landscape analysis is not applicable.
|
||||
Use NSGA-II (default) or TPE with multivariate support.
|
||||
"""
|
||||
# Start with random for initial exploration
|
||||
if trials_completed < 8:
|
||||
return 'random', {
|
||||
'confidence': 1.0,
|
||||
'reasoning': f'Multi-objective: Random exploration for initial {trials_completed}/8 trials',
|
||||
'sampler_config': {
|
||||
'type': 'RandomSampler',
|
||||
'params': {}
|
||||
}
|
||||
}
|
||||
|
||||
# After initial exploration, use TPE with multivariate support
|
||||
# (NSGA-II sampler is already used at study creation level)
|
||||
return 'tpe', {
|
||||
'confidence': 0.95,
|
||||
'reasoning': f'Multi-objective: TPE with multivariate support for Pareto front exploration ({trials_completed} trials)',
|
||||
'sampler_config': {
|
||||
'type': 'TPESampler',
|
||||
'params': {
|
||||
'multivariate': True,
|
||||
'n_startup_trials': 8,
|
||||
'n_ei_candidates': 24,
|
||||
'constant_liar': True # Better for parallel multi-objective
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def _log_recommendation(self, strategy: str, details: Dict, trial_number: int):
|
||||
"""Log recommendation for learning and transfer."""
|
||||
self.recommendation_history.append({
|
||||
'trial_number': trial_number,
|
||||
'strategy': strategy,
|
||||
'confidence': details.get('confidence', 0.0),
|
||||
'reasoning': details.get('reasoning', ''),
|
||||
'landscape': details.get('landscape_analysis', {})
|
||||
})
|
||||
|
||||
def _print_recommendation(self, strategy: str, details: Dict):
|
||||
"""Print formatted recommendation."""
|
||||
print(f"\n{'='*70}")
|
||||
print(f" STRATEGY RECOMMENDATION")
|
||||
print(f"{'='*70}")
|
||||
print(f" Recommended: {strategy.upper()}")
|
||||
print(f" Confidence: {details['confidence']:.1%}")
|
||||
print(f" Reasoning: {details['reasoning']}")
|
||||
|
||||
if 'transition_plan' in details:
|
||||
plan = details['transition_plan']
|
||||
print(f"\n TRANSITION PLAN:")
|
||||
print(f" Switch to: {plan['switch_to'].upper()}")
|
||||
print(f" When: {plan['when']}")
|
||||
|
||||
print(f"{'='*70}\n")
|
||||
|
||||
def save_recommendation_history(self, filepath: Path):
|
||||
"""Save recommendation history to JSON for learning."""
|
||||
try:
|
||||
with open(filepath, 'w') as f:
|
||||
json.dump(self.recommendation_history, f, indent=2)
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f" Warning: Failed to save recommendation history: {e}")
|
||||
|
||||
def load_recommendation_history(self, filepath: Path):
|
||||
"""Load previous recommendation history."""
|
||||
try:
|
||||
if filepath.exists():
|
||||
with open(filepath, 'r') as f:
|
||||
self.recommendation_history = json.load(f)
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f" Warning: Failed to load recommendation history: {e}")
|
||||
|
||||
|
||||
def create_sampler_from_config(config: Dict) -> optuna.samplers.BaseSampler:
    """
    Create Optuna sampler from configuration dictionary.

    Args:
        config: Sampler configuration from strategy recommendation

    Returns:
        Configured Optuna sampler
    """
    sampler_type = config.get('type', 'TPESampler')
    params = config.get('params', {})

    # Directly-supported samplers map 1:1 onto Optuna classes.
    factories = {
        'TPESampler': optuna.samplers.TPESampler,
        'CmaEsSampler': optuna.samplers.CmaEsSampler,
        'RandomSampler': optuna.samplers.RandomSampler,
    }
    factory = factories.get(sampler_type)
    if factory is not None:
        return factory(**params)

    if sampler_type == 'GPSampler':
        # GP-BO not directly available in Optuna
        # Would need custom implementation or use skopt integration
        print(" Warning: GP-BO sampler not yet implemented, falling back to TPE")
    else:
        # Default fallback
        print(f" Warning: Unknown sampler type {sampler_type}, using TPE")
    return optuna.samplers.TPESampler(multivariate=True, n_startup_trials=10)
|
||||
Reference in New Issue
Block a user