# Atomizer/optimization_engine/future/llm_optimization_runner.py

"""
LLM-Enhanced Optimization Runner - Phase 3.2

Flexible LLM-enhanced optimization runner that integrates:
- Phase 2.7: LLM workflow analysis
- Phase 2.8: Inline code generation (optional)
- Phase 2.9: Post-processing hook generation (optional)
- Phase 3.0: pyNastran research agent (optional)
- Phase 3.1: Extractor orchestration (optional)

This runner enables users to describe optimization goals in natural language
and choose to leverage automated code generation, manual coding, or a hybrid approach.

Author: Atomizer Development Team
Version: 0.1.0 (Phase 3.2)
Last Updated: 2025-01-16
"""

import json
import logging
import numbers
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

import optuna

from optimization_engine.extractor_orchestrator import ExtractorOrchestrator
from optimization_engine.inline_code_generator import InlineCodeGenerator
from optimization_engine.hook_generator import HookGenerator
from optimization_engine.plugins.hook_manager import HookManager

# Module-level logger, named after this module so it can be configured by name.
logger = logging.getLogger(__name__)


class LLMOptimizationRunner:
    """
    LLM-enhanced optimization runner with flexible automation options.

    This runner empowers users to leverage LLM-assisted code generation for:
    - OP2 result extractors (Phase 3.1) - optional
    - Inline calculations (Phase 2.8) - optional
    - Post-processing hooks (Phase 2.9) - optional

    Users can describe goals in natural language and choose automated generation,
    manual coding, or a hybrid approach based on their needs.

    Files written into ``output_dir``:
        - llm_workflow_config.json: the workflow passed to the constructor
        - <study_name>.db: SQLite storage used by the Optuna study
        - optimization_history_incremental.json: rewritten after every trial
        - optimization_results.json: summary written by run_optimization()
    """

    # Keys checked, in this order, in each hook result to find the objective value.
    _HOOK_OBJECTIVE_KEYS = ('objective', 'weighted_objective')

    # Result/calculation names tried, in this order, when no hook supplies an objective.
    _FALLBACK_OBJECTIVE_KEYS = ('max_displacement', 'max_stress', 'max_von_mises')

    def __init__(self,
                 llm_workflow: Dict[str, Any],
                 model_updater: Callable[[Dict[str, float]], Any],
                 simulation_runner: Callable[[], Any],
                 study_name: str = "llm_optimization",
                 output_dir: Optional[Path] = None):
        """
        Initialize LLM-driven optimization runner.

        Creates the output directory, stores a JSON copy of the workflow in it
        and builds all automation components (extractors, inline code, hooks).

        Args:
            llm_workflow: Output from Phase 2.7 LLM analysis with:
                - engineering_features: List of FEA operations
                - inline_calculations: List of simple math operations
                - post_processing_hooks: List of custom calculations
                - optimization: Dict with algorithm, design_variables, etc.
                Must be JSON-serializable because it is dumped to disk.
            model_updater: Function(design_vars: Dict) -> None
                Updates NX expressions in the CAD model and saves changes.
            simulation_runner: Function() -> Path
                Called WITHOUT arguments (the model has already been updated
                by ``model_updater``). Returns the path to the OP2 results file.
            study_name: Name for Optuna study
            output_dir: Directory for results. Defaults to
                ``<cwd>/optimization_results/<study_name>``.
        """
        self.llm_workflow = llm_workflow
        self.model_updater = model_updater
        self.simulation_runner = simulation_runner
        self.study_name = study_name

        if output_dir is None:
            output_dir = Path.cwd() / "optimization_results" / study_name
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Save LLM workflow configuration for transparency and documentation
        workflow_config_file = self.output_dir / "llm_workflow_config.json"
        with open(workflow_config_file, 'w') as f:
            json.dump(llm_workflow, f, indent=2)
        logger.info(f"LLM workflow configuration saved to: {workflow_config_file}")

        # Initialize automation components
        self._initialize_automation()

        # Optuna study (created lazily by run_optimization) and per-session trial log
        self.study = None
        self.history = []

        logger.info(f"LLMOptimizationRunner initialized for study: {study_name}")

    def _initialize_automation(self):
        """
        Initialize all automation components from LLM workflow.

        Sets ``orchestrator``, ``extractors``, ``inline_generator``,
        ``inline_code``, ``hook_generator`` and ``hook_manager`` on the instance.
        """
        logger.info("Initializing automation components...")

        # Phase 3.1: Extractor Orchestrator (NEW ARCHITECTURE)
        logger.info("  - Phase 3.1: Extractor Orchestrator")
        # output_dir is passed only for the manifest; extractors go to the core library
        self.orchestrator = ExtractorOrchestrator(
            extractors_dir=self.output_dir,  # Only for manifest file
            use_core_library=True  # Enable centralized library
        )

        # Generate extractors from LLM workflow (stored in core library now)
        self.extractors = self.orchestrator.process_llm_workflow(self.llm_workflow)
        logger.info(f"    {len(self.extractors)} extractor(s) available from core library")

        # Phase 2.8: Inline Code Generator
        logger.info("  - Phase 2.8: Inline Code Generator")
        self.inline_generator = InlineCodeGenerator()
        self.inline_code = [
            self.inline_generator.generate_from_llm_output(calc).code
            for calc in self.llm_workflow.get('inline_calculations', [])
        ]
        logger.info(f"    Generated {len(self.inline_code)} inline calculation(s)")

        # Phase 2.9: Hook Generator (TODO: Should also use centralized library in future)
        logger.info("  - Phase 2.9: Hook Generator")
        self.hook_generator = HookGenerator()

        # For now, hooks are not generated per-study unless they're truly custom
        # Most hooks should be in the core library (optimization_engine/hooks/)
        post_processing_hooks = self.llm_workflow.get('post_processing_hooks', [])

        if post_processing_hooks:
            logger.info(f"    Note: {len(post_processing_hooks)} custom hooks requested")
            logger.info("    Future: These should also use centralized library")
            # TODO: Implement hook library system similar to extractors

        # Phase 1: Hook Manager
        logger.info("  - Phase 1: Hook Manager")
        self.hook_manager = HookManager()

        # Load system hooks from core library.
        # The 'plugins' folder next to this module is tried first. HookManager is
        # imported from optimization_engine.plugins, so when this module lives in a
        # sub-package the 'plugins' folder of the parent package is tried as well;
        # otherwise hook loading would be skipped without any notice.
        module_dir = Path(__file__).parent
        candidate_dirs = [
            module_dir / 'plugins',
            module_dir.resolve().parent / 'plugins',
        ]
        system_hooks_dir = next((d for d in candidate_dirs if d.exists()), None)
        if system_hooks_dir is not None:
            self.hook_manager.load_plugins_from_directory(system_hooks_dir)
        else:
            logger.warning(
                "    No system hook directory found (looked in: "
                f"{', '.join(str(d) for d in candidate_dirs)})"
            )

        summary = self.hook_manager.get_summary()
        logger.info(f"    Loaded {summary['enabled_hooks']} hook(s) from core library")

        logger.info("Automation components initialized successfully!")

    def _create_optuna_study(self) -> "optuna.Study":
        """
        Create Optuna study from LLM workflow optimization settings.

        Only the ``direction`` entry of the ``optimization`` section is read
        (default ``'minimize'``); no sampler is passed, so the ``algorithm``
        entry currently has no effect. The study is stored in a SQLite file
        inside ``output_dir`` and, because ``load_if_exists=True``, a study of
        the same name already present in that file is reused.

        Returns:
            The created (or loaded) Optuna study.
        """
        opt_config = self.llm_workflow.get('optimization', {})

        # Determine direction (minimize or maximize)
        direction = opt_config.get('direction', 'minimize')

        db_path = self.output_dir / f"{self.study_name}.db"
        study = optuna.create_study(
            study_name=self.study_name,
            direction=direction,
            storage=f"sqlite:///{db_path}",
            load_if_exists=True
        )

        logger.info(f"Created Optuna study: {self.study_name} (direction: {direction})")
        return study

    def _suggest_design_variables(self, trial: "optuna.Trial") -> Dict[str, float]:
        """
        Ask Optuna for one float value per configured design variable.

        Each entry of ``optimization.design_variables`` needs a ``parameter``
        name and either ``bounds`` as ``[min, max]`` or the older separate
        ``min`` / ``max`` keys (defaulting to 0.0 and 1.0).

        Args:
            trial: Optuna trial used to suggest the values.

        Returns:
            Mapping of parameter name to suggested value.
        """
        variable_configs = self.llm_workflow.get('optimization', {}).get('design_variables', [])

        design_vars = {}
        for var_config in variable_configs:
            var_name = var_config['parameter']

            # Parse bounds - LLM returns 'bounds' as [min, max]
            if 'bounds' in var_config:
                var_min, var_max = var_config['bounds']
            else:
                # Fallback to old format
                var_min = var_config.get('min', 0.0)
                var_max = var_config.get('max', 1.0)

            design_vars[var_name] = trial.suggest_float(var_name, var_min, var_max)

        return design_vars

    def _extract_results(self, op2_file: Any) -> Dict[str, Any]:
        """
        Run every extractor on the OP2 file and merge their outputs.

        A failing extractor is logged and skipped so the remaining extractors
        still run; later extractors overwrite equally named keys.

        Args:
            op2_file: Path (or path-like value) returned by the simulation runner.

        Returns:
            Merged dictionary of all extraction results.
        """
        results = {}
        for extractor in self.extractors:
            try:
                extraction_result = self.orchestrator.execute_extractor(
                    extractor.name,
                    Path(op2_file),
                    subcase=1
                )
                results.update(extraction_result)
                logger.info(f"  {extractor.name}: {list(extraction_result.keys())}")
            except Exception as e:
                # Deliberate best-effort: continue with other extractors
                logger.error(f"Extraction failed for {extractor.name}: {e}")
        return results

    def _run_inline_calculations(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute the generated inline calculation snippets.

        All snippets share one namespace that is pre-populated with the
        extraction results, so a snippet can use values produced by an earlier
        one. A failing snippet is logged and skipped.

        Args:
            results: Extraction results made available to the snippets.

        Returns:
            Every name present in the namespace afterwards that is neither an
            extraction result key nor starts with an underscore.
        """
        calculations = {}
        calc_namespace = dict(results)  # Make results available

        for calc_code in self.inline_code:
            try:
                # SECURITY NOTE(review): this executes generated source code with
                # full interpreter privileges. Only run workflows from a trusted
                # source; the code is not sandboxed.
                exec(calc_code, calc_namespace)
                # Collect newly created variables (skips '__builtins__' added by exec)
                for key, value in calc_namespace.items():
                    if key not in results and not key.startswith('_'):
                        calculations[key] = value

                logger.info(f"  Executed: {calc_code[:50]}...")
            except Exception as e:
                logger.error(f"Inline calculation failed: {e}")

        return calculations

    @classmethod
    def _select_objective(cls, hook_results: List[Any], final_context: Dict[str, Any]) -> tuple:
        """
        Pick the objective value from hook results or, failing that, from results.

        The first non-empty hook result containing ``'objective'`` or
        ``'weighted_objective'`` decides; if its value is ``None`` (or no hook
        has such a key) the common result names are tried on ``final_context``.

        Args:
            hook_results: Return values of the post-calculation hooks.
            final_context: Merged results, calculations and hook outputs.

        Returns:
            ``(objective, fallback_key)``. ``fallback_key`` is the name used
            from ``final_context`` or ``None`` when a hook supplied the value.
            ``objective`` is ``None`` when nothing usable was found.
        """
        for hook_result in hook_results:
            if not hook_result:
                continue
            hook_key = next((k for k in cls._HOOK_OBJECTIVE_KEYS if k in hook_result), None)
            if hook_key is not None:
                if hook_result[hook_key] is not None:
                    return hook_result[hook_key], None
                # Key present but empty: stop scanning hooks and use the fallback
                break

        for key in cls._FALLBACK_OBJECTIVE_KEYS:
            if key in final_context:
                return final_context[key], key

        return None, None

    def _objective(self, trial: "optuna.Trial") -> float:
        """
        Optuna objective function - LLM-enhanced with flexible automation!

        This function leverages LLM workflow analysis with user-configurable automation:
        1. Suggests design variables from LLM analysis
        2. Updates model
        3. Runs simulation
        4. Extracts results (using generated or manual extractors)
        5. Executes inline calculations (generated or manual)
        6. Executes post-calculation hooks (generated or manual)
        7. Returns objective value

        Hooks are executed at the 'pre_solve', 'post_solve', 'post_extraction'
        and 'post_calculation' points. The trial is appended to ``self.history``
        and the incremental history file is rewritten before returning.

        Args:
            trial: Optuna trial

        Returns:
            Objective value

        Raises:
            ValueError: If no objective value can be determined from the
                results, calculations or hooks.
        """
        trial_number = trial.number
        logger.info(f"\n{'='*80}")
        logger.info(f"Trial {trial_number} starting...")
        logger.info(f"{'='*80}")

        # ====================================================================
        # STEP 1: Suggest Design Variables
        # ====================================================================
        design_vars = self._suggest_design_variables(trial)
        logger.info(f"Design variables: {design_vars}")

        # Execute pre-solve hooks
        self.hook_manager.execute_hooks('pre_solve', {
            'trial_number': trial_number,
            'design_variables': design_vars
        })

        # ====================================================================
        # STEP 2: Update Model
        # ====================================================================
        logger.info("Updating model...")
        self.model_updater(design_vars)

        # ====================================================================
        # STEP 3: Run Simulation
        # ====================================================================
        logger.info("Running simulation...")
        # NOTE: We do NOT pass design_vars to simulation_runner because:
        # 1. The PRT file was already updated by model_updater (via NX import journal)
        # 2. The solver just needs to load the SIM which references the updated PRT
        # 3. Passing design_vars would use hardcoded expression names that don't match our model
        op2_file = self.simulation_runner()
        logger.info(f"Simulation complete: {op2_file}")

        # Execute post-solve hooks
        self.hook_manager.execute_hooks('post_solve', {
            'trial_number': trial_number,
            'op2_file': op2_file
        })

        # ====================================================================
        # STEP 4: Extract Results (Phase 3.1 - Auto-Generated Extractors)
        # ====================================================================
        logger.info("Extracting results...")
        results = self._extract_results(op2_file)

        # Execute post-extraction hooks
        self.hook_manager.execute_hooks('post_extraction', {
            'trial_number': trial_number,
            'results': results
        })

        # ====================================================================
        # STEP 5: Inline Calculations (Phase 2.8 - Auto-Generated Code)
        # ====================================================================
        logger.info("Executing inline calculations...")
        calculations = self._run_inline_calculations(results)
        logger.info(f"Calculations: {calculations}")

        # ====================================================================
        # STEP 6: Post-Calculation Hooks (Phase 2.9 - Auto-Generated Hooks)
        # ====================================================================
        logger.info("Executing post-calculation hooks...")

        hook_results = self.hook_manager.execute_hooks('post_calculation', {
            'trial_number': trial_number,
            'design_variables': design_vars,
            'results': results,
            'calculations': calculations
        })

        # Merge hook results on top of results and calculations
        final_context = {**results, **calculations}
        for hook_result in hook_results:
            if hook_result:
                final_context.update(hook_result)

        logger.info(f"Hook results: {hook_results}")

        # ====================================================================
        # STEP 7: Extract Objective Value
        # ====================================================================
        objective, fallback_key = self._select_objective(hook_results, final_context)
        if fallback_key is not None:
            logger.warning(f"No explicit objective found, using: {fallback_key}")

        if objective is None:
            raise ValueError("Could not determine objective value from results/calculations/hooks")

        # Convert before recording so a non-numeric objective fails here and
        # never ends up in the stored history.
        objective = float(objective)
        logger.info(f"Objective value: {objective}")

        # Save trial history
        self.history.append({
            'trial_number': trial_number,
            'design_variables': design_vars,
            'results': results,
            'calculations': calculations,
            'objective': objective
        })

        # Incremental save - write history after each trial
        # This allows monitoring progress in real-time
        self._save_incremental_history()

        return objective

    def run_optimization(self, n_trials: int = 50) -> Dict[str, Any]:
        """
        Run LLM-enhanced optimization with flexible automation.

        Creates (or reloads) the Optuna study, runs ``n_trials`` trials of
        ``_objective`` and writes a summary file. ``self.history`` is not
        cleared, so repeated calls keep accumulating trial records.

        Args:
            n_trials: Number of optimization trials

        Returns:
            Dict with:
                - best_params: Best design variable values
                - best_value: Best objective value
                - best_trial_number: Number of the best trial
                - history: Trial history recorded by this runner instance

        Raises:
            ValueError: Propagated from ``_objective`` when a trial yields no
                objective. NOTE(review): Optuna's ``best_trial`` is also
                expected to raise when the study has no completed trial.
        """
        logger.info(f"\n{'='*80}")
        logger.info("Starting LLM-Driven Optimization")
        logger.info(f"{'='*80}")
        logger.info(f"Study: {self.study_name}")
        logger.info(f"Trials: {n_trials}")
        logger.info(f"Output: {self.output_dir}")
        logger.info(f"{'='*80}\n")

        # Create study
        self.study = self._create_optuna_study()

        # Run optimization
        self.study.optimize(self._objective, n_trials=n_trials)

        # Get results
        best_trial = self.study.best_trial

        results = {
            'best_params': best_trial.params,
            'best_value': best_trial.value,
            'best_trial_number': best_trial.number,
            'history': self.history
        }

        # Save results
        self._save_results(results)

        logger.info(f"\n{'='*80}")
        logger.info("Optimization Complete!")
        logger.info(f"{'='*80}")
        logger.info(f"Best value: {results['best_value']}")
        logger.info(f"Best params: {results['best_params']}")
        logger.info(f"Results saved to: {self.output_dir}")
        logger.info(f"{'='*80}\n")

        return results

    @staticmethod
    def _to_serializable(value: Any) -> Any:
        """
        Convert a single history value into something ``json`` can write.

        Real numbers (Python ints/floats/bools and any type registered with
        ``numbers.Real``) become ``float``; objects offering ``tolist()`` are
        converted through it; everything else is returned unchanged.
        """
        if isinstance(value, numbers.Real):
            return float(value)
        to_list = getattr(value, 'tolist', None)
        if callable(to_list):
            return to_list()
        return value

    def _save_incremental_history(self):
        """
        Save trial history incrementally after each trial.
        This allows real-time monitoring of optimization progress.

        The complete history is rewritten to
        ``optimization_history_incremental.json`` on every call. Entries in
        ``self.history`` themselves are left untouched. Values that still
        cannot be encoded are written as their ``str()`` representation.
        """
        history_file = self.output_dir / "optimization_history_incremental.json"

        # Convert history to JSON-serializable format
        serializable_history = []
        for trial in self.history:
            trial_copy = trial.copy()
            for key in ('results', 'calculations', 'design_variables'):
                if key in trial_copy:
                    trial_copy[key] = {k: self._to_serializable(v)
                                       for k, v in trial_copy[key].items()}
            if 'objective' in trial_copy:
                trial_copy['objective'] = float(trial_copy['objective'])
            serializable_history.append(trial_copy)

        # Write to file
        with open(history_file, 'w') as f:
            json.dump(serializable_history, f, indent=2, default=str)

    def _save_results(self, results: Dict[str, Any]):
        """
        Save optimization results to file.

        Writes ``optimization_results.json`` with the best parameters, best
        value, best trial number, a timestamp, the study name and the number
        of history entries (``n_trials``). The history itself is not written
        here; see the incremental history file.

        Args:
            results: Dict as built by run_optimization().
        """
        results_file = self.output_dir / "optimization_results.json"

        serializable_results = {
            'best_params': results['best_params'],
            'best_value': results['best_value'],
            'best_trial_number': results['best_trial_number'],
            'timestamp': datetime.now().isoformat(),
            'study_name': self.study_name,
            'n_trials': len(results['history'])
        }

        with open(results_file, 'w') as f:
            json.dump(serializable_results, f, indent=2)

        logger.info(f"Results saved to: {results_file}")


def main():
    """Smoke-test: build a sample workflow and construct the runner with dummy callbacks.

    Only the initialization path is exercised; no optimization trials are run.
    """
    banner = "=" * 80
    print(banner, "Phase 3.2: LLM-Driven Optimization Runner Test", banner, "", sep="\n")

    # Example LLM workflow (from Phase 2.7), assembled section by section
    features = [
        {
            "action": "extract_displacement",
            "domain": "result_extraction",
            "description": "Extract displacement from OP2",
            "params": {"result_type": "displacement"},
        },
    ]
    inline_calcs = [
        {
            "action": "normalize",
            "params": {
                "input": "max_displacement",
                "reference": "max_allowed_disp",
                "value": 5.0,
            },
            "code_hint": "norm_disp = max_displacement / 5.0",
        },
    ]
    hooks = [
        {
            "action": "weighted_objective",
            "params": {
                "inputs": ["norm_disp"],
                "weights": [1.0],
                "objective": "minimize",
            },
        },
    ]
    variables = [
        {
            "parameter": "wall_thickness",
            "min": 3.0,
            "max": 8.0,
            "type": "continuous",
        },
    ]
    llm_workflow = {
        "engineering_features": features,
        "inline_calculations": inline_calcs,
        "post_processing_hooks": hooks,
        "optimization": {
            "algorithm": "TPE",
            "direction": "minimize",
            "design_variables": variables,
        },
    }

    print("LLM Workflow Configuration:")
    print(f"  Engineering features: {len(features)}")
    print(f"  Inline calculations: {len(inline_calcs)}")
    print(f"  Post-processing hooks: {len(hooks)}")
    print(f"  Design variables: {len(variables)}")
    print()

    # Stand-ins for the real model updater / solver used while testing
    def fake_update(design_vars):
        print(f"  [Dummy] Updating model with: {design_vars}")

    def fake_solve():
        print("  [Dummy] Running simulation...")
        # Path to the OP2 file used for testing
        return Path("tests/bracket_sim1-solution_1.op2")

    print("Initializing LLM-driven optimization runner...")
    runner = LLMOptimizationRunner(
        llm_workflow=llm_workflow,
        model_updater=fake_update,
        simulation_runner=fake_solve,
        study_name="test_llm_optimization",
    )

    print()
    print(banner)
    print("Runner initialized successfully!")
    print("Ready to run optimization with auto-generated code!")
    print(banner)


if __name__ == "__main__":
    main()