feat: Add substudy system with live history tracking and workflow fixes

Major Features: - Hierarchical substudy system (like NX Solutions/Subcases) * Shared model files across all substudies * Independent configuration per substudy * Continuation support from previous substudies * Real-time incremental history updates - Live history tracking with optimization_history_incremental.json - Complete bracket_displacement_maximizing study with substudy examples Core Fixes: - Fixed expression update workflow to pass design_vars through simulation_runner * Restored working NX journal expression update mechanism * OP2 timestamp verification instead of file deletion * Resolved issue where all trials returned identical objective values - Fixed LLMOptimizationRunner to pass design variables to simulation runner - Enhanced NXSolver with timestamp-based file regeneration verification New Components: - optimization_engine/llm_optimization_runner.py - LLM-driven optimization runner - optimization_engine/optimization_setup_wizard.py - Phase 3.3 setup wizard - studies/bracket_displacement_maximizing/ - Complete substudy example * run_substudy.py - Substudy runner with continuation * run_optimization.py - Standalone optimization runner * config/substudy_template.json - Template for new substudies * substudies/coarse_exploration/ - 20-trial coarse search * substudies/fine_tuning/ - 50-trial refinement (continuation example) * SUBSTUDIES_README.md - Complete substudy documentation Technical Improvements: - Incremental history saving after each trial (optimization_history_incremental.json) - Expression update workflow: .prt update → NX journal receives values → geometry update → FEM update → solve - Trial indexing fix in substudy result saving - Updated README with substudy system documentation Testing: - Successfully ran 20-trial coarse_exploration substudy - Verified different objective values across trials (workflow fix validated) - Confirmed live history updates in real-time - Tested shared model file usage across substudies 🤖 Generated with [Claude 
Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 21:29:54 -05:00
parent 90a9e020d8
commit 2f3afc3813
126 changed files with 15592 additions and 97 deletions
--- a/optimization_engine/llm_optimization_runner.py
+++ b/optimization_engine/llm_optimization_runner.py
@@ -0,0 +1,528 @@
+"""
+LLM-Enhanced Optimization Runner - Phase 3.2
+
+Flexible LLM-enhanced optimization runner that integrates:
+- Phase 2.7: LLM workflow analysis
+- Phase 2.8: Inline code generation (optional)
+- Phase 2.9: Post-processing hook generation (optional)
+- Phase 3.0: pyNastran research agent (optional)
+- Phase 3.1: Extractor orchestration (optional)
+
+This runner enables users to describe optimization goals in natural language
+and choose to leverage automated code generation, manual coding, or a hybrid approach.
+
+Author: Atomizer Development Team
+Version: 0.1.0 (Phase 3.2)
+Last Updated: 2025-11-16
+"""
+
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+import json
+import logging
+import optuna
+from datetime import datetime
+
+from optimization_engine.extractor_orchestrator import ExtractorOrchestrator
+from optimization_engine.inline_code_generator import InlineCodeGenerator
+from optimization_engine.hook_generator import HookGenerator
+from optimization_engine.plugins.hook_manager import HookManager
+
+logger = logging.getLogger(__name__)
+
+
+class LLMOptimizationRunner:
+    """
+    LLM-enhanced optimization runner with flexible automation options.
+
+    This runner empowers users to leverage LLM-assisted code generation for:
+    - OP2 result extractors (Phase 3.1) - optional
+    - Inline calculations (Phase 2.8) - optional
+    - Post-processing hooks (Phase 2.9) - optional
+
+    Users can describe goals in natural language and choose automated generation,
+    manual coding, or a hybrid approach based on their needs.
+    """
+
+    def __init__(self,
+                 llm_workflow: Dict[str, Any],
+                 model_updater: callable,
+                 simulation_runner: callable,
+                 study_name: str = "llm_optimization",
+                 output_dir: Optional[Path] = None):
+        """
+        Initialize LLM-driven optimization runner.
+
+        Stores the collaborators, creates the output directory and then builds
+        the automation components (extractors, inline calculations, hooks)
+        from ``llm_workflow``.
+
+        Args:
+            llm_workflow: Output from Phase 2.7 LLM analysis with:
+                - engineering_features: List of FEA operations
+                - inline_calculations: List of simple math operations
+                - post_processing_hooks: List of custom calculations
+                - optimization: Dict with algorithm, design_variables, etc.
+            model_updater: Function(design_vars: Dict) -> None
+            simulation_runner: Function(design_vars: Dict) -> Path. It is
+                called once per trial with that trial's design variables and
+                must return the OP2 file path.
+            study_name: Name for Optuna study
+            output_dir: Directory for results. Defaults to
+                ``<cwd>/optimization_results/<study_name>``; it is created
+                (including parents) when missing.
+        """
+        self.llm_workflow = llm_workflow
+        self.model_updater = model_updater
+        self.simulation_runner = simulation_runner
+        self.study_name = study_name
+
+        if output_dir is None:
+            output_dir = Path.cwd() / "optimization_results" / study_name
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # The Optuna study is only created by run_optimization(); history
+        # receives one entry per trial that reaches the end of _objective().
+        self.study = None
+        self.history = []
+
+        # Initialize automation components (relies on self.output_dir)
+        self._initialize_automation()
+
+        logger.info(f"LLMOptimizationRunner initialized for study: {study_name}")
+
+    def _initialize_automation(self):
+        """
+        Initialize all automation components from LLM workflow.
+
+        In order, this:
+        - creates the ExtractorOrchestrator (writing into
+          ``output_dir/generated_extractors``) and generates the extractors
+        - generates code for every ``inline_calculations`` entry
+        - generates one ``post_calculation`` lifecycle hook file per
+          ``post_processing_hooks`` entry in ``output_dir/generated_hooks``
+        - loads the generated hooks, plus the system hooks found in the
+          ``plugins`` directory next to this module, into a HookManager
+
+        Sets ``self.orchestrator``, ``self.extractors``,
+        ``self.inline_generator``, ``self.inline_code``,
+        ``self.hook_generator`` and ``self.hook_manager``.
+        """
+        logger.info("Initializing automation components...")
+
+        # Phase 3.1: Extractor Orchestrator
+        logger.info("  - Phase 3.1: Extractor Orchestrator")
+        self.orchestrator = ExtractorOrchestrator(
+            extractors_dir=self.output_dir / "generated_extractors"
+        )
+
+        # Generate extractors from LLM workflow
+        self.extractors = self.orchestrator.process_llm_workflow(self.llm_workflow)
+        logger.info(f"    Generated {len(self.extractors)} extractor(s)")
+
+        # Phase 2.8: Inline Code Generator
+        logger.info("  - Phase 2.8: Inline Code Generator")
+        self.inline_generator = InlineCodeGenerator()
+        self.inline_code = [
+            self.inline_generator.generate_from_llm_output(calc).code
+            for calc in self.llm_workflow.get('inline_calculations', [])
+        ]
+
+        logger.info(f"    Generated {len(self.inline_code)} inline calculation(s)")
+
+        # Phase 2.9: Hook Generator
+        logger.info("  - Phase 2.9: Hook Generator")
+        self.hook_generator = HookGenerator()
+
+        # Generate lifecycle hooks from post_processing_hooks
+        hook_dir = self.output_dir / "generated_hooks"
+        hook_dir.mkdir(parents=True, exist_ok=True)
+
+        for hook_spec in self.llm_workflow.get('post_processing_hooks', []):
+            hook_content = self.hook_generator.generate_lifecycle_hook(
+                hook_spec,
+                hook_point='post_calculation'
+            )
+
+            # Save hook. Two specs with the same 'action' share a file name,
+            # so the later one overwrites the earlier one.
+            hook_name = hook_spec.get('action', 'custom_hook')
+            hook_file = hook_dir / f"{hook_name}.py"
+            # Python reads source files as UTF-8 by default, so write the
+            # generated module as UTF-8 instead of the platform locale encoding.
+            hook_file.write_text(hook_content, encoding='utf-8')
+
+            logger.info(f"    Generated hook: {hook_name}")
+
+        # Phase 1: Hook Manager
+        logger.info("  - Phase 1: Hook Manager")
+        self.hook_manager = HookManager()
+
+        # Load generated hooks (hook_dir was created above, so it exists)
+        self.hook_manager.load_plugins_from_directory(hook_dir)
+
+        # Load system hooks
+        system_hooks_dir = Path(__file__).parent / 'plugins'
+        if system_hooks_dir.exists():
+            self.hook_manager.load_plugins_from_directory(system_hooks_dir)
+
+        summary = self.hook_manager.get_summary()
+        logger.info(f"    Loaded {summary['enabled_hooks']} hook(s)")
+
+        logger.info("Automation components initialized successfully!")
+
+    def _create_optuna_study(self) -> optuna.Study:
+        """
+        Build the Optuna study described by the workflow's optimization block.
+
+        The study is stored in ``<output_dir>/<study_name>.db`` (SQLite) and
+        an existing study with the same name is reused rather than recreated.
+
+        Returns:
+            The created (or reloaded) Optuna study.
+        """
+        settings = self.llm_workflow.get('optimization', {})
+
+        # Optimization sense; minimization is assumed when the workflow is silent.
+        direction = settings.get('direction', 'minimize')
+
+        # SQLite database lives next to the other outputs of this study.
+        db_path = self.output_dir / f'{self.study_name}.db'
+        storage_url = f"sqlite:///{db_path}"
+
+        created = optuna.create_study(
+            study_name=self.study_name,
+            direction=direction,
+            storage=storage_url,
+            load_if_exists=True
+        )
+
+        logger.info(f"Created Optuna study: {self.study_name} (direction: {direction})")
+        return created
+
+    def _objective(self, trial: optuna.Trial) -> float:
+        """
+        Optuna objective function - evaluates one trial end to end.
+
+        Steps:
+        1. Suggests design variables from the workflow's
+           ``optimization.design_variables`` list
+        2. Updates model via ``self.model_updater(design_vars)``
+        3. Runs simulation via ``self.simulation_runner(design_vars)``
+        4. Extracts results with every generated extractor (a failing
+           extractor is logged and skipped)
+        5. Executes inline calculations (a failing snippet is logged and skipped)
+        6. Executes post-calculation hooks
+        7. Picks the objective, records the trial in ``self.history`` and
+           rewrites the incremental history file
+
+        The 'pre_solve', 'post_solve', 'post_extraction' and
+        'post_calculation' hook points are fired through ``self.hook_manager``.
+
+        Args:
+            trial: Optuna trial
+
+        Returns:
+            Objective value
+
+        Raises:
+            ValueError: If neither a hook result nor one of the fallback
+                result keys provides an objective.
+            TypeError: If the objective found cannot be converted to float
+                (``float()`` may also raise ValueError for bad strings).
+        """
+        trial_number = trial.number
+        logger.info(f"\n{'='*80}")
+        logger.info(f"Trial {trial_number} starting...")
+        logger.info(f"{'='*80}")
+
+        # ====================================================================
+        # STEP 1: Suggest Design Variables
+        # ====================================================================
+        design_vars_config = self.llm_workflow.get('optimization', {}).get('design_variables', [])
+
+        design_vars = {}
+        for var_config in design_vars_config:
+            var_name = var_config['parameter']
+            var_min = var_config.get('min', 0.0)
+            var_max = var_config.get('max', 1.0)
+
+            # Suggest value using Optuna
+            design_vars[var_name] = trial.suggest_float(var_name, var_min, var_max)
+
+        logger.info(f"Design variables: {design_vars}")
+
+        # Execute pre-solve hooks
+        self.hook_manager.execute_hooks('pre_solve', {
+            'trial_number': trial_number,
+            'design_variables': design_vars
+        })
+
+        # ====================================================================
+        # STEP 2: Update Model
+        # ====================================================================
+        logger.info("Updating model...")
+        self.model_updater(design_vars)
+
+        # ====================================================================
+        # STEP 3: Run Simulation
+        # ====================================================================
+        logger.info("Running simulation...")
+        # Pass design_vars to simulation_runner so NX journal can update expressions
+        op2_file = self.simulation_runner(design_vars)
+        logger.info(f"Simulation complete: {op2_file}")
+
+        # Execute post-solve hooks
+        self.hook_manager.execute_hooks('post_solve', {
+            'trial_number': trial_number,
+            'op2_file': op2_file
+        })
+
+        # ====================================================================
+        # STEP 4: Extract Results (Phase 3.1 - Auto-Generated Extractors)
+        # ====================================================================
+        logger.info("Extracting results...")
+
+        results = {}
+        for extractor in self.extractors:
+            try:
+                extraction_result = self.orchestrator.execute_extractor(
+                    extractor.name,
+                    Path(op2_file),
+                    subcase=1
+                )
+                results.update(extraction_result)
+                logger.info(f"  {extractor.name}: {list(extraction_result.keys())}")
+            except Exception as e:
+                # Best effort: keep going with the other extractors, but keep
+                # the traceback so the failure can be diagnosed from the log.
+                logger.exception(f"Extraction failed for {extractor.name}: {e}")
+
+        # Execute post-extraction hooks
+        self.hook_manager.execute_hooks('post_extraction', {
+            'trial_number': trial_number,
+            'results': results
+        })
+
+        # ====================================================================
+        # STEP 5: Inline Calculations (Phase 2.8 - Auto-Generated Code)
+        # ====================================================================
+        logger.info("Executing inline calculations...")
+
+        calculations = {}
+        # Extracted results are exposed to the snippets as plain variables.
+        calc_namespace = dict(results)
+
+        for calc_code in self.inline_code:
+            try:
+                # NOTE(security): exec() runs generated code with full
+                # interpreter privileges - only use workflows from a trusted source.
+                exec(calc_code, calc_namespace)
+                # Extract newly created variables. exec() also injects
+                # '__builtins__', which the underscore filter drops.
+                for key, value in calc_namespace.items():
+                    if key not in results and not key.startswith('_'):
+                        calculations[key] = value
+
+                logger.info(f"  Executed: {calc_code[:50]}...")
+            except Exception as e:
+                logger.exception(f"Inline calculation failed: {e}")
+
+        logger.info(f"Calculations: {calculations}")
+
+        # ====================================================================
+        # STEP 6: Post-Calculation Hooks (Phase 2.9 - Auto-Generated Hooks)
+        # ====================================================================
+        logger.info("Executing post-calculation hooks...")
+
+        # Tolerate a hook manager that returns None when nothing is registered.
+        hook_results = self.hook_manager.execute_hooks('post_calculation', {
+            'trial_number': trial_number,
+            'design_variables': design_vars,
+            'results': results,
+            'calculations': calculations
+        }) or []
+
+        # Merge hook results
+        final_context = {**results, **calculations}
+        for hook_result in hook_results:
+            if hook_result:
+                final_context.update(hook_result)
+
+        logger.info(f"Hook results: {hook_results}")
+
+        # ====================================================================
+        # STEP 7: Extract Objective Value
+        # ====================================================================
+
+        # Try to get objective from hooks first
+        objective = None
+
+        # The first hook result carrying 'objective' or 'weighted_objective'
+        # wins; within one result 'objective' takes precedence.
+        for hook_result in hook_results:
+            if not hook_result:
+                continue
+            if 'objective' in hook_result:
+                objective = hook_result['objective']
+                break
+            if 'weighted_objective' in hook_result:
+                objective = hook_result['weighted_objective']
+                break
+
+        # Fallback: look for a commonly used result name
+        if objective is None:
+            for key in ['max_displacement', 'max_stress', 'max_von_mises']:
+                if key in final_context:
+                    objective = final_context[key]
+                    logger.warning(f"No explicit objective found, using: {key}")
+                    break
+
+        if objective is None:
+            raise ValueError("Could not determine objective value from results/calculations/hooks")
+
+        # Convert before recording, so a non-numeric objective fails here
+        # instead of leaving an unusable entry in the history.
+        objective = float(objective)
+        logger.info(f"Objective value: {objective}")
+
+        # Save trial history
+        trial_data = {
+            'trial_number': trial_number,
+            'design_variables': design_vars,
+            'results': results,
+            'calculations': calculations,
+            'objective': objective
+        }
+        self.history.append(trial_data)
+
+        # Incremental save - write history after each trial
+        # This allows monitoring progress in real-time
+        self._save_incremental_history()
+
+        return objective
+
+    def run_optimization(self, n_trials: int = 50) -> Dict[str, Any]:
+        """
+        Create the study, run the requested trials and persist the summary.
+
+        Args:
+            n_trials: Number of optimization trials
+
+        Returns:
+            Dict with:
+                - best_params: Best design variable values
+                - best_value: Best objective value
+                - best_trial_number: Number of the best trial
+                - history: Trial history collected by this runner
+        """
+        rule = '=' * 80
+
+        # Opening banner, one log record per line.
+        for line in (
+            "\n" + rule,
+            "Starting LLM-Driven Optimization",
+            rule,
+            f"Study: {self.study_name}",
+            f"Trials: {n_trials}",
+            f"Output: {self.output_dir}",
+            rule + "\n",
+        ):
+            logger.info(line)
+
+        # Study creation and the optimization loop itself.
+        self.study = self._create_optuna_study()
+        self.study.optimize(self._objective, n_trials=n_trials)
+
+        # Summarize the winner together with everything recorded on the way.
+        winner = self.study.best_trial
+        summary = {
+            'best_params': winner.params,
+            'best_value': winner.value,
+            'best_trial_number': winner.number,
+            'history': self.history
+        }
+
+        self._save_results(summary)
+
+        # Closing banner.
+        for line in (
+            "\n" + rule,
+            "Optimization Complete!",
+            rule,
+            f"Best value: {summary['best_value']}",
+            f"Best params: {summary['best_params']}",
+            f"Results saved to: {self.output_dir}",
+            rule + "\n",
+        ):
+            logger.info(line)
+
+        return summary
+
+    def _save_incremental_history(self):
+        """
+        Save trial history incrementally after each trial.
+
+        Rewrites ``optimization_history_incremental.json`` in the output
+        directory with the complete ``self.history`` so progress can be
+        monitored while the optimization is still running. ``self.history``
+        itself is not modified.
+
+        Real-valued numbers in 'results', 'calculations' and
+        'design_variables' are written as floats; anything else that JSON
+        cannot represent is written as its ``str()`` form.
+        """
+        import numbers  # local import: only this method needs it
+
+        def to_native(value):
+            # numbers.Real matches int/float and also NumPy scalar types such
+            # as float32/int64 (NumPy registers them with the numbers ABCs),
+            # which a plain isinstance(value, (int, float)) check would miss
+            # and default=str would then turn into strings.
+            return float(value) if isinstance(value, numbers.Real) else value
+
+        history_file = self.output_dir / "optimization_history_incremental.json"
+
+        # Convert history to JSON-serializable format
+        serializable_history = []
+        for trial in self.history:
+            # Shallow copy is enough: the nested dicts are replaced, not mutated.
+            trial_copy = dict(trial)
+            for key in ('results', 'calculations', 'design_variables'):
+                if key in trial_copy:
+                    trial_copy[key] = {k: to_native(v)
+                                       for k, v in trial_copy[key].items()}
+            if 'objective' in trial_copy:
+                trial_copy['objective'] = float(trial_copy['objective'])
+            serializable_history.append(trial_copy)
+
+        # Write to file
+        with open(history_file, 'w') as f:
+            json.dump(serializable_history, f, indent=2, default=str)
+
+    def _save_results(self, results: Dict[str, Any]):
+        """
+        Write the final optimization summary to ``optimization_results.json``.
+
+        Only the best parameters/value/trial number, a timestamp, the study
+        name and the number of history entries are stored - the per-trial
+        history itself is not part of this file.
+
+        Args:
+            results: Dict with 'best_params', 'best_value',
+                'best_trial_number' and 'history' keys.
+        """
+        target = self.output_dir / "optimization_results.json"
+
+        # Copy the headline figures first, then add the bookkeeping fields.
+        payload = {
+            field: results[field]
+            for field in ('best_params', 'best_value', 'best_trial_number')
+        }
+        payload['timestamp'] = datetime.now().isoformat()
+        payload['study_name'] = self.study_name
+        payload['n_trials'] = len(results['history'])
+
+        with target.open('w') as handle:
+            json.dump(payload, handle, indent=2)
+
+        logger.info(f"Results saved to: {target}")
+
+
+def main():
+    """
+    Smoke-test construction of the LLM-driven optimization runner.
+
+    Builds an example Phase 2.7 workflow, wires it to dummy model-update and
+    simulation callbacks and instantiates LLMOptimizationRunner. No
+    optimization trials are run; only initialization is exercised.
+    """
+    print("=" * 80)
+    print("Phase 3.2: LLM-Driven Optimization Runner Test")
+    print("=" * 80)
+    print()
+
+    # Example LLM workflow (from Phase 2.7)
+    llm_workflow = {
+        "engineering_features": [
+            {
+                "action": "extract_displacement",
+                "domain": "result_extraction",
+                "description": "Extract displacement from OP2",
+                "params": {"result_type": "displacement"}
+            }
+        ],
+        "inline_calculations": [
+            {
+                "action": "normalize",
+                "params": {
+                    "input": "max_displacement",
+                    "reference": "max_allowed_disp",
+                    "value": 5.0
+                },
+                "code_hint": "norm_disp = max_displacement / 5.0"
+            }
+        ],
+        "post_processing_hooks": [
+            {
+                "action": "weighted_objective",
+                "params": {
+                    "inputs": ["norm_disp"],
+                    "weights": [1.0],
+                    "objective": "minimize"
+                }
+            }
+        ],
+        "optimization": {
+            "algorithm": "TPE",
+            "direction": "minimize",
+            "design_variables": [
+                {
+                    "parameter": "wall_thickness",
+                    "min": 3.0,
+                    "max": 8.0,
+                    "type": "continuous"
+                }
+            ]
+        }
+    }
+
+    print("LLM Workflow Configuration:")
+    print(f"  Engineering features: {len(llm_workflow['engineering_features'])}")
+    print(f"  Inline calculations: {len(llm_workflow['inline_calculations'])}")
+    print(f"  Post-processing hooks: {len(llm_workflow['post_processing_hooks'])}")
+    print(f"  Design variables: {len(llm_workflow['optimization']['design_variables'])}")
+    print()
+
+    # Dummy functions for testing
+    def dummy_model_updater(design_vars):
+        print(f"  [Dummy] Updating model with: {design_vars}")
+
+    # The runner calls simulation_runner(design_vars), so the dummy must
+    # accept that argument; the default keeps a bare call working as well.
+    def dummy_simulation_runner(design_vars=None):
+        print("  [Dummy] Running simulation...")
+        # Return path to test OP2
+        return Path("tests/bracket_sim1-solution_1.op2")
+
+    # Initialize runner
+    print("Initializing LLM-driven optimization runner...")
+    runner = LLMOptimizationRunner(
+        llm_workflow=llm_workflow,
+        model_updater=dummy_model_updater,
+        simulation_runner=dummy_simulation_runner,
+        study_name="test_llm_optimization"
+    )
+
+    print()
+    print("=" * 80)
+    print("Runner initialized successfully!")
+    print("Ready to run optimization with auto-generated code!")
+    print("=" * 80)
+
+
+# Run the smoke test only when this module is executed as a script.
+if __name__ == '__main__':
+    main()