feat: Add comprehensive study management system

Implement study persistence and resumption capabilities for optimization workflows:

Features:
- Resume existing studies to add more trials
- Create new studies when topology/config changes
- Study metadata tracking (creation date, trials, config hash)
- SQLite database persistence for Optuna studies
- Configuration change detection with warnings
- List all available studies

Key Changes:
- Enhanced OptimizationRunner.run() with resume parameter
- Added _load_existing_study() for study resumption
- Added _save_study_metadata() for tracking
- Added _get_config_hash() to detect topology changes
- Added list_studies() to view all studies
- SQLite storage for study persistence

Updated Files:
- optimization_engine/runner.py: Core study management
- examples/test_journal_optimization.py: Interactive study management
- examples/study_management_example.py: Comprehensive examples

Usage Examples:
# New study
runner.run(study_name="bracket_v1", n_trials=50)

# Resume study (add 25 more trials)
runner.run(study_name="bracket_v1", n_trials=25, resume=True)

# New study after topology change
runner.run(study_name="bracket_v2", n_trials=50)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-15 13:02:15 -05:00
parent a267e2d6f0
commit 7d97ef1cb5
3 changed files with 548 additions and 23 deletions
--- a/examples/study_management_example.py
+++ b/examples/study_management_example.py
@@ -0,0 +1,261 @@
+"""
+Study Management Example
+
+This script demonstrates how to use the study management features:
+1. Create a new study
+2. Resume an existing study to add more trials
+3. List all available studies
+4. Create a new study after topology/configuration changes
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent.parent
+sys.path.insert(0, str(project_root))
+
+from optimization_engine.runner import OptimizationRunner
+from optimization_engine.nx_solver import run_nx_simulation
+from optimization_engine.result_extractors import (
+    extract_stress_from_op2,
+    extract_displacement_from_op2
+)
+
+
+def bracket_model_updater(design_vars: dict):
+    """Push the current design-variable values into the bracket .sim model.
+
+    Only ``tip_thickness`` and ``support_angle`` are read from ``design_vars``
+    (a missing key raises ``KeyError``); both are forwarded unchanged.
+    """
+    # Function-scope import: only resolved when the updater is actually called.
+    from integration.nx_expression_updater import update_expressions_from_file
+
+    wanted = ('tip_thickness', 'support_angle')
+    nx_expressions = {key: design_vars[key] for key in wanted}
+
+    update_expressions_from_file(
+        sim_file=Path('examples/bracket/Bracket_sim1.sim'),
+        expressions=nx_expressions,
+    )
+
+
+def bracket_simulation_runner() -> Path:
+    """Solve the bracket .sim through the journal-based NX solver.
+
+    Returns whatever ``run_nx_simulation`` returns (annotated as the OP2 path).
+    """
+    solver_options = {
+        'nastran_version': '2412',
+        'timeout': 300,
+        'cleanup': False,
+        'use_journal': True,
+    }
+    bracket_sim = Path('examples/bracket/Bracket_sim1.sim')
+    return run_nx_simulation(sim_file=bracket_sim, **solver_options)
+
+
+def stress_extractor(result_path: Path) -> dict:
+    """Return the stress results read from the OP2 file at ``result_path``."""
+    # Thin adapter: exposes the extractor under the name the runner is given.
+    return extract_stress_from_op2(result_path)
+
+
+def displacement_extractor(result_path: Path) -> dict:
+    """Return the displacement results read from the OP2 file at ``result_path``."""
+    # Thin adapter: exposes the extractor under the name the runner is given.
+    return extract_displacement_from_op2(result_path)
+
+
+def example_1_new_study():
+    """Example 1: start a brand-new optimization study and run 20 trials."""
+    rule = "="*70
+    print("\n" + rule)
+    print("EXAMPLE 1: Creating a New Study")
+    print(rule)
+
+    study_name = "bracket_optimization_v1"
+    extractors = {
+        'stress_extractor': stress_extractor,
+        'displacement_extractor': displacement_extractor
+    }
+    runner = OptimizationRunner(
+        config_path=Path(
+            'examples/bracket/optimization_config_stress_displacement.json'
+        ),
+        model_updater=bracket_model_updater,
+        simulation_runner=bracket_simulation_runner,
+        result_extractors=extractors
+    )
+
+    # resume=False asks for a fresh study; n_trials=20 overrides the trial
+    # count from the config file for this demonstration.
+    runner.run(
+        study_name=study_name, n_trials=20, resume=False
+    )
+
+    print("\nStudy completed successfully!")
+    db_path = runner._get_study_db_path(study_name)
+    print(f"Database saved to: {db_path}")
+
+
+def example_2_resume_study():
+    """Example 2: reopen the study from example 1 and run 30 more trials."""
+    rule = "="*70
+    print("\n" + rule)
+    print("EXAMPLE 2: Resuming an Existing Study")
+    print(rule)
+
+    extractors = {
+        'stress_extractor': stress_extractor,
+        'displacement_extractor': displacement_extractor
+    }
+    runner = OptimizationRunner(
+        config_path=Path(
+            'examples/bracket/optimization_config_stress_displacement.json'
+        ),
+        model_updater=bracket_model_updater,
+        simulation_runner=bracket_simulation_runner,
+        result_extractors=extractors
+    )
+
+    # With resume=True, n_trials is the number of ADDITIONAL trials; on top
+    # of the 20 from example 1 this brings the study to 50.
+    resumed = runner.run(
+        study_name="bracket_optimization_v1",
+        n_trials=30,
+        resume=True
+    )
+
+    print("\nStudy resumed and expanded successfully!")
+    print(f"Total trials: {len(resumed.trials)}")
+
+
+def example_3_list_studies():
+    """Example 3: print a summary of every study the runner can find."""
+    rule = "="*70
+    print("\n" + rule)
+    print("EXAMPLE 3: Listing All Studies")
+    print(rule)
+
+    extractors = {
+        'stress_extractor': stress_extractor,
+        'displacement_extractor': displacement_extractor
+    }
+    runner = OptimizationRunner(
+        config_path=Path(
+            'examples/bracket/optimization_config_stress_displacement.json'
+        ),
+        model_updater=bracket_model_updater,
+        simulation_runner=bracket_simulation_runner,
+        result_extractors=extractors
+    )
+
+    found = runner.list_studies()
+
+    if not found:
+        print("No studies found.")
+        return
+    print(f"\nFound {len(found)} studies:\n")
+    for meta in found:
+        print(f"Study: {meta['study_name']}")
+        print(f"  Created: {meta['created_at']}")
+        print(f"  Total trials: {meta.get('total_trials', 0)}")
+        print(f"  Resume count: {meta.get('resume_count', 0)}")
+        print(f"  Config hash: {meta.get('config_hash', 'N/A')[:8]}...")
+        print()
+
+
+def example_4_new_study_after_change():
+    """
+    Example 4: start a separate study once the problem itself has changed.
+
+    Use a new study name instead of resuming when, for example:
+    - the geometry topology was modified significantly,
+    - design variables were added or removed, or
+    - the objectives changed.
+
+    In those cases the surrogate model from the previous study is no longer
+    valid, so the trials belong in a NEW study rather than a resumed one.
+    """
+    rule = "="*70
+    print("\n" + rule)
+    print("EXAMPLE 4: New Study After Configuration Change")
+    print(rule)
+    print("\nScenario: Bracket topology was modified, added new design variable")
+    print("Old surrogate is invalid -> Create NEW study with different name\n")
+
+    extractors = {
+        'stress_extractor': stress_extractor,
+        'displacement_extractor': displacement_extractor
+    }
+    runner = OptimizationRunner(
+        config_path=Path(
+            'examples/bracket/optimization_config_stress_displacement.json'
+        ),
+        model_updater=bracket_model_updater,
+        simulation_runner=bracket_simulation_runner,
+        result_extractors=extractors
+    )
+
+    # The "v2" suffix marks a different geometry/configuration than "v1";
+    # resume=False requests a fresh study under that new name.
+    runner.run(
+        study_name="bracket_optimization_v2", n_trials=50, resume=False
+    )
+
+    print("\nNew study created for modified configuration!")
+    print("Old study (v1) remains unchanged in database.")
+
+
+if __name__ == "__main__":
+    rule = "="*70
+    print(rule)
+    print("STUDY MANAGEMENT DEMONSTRATION")
+    print(rule)
+    print("\nThis script demonstrates study management features:")
+    print("1. Create new study")
+    print("2. Resume existing study (add more trials)")
+    print("3. List all studies")
+    print("4. Create new study after topology change")
+    print("\nREQUIREMENT: Simcenter3D must be OPEN")
+    print(rule)
+
+    # Bail out early unless the user confirms that Simcenter3D is open.
+    nx_ready = input("\nIs Simcenter3D open? (yes/no): ").lower()
+    if nx_ready not in ('yes', 'y'):
+        print("Please open Simcenter3D and try again.")
+        sys.exit(0)
+
+    print("\n" + rule)
+    print("Which example would you like to run?")
+    print(rule)
+    print("1. Create a new study (20 trials)")
+    print("2. Resume existing study 'bracket_optimization_v1' (+30 trials)")
+    print("3. List all available studies")
+    print("4. Create new study after topology change (50 trials)")
+    print("0. Exit")
+    print(rule)
+
+    # Menu key -> example to run; '0' and unknown keys are handled below.
+    examples = {
+        '1': example_1_new_study,
+        '2': example_2_resume_study,
+        '3': example_3_list_studies,
+        '4': example_4_new_study_after_change,
+    }
+    choice = input("\nEnter choice (0-4): ").strip()
+
+    try:
+        if choice in examples:
+            examples[choice]()
+        else:
+            print("Exiting." if choice == '0' else "Invalid choice.")
+    except Exception as e:
+        print("\n" + rule)
+        print("ERROR")
+        print(rule)
+        print(f"{e}")
+        import traceback
+        traceback.print_exc()
--- a/examples/test_journal_optimization.py
+++ b/examples/test_journal_optimization.py
@@ -116,8 +116,57 @@ if __name__ == "__main__":
    # Use the configured number of trials (50 by default)
    n_trials = runner.config['optimization_settings']['n_trials']

+    # Check for existing studies
+    existing_studies = runner.list_studies()
+
    print("\n" + "="*60)
-    print(f"Starting optimization with {n_trials} trials")
+    print("STUDY MANAGEMENT")
+    print("="*60)
+
+    if existing_studies:
+        print(f"\nFound {len(existing_studies)} existing studies:")
+        for study in existing_studies:
+            print(f"  - {study['study_name']}: {study.get('total_trials', 0)} trials")
+
+        print("\nOptions:")
+        print("1. Create NEW study (fresh start)")
+        print("2. RESUME existing study (add more trials)")
+        choice = input("\nChoose option (1 or 2): ").strip()
+
+        if choice == '2':
+            # Resume existing study
+            if len(existing_studies) == 1:
+                study_name = existing_studies[0]['study_name']
+                print(f"\nResuming study: {study_name}")
+            else:
+                print("\nAvailable studies:")
+                for i, study in enumerate(existing_studies):
+                    print(f"{i+1}. {study['study_name']}")
+                study_idx = int(input("Select study number: ")) - 1
+                study_name = existing_studies[study_idx]['study_name']
+
+            resume_mode = True
+        else:
+            # New study
+            study_name = input("\nEnter study name (default: bracket_stress_opt): ").strip()
+            if not study_name:
+                study_name = "bracket_stress_opt"
+            resume_mode = False
+    else:
+        print("\nNo existing studies found. Creating new study.")
+        study_name = input("\nEnter study name (default: bracket_stress_opt): ").strip()
+        if not study_name:
+            study_name = "bracket_stress_opt"
+        resume_mode = False
+
+    print("\n" + "="*60)
+    if resume_mode:
+        print(f"RESUMING STUDY: {study_name}")
+        print(f"Adding {n_trials} additional trials")
+    else:
+        print(f"STARTING NEW STUDY: {study_name}")
+        print(f"Running {n_trials} trials")
+    print("="*60)
    print("Objective: Minimize max von Mises stress")
    print("Constraint: Max displacement <= 1.0 mm")
    print("Solver: Journal-based (using running NX GUI)")
@@ -125,7 +174,11 @@ if __name__ == "__main__":
    print("="*60)

    try:
-        study = runner.run(study_name="journal_solver_test")
+        study = runner.run(
+            study_name=study_name,
+            n_trials=n_trials,
+            resume=resume_mode
+        )

        print("\n" + "="*60)
        print("OPTIMIZATION COMPLETE!")
--- a/optimization_engine/runner.py
+++ b/optimization_engine/runner.py
@@ -16,10 +16,12 @@ from pathlib import Path
 from typing import Dict, Any, List, Optional, Callable
 import json
 import time
+import hashlib
 import optuna
 from optuna.samplers import TPESampler, CmaEsSampler, GPSampler
 import pandas as pd
 from datetime import datetime
+import pickle


 class OptimizationRunner:
@@ -124,6 +126,160 @@ class OptimizationRunner:
        units_lower = units.lower() if units else 'dimensionless'
        return precision_map.get(units_lower, 4)  # Default to 4 decimals

+    def _get_config_hash(self) -> str:
+        """
+        Fingerprint the parts of the configuration that define the problem.
+
+        Only the design variables, objectives and constraints are hashed, so
+        two configurations with equal values for those sections compare equal.
+
+        Returns:
+            Hex MD5 digest of those sections serialised as key-sorted JSON
+        """
+        sections = ('design_variables', 'objectives', 'constraints')
+        # A missing section hashes the same as an empty list.
+        fingerprint_input = {name: self.config.get(name, []) for name in sections}
+
+        serialised = json.dumps(fingerprint_input, sort_keys=True)
+        digest = hashlib.md5(serialised.encode())
+        return digest.hexdigest()
+
+    def _get_study_metadata_path(self, study_name: str) -> Path:
+        """Return ``<output_dir>/study_<study_name>_metadata.json``."""
+        return self.output_dir.joinpath(f'study_{study_name}_metadata.json')
+
+    def _get_study_db_path(self, study_name: str) -> Path:
+        """Return ``<output_dir>/study_<study_name>.db`` (the Optuna SQLite file)."""
+        return self.output_dir.joinpath(f'study_{study_name}.db')
+
+    def _save_study_metadata(self, study_name: str, is_new: bool = False):
+        """
+        Write the study's metadata JSON (created, or updated in place).
+
+        Args:
+            study_name: Name of the study
+            is_new: True starts a fresh record; False updates the saved one
+        """
+        metadata_path = self._get_study_metadata_path(study_name)
+
+        # Reuse the stored record unless a fresh one was requested or none exists
+        if metadata_path.exists() and not is_new:
+            with open(metadata_path, 'r') as f:
+                metadata = json.load(f)
+        else:
+            metadata = {
+                'study_name': study_name,
+                'created_at': datetime.now().isoformat(),
+                'config_hash': self._get_config_hash(),
+                'total_trials': 0,
+                'resume_count': 0
+            }
+
+        # NOTE(review): resume_count also grows on run()'s final save (is_new=False) - confirm
+        if self.study:
+            metadata['total_trials'] = len(self.study.trials)
+            metadata['last_updated'] = datetime.now().isoformat()
+            if not is_new and 'created_at' in metadata:
+                metadata['resume_count'] = metadata.get('resume_count', 0) + 1
+
+        with open(metadata_path, 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    def _load_existing_study(self, study_name: str) -> Optional[optuna.Study]:
+        """
+        Load an existing Optuna study from its SQLite database.
+
+        Args:
+            study_name: Name of the study to load
+
+        Returns:
+            Loaded study, or None if missing, declined by the user, or unloadable
+        """
+        db_path = self._get_study_db_path(study_name)
+        metadata_path = self._get_study_metadata_path(study_name)
+
+        if not db_path.exists():
+            return None
+
+        # Check if metadata exists and validate config
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                metadata = json.load(f)
+
+            if self._get_config_hash() != metadata.get('config_hash', ''):
+                print("\n" + "!"*60)
+                print("WARNING: Configuration has changed since study was created!")
+                print("!"*60)
+                print("This may indicate:")
+                print("  - Different design variables")
+                print("  - Different objectives or constraints")
+                print("  - Topology/geometry changes")
+                print("\nRecommendation: Create a NEW study instead of resuming.")
+                print("!"*60)
+
+                response = input("\nContinue anyway? (yes/no): ")
+                if response.lower() not in ['yes', 'y']:
+                    print("Aborting. Please create a new study.")
+                    return None
+
+        # Load study from SQLite database
+        storage = optuna.storages.RDBStorage(
+            url=f"sqlite:///{db_path}",
+            engine_kwargs={"connect_args": {"timeout": 10.0}}
+        )
+
+        try:
+            study = optuna.load_study(study_name=study_name, storage=storage)
+
+            print("\n" + "="*60)
+            print(f"LOADED EXISTING STUDY: {study_name}")
+            print("="*60)
+            print(f"Trials completed: {len(study.trials)}")
+            # best_trial raises ValueError when no trial has completed (e.g. all failed)
+            try:
+                best = study.best_trial
+            except ValueError:
+                best = None
+            if best is not None:
+                print(f"Best value so far: {best.value:.6f}")
+                print("Best parameters:")
+                for param, value in best.params.items():
+                    # Non-numeric (categorical) values cannot take the .4f format
+                    shown = f"{value:.4f}" if isinstance(value, (int, float)) else value
+                    print(f"  {param}: {shown}")
+            print("="*60)
+
+            # NOTE(review): history.json is per output_dir, not per study - confirm
+            history_json_path = self.output_dir / 'history.json'
+            if history_json_path.exists():
+                with open(history_json_path, 'r') as f:
+                    self.history = json.load(f)
+                print(f"Loaded {len(self.history)} previous trials from history")
+
+            return study
+        except Exception as e:
+            print(f"Error loading study: {e}")
+            return None
+
+    def list_studies(self) -> List[Dict[str, Any]]:
+        """
+        Collect the metadata of every study stored in the output directory.
+
+        Returns:
+            Metadata dictionaries ordered by ``created_at``, descending; files
+            that cannot be read are reported on stdout and skipped
+        """
+        found = []
+        for path in self.output_dir.glob('study_*_metadata.json'):
+            try:
+                with open(path, 'r') as handle:
+                    found.append(json.load(handle))
+            except Exception as e:
+                print(f"Error reading {path}: {e}")
+        # Records without 'created_at' sort as '' and therefore end up last.
+        found.sort(key=lambda record: record.get('created_at', ''), reverse=True)
+        return found
+
    def _objective_function(self, trial: optuna.Trial) -> float:
        """
        Optuna objective function.
@@ -254,41 +410,94 @@ class OptimizationRunner:

        return total_objective

-    def run(self, study_name: Optional[str] = None) -> optuna.Study:
+    def run(
+        self,
+        study_name: Optional[str] = None,
+        n_trials: Optional[int] = None,
+        resume: bool = False
+    ) -> optuna.Study:
        """
        Run the optimization.

        Args:
-            study_name: Optional name for the study
+            study_name: Optional name for the study. If None, generates timestamp-based name.
+            n_trials: Number of trials to run. If None, uses config value.
+                     When resuming, this is ADDITIONAL trials to run.
+            resume: If True, attempts to resume existing study. If False, creates new study.

        Returns:
            Completed Optuna study
+
+        Examples:
+            # New study with 50 trials
+            runner.run(study_name="bracket_opt_v1", n_trials=50)
+
+            # Resume existing study for 25 more trials
+            runner.run(study_name="bracket_opt_v1", n_trials=25, resume=True)
+
+            # New study after topology change
+            runner.run(study_name="bracket_opt_v2", n_trials=50)
        """
        if study_name is None:
            study_name = f"optimization_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Get optimization settings
        settings = self.config['optimization_settings']
-        n_trials = settings.get('n_trials', 100)
+        if n_trials is None:
+            n_trials = settings.get('n_trials', 100)
        sampler_name = settings.get('sampler', 'TPE')

-        # Create Optuna study
-        sampler = self._get_sampler(sampler_name)
-        self.study = optuna.create_study(
-            study_name=study_name,
-            direction='minimize',  # Total weighted objective is always minimized
-            sampler=sampler
-        )
+        # Try to load existing study if resume=True
+        if resume:
+            existing_study = self._load_existing_study(study_name)
+            if existing_study is not None:
+                self.study = existing_study
+                trials_completed = len(self.study.trials)

-        print("="*60)
-        print(f"STARTING OPTIMIZATION: {study_name}")
-        print("="*60)
-        print(f"Design Variables: {len(self.config['design_variables'])}")
-        print(f"Objectives: {len(self.config['objectives'])}")
-        print(f"Constraints: {len(self.config.get('constraints', []))}")
-        print(f"Trials: {n_trials}")
-        print(f"Sampler: {sampler_name}")
-        print("="*60)
+                print("\n" + "="*60)
+                print(f"RESUMING OPTIMIZATION: {study_name}")
+                print("="*60)
+                print(f"Trials already completed: {trials_completed}")
+                print(f"Additional trials to run: {n_trials}")
+                print(f"Total trials after completion: {trials_completed + n_trials}")
+                print("="*60)
+
+                # Save metadata indicating this is a resume
+                self._save_study_metadata(study_name, is_new=False)
+            else:
+                print(f"\nNo existing study '{study_name}' found. Creating new study instead.")
+                resume = False
+
+        # Create new study if not resuming or if resume failed
+        if not resume or self.study is None:
+            # Create storage for persistence
+            db_path = self._get_study_db_path(study_name)
+            storage = optuna.storages.RDBStorage(
+                url=f"sqlite:///{db_path}",
+                engine_kwargs={"connect_args": {"timeout": 10.0}}
+            )
+
+            sampler = self._get_sampler(sampler_name)
+            self.study = optuna.create_study(
+                study_name=study_name,
+                direction='minimize',  # Total weighted objective is always minimized
+                sampler=sampler,
+                storage=storage,
+                load_if_exists=False  # Force new study
+            )
+
+            print("="*60)
+            print(f"STARTING NEW OPTIMIZATION: {study_name}")
+            print("="*60)
+            print(f"Design Variables: {len(self.config['design_variables'])}")
+            print(f"Objectives: {len(self.config['objectives'])}")
+            print(f"Constraints: {len(self.config.get('constraints', []))}")
+            print(f"Trials: {n_trials}")
+            print(f"Sampler: {sampler_name}")
+            print("="*60)
+
+            # Save metadata for new study
+            self._save_study_metadata(study_name, is_new=True)

        # Run optimization
        start_time = time.time()
@@ -302,14 +511,16 @@ class OptimizationRunner:
        print("\n" + "="*60)
        print("OPTIMIZATION COMPLETE")
        print("="*60)
-        print(f"Total time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
+        print(f"Time for this run: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
+        print(f"Total trials completed: {len(self.study.trials)}")
        print(f"Best objective value: {self.best_value:.6f}")
        print(f"Best parameters:")
        for param, value in self.best_params.items():
            print(f"  {param}: {value:.4f}")
        print("="*60)

-        # Save final results
+        # Save metadata and final results
+        self._save_study_metadata(study_name)
        self._save_final_results()

        return self.study