feat: Add comprehensive study management system

Implement study persistence and resumption capabilities for optimization workflows: Features: - Resume existing studies to add more trials - Create new studies when topology/config changes - Study metadata tracking (creation date, trials, config hash) - SQLite database persistence for Optuna studies - Configuration change detection with warnings - List all available studies Key Changes: - Enhanced OptimizationRunner.run() with resume parameter - Added _load_existing_study() for study resumption - Added _save_study_metadata() for tracking - Added _get_config_hash() to detect topology changes - Added list_studies() to view all studies - SQLite storage for study persistence Updated Files: - optimization_engine/runner.py: Core study management - examples/test_journal_optimization.py: Interactive study management - examples/study_management_example.py: Comprehensive examples Usage Examples: # New study runner.run(study_name="bracket_v1", n_trials=50) # Resume study (add 25 more trials) runner.run(study_name="bracket_v1", n_trials=25, resume=True) # New study after topology change runner.run(study_name="bracket_v2", n_trials=50) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-15 13:02:15 -05:00
parent a267e2d6f0
commit 7d97ef1cb5
3 changed files with 548 additions and 23 deletions
--- a/optimization_engine/runner.py
+++ b/optimization_engine/runner.py
@@ -16,10 +16,12 @@ from pathlib import Path
 from typing import Dict, Any, List, Optional, Callable
 import json
 import time
+import hashlib
 import optuna
 from optuna.samplers import TPESampler, CmaEsSampler, GPSampler
 import pandas as pd
 from datetime import datetime
+import pickle


 class OptimizationRunner:
@@ -124,6 +126,160 @@ class OptimizationRunner:
        units_lower = units.lower() if units else 'dimensionless'
        return precision_map.get(units_lower, 4)  # Default to 4 decimals

+    def _get_config_hash(self) -> str:
+        """
+        Generate hash of critical configuration parameters.
+        Used to detect if configuration has changed between study runs.
+
+        Returns:
+            MD5 hash of design variables, objectives, and constraints
+        """
+        # Extract critical config parts that affect optimization
+        critical_config = {
+            'design_variables': self.config.get('design_variables', []),
+            'objectives': self.config.get('objectives', []),
+            'constraints': self.config.get('constraints', [])
+        }
+
+        config_str = json.dumps(critical_config, sort_keys=True)
+        return hashlib.md5(config_str.encode()).hexdigest()
+
+    def _get_study_metadata_path(self, study_name: str) -> Path:
+        """Get path to study metadata file."""
+        return self.output_dir / f'study_{study_name}_metadata.json'
+
+    def _get_study_db_path(self, study_name: str) -> Path:
+        """Get path to Optuna study database."""
+        return self.output_dir / f'study_{study_name}.db'
+
+    def _save_study_metadata(self, study_name: str, is_new: bool = False):
+        """
+        Save study metadata for tracking and resumption.
+
+        Args:
+            study_name: Name of the study
+            is_new: Whether this is a new study (vs resumed)
+        """
+        metadata_path = self._get_study_metadata_path(study_name)
+
+        # Load existing metadata if resuming
+        if metadata_path.exists() and not is_new:
+            with open(metadata_path, 'r') as f:
+                metadata = json.load(f)
+        else:
+            metadata = {
+                'study_name': study_name,
+                'created_at': datetime.now().isoformat(),
+                'config_hash': self._get_config_hash(),
+                'total_trials': 0,
+                'resume_count': 0
+            }
+
+        # Update metadata
+        if self.study:
+            metadata['total_trials'] = len(self.study.trials)
+            metadata['last_updated': datetime.now().isoformat()
+            if not is_new and 'created_at' in metadata:
+                metadata['resume_count'] = metadata.get('resume_count', 0) + 1
+
+        with open(metadata_path, 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    def _load_existing_study(self, study_name: str) -> Optional[optuna.Study]:
+        """
+        Load an existing Optuna study from database.
+
+        Args:
+            study_name: Name of the study to load
+
+        Returns:
+            Loaded study or None if not found
+        """
+        db_path = self._get_study_db_path(study_name)
+        metadata_path = self._get_study_metadata_path(study_name)
+
+        if not db_path.exists():
+            return None
+
+        # Check if metadata exists and validate config
+        if metadata_path.exists():
+            with open(metadata_path, 'r') as f:
+                metadata = json.load(f)
+
+            current_hash = self._get_config_hash()
+            stored_hash = metadata.get('config_hash', '')
+
+            if current_hash != stored_hash:
+                print("\n" + "!"*60)
+                print("WARNING: Configuration has changed since study was created!")
+                print("!"*60)
+                print("This may indicate:")
+                print("  - Different design variables")
+                print("  - Different objectives or constraints")
+                print("  - Topology/geometry changes")
+                print("\nRecommendation: Create a NEW study instead of resuming.")
+                print("!"*60)
+
+                response = input("\nContinue anyway? (yes/no): ")
+                if response.lower() not in ['yes', 'y']:
+                    print("Aborting. Please create a new study.")
+                    return None
+
+        # Load study from SQLite database
+        storage = optuna.storages.RDBStorage(
+            url=f"sqlite:///{db_path}",
+            engine_kwargs={"connect_args": {"timeout": 10.0}}
+        )
+
+        try:
+            study = optuna.load_study(
+                study_name=study_name,
+                storage=storage
+            )
+
+            print("\n" + "="*60)
+            print(f"LOADED EXISTING STUDY: {study_name}")
+            print("="*60)
+            print(f"Trials completed: {len(study.trials)}")
+            if len(study.trials) > 0:
+                print(f"Best value so far: {study.best_value:.6f}")
+                print(f"Best parameters:")
+                for param, value in study.best_params.items():
+                    print(f"  {param}: {value:.4f}")
+            print("="*60)
+
+            # Load existing history
+            history_json_path = self.output_dir / 'history.json'
+            if history_json_path.exists():
+                with open(history_json_path, 'r') as f:
+                    self.history = json.load(f)
+                print(f"Loaded {len(self.history)} previous trials from history")
+
+            return study
+
+        except Exception as e:
+            print(f"Error loading study: {e}")
+            return None
+
+    def list_studies(self) -> List[Dict[str, Any]]:
+        """
+        List all available studies in the output directory.
+
+        Returns:
+            List of study metadata dictionaries
+        """
+        studies = []
+
+        for metadata_file in self.output_dir.glob('study_*_metadata.json'):
+            try:
+                with open(metadata_file, 'r') as f:
+                    metadata = json.load(f)
+                    studies.append(metadata)
+            except Exception as e:
+                print(f"Error reading {metadata_file}: {e}")
+
+        return sorted(studies, key=lambda x: x.get('created_at', ''), reverse=True)
+
    def _objective_function(self, trial: optuna.Trial) -> float:
        """
        Optuna objective function.
@@ -254,41 +410,94 @@ class OptimizationRunner:

        return total_objective

-    def run(self, study_name: Optional[str] = None) -> optuna.Study:
+    def run(
+        self,
+        study_name: Optional[str] = None,
+        n_trials: Optional[int] = None,
+        resume: bool = False
+    ) -> optuna.Study:
        """
        Run the optimization.

        Args:
-            study_name: Optional name for the study
+            study_name: Optional name for the study. If None, generates timestamp-based name.
+            n_trials: Number of trials to run. If None, uses config value.
+                     When resuming, this is ADDITIONAL trials to run.
+            resume: If True, attempts to resume existing study. If False, creates new study.

        Returns:
            Completed Optuna study
+
+        Examples:
+            # New study with 50 trials
+            runner.run(study_name="bracket_opt_v1", n_trials=50)
+
+            # Resume existing study for 25 more trials
+            runner.run(study_name="bracket_opt_v1", n_trials=25, resume=True)
+
+            # New study after topology change
+            runner.run(study_name="bracket_opt_v2", n_trials=50)
        """
        if study_name is None:
            study_name = f"optimization_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Get optimization settings
        settings = self.config['optimization_settings']
-        n_trials = settings.get('n_trials', 100)
+        if n_trials is None:
+            n_trials = settings.get('n_trials', 100)
        sampler_name = settings.get('sampler', 'TPE')

-        # Create Optuna study
-        sampler = self._get_sampler(sampler_name)
-        self.study = optuna.create_study(
-            study_name=study_name,
-            direction='minimize',  # Total weighted objective is always minimized
-            sampler=sampler
-        )
+        # Try to load existing study if resume=True
+        if resume:
+            existing_study = self._load_existing_study(study_name)
+            if existing_study is not None:
+                self.study = existing_study
+                trials_completed = len(self.study.trials)

-        print("="*60)
-        print(f"STARTING OPTIMIZATION: {study_name}")
-        print("="*60)
-        print(f"Design Variables: {len(self.config['design_variables'])}")
-        print(f"Objectives: {len(self.config['objectives'])}")
-        print(f"Constraints: {len(self.config.get('constraints', []))}")
-        print(f"Trials: {n_trials}")
-        print(f"Sampler: {sampler_name}")
-        print("="*60)
+                print("\n" + "="*60)
+                print(f"RESUMING OPTIMIZATION: {study_name}")
+                print("="*60)
+                print(f"Trials already completed: {trials_completed}")
+                print(f"Additional trials to run: {n_trials}")
+                print(f"Total trials after completion: {trials_completed + n_trials}")
+                print("="*60)
+
+                # Save metadata indicating this is a resume
+                self._save_study_metadata(study_name, is_new=False)
+            else:
+                print(f"\nNo existing study '{study_name}' found. Creating new study instead.")
+                resume = False
+
+        # Create new study if not resuming or if resume failed
+        if not resume or self.study is None:
+            # Create storage for persistence
+            db_path = self._get_study_db_path(study_name)
+            storage = optuna.storages.RDBStorage(
+                url=f"sqlite:///{db_path}",
+                engine_kwargs={"connect_args": {"timeout": 10.0}}
+            )
+
+            sampler = self._get_sampler(sampler_name)
+            self.study = optuna.create_study(
+                study_name=study_name,
+                direction='minimize',  # Total weighted objective is always minimized
+                sampler=sampler,
+                storage=storage,
+                load_if_exists=False  # Force new study
+            )
+
+            print("="*60)
+            print(f"STARTING NEW OPTIMIZATION: {study_name}")
+            print("="*60)
+            print(f"Design Variables: {len(self.config['design_variables'])}")
+            print(f"Objectives: {len(self.config['objectives'])}")
+            print(f"Constraints: {len(self.config.get('constraints', []))}")
+            print(f"Trials: {n_trials}")
+            print(f"Sampler: {sampler_name}")
+            print("="*60)
+
+            # Save metadata for new study
+            self._save_study_metadata(study_name, is_new=True)

        # Run optimization
        start_time = time.time()
@@ -302,14 +511,16 @@ class OptimizationRunner:
        print("\n" + "="*60)
        print("OPTIMIZATION COMPLETE")
        print("="*60)
-        print(f"Total time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
+        print(f"Time for this run: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
+        print(f"Total trials completed: {len(self.study.trials)}")
        print(f"Best objective value: {self.best_value:.6f}")
        print(f"Best parameters:")
        for param, value in self.best_params.items():
            print(f"  {param}: {value:.4f}")
        print("="*60)

-        # Save final results
+        # Save metadata and final results
+        self._save_study_metadata(study_name)
        self._save_final_results()

        return self.study