feat: Add comprehensive study management system

Implement study persistence and resumption capabilities for optimization workflows:

Features:
- Resume existing studies to add more trials
- Create new studies when topology/config changes
- Study metadata tracking (creation date, trials, config hash)
- SQLite database persistence for Optuna studies
- Configuration change detection with warnings
- List all available studies

Key Changes:
- Enhanced OptimizationRunner.run() with resume parameter
- Added _load_existing_study() for study resumption
- Added _save_study_metadata() for tracking
- Added _get_config_hash() to detect topology changes
- Added list_studies() to view all studies
- SQLite storage for study persistence

Updated Files:
- optimization_engine/runner.py: Core study management
- examples/test_journal_optimization.py: Interactive study management
- examples/study_management_example.py: Comprehensive examples

Usage Examples:
  # New study
  runner.run(study_name="bracket_v1", n_trials=50)

  # Resume study (add 25 more trials)
  runner.run(study_name="bracket_v1", n_trials=25, resume=True)

  # New study after topology change
  runner.run(study_name="bracket_v2", n_trials=50)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-15 13:02:15 -05:00
parent a267e2d6f0
commit 7d97ef1cb5
3 changed files with 548 additions and 23 deletions

View File

@@ -16,10 +16,12 @@ from pathlib import Path
from typing import Dict, Any, List, Optional, Callable
import json
import time
import hashlib
import optuna
from optuna.samplers import TPESampler, CmaEsSampler, GPSampler
import pandas as pd
from datetime import datetime
import pickle
class OptimizationRunner:
@@ -124,6 +126,160 @@ class OptimizationRunner:
units_lower = units.lower() if units else 'dimensionless'
return precision_map.get(units_lower, 4) # Default to 4 decimals
def _get_config_hash(self) -> str:
"""
Generate hash of critical configuration parameters.
Used to detect if configuration has changed between study runs.
Returns:
MD5 hash of design variables, objectives, and constraints
"""
# Extract critical config parts that affect optimization
critical_config = {
'design_variables': self.config.get('design_variables', []),
'objectives': self.config.get('objectives', []),
'constraints': self.config.get('constraints', [])
}
config_str = json.dumps(critical_config, sort_keys=True)
return hashlib.md5(config_str.encode()).hexdigest()
def _get_study_metadata_path(self, study_name: str) -> Path:
"""Get path to study metadata file."""
return self.output_dir / f'study_{study_name}_metadata.json'
def _get_study_db_path(self, study_name: str) -> Path:
"""Get path to Optuna study database."""
return self.output_dir / f'study_{study_name}.db'
def _save_study_metadata(self, study_name: str, is_new: bool = False):
"""
Save study metadata for tracking and resumption.
Args:
study_name: Name of the study
is_new: Whether this is a new study (vs resumed)
"""
metadata_path = self._get_study_metadata_path(study_name)
# Load existing metadata if resuming
if metadata_path.exists() and not is_new:
with open(metadata_path, 'r') as f:
metadata = json.load(f)
else:
metadata = {
'study_name': study_name,
'created_at': datetime.now().isoformat(),
'config_hash': self._get_config_hash(),
'total_trials': 0,
'resume_count': 0
}
# Update metadata
if self.study:
metadata['total_trials'] = len(self.study.trials)
metadata['last_updated': datetime.now().isoformat()
if not is_new and 'created_at' in metadata:
metadata['resume_count'] = metadata.get('resume_count', 0) + 1
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2)
def _load_existing_study(self, study_name: str) -> Optional[optuna.Study]:
    """
    Load an existing Optuna study from its SQLite database.

    If a metadata file exists and its stored config hash differs from the
    current configuration, a warning is printed and the user is asked on
    stdin whether to continue. On a successful load, ``self.history`` is
    replaced with the contents of ``history.json`` when that file exists in
    the output directory.

    Args:
        study_name: Name of the study to load

    Returns:
        Loaded study, or None if the database file does not exist, the user
        declines to continue after the config-change warning, or loading
        raises an exception
    """
    db_path = self._get_study_db_path(study_name)
    metadata_path = self._get_study_metadata_path(study_name)

    if not db_path.exists():
        return None

    # Check if metadata exists and validate config
    if metadata_path.exists():
        with open(metadata_path, 'r') as f:
            metadata = json.load(f)

        current_hash = self._get_config_hash()
        stored_hash = metadata.get('config_hash', '')

        if current_hash != stored_hash:
            print("\n" + "!"*60)
            print("WARNING: Configuration has changed since study was created!")
            print("!"*60)
            print("This may indicate:")
            print(" - Different design variables")
            print(" - Different objectives or constraints")
            print(" - Topology/geometry changes")
            print("\nRecommendation: Create a NEW study instead of resuming.")
            print("!"*60)

            response = input("\nContinue anyway? (yes/no): ")
            if response.lower() not in ['yes', 'y']:
                print("Aborting. Please create a new study.")
                return None

    # Load study from SQLite database
    storage = optuna.storages.RDBStorage(
        url=f"sqlite:///{db_path}",
        engine_kwargs={"connect_args": {"timeout": 10.0}}
    )

    try:
        study = optuna.load_study(
            study_name=study_name,
            storage=storage
        )

        trials = study.trials

        print("\n" + "="*60)
        print(f"LOADED EXISTING STUDY: {study_name}")
        print("="*60)
        print(f"Trials completed: {len(trials)}")

        # best_value / best_params raise ValueError when no trial has finished
        # successfully; without this guard a study holding only failed or
        # pruned trials would be reported as unloadable by the except below.
        has_completed_trial = any(
            t.state == optuna.trial.TrialState.COMPLETE for t in trials
        )
        if has_completed_trial:
            print(f"Best value so far: {study.best_value:.6f}")
            print("Best parameters:")
            for param, value in study.best_params.items():
                # Categorical parameters may be non-numeric, and the float
                # format spec would raise on them.
                if isinstance(value, (int, float)):
                    print(f" {param}: {value:.4f}")
                else:
                    print(f" {param}: {value}")
        print("="*60)

        # Load existing history
        history_json_path = self.output_dir / 'history.json'
        if history_json_path.exists():
            with open(history_json_path, 'r') as f:
                self.history = json.load(f)
            print(f"Loaded {len(self.history)} previous trials from history")

        return study

    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None
        print(f"Error loading study: {e}")
        return None
def list_studies(self) -> List[Dict[str, Any]]:
    """
    Collect the metadata of every study recorded in the output directory.

    Each file matching 'study_*_metadata.json' is parsed as JSON. Files that
    cannot be read or parsed are reported on stdout and left out.

    Returns:
        Metadata dictionaries ordered by their 'created_at' value, newest
        first (entries without that key sort as an empty string)
    """
    collected = []

    for candidate in self.output_dir.glob('study_*_metadata.json'):
        try:
            with open(candidate, 'r') as handle:
                collected.append(json.load(handle))
        except Exception as e:
            print(f"Error reading {candidate}: {e}")

    collected.sort(key=lambda entry: entry.get('created_at', ''), reverse=True)
    return collected
def _objective_function(self, trial: optuna.Trial) -> float:
"""
Optuna objective function.
@@ -254,41 +410,94 @@ class OptimizationRunner:
return total_objective
def run(self, study_name: Optional[str] = None) -> optuna.Study:
def run(
self,
study_name: Optional[str] = None,
n_trials: Optional[int] = None,
resume: bool = False
) -> optuna.Study:
"""
Run the optimization.
Args:
study_name: Optional name for the study
study_name: Optional name for the study. If None, generates timestamp-based name.
n_trials: Number of trials to run. If None, uses config value.
When resuming, this is ADDITIONAL trials to run.
resume: If True, attempts to resume existing study. If False, creates new study.
Returns:
Completed Optuna study
Examples:
# New study with 50 trials
runner.run(study_name="bracket_opt_v1", n_trials=50)
# Resume existing study for 25 more trials
runner.run(study_name="bracket_opt_v1", n_trials=25, resume=True)
# New study after topology change
runner.run(study_name="bracket_opt_v2", n_trials=50)
"""
if study_name is None:
study_name = f"optimization_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
# Get optimization settings
settings = self.config['optimization_settings']
n_trials = settings.get('n_trials', 100)
if n_trials is None:
n_trials = settings.get('n_trials', 100)
sampler_name = settings.get('sampler', 'TPE')
# Create Optuna study
sampler = self._get_sampler(sampler_name)
self.study = optuna.create_study(
study_name=study_name,
direction='minimize', # Total weighted objective is always minimized
sampler=sampler
)
# Try to load existing study if resume=True
if resume:
existing_study = self._load_existing_study(study_name)
if existing_study is not None:
self.study = existing_study
trials_completed = len(self.study.trials)
print("="*60)
print(f"STARTING OPTIMIZATION: {study_name}")
print("="*60)
print(f"Design Variables: {len(self.config['design_variables'])}")
print(f"Objectives: {len(self.config['objectives'])}")
print(f"Constraints: {len(self.config.get('constraints', []))}")
print(f"Trials: {n_trials}")
print(f"Sampler: {sampler_name}")
print("="*60)
print("\n" + "="*60)
print(f"RESUMING OPTIMIZATION: {study_name}")
print("="*60)
print(f"Trials already completed: {trials_completed}")
print(f"Additional trials to run: {n_trials}")
print(f"Total trials after completion: {trials_completed + n_trials}")
print("="*60)
# Save metadata indicating this is a resume
self._save_study_metadata(study_name, is_new=False)
else:
print(f"\nNo existing study '{study_name}' found. Creating new study instead.")
resume = False
# Create new study if not resuming or if resume failed
if not resume or self.study is None:
# Create storage for persistence
db_path = self._get_study_db_path(study_name)
storage = optuna.storages.RDBStorage(
url=f"sqlite:///{db_path}",
engine_kwargs={"connect_args": {"timeout": 10.0}}
)
sampler = self._get_sampler(sampler_name)
self.study = optuna.create_study(
study_name=study_name,
direction='minimize', # Total weighted objective is always minimized
sampler=sampler,
storage=storage,
load_if_exists=False # Force new study
)
print("="*60)
print(f"STARTING NEW OPTIMIZATION: {study_name}")
print("="*60)
print(f"Design Variables: {len(self.config['design_variables'])}")
print(f"Objectives: {len(self.config['objectives'])}")
print(f"Constraints: {len(self.config.get('constraints', []))}")
print(f"Trials: {n_trials}")
print(f"Sampler: {sampler_name}")
print("="*60)
# Save metadata for new study
self._save_study_metadata(study_name, is_new=True)
# Run optimization
start_time = time.time()
@@ -302,14 +511,16 @@ class OptimizationRunner:
print("\n" + "="*60)
print("OPTIMIZATION COMPLETE")
print("="*60)
print(f"Total time: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
print(f"Time for this run: {elapsed_time:.1f} seconds ({elapsed_time/60:.1f} minutes)")
print(f"Total trials completed: {len(self.study.trials)}")
print(f"Best objective value: {self.best_value:.6f}")
print(f"Best parameters:")
for param, value in self.best_params.items():
print(f" {param}: {value:.4f}")
print("="*60)
# Save final results
# Save metadata and final results
self._save_study_metadata(study_name)
self._save_final_results()
return self.study