refactor: Major reorganization of optimization_engine module structure

BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 12:30:59 -05:00
parent 82f36689b7
commit eabcc4c3ca
120 changed files with 1127 additions and 637 deletions

View File

@@ -0,0 +1,60 @@
"""
Study Management
================
Study creation, state management, and lifecycle.
Modules:
- creator: Study creation from templates
- wizard: Interactive study setup wizard
- state: Study state tracking
- reset: Study reset functionality
- continuation: Resume interrupted studies
"""
# Lazy imports to avoid circular dependencies (PEP 562 module __getattr__):
# each sibling module is imported only on first attribute access.
def __getattr__(name):
    """Resolve a public study-management name lazily on first access."""
    if name in ('StudyCreator', 'create_study'):
        from . import creator as module
    elif name == 'StudyWizard':
        from . import wizard as module
    elif name == 'StudyState':
        from . import state as module
    elif name in ('StudyReset', 'reset_study'):
        from . import reset as module
    elif name in ('StudyContinuation', 'continue_study'):
        from . import continuation as module
    elif name == 'BenchmarkingSubstudy':
        from . import benchmarking as module
    elif name == 'generate_history':
        from . import history_generator as module
    else:
        raise AttributeError(f"module 'optimization_engine.study' has no attribute '{name}'")
    return getattr(module, name)


__all__ = [
    'StudyCreator',
    'create_study',
    'StudyWizard',
    'StudyState',
    'StudyReset',
    'reset_study',
    'StudyContinuation',
    'continue_study',
    'BenchmarkingSubstudy',
    'generate_history',
]

View File

@@ -0,0 +1,472 @@
"""
Benchmarking Substudy - Mandatory Discovery & Validation System
The benchmarking substudy is a mandatory first step for all optimization studies.
It performs model introspection, validation, and configuration proposal before
any optimization trials are run.
Purpose:
- Discover available expressions, OP2 contents, baseline performance
- Validate that model can be simulated and results extracted
- Propose initial optimization configuration
- Act as gatekeeper before full optimization
This substudy ALWAYS runs before any other substudy and auto-updates when
new substudies are created.
Author: Antoine Letarte
Date: 2025-11-17
Version: 1.0.0
"""
import json
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, asdict
from datetime import datetime
from optimization_engine.config.setup_wizard import OptimizationSetupWizard, ModelIntrospection, OP2Introspection
logger = logging.getLogger(__name__)
@dataclass
class BenchmarkResults:
    """Results from benchmarking analysis.

    Persisted as JSON via dataclasses.asdict (see
    BenchmarkingSubstudy._save_results / load_results).
    """
    # ISO-8601 timestamp of the benchmarking run
    timestamp: str
    # Model introspection
    expressions: Dict[str, Dict[str, Any]]  # name -> {value, units, formula}
    expression_count: int
    # OP2 introspection
    element_types: List[str]
    result_types: List[str]
    subcases: List[int]
    node_count: int
    element_count: int
    # Baseline simulation results
    baseline_op2_path: str  # empty string when no baseline OP2 was produced
    baseline_results: Dict[str, float]  # e.g., max_stress, max_displacement, mass
    # Validation status
    simulation_works: bool
    extraction_works: bool
    validation_passed: bool  # gatekeeper flag: substudies require this to be True
    # Proposals
    proposed_design_variables: List[Dict[str, Any]]
    proposed_extractors: List[Dict[str, Any]]
    proposed_objectives: List[str]
    # Issues found
    warnings: List[str]
    errors: List[str]
class BenchmarkingSubstudy:
    """
    Mandatory benchmarking substudy for discovery and validation.

    This runs before any optimization to:
    1. Discover what's in the model
    2. Validate the pipeline works
    3. Propose configuration
    4. Gate-keep before optimization
    """

    def __init__(self, study_dir: Path, prt_file: Path, sim_file: Path):
        """
        Initialize benchmarking substudy.

        Args:
            study_dir: Root study directory
            prt_file: Path to NX part file
            sim_file: Path to NX simulation file
        """
        self.study_dir = Path(study_dir)
        self.prt_file = Path(prt_file)
        self.sim_file = Path(sim_file)

        # Benchmarking substudy directory (created eagerly so results can be
        # written even after a partial run)
        self.benchmark_dir = self.study_dir / "substudies" / "benchmarking"
        self.benchmark_dir.mkdir(parents=True, exist_ok=True)

        # Results file
        self.results_file = self.benchmark_dir / "benchmark_results.json"

        # Use Phase 3.3 wizard for introspection
        self.wizard = OptimizationSetupWizard(prt_file, sim_file)

        # Log the coerced Path (fix: the raw argument may be a str, which has
        # no .name attribute)
        logger.info(f"Benchmarking substudy initialized for: {self.study_dir.name}")

    def run_discovery(self) -> BenchmarkResults:
        """
        Run complete discovery and validation.

        Walks five steps: model introspection, baseline simulation, OP2
        introspection, baseline extraction, and configuration proposals.
        Introspection failures (steps 1 and 3) return early with
        validation_passed=False; simulation/extraction problems are recorded
        as warnings/errors and discovery continues.

        Returns:
            BenchmarkResults with all discovery information
        """
        logger.info("=" * 80)
        logger.info("BENCHMARKING SUBSTUDY - Discovery & Validation")
        logger.info("=" * 80)
        logger.info("")

        # Start from an all-empty record; each step fills in its section.
        results = BenchmarkResults(
            timestamp=datetime.now().isoformat(),
            expressions={},
            expression_count=0,
            element_types=[],
            result_types=[],
            subcases=[],
            node_count=0,
            element_count=0,
            baseline_op2_path="",
            baseline_results={},
            simulation_works=False,
            extraction_works=False,
            validation_passed=False,
            proposed_design_variables=[],
            proposed_extractors=[],
            proposed_objectives=[],
            warnings=[],
            errors=[]
        )

        # Step 1: Model Introspection
        logger.info("Step 1: Model Introspection")
        logger.info("-" * 40)
        try:
            model_info = self.wizard.introspect_model()
            results.expressions = model_info.expressions
            results.expression_count = len(model_info.expressions)
            logger.info(f"Found {results.expression_count} expressions:")
            for name, info in model_info.expressions.items():
                logger.info(f" - {name}: {info['value']} {info['units']}")
            logger.info("")
        except Exception as e:
            # Without expressions there is nothing to optimize - hard failure.
            error_msg = f"Model introspection failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            results.validation_passed = False
            return results

        # Step 2: Baseline Simulation
        logger.info("Step 2: Baseline Simulation")
        logger.info("-" * 40)
        try:
            baseline_op2 = self.wizard.run_baseline_simulation()
            if baseline_op2:
                results.baseline_op2_path = str(baseline_op2)
                results.simulation_works = True
                logger.info(f"Baseline simulation complete: {baseline_op2.name}")
                logger.info("")
            else:
                warning_msg = "Baseline simulation returned no OP2 file"
                logger.warning(warning_msg)
                results.warnings.append(warning_msg)
                logger.info("")
        except Exception as e:
            # Soft failure: discovery can still report what it knows.
            error_msg = f"Baseline simulation failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            logger.info("Continuing with available information...")
            logger.info("")

        # Step 3: OP2 Introspection
        logger.info("Step 3: OP2 Introspection")
        logger.info("-" * 40)
        try:
            op2_info = self.wizard.introspect_op2()
            results.element_types = op2_info.element_types
            results.result_types = op2_info.result_types
            results.subcases = op2_info.subcases
            results.node_count = op2_info.node_count
            results.element_count = op2_info.element_count
            logger.info("OP2 Analysis:")
            logger.info(f" - Element types: {', '.join(results.element_types)}")
            logger.info(f" - Result types: {', '.join(results.result_types)}")
            logger.info(f" - Subcases: {results.subcases}")
            logger.info(f" - Nodes: {results.node_count}")
            logger.info(f" - Elements: {results.element_count}")
            logger.info("")
        except Exception as e:
            # Without OP2 contents no extractors can be proposed - hard failure.
            error_msg = f"OP2 introspection failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            results.validation_passed = False
            return results

        # Step 4: Extract Baseline Results
        logger.info("Step 4: Extract Baseline Results")
        logger.info("-" * 40)
        if results.baseline_op2_path:
            try:
                # Try to extract common results
                baseline_results = self._extract_baseline_results(Path(results.baseline_op2_path))
                results.baseline_results = baseline_results
                results.extraction_works = True
                logger.info("Baseline performance:")
                for key, value in baseline_results.items():
                    logger.info(f" - {key}: {value}")
                logger.info("")
            except Exception as e:
                warning_msg = f"Baseline extraction partially failed: {e}"
                logger.warning(warning_msg)
                results.warnings.append(warning_msg)
                # Not a hard failure - continue
        else:
            # Fix: previously extraction was attempted with Path("") (i.e. the
            # current directory) when the baseline simulation produced no OP2
            # file; skip explicitly with a clear warning instead.
            warning_msg = "Baseline extraction skipped: no baseline OP2 file available"
            logger.warning(warning_msg)
            results.warnings.append(warning_msg)

        # Step 5: Generate Proposals
        logger.info("Step 5: Generate Configuration Proposals")
        logger.info("-" * 40)
        proposals = self._generate_proposals(model_info, op2_info, results.baseline_results)
        results.proposed_design_variables = proposals['design_variables']
        results.proposed_extractors = proposals['extractors']
        results.proposed_objectives = proposals['objectives']
        logger.info(f"Proposed design variables ({len(results.proposed_design_variables)}):")
        for var in results.proposed_design_variables:
            logger.info(f" - {var['parameter']}: {var.get('suggested_range', 'range needed')}")
        logger.info(f"\nProposed extractors ({len(results.proposed_extractors)}):")
        for ext in results.proposed_extractors:
            logger.info(f" - {ext['action']}: {ext['description']}")
        logger.info(f"\nProposed objectives ({len(results.proposed_objectives)}):")
        for obj in results.proposed_objectives:
            logger.info(f" - {obj}")
        logger.info("")

        # Validation passed if simulation and basic extraction work
        results.validation_passed = results.simulation_works and len(results.element_types) > 0

        # Save results
        self._save_results(results)

        logger.info("=" * 80)
        if results.validation_passed:
            logger.info("BENCHMARKING COMPLETE - Validation PASSED")
        else:
            logger.info("BENCHMARKING COMPLETE - Validation FAILED")
        logger.info("=" * 80)
        logger.info("")
        return results

    def _extract_baseline_results(self, op2_file: Path) -> Dict[str, float]:
        """Extract baseline results from OP2 file.

        Best-effort: returns whatever of max_displacement / max_von_mises
        could be read; missing results are simply absent from the dict.
        """
        from pyNastran.op2.op2 import OP2
        results = {}
        try:
            op2 = OP2()
            op2.read_op2(str(op2_file), load_geometry=False)
            # Try to extract displacement
            if hasattr(op2, 'displacements') and op2.displacements:
                disp_data = list(op2.displacements.values())[0]
                if hasattr(disp_data, 'data'):
                    max_disp = float(abs(disp_data.data).max())
                    results['max_displacement'] = round(max_disp, 6)
            # Try to extract stress (tetra first, then hexa)
            if hasattr(op2, 'ctetra_stress') and op2.ctetra_stress:
                stress_data = list(op2.ctetra_stress.values())[0]
                if hasattr(stress_data, 'data'):
                    max_stress = float(abs(stress_data.data).max())
                    results['max_von_mises'] = round(max_stress, 3)
            elif hasattr(op2, 'chexa_stress') and op2.chexa_stress:
                stress_data = list(op2.chexa_stress.values())[0]
                if hasattr(stress_data, 'data'):
                    max_stress = float(abs(stress_data.data).max())
                    results['max_von_mises'] = round(max_stress, 3)
        except Exception as e:
            logger.warning(f"Could not extract all baseline results: {e}")
        return results

    def _generate_proposals(self, model_info: ModelIntrospection, op2_info: OP2Introspection,
                            baseline_results: Dict[str, float]) -> Dict[str, Any]:
        """Generate configuration proposals based on discovery.

        Returns a dict with 'design_variables', 'extractors' and 'objectives'
        lists, derived from the model expressions and OP2 contents.
        """
        proposals = {
            'design_variables': [],
            'extractors': [],
            'objectives': []
        }
        # Propose design variables from expressions
        # Filter out likely constants (e.g., material properties, loads)
        constant_keywords = ['modulus', 'poisson', 'density', 'load', 'force', 'pressure']
        for name, info in model_info.expressions.items():
            # Skip if likely a constant
            if any(keyword in name.lower() for keyword in constant_keywords):
                continue
            # Propose as design variable
            proposals['design_variables'].append({
                'parameter': name,
                'current_value': info['value'],
                'units': info['units'],
                'suggested_range': f"±20% of {info['value']} {info['units']}"
            })
        # Propose extractors based on OP2 contents
        if 'displacement' in op2_info.result_types or 'DISPLACEMENT' in op2_info.result_types:
            proposals['extractors'].append({
                'action': 'extract_displacement',
                'description': 'Extract displacement results from OP2 file',
                'params': {'result_type': 'displacement'}
            })
            proposals['objectives'].append('max_displacement (minimize or maximize)')
        if op2_info.element_types:
            element_type = op2_info.element_types[0].lower()
            proposals['extractors'].append({
                'action': 'extract_solid_stress',
                'description': f'Extract stress from {element_type.upper()} elements',
                'params': {
                    'result_type': 'stress',
                    'element_type': element_type
                }
            })
            proposals['objectives'].append('max_von_mises (minimize for safety)')
        return proposals

    def _save_results(self, results: BenchmarkResults):
        """Save benchmark results to JSON file."""
        import numpy as np
        results_dict = asdict(results)

        # Convert numpy types to native Python types for JSON serialization
        def convert_numpy(obj):
            if isinstance(obj, np.integer):
                return int(obj)
            elif isinstance(obj, np.floating):
                return float(obj)
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            elif isinstance(obj, dict):
                return {k: convert_numpy(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_numpy(item) for item in obj]
            return obj

        results_dict = convert_numpy(results_dict)
        with open(self.results_file, 'w') as f:
            json.dump(results_dict, f, indent=2)
        logger.info(f"Benchmark results saved to: {self.results_file}")

    def load_results(self) -> Optional[BenchmarkResults]:
        """Load previous benchmark results if they exist, else None."""
        if not self.results_file.exists():
            return None
        with open(self.results_file, 'r') as f:
            data = json.load(f)
        return BenchmarkResults(**data)

    def generate_report(self, results: BenchmarkResults) -> str:
        """
        Generate human-readable benchmark report.

        Returns:
            Markdown formatted report
        """
        report = []
        report.append("# Benchmarking Report")
        report.append("")
        report.append(f"**Study**: {self.study_dir.name}")
        report.append(f"**Date**: {results.timestamp}")
        report.append(f"**Validation**: {'✅ PASSED' if results.validation_passed else '❌ FAILED'}")
        report.append("")
        report.append("## Model Introspection")
        report.append("")
        report.append(f"**Expressions Found**: {results.expression_count}")
        report.append("")
        report.append("| Expression | Value | Units |")
        report.append("|------------|-------|-------|")
        for name, info in results.expressions.items():
            report.append(f"| {name} | {info['value']} | {info['units']} |")
        report.append("")
        report.append("## OP2 Analysis")
        report.append("")
        report.append(f"- **Element Types**: {', '.join(results.element_types)}")
        report.append(f"- **Result Types**: {', '.join(results.result_types)}")
        report.append(f"- **Subcases**: {results.subcases}")
        report.append(f"- **Nodes**: {results.node_count}")
        report.append(f"- **Elements**: {results.element_count}")
        report.append("")
        report.append("## Baseline Performance")
        report.append("")
        if results.baseline_results:
            for key, value in results.baseline_results.items():
                report.append(f"- **{key}**: {value}")
        else:
            report.append("*No baseline results extracted*")
        report.append("")
        report.append("## Configuration Proposals")
        report.append("")
        report.append("### Proposed Design Variables")
        report.append("")
        for var in results.proposed_design_variables:
            # Fix: use .get like run_discovery does - results loaded from an
            # older JSON file may lack 'suggested_range'.
            report.append(f"- **{var['parameter']}**: {var.get('suggested_range', 'range needed')}")
        report.append("")
        report.append("### Proposed Extractors")
        report.append("")
        for ext in results.proposed_extractors:
            report.append(f"- **{ext['action']}**: {ext['description']}")
        report.append("")
        report.append("### Proposed Objectives")
        report.append("")
        for obj in results.proposed_objectives:
            report.append(f"- {obj}")
        report.append("")
        if results.warnings:
            report.append("## Warnings")
            report.append("")
            for warning in results.warnings:
                report.append(f"⚠️ {warning}")
            report.append("")
        if results.errors:
            report.append("## Errors")
            report.append("")
            for error in results.errors:
                report.append(f"{error}")
            report.append("")
        return "\n".join(report)
def main():
    """Smoke-test entry point: describe how this module is meant to be used."""
    banner = "=" * 80
    for line in (
        "Benchmarking Substudy Test",
        banner,
        "",
        "This module provides mandatory discovery and validation for all studies.",
        "Use it via the study setup workflow.",
        "",
    ):
        print(line)


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,264 @@
"""
Study Continuation - Standard utility for continuing existing optimization studies.
This module provides a standardized way to continue optimization studies with
additional trials, preserving all existing trial data and learned knowledge.
Usage:
from optimization_engine.study.continuation import continue_study
continue_study(
study_dir=Path("studies/my_study"),
additional_trials=50,
objective_function=my_objective,
design_variables={'param1': (0, 10), 'param2': (0, 100)}
)
This is an Atomizer standard feature that should be exposed in the dashboard
alongside "Start New Optimization".
"""
import optuna
import json
from pathlib import Path
from typing import Dict, Tuple, Callable, Optional
def continue_study(
    study_dir: Path,
    additional_trials: int,
    objective_function: Callable,
    design_variables: Optional[Dict[str, Tuple[float, float]]] = None,
    target_value: Optional[float] = None,
    tolerance: Optional[float] = None,
    verbose: bool = True
) -> Dict:
    """
    Continue an existing optimization study with additional trials.

    Args:
        study_dir: Path to study directory containing 1_setup and 2_results
        additional_trials: Number of additional trials to run
        objective_function: Objective function to optimize (same as original)
        design_variables: Optional dict of design variable bounds (for reference)
        target_value: Optional target value for early stopping
        tolerance: Optional tolerance for target achievement
        verbose: Print progress information

    Returns:
        Dict containing:
        - 'study': The Optuna study object
        - 'total_trials': Total number of trials after continuation
        - 'successful_trials': Number of successful trials
        - 'pruned_trials': Number of pruned trials
        - 'best_value': Best objective value achieved (None if no trial completed)
        - 'best_params': Best parameters found (None if no trial completed)
        - 'target_achieved': Whether target was achieved (if specified)

    Raises:
        FileNotFoundError: If study database doesn't exist
        ValueError: If study name cannot be determined
    """
    # Setup paths
    setup_dir = study_dir / "1_setup"
    results_dir = study_dir / "2_results"

    # Load workflow config to get study name
    workflow_file = setup_dir / "workflow_config.json"
    if not workflow_file.exists():
        raise FileNotFoundError(
            f"Workflow config not found: {workflow_file}. "
            f"Make sure this is a valid study directory."
        )
    with open(workflow_file) as f:
        workflow = json.load(f)
    study_name = workflow.get('study_name')
    if not study_name:
        raise ValueError("Study name not found in workflow_config.json")

    # Load existing study
    storage = f"sqlite:///{results_dir / 'study.db'}"
    try:
        study = optuna.load_study(study_name=study_name, storage=storage)
    except KeyError:
        raise FileNotFoundError(
            f"Study '{study_name}' not found in database. "
            f"Run the initial optimization first using run_optimization.py"
        )

    # Get current state.
    # Fix: Optuna's study.best_trial / study.best_value raise ValueError when
    # the study has no completed trials (they never return a falsy value),
    # so guard with try/except rather than a truthiness test.
    current_trials = len(study.trials)
    try:
        current_best = study.best_value
    except ValueError:
        current_best = None

    if verbose:
        print("\n" + "="*70)
        print(" CONTINUING OPTIMIZATION STUDY")
        print("="*70)
        print(f"\n Study: {study_name}")
        print(f" Current trials: {current_trials}")
        if current_best is not None:
            print(f" Current best: {current_best:.6f}")
            print(f" Best params:")
            for param, value in study.best_params.items():
                print(f" {param}: {value:.4f}")
        print(f"\n Adding {additional_trials} trials...\n")

    # Continue optimization
    study.optimize(
        objective_function,
        n_trials=additional_trials,
        timeout=None,
        catch=(Exception,)  # Catch exceptions to allow graceful continuation
    )

    # Analyze results
    total_trials = len(study.trials)
    successful_trials = len([t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE])
    pruned_trials = len([t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED])

    # Same ValueError guard: with catch=(Exception,), every trial may have
    # failed, in which case there is still no best trial.
    try:
        best_value = study.best_value
        best_params = study.best_params
    except ValueError:
        best_value = None
        best_params = None

    results = {
        'study': study,
        'total_trials': total_trials,
        'successful_trials': successful_trials,
        'pruned_trials': pruned_trials,
        'best_value': best_value,
        'best_params': best_params,
    }

    # Check target achievement if specified
    if target_value is not None and tolerance is not None:
        target_achieved = best_value is not None and abs(best_value - target_value) <= tolerance
        results['target_achieved'] = target_achieved

    if verbose:
        print("\n" + "="*70)
        print(" CONTINUATION COMPLETE")
        print("="*70)
        print(f" Total trials: {total_trials} (added {additional_trials})")
        print(f" Successful: {successful_trials}")
        print(f" Pruned: {pruned_trials}")
        print(f" Pruning rate: {pruned_trials/total_trials*100:.1f}%")
        if best_value is not None:
            print(f"\n Best value: {best_value:.6f}")
            print(f" Best params:")
            for param, value in best_params.items():
                print(f" {param}: {value:.4f}")
        if target_value is not None and tolerance is not None:
            target_achieved = results.get('target_achieved', False)
            print(f"\n Target: {target_value} ± {tolerance}")
            print(f" Target achieved: {'YES' if target_achieved else 'NO'}")
        print("="*70 + "\n")

    return results
def can_continue_study(study_dir: Path) -> Tuple[bool, str]:
    """
    Check if a study can be continued.

    Args:
        study_dir: Path to study directory

    Returns:
        (can_continue, message): Tuple of bool and explanation message
    """
    setup_dir = study_dir / "1_setup"
    results_dir = study_dir / "2_results"

    # The workflow config is the study's identity record.
    workflow_file = setup_dir / "workflow_config.json"
    if not workflow_file.exists():
        return False, f"No workflow_config.json found in {setup_dir}"

    # Determine the study name; any parse/read problem is a clean "no".
    try:
        with open(workflow_file) as f:
            study_name = json.load(f).get('study_name')
        if not study_name:
            return False, "No study_name in workflow_config.json"
    except Exception as e:
        return False, f"Error reading workflow config: {e}"

    # A continuable study needs an existing Optuna SQLite database.
    db_file = results_dir / "study.db"
    if not db_file.exists():
        return False, "No study.db found. Run initial optimization first."

    # Finally, the named study must load and contain at least one trial.
    try:
        study = optuna.load_study(study_name=study_name, storage=f"sqlite:///{db_file}")
        trial_count = len(study.trials)
    except KeyError:
        return False, f"Study '{study_name}' not found in database"
    except Exception as e:
        return False, f"Error loading study: {e}"
    if trial_count == 0:
        return False, "Study exists but has no trials yet"
    return True, f"Study '{study_name}' ready (current trials: {trial_count})"
def get_study_status(study_dir: Path) -> Optional[Dict]:
    """
    Get current status of a study.

    Args:
        study_dir: Path to study directory

    Returns:
        Dict with study status info, or None if study doesn't exist
        {
            'study_name': str,
            'total_trials': int,
            'successful_trials': int,
            'pruned_trials': int,
            'best_value': float,
            'best_params': dict
        }
    """
    # Reuse the continuation precheck as an existence test.
    ready, _message = can_continue_study(study_dir)
    if not ready:
        return None

    # Load study identity and storage locations.
    with open(study_dir / "1_setup" / "workflow_config.json") as f:
        study_name = json.load(f)['study_name']
    storage = f"sqlite:///{study_dir / '2_results' / 'study.db'}"

    try:
        study = optuna.load_study(study_name=study_name, storage=storage)
        trials = study.trials
        total = len(trials)
        completed = sum(1 for t in trials if t.state == optuna.trial.TrialState.COMPLETE)
        pruned = sum(1 for t in trials if t.state == optuna.trial.TrialState.PRUNED)
        # study.best_trial raises ValueError when nothing completed; the
        # surrounding except handles that by returning None, as before.
        has_best = study.best_trial
        return {
            'study_name': study_name,
            'total_trials': total,
            'successful_trials': completed,
            'pruned_trials': pruned,
            'pruning_rate': pruned / total if total > 0 else 0,
            'best_value': study.best_value if has_best else None,
            'best_params': study.best_params if has_best else None
        }
    except Exception:
        return None

View File

@@ -0,0 +1,412 @@
"""
Study Creator - Atomizer Optimization Study Management
Creates and manages optimization studies with mandatory benchmarking workflow.
Workflow:
1. Create study structure
2. User provides NX models
3. Run benchmarking (mandatory)
4. Create substudies (substudy_1, substudy_2, etc.)
5. Each substudy validates against benchmarking before running
Author: Antoine Letarte
Date: 2025-11-17
Version: 1.0.0
"""
import json
import shutil
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime
import logging
from optimization_engine.study.benchmarking import BenchmarkingSubstudy, BenchmarkResults
logger = logging.getLogger(__name__)
class StudyCreator:
"""
Creates and manages Atomizer optimization studies.
Enforces mandatory benchmarking workflow and provides
study structure management.
"""
def __init__(self, studies_root: Optional[Path] = None):
    """
    Initialize study creator.

    Args:
        studies_root: Root directory for all studies (default: ./studies)
    """
    # Default to ./studies under the current working directory.
    if studies_root is None:
        studies_root = Path.cwd() / "studies"
    # Coerce so str arguments also work.
    self.studies_root = Path(studies_root)
    # Ensure the root exists up front so create_study() can assume it.
    self.studies_root.mkdir(parents=True, exist_ok=True)
    logger.info(f"StudyCreator initialized: {self.studies_root}")
def create_study(self, study_name: str, description: str = "") -> Path:
    """
    Create a new optimization study with standard structure.

    Args:
        study_name: Name of the study (will be folder name)
        description: Brief description of the study

    Returns:
        Path to created study directory
    """
    study_dir = self.studies_root / study_name
    # Idempotent: an existing study is returned untouched.
    if study_dir.exists():
        logger.warning(f"Study already exists: {study_name}")
        return study_dir

    logger.info(f"Creating new study: {study_name}")

    # Create directory structure
    for relative in (
        "model",
        "substudies/benchmarking",
        "config",
        "plugins/post_calculation",
        "results",
    ):
        (study_dir / relative).mkdir(parents=True)

    # Create study metadata
    metadata = {
        "study_name": study_name,
        "description": description,
        "created": datetime.now().isoformat(),
        "status": "created",
        "benchmarking_completed": False,
        "substudies": []
    }
    with open(study_dir / "study_metadata.json", 'w') as f:
        json.dump(metadata, f, indent=2)

    # Create README
    with open(study_dir / "README.md", 'w', encoding='utf-8') as f:
        f.write(self._generate_study_readme(study_name, description))

    for message in (
        f"Study created: {study_dir}",
        "",
        "Next steps:",
        f" 1. Add NX model files to: {study_dir / 'model'}/",
        " 2. Run benchmarking: study.run_benchmarking()",
        "",
    ):
        logger.info(message)
    return study_dir
def run_benchmarking(self, study_dir: Path, prt_file: Path, sim_file: Path) -> BenchmarkResults:
    """
    Run mandatory benchmarking for a study.

    This MUST be run before any optimization substudies.

    Args:
        study_dir: Study directory
        prt_file: Path to NX part file
        sim_file: Path to NX simulation file

    Returns:
        BenchmarkResults
    """
    logger.info("=" * 80)
    logger.info(f"RUNNING BENCHMARKING FOR STUDY: {study_dir.name}")
    logger.info("=" * 80)
    logger.info("")
    # Create benchmarking substudy
    benchmark = BenchmarkingSubstudy(study_dir, prt_file, sim_file)
    # Run discovery
    results = benchmark.run_discovery()
    # Generate report (markdown summary next to the raw JSON results)
    report_content = benchmark.generate_report(results)
    report_file = study_dir / "substudies" / "benchmarking" / "BENCHMARK_REPORT.md"
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report_content)
    logger.info(f"Benchmark report saved to: {report_file}")
    logger.info("")
    # Update metadata: create_substudy() refuses to run until
    # benchmarking_completed is True.
    self._update_metadata(study_dir, {
        "benchmarking_completed": results.validation_passed,
        "last_benchmarking": datetime.now().isoformat(),
        "status": "benchmarked" if results.validation_passed else "benchmark_failed"
    })
    if not results.validation_passed:
        logger.error("Benchmarking validation FAILED!")
        logger.error("Fix issues before creating substudies")
    else:
        logger.info("Benchmarking validation PASSED!")
        logger.info("Ready to create substudies")
    logger.info("")
    return results
def create_substudy(self, study_dir: Path, substudy_name: Optional[str] = None,
                    config: Optional[Dict[str, Any]] = None) -> Path:
    """
    Create a new substudy.

    Automatically validates against benchmarking before proceeding.

    Args:
        study_dir: Study directory
        substudy_name: Name of substudy (if None, auto-generates substudy_N)
        config: Optional configuration dict

    Returns:
        Path to substudy directory
    """
    # Gatekeeper: benchmarking must have passed before any substudy exists.
    metadata = self._load_metadata(study_dir)
    if not metadata.get('benchmarking_completed', False):
        raise ValueError(
            "Benchmarking must be completed before creating substudies!\n"
            "Run: study.run_benchmarking(prt_file, sim_file)"
        )

    # Auto-generate substudy name if not provided ("benchmarking" is not
    # counted when numbering substudy_N).
    if substudy_name is None:
        numbered = [s for s in metadata.get('substudies', []) if s != 'benchmarking']
        substudy_name = f"substudy_{len(numbered) + 1}"

    substudy_dir = study_dir / "substudies" / substudy_name
    if substudy_dir.exists():
        logger.warning(f"Substudy already exists: {substudy_name}")
        return substudy_dir

    logger.info(f"Creating substudy: {substudy_name}")
    substudy_dir.mkdir(parents=True, exist_ok=True)

    # Fall back to a template derived from the benchmark proposals.
    if config is None:
        config = self._create_default_substudy_config(study_dir, substudy_name)
    config_file = substudy_dir / "config.json"
    with open(config_file, 'w') as f:
        json.dump(config, f, indent=2)

    # Record the new substudy in the study metadata.
    substudies = metadata.get('substudies', [])
    if substudy_name not in substudies:
        substudies.append(substudy_name)
        self._update_metadata(study_dir, {'substudies': substudies})

    for message in (f"Substudy created: {substudy_dir}", f"Config: {config_file}", ""):
        logger.info(message)
    return substudy_dir
def _create_default_substudy_config(self, study_dir: Path, substudy_name: str) -> Dict[str, Any]:
"""Create default substudy configuration based on benchmarking."""
# Load benchmark results
benchmark_file = study_dir / "substudies" / "benchmarking" / "benchmark_results.json"
if not benchmark_file.exists():
raise FileNotFoundError(f"Benchmark results not found: {benchmark_file}")
with open(benchmark_file, 'r') as f:
benchmark_data = json.load(f)
# Create config from benchmark proposals
config = {
"substudy_name": substudy_name,
"description": f"Substudy {substudy_name}",
"created": datetime.now().isoformat(),
"optimization": {
"algorithm": "TPE",
"direction": "minimize",
"n_trials": 20,
"n_startup_trials": 10,
"design_variables": []
},
"continuation": {
"enabled": False
},
"solver": {
"nastran_version": "2412",
"use_journal": True,
"timeout": 300
}
}
# Add proposed design variables
for var in benchmark_data.get('proposed_design_variables', []):
config["optimization"]["design_variables"].append({
"parameter": var['parameter'],
"min": 0.0, # User must fill
"max": 0.0, # User must fill
"units": var.get('units', ''),
"comment": f"From benchmarking: {var.get('suggested_range', 'define range')}"
})
return config
def _load_metadata(self, study_dir: Path) -> Dict[str, Any]:
"""Load study metadata."""
metadata_file = study_dir / "study_metadata.json"
if not metadata_file.exists():
return {}
with open(metadata_file, 'r') as f:
return json.load(f)
def _update_metadata(self, study_dir: Path, updates: Dict[str, Any]):
"""Update study metadata."""
metadata = self._load_metadata(study_dir)
metadata.update(updates)
metadata_file = study_dir / "study_metadata.json"
with open(metadata_file, 'w') as f:
json.dump(metadata, f, indent=2)
def _generate_study_readme(self, study_name: str, description: str) -> str:
"""Generate README for new study."""
readme = []
readme.append(f"# {study_name}")
readme.append("")
readme.append(f"**Description**: {description}")
readme.append(f"**Created**: {datetime.now().strftime('%Y-%m-%d')}")
readme.append("")
readme.append("## Study Structure")
readme.append("")
readme.append("```")
readme.append(f"{study_name}/")
readme.append("├── model/ # NX model files (.prt, .sim)")
readme.append("├── substudies/")
readme.append("│ ├── benchmarking/ # Mandatory discovery & validation")
readme.append("│ ├── substudy_1/ # First optimization campaign")
readme.append("│ └── substudy_2/ # Additional campaigns")
readme.append("├── config/ # Configuration templates")
readme.append("├── plugins/ # Study-specific hooks")
readme.append("├── results/ # Optimization results")
readme.append("└── README.md # This file")
readme.append("```")
readme.append("")
readme.append("## Workflow")
readme.append("")
readme.append("### 1. Add NX Models")
readme.append("Place your `.prt` and `.sim` files in the `model/` directory.")
readme.append("")
readme.append("### 2. Run Benchmarking (Mandatory)")
readme.append("```python")
readme.append("from optimization_engine.study.creator import StudyCreator")
readme.append("")
readme.append("creator = StudyCreator()")
readme.append(f"results = creator.run_benchmarking(")
readme.append(f" study_dir=Path('studies/{study_name}'),")
readme.append(" prt_file=Path('studies/{study_name}/model/YourPart.prt'),")
readme.append(" sim_file=Path('studies/{study_name}/model/YourSim.sim')")
readme.append(")")
readme.append("```")
readme.append("")
readme.append("### 3. Review Benchmark Report")
readme.append("Check `substudies/benchmarking/BENCHMARK_REPORT.md` for:")
readme.append("- Discovered expressions")
readme.append("- OP2 contents")
readme.append("- Baseline performance")
readme.append("- Configuration proposals")
readme.append("")
readme.append("### 4. Create Substudies")
readme.append("```python")
readme.append("# Auto-numbered: substudy_1, substudy_2, etc.")
readme.append(f"substudy_dir = creator.create_substudy(Path('studies/{study_name}'))")
readme.append("")
readme.append("# Or custom name:")
readme.append(f"substudy_dir = creator.create_substudy(")
readme.append(f" Path('studies/{study_name}'), ")
readme.append(" substudy_name='coarse_exploration'")
readme.append(")")
readme.append("```")
readme.append("")
readme.append("### 5. Configure & Run Optimization")
readme.append("Edit `substudies/substudy_N/config.json` with:")
readme.append("- Design variable ranges")
readme.append("- Objectives and constraints")
readme.append("- Number of trials")
readme.append("")
readme.append("Then run the optimization!")
readme.append("")
readme.append("## Status")
readme.append("")
readme.append("See `study_metadata.json` for current study status.")
readme.append("")
return "\n".join(readme)
def list_studies(self) -> List[Dict[str, Any]]:
"""List all studies in the studies root."""
studies = []
for study_dir in self.studies_root.iterdir():
if not study_dir.is_dir():
continue
metadata_file = study_dir / "study_metadata.json"
if metadata_file.exists():
with open(metadata_file, 'r') as f:
metadata = json.load(f)
studies.append({
'name': study_dir.name,
'path': study_dir,
'status': metadata.get('status', 'unknown'),
'created': metadata.get('created', 'unknown'),
'benchmarking_completed': metadata.get('benchmarking_completed', False),
'substudies_count': len(metadata.get('substudies', [])) - 1 # Exclude benchmarking
})
return studies
def main():
    """Example usage of StudyCreator: print existing studies and usage hints."""
    banner = "=" * 80
    print(banner)
    print("Atomizer Study Creator")
    print(banner)
    print()
    creator = StudyCreator()
    # Show a one-line summary per existing study.
    existing = creator.list_studies()
    print(f"Existing studies: {len(existing)}")
    for entry in existing:
        status_icon = "" if entry['benchmarking_completed'] else "⚠️"
        print(f"  {status_icon} {entry['name']} ({entry['status']}) - {entry['substudies_count']} substudies")
    print()
    print("To create a new study:")
    print("  creator.create_study('my_study_name', 'Brief description')")
    print()
if __name__ == '__main__':
    # Run the interactive example when executed as a script.
    main()

View File

@@ -0,0 +1,69 @@
"""
Generate history.json from trial directories.
For older substudies that don't have history.json,
reconstruct it from individual trial results.json files.
"""
from pathlib import Path
import json
import sys
def generate_history(substudy_dir: Path) -> list:
    """Reconstruct the optimization history from per-trial results files.

    Scans trial_* directories under *substudy_dir*, reading each
    results.json, and returns history entries sorted by trial number.
    Trials without a results.json are skipped with a warning.
    """
    substudy_dir = Path(substudy_dir)
    entries = []
    for trial_dir in sorted(substudy_dir.glob('trial_*')):
        results_file = trial_dir / 'results.json'
        if not results_file.exists():
            print(f"Warning: No results.json in {trial_dir.name}")
            continue
        with open(results_file, 'r') as fh:
            data = json.load(fh)
        # The trial number is encoded in the directory name (trial_<N>).
        number = int(trial_dir.name.split('_')[-1])
        entries.append({
            'trial_number': number,
            'timestamp': data.get('timestamp', ''),
            'design_variables': data.get('design_variables', {}),
            'objectives': data.get('objectives', {}),
            'constraints': data.get('constraints', {}),
            'total_objective': data.get('total_objective', 0.0)
        })
    # Numeric sort (directory sort above is lexicographic).
    entries.sort(key=lambda e: e['trial_number'])
    return entries
if __name__ == '__main__':
    # CLI entry point: regenerate history.json for one substudy directory.
    if len(sys.argv) < 2:
        print("Usage: python generate_history_from_trials.py <substudy_directory>")
        sys.exit(1)
    target = Path(sys.argv[1])
    print(f"Generating history.json from trials in: {target}")
    entries = generate_history(target)
    print(f"Generated {len(entries)} history entries")
    # Persist the reconstructed history next to the trial directories.
    out_path = target / 'history.json'
    with open(out_path, 'w') as fh:
        json.dump(entries, fh, indent=2)
    print(f"Saved: {out_path}")

View File

@@ -0,0 +1,447 @@
"""
Study Reset and Cleanup Utility for Atomizer
Provides safe operations to reset or clean up optimization studies:
- Reset database (remove all trials, keep configuration)
- Clean up temporary files
- Archive results
- Full study deletion
Usage:
python -m optimization_engine.study_reset my_study --reset-db
python -m optimization_engine.study_reset my_study --cleanup-temp
python -m optimization_engine.study_reset my_study --full-reset
Safety features:
- Confirmation prompts for destructive operations
- Automatic backups before deletion
- Dry-run mode to preview changes
"""
import json
import shutil
import optuna
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger(__name__)
class StudyReset:
    """Handles study reset and cleanup operations.

    All destructive operations support ``dry_run`` previews and, where
    applicable, automatic backups/archives before deletion.
    """

    def __init__(self, study_name: str, studies_dir: str = "studies"):
        """
        Initialize study reset utility.

        Args:
            study_name: Name of the study to reset
            studies_dir: Base directory for studies
        """
        self.study_name = study_name
        self.studies_dir = Path(studies_dir)
        self.study_path = self.studies_dir / study_name
        # Expected study layout: <study>/1_setup/model and <study>/2_results.
        self.setup_dir = self.study_path / "1_setup"
        self.model_dir = self.setup_dir / "model"
        self.results_dir = self.study_path / "2_results"

    def validate_study_exists(self) -> bool:
        """Check if the study directory exists on disk."""
        return self.study_path.exists()

    def get_study_stats(self) -> Dict[str, Any]:
        """
        Get current study statistics (trial counts, db size, temp files).

        Returns:
            Dictionary with study statistics; zeros when nothing is found.
        """
        stats = {
            "study_name": self.study_name,
            "exists": self.study_path.exists(),
            "has_results": self.results_dir.exists(),
            "trials": 0,
            "completed": 0,
            "failed": 0,
            "db_size_mb": 0,
            "temp_files": 0,
            "temp_size_mb": 0
        }
        if not self.study_path.exists():
            return stats
        # Check database
        db_path = self.results_dir / "study.db"
        if db_path.exists():
            stats["db_size_mb"] = db_path.stat().st_size / (1024 * 1024)
            try:
                storage = f"sqlite:///{db_path}"
                # NOTE(review): assumes the Optuna study name equals the
                # folder name -- confirm against how studies are created.
                study = optuna.load_study(study_name=self.study_name, storage=storage)
                stats["trials"] = len(study.trials)
                stats["completed"] = len([t for t in study.trials
                                          if t.state == optuna.trial.TrialState.COMPLETE])
                stats["failed"] = len([t for t in study.trials
                                       if t.state == optuna.trial.TrialState.FAIL])
            except Exception as e:
                # Stats stay at zero; the caller still gets size information.
                logger.warning(f"Could not load study: {e}")
        # Count temp files (non-recursive glob in model/ and results/ only)
        temp_patterns = ["_temp*", "*.log", "*.bak", "worker_*"]
        temp_files = []
        for pattern in temp_patterns:
            temp_files.extend(self.model_dir.glob(pattern))
            temp_files.extend(self.results_dir.glob(pattern))
        stats["temp_files"] = len(temp_files)
        stats["temp_size_mb"] = sum(f.stat().st_size for f in temp_files if f.is_file()) / (1024 * 1024)
        return stats

    def reset_database(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Reset the Optuna database (delete all trials).

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary (status, message, optional backup path)
        """
        result = {"operation": "reset_database", "dry_run": dry_run}
        db_path = self.results_dir / "study.db"
        if not db_path.exists():
            result["status"] = "skipped"
            result["message"] = "No database found"
            return result
        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete {db_path}"
            return result
        # Create backup
        if backup:
            backup_name = f"study_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.db"
            backup_path = self.results_dir / backup_name
            shutil.copy2(db_path, backup_path)
            result["backup"] = str(backup_path)
            logger.info(f"Created backup: {backup_path}")
        # Delete database
        db_path.unlink()
        result["status"] = "success"
        result["message"] = "Database reset complete"
        # Also clean history files so stale summaries don't outlive the db
        for history_file in ["history.json", "history.csv", "optimization_summary.json"]:
            hist_path = self.results_dir / history_file
            if hist_path.exists():
                hist_path.unlink()
                logger.info(f"Deleted: {hist_path}")
        return result

    def cleanup_temp_files(self, dry_run: bool = False) -> Dict[str, Any]:
        """
        Remove temporary files from study.

        Args:
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary (counts, sizes, deleted paths)
        """
        result = {
            "operation": "cleanup_temp",
            "dry_run": dry_run,
            "deleted_files": [],
            "deleted_size_mb": 0
        }
        temp_patterns = [
            "_temp*",      # Temporary NX files
            "*.log",       # Log files
            "*.bak",       # Backup files
            "worker_*",    # Worker directories
            "*.pyc",       # Python cache
            "__pycache__"  # Python cache dirs
        ]
        # Search model/, results/, and the study root (non-recursive).
        files_to_delete: List[Path] = []
        for pattern in temp_patterns:
            files_to_delete.extend(self.model_dir.glob(pattern))
            files_to_delete.extend(self.results_dir.glob(pattern))
            files_to_delete.extend(self.study_path.glob(pattern))
        total_size = 0
        for path in files_to_delete:
            if path.is_file():
                total_size += path.stat().st_size
        result["files_found"] = len(files_to_delete)
        result["size_mb"] = total_size / (1024 * 1024)
        if dry_run:
            result["status"] = "preview"
            result["files_to_delete"] = [str(f) for f in files_to_delete[:20]]  # Limit preview
            return result
        # Actually delete
        for path in files_to_delete:
            try:
                if path.is_file():
                    path.unlink()
                elif path.is_dir():
                    shutil.rmtree(path)
                result["deleted_files"].append(str(path))
            except Exception as e:
                logger.warning(f"Could not delete {path}: {e}")
        # NOTE(review): this reports the total size found, even if some
        # individual deletions failed above.
        result["deleted_size_mb"] = total_size / (1024 * 1024)
        result["status"] = "success"
        return result

    def archive_results(self, archive_dir: Optional[Path] = None, dry_run: bool = False) -> Dict[str, Any]:
        """
        Archive study results before reset.

        Args:
            archive_dir: Directory for archives (default: studies/archives)
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary (status, archive path)
        """
        result = {"operation": "archive", "dry_run": dry_run}
        if archive_dir is None:
            archive_dir = self.studies_dir / "archives"
        if not self.results_dir.exists():
            result["status"] = "skipped"
            result["message"] = "No results to archive"
            return result
        # Timestamped copy so repeated archives never collide.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_name = f"{self.study_name}_{timestamp}"
        archive_path = archive_dir / archive_name
        if dry_run:
            result["status"] = "preview"
            result["archive_path"] = str(archive_path)
            return result
        archive_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(self.results_dir, archive_path)
        result["status"] = "success"
        result["archive_path"] = str(archive_path)
        logger.info(f"Archived results to: {archive_path}")
        return result

    def full_reset(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Perform full study reset (database + temp files).

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Combined operation result (archive, database, temp_cleanup keys)
        """
        results = {"operation": "full_reset", "dry_run": dry_run}
        # NOTE(review): the archive step is skipped entirely in dry-run
        # mode, so a dry-run preview does not mention it.
        if backup and not dry_run:
            archive_result = self.archive_results(dry_run=dry_run)
            results["archive"] = archive_result
        db_result = self.reset_database(backup=backup, dry_run=dry_run)
        results["database"] = db_result
        temp_result = self.cleanup_temp_files(dry_run=dry_run)
        results["temp_cleanup"] = temp_result
        # Remove lock files left by an interrupted run
        lock_file = self.results_dir / ".optimization_lock"
        if lock_file.exists() and not dry_run:
            lock_file.unlink()
            results["lock_removed"] = True
        results["status"] = "success" if not dry_run else "preview"
        return results

    def delete_study(self, confirm: bool = False, dry_run: bool = False) -> Dict[str, Any]:
        """
        Completely delete study (DESTRUCTIVE).

        Args:
            confirm: Must be True to actually delete
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result = {"operation": "delete_study", "dry_run": dry_run}
        # Explicit confirm flag guards against accidental programmatic deletion.
        if not confirm and not dry_run:
            result["status"] = "error"
            result["message"] = "Must set confirm=True to delete study"
            return result
        if not self.study_path.exists():
            result["status"] = "skipped"
            result["message"] = "Study does not exist"
            return result
        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete: {self.study_path}"
            return result
        # Create archive first (safety net: deletion always keeps a copy)
        archive_result = self.archive_results()
        result["archive"] = archive_result
        # Delete study folder
        shutil.rmtree(self.study_path)
        result["status"] = "success"
        result["message"] = f"Deleted study: {self.study_name}"
        return result
def reset_study(
    study_name: str,
    reset_db: bool = True,
    cleanup_temp: bool = True,
    backup: bool = True,
    dry_run: bool = False
) -> Dict[str, Any]:
    """Convenience wrapper: reset database and/or temp files for a study.

    Args:
        study_name: Name of the study
        reset_db: Reset the Optuna database
        cleanup_temp: Clean up temporary files
        backup: Create backup before reset
        dry_run: Preview changes without executing

    Returns:
        Per-operation result dicts, or an error dict if the study is missing.
    """
    tool = StudyReset(study_name)
    if not tool.validate_study_exists():
        return {"status": "error", "message": f"Study '{study_name}' not found"}
    outcome: Dict[str, Any] = {}
    if reset_db:
        outcome["database"] = tool.reset_database(backup=backup, dry_run=dry_run)
    if cleanup_temp:
        outcome["temp_cleanup"] = tool.cleanup_temp_files(dry_run=dry_run)
    return outcome
if __name__ == "__main__":
    import argparse
    # BUG FIX: sys.exit() is called below but `sys` was never imported
    # anywhere in this module, causing a NameError on every exit path.
    import sys

    parser = argparse.ArgumentParser(
        description="Reset or cleanup Atomizer optimization studies",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Show study status
  python -m optimization_engine.study_reset my_study --status

  # Preview reset (dry run)
  python -m optimization_engine.study_reset my_study --full-reset --dry-run

  # Reset database only
  python -m optimization_engine.study_reset my_study --reset-db

  # Clean temp files only
  python -m optimization_engine.study_reset my_study --cleanup-temp

  # Full reset with backup
  python -m optimization_engine.study_reset my_study --full-reset
"""
    )
    parser.add_argument("study_name", help="Name of the study")
    parser.add_argument("--status", action="store_true", help="Show study status only")
    parser.add_argument("--reset-db", action="store_true", help="Reset Optuna database")
    parser.add_argument("--cleanup-temp", action="store_true", help="Clean temporary files")
    parser.add_argument("--full-reset", action="store_true", help="Full reset (db + temp)")
    parser.add_argument("--archive", action="store_true", help="Archive results before reset")
    parser.add_argument("--delete", action="store_true", help="Delete study completely")
    parser.add_argument("--no-backup", action="store_true", help="Skip backup")
    parser.add_argument("--dry-run", action="store_true", help="Preview without executing")
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts")
    args = parser.parse_args()

    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s'
    )

    resetter = StudyReset(args.study_name)
    if not resetter.validate_study_exists():
        print(f"Error: Study '{args.study_name}' not found")
        sys.exit(1)

    if args.status:
        # Read-only mode: print statistics and exit without touching anything.
        stats = resetter.get_study_stats()
        print(f"\nStudy: {args.study_name}")
        print("=" * 50)
        print(f"  Trials: {stats['trials']} ({stats['completed']} completed, {stats['failed']} failed)")
        print(f"  Database size: {stats['db_size_mb']:.2f} MB")
        print(f"  Temp files: {stats['temp_files']} ({stats['temp_size_mb']:.2f} MB)")
        sys.exit(0)

    # Confirmation prompt for destructive operations (unless --yes/--dry-run)
    if not args.dry_run and not args.yes:
        action = "full reset" if args.full_reset else \
                 "delete" if args.delete else \
                 "reset" if args.reset_db else "cleanup"
        response = input(f"\nReally {action} study '{args.study_name}'? [y/N] ")
        if response.lower() not in ['y', 'yes']:
            print("Aborted")
            sys.exit(0)

    backup = not args.no_backup
    # Dispatch: first matching flag wins, in decreasing destructiveness.
    if args.full_reset:
        result = resetter.full_reset(backup=backup, dry_run=args.dry_run)
    elif args.delete:
        result = resetter.delete_study(confirm=True, dry_run=args.dry_run)
    elif args.reset_db:
        result = resetter.reset_database(backup=backup, dry_run=args.dry_run)
    elif args.cleanup_temp:
        result = resetter.cleanup_temp_files(dry_run=args.dry_run)
    elif args.archive:
        result = resetter.archive_results(dry_run=args.dry_run)
    else:
        parser.print_help()
        sys.exit(0)

    print("\nResult:")
    print(json.dumps(result, indent=2))
    if args.dry_run:
        print("\n[DRY RUN - no changes made]")

View File

@@ -0,0 +1,322 @@
"""
Study State Detector for Atomizer
This module provides utilities to detect and summarize the state of an optimization study.
Used by Claude sessions to quickly understand study context on initialization.
"""
import json
import sqlite3
from pathlib import Path
from typing import Dict, Any, Optional, List
from datetime import datetime
def detect_study_state(study_dir: Path) -> Dict[str, Any]:
    """
    Detect the current state of an optimization study.

    Args:
        study_dir: Path to the study directory

    Returns:
        Dictionary with study state information (keys initialised below).
    """
    study_dir = Path(study_dir)
    # Pessimistic defaults; fields are filled in as artifacts are discovered.
    state = {
        "is_study": False,
        "study_name": study_dir.name,
        "status": "unknown",
        "config": None,
        "fea_trials": 0,
        "nn_trials": 0,
        "pareto_solutions": 0,
        "best_trial": None,
        "last_activity": None,
        "has_turbo_report": False,
        "has_surrogate": False,
        "warnings": [],
        "next_actions": []
    }
    # Check if this is a valid study directory (config at root or in 1_setup/)
    config_path = study_dir / "optimization_config.json"
    if not config_path.exists():
        # Try 1_setup subdirectory
        config_path = study_dir / "1_setup" / "optimization_config.json"
        if not config_path.exists():
            state["warnings"].append("No optimization_config.json found")
            return state
    state["is_study"] = True
    # Load config (a parse failure is recorded but does not abort detection)
    try:
        with open(config_path, 'r') as f:
            config = json.load(f)
        state["config"] = _summarize_config(config)
    except Exception as e:
        state["warnings"].append(f"Failed to parse config: {e}")
    # Check results directory
    results_dir = study_dir / "2_results"
    if not results_dir.exists():
        state["status"] = "not_started"
        state["next_actions"].append("Run: python run_optimization.py --discover")
        return state
    # Check study.db for FEA trials
    db_path = results_dir / "study.db"
    if db_path.exists():
        fea_stats = _query_study_db(db_path)
        # Merges fea_trials/completed/failed/pareto plus best_trial and
        # last_activity into the state.
        state.update(fea_stats)
    # Check nn_study.db for NN trials
    nn_db_path = results_dir / "nn_study.db"
    if nn_db_path.exists():
        nn_stats = _query_study_db(nn_db_path, prefix="nn_")
        state["nn_trials"] = nn_stats.get("nn_fea_trials", 0)
    # Check for turbo report
    turbo_report_path = results_dir / "turbo_report.json"
    if turbo_report_path.exists():
        state["has_turbo_report"] = True
        try:
            with open(turbo_report_path, 'r') as f:
                turbo = json.load(f)
            state["turbo_summary"] = {
                "mode": turbo.get("mode"),
                "nn_trials": turbo.get("total_nn_trials", 0),
                "fea_validations": turbo.get("fea_validations", 0),
                "time_minutes": round(turbo.get("time_minutes", 0), 1)
            }
        except Exception:
            # A malformed turbo report is non-fatal; the flag stays True.
            pass
    # Check for trained surrogate
    surrogate_path = results_dir / "surrogate.pt"
    state["has_surrogate"] = surrogate_path.exists()
    # Determine overall status from the collected evidence
    state["status"] = _determine_status(state)
    # Suggest next actions for the detected status
    state["next_actions"] = _suggest_next_actions(state)
    return state
def _summarize_config(config: Dict) -> Dict[str, Any]:
"""Extract key information from config."""
# Handle different config formats
variables = config.get("design_variables", config.get("variables", []))
objectives = config.get("objectives", [])
constraints = config.get("constraints", [])
# Get variable names (handle different key names)
var_names = []
for v in variables:
name = v.get("parameter") or v.get("name") or v.get("expression_name", "unknown")
var_names.append(name)
# Get objective names
obj_names = []
for o in objectives:
name = o.get("name") or o.get("metric", "unknown")
direction = o.get("goal") or o.get("direction", "minimize")
obj_names.append(f"{name} ({direction})")
return {
"n_variables": len(variables),
"n_objectives": len(objectives),
"n_constraints": len(constraints),
"variable_names": var_names[:5], # First 5 only
"objective_names": obj_names,
"study_type": "multi_objective" if len(objectives) > 1 else "single_objective"
}
def _query_study_db(db_path: Path, prefix: str = "") -> Dict[str, Any]:
"""Query Optuna study database for statistics."""
stats = {
f"{prefix}fea_trials": 0,
f"{prefix}completed_trials": 0,
f"{prefix}failed_trials": 0,
f"{prefix}pareto_solutions": 0,
"best_trial": None,
"last_activity": None
}
try:
conn = sqlite3.connect(str(db_path))
cursor = conn.cursor()
# Count trials by state
cursor.execute("""
SELECT state, COUNT(*) FROM trials
GROUP BY state
""")
for state, count in cursor.fetchall():
if state == "COMPLETE":
stats[f"{prefix}completed_trials"] = count
stats[f"{prefix}fea_trials"] = count
elif state == "FAIL":
stats[f"{prefix}failed_trials"] = count
# Get last activity time
cursor.execute("""
SELECT MAX(datetime_complete) FROM trials
WHERE datetime_complete IS NOT NULL
""")
result = cursor.fetchone()
if result and result[0]:
stats["last_activity"] = result[0]
# Get best trial (for single objective)
cursor.execute("""
SELECT trial_id, value FROM trial_values
WHERE objective_id = 0
ORDER BY value ASC
LIMIT 1
""")
result = cursor.fetchone()
if result:
stats["best_trial"] = {"trial_id": result[0], "value": result[1]}
# Count Pareto solutions (trials with user_attr pareto=True or non-dominated)
# Simplified: count distinct trials in trial_values
cursor.execute("""
SELECT COUNT(DISTINCT trial_id) FROM trial_values
""")
result = cursor.fetchone()
if result:
# For multi-objective, this is a rough estimate
stats[f"{prefix}pareto_solutions"] = min(result[0], 50) # Cap at 50
conn.close()
except Exception as e:
stats["db_error"] = str(e)
return stats
def _determine_status(state: Dict) -> str:
"""Determine overall study status."""
if state["fea_trials"] == 0:
return "not_started"
elif state["fea_trials"] < 3:
return "discovery"
elif state["fea_trials"] < 10:
return "validation"
elif state["has_turbo_report"]:
return "turbo_complete"
elif state["has_surrogate"]:
return "training_complete"
elif state["fea_trials"] >= 50:
return "fea_complete"
else:
return "in_progress"
def _suggest_next_actions(state: Dict) -> List[str]:
"""Suggest next actions based on study state."""
actions = []
if state["status"] == "not_started":
actions.append("Run: python run_optimization.py --discover")
elif state["status"] == "discovery":
actions.append("Run: python run_optimization.py --validate")
elif state["status"] == "validation":
actions.append("Run: python run_optimization.py --test")
actions.append("Or run full: python run_optimization.py --run --trials 50")
elif state["status"] == "in_progress":
actions.append("Continue: python run_optimization.py --resume")
elif state["status"] == "fea_complete":
actions.append("Analyze: python -m optimization_engine.method_selector optimization_config.json 2_results/study.db")
actions.append("Or run turbo: python run_nn_optimization.py --turbo")
elif state["status"] == "turbo_complete":
actions.append("View results in dashboard: cd atomizer-dashboard && npm run dev")
actions.append("Generate report: python generate_report.py")
return actions
def format_study_summary(state: Dict) -> str:
    """Format study state as a human-readable Markdown summary."""
    if not state["is_study"]:
        return f"❌ Not a valid study directory: {state['study_name']}"
    title = state['status'].replace('_', ' ').title()
    lines = [
        f"📊 **Study: {state['study_name']}**",
        f"Status: {title}",
        "",
    ]
    cfg = state["config"]
    if cfg:
        shown = ', '.join(cfg['variable_names'][:3])
        more = '...' if cfg['n_variables'] > 3 else ''
        lines += [
            "**Configuration:**",
            f"- Variables: {cfg['n_variables']} ({shown}{more})",
            f"- Objectives: {cfg['n_objectives']} ({', '.join(cfg['objective_names'])})",
            f"- Constraints: {cfg['n_constraints']}",
            f"- Type: {cfg['study_type']}",
            "",
        ]
    lines.append("**Progress:**")
    lines.append(f"- FEA trials: {state['fea_trials']}")
    if state["nn_trials"] > 0:
        lines.append(f"- NN trials: {state['nn_trials']}")
    if state["has_turbo_report"] and "turbo_summary" in state:
        ts = state["turbo_summary"]
        lines.append(f"- Turbo mode: {ts['nn_trials']} NN + {ts['fea_validations']} FEA validations ({ts['time_minutes']} min)")
    if state["last_activity"]:
        lines.append(f"- Last activity: {state['last_activity']}")
    lines.append("")
    if state["next_actions"]:
        lines.append("**Suggested Next Actions:**")
        for action in state["next_actions"]:
            lines.append(f"{action}")
    if state["warnings"]:
        lines.append("")
        lines.append("**Warnings:**")
        for warning in state["warnings"]:
            lines.append(f" ⚠️ {warning}")
    return "\n".join(lines)
def get_all_studies(atomizer_root: Path) -> List[Dict[str, Any]]:
    """Get the state of every study under <atomizer_root>/studies, newest first."""
    studies_dir = atomizer_root / "studies"
    if not studies_dir.exists():
        return []
    # Skip hidden entries and plain files; keep only valid studies.
    candidates = [
        detect_study_state(p)
        for p in studies_dir.iterdir()
        if p.is_dir() and not p.name.startswith(".")
    ]
    valid = [s for s in candidates if s["is_study"]]
    # Most recently active first; undated studies sort last via epoch sentinel.
    valid.sort(
        key=lambda s: s.get("last_activity") or "1970-01-01",
        reverse=True
    )
    return valid
if __name__ == "__main__":
    import sys
    # Accept an optional study directory argument; default to the CWD.
    if len(sys.argv) > 1:
        target = Path(sys.argv[1])
    else:
        target = Path.cwd()
    print(format_study_summary(detect_study_state(target)))

File diff suppressed because it is too large Load Diff