feat: Major update with validators, skills, dashboard, and docs reorganization

- Add validation framework (config, model, results, study validators)
- Add Claude Code skills (create-study, run-optimization, generate-report,
  troubleshoot, analyze-model)
- Add Atomizer Dashboard (React frontend + FastAPI backend)
- Reorganize docs into structured directories (00-09)
- Add neural surrogate modules and training infrastructure
- Add multi-objective optimization support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-25 19:23:58 -05:00
parent 74a92803b7
commit e3bdb08a22
155 changed files with 52729 additions and 37 deletions

View File

@@ -0,0 +1,591 @@
"""
Active Learning Surrogate with Uncertainty Estimation
This module implements an ensemble-based neural network surrogate that:
1. Provides uncertainty estimates via ensemble disagreement
2. Supports active learning for strategic FEA validation
3. Tracks confidence and knows when predictions are reliable
Key Concept:
- Train multiple NNs (ensemble) on slightly different data (bootstrap)
- Uncertainty = disagreement between ensemble members
- High uncertainty regions need FEA validation
- Low uncertainty + good accuracy = ready for optimization
"""
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import json
import logging
logger = logging.getLogger(__name__)
class EnsembleMLP(nn.Module):
    """Single MLP member of the ensemble.

    A feed-forward network (Linear -> ReLU -> Dropout per hidden layer,
    plus a final Linear head) used as one member of a bootstrap ensemble.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: Optional[List[int]] = None):
        """
        Args:
            input_dim: Number of input features.
            output_dim: Number of outputs.
            hidden_dims: Sizes of the hidden layers; defaults to [64, 64, 32].
        """
        super().__init__()
        # BUG FIX: avoid a mutable default argument (a shared list across
        # all calls); fall back to the standard topology instead.
        if hidden_dims is None:
            hidden_dims = [64, 64, 32]
        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.1),  # light regularization; also decorrelates members
            ])
            prev_dim = hidden_dim
        layers.append(nn.Linear(prev_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Forward pass through the MLP."""
        return self.network(x)
class ActiveLearningSurrogate:
    """
    Ensemble-based surrogate with uncertainty estimation for active learning.

    Strategy:
    1. Use an ensemble of 5-10 neural networks.
    2. Each member is trained on a bootstrap sample of the data.
    3. Uncertainty = std dev of predictions across ensemble members.
    4. Select high-uncertainty designs for FEA validation.
    """

    def __init__(
        self,
        n_ensemble: int = 5,
        hidden_dims: Optional[List[int]] = None,
        device: str = 'cpu'
    ):
        """
        Args:
            n_ensemble: Number of ensemble members to train.
            hidden_dims: Hidden layer sizes per member; defaults to [64, 64, 32].
            device: Torch device string ('cpu' or 'cuda').
        """
        self.n_ensemble = n_ensemble
        # BUG FIX: None-default avoids the shared-mutable-default pitfall.
        self.hidden_dims = [64, 64, 32] if hidden_dims is None else hidden_dims
        self.device = device
        # Quoted annotation: attribute annotations are evaluated at runtime,
        # and EnsembleMLP may not be importable in isolation.
        self.models: List["EnsembleMLP"] = []
        self.design_var_names: List[str] = []
        self.objective_names: List[str] = ['mass', 'frequency', 'max_displacement', 'max_stress']
        # Normalization parameters (computed in train(), restored in load())
        self.input_mean = None
        self.input_std = None
        self.output_mean = None
        self.output_std = None
        # Per-member training summary (best val loss, epochs run)
        self.training_history = []
        # FEA validation errors drive the confidence score
        self.validation_errors = []
        self.confidence_score = 0.0

    def _normalize_input(self, x: np.ndarray) -> torch.Tensor:
        """Normalize input features to zero mean / unit variance."""
        x_norm = (x - self.input_mean) / (self.input_std + 1e-8)
        return torch.FloatTensor(x_norm).to(self.device)

    def _denormalize_output(self, y: torch.Tensor) -> np.ndarray:
        """Map normalized network outputs back to physical units."""
        y_np = y.cpu().numpy()
        return y_np * (self.output_std + 1e-8) + self.output_mean

    def train(
        self,
        design_params: np.ndarray,
        objectives: np.ndarray,
        design_var_names: List[str],
        epochs: int = 200,
        lr: float = 0.001,
        batch_size: int = 32,
        val_split: float = 0.2
    ):
        """
        Train the ensemble on the data with bootstrap sampling.

        Args:
            design_params: (N, D) array of design parameters
            objectives: (N, O) array of objective values
            design_var_names: Names of design variables
            epochs: Training epochs per ensemble member
            lr: Learning rate
            batch_size: Batch size
            val_split: Validation split ratio
        """
        self.design_var_names = design_var_names
        n_samples = len(design_params)
        input_dim = design_params.shape[1]
        output_dim = objectives.shape[1]
        # Normalization statistics are computed on the full dataset so that
        # all ensemble members share the same scaling.
        self.input_mean = design_params.mean(axis=0)
        self.input_std = design_params.std(axis=0)
        self.output_mean = objectives.mean(axis=0)
        self.output_std = objectives.std(axis=0)
        self.models = []
        self.training_history = []
        for i in range(self.n_ensemble):
            logger.info(f"Training ensemble member {i+1}/{self.n_ensemble}")
            # Bootstrap sampling (with replacement) decorrelates members.
            bootstrap_idx = np.random.choice(n_samples, size=n_samples, replace=True)
            X_boot = design_params[bootstrap_idx]
            y_boot = objectives[bootstrap_idx]
            # NOTE(review): the val split is drawn from the bootstrap sample,
            # so duplicates of training points can land in validation and make
            # early stopping slightly optimistic. Behavior preserved on purpose.
            n_val = int(len(X_boot) * val_split)
            indices = np.random.permutation(len(X_boot))
            train_idx, val_idx = indices[n_val:], indices[:n_val]
            X_train = self._normalize_input(X_boot[train_idx])
            y_train = torch.FloatTensor((y_boot[train_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
            X_val = self._normalize_input(X_boot[val_idx])
            y_val = torch.FloatTensor((y_boot[val_idx] - self.output_mean) / (self.output_std + 1e-8)).to(self.device)
            model = EnsembleMLP(input_dim, output_dim, self.hidden_dims).to(self.device)
            optimizer = optim.Adam(model.parameters(), lr=lr)
            criterion = nn.MSELoss()
            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None
            epoch = -1  # BUG FIX: well-defined even if epochs == 0
            for epoch in range(epochs):
                model.train()
                # Mini-batch training over a fresh shuffle each epoch
                perm = torch.randperm(len(X_train))
                for j in range(0, len(X_train), batch_size):
                    batch_idx = perm[j:j+batch_size]
                    optimizer.zero_grad()
                    pred = model(X_train[batch_idx])
                    loss = criterion(pred, y_train[batch_idx])
                    loss.backward()
                    optimizer.step()
                # Validation
                model.eval()
                with torch.no_grad():
                    val_loss = criterion(model(X_val), y_val).item()
                # Early stopping on validation loss (patience = 20 epochs)
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    # BUG FIX: state_dict().copy() was a *shallow* copy whose
                    # tensors kept mutating as training continued, so the
                    # "best" snapshot was really the last weights. Clone to
                    # make the snapshot immutable.
                    best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= 20:
                        break
            # Restore the best-performing weights
            if best_state is not None:
                model.load_state_dict(best_state)
            self.models.append(model)
            self.training_history.append({
                'member': i,
                'best_val_loss': best_val_loss,
                'epochs_trained': epoch + 1
            })
            logger.info(f" Member {i+1}: val_loss={best_val_loss:.6f}, epochs={epoch+1}")

    def predict(self, params: Dict[str, float]) -> Dict[str, float]:
        """
        Predict objectives for a single design.

        Returns:
            Dict mapping each objective name to its ensemble-mean prediction,
            '<name>_uncertainty' to the ensemble std dev, and
            'total_uncertainty' to the mean normalized std dev.
        """
        # Missing parameters default to 0.0, matching training-data extraction.
        x = np.array([[params.get(name, 0.0) for name in self.design_var_names]], dtype=np.float32)
        # Hoisted out of the loop: normalization is identical for every member.
        x_norm = self._normalize_input(x)
        predictions = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = self._denormalize_output(model(x_norm))
            predictions.append(pred[0])
        predictions = np.array(predictions)  # (n_ensemble, n_objectives)
        # Mean prediction and uncertainty (std dev across members)
        mean_pred = predictions.mean(axis=0)
        std_pred = predictions.std(axis=0)
        result = {}
        for i, name in enumerate(self.objective_names):
            result[name] = float(mean_pred[i])
            result[f'{name}_uncertainty'] = float(std_pred[i])
        # Overall uncertainty score, normalized by the output scale
        result['total_uncertainty'] = float(np.mean(std_pred / (self.output_std + 1e-8)))
        return result

    def predict_batch(self, params_list: List[Dict[str, float]]) -> List[Dict[str, float]]:
        """Predict for multiple designs."""
        return [self.predict(p) for p in params_list]

    def select_designs_for_validation(
        self,
        candidate_designs: List[Dict[str, float]],
        n_select: int = 5,
        strategy: str = 'uncertainty'
    ) -> List[Tuple[int, Dict[str, float], float]]:
        """
        Select designs that should be validated with FEA.

        Strategies:
        - 'uncertainty': Select highest uncertainty designs
        - 'pareto_uncertainty': Select from Pareto front with high uncertainty
        - 'diverse': Select diverse designs with moderate uncertainty

        Returns:
            List of (index, params, uncertainty_score) tuples.
        """
        if not candidate_designs:
            return []  # robustness: nothing to select from
        predictions = self.predict_batch(candidate_designs)
        # Score each design: (index, params, prediction dict, score)
        scored = [
            (i, design, pred, pred['total_uncertainty'])
            for i, (design, pred) in enumerate(zip(candidate_designs, predictions))
        ]
        if strategy == 'uncertainty':
            # Simply select highest uncertainty
            scored.sort(key=lambda x: x[3], reverse=True)
        elif strategy == 'pareto_uncertainty':
            # BUG FIX: the original rebound the loop variable instead of
            # updating the list, so this strategy silently behaved exactly
            # like 'uncertainty'. Build a rescored list instead.
            rescored = []
            for idx, design, pred, uncertainty in scored:
                # Simple Pareto-potential proxy: low mass, high frequency
                pareto_score = -pred['mass'] / 1000 + pred['frequency'] / 10
                rescored.append((idx, design, pred, uncertainty * (1 + 0.5 * pareto_score)))
            scored = rescored
            scored.sort(key=lambda x: x[3], reverse=True)
        elif strategy == 'diverse':
            # Greedy max-min selection: seed with the highest-uncertainty
            # design, then repeatedly add the design farthest (uncertainty-
            # weighted squared distance) from the selected set.
            selected = []
            remaining = scored.copy()
            remaining.sort(key=lambda x: x[3], reverse=True)
            selected.append(remaining.pop(0))
            while len(selected) < n_select and remaining:
                best_idx = 0
                best_min_dist = 0
                for i, item in enumerate(remaining):
                    design = item[1]
                    min_dist = float('inf')
                    for sel_item in selected:
                        sel_design = sel_item[1]
                        dist = sum((design.get(k, 0) - sel_design.get(k, 0))**2
                                   for k in self.design_var_names)
                        min_dist = min(min_dist, dist)
                    # Weight by uncertainty too
                    weighted_dist = min_dist * (1 + item[3])
                    if weighted_dist > best_min_dist:
                        best_min_dist = weighted_dist
                        best_idx = i
                selected.append(remaining.pop(best_idx))
            return [(s[0], s[1], s[3]) for s in selected]
        return [(s[0], s[1], s[3]) for s in scored[:n_select]]

    def update_with_validation(
        self,
        validated_designs: List[Dict[str, float]],
        fea_results: List[Dict[str, float]]
    ):
        """
        Update validation error tracking with new FEA results.

        This does NOT retrain the model; it only records relative prediction
        errors so the confidence score can be recomputed.
        """
        for design, fea_result in zip(validated_designs, fea_results):
            pred = self.predict(design)
            errors = {}
            # Only mass and frequency are tracked for confidence scoring.
            for name in ['mass', 'frequency']:
                if name in fea_result:
                    pred_val = pred[name]
                    actual_val = fea_result[name]
                    # Relative error, guarded against division by zero
                    error = abs(pred_val - actual_val) / (abs(actual_val) + 1e-8)
                    errors[name] = error
            self.validation_errors.append({
                'design': design,
                'predicted': {k: pred[k] for k in self.objective_names},
                'actual': fea_result,
                'errors': errors,
                'uncertainty': pred['total_uncertainty']
            })
        self._update_confidence()

    def _update_confidence(self):
        """Recompute the overall confidence score from validation history."""
        if not self.validation_errors:
            self.confidence_score = 0.0
            return
        recent_errors = self.validation_errors[-20:]  # last 20 validations
        # Missing entries count as a full (1.0) error.
        mass_errors = [e['errors'].get('mass', 1.0) for e in recent_errors]
        freq_errors = [e['errors'].get('frequency', 1.0) for e in recent_errors]
        # Fraction of recent validations with relative error < 10%
        mass_conf = sum(1 for e in mass_errors if e < 0.10) / len(mass_errors)
        freq_conf = sum(1 for e in freq_errors if e < 0.10) / len(freq_errors)
        # Frequency is harder to predict, so it is weighted less.
        self.confidence_score = 0.6 * mass_conf + 0.4 * freq_conf

    def get_confidence_report(self) -> Dict:
        """Get detailed confidence metrics (MAPE, correlation, status)."""
        if not self.validation_errors:
            return {
                'confidence_score': 0.0,
                'n_validations': 0,
                'status': 'NO_DATA',
                'recommendation': 'Need FEA validation data'
            }
        recent = self.validation_errors[-20:]
        mass_mape = np.mean([e['errors'].get('mass', 1.0) for e in recent]) * 100
        freq_mape = np.mean([e['errors'].get('frequency', 1.0) for e in recent]) * 100
        # Correlation between predicted uncertainty and observed error —
        # a sanity check that the uncertainty estimate is informative.
        uncertainties = [e['uncertainty'] for e in recent]
        total_errors = [np.mean(list(e['errors'].values())) for e in recent]
        if len(set(uncertainties)) > 1 and len(set(total_errors)) > 1:
            correlation = np.corrcoef(uncertainties, total_errors)[0, 1]
        else:
            correlation = 0.0  # undefined for constant series
        # Determine status from score and per-objective MAPE thresholds
        if self.confidence_score >= 0.8 and mass_mape < 5 and freq_mape < 15:
            status = 'HIGH_CONFIDENCE'
            recommendation = 'NN ready for optimization'
        elif self.confidence_score >= 0.5:
            status = 'MEDIUM_CONFIDENCE'
            recommendation = 'Continue targeted FEA validation in high-uncertainty regions'
        else:
            status = 'LOW_CONFIDENCE'
            recommendation = 'Need more FEA training data, especially in unexplored regions'
        return {
            'confidence_score': self.confidence_score,
            'n_validations': len(self.validation_errors),
            'mass_mape': mass_mape,
            'freq_mape': freq_mape,
            'uncertainty_error_correlation': correlation,
            'status': status,
            'recommendation': recommendation
        }

    def is_ready_for_optimization(self, threshold: float = 0.7) -> bool:
        """Check if the surrogate is confident enough for optimization."""
        return self.confidence_score >= threshold

    def save(self, path: str):
        """Serialize ensemble weights, normalization stats, and history."""
        path = Path(path)
        state = {
            'n_ensemble': self.n_ensemble,
            'hidden_dims': self.hidden_dims,
            'design_var_names': self.design_var_names,
            'objective_names': self.objective_names,
            'input_mean': self.input_mean.tolist() if self.input_mean is not None else None,
            'input_std': self.input_std.tolist() if self.input_std is not None else None,
            'output_mean': self.output_mean.tolist() if self.output_mean is not None else None,
            'output_std': self.output_std.tolist() if self.output_std is not None else None,
            'validation_errors': self.validation_errors,
            'confidence_score': self.confidence_score,
            'training_history': self.training_history,
            'models': [m.state_dict() for m in self.models]
        }
        torch.save(state, path)
        logger.info(f"Saved ensemble surrogate to {path}")

    @classmethod
    def load(cls, path: str) -> 'ActiveLearningSurrogate':
        """Load an ensemble previously written by save()."""
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Model not found: {path}")
        # NOTE(review): torch >= 2.6 defaults torch.load(weights_only=True),
        # which can reject mixed-object checkpoints — confirm against the
        # pinned torch version.
        state = torch.load(path, map_location='cpu')
        surrogate = cls(
            n_ensemble=state['n_ensemble'],
            hidden_dims=state['hidden_dims']
        )
        surrogate.design_var_names = state['design_var_names']
        surrogate.objective_names = state['objective_names']
        # BUG FIX: use `is not None` — plain truthiness would also reject an
        # empty list, which is the wrong test for "field was saved".
        surrogate.input_mean = np.array(state['input_mean']) if state['input_mean'] is not None else None
        surrogate.input_std = np.array(state['input_std']) if state['input_std'] is not None else None
        surrogate.output_mean = np.array(state['output_mean']) if state['output_mean'] is not None else None
        surrogate.output_std = np.array(state['output_std']) if state['output_std'] is not None else None
        surrogate.validation_errors = state.get('validation_errors', [])
        surrogate.confidence_score = state.get('confidence_score', 0.0)
        surrogate.training_history = state.get('training_history', [])
        # Reconstruct ensemble members from their saved state dicts
        input_dim = len(surrogate.design_var_names)
        output_dim = len(surrogate.objective_names)
        for model_state in state['models']:
            model = EnsembleMLP(input_dim, output_dim, surrogate.hidden_dims)
            model.load_state_dict(model_state)
            surrogate.models.append(model)
        logger.info(f"Loaded ensemble surrogate from {path}")
        return surrogate
def extract_training_data_from_study(db_path: str, study_name: str):
    """Extract training data from an Optuna study database.

    Returns:
        Tuple of (design_params, objectives, design_var_names); the arrays
        are float32 and the objective columns are
        [mass, frequency, max_displacement, max_stress].
    """
    import optuna
    storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
    study = optuna.load_study(study_name=study_name, storage=storage)
    completed = [
        t for t in study.trials
        if t.state == optuna.trial.TrialState.COMPLETE
    ]
    if not completed:
        raise ValueError("No completed trials found")
    # Design variable names are inferred from the first completed trial.
    var_names = list(completed[0].params.keys())
    rows_x = []
    rows_y = []
    for t in completed:
        if len(t.values) < 2:
            continue
        mass, raw_freq = t.values[0], t.values[1]
        # Frequency may be stored negated (maximize-via-minimize convention).
        frequency = abs(raw_freq)
        max_disp = t.user_attrs.get('max_displacement', 0.0)
        max_stress = t.user_attrs.get('max_stress', 0.0)
        # Drop trials whose primary objectives are non-finite or non-physical.
        if any(np.isinf(v) or np.isnan(v) for v in (mass, frequency)):
            continue
        if frequency <= 0:
            continue
        rows_x.append([t.params.get(name, 0.0) for name in var_names])
        rows_y.append([mass, frequency, max_disp, max_stress])
    return (
        np.array(rows_x, dtype=np.float32),
        np.array(rows_y, dtype=np.float32),
        var_names,
    )
if __name__ == '__main__':
    # Smoke-test script: train the ensemble on an existing Optuna study,
    # print predictions with uncertainty, save the model, and report confidence.
    import sys
    logging.basicConfig(level=logging.INFO)
    project_root = Path(__file__).parent.parent
    # Find database: prefer the main optimization study, fall back to the
    # atomizer-field test study if it is absent.
    db_path = project_root / "studies/uav_arm_optimization/2_results/study.db"
    study_name = "uav_arm_optimization"
    if not db_path.exists():
        db_path = project_root / "studies/uav_arm_atomizerfield_test/2_results/study.db"
        study_name = "uav_arm_atomizerfield_test"
    print("="*60)
    print("Training Active Learning Surrogate (Ensemble)")
    print("="*60)
    # Extract data from the study database
    print(f"\nLoading data from {db_path}")
    design_params, objectives, design_var_names = extract_training_data_from_study(
        str(db_path), study_name
    )
    print(f"Loaded {len(design_params)} samples")
    print(f"Design variables: {design_var_names}")
    # Train ensemble (5 members, bootstrap sampling)
    print("\nTraining 5-member ensemble...")
    surrogate = ActiveLearningSurrogate(n_ensemble=5)
    surrogate.train(design_params, objectives, design_var_names, epochs=200)
    # Test predictions with uncertainty on hand-picked designs
    print("\n" + "="*60)
    print("Testing Predictions with Uncertainty")
    print("="*60)
    # Test on a few samples spanning the design space
    test_designs = [
        {'beam_half_core_thickness': 2.0, 'beam_face_thickness': 1.0, 'holes_diameter': 5.0, 'hole_count': 10},
        {'beam_half_core_thickness': 5.0, 'beam_face_thickness': 2.0, 'holes_diameter': 20.0, 'hole_count': 8},
        {'beam_half_core_thickness': 1.0, 'beam_face_thickness': 0.5, 'holes_diameter': 2.0, 'hole_count': 6},  # Low data region
    ]
    for i, design in enumerate(test_designs):
        pred = surrogate.predict(design)
        print(f"\nDesign {i+1}: {design}")
        print(f" Mass: {pred['mass']:.1f}g +/- {pred['mass_uncertainty']:.1f}g")
        print(f" Freq: {pred['frequency']:.1f}Hz +/- {pred['frequency_uncertainty']:.1f}Hz")
        print(f" Total Uncertainty: {pred['total_uncertainty']:.3f}")
    # Save model next to the project root
    save_path = project_root / "active_learning_surrogate.pt"
    surrogate.save(str(save_path))
    print(f"\nSaved to {save_path}")
    # Get confidence report (will show NO_DATA until FEA validations are logged)
    print("\n" + "="*60)
    print("Confidence Report")
    print("="*60)
    report = surrogate.get_confidence_report()
    for k, v in report.items():
        print(f" {k}: {v}")

View File

@@ -0,0 +1,415 @@
"""
Adaptive Characterization Module - Intelligent stopping for landscape characterization.
This module implements adaptive stopping criteria for the characterization phase
that intelligently determines when enough landscape exploration has been done.
Simple problems (smooth, unimodal) -> stop early (~10-15 trials)
Complex problems (multimodal, rugged) -> continue longer (~20-30 trials)
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""
import numpy as np
import optuna
from typing import Dict, List, Optional
from dataclasses import dataclass
import json
from pathlib import Path
from datetime import datetime
@dataclass
class LandscapeMetricSnapshot:
    """Snapshot of landscape metrics at a given trial.

    One snapshot is recorded per landscape analysis; the stopping criterion
    compares recent snapshots to detect metric convergence.
    """
    trial_number: int  # trial at which this analysis was performed
    smoothness: float  # smoothness metric from the landscape analyzer
    multimodal: bool  # whether multiple modes were detected
    n_modes: int  # number of detected modes
    noise_level: float  # estimated objective noise level
    landscape_type: str  # classification label from the landscape analyzer
    overall_confidence: float  # stopping confidence; set after construction
class CharacterizationStoppingCriterion:
    """
    Intelligently determines when characterization phase has gathered enough information.

    Key Features:
    1. Progressive landscape analysis (every 5 trials starting at trial 10)
    2. Metric convergence detection (are metrics stabilizing?)
    3. Complexity-aware sample adequacy (complex problems need more trials)
    4. Parameter space coverage assessment
    5. Confidence scoring (combines all factors)

    Stopping Decision:
    - Simple problems: Stop at ~10-15 trials when metrics converge
    - Complex problems: Continue to ~20-30 trials for adequate coverage
    """
    def __init__(
        self,
        min_trials: int = 10,
        max_trials: int = 30,
        confidence_threshold: float = 0.85,
        check_interval: int = 5,
        verbose: bool = True,
        tracking_dir: Optional[Path] = None
    ):
        """
        Args:
            min_trials: Minimum trials before considering stopping
            max_trials: Maximum trials (stop even if not converged)
            confidence_threshold: Confidence needed to stop (0-1)
            check_interval: How often to check stopping criteria
            verbose: Print progress reports
            tracking_dir: Directory to save characterization tracking
        """
        self.min_trials = min_trials
        self.max_trials = max_trials
        self.confidence_threshold = confidence_threshold
        self.check_interval = check_interval
        self.verbose = verbose
        self.tracking_dir = tracking_dir
        # Track metric history across analyses
        self.metric_history: List[LandscapeMetricSnapshot] = []
        self.should_stop_flag = False
        self.stop_reason = ""
        self.final_confidence = 0.0
        # Initialize tracking. Note: self.characterization_log exists only
        # when tracking_dir is set; _save_progress guards on self.tracking_dir
        # before touching it.
        if tracking_dir:
            self.tracking_dir = Path(tracking_dir)
            self.tracking_dir.mkdir(parents=True, exist_ok=True)
            self.characterization_log = self.tracking_dir / "characterization_progress.json"
    def update(self, landscape: Dict, trial_number: int):
        """
        Update with latest landscape analysis.

        Args:
            landscape: Landscape analysis dictionary
            trial_number: Current trial number
        """
        # Ignore analyses the landscape analyzer marks as not ready
        if not landscape.get('ready', False):
            return
        # Create snapshot of the current metrics
        snapshot = LandscapeMetricSnapshot(
            trial_number=trial_number,
            smoothness=landscape['smoothness'],
            multimodal=landscape['multimodal'],
            n_modes=landscape['n_modes'],
            noise_level=landscape['noise_level'],
            landscape_type=landscape['landscape_type'],
            overall_confidence=0.0  # Will be calculated below
        )
        self.metric_history.append(snapshot)
        # Calculate confidence and patch it onto the (already appended) snapshot
        confidence = self._calculate_confidence(landscape, trial_number)
        snapshot.overall_confidence = confidence
        # Save progress
        self._save_progress()
        # Print report
        if self.verbose:
            self._print_progress_report(trial_number, landscape, confidence)
        # Check stopping criteria only after the minimum trial budget is spent
        if trial_number >= self.min_trials:
            self._evaluate_stopping_criteria(landscape, trial_number, confidence)
    def should_stop(self, study: optuna.Study) -> bool:
        """
        Check if characterization should stop.

        Args:
            study: Optuna study

        Returns:
            True if should stop characterization
        """
        completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
        n_trials = len(completed_trials)
        # Force stop at max trials, regardless of confidence
        if n_trials >= self.max_trials:
            self.should_stop_flag = True
            self.stop_reason = f"Maximum characterization trials reached ({self.max_trials})"
            return True
        return self.should_stop_flag
    def _calculate_confidence(self, landscape: Dict, trial_number: int) -> float:
        """
        Calculate confidence score for stopping decision.

        Confidence Components (weighted sum):
        1. Metric Stability (40%): Are metrics converging?
        2. Parameter Coverage (30%): Explored enough space?
        3. Sample Adequacy (20%): Enough samples for complexity?
        4. Landscape Clarity (10%): Clear classification?
        """
        # Never confident before the minimum trial budget
        if trial_number < self.min_trials:
            return 0.0
        # 1. Metric Stability Score
        stability_score = self._compute_metric_stability()
        # 2. Parameter Coverage Score
        coverage_score = self._compute_parameter_coverage(landscape)
        # 3. Sample Adequacy Score
        adequacy_score = self._compute_sample_adequacy(landscape, trial_number)
        # 4. Landscape Clarity Score
        clarity_score = self._compute_landscape_clarity(landscape)
        # Weighted confidence
        confidence = (
            0.40 * stability_score +
            0.30 * coverage_score +
            0.20 * adequacy_score +
            0.10 * clarity_score
        )
        return confidence
    def _compute_metric_stability(self) -> float:
        """
        Compute how stable landscape metrics are.

        High stability = metrics have converged (good for stopping)
        Low stability = metrics still changing (need more trials)
        """
        # Need at least 3 analyses to judge stability
        if len(self.metric_history) < 3:
            return 0.0
        # Look at last 3 analyses
        recent_snapshots = self.metric_history[-3:]
        # Check smoothness stability
        smoothness_values = [s.smoothness for s in recent_snapshots]
        smoothness_std = np.std(smoothness_values)
        smoothness_stable = smoothness_std < 0.05  # Stable if std < 0.05
        # Check noise stability
        noise_values = [s.noise_level for s in recent_snapshots]
        noise_std = np.std(noise_values)
        noise_stable = noise_std < 0.1  # Stable if std < 0.1
        # Check landscape type consistency
        landscape_types = [s.landscape_type for s in recent_snapshots]
        type_consistent = len(set(landscape_types)) == 1  # All same type
        # Check n_modes stability — consistent only if identical across the
        # recent analyses (NOTE(review): a "same or ±1" tolerance was likely
        # intended but is not what this implements)
        n_modes = [s.n_modes for s in recent_snapshots]
        modes_consistent = len(set(n_modes)) <= 1
        # Combine stability indicators into a simple fraction
        stability_indicators = [
            1.0 if smoothness_stable else 0.0,
            1.0 if noise_stable else 0.0,
            1.0 if type_consistent else 0.0,
            1.0 if modes_consistent else 0.0
        ]
        stability_score = np.mean(stability_indicators)
        return stability_score
    def _compute_parameter_coverage(self, landscape: Dict) -> float:
        """
        Compute how well parameter space has been explored.

        High coverage = explored wide range of each parameter
        """
        param_ranges = landscape.get('parameter_ranges', {})
        if not param_ranges:
            return 0.5  # Unknown: assume neutral coverage
        coverage_scores = []
        for param, ranges in param_ranges.items():
            coverage = ranges['coverage']  # Already computed in landscape analyzer
            coverage_scores.append(coverage)
        avg_coverage = np.mean(coverage_scores)
        # Normalize: 50% coverage = 1.0 score (values above 0.5 are capped)
        coverage_score = min(1.0, avg_coverage / 0.5)
        return coverage_score
    def _compute_sample_adequacy(self, landscape: Dict, trial_number: int) -> float:
        """
        Compute if we have enough samples for the detected complexity.

        Simple problems: 10 trials sufficient
        Complex problems: 20-30 trials needed
        """
        dimensionality = landscape.get('dimensionality', 2)
        multimodal = landscape.get('multimodal', False)
        n_modes = landscape.get('n_modes', 1)
        # Calculate required samples from a complexity heuristic
        if multimodal and n_modes > 2:
            # Complex multimodal: need more samples
            required_samples = 10 + 5 * n_modes + 2 * dimensionality
        elif multimodal:
            # Simple multimodal: moderate samples
            required_samples = 15 + 2 * dimensionality
        else:
            # Unimodal: fewer samples needed
            required_samples = 10 + dimensionality
        # Cap at max_trials so adequacy is reachable within the budget
        required_samples = min(required_samples, self.max_trials)
        # Score based on how many samples we have vs required
        adequacy_score = min(1.0, trial_number / required_samples)
        return adequacy_score
    def _compute_landscape_clarity(self, landscape: Dict) -> float:
        """
        Compute how clearly we can classify the landscape.

        Clear classification = high confidence in landscape type
        """
        smoothness = landscape.get('smoothness', 0.5)
        noise_level = landscape.get('noise_level', 0.5)
        # Clear cases are metrics far from the classification thresholds:
        # - Very smooth (> 0.7) or very rugged (< 0.3)
        # - Low noise (< 0.3) or high noise (> 0.7)
        smoothness_clarity = max(
            abs(smoothness - 0.7),  # Distance from smooth threshold
            abs(smoothness - 0.3)  # Distance from rugged threshold
        )
        noise_clarity = max(
            abs(noise_level - 0.3),  # Distance from low noise threshold
            abs(noise_level - 0.7)  # Distance from high noise threshold
        )
        # Normalize the combined distances to 0-1
        clarity_score = min(1.0, (smoothness_clarity + noise_clarity) / 0.8)
        return clarity_score
    def _evaluate_stopping_criteria(self, landscape: Dict, trial_number: int, confidence: float):
        """
        Evaluate if we should stop characterization.

        Stop if:
        1. Confidence threshold met
        2. OR maximum trials reached (handled in should_stop)
        """
        if confidence >= self.confidence_threshold:
            self.should_stop_flag = True
            self.stop_reason = f"Characterization confidence threshold met ({confidence:.1%})"
            self.final_confidence = confidence
            if self.verbose:
                print(f"\n{'='*70}")
                print(f" CHARACTERIZATION COMPLETE")
                print(f"{'='*70}")
                print(f" Trial #{trial_number}")
                print(f" Confidence: {confidence:.1%}")
                print(f" Landscape Type: {landscape['landscape_type'].upper()}")
                print(f" Ready for strategy selection")
                print(f"{'='*70}\n")
    def _print_progress_report(self, trial_number: int, landscape: Dict, confidence: float):
        """Print characterization progress report."""
        print(f"\n{'='*70}")
        print(f" CHARACTERIZATION PROGRESS - Trial #{trial_number}")
        print(f"{'='*70}")
        print(f" Landscape Type: {landscape['landscape_type']}")
        print(f" Smoothness: {landscape['smoothness']:.2f}")
        print(f" Multimodal: {'YES' if landscape['multimodal'] else 'NO'} ({landscape['n_modes']} modes)")
        print(f" Noise: {landscape['noise_level']:.2f}")
        print(f" Characterization Confidence: {confidence:.1%}")
        if confidence >= self.confidence_threshold:
            print(f" Status: READY TO STOP (confidence >= {self.confidence_threshold:.0%})")
        else:
            remaining = self.confidence_threshold - confidence
            print(f" Status: CONTINUE (need +{remaining:.1%} confidence)")
        print(f"{'='*70}\n")
    def _save_progress(self):
        """Save characterization progress to JSON (no-op without tracking_dir)."""
        if not self.tracking_dir:
            return
        progress_data = {
            'min_trials': self.min_trials,
            'max_trials': self.max_trials,
            'confidence_threshold': self.confidence_threshold,
            'metric_history': [
                {
                    'trial_number': s.trial_number,
                    'smoothness': s.smoothness,
                    'multimodal': s.multimodal,
                    'n_modes': s.n_modes,
                    'noise_level': s.noise_level,
                    'landscape_type': s.landscape_type,
                    'confidence': s.overall_confidence
                }
                for s in self.metric_history
            ],
            'should_stop': self.should_stop_flag,
            'stop_reason': self.stop_reason,
            'final_confidence': self.final_confidence,
            'timestamp': datetime.now().isoformat()
        }
        # Tracking is best-effort: a failed write must not abort optimization
        try:
            with open(self.characterization_log, 'w') as f:
                json.dump(progress_data, f, indent=2)
        except Exception as e:
            if self.verbose:
                print(f" Warning: Failed to save characterization progress: {e}")
    def get_summary_report(self) -> str:
        """Generate summary report of characterization phase."""
        if not self.metric_history:
            return "No characterization data available"
        final_snapshot = self.metric_history[-1]
        report = "\n" + "="*70 + "\n"
        report += " CHARACTERIZATION PHASE SUMMARY\n"
        report += "="*70 + "\n"
        report += f" Total Trials: {final_snapshot.trial_number}\n"
        report += f" Final Confidence: {final_snapshot.overall_confidence:.1%}\n"
        report += f" Stop Reason: {self.stop_reason}\n"
        report += f"\n FINAL LANDSCAPE CLASSIFICATION:\n"
        report += f" Type: {final_snapshot.landscape_type.upper()}\n"
        report += f" Smoothness: {final_snapshot.smoothness:.2f}\n"
        report += f" Multimodal: {'YES' if final_snapshot.multimodal else 'NO'} ({final_snapshot.n_modes} modes)\n"
        report += f" Noise Level: {final_snapshot.noise_level:.2f}\n"
        if len(self.metric_history) >= 2:
            report += f"\n METRIC CONVERGENCE:\n"
            # Show how metrics evolved from first to last analysis
            first = self.metric_history[0]
            last = self.metric_history[-1]
            smoothness_change = abs(last.smoothness - first.smoothness)
            report += f" Smoothness stability: {smoothness_change:.3f} (lower = more stable)\n"
            type_changes = len(set(s.landscape_type for s in self.metric_history))
            report += f" Landscape type changes: {type_changes - 1}\n"
        report += "="*70 + "\n"
        return report

View File

@@ -0,0 +1,393 @@
"""
Adaptive surrogate modeling with confidence-based exploration-exploitation transitions.
This module implements state-of-the-art Bayesian optimization strategies that
dynamically adjust exploration vs exploitation based on surrogate model confidence.
"""
import numpy as np
from typing import Optional, Dict, List
import optuna
from scipy.stats import variation
import json
from pathlib import Path
class SurrogateConfidenceMetrics:
"""Calculate confidence metrics for surrogate model quality.
STUDY-AWARE: Uses study.trials directly instead of session-based history
to properly track confidence across multiple optimization runs.
"""
    def __init__(self, min_trials_for_confidence: int = 15):
        """
        Args:
            min_trials_for_confidence: Minimum completed trials required
                before any non-zero confidence is reported.
        """
        self.min_trials = min_trials_for_confidence
    def update(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
        """Update metrics after each trial (no-op for study-aware implementation).

        Kept for interface compatibility with session/history-based
        implementations; the study-aware version reads study.trials directly
        in calculate_confidence().
        """
        pass  # Study-aware: we read directly from study.trials
def calculate_confidence(self, study: optuna.Study) -> Dict[str, float]:
"""
Calculate comprehensive surrogate confidence metrics.
STUDY-AWARE: Uses ALL completed trials from the study database,
not just trials from the current session.
PROTOCOL 11: Multi-objective studies are NOT supported by adaptive
characterization. Return immediately with max confidence to skip
characterization phase.
Returns:
Dict with confidence scores:
- 'overall_confidence': 0-1 score, where 1 = high confidence
- 'convergence_score': How stable recent improvements are
- 'exploration_coverage': How well parameter space is covered
- 'prediction_stability': How consistent the model predictions are
"""
# [Protocol 11] Multi-objective NOT supported by adaptive characterization
is_multi_objective = len(study.directions) > 1
if is_multi_objective:
return {
'overall_confidence': 1.0, # Skip characterization
'convergence_score': 1.0,
'exploration_coverage': 1.0,
'prediction_stability': 1.0,
'ready_for_exploitation': True, # Go straight to NSGA-II
'total_trials': len(study.trials),
'message': '[Protocol 11] Multi-objective: skipping adaptive characterization, using NSGA-II directly'
}
# Get ALL completed trials from study (study-aware)
all_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if len(all_trials) < self.min_trials:
return {
'overall_confidence': 0.0,
'convergence_score': 0.0,
'exploration_coverage': 0.0,
'prediction_stability': 0.0,
'ready_for_exploitation': False,
'total_trials': len(all_trials),
'message': f'Need {self.min_trials - len(all_trials)} more trials for confidence assessment (currently {len(all_trials)} trials)'
}
# 1. Convergence Score - are we finding consistent improvements?
recent_window = 10
recent_trials = all_trials[-recent_window:]
recent_values = [t.value for t in recent_trials] # Safe: single-objective only
# Calculate improvement rate
improvements = []
for i in range(1, len(recent_values)):
if recent_values[i] < recent_values[i-1]:
improvement = (recent_values[i-1] - recent_values[i]) / abs(recent_values[i-1])
improvements.append(improvement)
# If we're consistently finding improvements, convergence is good
if improvements:
avg_improvement = np.mean(improvements)
improvement_consistency = 1.0 - variation(improvements) if len(improvements) > 1 else 0.5
convergence_score = min(1.0, avg_improvement * improvement_consistency * 10)
else:
convergence_score = 0.0
# 2. Exploration Coverage - how well have we covered parameter space?
# Use coefficient of variation for each parameter
param_names = list(all_trials[0].params.keys()) if all_trials else []
coverage_scores = []
for param in param_names:
values = [t.params[param] for t in all_trials]
# Get parameter bounds
distribution = all_trials[0].distributions[param]
param_range = distribution.high - distribution.low
# Calculate spread relative to bounds
spread = max(values) - min(values)
coverage = spread / param_range
coverage_scores.append(coverage)
exploration_coverage = np.mean(coverage_scores)
# 3. Prediction Stability - are recent trials clustered in good regions?
recent_best_values = []
current_best = float('inf')
for trial in recent_trials:
current_best = min(current_best, trial.value)
recent_best_values.append(current_best)
# If best hasn't improved much recently, model is stable
if len(recent_best_values) > 1:
best_stability = 1.0 - (recent_best_values[0] - recent_best_values[-1]) / (recent_best_values[0] + 1e-10)
prediction_stability = max(0.0, min(1.0, best_stability))
else:
prediction_stability = 0.0
# 4. Overall Confidence - weighted combination
overall_confidence = (
0.4 * convergence_score +
0.3 * exploration_coverage +
0.3 * prediction_stability
)
# Decision: Ready for intensive exploitation?
ready_for_exploitation = (
overall_confidence >= 0.65 and
exploration_coverage >= 0.5 and
len(all_trials) >= self.min_trials
)
message = self._get_confidence_message(overall_confidence, ready_for_exploitation)
return {
'overall_confidence': overall_confidence,
'convergence_score': convergence_score,
'exploration_coverage': exploration_coverage,
'prediction_stability': prediction_stability,
'ready_for_exploitation': ready_for_exploitation,
'total_trials': len(all_trials),
'message': message
}
def _get_confidence_message(self, confidence: float, ready: bool) -> str:
"""Generate human-readable confidence assessment."""
if ready:
return f"HIGH CONFIDENCE ({confidence:.1%}) - Transitioning to exploitation phase"
elif confidence >= 0.5:
return f"MEDIUM CONFIDENCE ({confidence:.1%}) - Continue exploration with some exploitation"
elif confidence >= 0.3:
return f"LOW CONFIDENCE ({confidence:.1%}) - Focus on exploration"
else:
return f"VERY LOW CONFIDENCE ({confidence:.1%}) - Need more diverse exploration"
class AdaptiveExploitationCallback:
    """
    Dynamically adjust sampler behavior based on surrogate confidence.

    This callback monitors surrogate model confidence and adapts the optimization
    strategy from exploration-heavy to exploitation-heavy as confidence increases.

    STUDY-AWARE: Tracks phase transitions across multiple optimization runs
    and persists confidence history to JSON files.
    """
    def __init__(
        self,
        target_value: Optional[float] = None,
        tolerance: float = 0.1,
        min_confidence_for_exploitation: float = 0.65,
        min_trials: int = 15,
        verbose: bool = True,
        tracking_dir: Optional[Path] = None
    ):
        """
        Args:
            target_value: Target objective value (if known)
            tolerance: Acceptable error from target
            min_confidence_for_exploitation: Confidence threshold to enable intensive exploitation
            min_trials: Minimum trials before confidence assessment
            verbose: Print confidence updates
            tracking_dir: Directory to save phase transition tracking files
        """
        self.target_value = target_value
        self.tolerance = tolerance
        # NOTE(review): stored but never read in this class -- the effective
        # readiness threshold is hard-coded in SurrogateConfidenceMetrics;
        # confirm whether this should be wired through.
        self.min_confidence = min_confidence_for_exploitation
        self.verbose = verbose
        self.tracking_dir = tracking_dir
        self.metrics = SurrogateConfidenceMetrics(min_trials_for_confidence=min_trials)
        # Counts consecutive trials whose objective fell within tolerance.
        self.consecutive_successes = 0
        # Initialize phase transition tracking (disabled when tracking_dir is None)
        self.phase_transition_file = None
        self.confidence_history_file = None
        if tracking_dir:
            self.tracking_dir = Path(tracking_dir)
            self.tracking_dir.mkdir(parents=True, exist_ok=True)
            self.phase_transition_file = self.tracking_dir / "phase_transitions.json"
            self.confidence_history_file = self.tracking_dir / "confidence_history.json"
        # Load existing phase transition data if available
        self.phase_transitions = self._load_phase_transitions()
        self.confidence_history = self._load_confidence_history()
        # Determine current phase from history (persists across runs)
        self.phase = self._get_current_phase()

    def _load_phase_transitions(self) -> List[Dict]:
        """Load existing phase transition history from JSON."""
        if self.phase_transition_file and self.phase_transition_file.exists():
            try:
                with open(self.phase_transition_file, 'r') as f:
                    return json.load(f)
            except Exception:
                # Corrupt or unreadable tracking file: start a fresh history.
                return []
        return []

    def _load_confidence_history(self) -> List[Dict]:
        """Load existing confidence history from JSON."""
        if self.confidence_history_file and self.confidence_history_file.exists():
            try:
                with open(self.confidence_history_file, 'r') as f:
                    return json.load(f)
            except Exception:
                # Corrupt or unreadable tracking file: start a fresh history.
                return []
        return []

    def _get_current_phase(self) -> str:
        """Determine current phase from transition history."""
        if not self.phase_transitions:
            return "exploration"
        # If any transition to exploitation exists, we're in exploitation
        # (transitions are one-way: exploration -> exploitation only).
        for transition in self.phase_transitions:
            if transition.get('to_phase') == 'exploitation':
                return "exploitation"
        return "exploration"

    def _save_phase_transition(self, trial_number: int, confidence: Dict):
        """Save phase transition event to JSON.

        Args:
            trial_number: Trial at which the transition occurred.
            confidence: Metrics dict from SurrogateConfidenceMetrics.calculate_confidence.
        """
        if not self.phase_transition_file:
            return  # Tracking disabled
        transition_event = {
            'trial_number': trial_number,
            'from_phase': 'exploration',
            'to_phase': 'exploitation',
            'confidence_metrics': {
                'overall_confidence': confidence['overall_confidence'],
                'convergence_score': confidence['convergence_score'],
                'exploration_coverage': confidence['exploration_coverage'],
                'prediction_stability': confidence['prediction_stability']
            },
            'total_trials': confidence.get('total_trials', trial_number + 1)
        }
        self.phase_transitions.append(transition_event)
        try:
            # Rewrite the full history file each time (small, append-only list).
            with open(self.phase_transition_file, 'w') as f:
                json.dump(self.phase_transitions, f, indent=2)
        except Exception as e:
            # Persistence is best-effort: a failed write never aborts the study.
            if self.verbose:
                print(f" Warning: Failed to save phase transition: {e}")

    def _save_confidence_snapshot(self, trial_number: int, confidence: Dict):
        """Save confidence metrics snapshot to history.

        Args:
            trial_number: Trial the snapshot was taken at.
            confidence: Metrics dict from SurrogateConfidenceMetrics.calculate_confidence.
        """
        if not self.confidence_history_file:
            return  # Tracking disabled
        snapshot = {
            'trial_number': trial_number,
            'phase': self.phase,
            'confidence_metrics': {
                'overall_confidence': confidence['overall_confidence'],
                'convergence_score': confidence['convergence_score'],
                'exploration_coverage': confidence['exploration_coverage'],
                'prediction_stability': confidence['prediction_stability']
            },
            'total_trials': confidence.get('total_trials', trial_number + 1)
        }
        self.confidence_history.append(snapshot)
        try:
            # Rewrite the full history file each time (small, append-only list).
            with open(self.confidence_history_file, 'w') as f:
                json.dump(self.confidence_history, f, indent=2)
        except Exception as e:
            # Persistence is best-effort: a failed write never aborts the study.
            if self.verbose:
                print(f" Warning: Failed to save confidence history: {e}")

    def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
        """Called after each trial completes.

        Recomputes confidence, persists snapshots every 5 trials, flips the
        phase to exploitation once confidence is ready, and stops the study
        after 3 consecutive within-tolerance trials during exploitation.
        """
        # Skip failed trials
        if trial.state != optuna.trial.TrialState.COMPLETE:
            return
        # Update metrics (no-op for study-aware implementation)
        self.metrics.update(study, trial)
        # Calculate confidence from ALL completed trials in the study
        confidence = self.metrics.calculate_confidence(study)
        # Save confidence snapshot every 5 trials
        if trial.number % 5 == 0:
            self._save_confidence_snapshot(trial.number, confidence)
        # Print confidence report
        if self.verbose and trial.number % 5 == 0:  # Every 5 trials
            self._print_confidence_report(trial.number, confidence)
        # Check for phase transition (one-way: exploration -> exploitation)
        if confidence['ready_for_exploitation'] and self.phase == "exploration":
            self.phase = "exploitation"
            # Save transition event
            self._save_phase_transition(trial.number, confidence)
            if self.verbose:
                print(f"\n{'='*60}")
                print(f" PHASE TRANSITION: EXPLORATION -> EXPLOITATION")
                print(f" Trial #{trial.number}")
                print(f" Surrogate confidence: {confidence['overall_confidence']:.1%}")
                print(f" Now focusing on refining best regions")
                print(f"{'='*60}\n")
        # Check for target achievement: the objective is interpreted as the
        # error from target, so "within tolerance" means value <= tolerance.
        if self.target_value is not None and trial.value <= self.tolerance:
            self.consecutive_successes += 1
            if self.verbose:
                print(f" [TARGET] Trial #{trial.number}: {trial.value:.6f}{self.tolerance:.6f}")
                print(f" [TARGET] Consecutive successes: {self.consecutive_successes}/3")
            # Stop after 3 consecutive successes in exploitation phase
            if self.consecutive_successes >= 3 and self.phase == "exploitation":
                if self.verbose:
                    print(f"\n{'='*60}")
                    print(f" TARGET ACHIEVED WITH HIGH CONFIDENCE")
                    print(f" Best value: {study.best_value:.6f}")
                    print(f" Stopping optimization")
                    print(f"{'='*60}\n")
                study.stop()
        else:
            # Any miss resets the streak (including trials with no target set... 
            # NOTE(review): when target_value is None this branch always runs;
            # harmless since the counter is never read in that case).
            self.consecutive_successes = 0

    def _print_confidence_report(self, trial_number: int, confidence: Dict):
        """Print confidence metrics report."""
        print(f"\n [CONFIDENCE REPORT - Trial #{trial_number}]")
        print(f" Phase: {self.phase.upper()}")
        print(f" Overall Confidence: {confidence['overall_confidence']:.1%}")
        print(f" - Convergence: {confidence['convergence_score']:.1%}")
        print(f" - Coverage: {confidence['exploration_coverage']:.1%}")
        print(f" - Stability: {confidence['prediction_stability']:.1%}")
        print(f" {confidence['message']}")
        print()
def create_adaptive_sampler(
    n_startup_trials: int = 10,
    multivariate: bool = True,
    confidence_threshold: float = 0.65
) -> optuna.samplers.TPESampler:
    """Build a TPE sampler configured for adaptive exploration-exploitation.

    Args:
        n_startup_trials: Initial random exploration trials
        multivariate: Enable multivariate TPE for correlated parameters
        confidence_threshold: Confidence needed before intensive exploitation

    Returns:
        Configured TPESampler
    """
    # n_ei_candidates=24 biases toward exploitation; the sampler is applied
    # once the confidence threshold is reached.
    sampler = optuna.samplers.TPESampler(
        n_startup_trials=n_startup_trials,
        n_ei_candidates=24,
        multivariate=multivariate,
        warn_independent_sampling=True,
    )
    return sampler

View File

@@ -0,0 +1,152 @@
"""
Generate human-readable optimization reports from incremental history JSON.
This script should be run automatically at the end of optimization, or manually
to generate a report for any completed optimization study.
"""
import json
import sys
from pathlib import Path
from typing import Dict, Any, List
import numpy as np
def generate_optimization_report(history_file: Path, target_value: float = None, tolerance: float = 0.1) -> str:
    """
    Generate a comprehensive human-readable optimization report.

    Args:
        history_file: Path to optimization_history_incremental.json
        target_value: Target objective value (if applicable)
        tolerance: Acceptable tolerance for success (default 0.1)

    Returns:
        Report text as a string
    """
    with open(history_file) as fh:
        trials = json.load(fh)
    if not trials:
        return "No optimization history found."

    rule = '=' * 80
    divider = '-' * 80
    lines = [rule, 'OPTIMIZATION REPORT', rule, '']

    # Study information (the study directory sits three levels up).
    study_name = history_file.parent.parent.parent.name
    lines += ['STUDY INFORMATION', divider,
              f'Study: {study_name}',
              f'Total trials: {len(trials)}', '']

    # Per-variable summary statistics.
    design_vars = list(trials[0]['design_variables'].keys())
    lines += ['DESIGN VARIABLES', divider]
    for var in design_vars:
        values = [t['design_variables'][var] for t in trials]
        lines += [f' {var}:',
                  f' Range: {min(values):.4f} - {max(values):.4f}',
                  f' Mean: {np.mean(values):.4f}']
    lines.append('')

    # Per-result summary statistics.
    result_keys = list(trials[0]['results'].keys())
    lines += ['OBJECTIVE RESULTS', divider]
    for key in result_keys:
        values = [t['results'][key] for t in trials]
        lines += [f' {key}:',
                  f' Range: {min(values):.4f} - {max(values):.4f}',
                  f' Mean: {np.mean(values):.4f}',
                  f' Std dev: {np.std(values):.4f}']
    lines.append('')

    # Best trial (lowest objective value wins).
    objectives = [t['objective'] for t in trials]
    best = trials[np.argmin(objectives)]
    lines += ['BEST TRIAL', divider,
              f'Trial #{best["trial_number"]}',
              f' Objective value: {best["objective"]:.4f}',
              ' Design variables:']
    for var, value in best['design_variables'].items():
        lines.append(f' {var}: {value:.4f}')
    lines.append(' Results:')
    for key, value in best['results'].items():
        lines.append(f' {key}: {value:.4f}')
    lines.append('')

    # Leaderboard of the five lowest-objective trials.
    lines += ['TOP 5 TRIALS (by objective value)', divider]
    ranked = sorted(trials, key=lambda entry: entry['objective'])
    for rank, entry in enumerate(ranked[:5], 1):
        lines.append(f'{rank}. Trial #{entry["trial_number"]}: Objective = {entry["objective"]:.4f}')
        vars_str = ', '.join(f'{k}={v:.2f}' for k, v in entry['design_variables'].items())
        lines.append(f' {vars_str}')
    lines.append('')

    # Success assessment (only when a target was supplied).
    if target_value is not None:
        lines += ['SUCCESS ASSESSMENT', divider]
        best_objective = min(objectives)
        error = abs(best_objective - target_value)
        if error <= tolerance:
            lines += [f'[SUCCESS] Target {target_value} achieved within tolerance {tolerance}!',
                      f' Best objective: {best_objective:.4f}',
                      f' Error: {error:.4f}']
        else:
            lines += [f'[INCOMPLETE] Target {target_value} not achieved',
                      f' Best objective: {best_objective:.4f}',
                      f' Error: {error:.4f}',
                      f' Need {error - tolerance:.4f} improvement']
        lines.append('')

    lines.append(rule)
    return '\n'.join(lines)
def main():
    """Command-line interface for report generation.

    Usage: generate_report.py <history_file> [target_value] [tolerance]
    Writes OPTIMIZATION_REPORT.txt next to the history file and echoes it.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python generate_report.py <history_file> [target_value] [tolerance]")
        print("Example: python generate_report.py studies/my_study/2_substudies/results/optimization_history_incremental.json 115.0 0.1")
        sys.exit(1)

    history_file = Path(args[0])
    if not history_file.exists():
        print(f"Error: History file not found: {history_file}")
        sys.exit(1)

    # Optional positional arguments: target objective and tolerance.
    target_value = float(args[1]) if len(args) > 1 else None
    tolerance = float(args[2]) if len(args) > 2 else 0.1

    # Generate, persist beside the history file, then echo to the console.
    report = generate_optimization_report(history_file, target_value, tolerance)
    report_file = history_file.parent / 'OPTIMIZATION_REPORT.txt'
    with open(report_file, 'w') as out:
        out.write(report)
    print(report)
    print()
    print(f"Report saved to: {report_file}")

View File

@@ -0,0 +1,569 @@
"""
Generate comprehensive markdown optimization reports with graphs.
Uses Optuna's built-in visualization library for professional-quality plots.
"""
import json
import sys
from pathlib import Path
from typing import Dict, Any, List, Optional
import numpy as np
import matplotlib
matplotlib.use('Agg') # Non-interactive backend
import matplotlib.pyplot as plt
import optuna
from optuna.visualization import (
plot_optimization_history,
plot_parallel_coordinate,
plot_param_importances,
plot_slice,
plot_contour
)
def create_confidence_progression_plot(confidence_history: List[Dict], phase_transitions: List[Dict], output_dir: Path) -> Optional[str]:
    """Create confidence progression plot showing confidence metrics over trials.

    Args:
        confidence_history: Snapshots with 'trial_number' and a
            'confidence_metrics' dict (overall_confidence, convergence_score,
            exploration_coverage, prediction_stability).
        phase_transitions: Transition events with 'trial_number'; each is
            drawn as a red vertical marker.
        output_dir: Directory the PNG is written into.

    Returns:
        Filename (not full path) of the saved plot, or None when there is
        no confidence history to plot.
    """
    if not confidence_history:
        return None
    # Unpack the four tracked metric series against trial number.
    trial_numbers = [c['trial_number'] for c in confidence_history]
    overall = [c['confidence_metrics']['overall_confidence'] for c in confidence_history]
    convergence = [c['confidence_metrics']['convergence_score'] for c in confidence_history]
    coverage = [c['confidence_metrics']['exploration_coverage'] for c in confidence_history]
    stability = [c['confidence_metrics']['prediction_stability'] for c in confidence_history]
    plt.figure(figsize=(12, 7))
    # Overall confidence is the headline series; components drawn dashed.
    plt.plot(trial_numbers, overall, 'b-', linewidth=2.5, label='Overall Confidence')
    plt.plot(trial_numbers, convergence, 'g--', alpha=0.7, label='Convergence Score')
    plt.plot(trial_numbers, coverage, 'orange', linestyle='--', alpha=0.7, label='Exploration Coverage')
    plt.plot(trial_numbers, stability, 'purple', linestyle='--', alpha=0.7, label='Prediction Stability')
    # Mark phase transitions
    for transition in phase_transitions:
        trial_num = transition['trial_number']
        plt.axvline(x=trial_num, color='red', linestyle='-', linewidth=2, alpha=0.8)
        plt.text(trial_num, 0.95, f' Exploitation Phase', rotation=90,
                 verticalalignment='top', fontsize=10, color='red', fontweight='bold')
    # Mark confidence threshold
    plt.axhline(y=0.65, color='gray', linestyle=':', linewidth=1.5, alpha=0.6, label='Confidence Threshold (65%)')
    plt.xlabel('Trial Number', fontsize=11)
    plt.ylabel('Confidence Score (0-1)', fontsize=11)
    plt.title('Surrogate Confidence Progression', fontsize=13, fontweight='bold')
    plt.legend(loc='lower right', fontsize=9)
    plt.grid(True, alpha=0.3)
    plt.ylim(0, 1.05)
    plt.tight_layout()
    plot_file = output_dir / 'confidence_progression.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_convergence_plot(history: List[Dict], target: Optional[float], output_dir: Path) -> str:
    """Plot per-trial objective values together with the running best.

    Args:
        history: Trial records, each with 'trial_number' and 'objective'.
        target: Target objective; when given, a reference line at error = 0
            is drawn (the objective is interpreted as error from target).
        output_dir: Directory the PNG is written into.

    Returns:
        Filename (not full path) of the saved plot image.
    """
    xs = [entry['trial_number'] for entry in history]
    ys = [entry['objective'] for entry in history]

    # Running minimum of the objective: the "best so far" trace.
    running_best = []
    best = float('inf')
    for value in ys:
        best = min(best, value)
        running_best.append(best)

    plt.figure(figsize=(10, 6))
    plt.plot(xs, ys, 'o-', alpha=0.5, label='Trial objective')
    plt.plot(xs, running_best, 'r-', linewidth=2, label='Best so far')
    if target is not None:
        # Objective is the error from target, so the goal line sits at zero.
        plt.axhline(y=0, color='g', linestyle='--', linewidth=2, label='Target (error = 0)')
    plt.xlabel('Trial Number')
    plt.ylabel('Objective Value (Error from Target)')
    plt.title('Optimization Convergence')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    out_file = output_dir / 'convergence_plot.png'
    plt.savefig(out_file, dpi=150)
    plt.close()
    return out_file.name
def create_design_space_plot(history: List[Dict], output_dir: Path) -> str:
    """Scatter the two design variables against each other, colored by objective.

    Args:
        history: Trial records with 'design_variables' and 'objective' keys.
        output_dir: Directory the PNG is written into.

    Returns:
        Filename of the saved plot, or None when the problem is not exactly 2D.
    """
    names = list(history[0]['design_variables'].keys())
    if len(names) != 2:
        # A colored 2D scatter only makes sense for two design variables.
        return None

    x_name, y_name = names
    xs = [entry['design_variables'][x_name] for entry in history]
    ys = [entry['design_variables'][y_name] for entry in history]
    colors = [entry['objective'] for entry in history]

    plt.figure(figsize=(10, 8))
    points = plt.scatter(xs, ys, c=colors, s=100,
                         cmap='viridis', alpha=0.6, edgecolors='black')
    plt.colorbar(points, label='Objective Value')
    plt.xlabel(x_name.replace('_', ' ').title())
    plt.ylabel(y_name.replace('_', ' ').title())
    plt.title('Design Space Exploration')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()

    out_file = output_dir / 'design_space_plot.png'
    plt.savefig(out_file, dpi=150)
    plt.close()
    return out_file.name
def create_parameter_sensitivity_plot(history: List[Dict], output_dir: Path) -> str:
    """Create parameter sensitivity plots.

    One scatter subplot per design variable: variable value vs. objective,
    laid out side by side in a single row.

    Args:
        history: Trial records with 'design_variables' and 'objective' keys.
        output_dir: Directory the PNG is written into.

    Returns:
        Filename (not full path) of the saved plot image.
    """
    first_trial = history[0]
    var_names = list(first_trial['design_variables'].keys())
    # NOTE(review): assumes at least one design variable; an empty
    # var_names would make subplots(1, 0) fail -- confirm upstream guarantees.
    fig, axes = plt.subplots(1, len(var_names), figsize=(6*len(var_names), 5))
    if len(var_names) == 1:
        # subplots() returns a bare Axes (not an array) for a single column;
        # wrap it so the indexing below is uniform.
        axes = [axes]
    for idx, var_name in enumerate(var_names):
        var_values = [t['design_variables'][var_name] for t in history]
        objectives = [t['objective'] for t in history]
        axes[idx].scatter(var_values, objectives, alpha=0.6, s=50)
        axes[idx].set_xlabel(var_name.replace('_', ' ').title())
        axes[idx].set_ylabel('Objective Value')
        axes[idx].set_title(f'Sensitivity to {var_name.replace("_", " ").title()}')
        axes[idx].grid(True, alpha=0.3)
    plt.tight_layout()
    plot_file = output_dir / 'parameter_sensitivity.png'
    plt.savefig(plot_file, dpi=150)
    plt.close()
    return plot_file.name
def create_optuna_plots(study: optuna.Study, output_dir: Path) -> Dict[str, str]:
    """
    Create professional Optuna visualization plots.

    Each plot is attempted independently; a failure (e.g. a missing static
    image export backend for plotly figures) only skips that plot and prints
    a warning, never raises.

    Args:
        study: Optuna study object
        output_dir: Directory to save plots

    Returns:
        Dictionary mapping plot names to filenames
    """
    plots = {}
    try:
        # 1. Parallel Coordinate Plot - shows parameter interactions
        fig = plot_parallel_coordinate(study)
        if fig is not None:
            plot_file = output_dir / 'optuna_parallel_coordinate.png'
            # NOTE(review): write_image requires a plotly static-export
            # engine (e.g. kaleido) to be installed -- confirm deployment.
            fig.write_image(str(plot_file), width=1200, height=600)
            plots['parallel_coordinate'] = plot_file.name
    except Exception as e:
        print(f"Warning: Could not create parallel coordinate plot: {e}")
    try:
        # 2. Optimization History - convergence over trials
        fig = plot_optimization_history(study)
        if fig is not None:
            plot_file = output_dir / 'optuna_optimization_history.png'
            fig.write_image(str(plot_file), width=1000, height=600)
            plots['optimization_history'] = plot_file.name
    except Exception as e:
        print(f"Warning: Could not create optimization history plot: {e}")
    try:
        # 3. Parameter Importances - which parameters matter most
        fig = plot_param_importances(study)
        if fig is not None:
            plot_file = output_dir / 'optuna_param_importances.png'
            fig.write_image(str(plot_file), width=800, height=500)
            plots['param_importances'] = plot_file.name
    except Exception as e:
        print(f"Warning: Could not create parameter importance plot: {e}")
    try:
        # 4. Slice Plot - individual parameter effects
        fig = plot_slice(study)
        if fig is not None:
            plot_file = output_dir / 'optuna_slice.png'
            fig.write_image(str(plot_file), width=1000, height=600)
            plots['slice'] = plot_file.name
    except Exception as e:
        print(f"Warning: Could not create slice plot: {e}")
    try:
        # 5. Contour Plot - parameter interaction heatmap (2D only)
        if len(study.best_params) == 2:
            fig = plot_contour(study)
            if fig is not None:
                plot_file = output_dir / 'optuna_contour.png'
                fig.write_image(str(plot_file), width=800, height=800)
                plots['contour'] = plot_file.name
    except Exception as e:
        print(f"Warning: Could not create contour plot: {e}")
    return plots
def generate_markdown_report(history_file: Path, target_value: Optional[float] = None,
tolerance: float = 0.1, reports_dir: Optional[Path] = None,
study: Optional[optuna.Study] = None) -> str:
"""Generate comprehensive markdown optimization report with graphs."""
# Load history
with open(history_file) as f:
history = json.load(f)
if not history:
return "# Optimization Report\n\nNo optimization history found."
# Graphs should be saved to 3_reports/ folder (same as markdown file)
study_dir = history_file.parent.parent
study_name = study_dir.name
if reports_dir is None:
reports_dir = study_dir / "3_reports"
reports_dir.mkdir(parents=True, exist_ok=True)
# Load phase transition and confidence history if available
results_dir = study_dir / "2_results"
phase_transitions = []
confidence_history = []
phase_transition_file = results_dir / "phase_transitions.json"
confidence_history_file = results_dir / "confidence_history.json"
if phase_transition_file.exists():
try:
with open(phase_transition_file) as f:
phase_transitions = json.load(f)
except Exception:
pass
if confidence_history_file.exists():
try:
with open(confidence_history_file) as f:
confidence_history = json.load(f)
except Exception:
pass
# Generate plots in reports folder
convergence_plot = create_convergence_plot(history, target_value, reports_dir)
design_space_plot = create_design_space_plot(history, reports_dir)
sensitivity_plot = create_parameter_sensitivity_plot(history, reports_dir)
# Generate confidence progression plot if data available
confidence_plot = None
if confidence_history:
print(" Generating confidence progression plot...")
confidence_plot = create_confidence_progression_plot(confidence_history, phase_transitions, reports_dir)
# Generate Optuna plots if study object provided
optuna_plots = {}
if study is not None:
print(" Generating Optuna visualization plots...")
optuna_plots = create_optuna_plots(study, reports_dir)
print(f" Generated {len(optuna_plots)} Optuna plots")
# Build markdown report
lines = []
lines.append(f"# {study_name.replace('_', ' ').title()} - Optimization Report")
lines.append("")
lines.append(f"**Total Trials**: {len(history)}")
lines.append("")
# Study information
lines.append("## Study Information")
lines.append("")
first_trial = history[0]
design_vars = list(first_trial['design_variables'].keys())
lines.append(f"- **Design Variables**: {', '.join([v.replace('_', ' ').title() for v in design_vars])}")
lines.append(f"- **Number of Trials**: {len(history)}")
lines.append("")
# Adaptive optimization strategy information
if phase_transitions or confidence_history:
lines.append("## Adaptive Optimization Strategy")
lines.append("")
lines.append("This study used adaptive surrogate-based optimization with confidence-driven phase transitions.")
lines.append("")
if phase_transitions:
lines.append("### Phase Transitions")
lines.append("")
for transition in phase_transitions:
trial_num = transition['trial_number']
conf = transition['confidence_metrics']['overall_confidence']
lines.append(f"- **Trial #{trial_num}**: EXPLORATION → EXPLOITATION")
lines.append(f" - Confidence at transition: {conf:.1%}")
lines.append(f" - Convergence score: {transition['confidence_metrics']['convergence_score']:.1%}")
lines.append(f" - Exploration coverage: {transition['confidence_metrics']['exploration_coverage']:.1%}")
lines.append(f" - Prediction stability: {transition['confidence_metrics']['prediction_stability']:.1%}")
lines.append("")
else:
lines.append("### Phase Transitions")
lines.append("")
lines.append("No phase transitions occurred - optimization remained in exploration phase.")
lines.append("This may indicate:")
lines.append("- Insufficient trials to build surrogate confidence")
lines.append("- Poor exploration coverage of the design space")
lines.append("- Unstable convergence behavior")
lines.append("")
if confidence_plot:
lines.append("### Confidence Progression")
lines.append("")
lines.append(f"![Confidence Progression]({confidence_plot})")
lines.append("")
lines.append("This plot shows how the surrogate model confidence evolved over the optimization.")
lines.append("The red vertical line (if present) marks the transition to exploitation phase.")
lines.append("")
lines.append("")
# Best result
objectives = [t['objective'] for t in history]
best_idx = np.argmin(objectives)
best_trial = history[best_idx]
lines.append("## Best Result")
lines.append("")
lines.append(f"- **Trial**: #{best_trial['trial_number']}")
lines.append("")
# Show actual results FIRST (what the client cares about)
lines.append("### Achieved Performance")
for result, value in best_trial['results'].items():
metric_name = result.replace('_', ' ').title()
lines.append(f"- **{metric_name}**: {value:.4f}")
# Show target comparison if available
if target_value is not None and 'frequency' in result.lower():
error = abs(value - target_value)
lines.append(f" - Target: {target_value:.4f}")
lines.append(f" - Error: {error:.4f} ({(error/target_value*100):.2f}%)")
lines.append("")
# Then design parameters that achieved it
lines.append("### Design Parameters")
for var, value in best_trial['design_variables'].items():
lines.append(f"- **{var.replace('_', ' ').title()}**: {value:.4f}")
lines.append("")
# Technical objective last (for engineers)
lines.append("<details>")
lines.append("<summary>Technical Details (Objective Function)</summary>")
lines.append("")
lines.append(f"- **Objective Value (Error)**: {best_trial['objective']:.6f}")
lines.append("")
lines.append("</details>")
lines.append("")
# Success assessment
if target_value is not None:
lines.append("## Success Assessment")
lines.append("")
best_objective = min(objectives)
if best_objective <= tolerance:
lines.append(f"### ✅ TARGET ACHIEVED")
lines.append("")
lines.append(f"Target value {target_value} was achieved within tolerance {tolerance}!")
lines.append(f"- **Best Error**: {best_objective:.6f}")
else:
lines.append(f"### ⚠️ TARGET NOT YET ACHIEVED")
lines.append("")
lines.append(f"Target value {target_value} not achieved within tolerance {tolerance}")
lines.append(f"- **Best Error**: {best_objective:.6f}")
lines.append(f"- **Required Improvement**: {best_objective - tolerance:.6f}")
lines.append(f"- **Recommendation**: Continue optimization with more trials")
lines.append("")
# Top 5 trials - show ACTUAL METRICS not just objective
lines.append("## Top 5 Trials")
lines.append("")
sorted_history = sorted(history, key=lambda x: x['objective'])
# Extract result column names (e.g., "first_frequency")
result_cols = list(sorted_history[0]['results'].keys())
result_col_names = [r.replace('_', ' ').title() for r in result_cols]
# Build header with results AND design vars
header_cols = ["Rank", "Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
lines.append("| " + " | ".join(header_cols) + " |")
lines.append("|" + "|".join(["-"*max(6, len(c)) for c in header_cols]) + "|")
for i, trial in enumerate(sorted_history[:5], 1):
result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
row_data = [str(i), f"#{trial['trial_number']}"] + result_vals + var_vals
lines.append("| " + " | ".join(row_data) + " |")
lines.append("")
# Statistics
lines.append("## Statistics")
lines.append("")
lines.append(f"- **Mean Objective**: {np.mean(objectives):.6f}")
lines.append(f"- **Std Deviation**: {np.std(objectives):.6f}")
lines.append(f"- **Best Objective**: {np.min(objectives):.6f}")
lines.append(f"- **Worst Objective**: {np.max(objectives):.6f}")
lines.append("")
# Design variable ranges
lines.append("### Design Variable Ranges")
lines.append("")
for var in design_vars:
values = [t['design_variables'][var] for t in history]
lines.append(f"**{var.replace('_', ' ').title()}**:")
lines.append(f"- Min: {min(values):.6f}")
lines.append(f"- Max: {max(values):.6f}")
lines.append(f"- Mean: {np.mean(values):.6f}")
lines.append("")
# Convergence plot
lines.append("## Convergence Plot")
lines.append("")
lines.append(f"![Convergence Plot]({convergence_plot})")
lines.append("")
lines.append("This plot shows how the optimization converged over time. The blue line shows each trial's objective value, while the red line shows the best objective found so far.")
lines.append("")
# Design space plot
if design_space_plot:
lines.append("## Design Space Exploration")
lines.append("")
lines.append(f"![Design Space Plot]({design_space_plot})")
lines.append("")
lines.append("This plot shows which regions of the design space were explored. Darker colors indicate better objective values.")
lines.append("")
# Sensitivity plot
lines.append("## Parameter Sensitivity")
lines.append("")
lines.append(f"![Parameter Sensitivity]({sensitivity_plot})")
lines.append("")
lines.append("These plots show how each design variable affects the objective value. Steeper slopes indicate higher sensitivity.")
lines.append("")
# Optuna Advanced Visualizations
if optuna_plots:
lines.append("## Advanced Optimization Analysis (Optuna)")
lines.append("")
lines.append("The following plots leverage Optuna's professional visualization library to provide deeper insights into the optimization process.")
lines.append("")
# Parallel Coordinate Plot
if 'parallel_coordinate' in optuna_plots:
lines.append("### Parallel Coordinate Plot")
lines.append("")
lines.append(f"![Parallel Coordinate]({optuna_plots['parallel_coordinate']})")
lines.append("")
lines.append("This interactive plot shows how different parameter combinations lead to different objective values. Each line represents one trial, colored by objective value. You can see parameter interactions and identify promising regions.")
lines.append("")
# Optimization History
if 'optimization_history' in optuna_plots:
lines.append("### Optimization History")
lines.append("")
lines.append(f"![Optimization History]({optuna_plots['optimization_history']})")
lines.append("")
lines.append("Professional visualization of convergence over trials, showing both individual trial performance and best value progression.")
lines.append("")
# Parameter Importance
if 'param_importances' in optuna_plots:
lines.append("### Parameter Importance Analysis")
lines.append("")
lines.append(f"![Parameter Importance]({optuna_plots['param_importances']})")
lines.append("")
lines.append("This analysis quantifies which design variables have the most impact on the objective. Based on fANOVA (functional ANOVA) or other importance metrics.")
lines.append("")
# Slice Plot
if 'slice' in optuna_plots:
lines.append("### Parameter Slice Analysis")
lines.append("")
lines.append(f"![Slice Plot]({optuna_plots['slice']})")
lines.append("")
lines.append("Shows how changing each parameter individually affects the objective value, with other parameters held constant.")
lines.append("")
# Contour Plot
if 'contour' in optuna_plots:
lines.append("### Parameter Interaction Contour")
lines.append("")
lines.append(f"![Contour Plot]({optuna_plots['contour']})")
lines.append("")
lines.append("2D heatmap showing how combinations of two parameters affect the objective. Reveals interaction effects and optimal regions.")
lines.append("")
# Trial history table - show actual results
lines.append("## Complete Trial History")
lines.append("")
lines.append("<details>")
lines.append("<summary>Click to expand full trial history</summary>")
lines.append("")
# Build complete history table with results
history_header = ["Trial"] + result_col_names + [v.replace('_', ' ').title() for v in design_vars]
lines.append("| " + " | ".join(history_header) + " |")
lines.append("|" + "|".join(["-"*max(6, len(c)) for c in history_header]) + "|")
for trial in history:
result_vals = [f"{trial['results'][r]:.2f}" for r in result_cols]
var_vals = [f"{trial['design_variables'][v]:.2f}" for v in design_vars]
row_data = [f"#{trial['trial_number']}"] + result_vals + var_vals
lines.append("| " + " | ".join(row_data) + " |")
lines.append("")
lines.append("</details>")
lines.append("")
# Footer
lines.append("---")
lines.append("")
lines.append(f"*Report generated automatically by Atomizer optimization system*")
return '\n'.join(lines)
def main():
    """CLI entry point: build the markdown optimization report for a history file.

    Usage: generate_report_markdown.py <history_file> [target_value] [tolerance]
    The report is written as OPTIMIZATION_REPORT.md next to the history file.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python generate_report_markdown.py <history_file> [target_value] [tolerance]")
        sys.exit(1)
    history_file = Path(args[0])
    if not history_file.exists():
        print(f"Error: History file not found: {history_file}")
        sys.exit(1)
    # Optional positional overrides: target value (default None) and tolerance (default 0.1).
    target_value = float(args[1]) if len(args) > 1 else None
    tolerance = float(args[2]) if len(args) > 2 else 0.1
    # Generate the markdown report
    report = generate_markdown_report(history_file, target_value, tolerance)
    # Save it alongside the input history file
    report_file = history_file.parent / 'OPTIMIZATION_REPORT.md'
    report_file.write_text(report, encoding='utf-8')
    print(f"Report saved to: {report_file}")
if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,914 @@
"""
Hybrid Mode Study Creator - Complete Automation
This module provides COMPLETE automation for creating optimization studies:
1. Creates proper study structure (1_setup, 2_results, 3_reports)
2. Runs benchmarking to validate simulation setup
3. Auto-generates runner from workflow JSON
4. Provides progress monitoring
No user intervention required after workflow JSON is created.
"""
from pathlib import Path
from typing import Dict, Any, Optional, List
import json
import shutil
from datetime import datetime
class HybridStudyCreator:
    """
    Fully automated creation of Hybrid Mode optimization studies.

    Typical use:
        creator = HybridStudyCreator()
        study = creator.create_from_workflow(
            workflow_json_path="path/to/workflow.json",
            model_files={"prt": "path.prt", "sim": "path.sim", "fem": "path.fem"},
            study_name="my_optimization"
        )
    """
    def __init__(self):
        # Project root: one directory above the package containing this module
        # (assumes this file lives directly under <root>/optimization_engine/ — TODO confirm).
        self.project_root = Path(__file__).parents[1]
def create_from_workflow(
self,
workflow_json_path: Path,
model_files: Dict[str, Path],
study_name: str,
output_parent: Optional[Path] = None
) -> Path:
"""
Create complete study from workflow JSON with full automation.
Args:
workflow_json_path: Path to workflow JSON config
model_files: Dict with keys 'prt', 'sim', 'fem' (and optionally 'fem_i')
study_name: Name for the study
output_parent: Parent directory for studies (default: project_root/studies)
Returns:
Path to created study directory
"""
print("="*80)
print(" HYBRID MODE - AUTOMATED STUDY CREATION")
print("="*80)
print()
# Step 1: Create study structure
print("[1/5] Creating study structure...")
study_dir = self._create_study_structure(study_name, output_parent)
print(f" [OK] Study directory: {study_dir.name}")
print()
# Step 2: Copy files
print("[2/5] Copying model files...")
self._copy_model_files(model_files, study_dir / "1_setup/model")
print(f" [OK] Copied {len(model_files)} files")
print()
# Step 3: Copy workflow JSON
print("[3/5] Installing workflow configuration...")
workflow_dest = study_dir / "1_setup/workflow_config.json"
shutil.copy2(workflow_json_path, workflow_dest)
with open(workflow_dest) as f:
workflow = json.load(f)
print(f" [OK] Workflow: {workflow.get('study_name', 'unnamed')}")
print(f" [OK] Variables: {len(workflow.get('design_variables', []))}")
print(f" [OK] Objectives: {len(workflow.get('objectives', []))}")
print()
# Step 4: Run benchmarking
print("[4/5] Running benchmarking (validating simulation setup)...")
benchmark_results = self._run_benchmarking(
study_dir / "1_setup/model" / model_files['prt'].name,
study_dir / "1_setup/model" / model_files['sim'].name,
workflow
)
if not benchmark_results['success']:
raise RuntimeError(f"Benchmarking failed: {benchmark_results['error']}")
print(f" [OK] Simulation validated")
print(f" [OK] Extracted {benchmark_results['n_results']} results")
print()
# Step 4.5: Generate configuration report
print("[4.5/5] Generating configuration report...")
self._generate_configuration_report(study_dir, workflow, benchmark_results)
print(f" [OK] Configuration report: 1_setup/CONFIGURATION_REPORT.md")
print()
# Step 5: Generate runner
print("[5/5] Generating optimization runner...")
runner_path = self._generate_runner(study_dir, workflow, benchmark_results)
print(f" [OK] Runner: {runner_path.name}")
print()
# Create README
self._create_readme(study_dir, workflow, benchmark_results)
print("="*80)
print(" STUDY CREATION COMPLETE")
print("="*80)
print()
print(f"Study location: {study_dir}")
print()
print("To run optimization:")
print(f" python {runner_path.relative_to(self.project_root)}")
print()
return study_dir
def _create_study_structure(self, study_name: str, output_parent: Optional[Path]) -> Path:
"""Create proper study folder structure."""
if output_parent is None:
output_parent = self.project_root / "studies"
study_dir = output_parent / study_name
# Create structure
(study_dir / "1_setup/model").mkdir(parents=True, exist_ok=True)
(study_dir / "2_results").mkdir(parents=True, exist_ok=True)
(study_dir / "3_reports").mkdir(parents=True, exist_ok=True)
return study_dir
def _copy_model_files(self, model_files: Dict[str, Path], dest_dir: Path):
"""Copy model files to study."""
for file_type, file_path in model_files.items():
if file_path and file_path.exists():
shutil.copy2(file_path, dest_dir / file_path.name)
    def _run_benchmarking(
        self,
        prt_file: Path,
        sim_file: Path,
        workflow: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Run INTELLIGENT benchmarking to validate simulation setup.

        This uses IntelligentSetup to:
        1. Solve ALL solutions in the .sim file
        2. Discover all available results
        3. Match objectives to results automatically
        4. Select optimal solution for optimization

        Returns dict with:
        - success: bool
        - n_results: int (number of results extracted)
        - results: dict (extracted values)
        - solution_name: str (optimal solution to use for optimization)
        - benchmark_data: dict (full IntelligentSetup catalog, present on success)
        - error: str (if failed)
        """
        # Imported lazily so merely importing this module does not pull in NX tooling.
        from optimization_engine.intelligent_setup import IntelligentSetup
        try:
            print(" Running INTELLIGENT benchmarking...")
            print(" - Solving ALL solutions in .sim file")
            print(" - Discovering all available results")
            print(" - Matching objectives to results")
            print()
            # Run intelligent benchmarking
            intelligent = IntelligentSetup()
            benchmark_data = intelligent.run_complete_benchmarking(
                prt_file, sim_file, workflow
            )
            if not benchmark_data['success']:
                return {
                    'success': False,
                    'error': f"Intelligent benchmarking failed: {benchmark_data.get('error', 'Unknown')}"
                }
            # Display discovered information
            print(f" [OK] Expressions found: {len(benchmark_data.get('expressions', {}))}")
            print(f" [OK] Solutions found: {len(benchmark_data.get('solutions', {}))}")
            print(f" [OK] Results discovered: {len(benchmark_data.get('available_results', {}))}")
            # Display objective mapping
            obj_mapping = benchmark_data.get('objective_mapping', {})
            if 'objectives' in obj_mapping:
                print(f" [OK] Objectives matched: {len(obj_mapping['objectives'])}")
                for obj_name, obj_info in obj_mapping['objectives'].items():
                    solution = obj_info.get('solution', 'Unknown')
                    result_type = obj_info.get('result_type', 'Unknown')
                    confidence = obj_info.get('match_confidence', 'Unknown')
                    print(f" - {obj_name}: {result_type} from '{solution}' ({confidence} confidence)")
            # Get recommended solution
            # NOTE(review): reads 'primary_solution' from the mapping; IntelligentSetup also
            # initializes a top-level 'recommended_solution' key — confirm which is authoritative.
            recommended_solution = obj_mapping.get('primary_solution')
            if recommended_solution:
                print(f" [OK] Recommended solution: {recommended_solution}")
            # Extract baseline values from the unoptimized model, chosen by a keyword
            # match on each objective's extraction action string.
            extracted = {}
            for obj in workflow.get('objectives', []):
                extraction = obj.get('extraction', {})
                action = extraction.get('action', '')
                if 'frequency' in action.lower() or 'eigenvalue' in action.lower():
                    # Extract eigenvalues from discovered results
                    available_results = benchmark_data.get('available_results', {})
                    if 'eigenvalues' in available_results:
                        # Get op2 file from eigenvalues result
                        eig_result = available_results['eigenvalues']
                        op2_file = Path(eig_result['op2_path'])
                        freq = self._extract_frequency(op2_file, mode_number=1)
                        extracted['first_frequency'] = freq
                        print(f" Baseline first frequency: {freq:.4f} Hz")
                elif 'displacement' in action.lower():
                    # Extract displacement from discovered results
                    available_results = benchmark_data.get('available_results', {})
                    if 'displacements' in available_results:
                        disp_result = available_results['displacements']
                        op2_file = Path(disp_result['op2_path'])
                        disp = self._extract_displacement(op2_file)
                        extracted['max_displacement'] = disp
                        print(f" Baseline max displacement: {disp:.6f} mm")
                elif 'stress' in action.lower():
                    # Extract stress from discovered results
                    available_results = benchmark_data.get('available_results', {})
                    if 'stresses' in available_results:
                        stress_result = available_results['stresses']
                        op2_file = Path(stress_result['op2_path'])
                        stress = self._extract_stress(op2_file)
                        extracted['max_stress'] = stress
                        print(f" Baseline max stress: {stress:.2f} MPa")
            return {
                'success': True,
                'n_results': len(extracted),
                'results': extracted,
                'solution_name': recommended_solution,
                'benchmark_data': benchmark_data  # Include full benchmarking data
            }
        except Exception as e:
            # Deliberate catch-all: benchmarking failure is reported back to the
            # caller as data so study creation can raise one clear RuntimeError.
            return {
                'success': False,
                'error': str(e)
            }
def _extract_frequency(self, op2_file: Path, mode_number: int = 1) -> float:
"""Extract eigenfrequency from OP2."""
from pyNastran.op2.op2 import OP2
import numpy as np
model = OP2()
model.read_op2(str(op2_file))
if not hasattr(model, 'eigenvalues') or len(model.eigenvalues) == 0:
raise ValueError("No eigenvalues found in OP2 file")
subcase = list(model.eigenvalues.keys())[0]
eig_obj = model.eigenvalues[subcase]
eigenvalue = eig_obj.eigenvalues[mode_number - 1]
angular_freq = np.sqrt(eigenvalue)
frequency_hz = angular_freq / (2 * np.pi)
return float(frequency_hz)
def _extract_displacement(self, op2_file: Path) -> float:
"""Extract max displacement from OP2."""
from pyNastran.op2.op2 import OP2
import numpy as np
model = OP2()
model.read_op2(str(op2_file))
if hasattr(model, 'displacements') and len(model.displacements) > 0:
subcase = list(model.displacements.keys())[0]
disp_obj = model.displacements[subcase]
translations = disp_obj.data[0, :, :3] # [time, node, tx/ty/tz]
magnitudes = np.linalg.norm(translations, axis=1)
return float(np.max(magnitudes))
raise ValueError("No displacements found in OP2 file")
def _extract_stress(self, op2_file: Path) -> float:
"""Extract max von Mises stress from OP2."""
from pyNastran.op2.op2 import OP2
import numpy as np
model = OP2()
model.read_op2(str(op2_file))
# Try different stress result locations
if hasattr(model, 'cquad4_stress') and len(model.cquad4_stress) > 0:
subcase = list(model.cquad4_stress.keys())[0]
stress_obj = model.cquad4_stress[subcase]
von_mises = stress_obj.data[0, :, 7] # von Mises typically at index 7
return float(np.max(von_mises))
raise ValueError("No stress results found in OP2 file")
def _generate_runner(
self,
study_dir: Path,
workflow: Dict[str, Any],
benchmark_results: Dict[str, Any]
) -> Path:
"""Generate optimization runner script."""
runner_path = study_dir / "run_optimization.py"
# Detect result types from workflow
extracts_frequency = any(
'frequency' in obj.get('extraction', {}).get('action', '').lower()
for obj in workflow.get('objectives', [])
)
# Generate extractor function based on workflow
extractor_code = self._generate_extractor_code(workflow)
runner_code = f'''"""
Auto-generated optimization runner
Created: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
"""
import sys
from pathlib import Path
# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
import json
import optuna
from optimization_engine.nx_updater import NXParameterUpdater
from optimization_engine.nx_solver import NXSolver
{extractor_code}
def main():
print("="*80)
print(" {workflow.get('study_name', 'OPTIMIZATION').upper()}")
print("="*80)
print()
# Load workflow
config_file = Path(__file__).parent / "1_setup/workflow_config.json"
with open(config_file) as f:
workflow = json.load(f)
print("Workflow loaded:")
print(f" Request: {workflow.get('optimization_request', 'N/A')}")
print(f" Variables: {len(workflow.get('design_variables', []))}")
print()
# Setup paths
prt_file = Path(__file__).parent / "1_setup/model" / [f for f in (Path(__file__).parent / "1_setup/model").glob("*.prt")][0].name
sim_file = Path(__file__).parent / "1_setup/model" / [f for f in (Path(__file__).parent / "1_setup/model").glob("*.sim")][0].name
output_dir = Path(__file__).parent / "2_results"
reports_dir = Path(__file__).parent / "3_reports"
output_dir.mkdir(parents=True, exist_ok=True)
reports_dir.mkdir(parents=True, exist_ok=True)
# Initialize
updater = NXParameterUpdater(prt_file)
solver = NXSolver()
# Create Optuna study
study_name = "{workflow.get('study_name', 'optimization')}"
storage = f"sqlite:///{{output_dir / 'study.db'}}"
study = optuna.create_study(
study_name=study_name,
storage=storage,
load_if_exists=True,
direction="minimize"
)
# Initialize incremental history
history_file = output_dir / 'optimization_history_incremental.json'
history = []
if history_file.exists():
with open(history_file) as f:
history = json.load(f)
def objective(trial):
# Sample design variables
params = {{}}
for var in workflow['design_variables']:
name = var['parameter']
bounds = var['bounds']
params[name] = trial.suggest_float(name, bounds[0], bounds[1])
print(f"\\nTrial {{trial.number}}:")
for name, value in params.items():
print(f" {{name}} = {{value:.2f}}")
# Update model
updater.update_expressions(params)
# Run simulation with the optimal solution
result = solver.run_simulation(sim_file, solution_name="{benchmark_results.get('solution_name')}")
if not result['success']:
raise RuntimeError(f"Simulation failed: {{result.get('errors', 'Unknown')}}")
op2_file = result['op2_file']
# Extract results and calculate objective
results = extract_results(op2_file, workflow)
# Print results
for name, value in results.items():
print(f" {{name}} = {{value:.4f}}")
# Calculate objective (from first objective in workflow)
obj_config = workflow['objectives'][0]
result_name = list(results.keys())[0]
# For target-matching objectives, compute error from target
if 'target_frequency' in obj_config.get('extraction', {{}}).get('params', {{}}):
target = obj_config['extraction']['params']['target_frequency']
objective_value = abs(results[result_name] - target)
print(f" Frequency: {{results[result_name]:.4f}} Hz, Target: {{target}} Hz, Error: {{objective_value:.4f}} Hz")
elif obj_config['goal'] == 'minimize':
objective_value = results[result_name]
else:
objective_value = -results[result_name]
print(f" Objective = {{objective_value:.4f}}")
# Save to incremental history
trial_record = {{
'trial_number': trial.number,
'design_variables': params,
'results': results,
'objective': objective_value
}}
history.append(trial_record)
with open(history_file, 'w') as f:
json.dump(history, f, indent=2)
return objective_value
# Run optimization
n_trials = 10
print(f"\\nRunning {{n_trials}} trials...")
print("="*80)
print()
study.optimize(objective, n_trials=n_trials)
# Results
print()
print("="*80)
print(" OPTIMIZATION COMPLETE")
print("="*80)
print()
print(f"Best trial: #{{study.best_trial.number}}")
for name, value in study.best_params.items():
print(f" {{name}} = {{value:.2f}}")
print(f"\\nBest objective = {{study.best_value:.4f}}")
print()
# Generate human-readable markdown report with graphs
print("Generating optimization report...")
from optimization_engine.generate_report_markdown import generate_markdown_report
# Extract target frequency from workflow objectives
target_value = None
tolerance = 0.1
for obj in workflow.get('objectives', []):
if 'target_frequency' in obj.get('extraction', {{}}).get('params', {{}}):
target_value = obj['extraction']['params']['target_frequency']
break
# Generate markdown report with graphs
report = generate_markdown_report(history_file, target_value=target_value, tolerance=tolerance)
report_file = reports_dir / 'OPTIMIZATION_REPORT.md'
with open(report_file, 'w', encoding='utf-8') as f:
f.write(report)
print(f"✓ Markdown report with graphs saved to: {{report_file}}")
print()
if __name__ == "__main__":
main()
'''
with open(runner_path, 'w', encoding='utf-8') as f:
f.write(runner_code)
return runner_path
def _generate_extractor_code(self, workflow: Dict[str, Any]) -> str:
"""Generate extractor function based on workflow objectives."""
# Detect what needs to be extracted
needs_frequency = False
needs_displacement = False
needs_stress = False
for obj in workflow.get('objectives', []):
action = obj.get('extraction', {}).get('action', '').lower()
if 'frequency' in action or 'eigenvalue' in action:
needs_frequency = True
elif 'displacement' in action:
needs_displacement = True
elif 'stress' in action:
needs_stress = True
code = '''
def extract_results(op2_file, workflow):
"""Extract results from OP2 file based on workflow objectives."""
from pyNastran.op2.op2 import OP2
import numpy as np
model = OP2()
model.read_op2(str(op2_file))
results = {}
'''
if needs_frequency:
code += '''
# Extract first frequency
if hasattr(model, 'eigenvalues') and len(model.eigenvalues) > 0:
subcase = list(model.eigenvalues.keys())[0]
eig_obj = model.eigenvalues[subcase]
eigenvalue = eig_obj.eigenvalues[0]
angular_freq = np.sqrt(eigenvalue)
frequency_hz = angular_freq / (2 * np.pi)
results['first_frequency'] = float(frequency_hz)
else:
raise ValueError("No eigenvalues found in OP2 file")
'''
if needs_displacement:
code += '''
# Extract max displacement
if hasattr(model, 'displacements') and len(model.displacements) > 0:
subcase = list(model.displacements.keys())[0]
disp_obj = model.displacements[subcase]
translations = disp_obj.data[0, :, :3]
magnitudes = np.linalg.norm(translations, axis=1)
results['max_displacement'] = float(np.max(magnitudes))
'''
if needs_stress:
code += '''
# Extract max stress
if hasattr(model, 'cquad4_stress') and len(model.cquad4_stress) > 0:
subcase = list(model.cquad4_stress.keys())[0]
stress_obj = model.cquad4_stress[subcase]
von_mises = stress_obj.data[0, :, 7]
results['max_stress'] = float(np.max(von_mises))
'''
code += '''
return results
'''
return code
    def _generate_configuration_report(
        self,
        study_dir: Path,
        workflow: Dict[str, Any],
        benchmark_results: Dict[str, Any]
    ):
        """
        Generate a comprehensive configuration report with ALL setup details.

        This creates 1_setup/CONFIGURATION_REPORT.md with:
        - User's optimization request
        - All discovered expressions
        - All discovered solutions
        - All available result types
        - Objective matching details
        - Baseline values
        - Warnings and issues
        """
        report_path = study_dir / "1_setup" / "CONFIGURATION_REPORT.md"
        # Get benchmark data (the raw IntelligentSetup catalog attached by _run_benchmarking;
        # every section below degrades gracefully when a key is absent).
        benchmark_data = benchmark_results.get('benchmark_data', {})
        expressions = benchmark_data.get('expressions', {})
        solutions = benchmark_data.get('solutions', {})
        available_results = benchmark_data.get('available_results', {})
        obj_mapping = benchmark_data.get('objective_mapping', {})
        # Build expressions section (markdown table of model expressions)
        expressions_md = "## Model Expressions\n\n"
        if expressions:
            expressions_md += f"**Total expressions found: {len(expressions)}**\n\n"
            expressions_md += "| Expression Name | Current Value | Units | Formula |\n"
            expressions_md += "|----------------|---------------|-------|----------|\n"
            for name, info in sorted(expressions.items()):
                value = info.get('value', 'N/A')
                units = info.get('units', '')
                formula = info.get('formula', '')
                expressions_md += f"| {name} | {value} | {units} | {formula} |\n"
        else:
            expressions_md += "*No expressions found in model*\n"
        # Build solutions section
        solutions_md = "## Simulation Solutions\n\n"
        if solutions:
            # Handle both old format (solution_names list) and new format (dict)
            if isinstance(solutions, dict):
                if 'solution_names' in solutions:
                    # Old format: just solution names
                    solution_names = solutions.get('solution_names', [])
                    num_solved = solutions.get('num_solved', 0)
                    num_failed = solutions.get('num_failed', 0)
                    num_skipped = solutions.get('num_skipped', 0)
                    solutions_md += f"**Solutions discovered**: {len(solution_names)}\n"
                    solutions_md += f"**Solved**: {num_solved} | **Failed**: {num_failed} | **Skipped**: {num_skipped}\n\n"
                    if solution_names:
                        for sol_name in solution_names:
                            solutions_md += f"- {sol_name}\n"
                    else:
                        solutions_md += "*No solution names retrieved*\n"
                else:
                    # New format: dict of solution details
                    solutions_md += f"**Total solutions found: {len(solutions)}**\n\n"
                    for sol_name, sol_info in solutions.items():
                        solutions_md += f"### {sol_name}\n\n"
                        solutions_md += f"- **Type**: {sol_info.get('type', 'Unknown')}\n"
                        solutions_md += f"- **OP2 File**: `{sol_info.get('op2_path', 'N/A')}`\n\n"
        else:
            solutions_md += "*No solutions discovered - check if benchmarking solved all solutions*\n"
        # Build available results section (one subsection per discovered result type)
        results_md = "## Available Results\n\n"
        if available_results:
            results_md += f"**Total result types discovered: {len(available_results)}**\n\n"
            for result_type, result_info in available_results.items():
                results_md += f"### {result_type}\n\n"
                results_md += f"- **Solution**: {result_info.get('solution', 'Unknown')}\n"
                results_md += f"- **OP2 File**: `{result_info.get('op2_path', 'N/A')}`\n"
                if 'sample_value' in result_info:
                    results_md += f"- **Sample Value**: {result_info['sample_value']}\n"
                results_md += "\n"
        else:
            results_md += "*No results discovered - check if simulations solved successfully*\n"
        # Build objective matching section (how each objective maps to a result/solution)
        matching_md = "## Objective Matching\n\n"
        if 'objectives' in obj_mapping and obj_mapping['objectives']:
            matching_md += f"**Objectives matched: {len(obj_mapping['objectives'])}**\n\n"
            for obj_name, obj_info in obj_mapping['objectives'].items():
                solution = obj_info.get('solution', 'NONE')
                result_type = obj_info.get('result_type', 'Unknown')
                confidence = obj_info.get('match_confidence', 'Unknown')
                extractor = obj_info.get('extractor', 'Unknown')
                op2_file = obj_info.get('op2_file', 'N/A')
                error = obj_info.get('error', None)
                matching_md += f"### {obj_name}\n\n"
                matching_md += f"- **Result Type**: {result_type}\n"
                matching_md += f"- **Solution**: {solution}\n"
                matching_md += f"- **Confidence**: {confidence}\n"
                matching_md += f"- **Extractor**: `{extractor}`\n"
                matching_md += f"- **OP2 File**: `{op2_file}`\n"
                if error:
                    matching_md += f"- **⚠️ ERROR**: {error}\n"
                matching_md += "\n"
            # Add primary solution
            primary_solution = obj_mapping.get('primary_solution')
            if primary_solution:
                matching_md += f"**Primary Solution Selected**: `{primary_solution}`\n\n"
                matching_md += "This solution will be used for optimization.\n\n"
        else:
            matching_md += "*No objectives matched - check workflow configuration*\n"
        # Build baseline values section (values extracted during benchmarking)
        baseline_md = "## Baseline Values\n\n"
        baseline_results = benchmark_results.get('results', {})
        if baseline_results:
            baseline_md += "Values extracted from the initial (unoptimized) model:\n\n"
            for key, value in baseline_results.items():
                baseline_md += f"- **{key}**: {value}\n"
        else:
            baseline_md += "*No baseline values extracted*\n"
        # Build warnings section (collects issues detected across the other sections)
        warnings_md = "## Warnings and Issues\n\n"
        warnings = []
        # Check for missing eigenvalues
        for obj_name, obj_info in obj_mapping.get('objectives', {}).items():
            if obj_info.get('error'):
                warnings.append(f"- ⚠️ **{obj_name}**: {obj_info['error']}")
        # Check for no solutions
        if not solutions:
            warnings.append("- ⚠️ **No solutions discovered**: Benchmarking may not have solved all solutions")
        # Check for no results
        if not available_results:
            warnings.append("- ⚠️ **No results available**: Check if simulations ran successfully")
        if warnings:
            warnings_md += "\n".join(warnings) + "\n"
        else:
            warnings_md += "✅ No issues detected!\n"
        # Build full report (template lines are left-aligned: they are file content)
        content = f'''# Configuration Report
**Study**: {workflow.get('study_name', study_dir.name)}
**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
---
## Optimization Request
**User's Goal**:
> {workflow.get('optimization_request', '*No description provided*')}
**Design Variables**: {len(workflow.get('design_variables', []))}
| Variable | Min | Max |
|----------|-----|-----|
'''
        for var in workflow.get('design_variables', []):
            param = var.get('parameter', 'Unknown')
            bounds = var.get('bounds', [0, 0])
            content += f"| {param} | {bounds[0]} | {bounds[1]} |\n"
        content += f'''
**Objectives**: {len(workflow.get('objectives', []))}
| Objective | Goal |
|-----------|------|
'''
        for obj in workflow.get('objectives', []):
            obj_name = obj.get('name', 'Unknown')
            goal = obj.get('goal', 'Unknown')
            content += f"| {obj_name} | {goal} |\n"
        content += f'''
---
{expressions_md}
---
{solutions_md}
---
{results_md}
---
{matching_md}
---
{baseline_md}
---
{warnings_md}
---
## Next Steps
1. ✅ Study structure created
2. ✅ Benchmarking complete
3. ✅ Configuration validated
4. ➡️ **Run optimization**: `python run_optimization.py`
---
*This report was auto-generated by the Intelligent Setup System*
'''
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(content)
def _create_readme(
self,
study_dir: Path,
workflow: Dict[str, Any],
benchmark_results: Dict[str, Any]
):
"""Create README for the study."""
readme_path = study_dir / "README.md"
# Format design variables
vars_md = ""
for var in workflow.get('design_variables', []):
bounds = var.get('bounds', [0, 1])
desc = var.get('description', '')
vars_md += f"- `{var['parameter']}`: {bounds[0]}-{bounds[1]} mm"
if desc:
vars_md += f" - {desc}"
vars_md += "\n"
# Format objectives
objs_md = ""
for obj in workflow.get('objectives', []):
objs_md += f"- {obj['goal'].title()} {obj['name']}\n"
# Format benchmark results
bench_md = ""
if benchmark_results.get('success'):
for name, value in benchmark_results.get('results', {}).items():
bench_md += f"- {name}: {value:.4f}\n"
content = f'''# {workflow.get('study_name', 'Optimization Study')}
**Created**: {datetime.now().strftime("%Y-%m-%d")}
**Mode**: Hybrid (Workflow JSON + Auto-generated runner)
## Problem Description
{workflow.get('optimization_request', 'N/A')}
### Design Variables
{vars_md}
### Objectives
{objs_md}
## Benchmark Results
Baseline simulation (default geometry):
{bench_md}
## Study Structure
```
{study_dir.name}/
├── 1_setup/
│ ├── model/ # FEM model files
│ └── workflow_config.json # Optimization specification
├── 2_substudies/
│ └── results/ # Optimization results
├── 3_reports/
├── run_optimization.py # Auto-generated runner
└── README.md # This file
```
## Running the Optimization
```bash
python run_optimization.py
```
This will:
1. Load workflow configuration
2. Initialize NX model updater and solver
3. Run {10} optimization trials
4. Save results to `2_substudies/results/`
## Results
After optimization completes, check:
- `2_substudies/results/study.db` - Optuna database
- `2_substudies/results/` - Best design parameters
---
**Created by Hybrid Mode** - 90% automation, production ready!
'''
with open(readme_path, 'w', encoding='utf-8') as f:
f.write(content)
if __name__ == "__main__":
    # Demonstration only: the paths below are placeholders and must be replaced
    # with real workflow/model locations before running this module directly.
    creator = HybridStudyCreator()
    demo_study = creator.create_from_workflow(
        workflow_json_path=Path("path/to/workflow.json"),
        model_files={
            'prt': Path("path/to/model.prt"),
            'sim': Path("path/to/model.sim"),
            'fem': Path("path/to/model.fem"),
        },
        study_name="example_study",
    )
    print(f"Study created: {demo_study}")

View File

@@ -0,0 +1,694 @@
"""
Intelligent Setup System for Atomizer
This module provides COMPLETE autonomy for optimization setup:
1. Solves ALL solutions in .sim file
2. Discovers all available results (eigenvalues, displacements, stresses, etc.)
3. Catalogs expressions and parameters
4. Matches workflow objectives to available results
5. Auto-selects correct solution for optimization
6. Generates optimized runner code
This is the level of intelligence Atomizer should have.
"""
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
import json
from datetime import datetime
class IntelligentSetup:
    """
    Intelligent benchmarking and setup system.

    Proactively discovers EVERYTHING about a simulation:
    - All solutions (Static, Modal, Buckling, etc.)
    - All result types (displacements, stresses, eigenvalues, etc.)
    - All expressions and parameters
    - Matches user objectives to available data
    """

    def __init__(self):
        # Project root is assumed to be two directories above this module.
        self.project_root = Path(__file__).parent.parent

    def run_complete_benchmarking(
        self,
        prt_file: Path,
        sim_file: Path,
        workflow: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Run COMPLETE benchmarking:
        1. Extract ALL expressions from .prt
        2. Solve ALL solutions in .sim
        3. Analyze ALL result files
        4. Match objectives to available results
        5. Determine optimal solution for each objective

        Args:
            prt_file: NX part (.prt) file holding the model expressions.
            sim_file: NX simulation (.sim) file whose solutions are solved.
            workflow: Workflow dict; its 'objectives' list is matched against
                the results discovered in phases 2-3.

        Returns:
            Complete catalog of available data and recommendations
        """
        print()
        print("="*80)
        print(" INTELLIGENT SETUP - COMPLETE ANALYSIS")
        print("="*80)
        print()
        # Result skeleton; 'success' flips to True only if all phases finish.
        results = {
            'success': False,
            'expressions': {},
            'solutions': {},
            'available_results': {},
            'objective_mapping': {},
            'recommended_solution': None,
            'errors': []
        }
        try:
            # Phase 1: Extract ALL expressions
            print("[Phase 1/4] Extracting ALL expressions from model...")
            expressions = self._extract_all_expressions(prt_file)
            results['expressions'] = expressions
            print(f" [OK] Found {len(expressions)} expressions")
            # Only preview the first 5 expressions to keep output readable.
            for name, info in list(expressions.items())[:5]:
                val = info.get('value', 'N/A')
                units = info.get('units', '')
                print(f" - {name}: {val} {units}")
            if len(expressions) > 5:
                print(f" ... and {len(expressions) - 5} more")
            print()
            # Phase 2: Solve ALL solutions
            print("[Phase 2/4] Solving ALL solutions in .sim file...")
            solutions_info = self._solve_all_solutions(sim_file)
            results['solutions'] = solutions_info
            print(f" [OK] Solved {solutions_info['num_solved']} solutions")
            for sol_name in solutions_info['solution_names']:
                print(f" - {sol_name}")
            print()
            # Phase 3: Analyze ALL result files
            print("[Phase 3/4] Analyzing ALL result files...")
            available_results = self._analyze_all_results(sim_file.parent, solutions_info)
            results['available_results'] = available_results
            print(f" [OK] Found {len(available_results)} result files")
            for result_type, details in available_results.items():
                print(f" - {result_type}: {details['count']} entries in {details['file']}")
            print()
            # Phase 4: Match objectives to results
            print("[Phase 4/4] Matching objectives to available results...")
            mapping = self._match_objectives_to_results(workflow, available_results, solutions_info)
            results['objective_mapping'] = mapping
            results['recommended_solution'] = mapping.get('primary_solution')
            print(f" [OK] Objective mapping complete")
            for obj_name, obj_info in mapping['objectives'].items():
                print(f" - {obj_name}")
                print(f" Solution: {obj_info.get('solution', 'NONE')}")
                print(f" Result type: {obj_info.get('result_type', 'Unknown')}")
                print(f" Extractor: {obj_info.get('extractor', 'Unknown')}")
                if 'error' in obj_info:
                    print(f" [WARNING] {obj_info['error']}")
            print()
            if mapping.get('primary_solution'):
                print(f" [RECOMMENDATION] Use solution: {mapping['primary_solution']}")
                print()
            results['success'] = True
        except Exception as e:
            # Best-effort: record the failure so callers can inspect it.
            results['errors'].append(str(e))
            print(f" [ERROR] {e}")
        print()
        print("="*80)
        print(" ANALYSIS COMPLETE")
        print("="*80)
        print()
        return results

    def _extract_all_expressions(self, prt_file: Path) -> Dict[str, Any]:
        """Extract ALL expressions from .prt file via NXParameterUpdater."""
        from optimization_engine.nx_updater import NXParameterUpdater
        updater = NXParameterUpdater(prt_file)
        return updater.get_all_expressions()

    def _solve_all_solutions(self, sim_file: Path) -> Dict[str, Any]:
        """
        Solve ALL solutions in .sim file using NXOpen journal approach.

        CRITICAL: This method updates the .fem file from the .prt before solving!
        This is required when geometry changes (modal analysis, etc.)

        Returns dict with:
            - num_solved: int
            - num_failed: int
            - num_skipped: int
            - solution_names: List[str]
        """
        # Create journal to solve all solutions.  The journal body is an
        # NXOpen script written to disk and executed by run_journal; {{...}}
        # escapes keep runtime f-string placeholders for the journal itself.
        journal_code = f'''
import sys
import NXOpen
import NXOpen.CAE

def main(args):
    if len(args) < 1:
        print("ERROR: No .sim file path provided")
        return False
    sim_file_path = args[0]
    theSession = NXOpen.Session.GetSession()
    # Open the .sim file
    print(f"[JOURNAL] Opening simulation: {{sim_file_path}}")
    basePart1, partLoadStatus1 = theSession.Parts.OpenActiveDisplay(
        sim_file_path,
        NXOpen.DisplayPartOption.AllowAdditional
    )
    partLoadStatus1.Dispose()
    workSimPart = theSession.Parts.BaseWork
    print(f"[JOURNAL] Simulation opened successfully")
    # CRITICAL: Update FEM from master model (.prt)
    # This is required when geometry has changed (modal analysis, etc.)
    print("[JOURNAL] Updating FEM from master model...")
    simSimulation = workSimPart.Simulation
    # Get all FEModels and update them
    femModels = simSimulation.FemParts
    for i in range(femModels.Length):
        femPart = femModels.Item(i)
        print(f"[JOURNAL] Updating FEM: {{femPart.Name}}")
        # Update the FEM from associated CAD part
        femPart.UpdateFemodel()
    # Save after FEM update
    print("[JOURNAL] Saving after FEM update...")
    partSaveStatus = workSimPart.Save(
        NXOpen.BasePart.SaveComponents.TrueValue,
        NXOpen.BasePart.CloseAfterSave.FalseValue
    )
    partSaveStatus.Dispose()
    # Get all solutions
    theCAESimSolveManager = NXOpen.CAE.SimSolveManager.GetSimSolveManager(theSession)
    # Solve all solutions
    print("[JOURNAL] Solving ALL solutions...")
    num_solved, num_failed, num_skipped = theCAESimSolveManager.SolveAllSolutions(
        NXOpen.CAE.SimSolution.SolveOption.Solve,
        NXOpen.CAE.SimSolution.SetupCheckOption.CompleteCheckAndOutputErrors,
        NXOpen.CAE.SimSolution.SolveMode.Foreground,
        False
    )
    # Get solution names
    simSimulation = workSimPart.FindObject("Simulation")
    solutions = []
    for obj in simSimulation.GetAllDescendents():
        if "Solution[" in str(obj):
            solutions.append(str(obj))
    # Save to write output files
    print("[JOURNAL] Saving simulation to write output files...")
    partSaveStatus = workSimPart.Save(
        NXOpen.BasePart.SaveComponents.TrueValue,
        NXOpen.BasePart.CloseAfterSave.FalseValue
    )
    partSaveStatus.Dispose()
    # Output results
    print(f"ATOMIZER_SOLUTIONS_SOLVED: {{num_solved}}")
    print(f"ATOMIZER_SOLUTIONS_FAILED: {{num_failed}}")
    print(f"ATOMIZER_SOLUTIONS_SKIPPED: {{num_skipped}}")
    for sol in solutions:
        print(f"ATOMIZER_SOLUTION: {{sol}}")
    return True

if __name__ == '__main__':
    success = main(sys.argv[1:])
    sys.exit(0 if success else 1)
'''
        # Write and execute journal
        journal_path = sim_file.parent / "_solve_all_solutions.py"
        with open(journal_path, 'w') as f:
            f.write(journal_code)
        # Run journal via NX
        from optimization_engine.nx_solver import NXSolver
        # NOTE(review): `solver` is instantiated but never used below — the
        # subprocess call drives NX directly.  Confirm whether construction
        # has a needed side effect; otherwise this line looks removable.
        solver = NXSolver()
        import subprocess
        from config import NX_RUN_JOURNAL
        # 600 s hard cap on the whole solve; subprocess.TimeoutExpired
        # propagates to the caller if exceeded.
        result = subprocess.run(
            [str(NX_RUN_JOURNAL), str(journal_path), str(sim_file)],
            capture_output=True,
            text=True,
            timeout=600
        )
        # Parse the ATOMIZER_* marker lines emitted by the journal.
        num_solved = 0
        num_failed = 0
        num_skipped = 0
        solution_names = []
        for line in result.stdout.split('\n'):
            if 'ATOMIZER_SOLUTIONS_SOLVED:' in line:
                num_solved = int(line.split(':')[1].strip())
            elif 'ATOMIZER_SOLUTIONS_FAILED:' in line:
                num_failed = int(line.split(':')[1].strip())
            elif 'ATOMIZER_SOLUTIONS_SKIPPED:' in line:
                num_skipped = int(line.split(':')[1].strip())
            elif 'ATOMIZER_SOLUTION:' in line:
                # split(':', 1) keeps any colons inside the solution name.
                sol_name = line.split(':', 1)[1].strip()
                solution_names.append(sol_name)
        # Clean up
        # NOTE(review): the temp journal is not removed if subprocess.run
        # raises (e.g. timeout) — consider a try/finally.
        journal_path.unlink()
        return {
            'num_solved': num_solved,
            'num_failed': num_failed,
            'num_skipped': num_skipped,
            'solution_names': solution_names
        }

    def _analyze_all_results(
        self,
        model_dir: Path,
        solutions_info: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Analyze ALL .op2 files to discover available results.

        Returns dict mapping result types to details:
        {
            'eigenvalues': {'file': 'xxx.op2', 'count': 10, 'solution': 'Modal'},
            'displacements': {'file': 'yyy.op2', 'count': 613, 'solution': 'Static'},
            'stress_quad4': {'file': 'yyy.op2', 'count': 561, 'solution': 'Static'},
            ...
        }
        """
        from pyNastran.op2.op2 import OP2
        available = {}
        # Find all .op2 files
        op2_files = list(model_dir.glob("*.op2"))
        for op2_file in op2_files:
            try:
                model = OP2()
                model.read_op2(str(op2_file))
                # Check for eigenvalues (only the first subcase is inspected).
                if hasattr(model, 'eigenvalues') and len(model.eigenvalues) > 0:
                    subcase = list(model.eigenvalues.keys())[0]
                    eig_obj = model.eigenvalues[subcase]
                    available['eigenvalues'] = {
                        'file': op2_file.name,
                        'count': len(eig_obj.eigenvalues),
                        'solution': self._guess_solution_from_filename(op2_file.name),
                        'op2_path': op2_file
                    }
                # Check for displacements
                if hasattr(model, 'displacements') and len(model.displacements) > 0:
                    subcase = list(model.displacements.keys())[0]
                    disp_obj = model.displacements[subcase]
                    available['displacements'] = {
                        'file': op2_file.name,
                        'count': disp_obj.data.shape[1],  # Number of nodes
                        'solution': self._guess_solution_from_filename(op2_file.name),
                        'op2_path': op2_file
                    }
                # Check for stresses
                if hasattr(model, 'cquad4_stress') and len(model.cquad4_stress) > 0:
                    subcase = list(model.cquad4_stress.keys())[0]
                    stress_obj = model.cquad4_stress[subcase]
                    available['stress_quad4'] = {
                        'file': op2_file.name,
                        'count': stress_obj.data.shape[1],  # Number of elements
                        'solution': self._guess_solution_from_filename(op2_file.name),
                        'op2_path': op2_file
                    }
                # Check for forces
                if hasattr(model, 'cquad4_force') and len(model.cquad4_force) > 0:
                    available['force_quad4'] = {
                        'file': op2_file.name,
                        'count': len(model.cquad4_force),
                        'solution': self._guess_solution_from_filename(op2_file.name),
                        'op2_path': op2_file
                    }
            except Exception as e:
                # Best-effort scan: skip unreadable files but keep going.
                print(f" [WARNING] Could not analyze {op2_file.name}: {e}")
        return available

    def _guess_solution_from_filename(self, filename: str) -> str:
        """Guess solution type from filename (heuristic keyword match)."""
        filename_lower = filename.lower()
        if 'normal_modes' in filename_lower or 'modal' in filename_lower:
            return 'Solution_Normal_Modes'
        elif 'buckling' in filename_lower:
            return 'Solution_Buckling'
        elif 'static' in filename_lower or 'solution_1' in filename_lower:
            return 'Solution_1'
        else:
            return 'Unknown'

    def _match_objectives_to_results(
        self,
        workflow: Dict[str, Any],
        available_results: Dict[str, Any],
        solutions_info: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Intelligently match workflow objectives to available results.

        Matching is keyword-based on each objective's extraction 'action'
        (frequency/eigenvalue/modal, displacement/deflection, stress/von_mises).
        The first successfully matched objective defines 'primary_solution'.

        Returns:
            {
                'objectives': {
                    'obj_name': {
                        'solution': 'Solution_Normal_Modes',
                        'result_type': 'eigenvalues',
                        'extractor': 'extract_first_frequency',
                        'op2_file': Path(...)
                    }
                },
                'primary_solution': 'Solution_Normal_Modes'  # Most important solution
            }
        """
        mapping = {
            'objectives': {},
            'primary_solution': None
        }
        for obj in workflow.get('objectives', []):
            obj_name = obj.get('name', 'unnamed')
            extraction = obj.get('extraction', {})
            action = extraction.get('action', '').lower()
            # Match based on objective type
            if 'frequency' in action or 'eigenvalue' in action or 'modal' in action:
                if 'eigenvalues' in available_results:
                    result_info = available_results['eigenvalues']
                    mapping['objectives'][obj_name] = {
                        'solution': result_info['solution'],
                        'result_type': 'eigenvalues',
                        'extractor': 'extract_first_frequency',
                        'op2_file': result_info['op2_path'],
                        'match_confidence': 'HIGH'
                    }
                    if not mapping['primary_solution']:
                        mapping['primary_solution'] = result_info['solution']
                else:
                    # Frequency objective with no eigenvalue data: record an
                    # explicit error entry instead of silently dropping it.
                    mapping['objectives'][obj_name] = {
                        'solution': 'NONE',
                        'result_type': 'eigenvalues',
                        'extractor': 'extract_first_frequency',
                        'op2_file': None,
                        'match_confidence': 'ERROR',
                        'error': 'No eigenvalue results found - check if modal solution exists'
                    }
            elif 'displacement' in action or 'deflection' in action:
                if 'displacements' in available_results:
                    result_info = available_results['displacements']
                    mapping['objectives'][obj_name] = {
                        'solution': result_info['solution'],
                        'result_type': 'displacements',
                        'extractor': 'extract_max_displacement',
                        'op2_file': result_info['op2_path'],
                        'match_confidence': 'HIGH'
                    }
                    if not mapping['primary_solution']:
                        mapping['primary_solution'] = result_info['solution']
            elif 'stress' in action or 'von_mises' in action:
                if 'stress_quad4' in available_results:
                    result_info = available_results['stress_quad4']
                    mapping['objectives'][obj_name] = {
                        'solution': result_info['solution'],
                        'result_type': 'stress',
                        'extractor': 'extract_max_stress',
                        'op2_file': result_info['op2_path'],
                        'match_confidence': 'HIGH'
                    }
                    if not mapping['primary_solution']:
                        mapping['primary_solution'] = result_info['solution']
        return mapping

    def generate_intelligent_runner(
        self,
        study_dir: Path,
        workflow: Dict[str, Any],
        benchmark_results: Dict[str, Any]
    ) -> Path:
        """
        Generate optimized runner based on intelligent analysis.

        Uses benchmark results to:
        1. Select correct solution to solve
        2. Generate correct extractors
        3. Optimize for speed (only solve what's needed)

        Returns:
            Path to the written run_optimization.py script.
        """
        runner_path = study_dir / "run_optimization.py"
        # Get recommended solution (falls back to 'Solution_1').
        recommended_solution = benchmark_results.get('recommended_solution', 'Solution_1')
        objective_mapping = benchmark_results.get('objective_mapping', {})
        # Generate extractor functions based on actual available results
        extractor_code = self._generate_intelligent_extractors(objective_mapping)
        # NOTE: single braces below are filled in NOW (generation time);
        # double braces survive as runtime f-string placeholders in the
        # generated script.
        runner_code = f'''"""
Auto-generated INTELLIGENT optimization runner
Created: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

Intelligently configured based on complete benchmarking:
- Solution: {recommended_solution}
- Extractors: Auto-matched to available results
"""
import sys
from pathlib import Path

# Add project root to path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

import json
import optuna
from optimization_engine.nx_updater import NXParameterUpdater
from optimization_engine.nx_solver import NXSolver

{extractor_code}

def main():
    print("="*80)
    print(" {workflow.get('study_name', 'OPTIMIZATION').upper()}")
    print(" Intelligent Setup - Auto-configured")
    print("="*80)
    print()
    # Load workflow
    config_file = Path(__file__).parent / "1_setup/workflow_config.json"
    with open(config_file) as f:
        workflow = json.load(f)
    print("Configuration:")
    print(f" Target solution: {recommended_solution}")
    print(f" Objectives: {len(workflow.get('objectives', []))}")
    print(f" Variables: {len(workflow.get('design_variables', []))}")
    print()
    # Setup paths
    model_dir = Path(__file__).parent / "1_setup/model"
    prt_file = list(model_dir.glob("*.prt"))[0]
    sim_file = list(model_dir.glob("*.sim"))[0]
    output_dir = Path(__file__).parent / "2_substudies/results"
    output_dir.mkdir(parents=True, exist_ok=True)
    # Initialize
    updater = NXParameterUpdater(prt_file)
    solver = NXSolver()
    # Create Optuna study
    study_name = "{workflow.get('study_name', 'optimization')}"
    storage = f"sqlite:///{{output_dir / 'study.db'}}"
    study = optuna.create_study(
        study_name=study_name,
        storage=storage,
        load_if_exists=True,
        direction="minimize"
    )
    def objective(trial):
        # Sample design variables
        params = {{}}
        for var in workflow['design_variables']:
            name = var['parameter']
            bounds = var['bounds']
            params[name] = trial.suggest_float(name, bounds[0], bounds[1])
        print(f"\\nTrial {{trial.number}}:")
        for name, value in params.items():
            print(f" {{name}} = {{value:.2f}}")
        # Update model
        updater.update_expressions(params)
        # Run SPECIFIC solution (optimized - only what's needed)
        result = solver.run_simulation(
            sim_file,
            solution_name="{recommended_solution}"
        )
        if not result['success']:
            raise RuntimeError(f"Simulation failed: {{result.get('errors', 'Unknown')}}")
        op2_file = result['op2_file']
        # Extract results
        results = extract_results(op2_file, workflow)
        # Print results
        for name, value in results.items():
            print(f" {{name}} = {{value:.4f}}")
        # Calculate objective
        obj_config = workflow['objectives'][0]
        result_name = list(results.keys())[0]
        if obj_config['goal'] == 'minimize':
            objective_value = results[result_name]
        else:
            objective_value = -results[result_name]
        print(f" Objective = {{objective_value:.4f}}")
        return objective_value
    # Run optimization
    n_trials = 10
    print(f"\\nRunning {{n_trials}} trials...")
    print("="*80)
    print()
    study.optimize(objective, n_trials=n_trials)
    # Results
    print()
    print("="*80)
    print(" OPTIMIZATION COMPLETE")
    print("="*80)
    print()
    print(f"Best trial: #{{study.best_trial.number}}")
    for name, value in study.best_params.items():
        print(f" {{name}} = {{value:.2f}}")
    print(f"\\nBest objective = {{study.best_value:.4f}}")
    print()

if __name__ == "__main__":
    main()
'''
        with open(runner_path, 'w') as f:
            f.write(runner_code)
        return runner_path

    def _generate_intelligent_extractors(self, objective_mapping: Dict[str, Any]) -> str:
        """Generate extractor functions based on intelligent mapping.

        Builds the source of a single extract_results() function containing
        only the extraction snippets that the mapping actually requires.
        """
        # Collect the set of extractor names the mapping asks for.
        extractors = set()
        for obj_name, obj_info in objective_mapping.get('objectives', {}).items():
            if 'extractor' in obj_info:
                extractors.add(obj_info['extractor'])
        code = '''
def extract_results(op2_file, workflow):
    """Intelligently extract results based on benchmarking."""
    from pyNastran.op2.op2 import OP2
    import numpy as np
    model = OP2()
    model.read_op2(str(op2_file))
    results = {}
'''
        if 'extract_first_frequency' in extractors:
            code += '''
    # Extract first frequency (auto-matched to eigenvalues)
    if hasattr(model, 'eigenvalues') and len(model.eigenvalues) > 0:
        subcase = list(model.eigenvalues.keys())[0]
        eig_obj = model.eigenvalues[subcase]
        eigenvalue = eig_obj.eigenvalues[0]
        angular_freq = np.sqrt(eigenvalue)
        frequency_hz = angular_freq / (2 * np.pi)
        results['first_frequency'] = float(frequency_hz)
'''
        if 'extract_max_displacement' in extractors:
            code += '''
    # Extract max displacement (auto-matched to displacements)
    if hasattr(model, 'displacements') and len(model.displacements) > 0:
        subcase = list(model.displacements.keys())[0]
        disp_obj = model.displacements[subcase]
        translations = disp_obj.data[0, :, :3]
        magnitudes = np.linalg.norm(translations, axis=1)
        results['max_displacement'] = float(np.max(magnitudes))
'''
        if 'extract_max_stress' in extractors:
            code += '''
    # Extract max stress (auto-matched to stress results)
    if hasattr(model, 'cquad4_stress') and len(model.cquad4_stress) > 0:
        subcase = list(model.cquad4_stress.keys())[0]
        stress_obj = model.cquad4_stress[subcase]
        von_mises = stress_obj.data[0, :, 7]
        results['max_stress'] = float(np.max(von_mises))
'''
        code += '''
    return results
'''
        return code
if __name__ == "__main__":
    # Example usage (placeholder paths — substitute real model files to run).
    setup = IntelligentSetup()
    # Run complete analysis
    results = setup.run_complete_benchmarking(
        prt_file=Path("path/to/model.prt"),
        sim_file=Path("path/to/model.sim"),
        workflow={'objectives': [{'name': 'freq', 'extraction': {'action': 'extract_frequency'}}]}
    )
    print("Analysis complete:")
    # default=str makes Path objects JSON-serializable.
    print(json.dumps(results, indent=2, default=str))

View File

@@ -0,0 +1,386 @@
"""
Landscape Analyzer - Automatic optimization problem characterization.
This module analyzes the characteristics of an optimization landscape to inform
intelligent strategy selection. It computes metrics like smoothness, multimodality,
parameter correlation, and noise level.
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""
import numpy as np
from typing import Dict, List, Optional
from scipy.stats import spearmanr, variation
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import DBSCAN
import optuna
class LandscapeAnalyzer:
    """Analyzes optimization landscape characteristics from trial history."""

    def __init__(self, min_trials_for_analysis: int = 10, verbose: bool = True):
        """
        Args:
            min_trials_for_analysis: Minimum trials needed for reliable analysis
            verbose: Whether to print diagnostic messages
        """
        self.min_trials = min_trials_for_analysis
        self.verbose = verbose

    def analyze(self, study: optuna.Study) -> Dict:
        """
        Analyze optimization landscape characteristics.

        STUDY-AWARE: Uses study.trials directly for analysis.

        Args:
            study: Optuna study with completed trials

        Returns:
            Dictionary with landscape characteristics:
            - smoothness: 0-1, how smooth the objective landscape is
            - multimodal: boolean, multiple local optima detected
            - n_modes: estimated number of local optima
            - parameter_correlation: dict of correlation scores
            - noise_level: estimated noise in evaluations
            - dimensionality: number of design variables
            - landscape_type: classification (smooth/rugged/multimodal)
        """
        # Get completed trials
        completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
        if len(completed_trials) < self.min_trials:
            return {
                'ready': False,
                'total_trials': len(completed_trials),
                'message': f'Need {self.min_trials - len(completed_trials)} more trials for landscape analysis'
            }
        # Check if this is a multi-objective study
        # Multi-objective studies have trial.values (plural), not trial.value
        is_multi_objective = len(study.directions) > 1
        if is_multi_objective:
            return {
                'ready': False,
                'total_trials': len(completed_trials),
                'message': 'Landscape analysis not supported for multi-objective optimization'
            }
        # Extract data
        X = []  # Parameter values
        y = []  # Objective values
        param_names = []
        for trial in completed_trials:
            X.append(list(trial.params.values()))
            y.append(trial.value)
            if not param_names:
                param_names = list(trial.params.keys())
        X = np.array(X)
        y = np.array(y)
        # Compute characteristics
        smoothness = self._compute_smoothness(X, y)
        multimodal, n_modes = self._detect_multimodality(X, y)
        correlation_scores = self._compute_parameter_correlation(X, y, param_names)
        noise_level = self._estimate_noise(X, y)
        landscape_type = self._classify_landscape(smoothness, multimodal, noise_level, n_modes)
        # Compute parameter ranges for coverage metrics
        param_ranges = self._compute_parameter_ranges(completed_trials)
        return {
            'ready': True,
            'total_trials': len(completed_trials),
            'dimensionality': X.shape[1],
            'parameter_names': param_names,
            'smoothness': smoothness,
            'multimodal': multimodal,
            'n_modes': n_modes,
            'parameter_correlation': correlation_scores,
            'noise_level': noise_level,
            'landscape_type': landscape_type,
            'parameter_ranges': param_ranges,
            'objective_statistics': {
                'mean': float(np.mean(y)),
                'std': float(np.std(y)),
                'min': float(np.min(y)),
                'max': float(np.max(y)),
                'range': float(np.max(y) - np.min(y))
            }
        }

    def _compute_smoothness(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Compute landscape smoothness score.

        High smoothness = nearby points have similar objective values
        Low smoothness = nearby points have very different values (rugged)

        Method: Compare objective differences vs parameter distances
        """
        if len(y) < 3:
            return 0.5  # Unknown
        # Normalize parameters to [0, 1] for fair distance computation
        X_norm = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0) + 1e-10)
        # Compute pairwise distances in parameter space
        param_distances = pdist(X_norm, metric='euclidean')
        # Compute pairwise differences in objective space
        objective_diffs = pdist(y.reshape(-1, 1), metric='euclidean')
        # Smoothness = correlation between distance and objective difference
        # Smooth landscape: nearby points → similar objectives (high correlation)
        # Rugged landscape: nearby points → very different objectives (low correlation)
        if len(param_distances) > 0 and len(objective_diffs) > 0:
            # Filter out zero distances to avoid division issues
            mask = param_distances > 1e-6
            if np.sum(mask) > 5:
                param_distances = param_distances[mask]
                objective_diffs = objective_diffs[mask]
                # Compute correlation
                correlation, _ = spearmanr(param_distances, objective_diffs)
                # Convert to smoothness score: high correlation = smooth
                # Handle NaN from constant arrays
                if np.isnan(correlation):
                    smoothness = 0.5
                else:
                    smoothness = max(0.0, min(1.0, (correlation + 1.0) / 2.0))
            else:
                smoothness = 0.5
        else:
            smoothness = 0.5
        return smoothness

    def _detect_multimodality(self, X: np.ndarray, y: np.ndarray) -> tuple:
        """
        Detect multiple local optima using clustering.

        Returns:
            (is_multimodal, n_modes)
        """
        if len(y) < 10:
            return False, 1
        # Find good trials (bottom 30%)
        threshold = np.percentile(y, 30)
        good_trials_mask = y <= threshold
        if np.sum(good_trials_mask) < 3:
            return False, 1
        X_good = X[good_trials_mask]
        # Normalize for clustering
        X_norm = (X_good - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0) + 1e-10)
        # Use DBSCAN to find clusters of good solutions
        # If they're spread across multiple regions → multimodal
        try:
            clustering = DBSCAN(eps=0.2, min_samples=2).fit(X_norm)
            n_clusters = len(set(clustering.labels_)) - (1 if -1 in clustering.labels_ else 0)
            is_multimodal = n_clusters > 1
            n_modes = max(1, n_clusters)
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Any clustering failure falls back to unimodal.
            is_multimodal = False
            n_modes = 1
        return is_multimodal, n_modes

    def _compute_parameter_correlation(self, X: np.ndarray, y: np.ndarray, param_names: List[str]) -> Dict:
        """
        Compute correlation between each parameter and objective.

        Returns dict of {param_name: correlation_score}
        (plus an 'overall_strength' summary key)

        High absolute correlation → parameter strongly affects objective
        """
        correlations = {}
        for i, param_name in enumerate(param_names):
            param_values = X[:, i]
            # Spearman correlation (handles nonlinearity)
            corr, p_value = spearmanr(param_values, y)
            if np.isnan(corr):
                corr = 0.0
            correlations[param_name] = {
                'correlation': float(corr),
                'abs_correlation': float(abs(corr)),
                'p_value': float(p_value) if not np.isnan(p_value) else 1.0
            }
        # Compute overall correlation strength
        avg_abs_corr = np.mean([v['abs_correlation'] for v in correlations.values()])
        correlations['overall_strength'] = float(avg_abs_corr)
        return correlations

    def _estimate_noise(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Estimate noise level in objective evaluations.

        For deterministic FEA simulations, this should be very low.
        High noise would suggest numerical issues or simulation instability.

        Method: Look at local variations - similar inputs should give similar outputs.
        Wide exploration range (high CV) is NOT noise.
        """
        if len(y) < 10:
            return 0.0
        # Calculate pairwise distances in parameter space
        from scipy.spatial.distance import pdist, squareform
        # Normalize X to [0,1] for distance calculation
        X_norm = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0) + 1e-10)
        # Compute pairwise distances
        param_distances = squareform(pdist(X_norm, 'euclidean'))
        objective_diffs = np.abs(y[:, np.newaxis] - y[np.newaxis, :])
        # Find pairs that are close in parameter space (distance < 0.1)
        close_pairs_mask = (param_distances > 1e-6) & (param_distances < 0.1)
        if np.sum(close_pairs_mask) < 5:
            # Not enough close pairs to assess noise
            return 0.0
        # For close pairs, measure objective variation
        # True noise: close inputs give very different outputs
        # Smooth function: close inputs give similar outputs
        close_objective_diffs = objective_diffs[close_pairs_mask]
        close_param_dists = param_distances[close_pairs_mask]
        # Normalize by expected difference based on smoothness
        # Noise = unexpected variation for nearby points
        expected_diff = np.median(close_objective_diffs / (close_param_dists + 1e-10))
        actual_std = np.std(close_objective_diffs / (close_param_dists + 1e-10))
        # Coefficient of variation of local gradients
        if expected_diff > 1e-6:
            local_cv = actual_std / expected_diff
            noise_score = min(1.0, local_cv / 2.0)
        else:
            noise_score = 0.0
        return float(noise_score)

    def _classify_landscape(self, smoothness: float, multimodal: bool, noise: float, n_modes: int = 1) -> str:
        """
        Classify landscape type for strategy selection.

        Args:
            smoothness: Smoothness score (0-1)
            multimodal: Whether multiple modes detected
            noise: Noise level (0-1)
            n_modes: Number of modes detected

        Returns one of:
        - 'smooth_unimodal': Single smooth bowl (best for CMA-ES, GP-BO)
        - 'smooth_multimodal': Multiple smooth regions (good for GP-BO, TPE)
        - 'rugged_unimodal': Single rugged region (TPE, hybrid)
        - 'rugged_multimodal': Multiple rugged regions (TPE, evolutionary)
        - 'noisy': High noise level (robust methods)
        """
        # IMPROVEMENT: Detect false multimodality from smooth continuous manifolds
        # If only 2 modes detected with high smoothness and low noise,
        # it's likely a continuous smooth surface, not true multimodality
        if multimodal and n_modes == 2 and smoothness > 0.6 and noise < 0.2:
            if self.verbose:
                print(f"[LANDSCAPE] Reclassifying: 2 modes with smoothness={smoothness:.2f}, noise={noise:.2f}")
                print(f"[LANDSCAPE] This appears to be a smooth continuous manifold, not true multimodality")
            multimodal = False  # Override: treat as unimodal
        if noise > 0.5:
            return 'noisy'
        if smoothness > 0.6:
            if multimodal:
                return 'smooth_multimodal'
            else:
                return 'smooth_unimodal'
        else:
            if multimodal:
                return 'rugged_multimodal'
            else:
                return 'rugged_unimodal'

    def _compute_parameter_ranges(self, trials: List) -> Dict:
        """Compute explored parameter ranges (and coverage vs. bounds)."""
        if not trials:
            return {}
        param_names = list(trials[0].params.keys())
        ranges = {}
        for param in param_names:
            values = [t.params[param] for t in trials]
            distribution = trials[0].distributions[param]
            ranges[param] = {
                'explored_min': float(np.min(values)),
                'explored_max': float(np.max(values)),
                'explored_range': float(np.max(values) - np.min(values)),
                'bounds_min': float(distribution.low),
                'bounds_max': float(distribution.high),
                'bounds_range': float(distribution.high - distribution.low),
                'coverage': float((np.max(values) - np.min(values)) / (distribution.high - distribution.low))
            }
        return ranges
def print_landscape_report(landscape: Dict, verbose: bool = True):
    """Pretty-print the landscape dict produced by LandscapeAnalyzer.analyze()."""
    if not verbose:
        return
    # Multi-objective studies skip landscape analysis entirely (dict is None).
    if landscape is None:
        print(f"\n [LANDSCAPE ANALYSIS] Skipped for multi-objective optimization")
        return
    # Not enough trials yet: show the analyzer's message and stop.
    if not landscape.get('ready', False):
        print(f"\n [LANDSCAPE ANALYSIS] {landscape.get('message', 'Not ready')}")
        return
    rule = '=' * 70
    print(f"\n{rule}")
    print(f" LANDSCAPE ANALYSIS REPORT")
    print(f"{rule}")
    print(f" Total Trials Analyzed: {landscape['total_trials']}")
    print(f" Dimensionality: {landscape['dimensionality']} parameters")
    # Headline characteristics with qualitative tags.
    smooth_tag = '(smooth)' if landscape['smoothness'] > 0.6 else '(rugged)'
    modal_tag = 'YES' if landscape['multimodal'] else 'NO'
    noise_tag = '(low)' if landscape['noise_level'] < 0.3 else '(high)'
    print(f"\n LANDSCAPE CHARACTERISTICS:")
    print(f" Type: {landscape['landscape_type'].upper()}")
    print(f" Smoothness: {landscape['smoothness']:.2f} {smooth_tag}")
    print(f" Multimodal: {modal_tag} ({landscape['n_modes']} modes)")
    print(f" Noise Level: {landscape['noise_level']:.2f} {noise_tag}")
    print(f"\n PARAMETER CORRELATIONS:")
    for param, info in landscape['parameter_correlation'].items():
        # 'overall_strength' is a summary scalar, not a per-parameter entry.
        if param == 'overall_strength':
            continue
        corr = info['correlation']
        magnitude = abs(corr)
        if magnitude > 0.5:
            strength = 'strong'
        elif magnitude > 0.3:
            strength = 'moderate'
        else:
            strength = 'weak'
        direction = 'positive' if corr > 0 else 'negative'
        print(f" {param}: {corr:+.3f} ({strength} {direction})")
    print(f"\n OBJECTIVE STATISTICS:")
    stats = landscape['objective_statistics']
    for label, key in (("Best", "min"), ("Mean", "mean"), ("Std", "std"), ("Range", "range")):
        print(f" {label}: {stats[key]:.6f}")
    print(f"{rule}\n")

View File

@@ -0,0 +1,133 @@
"""
Mesh Converter Utility
Converts Nastran BDF/OP2 files to GLTF for web visualization
"""
import json
import numpy as np
from pathlib import Path
from typing import Optional, Dict, Any
import trimesh
from pyNastran.bdf.bdf import BDF
from pyNastran.op2.op2 import OP2
def convert_study_mesh(study_dir: Path) -> Optional[Path]:
    """
    Convert the mesh and results of a study to GLTF format.

    Args:
        study_dir: Path to the study directory

    Returns:
        Path to the generated GLTF file, or None if conversion failed
    """
    try:
        # Resolve the standard study layout up front.
        model_dir = study_dir / "1_setup" / "model"
        results_dir = study_dir / "2_results"
        vis_dir = study_dir / "3_visualization"
        vis_dir.mkdir(parents=True, exist_ok=True)

        # A .dat/.bdf mesh file is required; otherwise emit a placeholder box.
        mesh_candidates = list(model_dir.glob("*.dat")) + list(model_dir.glob("*.bdf"))
        if not mesh_candidates:
            return _generate_placeholder_mesh(vis_dir)
        bdf_path = mesh_candidates[0]

        # Result file is optional — colors are only mapped when one exists.
        op2_candidates = list(results_dir.glob("*.op2"))
        op2_path = op2_candidates[0] if op2_candidates else None

        model = BDF()
        model.read_bdf(bdf_path, xref=False)

        # Collect node coordinates and build an id -> row-index lookup.
        # Simplified extraction covering shell elements only; a full
        # implementation would handle all element types.
        node_ids = list(model.nodes.keys())
        coords = np.array([model.nodes[nid].get_position() for nid in node_ids])
        index_of = {nid: row for row, nid in enumerate(node_ids)}

        triangles = []
        for eid, elem in model.elements.items():
            if elem.type == 'CQUAD4':
                mapped = [index_of[nid] for nid in elem.nodes]
                # Split each quad into two triangles for rendering.
                triangles.append([mapped[0], mapped[1], mapped[2]])
                triangles.append([mapped[0], mapped[2], mapped[3]])
            elif elem.type == 'CTRIA3':
                mapped = [index_of[nid] for nid in elem.nodes]
                triangles.append([mapped[0], mapped[1], mapped[2]])
        if not triangles:
            # No compatible shell elements were found.
            return _generate_placeholder_mesh(vis_dir)

        surface = trimesh.Trimesh(vertices=coords, faces=triangles)

        if op2_path:
            op2 = OP2()
            op2.read_op2(op2_path)
            # Color vertices by displacement magnitude when subcase 1 exists.
            if 1 in op2.displacements:
                disp = op2.displacements[1]
                # Last timestep; columns 0-2 are translations x,y,z.
                translations = disp.data[-1, :, :3]
                magnitude = np.linalg.norm(translations, axis=1)
                span = magnitude.max() - magnitude.min()
                if span > 0:
                    scaled = (magnitude - magnitude.min()) / span
                else:
                    scaled = np.zeros_like(magnitude)
                # Simple blue-to-red RGBA colormap.
                rgba = np.zeros((len(coords), 4))
                rgba[:, 0] = scaled       # R
                rgba[:, 2] = 1 - scaled   # B
                rgba[:, 3] = 1.0          # Alpha
                surface.visual.vertex_colors = rgba

        output_path = vis_dir / "model.gltf"
        surface.export(output_path)

        # Sidecar metadata for the dashboard frontend.
        with open(vis_dir / "model.json", 'w') as f:
            json.dump({
                "node_count": len(coords),
                "element_count": len(triangles),
                "has_results": op2_path is not None
            }, f, indent=2)
        return output_path
    except Exception as e:
        print(f"Mesh conversion error: {e}")
        # Fallback on error
        return _generate_placeholder_mesh(vis_dir)
def _generate_placeholder_mesh(output_dir: Path) -> Path:
    """Generate a simple box mesh for testing"""
    # A 10x10x10 box stands in for real FEA geometry when conversion fails.
    placeholder = trimesh.creation.box(extents=[10, 10, 10])
    gltf_path = output_dir / "model.gltf"
    placeholder.export(gltf_path)
    # Flag the output as a placeholder so consumers can tell it apart.
    with open(output_dir / "model.json", 'w') as handle:
        json.dump({"placeholder": True}, handle)
    return gltf_path

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,442 @@
"""
NX Session Manager - Prevents conflicts when multiple optimizations run concurrently.
This module ensures that NX sessions don't interfere with each other when:
1. Multiple optimizations are running simultaneously
2. User has NX open for manual work
3. Multiple Atomizer instances are running
Key Features:
- Session detection (running NX processes)
- File locking (prevents concurrent access to same model)
- Process queuing (waits if NX is busy with another optimization)
- Batch mode isolation (uses dedicated NX instances)
"""
import psutil
import time
import os
from pathlib import Path
from typing import Optional, List
from contextlib import contextmanager
from dataclasses import dataclass
import json
# Platform-specific imports
if os.name != 'nt': # Unix/Linux/Mac
import fcntl
else: # Windows
import msvcrt
@dataclass
class NXSessionInfo:
    """Information about a running NX session."""
    pid: int  # OS process id of the NX executable
    name: str  # process image name (e.g. ugraf.exe, run_journal.exe)
    cmdline: List[str]  # full command line; used to detect '-batch' mode
    working_dir: Optional[str]  # process cwd as reported by psutil (may be None)
    create_time: float  # process start time in epoch seconds (from psutil)
class NXSessionManager:
    """
    Manages NX sessions to prevent conflicts between concurrent optimizations.

    Strategy:
    1. Detect running NX processes
    2. Use file locks to ensure exclusive model access
    3. Queue optimization trials if NX is busy
    4. Isolate batch mode sessions from interactive sessions
    """

    def __init__(
        self,
        lock_dir: Optional[Path] = None,
        max_concurrent_sessions: int = 1,
        wait_timeout: int = 300,
        verbose: bool = True
    ):
        """
        Initialize session manager.

        Args:
            lock_dir: Directory for lock files (default: ~/.atomizer/locks)
            max_concurrent_sessions: Maximum concurrent NX optimization sessions
            wait_timeout: Maximum wait time for NX to become available (seconds)
            verbose: Print session management info
        """
        self.lock_dir = Path(lock_dir) if lock_dir else Path.home() / ".atomizer" / "locks"
        self.lock_dir.mkdir(parents=True, exist_ok=True)
        self.max_concurrent = max_concurrent_sessions
        self.wait_timeout = wait_timeout
        self.verbose = verbose
        # JSON registry of active optimization sessions, plus a global lock path.
        self.session_lock_file = self.lock_dir / "nx_sessions.json"
        self.global_lock_file = self.lock_dir / "nx_global.lock"

    def get_running_nx_sessions(self) -> List["NXSessionInfo"]:
        """
        Detect all running NX processes.

        Returns:
            List of NX session info objects
        """
        nx_sessions = []
        nx_executables = ('ugraf.exe', 'nx.exe', 'run_journal.exe', 'nxmgr_inter.exe')
        for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd', 'create_time']):
            try:
                name = proc.info['name']
                # Match any known NX executable name (case-insensitive).
                if name and any(nx_exe in name.lower() for nx_exe in nx_executables):
                    session = NXSessionInfo(
                        pid=proc.info['pid'],
                        name=name,
                        cmdline=proc.info['cmdline'] or [],
                        working_dir=proc.info['cwd'],
                        create_time=proc.info['create_time']
                    )
                    nx_sessions.append(session)
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process exited or is inaccessible mid-iteration; skip it.
                continue
        return nx_sessions

    def is_nx_interactive_session_running(self) -> bool:
        """
        Check if user has NX open interactively (not batch mode).

        Returns:
            True if interactive NX session detected
        """
        for session in self.get_running_nx_sessions():
            # Interactive sessions are typically ugraf.exe or nx.exe without -batch.
            if 'ugraf.exe' in session.name.lower() or 'nx.exe' in session.name.lower():
                if '-batch' not in ' '.join(session.cmdline).lower():
                    return True
        return False

    @contextmanager
    def acquire_model_lock(self, model_file: Path, study_name: str):
        """
        Acquire exclusive lock for a specific model file.

        This prevents two optimizations from modifying the same model simultaneously.

        Args:
            model_file: Path to the model file (.prt)
            study_name: Name of the study (for logging)

        Yields:
            Lock context

        Raises:
            TimeoutError: If lock cannot be acquired within timeout
        """
        import hashlib
        # BUG FIX: built-in hash() of a str is salted per-process (PYTHONHASHSEED),
        # so two processes would derive DIFFERENT lock filenames for the SAME model,
        # defeating the lock entirely. Use a stable content digest instead.
        model_hash = hashlib.md5(str(model_file).encode('utf-8')).hexdigest()[:16]
        lock_file = self.lock_dir / f"model_{model_hash}.lock"
        if self.verbose:
            print(f"\n[SESSION MGR] Acquiring lock for model: {model_file.name}")
        lock_fd = None
        start_time = time.time()
        next_report = 10.0  # seconds until the next "waiting" progress message
        try:
            # Try to acquire lock with timeout.
            while True:
                try:
                    lock_fd = open(lock_file, 'w')
                    # Try to acquire exclusive lock (non-blocking).
                    if os.name == 'nt':  # Windows
                        import msvcrt
                        msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
                    else:  # Unix
                        import fcntl
                        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
                    # Record who holds the lock for debugging stale locks.
                    lock_info = {
                        'study_name': study_name,
                        'model_file': str(model_file),
                        'pid': os.getpid(),
                        'timestamp': time.time()
                    }
                    lock_fd.write(json.dumps(lock_info, indent=2))
                    lock_fd.flush()
                    if self.verbose:
                        print(f"[SESSION MGR] Lock acquired successfully")
                    break  # Lock acquired!
                except (IOError, OSError):
                    # Lock is held by another process.
                    # BUG FIX: close the handle we opened before retrying,
                    # otherwise a descriptor leaks on every iteration.
                    if lock_fd is not None:
                        try:
                            lock_fd.close()
                        except OSError:
                            pass
                        lock_fd = None
                    elapsed = time.time() - start_time
                    if elapsed > self.wait_timeout:
                        raise TimeoutError(
                            f"Could not acquire model lock for {model_file.name} "
                            f"after {self.wait_timeout}s. Another optimization may be using this model."
                        )
                    # BUG FIX: 'elapsed % 10 == 0' on a float is essentially never
                    # true; use an explicit threshold so progress actually prints.
                    if self.verbose and elapsed >= next_report:
                        print(f"[SESSION MGR] Waiting for model lock... ({elapsed:.0f}s)")
                        next_report += 10.0
                    time.sleep(1)
            yield  # Lock acquired, user code runs here
        finally:
            # Release lock.
            if lock_fd:
                try:
                    if os.name == 'nt':
                        import msvcrt
                        # NOTE(review): msvcrt unlocks from the CURRENT file position,
                        # which has advanced past the json payload — presumably works
                        # in practice because failures are swallowed; verify on Windows.
                        msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
                    else:
                        import fcntl
                        fcntl.flock(lock_fd, fcntl.LOCK_UN)
                    lock_fd.close()
                    if self.verbose:
                        print(f"[SESSION MGR] Lock released for model: {model_file.name}")
                except Exception as e:
                    if self.verbose:
                        print(f"[SESSION MGR] Warning: Failed to release lock: {e}")
            # Clean up lock file (best effort).
            try:
                if lock_file.exists():
                    lock_file.unlink()
            except OSError:
                pass

    @contextmanager
    def acquire_nx_session(self, study_name: str):
        """
        Acquire permission to run an NX batch session.

        This ensures we don't exceed max_concurrent_sessions and
        don't interfere with interactive NX sessions.

        Args:
            study_name: Name of the study (for logging)

        Yields:
            Session context

        Raises:
            TimeoutError: If session cannot be acquired within timeout
        """
        if self.verbose:
            print(f"\n[SESSION MGR] Requesting NX batch session for study: {study_name}")
        # Warn (but do not block) if the user has NX open interactively.
        if self.is_nx_interactive_session_running():
            if self.verbose:
                print(f"[SESSION MGR] WARNING: Interactive NX session detected!")
                print(f"[SESSION MGR] Batch operations may conflict with user's work.")
                print(f"[SESSION MGR] Recommend closing interactive NX before running optimization.")
        start_time = time.time()
        next_report = 10.0  # seconds until the next "waiting" progress message
        session_acquired = False
        try:
            # Wait for available session slot.
            while True:
                active_sessions = self._count_active_sessions()
                if active_sessions < self.max_concurrent:
                    # Register this session.
                    self._register_session(study_name)
                    session_acquired = True
                    if self.verbose:
                        print(f"[SESSION MGR] NX session acquired (active: {active_sessions + 1}/{self.max_concurrent})")
                    break
                # Check timeout.
                elapsed = time.time() - start_time
                if elapsed > self.wait_timeout:
                    raise TimeoutError(
                        f"Could not acquire NX session after {self.wait_timeout}s. "
                        f"Max concurrent sessions ({self.max_concurrent}) reached."
                    )
                # BUG FIX: float modulo comparison replaced by threshold check.
                if self.verbose and elapsed >= next_report:
                    print(f"[SESSION MGR] Waiting for NX session... ({elapsed:.0f}s, active: {active_sessions})")
                    next_report += 10.0
                time.sleep(2)
            yield  # Session acquired, user code runs here
        finally:
            # Unregister session.
            if session_acquired:
                self._unregister_session(study_name)
                if self.verbose:
                    print(f"[SESSION MGR] NX session released for study: {study_name}")

    def _count_active_sessions(self) -> int:
        """Count active optimization sessions, pruning entries whose PID died."""
        if not self.session_lock_file.exists():
            return 0
        try:
            with open(self.session_lock_file, 'r') as f:
                sessions = json.load(f)
            # Clean up stale sessions (processes that no longer exist).
            active_sessions = []
            for session in sessions:
                pid = session.get('pid')
                if pid and psutil.pid_exists(pid):
                    active_sessions.append(session)
            # Update file with only active sessions.
            with open(self.session_lock_file, 'w') as f:
                json.dump(active_sessions, f, indent=2)
            return len(active_sessions)
        except Exception as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to count sessions: {e}")
            return 0

    def _register_session(self, study_name: str):
        """Register a new optimization session in the shared JSON registry."""
        sessions = []
        if self.session_lock_file.exists():
            try:
                with open(self.session_lock_file, 'r') as f:
                    sessions = json.load(f)
            except (OSError, json.JSONDecodeError):
                # Corrupt/unreadable registry: start fresh rather than crash.
                sessions = []
        # Add new session.
        sessions.append({
            'study_name': study_name,
            'pid': os.getpid(),
            'start_time': time.time(),
            'timestamp': time.time()
        })
        # Save.
        with open(self.session_lock_file, 'w') as f:
            json.dump(sessions, f, indent=2)

    def _unregister_session(self, study_name: str):
        """Remove this process's entry from the session registry."""
        if not self.session_lock_file.exists():
            return
        try:
            with open(self.session_lock_file, 'r') as f:
                sessions = json.load(f)
            # Remove sessions owned by this process.
            pid = os.getpid()
            sessions = [s for s in sessions if s.get('pid') != pid]
            # Save.
            with open(self.session_lock_file, 'w') as f:
                json.dump(sessions, f, indent=2)
        except Exception as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to unregister session: {e}")

    def cleanup_stale_locks(self):
        """Remove lock files left behind by crashed processes."""
        if not self.lock_dir.exists():
            return
        cleaned = 0
        for lock_file in self.lock_dir.glob("*.lock"):
            try:
                # Try to read lock info.
                with open(lock_file, 'r') as f:
                    lock_info = json.load(f)
                pid = lock_info.get('pid')
                # Check if process still exists.
                if pid and not psutil.pid_exists(pid):
                    lock_file.unlink()
                    cleaned += 1
                    if self.verbose:
                        print(f"[SESSION MGR] Cleaned stale lock: {lock_file.name}")
            except Exception:
                # If we can't read lock file, it might be corrupted - remove it.
                try:
                    lock_file.unlink()
                    cleaned += 1
                except OSError:
                    pass
        if self.verbose and cleaned > 0:
            print(f"[SESSION MGR] Cleaned {cleaned} stale lock file(s)")

    def get_status_report(self) -> str:
        """Generate a human-readable status report of NX sessions and locks."""
        report = "\n" + "="*70 + "\n"
        report += " NX SESSION MANAGER STATUS\n"
        report += "="*70 + "\n"
        # Running NX sessions.
        nx_sessions = self.get_running_nx_sessions()
        report += f"\n Running NX Processes: {len(nx_sessions)}\n"
        for session in nx_sessions:
            report += f" PID {session.pid}: {session.name}\n"
            if session.working_dir:
                report += f" Working dir: {session.working_dir}\n"
        # Interactive session warning.
        if self.is_nx_interactive_session_running():
            report += f"\n WARNING: Interactive NX session detected!\n"
            report += f" Batch operations may conflict with user's work.\n"
        # Active optimization sessions.
        active_count = self._count_active_sessions()
        report += f"\n Active Optimization Sessions: {active_count}/{self.max_concurrent}\n"
        if self.session_lock_file.exists():
            try:
                with open(self.session_lock_file, 'r') as f:
                    sessions = json.load(f)
                for session in sessions:
                    study = session.get('study_name', 'Unknown')
                    pid = session.get('pid', 'Unknown')
                    report += f" {study} (PID {pid})\n"
            except (OSError, json.JSONDecodeError):
                pass
        # Lock files.
        lock_files = list(self.lock_dir.glob("*.lock"))
        report += f"\n Active Lock Files: {len(lock_files)}\n"
        report += "="*70 + "\n"
        return report

View File

@@ -24,6 +24,7 @@ from datetime import datetime
import pickle
from optimization_engine.plugins import HookManager
from optimization_engine.training_data_exporter import create_exporter_from_config
class OptimizationRunner:
@@ -79,6 +80,11 @@ class OptimizationRunner:
if summary['total_hooks'] > 0:
print(f"Loaded {summary['enabled_hooks']}/{summary['total_hooks']} plugins")
# Initialize training data exporter (if enabled in config)
self.training_data_exporter = create_exporter_from_config(self.config)
if self.training_data_exporter:
print(f"Training data export enabled: {self.training_data_exporter.export_dir}")
def _load_config(self) -> Dict[str, Any]:
"""Load and validate optimization configuration."""
with open(self.config_path, 'r') as f:
@@ -436,6 +442,32 @@ class OptimizationRunner:
}
self.hook_manager.execute_hooks('post_extraction', post_extraction_context, fail_fast=False)
# Export training data (if enabled)
if self.training_data_exporter:
# Determine .dat and .op2 file paths from result_path
# NX naming: sim_name-solution_N.dat and sim_name-solution_N.op2
if result_path:
sim_dir = Path(result_path).parent if Path(result_path).is_file() else Path(result_path)
sim_name = self.config.get('sim_file', '').replace('.sim', '')
# Try to find the .dat and .op2 files
# Typically: sim_name-solution_1.dat and sim_name-solution_1.op2
dat_files = list(sim_dir.glob(f"{Path(sim_name).stem}*.dat"))
op2_files = list(sim_dir.glob(f"{Path(sim_name).stem}*.op2"))
if dat_files and op2_files:
simulation_files = {
'dat_file': dat_files[0], # Use first match
'op2_file': op2_files[0]
}
self.training_data_exporter.export_trial(
trial_number=trial.number,
design_variables=design_vars,
results=extracted_results,
simulation_files=simulation_files
)
# 5. Evaluate constraints
for const in self.config.get('constraints', []):
value = extracted_results[const['name']]
@@ -614,6 +646,11 @@ class OptimizationRunner:
self._save_study_metadata(study_name)
self._save_final_results()
# Finalize training data export (if enabled)
if self.training_data_exporter:
self.training_data_exporter.finalize()
print(f"Training data export finalized: {self.training_data_exporter.trial_count} trials exported")
# Post-processing: Visualization and Model Cleanup
self._run_post_processing()

View File

@@ -0,0 +1,516 @@
"""
Neural-Enhanced Optimization Runner
Extends the base OptimizationRunner with neural network surrogate capabilities
from AtomizerField for super-efficient optimization.
Features:
- Automatic neural surrogate integration when models are available
- Hybrid optimization with smart FEA/NN switching
- Confidence-based fallback to FEA
- Speedup tracking and reporting
"""
from pathlib import Path
from typing import Dict, Any, List, Optional, Callable, Tuple
import json
import time
import logging
import numpy as np
from datetime import datetime
import optuna
from optimization_engine.runner import OptimizationRunner
from optimization_engine.neural_surrogate import (
create_surrogate_from_config,
create_hybrid_optimizer_from_config,
NeuralSurrogate,
HybridOptimizer
)
logger = logging.getLogger(__name__)
class NeuralOptimizationRunner(OptimizationRunner):
    """
    Extended optimization runner with neural network surrogate support.

    Seamlessly integrates AtomizerField neural models to achieve 600x-500,000x
    speedup over traditional FEA-based optimization.
    """

    def __init__(
        self,
        config_path: Path,
        model_updater: Callable,
        simulation_runner: Callable,
        result_extractors: Dict[str, Callable]
    ):
        """
        Initialize neural-enhanced optimization runner.

        Args:
            config_path: Path to optimization_config.json
            model_updater: Function to update NX model parameters
            simulation_runner: Function to run FEA simulation
            result_extractors: Dictionary of result extraction functions
        """
        # Initialize base class.
        super().__init__(config_path, model_updater, simulation_runner, result_extractors)
        # Neural surrogate components (remain None when not configured).
        self.neural_surrogate: Optional[NeuralSurrogate] = None
        self.hybrid_optimizer: Optional[HybridOptimizer] = None
        # Per-trial records of NN usage: dicts with 'trial', 'nn_time', 'confidence'.
        self.neural_speedup_tracker = []
        # Try to initialize neural components.
        self._initialize_neural_components()

    def _initialize_neural_components(self):
        """Initialize neural surrogate and hybrid optimizer if configured."""
        try:
            # Create neural surrogate from config.
            self.neural_surrogate = create_surrogate_from_config(self.config)
            if self.neural_surrogate:
                logger.info("✓ Neural surrogate initialized successfully")
                logger.info(f"  Model: {self.neural_surrogate.model_checkpoint}")
                logger.info(f"  Confidence threshold: {self.neural_surrogate.confidence_threshold}")
                # Create hybrid optimizer for smart FEA/NN switching.
                self.hybrid_optimizer = create_hybrid_optimizer_from_config(self.config)
                if self.hybrid_optimizer:
                    logger.info("✓ Hybrid optimizer initialized")
                    logger.info(f"  Exploration trials: {self.hybrid_optimizer.exploration_trials}")
                    logger.info(f"  Validation frequency: {self.hybrid_optimizer.validation_frequency}")
            else:
                logger.info("Neural surrogate not configured - using standard FEA optimization")
        except Exception as e:
            # Neural components are optional: fall back gracefully.
            logger.warning(f"Could not initialize neural components: {e}")
            logger.info("Falling back to standard FEA optimization")

    def _objective_function_with_neural(self, trial: optuna.Trial) -> float:
        """
        Enhanced objective function with neural network surrogate support.

        Attempts to use neural network for fast prediction, falls back to FEA
        when confidence is low or validation is needed.

        Args:
            trial: Optuna trial object

        Returns:
            Objective value (float)
        """
        # Sample design variables (same as base class).
        design_vars = self._sample_design_variables(trial)
        # Decide whether to use neural network or FEA.
        use_neural = False
        nn_prediction = None
        nn_confidence = 0.0
        if self.neural_surrogate and self.hybrid_optimizer:
            # Check if hybrid optimizer recommends using NN for this trial.
            if self.hybrid_optimizer.should_use_nn(trial.number):
                start_time = time.time()
                try:
                    # Get case data for the current model.
                    case_data = self._prepare_case_data(design_vars)
                    # Get neural network prediction.
                    predictions, confidence, used_nn = self.neural_surrogate.predict(
                        design_vars,
                        case_data=case_data
                    )
                    if used_nn and predictions is not None:
                        # Successfully used neural network.
                        nn_time = time.time() - start_time
                        use_neural = True
                        nn_prediction = predictions
                        nn_confidence = confidence
                        logger.info(f"Trial {trial.number}: Used neural network (confidence: {confidence:.2%}, time: {nn_time:.3f}s)")
                        # Track speedup.
                        self.neural_speedup_tracker.append({
                            'trial': trial.number,
                            'nn_time': nn_time,
                            'confidence': confidence
                        })
                    else:
                        logger.info(f"Trial {trial.number}: Neural confidence too low ({confidence:.2%}), using FEA")
                except Exception as e:
                    logger.warning(f"Trial {trial.number}: Neural prediction failed: {e}, using FEA")
        if use_neural and nn_prediction is not None:
            # Use neural network results; skip model update and simulation.
            extracted_results = self._process_neural_results(nn_prediction, design_vars)
            result_path = None
        else:
            # Fall back to standard FEA (using base class method).
            return super()._objective_function(trial)
        # Process constraints and objectives (same as base class).
        return self._evaluate_objectives_and_constraints(
            trial, design_vars, extracted_results, result_path
        )

    def _sample_design_variables(self, trial: optuna.Trial) -> Dict[str, float]:
        """
        Sample design variables from trial (extracted from base class).

        Args:
            trial: Optuna trial object

        Returns:
            Dictionary of design variable values
        """
        design_vars = {}
        # Handle both dict and list formats for design_variables.
        if isinstance(self.config['design_variables'], dict):
            for var_name, var_info in self.config['design_variables'].items():
                if var_info['type'] == 'continuous':
                    value = trial.suggest_float(
                        var_name,
                        var_info['min'],
                        var_info['max']
                    )
                    precision = self._get_precision(var_name, var_info.get('units', ''))
                    design_vars[var_name] = round(value, precision)
                elif var_info['type'] in ['discrete', 'integer']:
                    design_vars[var_name] = trial.suggest_int(
                        var_name,
                        int(var_info['min']),
                        int(var_info['max'])
                    )
        else:
            # Old format: list of dicts with 'name' and 'bounds'.
            for dv in self.config['design_variables']:
                if dv['type'] == 'continuous':
                    value = trial.suggest_float(
                        dv['name'],
                        dv['bounds'][0],
                        dv['bounds'][1]
                    )
                    precision = self._get_precision(dv['name'], dv.get('units', ''))
                    design_vars[dv['name']] = round(value, precision)
                elif dv['type'] == 'discrete':
                    design_vars[dv['name']] = trial.suggest_int(
                        dv['name'],
                        int(dv['bounds'][0]),
                        int(dv['bounds'][1])
                    )
        return design_vars

    def _prepare_case_data(self, design_vars: Dict[str, float]) -> Optional[Dict[str, Any]]:
        """
        Prepare case-specific data for neural network prediction.

        This includes mesh file paths, boundary conditions, loads, etc.

        Args:
            design_vars: Current design variable values

        Returns:
            Case data dictionary or None
        """
        try:
            case_data = {
                'fem_file': self.config.get('fem_file', ''),
                'sim_file': self.config.get('sim_file', ''),
                'design_variables': design_vars,
            }
            # Add boundary conditions if specified.
            if 'boundary_conditions' in self.config:
                case_data['boundary_conditions'] = self.config['boundary_conditions']
            # Add loads if specified.
            if 'loads' in self.config:
                case_data['loads'] = self.config['loads']
            return case_data
        except Exception as e:
            logger.warning(f"Could not prepare case data: {e}")
            return None

    def _process_neural_results(
        self,
        nn_prediction: Dict[str, Any],
        design_vars: Dict[str, float]
    ) -> Dict[str, float]:
        """
        Process neural network predictions into extracted results format.

        Args:
            nn_prediction: Raw neural network predictions
            design_vars: Current design variable values

        Returns:
            Dictionary of extracted results matching objective/constraint names

        Raises:
            ValueError: If no prediction can be matched to an objective/constraint
        """
        extracted_results = {}
        # Map neural network outputs to objective names.
        for obj in self.config['objectives']:
            obj_name = obj['name']
            if obj_name in nn_prediction:
                value = nn_prediction[obj_name]
            elif 'metric' in obj and obj['metric'] in nn_prediction:
                value = nn_prediction[obj['metric']]
            else:
                # Try common aliases between config names and NN output keys.
                metric_mappings = {
                    'max_stress': ['max_von_mises_stress', 'stress', 'von_mises'],
                    'max_displacement': ['max_displacement', 'displacement', 'disp'],
                    'mass': ['mass', 'weight'],
                    'volume': ['volume'],
                    'compliance': ['compliance', 'strain_energy'],
                    'frequency': ['frequency', 'natural_frequency', 'freq']
                }
                value = None
                for mapped_names in metric_mappings.get(obj_name, []):
                    if mapped_names in nn_prediction:
                        value = nn_prediction[mapped_names]
                        break
                if value is None:
                    raise ValueError(f"Could not find neural prediction for objective '{obj_name}'")
            # Apply appropriate precision.
            precision = self._get_precision(obj_name, obj.get('units', ''))
            extracted_results[obj_name] = round(float(value), precision)
        # Process constraints similarly.
        for const in self.config.get('constraints', []):
            const_name = const['name']
            if const_name in nn_prediction:
                value = nn_prediction[const_name]
            elif 'metric' in const and const['metric'] in nn_prediction:
                value = nn_prediction[const['metric']]
            else:
                # Reuse objective values if constraint uses the same metric.
                if const_name in extracted_results:
                    value = extracted_results[const_name]
                else:
                    raise ValueError(f"Could not find neural prediction for constraint '{const_name}'")
            precision = self._get_precision(const_name, const.get('units', ''))
            extracted_results[const_name] = round(float(value), precision)
        return extracted_results

    def _evaluate_objectives_and_constraints(
        self,
        trial: optuna.Trial,
        design_vars: Dict[str, float],
        extracted_results: Dict[str, float],
        result_path: Optional[Path]
    ) -> float:
        """
        Evaluate objectives and constraints (extracted from base class).

        Args:
            trial: Optuna trial object
            design_vars: Design variable values
            extracted_results: Extracted simulation/NN results
            result_path: Path to result files (None if using NN)

        Returns:
            Total objective value

        Raises:
            optuna.TrialPruned: If any constraint is violated
        """
        # Export training data if using FEA.
        # NOTE(review): assumes the base class provides _export_training_data — verify.
        if self.training_data_exporter and result_path:
            self._export_training_data(trial.number, design_vars, extracted_results, result_path)
        # Evaluate constraints; violations prune the trial.
        for const in self.config.get('constraints', []):
            value = extracted_results[const['name']]
            limit = const['limit']
            if const['type'] == 'upper_bound' and value > limit:
                logger.info(f"Constraint violated: {const['name']} = {value:.4f} > {limit:.4f}")
                raise optuna.TrialPruned()
            elif const['type'] == 'lower_bound' and value < limit:
                logger.info(f"Constraint violated: {const['name']} = {value:.4f} < {limit:.4f}")
                raise optuna.TrialPruned()
        # Calculate weighted objective (maximize terms are negated).
        total_objective = 0.0
        for obj in self.config['objectives']:
            value = extracted_results[obj['name']]
            weight = obj.get('weight', 1.0)
            direction = obj.get('direction', 'minimize')
            if direction == 'minimize':
                total_objective += weight * value
            else:  # maximize
                total_objective -= weight * value
        # Store in history.
        history_entry = {
            'trial_number': trial.number,
            'timestamp': datetime.now().isoformat(),
            'design_variables': design_vars,
            'objectives': {obj['name']: extracted_results[obj['name']] for obj in self.config['objectives']},
            'constraints': {const['name']: extracted_results[const['name']] for const in self.config.get('constraints', [])},
            'total_objective': total_objective,
            'used_neural': result_path is None  # Track if NN was used
        }
        self.history.append(history_entry)
        # Save history.
        self._save_history()
        logger.info(f"Trial {trial.number} completed:")
        logger.info(f"  Design vars: {design_vars}")
        logger.info(f"  Objectives: {history_entry['objectives']}")
        logger.info(f"  Total objective: {total_objective:.6f}")
        if history_entry.get('used_neural'):
            logger.info(f"  Method: Neural Network")
        return total_objective

    def run(
        self,
        study_name: Optional[str] = None,
        n_trials: Optional[int] = None,
        resume: bool = False
    ) -> optuna.Study:
        """
        Run neural-enhanced optimization.

        Args:
            study_name: Optional study name
            n_trials: Number of trials to run
            resume: Whether to resume existing study

        Returns:
            Completed Optuna study
        """
        # BUG FIX: initialize before the conditional so the finally-block restore
        # cannot reference an unbound name, and restore based on whether we
        # actually swapped (not on neural_surrogate truthiness at exit time).
        original_objective = None
        if self.neural_surrogate:
            # Temporarily replace objective function with the neural-aware one.
            original_objective = self._objective_function
            self._objective_function = self._objective_function_with_neural
        try:
            # Run optimization using base class.
            study = super().run(study_name, n_trials, resume)
            # Print neural speedup summary if applicable.
            if self.neural_speedup_tracker:
                self._print_speedup_summary()
            return study
        finally:
            # Restore original objective function if it was replaced.
            if original_objective is not None:
                self._objective_function = original_objective

    def _print_speedup_summary(self):
        """Print summary of neural network speedup achieved."""
        if not self.neural_speedup_tracker:
            return
        nn_trials = len(self.neural_speedup_tracker)
        # BUG FIX: pruned NN trials are recorded in the tracker but never reach
        # self.history, so history can be empty here -> guard the division.
        total_trials = len(self.history) or nn_trials
        nn_percentage = (nn_trials / total_trials) * 100
        avg_nn_time = np.mean([t['nn_time'] for t in self.neural_speedup_tracker])
        avg_confidence = np.mean([t['confidence'] for t in self.neural_speedup_tracker])
        # Estimate FEA time (rough estimate if not tracked).
        estimated_fea_time = 30 * 60  # 30 minutes in seconds
        # Guard against a zero-duration NN inference measurement.
        estimated_speedup = estimated_fea_time / max(avg_nn_time, 1e-9)
        print("\n" + "="*60)
        print("NEURAL NETWORK SPEEDUP SUMMARY")
        print("="*60)
        print(f"Trials using neural network: {nn_trials}/{total_trials} ({nn_percentage:.1f}%)")
        print(f"Average NN inference time: {avg_nn_time:.3f} seconds")
        print(f"Average NN confidence: {avg_confidence:.1%}")
        print(f"Estimated speedup: {estimated_speedup:.0f}x")
        print(f"Time saved: ~{(estimated_fea_time - avg_nn_time) * nn_trials / 3600:.1f} hours")
        print("="*60)

    def update_neural_model(self, new_checkpoint: Path):
        """
        Update the neural network model checkpoint.

        Useful for updating to a newly trained model during optimization.

        Args:
            new_checkpoint: Path to new model checkpoint
        """
        if self.neural_surrogate:
            try:
                self.neural_surrogate.load_model(new_checkpoint)
                logger.info(f"Updated neural model to: {new_checkpoint}")
            except Exception as e:
                logger.error(f"Failed to update neural model: {e}")

    def train_neural_model(self, training_data_dir: Path, epochs: int = 100):
        """
        Train a new neural model on collected data.

        Args:
            training_data_dir: Directory containing training data
            epochs: Number of training epochs
        """
        if self.hybrid_optimizer:
            try:
                model_path = self.hybrid_optimizer.train_surrogate_model(training_data_dir, epochs)
                # Update to use the newly trained model.
                if model_path and self.neural_surrogate:
                    self.update_neural_model(model_path)
            except Exception as e:
                logger.error(f"Failed to train neural model: {e}")
def create_neural_runner(
    config_path: Path,
    model_updater: Callable,
    simulation_runner: Callable,
    result_extractors: Dict[str, Callable]
) -> NeuralOptimizationRunner:
    """
    Factory function to create a neural-enhanced optimization runner.

    Args:
        config_path: Path to optimization configuration
        model_updater: Function to update model parameters
        simulation_runner: Function to run simulation
        result_extractors: Dictionary of result extraction functions

    Returns:
        NeuralOptimizationRunner instance
    """
    # Thin wrapper: forward every callback straight to the constructor.
    return NeuralOptimizationRunner(
        config_path=config_path,
        model_updater=model_updater,
        simulation_runner=simulation_runner,
        result_extractors=result_extractors,
    )

View File

@@ -0,0 +1,648 @@
"""
Simple MLP Surrogate for Fast Optimization
This module provides a lightweight neural network surrogate that:
1. Trains directly from Optuna database (no mesh parsing needed)
2. Uses simple MLP: design_params -> [mass, frequency, max_disp, max_stress]
3. Provides millisecond predictions for optimization
This is much simpler than the GNN-based approach and works well when:
- You have enough FEA data in the database
- You only need scalar objective predictions (no field data)
- You want quick setup without mesh parsing pipeline
Usage:
from optimization_engine.simple_mlp_surrogate import SimpleSurrogate, train_from_database
# Train from database
surrogate = train_from_database(
db_path="studies/uav_arm_atomizerfield_test/2_results/study.db",
study_name="uav_arm_atomizerfield_test"
)
# Predict
results = surrogate.predict({
'beam_half_core_thickness': 3.0,
'beam_face_thickness': 1.5,
'holes_diameter': 8.0,
'hole_count': 4
})
"""
import json
import logging
import time
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
import numpy as np
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
logger.warning("PyTorch not installed. SimpleSurrogate will be limited.")
try:
import optuna
OPTUNA_AVAILABLE = True
except ImportError:
OPTUNA_AVAILABLE = False
class MLPModel(nn.Module):
    """Simple MLP for design parameter -> objective prediction."""

    def __init__(
        self,
        n_inputs: int = 4,
        n_outputs: int = 4,
        hidden_dims: List[int] = [128, 256, 128, 64],
        dropout: float = 0.1
    ):
        super().__init__()
        # Each hidden stage: Linear -> LayerNorm -> ReLU -> Dropout.
        stages: List[nn.Module] = []
        width_in = n_inputs
        for width_out in hidden_dims:
            stages += [
                nn.Linear(width_in, width_out),
                nn.LayerNorm(width_out),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
            width_in = width_out
        # Final projection to the objective vector (no activation).
        stages.append(nn.Linear(width_in, n_outputs))
        self.network = nn.Sequential(*stages)
        self._init_weights()

    def _init_weights(self):
        # Kaiming init matches the ReLU activations; biases start at zero.
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, x):
        return self.network(x)
class FEADataset(Dataset):
    """Dataset pairing design-parameter rows with FEA objective rows."""

    def __init__(
        self,
        design_params: np.ndarray,
        objectives: np.ndarray
    ):
        # Copy into float32 tensors, the dtype the MLP expects.
        self.design_params = torch.tensor(design_params, dtype=torch.float32)
        self.objectives = torch.tensor(objectives, dtype=torch.float32)

    def __len__(self):
        return self.design_params.shape[0]

    def __getitem__(self, idx):
        return self.design_params[idx], self.objectives[idx]
class SimpleSurrogate:
"""
Simple MLP-based surrogate for FEA prediction.
This is a lightweight alternative to the GNN-based approach that:
- Doesn't require mesh parsing
- Trains directly from database
- Provides fast scalar predictions
"""
def __init__(
    self,
    model: nn.Module = None,
    design_var_names: List[str] = None,
    objective_names: List[str] = None,
    normalization: Dict[str, Any] = None,
    device: str = 'auto'
):
    """Set up the surrogate wrapper around a trained MLP (eval mode)."""
    if not TORCH_AVAILABLE:
        raise ImportError("PyTorch required. Install: pip install torch")
    # Resolve compute device; 'auto' prefers CUDA when available.
    if device == 'auto':
        backend = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(backend)
    else:
        self.device = torch.device(device)
    self.model = model
    if model is not None:
        self.model = model.to(self.device)
        self.model.eval()
    self.design_var_names = design_var_names or []
    self.objective_names = objective_names or ['mass', 'frequency', 'max_displacement', 'max_stress']
    # Normalization stats; identity transform when none are supplied.
    if normalization is not None:
        self.normalization = normalization
    else:
        n_in = len(self.design_var_names)
        n_out = len(self.objective_names)
        self.normalization = {
            'design_mean': np.zeros(n_in),
            'design_std': np.ones(n_in),
            'objective_mean': np.zeros(n_out),
            'objective_std': np.ones(n_out)
        }
    # Cumulative performance counters.
    self.stats = {
        'predictions': 0,
        'total_time_ms': 0.0
    }
    logger.info(f"SimpleSurrogate initialized on {self.device}")
def predict(self, design_params: Dict[str, float]) -> Dict[str, Any]:
"""
Predict FEA objectives from design parameters.
Args:
design_params: Dict of design variable values
Returns:
Dict with mass, frequency, max_displacement, max_stress, inference_time_ms
"""
start_time = time.time()
# Build input tensor
param_values = [design_params.get(name, 0.0) for name in self.design_var_names]
x = np.array(param_values, dtype=np.float32)
# Normalize
x_norm = (x - self.normalization['design_mean']) / (self.normalization['design_std'] + 1e-8)
x_tensor = torch.tensor(x_norm, dtype=torch.float32, device=self.device).unsqueeze(0)
# Predict
with torch.no_grad():
y_norm = self.model(x_tensor).cpu().numpy()[0]
# Denormalize
y = y_norm * self.normalization['objective_std'] + self.normalization['objective_mean']
inference_time = (time.time() - start_time) * 1000
results = {
self.objective_names[i]: float(y[i]) for i in range(len(self.objective_names))
}
results['inference_time_ms'] = inference_time
# Update stats
self.stats['predictions'] += 1
self.stats['total_time_ms'] += inference_time
return results
def get_statistics(self) -> Dict[str, Any]:
"""Get prediction statistics."""
avg_time = self.stats['total_time_ms'] / self.stats['predictions'] \
if self.stats['predictions'] > 0 else 0
return {
'total_predictions': self.stats['predictions'],
'total_time_ms': self.stats['total_time_ms'],
'average_time_ms': avg_time,
'device': str(self.device),
'design_var_names': self.design_var_names,
'objective_names': self.objective_names
}
def save(self, path: Path):
"""Save surrogate to file."""
path = Path(path)
torch.save({
'model_state_dict': self.model.state_dict(),
'design_var_names': self.design_var_names,
'objective_names': self.objective_names,
'normalization': self.normalization,
'model_config': {
'n_inputs': len(self.design_var_names),
'n_outputs': len(self.objective_names)
}
}, path)
logger.info(f"Surrogate saved to {path}")
@classmethod
def load(cls, path: Path, device: str = 'auto') -> 'SimpleSurrogate':
"""Load surrogate from file."""
path = Path(path)
checkpoint = torch.load(path, map_location='cpu')
# Create model
model_config = checkpoint['model_config']
model = MLPModel(
n_inputs=model_config['n_inputs'],
n_outputs=model_config['n_outputs']
)
model.load_state_dict(checkpoint['model_state_dict'])
return cls(
model=model,
design_var_names=checkpoint['design_var_names'],
objective_names=checkpoint['objective_names'],
normalization=checkpoint['normalization'],
device=device
)
def extract_data_from_database(
    db_path: str,
    study_name: str
) -> Tuple[np.ndarray, np.ndarray, List[str], List[str]]:
    """
    Extract training data from Optuna database.

    Builds a supervised dataset (design parameters -> objectives) from the
    study's completed trials, filtering out samples with inf/NaN/extreme
    objective values or a non-positive frequency.

    Args:
        db_path: Path to SQLite database
        study_name: Name of Optuna study

    Returns:
        Tuple of (design_params, objectives, design_var_names, objective_names)
        where both arrays are float32 and objectives columns are
        [mass, frequency, max_displacement, max_stress].

    Raises:
        ImportError: If Optuna is not installed.
        ValueError: If the study has no completed trials, trials carry no
            objective values, or every sample is filtered out as invalid.
    """
    if not OPTUNA_AVAILABLE:
        raise ImportError("Optuna required. Install: pip install optuna")
    storage = optuna.storages.RDBStorage(f"sqlite:///{db_path}")
    study = optuna.load_study(study_name=study_name, storage=storage)
    # Get completed trials
    completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
    if not completed_trials:
        raise ValueError(f"No completed trials in study {study_name}")
    logger.info(f"Found {len(completed_trials)} completed trials")
    # Get design variable names from first trial
    # NOTE(review): assumes every trial shares the first trial's parameter
    # set; parameters missing from a trial silently default to 0.0 below.
    # Confirm the study does not use a conditional search space.
    design_var_names = list(completed_trials[0].params.keys())
    # Determine objective structure
    first_values = completed_trials[0].values
    if first_values is None:
        raise ValueError("Trials have no objective values")
    # For multi-objective, values are [mass, frequency, ...]
    # We also need user_attrs for constraints
    # Collect data - filter out invalid samples
    design_params_list = []
    objectives_list = []
    skipped = 0
    for trial in completed_trials:
        # Objectives - need mass, frequency, max_disp, max_stress
        mass = trial.values[0] if len(trial.values) > 0 else 0.0
        frequency = trial.values[1] if len(trial.values) > 1 else 0.0
        # Get constraints from user_attrs
        max_disp = trial.user_attrs.get('max_displacement', 0.0)
        max_stress = trial.user_attrs.get('max_stress', 0.0)
        # Note: frequency is stored as -freq for minimization, so convert back
        # Also filter out inf values
        objectives = [mass, -frequency, max_disp, max_stress]
        # Skip invalid samples (inf, nan, or extreme values)
        if any(np.isinf(v) or np.isnan(v) or v > 1e10 for v in objectives):
            skipped += 1
            continue
        # Skip if frequency is negative (indicates error)
        if -frequency <= 0:
            skipped += 1
            continue
        # Design parameters
        params = [trial.params.get(name, 0.0) for name in design_var_names]
        design_params_list.append(params)
        objectives_list.append(objectives)
    if skipped > 0:
        logger.info(f"Skipped {skipped} invalid samples")
    if not design_params_list:
        raise ValueError("No valid samples found after filtering")
    design_params = np.array(design_params_list, dtype=np.float32)
    objectives = np.array(objectives_list, dtype=np.float32)
    objective_names = ['mass', 'frequency', 'max_displacement', 'max_stress']
    logger.info(f"Extracted {len(design_params)} valid samples")
    logger.info(f"Design vars: {design_var_names}")
    logger.info(f"Objectives: {objective_names}")
    return design_params, objectives, design_var_names, objective_names
def train_from_database(
    db_path: str,
    study_name: str,
    epochs: int = 200,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    val_split: float = 0.2,
    save_path: Optional[str] = None,
    device: str = 'auto'
) -> SimpleSurrogate:
    """
    Train SimpleSurrogate from Optuna database.

    Extracts completed trials, normalizes inputs/outputs, trains an MLP with
    AdamW + cosine LR schedule, keeps the best-validation-loss weights, and
    reports per-objective MAE/MAPE on the validation split.

    Args:
        db_path: Path to SQLite database
        study_name: Name of Optuna study
        epochs: Training epochs
        batch_size: Batch size
        learning_rate: Learning rate
        val_split: Validation split ratio
        save_path: Optional path to save trained model
        device: Computing device

    Returns:
        Trained SimpleSurrogate

    Raises:
        ImportError: If PyTorch is not installed.
        ValueError: If the dataset is too small to carve out a validation set.
    """
    if not TORCH_AVAILABLE:
        raise ImportError("PyTorch required")
    # Set device
    if device == 'auto':
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device(device)
    print(f"\n{'='*60}")
    print("Training Simple MLP Surrogate from Database")
    print(f"{'='*60}")
    print(f"Device: {device}")
    # Extract data
    print(f"\n[1] Loading data from {db_path}")
    design_params, objectives, design_var_names, objective_names = extract_data_from_database(
        db_path, study_name
    )
    print(f" Samples: {len(design_params)}")
    print(f" Design vars: {design_var_names}")
    print(f" Objectives: {objective_names}")
    # Compute normalization stats
    design_mean = design_params.mean(axis=0)
    design_std = design_params.std(axis=0)
    objective_mean = objectives.mean(axis=0)
    objective_std = objectives.std(axis=0)
    print(f"\n Objective ranges:")
    for i, name in enumerate(objective_names):
        print(f" {name}: {objectives[:, i].min():.2f} - {objectives[:, i].max():.2f}")
    # Normalize data (epsilon guards against zero std)
    design_params_norm = (design_params - design_mean) / (design_std + 1e-8)
    objectives_norm = (objectives - objective_mean) / (objective_std + 1e-8)
    # Create dataset
    dataset = FEADataset(design_params_norm, objectives_norm)
    # Split into train/val
    n_val = int(len(dataset) * val_split)
    n_train = len(dataset) - n_val
    # Fix: with a tiny dataset int(len * val_split) can be 0, which later
    # causes ZeroDivisionError on `val_loss /= len(val_loader)`. Fail early
    # with an actionable message instead.
    if n_val == 0:
        raise ValueError(
            f"Dataset too small for validation split: {len(dataset)} samples "
            f"with val_split={val_split}"
        )
    train_dataset, val_dataset = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    print(f"\n[2] Creating model")
    print(f" Train samples: {n_train}")
    print(f" Val samples: {n_val}")
    # Create model
    model = MLPModel(
        n_inputs=len(design_var_names),
        n_outputs=len(objective_names),
        hidden_dims=[128, 256, 128, 64]
    ).to(device)
    n_params = sum(p.numel() for p in model.parameters())
    print(f" Model params: {n_params:,}")
    # Training
    print(f"\n[3] Training for {epochs} epochs")
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)
    best_val_loss = float('inf')
    best_state = None
    for epoch in range(epochs):
        # Train
        model.train()
        train_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            pred = model(x)
            loss = F.mse_loss(pred, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        # Validate
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                pred = model(x)
                val_loss += F.mse_loss(pred, y).item()
        val_loss /= len(val_loader)
        scheduler.step()
        # Track best
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Fix: state_dict().copy() is a SHALLOW copy - the tensors inside
            # alias the live parameters, so the "best" snapshot would silently
            # track the final weights. Clone each tensor for a true snapshot.
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
        # Log progress
        if (epoch + 1) % 20 == 0 or epoch == 0:
            print(f" Epoch {epoch+1:3d}: train_loss={train_loss:.6f}, val_loss={val_loss:.6f}")
    # Load best model (guard: best_state stays None when epochs == 0)
    if best_state is not None:
        model.load_state_dict(best_state)
    print(f"\n Best val_loss: {best_val_loss:.6f}")
    # Create surrogate
    normalization = {
        'design_mean': design_mean,
        'design_std': design_std,
        'objective_mean': objective_mean,
        'objective_std': objective_std
    }
    surrogate = SimpleSurrogate(
        model=model,
        design_var_names=design_var_names,
        objective_names=objective_names,
        normalization=normalization,
        device=str(device)
    )
    # Evaluate accuracy
    print(f"\n[4] Evaluating accuracy on validation set")
    model.eval()
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            pred = model(x).cpu().numpy()
            all_preds.append(pred)
            all_targets.append(y.numpy())
    all_preds = np.concatenate(all_preds)
    all_targets = np.concatenate(all_targets)
    # Denormalize for error calculation
    preds_denorm = all_preds * objective_std + objective_mean
    targets_denorm = all_targets * objective_std + objective_mean
    for i, name in enumerate(objective_names):
        mae = np.abs(preds_denorm[:, i] - targets_denorm[:, i]).mean()
        mape = (np.abs(preds_denorm[:, i] - targets_denorm[:, i]) / (np.abs(targets_denorm[:, i]) + 1e-8)).mean() * 100
        print(f" {name}: MAE={mae:.4f}, MAPE={mape:.1f}%")
    # Save if requested
    if save_path:
        surrogate.save(save_path)
    print(f"\n{'='*60}")
    print("Training complete!")
    print(f"{'='*60}")
    return surrogate
def create_simple_surrogate_for_study(
    db_path: str = None,
    study_name: str = None,
    model_path: str = None,
    project_root: Path = None
) -> Optional[SimpleSurrogate]:
    """
    Factory function to create or load SimpleSurrogate for UAV arm study.

    If model_path exists, loads the model. Otherwise trains from database.

    Args:
        db_path: Path to Optuna database
        study_name: Name of study
        model_path: Path to saved model (auto-detect if None)
        project_root: Project root for auto-detection

    Returns:
        SimpleSurrogate instance, or None when PyTorch or the database
        is unavailable.
    """
    if not TORCH_AVAILABLE:
        logger.warning("PyTorch not available")
        return None

    # Resolve the project root, defaulting relative to this module.
    root = project_root if project_root is not None else Path(__file__).parent.parent

    # Prefer a previously saved checkpoint when one exists.
    model_file = Path(model_path) if model_path is not None else root / "simple_mlp_surrogate.pt"
    if model_file.exists():
        logger.info(f"Loading existing surrogate from {model_file}")
        return SimpleSurrogate.load(model_file)

    # No checkpoint: fall back to training from the study database.
    database = (
        Path(db_path)
        if db_path is not None
        else root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
    )
    study = study_name if study_name is not None else "uav_arm_atomizerfield_test"

    if not database.exists():
        logger.warning(f"Database not found: {database}")
        return None

    logger.info(f"Training surrogate from {database}")
    return train_from_database(
        db_path=str(database),
        study_name=study,
        save_path=str(model_file)
    )
if __name__ == "__main__":
    # Standalone entry point: train the surrogate on the default UAV-arm
    # study database, then smoke-test predictions and parameter sensitivity.
    import sys
    # Default paths
    project_root = Path(__file__).parent.parent
    db_path = project_root / "studies" / "uav_arm_atomizerfield_test" / "2_results" / "study.db"
    model_path = project_root / "simple_mlp_surrogate.pt"
    print("Simple MLP Surrogate Training")
    print("="*60)
    if not db_path.exists():
        print(f"ERROR: Database not found: {db_path}")
        sys.exit(1)
    # Train
    surrogate = train_from_database(
        db_path=str(db_path),
        study_name="uav_arm_atomizerfield_test",
        epochs=300,
        save_path=str(model_path)
    )
    # Test predictions
    # Representative mid-range design point for a quick sanity check.
    print("\n[5] Testing predictions")
    test_params = {
        'beam_half_core_thickness': 3.0,
        'beam_face_thickness': 1.5,
        'holes_diameter': 8.0,
        'hole_count': 4
    }
    print(f" Input: {test_params}")
    results = surrogate.predict(test_params)
    print(f" Mass: {results['mass']:.2f} g")
    print(f" Frequency: {results['frequency']:.2f} Hz")
    print(f" Max Displacement: {results['max_displacement']:.6f} mm")
    print(f" Max Stress: {results['max_stress']:.2f} MPa")
    print(f" Inference time: {results['inference_time_ms']:.2f} ms")
    # Test variation
    # Sweep one parameter to confirm the surrogate responds to its inputs.
    print("\n[6] Testing variation with parameters")
    for thickness in [1.0, 3.0, 5.0]:
        params = {**test_params, 'beam_half_core_thickness': thickness}
        r = surrogate.predict(params)
        print(f" thickness={thickness}: mass={r['mass']:.0f}g, freq={r['frequency']:.2f}Hz")

View File

@@ -0,0 +1,433 @@
"""
Strategy Portfolio Manager - Dynamic multi-strategy optimization.
This module manages dynamic switching between optimization strategies during a run.
It detects stagnation, evaluates alternative strategies, and orchestrates transitions
to maintain optimization progress.
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""
import numpy as np
import optuna
from typing import Dict, List, Optional, Tuple
import json
from pathlib import Path
from dataclasses import dataclass, asdict
from datetime import datetime
@dataclass
class StrategyPerformance:
    """Track performance metrics for a strategy."""
    strategy_name: str  # e.g. 'tpe', 'cmaes', 'gp_bo', 'random'
    trials_used: int  # number of trials executed under this strategy
    best_value_achieved: float  # best (minimum) objective seen while active
    improvement_rate: float  # Improvement per trial
    last_used_trial: int  # trial number at which this strategy last ran
    avg_trial_time: float = 0.0  # NOTE(review): not updated anywhere in this module - confirm external writers
class StrategyTransitionManager:
    """
    Manages transitions between optimization strategies.

    Implements intelligent strategy switching based on:
    1. Stagnation detection
    2. Landscape characteristics
    3. Strategy performance history
    4. User-defined transition rules
    """
    def __init__(
        self,
        stagnation_window: int = 10,
        min_improvement_threshold: float = 0.001,
        verbose: bool = True,
        tracking_dir: Optional[Path] = None
    ):
        """
        Args:
            stagnation_window: Number of trials to check for stagnation
            min_improvement_threshold: Minimum relative improvement to avoid stagnation
            verbose: Print transition decisions
            tracking_dir: Directory to save transition logs
        """
        self.stagnation_window = stagnation_window
        self.min_improvement = min_improvement_threshold
        self.verbose = verbose
        self.tracking_dir = tracking_dir
        # Track strategy performance
        self.strategy_history: Dict[str, StrategyPerformance] = {}
        self.current_strategy: Optional[str] = None
        self.transition_history: List[Dict] = []
        # Fix: always define the log-file attributes so helpers that test them
        # (e.g. _load_transition_history) never hit AttributeError when no
        # tracking directory was configured.
        self.transition_log_file: Optional[Path] = None
        self.performance_log_file: Optional[Path] = None
        # Initialize tracking files
        if tracking_dir:
            self.tracking_dir = Path(tracking_dir)
            self.tracking_dir.mkdir(parents=True, exist_ok=True)
            self.transition_log_file = self.tracking_dir / "strategy_transitions.json"
            self.performance_log_file = self.tracking_dir / "strategy_performance.json"
            # Load existing history
            self._load_transition_history()

    def should_switch_strategy(
        self,
        study: optuna.Study,
        landscape: Optional[Dict] = None
    ) -> Tuple[bool, str]:
        """
        Determine if strategy should be switched.

        Args:
            study: Optuna study
            landscape: Current landscape analysis (optional)

        Returns:
            (should_switch, reason)
        """
        completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
        if len(completed_trials) < self.stagnation_window:
            return False, "Insufficient trials for stagnation analysis"
        # Check for stagnation in recent trials
        recent_trials = completed_trials[-self.stagnation_window:]
        is_stagnant, stagnation_reason = self._detect_stagnation(recent_trials)
        if is_stagnant:
            return True, stagnation_reason
        # Check if landscape changed (would require re-analysis)
        if landscape and self._landscape_changed(landscape):
            return True, "Landscape characteristics changed - re-evaluating strategy"
        # Check if current strategy hit its theoretical limit
        if self._strategy_exhausted(study, landscape):
            return True, "Current strategy reached convergence limit"
        return False, "Strategy performing adequately"

    def _detect_stagnation(self, recent_trials: List) -> Tuple[bool, str]:
        """
        Detect if optimization has stagnated.

        Stagnation indicators:
        1. No improvement in best value
        2. High variance in recent objectives (thrashing)
        3. Repeated similar parameter configurations

        [Protocol 11] Multi-objective NOT supported - stagnation detection
        requires a single objective value. Skip for multi-objective studies.
        """
        if len(recent_trials) < 3:
            return False, ""
        # [Protocol 11] Skip stagnation detection for multi-objective
        # Multi-objective has a Pareto front, not a single "best value".
        # Fix: a completed SINGLE-objective trial also has a non-None
        # `.values` (a one-element list), so testing `values is not None`
        # skipped detection for every study. Check the length instead.
        if recent_trials and recent_trials[0].values is not None and len(recent_trials[0].values) > 1:
            return False, "[Protocol 11] Stagnation detection skipped for multi-objective"
        recent_values = [t.value for t in recent_trials]
        # 1. Check for improvement in best value
        best_values = []
        current_best = float('inf')
        for value in recent_values:
            current_best = min(current_best, value)
            best_values.append(current_best)
        # Calculate improvement over window
        if len(best_values) >= 2:
            initial_best = best_values[0]
            final_best = best_values[-1]
            if initial_best > 0:
                relative_improvement = (initial_best - final_best) / initial_best
            else:
                relative_improvement = abs(final_best - initial_best)
            if relative_improvement < self.min_improvement:
                return True, f"Stagnation detected: <{self.min_improvement:.1%} improvement in {self.stagnation_window} trials"
        # 2. Check for thrashing (high variance without improvement)
        recent_variance = np.var(recent_values)
        recent_mean = np.mean(recent_values)
        if recent_mean > 0:
            coefficient_of_variation = np.sqrt(recent_variance) / recent_mean
            if coefficient_of_variation > 0.3:  # High variance
                # If high variance but no improvement, we're thrashing
                if best_values[0] == best_values[-1]:
                    return True, f"Thrashing detected: High variance ({coefficient_of_variation:.2f}) without improvement"
        return False, ""

    def _landscape_changed(self, landscape: Dict) -> bool:
        """
        Detect if landscape characteristics changed significantly.

        This would indicate we're in a different region of search space.
        """
        # This is a placeholder - would need to track landscape history
        # For now, return False (no change detection)
        return False

    def _strategy_exhausted(
        self,
        study: optuna.Study,
        landscape: Optional[Dict]
    ) -> bool:
        """
        Check if current strategy has reached its theoretical limit.

        Different strategies have different convergence properties:
        - CMA-ES: Fast convergence but can get stuck in local minimum
        - TPE: Slower convergence but better global exploration
        - GP-BO: Sample efficient but plateaus after exploration
        """
        if not self.current_strategy or not landscape:
            return False
        # CMA-ES exhaustion: High convergence in smooth landscape
        if self.current_strategy == 'cmaes':
            if landscape.get('smoothness', 0) > 0.7:
                # Check if we've converged (low variance in recent trials)
                completed = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
                if len(completed) >= 20:
                    recent_params = []
                    for trial in completed[-10:]:
                        recent_params.append(list(trial.params.values()))
                    recent_params = np.array(recent_params)
                    param_variance = np.var(recent_params, axis=0)
                    # If variance is very low, CMA-ES has converged
                    if np.all(param_variance < 0.01):
                        return True
        return False

    def record_strategy_performance(
        self,
        strategy_name: str,
        study: optuna.Study,
        trial: optuna.trial.FrozenTrial
    ):
        """Record performance metrics for current strategy.

        Multi-objective trials have no scalar objective; for them only the
        usage counters are updated.
        """
        if strategy_name not in self.strategy_history:
            self.strategy_history[strategy_name] = StrategyPerformance(
                strategy_name=strategy_name,
                trials_used=0,
                best_value_achieved=float('inf'),
                improvement_rate=0.0,
                last_used_trial=0
            )
        perf = self.strategy_history[strategy_name]
        perf.trials_used += 1
        perf.last_used_trial = trial.number
        # Fix: trial.value raises for multi-objective trials; use .values and
        # only track scalar (single-objective) results.
        if trial.values is not None and len(trial.values) == 1:
            perf.best_value_achieved = min(perf.best_value_achieved, trial.values[0])
            # Calculate improvement rate
            if perf.trials_used > 1:
                initial_best = study.trials[max(0, trial.number - perf.trials_used)].value
                # Fix: the reference trial may be pruned/failed (value is None).
                if initial_best is not None:
                    perf.improvement_rate = (initial_best - perf.best_value_achieved) / perf.trials_used

    def execute_strategy_switch(
        self,
        study: optuna.Study,
        from_strategy: str,
        to_strategy: str,
        reason: str,
        trial_number: int
    ):
        """
        Execute strategy switch and log the transition.

        Args:
            study: Optuna study
            from_strategy: Current strategy
            to_strategy: New strategy to switch to
            reason: Reason for switching
            trial_number: Current trial number
        """
        # Fix: study.best_value raises for multi-objective studies - record
        # None instead of crashing mid-optimization.
        try:
            best_value_at_switch = study.best_value
        except (RuntimeError, ValueError):
            best_value_at_switch = None
        transition_event = {
            'trial_number': trial_number,
            'from_strategy': from_strategy,
            'to_strategy': to_strategy,
            'reason': reason,
            'best_value_at_switch': best_value_at_switch,
            'total_trials': len(study.trials),
            'timestamp': datetime.now().isoformat()
        }
        self.transition_history.append(transition_event)
        self.current_strategy = to_strategy
        # Save transition log
        if self.tracking_dir:
            try:
                with open(self.transition_log_file, 'w') as f:
                    json.dump(self.transition_history, f, indent=2)
            except Exception as e:
                if self.verbose:
                    print(f" Warning: Failed to save transition log: {e}")
        if self.verbose:
            self._print_transition(transition_event)

    def _print_transition(self, event: Dict):
        """Print formatted transition announcement."""
        # Multi-objective switches record None - render them as 'n/a'.
        best = event['best_value_at_switch']
        best_str = f"{best:.6f}" if best is not None else "n/a"
        print(f"\n{'='*70}")
        print(f" STRATEGY TRANSITION")
        print(f"{'='*70}")
        print(f" Trial #{event['trial_number']}")
        print(f" {event['from_strategy'].upper()} -> {event['to_strategy'].upper()}")
        print(f" Reason: {event['reason']}")
        print(f" Best value at transition: {best_str}")
        print(f"{'='*70}\n")

    def _load_transition_history(self):
        """Load existing transition history from file."""
        if self.transition_log_file and self.transition_log_file.exists():
            try:
                with open(self.transition_log_file, 'r') as f:
                    self.transition_history = json.load(f)
                # Restore current strategy from history
                if self.transition_history:
                    self.current_strategy = self.transition_history[-1]['to_strategy']
            except Exception as e:
                if self.verbose:
                    print(f" Warning: Failed to load transition history: {e}")

    def save_performance_summary(self):
        """Save strategy performance summary to file."""
        if not self.tracking_dir:
            return
        summary = {
            'strategies': {
                name: asdict(perf)
                for name, perf in self.strategy_history.items()
            },
            'current_strategy': self.current_strategy,
            'total_transitions': len(self.transition_history)
        }
        try:
            with open(self.performance_log_file, 'w') as f:
                json.dump(summary, f, indent=2)
        except Exception as e:
            if self.verbose:
                print(f" Warning: Failed to save performance summary: {e}")

    def get_performance_report(self) -> str:
        """Generate human-readable performance report."""
        if not self.strategy_history:
            return "No strategy performance data available"
        report = "\n" + "="*70 + "\n"
        report += " STRATEGY PERFORMANCE SUMMARY\n"
        report += "="*70 + "\n"
        for name, perf in self.strategy_history.items():
            report += f"\n {name.upper()}:\n"
            report += f" Trials used: {perf.trials_used}\n"
            report += f" Best value: {perf.best_value_achieved:.6f}\n"
            report += f" Improvement rate: {perf.improvement_rate:.6f} per trial\n"
            report += f" Last used: Trial #{perf.last_used_trial}\n"
        if self.transition_history:
            report += f"\n TRANSITIONS: {len(self.transition_history)}\n"
            for event in self.transition_history:
                report += f" Trial #{event['trial_number']}: "
                report += f"{event['from_strategy']}{event['to_strategy']}\n"
                report += f" Reason: {event['reason']}\n"
        report += "="*70 + "\n"
        return report
class AdaptiveStrategyCallback:
    """
    Optuna callback that manages adaptive strategy switching.

    This callback integrates with the IntelligentOptimizer to:
    1. Monitor strategy performance
    2. Detect when switching is needed
    3. Coordinate with landscape analyzer and strategy selector
    4. Execute transitions
    """
    def __init__(
        self,
        transition_manager: StrategyTransitionManager,
        landscape_analyzer,
        strategy_selector,
        reanalysis_interval: int = 15
    ):
        """
        Args:
            transition_manager: StrategyTransitionManager instance
            landscape_analyzer: LandscapeAnalyzer instance
            strategy_selector: IntelligentStrategySelector instance
            reanalysis_interval: How often to re-analyze landscape
        """
        self.transition_manager = transition_manager
        self.landscape_analyzer = landscape_analyzer
        self.strategy_selector = strategy_selector
        self.reanalysis_interval = reanalysis_interval
        # Most recent landscape analysis / recommendation (None until computed)
        self.last_landscape = None
        self.last_recommendation = None

    def __call__(self, study: optuna.Study, trial: optuna.trial.FrozenTrial):
        """Called after each trial completes."""
        if trial.state != optuna.trial.TrialState.COMPLETE:
            return
        current_strategy = self.transition_manager.current_strategy
        # Record performance
        if current_strategy:
            self.transition_manager.record_strategy_performance(
                current_strategy, study, trial
            )
        # Periodically re-analyze landscape
        if trial.number % self.reanalysis_interval == 0:
            self.last_landscape = self.landscape_analyzer.analyze(study)
        # Check if we should switch
        should_switch, reason = self.transition_manager.should_switch_strategy(
            study, self.last_landscape
        )
        if should_switch and self.last_landscape:
            # Fix: study.best_value raises for multi-objective studies (the
            # selector explicitly supports them) - fall back to None, which
            # recommend_strategy accepts for current_best_value.
            try:
                current_best = study.best_value
            except (RuntimeError, ValueError):
                current_best = None
            # Get new strategy recommendation
            new_strategy, details = self.strategy_selector.recommend_strategy(
                landscape=self.last_landscape,
                trials_completed=trial.number,
                current_best_value=current_best
            )
            # Only switch if recommendation is different
            if new_strategy != current_strategy:
                self.transition_manager.execute_strategy_switch(
                    study=study,
                    from_strategy=current_strategy or 'initial',
                    to_strategy=new_strategy,
                    reason=reason,
                    trial_number=trial.number
                )
                # Note: Actual sampler change requires study recreation
                # This is logged for the IntelligentOptimizer to act on
                self.last_recommendation = (new_strategy, details)

View File

@@ -0,0 +1,419 @@
"""
Strategy Selector - Intelligent optimization strategy recommendation.
This module implements decision logic to recommend the best optimization strategy
based on landscape characteristics. Uses expert knowledge and empirical heuristics
to match problem types to appropriate algorithms.
Part of Protocol 10: Intelligent Multi-Strategy Optimization (IMSO)
"""
import optuna
from typing import Dict, Optional, Tuple
import json
from pathlib import Path
class IntelligentStrategySelector:
"""
Selects optimal optimization strategy based on problem characteristics.
Decision tree combines:
1. Landscape analysis (smoothness, multimodality, noise)
2. Problem dimensionality
3. Trial budget and evaluation cost
4. Historical performance data (if available)
"""
    def __init__(self, verbose: bool = True):
        """
        Args:
            verbose: Print recommendation explanations
        """
        # Whether each recommendation is echoed to stdout.
        self.verbose = verbose
        # Chronological log of every recommendation made (filled by _log_recommendation).
        self.recommendation_history = []
def recommend_strategy(
self,
landscape: Dict,
trials_completed: int = 0,
trials_budget: Optional[int] = None,
current_best_value: Optional[float] = None
) -> Tuple[str, Dict]:
"""
Recommend optimization strategy based on problem characteristics.
Args:
landscape: Output from LandscapeAnalyzer.analyze()
trials_completed: Number of trials completed so far
trials_budget: Total trial budget (if known)
current_best_value: Current best objective value
Returns:
(strategy_name, recommendation_details)
strategy_name: One of ['tpe', 'cmaes', 'gp_bo', 'random', 'hybrid_gp_cmaes']
recommendation_details: Dict with confidence, reasoning, and sampler config
"""
# Handle None landscape (multi-objective optimization)
if landscape is None:
# Multi-objective: Use NSGA-II/NSGA-III based on trial count
return self._recommend_multiobjective_strategy(trials_completed)
if not landscape.get('ready', False):
# Not enough data, use random exploration
return self._recommend_random_exploration(trials_completed)
# Extract key characteristics
landscape_type = landscape.get('landscape_type', 'unknown')
smoothness = landscape.get('smoothness', 0.5)
multimodal = landscape.get('multimodal', False)
noise_level = landscape.get('noise_level', 0.0)
dimensionality = landscape.get('dimensionality', 2)
correlation_strength = landscape['parameter_correlation'].get('overall_strength', 0.3)
# Use characterization trial count for strategy decisions (not total trials)
# This prevents premature algorithm selection when many trials were pruned
char_trials = landscape.get('total_trials', trials_completed)
# Decision tree for strategy selection
strategy, details = self._apply_decision_tree(
landscape_type=landscape_type,
smoothness=smoothness,
multimodal=multimodal,
noise_level=noise_level,
dimensionality=dimensionality,
correlation_strength=correlation_strength,
trials_completed=char_trials # Use characterization trials, not total
)
# Add landscape info to recommendation
details['landscape_analysis'] = {
'type': landscape_type,
'smoothness': smoothness,
'multimodal': multimodal,
'dimensionality': dimensionality
}
# Log recommendation
self._log_recommendation(strategy, details, trials_completed)
if self.verbose:
self._print_recommendation(strategy, details)
return strategy, details
    def _apply_decision_tree(
        self,
        landscape_type: str,
        smoothness: float,
        multimodal: bool,
        noise_level: float,
        dimensionality: int,
        correlation_strength: float,
        trials_completed: int
    ) -> Tuple[str, Dict]:
        """
        Apply expert decision tree for strategy selection.

        Cases are evaluated top-down; the first matching rule wins, so rule
        order encodes priority (noise robustness first, default TPE last).

        Returns:
            (strategy_name, details) where details carries 'confidence',
            'reasoning', a 'sampler_config' dict, and for the GP-BO cases an
            optional 'transition_plan'.

        NOTE(review): the `smoothness` parameter is accepted but never used by
        any rule below - landscape_type already encodes smooth/rugged.

        Decision logic based on optimization algorithm strengths:

        CMA-ES:
        - Best for: Smooth unimodal landscapes, correlated parameters
        - Strengths: Fast local convergence, handles parameter correlations
        - Weaknesses: Poor for multimodal, needs reasonable initialization

        GP-BO (Gaussian Process Bayesian Optimization):
        - Best for: Smooth landscapes, expensive evaluations, low-dimensional
        - Strengths: Sample efficient, good uncertainty quantification
        - Weaknesses: Scales poorly >10D, expensive surrogate training

        TPE (Tree-structured Parzen Estimator):
        - Best for: General purpose, multimodal, moderate dimensional
        - Strengths: Handles multimodality, scales to ~50D, robust
        - Weaknesses: Slower convergence than CMA-ES on smooth problems

        Hybrid GP→CMA-ES:
        - Best for: Smooth landscapes needing global+local search
        - Strengths: GP finds basin, CMA-ES refines locally
        - Weaknesses: More complex, needs transition logic
        """
        # CASE 1: High noise - use robust methods
        if noise_level > 0.5:
            return 'tpe', {
                'confidence': 0.85,
                'reasoning': 'High noise detected - TPE is more robust to noisy evaluations',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 15,  # More exploration for noisy problems
                        'n_ei_candidates': 24
                    }
                }
            }
        # CASE 2: Smooth unimodal with strong correlation - CMA-ES excels
        if landscape_type == 'smooth_unimodal' and correlation_strength > 0.5:
            return 'cmaes', {
                'confidence': 0.92,
                'reasoning': f'Smooth unimodal landscape with strong parameter correlation ({correlation_strength:.2f}) - CMA-ES will converge quickly',
                'sampler_config': {
                    'type': 'CmaEsSampler',
                    'params': {
                        'restart_strategy': 'ipop',  # Increasing population restart
                        'with_margin': True  # Use margin for constraint handling
                    }
                }
            }
        # CASE 3: Smooth but multimodal - Hybrid GP→CMA-ES or GP-BO
        if landscape_type == 'smooth_multimodal':
            if dimensionality <= 5 and trials_completed < 30:
                # Early stage: GP-BO for exploration
                return 'gp_bo', {
                    'confidence': 0.78,
                    'reasoning': f'Smooth multimodal landscape, {dimensionality}D - GP-BO for intelligent exploration, plan CMA-ES refinement later',
                    'sampler_config': {
                        'type': 'GPSampler',  # Custom implementation needed
                        'params': {
                            'acquisition': 'EI',  # Expected Improvement
                            'n_initial_points': 10
                        }
                    },
                    'transition_plan': {
                        'switch_to': 'cmaes',
                        'when': 'error < 1.0 OR trials > 40'
                    }
                }
            else:
                # Later stage or higher dimensional: TPE
                return 'tpe', {
                    'confidence': 0.75,
                    'reasoning': f'Smooth multimodal landscape - TPE handles multiple modes well',
                    'sampler_config': {
                        'type': 'TPESampler',
                        'params': {
                            'multivariate': True,
                            'n_startup_trials': 10,
                            'n_ei_candidates': 32  # More exploitation
                        }
                    }
                }
        # CASE 4: Smooth unimodal, low-dimensional, expensive - GP-BO then CMA-ES
        # (Only reached when correlation_strength <= 0.5, since CASE 2 handled
        # the strongly-correlated variant.)
        if landscape_type == 'smooth_unimodal' and dimensionality <= 5:
            if trials_completed < 25:
                return 'gp_bo', {
                    'confidence': 0.82,
                    'reasoning': f'Smooth {dimensionality}D landscape - GP-BO for sample-efficient exploration',
                    'sampler_config': {
                        'type': 'GPSampler',
                        'params': {
                            'acquisition': 'EI',
                            'n_initial_points': 8
                        }
                    },
                    'transition_plan': {
                        'switch_to': 'cmaes',
                        'when': 'error < 2.0 OR trials > 25'
                    }
                }
            else:
                # Switch to CMA-ES for final refinement
                return 'cmaes', {
                    'confidence': 0.88,
                    'reasoning': 'Switching to CMA-ES for final local refinement',
                    'sampler_config': {
                        'type': 'CmaEsSampler',
                        'params': {
                            'restart_strategy': 'ipop',
                            'with_margin': True
                        }
                    }
                }
        # CASE 5: Rugged multimodal - TPE is most robust
        if landscape_type == 'rugged_multimodal' or multimodal:
            return 'tpe', {
                'confidence': 0.80,
                'reasoning': 'Rugged/multimodal landscape - TPE is robust to multiple local optima',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 12,
                        'n_ei_candidates': 24
                    }
                }
            }
        # CASE 6: Rugged unimodal - TPE with more exploration
        if landscape_type == 'rugged_unimodal':
            return 'tpe', {
                'confidence': 0.72,
                'reasoning': 'Rugged landscape - TPE with extended exploration',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': 15,
                        'n_ei_candidates': 20
                    }
                }
            }
        # CASE 7: High dimensional (>5D) - TPE scales best
        if dimensionality > 5:
            return 'tpe', {
                'confidence': 0.77,
                'reasoning': f'High dimensionality ({dimensionality}D) - TPE scales well to moderate dimensions',
                'sampler_config': {
                    'type': 'TPESampler',
                    'params': {
                        'multivariate': True,
                        'n_startup_trials': min(20, dimensionality * 3),
                        'n_ei_candidates': 24
                    }
                }
            }
        # DEFAULT: TPE as safe general-purpose choice
        return 'tpe', {
            'confidence': 0.65,
            'reasoning': 'Default robust strategy - TPE works well for most problems',
            'sampler_config': {
                'type': 'TPESampler',
                'params': {
                    'multivariate': True,
                    'n_startup_trials': 10,
                    'n_ei_candidates': 24
                }
            }
        }
def _recommend_random_exploration(self, trials_completed: int) -> Tuple[str, Dict]:
"""Recommend random exploration when insufficient data for analysis."""
return 'random', {
'confidence': 1.0,
'reasoning': f'Insufficient data ({trials_completed} trials) - using random exploration for landscape characterization',
'sampler_config': {
'type': 'RandomSampler',
'params': {}
}
}
def _recommend_multiobjective_strategy(self, trials_completed: int) -> Tuple[str, Dict]:
"""
Recommend strategy for multi-objective optimization.
For multi-objective problems, landscape analysis is not applicable.
Use NSGA-II (default) or TPE with multivariate support.
"""
# Start with random for initial exploration
if trials_completed < 8:
return 'random', {
'confidence': 1.0,
'reasoning': f'Multi-objective: Random exploration for initial {trials_completed}/8 trials',
'sampler_config': {
'type': 'RandomSampler',
'params': {}
}
}
# After initial exploration, use TPE with multivariate support
# (NSGA-II sampler is already used at study creation level)
return 'tpe', {
'confidence': 0.95,
'reasoning': f'Multi-objective: TPE with multivariate support for Pareto front exploration ({trials_completed} trials)',
'sampler_config': {
'type': 'TPESampler',
'params': {
'multivariate': True,
'n_startup_trials': 8,
'n_ei_candidates': 24,
'constant_liar': True # Better for parallel multi-objective
}
}
}
def _log_recommendation(self, strategy: str, details: Dict, trial_number: int):
"""Log recommendation for learning and transfer."""
self.recommendation_history.append({
'trial_number': trial_number,
'strategy': strategy,
'confidence': details.get('confidence', 0.0),
'reasoning': details.get('reasoning', ''),
'landscape': details.get('landscape_analysis', {})
})
def _print_recommendation(self, strategy: str, details: Dict):
"""Print formatted recommendation."""
print(f"\n{'='*70}")
print(f" STRATEGY RECOMMENDATION")
print(f"{'='*70}")
print(f" Recommended: {strategy.upper()}")
print(f" Confidence: {details['confidence']:.1%}")
print(f" Reasoning: {details['reasoning']}")
if 'transition_plan' in details:
plan = details['transition_plan']
print(f"\n TRANSITION PLAN:")
print(f" Switch to: {plan['switch_to'].upper()}")
print(f" When: {plan['when']}")
print(f"{'='*70}\n")
def save_recommendation_history(self, filepath: Path):
"""Save recommendation history to JSON for learning."""
try:
with open(filepath, 'w') as f:
json.dump(self.recommendation_history, f, indent=2)
except Exception as e:
if self.verbose:
print(f" Warning: Failed to save recommendation history: {e}")
def load_recommendation_history(self, filepath: Path):
"""Load previous recommendation history."""
try:
if filepath.exists():
with open(filepath, 'r') as f:
self.recommendation_history = json.load(f)
except Exception as e:
if self.verbose:
print(f" Warning: Failed to load recommendation history: {e}")
def create_sampler_from_config(config: Dict) -> optuna.samplers.BaseSampler:
    """
    Create Optuna sampler from configuration dictionary.

    Args:
        config: Sampler configuration from strategy recommendation
            ({'type': <class name>, 'params': {...}}).

    Returns:
        Configured Optuna sampler.  GP-BO and unknown types fall back to a
        multivariate TPE sampler (with a printed warning).
    """
    sampler_type = config.get('type', 'TPESampler')
    params = config.get('params', {})
    # Directly supported sampler classes, keyed by config type name.
    factories = {
        'TPESampler': optuna.samplers.TPESampler,
        'CmaEsSampler': optuna.samplers.CmaEsSampler,
        'RandomSampler': optuna.samplers.RandomSampler,
    }
    factory = factories.get(sampler_type)
    if factory is not None:
        return factory(**params)
    if sampler_type == 'GPSampler':
        # GP-BO not directly available in Optuna
        # Would need custom implementation or use skopt integration
        print(" Warning: GP-BO sampler not yet implemented, falling back to TPE")
    else:
        print(f" Warning: Unknown sampler type {sampler_type}, using TPE")
    return optuna.samplers.TPESampler(multivariate=True, n_startup_trials=10)

View File

@@ -0,0 +1,264 @@
"""
Study Continuation - Standard utility for continuing existing optimization studies.
This module provides a standardized way to continue optimization studies with
additional trials, preserving all existing trial data and learned knowledge.
Usage:
from optimization_engine.study_continuation import continue_study
continue_study(
study_dir=Path("studies/my_study"),
additional_trials=50,
objective_function=my_objective,
design_variables={'param1': (0, 10), 'param2': (0, 100)}
)
This is an Atomizer standard feature that should be exposed in the dashboard
alongside "Start New Optimization".
"""
import optuna
import json
from pathlib import Path
from typing import Dict, Tuple, Callable, Optional
def continue_study(
study_dir: Path,
additional_trials: int,
objective_function: Callable,
design_variables: Optional[Dict[str, Tuple[float, float]]] = None,
target_value: Optional[float] = None,
tolerance: Optional[float] = None,
verbose: bool = True
) -> Dict:
"""
Continue an existing optimization study with additional trials.
Args:
study_dir: Path to study directory containing 1_setup and 2_results
additional_trials: Number of additional trials to run
objective_function: Objective function to optimize (same as original)
design_variables: Optional dict of design variable bounds (for reference)
target_value: Optional target value for early stopping
tolerance: Optional tolerance for target achievement
verbose: Print progress information
Returns:
Dict containing:
- 'study': The Optuna study object
- 'total_trials': Total number of trials after continuation
- 'successful_trials': Number of successful trials
- 'pruned_trials': Number of pruned trials
- 'best_value': Best objective value achieved
- 'best_params': Best parameters found
- 'target_achieved': Whether target was achieved (if specified)
Raises:
FileNotFoundError: If study database doesn't exist
ValueError: If study name cannot be determined
"""
# Setup paths
setup_dir = study_dir / "1_setup"
results_dir = study_dir / "2_results"
history_file = results_dir / "optimization_history_incremental.json"
# Load workflow config to get study name
workflow_file = setup_dir / "workflow_config.json"
if not workflow_file.exists():
raise FileNotFoundError(
f"Workflow config not found: {workflow_file}. "
f"Make sure this is a valid study directory."
)
with open(workflow_file) as f:
workflow = json.load(f)
study_name = workflow.get('study_name')
if not study_name:
raise ValueError("Study name not found in workflow_config.json")
# Load existing study
storage = f"sqlite:///{results_dir / 'study.db'}"
try:
study = optuna.load_study(study_name=study_name, storage=storage)
except KeyError:
raise FileNotFoundError(
f"Study '{study_name}' not found in database. "
f"Run the initial optimization first using run_optimization.py"
)
# Get current state
current_trials = len(study.trials)
current_best = study.best_value if study.best_trial else None
if verbose:
print("\n" + "="*70)
print(" CONTINUING OPTIMIZATION STUDY")
print("="*70)
print(f"\n Study: {study_name}")
print(f" Current trials: {current_trials}")
if current_best is not None:
print(f" Current best: {current_best:.6f}")
print(f" Best params:")
for param, value in study.best_params.items():
print(f" {param}: {value:.4f}")
print(f"\n Adding {additional_trials} trials...\n")
# Continue optimization
study.optimize(
objective_function,
n_trials=additional_trials,
timeout=None,
catch=(Exception,) # Catch exceptions to allow graceful continuation
)
# Analyze results
total_trials = len(study.trials)
successful_trials = len([t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE])
pruned_trials = len([t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED])
results = {
'study': study,
'total_trials': total_trials,
'successful_trials': successful_trials,
'pruned_trials': pruned_trials,
'best_value': study.best_value,
'best_params': study.best_params,
}
# Check target achievement if specified
if target_value is not None and tolerance is not None:
target_achieved = abs(study.best_value - target_value) <= tolerance
results['target_achieved'] = target_achieved
if verbose:
print("\n" + "="*70)
print(" CONTINUATION COMPLETE")
print("="*70)
print(f" Total trials: {total_trials} (added {additional_trials})")
print(f" Successful: {successful_trials}")
print(f" Pruned: {pruned_trials}")
print(f" Pruning rate: {pruned_trials/total_trials*100:.1f}%")
print(f"\n Best value: {study.best_value:.6f}")
print(f" Best params:")
for param, value in study.best_params.items():
print(f" {param}: {value:.4f}")
if target_value is not None and tolerance is not None:
target_achieved = results.get('target_achieved', False)
print(f"\n Target: {target_value} ± {tolerance}")
print(f" Target achieved: {'YES' if target_achieved else 'NO'}")
print("="*70 + "\n")
return results
def can_continue_study(study_dir: Path) -> Tuple[bool, str]:
    """
    Check if a study can be continued.

    Args:
        study_dir: Path to study directory

    Returns:
        (can_continue, message): Tuple of bool and explanation message
    """
    setup_dir = study_dir / "1_setup"
    workflow_file = setup_dir / "workflow_config.json"
    db_file = study_dir / "2_results" / "study.db"
    # A continuable study needs its workflow config first.
    if not workflow_file.exists():
        return False, f"No workflow_config.json found in {setup_dir}"
    try:
        with open(workflow_file) as fh:
            study_name = json.load(fh).get('study_name')
    except Exception as e:
        return False, f"Error reading workflow config: {e}"
    if not study_name:
        return False, "No study_name in workflow_config.json"
    # The SQLite database is produced by the initial optimization run.
    if not db_file.exists():
        return False, "No study.db found. Run initial optimization first."
    try:
        study = optuna.load_study(study_name=study_name,
                                  storage=f"sqlite:///{db_file}")
        trial_count = len(study.trials)
    except KeyError:
        return False, f"Study '{study_name}' not found in database"
    except Exception as e:
        return False, f"Error loading study: {e}"
    if trial_count == 0:
        return False, "Study exists but has no trials yet"
    return True, f"Study '{study_name}' ready (current trials: {trial_count})"
def get_study_status(study_dir: Path) -> Optional[Dict]:
    """
    Get current status of a study.

    Args:
        study_dir: Path to study directory

    Returns:
        Dict with study status info, or None if study doesn't exist/can't load:
        {
            'study_name': str,
            'total_trials': int,
            'successful_trials': int,
            'pruned_trials': int,
            'pruning_rate': float,
            'best_value': float | None,
            'best_params': dict | None
        }
        best_value/best_params are None when no trial has completed yet.
    """
    can_continue, message = can_continue_study(study_dir)
    if not can_continue:
        return None
    setup_dir = study_dir / "1_setup"
    results_dir = study_dir / "2_results"
    # Load study name (can_continue_study already verified the file is readable).
    with open(setup_dir / "workflow_config.json") as f:
        workflow = json.load(f)
    study_name = workflow['study_name']
    storage = f"sqlite:///{results_dir / 'study.db'}"
    try:
        study = optuna.load_study(study_name=study_name, storage=storage)
        trials = study.trials
        total_trials = len(trials)
        successful_trials = len([t for t in trials if t.state == optuna.trial.TrialState.COMPLETE])
        pruned_trials = len([t for t in trials if t.state == optuna.trial.TrialState.PRUNED])
        # BUG FIX: the original did `study.best_value if study.best_trial else
        # None`, but Optuna raises ValueError from best_trial when no trial is
        # COMPLETE, so a study with only failed/pruned trials incorrectly
        # returned None for the whole status.  Guard on the completed count.
        if successful_trials > 0:
            best_value = study.best_value
            best_params = study.best_params
        else:
            best_value = None
            best_params = None
        return {
            'study_name': study_name,
            'total_trials': total_trials,
            'successful_trials': successful_trials,
            'pruned_trials': pruned_trials,
            'pruning_rate': pruned_trials / total_trials if total_trials > 0 else 0,
            'best_value': best_value,
            'best_params': best_params
        }
    except Exception:
        # Contract: any load/query failure yields None rather than raising.
        return None

View File

@@ -0,0 +1,385 @@
"""
Training Data Exporter for AtomizerField
This module exports training data from Atomizer optimization runs for AtomizerField neural network training.
It saves NX Nastran input (.dat) and output (.op2) files along with metadata for each trial.
Usage:
from optimization_engine.training_data_exporter import create_exporter_from_config
exporter = create_exporter_from_config(config)
if exporter:
exporter.export_trial(trial_number, design_vars, results, simulation_files)
exporter.finalize()
"""
import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List
import logging
logger = logging.getLogger(__name__)
class TrainingDataExporter:
    """
    Exports training data for AtomizerField neural network training.
    After each FEA solve, saves:
    - Input: NX Nastran .dat file (BDF format)
    - Output: NX Nastran .op2 file (binary results)
    - Metadata: JSON with design parameters, objectives, constraints
    Directory structure:
        {export_dir}/
        ├── trial_0001/
        │ ├── input/
        │ │ └── model.bdf
        │ ├── output/
        │ │ └── model.op2
        │ └── metadata.json
        ├── trial_0002/
        │ └── ...
        ├── study_summary.json
        └── README.md
    """
    def __init__(
        self,
        export_dir: Path,
        study_name: str,
        design_variable_names: List[str],
        objective_names: List[str],
        constraint_names: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize the training data exporter.
        Args:
            export_dir: Root directory for exported training data
            study_name: Name of the optimization study
            design_variable_names: List of design variable names
            objective_names: List of objective function names
            constraint_names: List of constraint names (optional)
            metadata: Additional study metadata (optional)
        """
        self.export_dir = Path(export_dir)
        self.study_name = study_name
        self.design_variable_names = design_variable_names
        self.objective_names = objective_names
        self.constraint_names = constraint_names or []
        self.study_metadata = metadata or {}
        # Number of trials exported so far (incremented on each successful export)
        self.trial_count = 0
        # Per-trial metadata dicts, in export order (written out by finalize callers)
        self.exported_trials: List[Dict[str, Any]] = []
        # Create root export directory
        self.export_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Training data exporter initialized: {self.export_dir}")
        # Create README
        self._create_readme()
    def export_trial(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any],
        simulation_files: Dict[str, Path]
    ) -> bool:
        """
        Export training data for a single trial.
        Args:
            trial_number: Optuna trial number
            design_variables: Dict of design parameter names and values
            results: Dict containing objectives, constraints, and other results
            simulation_files: Dict with paths to 'dat_file' and 'op2_file'
        Returns:
            True if export successful, False otherwise
        """
        try:
            # Create trial directory
            trial_dir = self.export_dir / f"trial_{trial_number:04d}"
            trial_dir.mkdir(parents=True, exist_ok=True)
            input_dir = trial_dir / "input"
            output_dir = trial_dir / "output"
            input_dir.mkdir(exist_ok=True)
            output_dir.mkdir(exist_ok=True)
            # Copy .dat file (Nastran input deck)
            # NOTE(review): if either source file is missing, a partially
            # created trial directory is left behind on disk.
            dat_file = simulation_files.get('dat_file')
            if dat_file and Path(dat_file).exists():
                shutil.copy2(dat_file, input_dir / "model.bdf")
                logger.debug(f"Copied .dat file: {dat_file} -> {input_dir / 'model.bdf'}")
            else:
                logger.warning(f"Trial {trial_number}: .dat file not found at {dat_file}")
                return False
            # Copy .op2 file (Nastran binary results)
            op2_file = simulation_files.get('op2_file')
            if op2_file and Path(op2_file).exists():
                shutil.copy2(op2_file, output_dir / "model.op2")
                logger.debug(f"Copied .op2 file: {op2_file} -> {output_dir / 'model.op2'}")
            else:
                logger.warning(f"Trial {trial_number}: .op2 file not found at {op2_file}")
                return False
            # Create metadata.json
            metadata = self._create_trial_metadata(
                trial_number,
                design_variables,
                results
            )
            metadata_path = trial_dir / "metadata.json"
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=2)
            logger.info(f"Exported training data for trial {trial_number}")
            # Bookkeeping only happens after all files were written successfully.
            self.trial_count += 1
            self.exported_trials.append(metadata)
            return True
        except Exception as e:
            # Export is best-effort: log and report failure, never raise into
            # the optimization loop.
            logger.error(f"Failed to export trial {trial_number}: {e}", exc_info=True)
            return False
    def _create_trial_metadata(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Create metadata dictionary for a trial.
        Args:
            trial_number: Optuna trial number
            design_variables: Design parameter values
            results: Optimization results
        Returns:
            Metadata dictionary
        """
        metadata = {
            "trial_number": trial_number,
            "timestamp": datetime.now().isoformat(),
            "atomizer_study": self.study_name,
            "design_parameters": design_variables,
            "results": {}
        }
        # Extract objectives
        if "objectives" in results:
            metadata["results"]["objectives"] = results["objectives"]
        # Extract constraints
        if "constraints" in results:
            metadata["results"]["constraints"] = results["constraints"]
        # Extract any scalar results (max_stress, max_displacement, etc.)
        for key in ["max_stress", "max_displacement", "mass", "volume"]:
            if key in results:
                metadata["results"][key] = results[key]
        # Add any additional result fields
        # (scalars already copied above are simply re-assigned the same value;
        # non-JSON-serializable values are skipped by the isinstance filter)
        for key, value in results.items():
            if key not in ["objectives", "constraints"] and isinstance(value, (int, float, str, bool)):
                metadata["results"][key] = value
        return metadata
    def finalize(self) -> None:
        """
        Finalize the training data export.
        Creates study_summary.json with metadata about the entire study.
        """
        try:
            summary = {
                "study_name": self.study_name,
                "total_trials": self.trial_count,
                "design_variables": self.design_variable_names,
                "objectives": self.objective_names,
                "constraints": self.constraint_names,
                "export_timestamp": datetime.now().isoformat(),
                "metadata": self.study_metadata
            }
            summary_path = self.export_dir / "study_summary.json"
            with open(summary_path, 'w') as f:
                json.dump(summary, f, indent=2)
            logger.info(f"Training data export finalized: {self.trial_count} trials exported")
            logger.info(f"Summary saved to: {summary_path}")
        except Exception as e:
            # Best-effort: a failed summary write must not abort the caller.
            logger.error(f"Failed to finalize training data export: {e}", exc_info=True)
    def _create_readme(self) -> None:
        """Create README.md explaining the training data structure."""
        # The template below is written verbatim to README.md; the interpolated
        # fields are the study name, timestamp, export dir, and name lists.
        readme_content = f"""# AtomizerField Training Data
**Study Name**: {self.study_name}
**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
## Directory Structure
```
{self.export_dir.name}/
├── trial_0001/
│ ├── input/
│ │ └── model.bdf # NX Nastran input deck (BDF format)
│ ├── output/
│ │ └── model.op2 # NX Nastran binary results (OP2 format)
│ └── metadata.json # Design parameters, objectives, constraints
├── trial_0002/
│ └── ...
├── study_summary.json # Overall study metadata
└── README.md # This file
```
## Design Variables
{chr(10).join(f'- {name}' for name in self.design_variable_names)}
## Objectives
{chr(10).join(f'- {name}' for name in self.objective_names)}
## Constraints
{chr(10).join(f'- {name}' for name in self.constraint_names) if self.constraint_names else '- None'}
## Usage with AtomizerField
### 1. Parse Training Data
```bash
cd Atomizer-Field
python batch_parser.py --data-dir "{self.export_dir}"
```
This converts BDF/OP2 files to PyTorch Geometric format.
### 2. Validate Parsed Data
```bash
python validate_parsed_data.py
```
### 3. Train Neural Network
```bash
python train.py --data-dir "training_data/parsed/" --epochs 200
```
### 4. Use Trained Model in Atomizer
```bash
cd ../Atomizer
python run_optimization.py --config studies/{self.study_name}/workflow_config.json --use-neural
```
## File Formats
- **BDF (.bdf)**: Nastran Bulk Data File - contains mesh, materials, loads, BCs
- **OP2 (.op2)**: Nastran Output2 - binary results with displacements, stresses, etc.
- **metadata.json**: Human-readable trial metadata
## AtomizerField Documentation
See `Atomizer-Field/docs/` for complete documentation on:
- Neural network architecture
- Training procedures
- Integration with Atomizer
- Uncertainty quantification
---
*Generated by Atomizer Training Data Exporter*
"""
        readme_path = self.export_dir / "README.md"
        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(readme_content)
        logger.debug(f"Created README: {readme_path}")
def create_exporter_from_config(config: Dict[str, Any]) -> Optional[TrainingDataExporter]:
    """
    Factory function to create TrainingDataExporter from workflow configuration.

    Args:
        config: Workflow configuration dictionary

    Returns:
        TrainingDataExporter instance if enabled, None otherwise

    Example config YAML:
        training_data_export:
            enabled: true
            export_dir: "atomizer_field_training_data/beam_study_001"
    """
    export_config = config.get("training_data_export", {})
    if not export_config.get("enabled", False):
        logger.info("Training data export is disabled")
        return None
    # Get export directory
    export_dir = export_config.get("export_dir")
    if not export_dir:
        logger.error("Training data export enabled but 'export_dir' not specified")
        return None
    # Derive display names for variables/objectives/constraints, falling back
    # to positional placeholders when no name is given.
    design_var_names = [
        dv.get("name", dv.get("parameter", f"var_{i}"))
        for i, dv in enumerate(config.get("design_variables", []))
    ]
    objective_names = [
        obj.get("name", f"obj_{i}")
        for i, obj in enumerate(config.get("objectives", []))
    ]
    constraint_names = [
        c.get("name", f"constraint_{i}")
        for i, c in enumerate(config.get("constraints", []))
    ]
    # Additional metadata
    opt_section = config.get("optimization", {})
    metadata = {
        "atomizer_version": config.get("version", "unknown"),
        "optimization_algorithm": opt_section.get("algorithm", "unknown"),
        "n_trials": opt_section.get("n_trials", 0)
    }
    try:
        exporter = TrainingDataExporter(
            export_dir=Path(export_dir),
            study_name=config.get("study_name", "unnamed_study"),
            design_variable_names=design_var_names,
            objective_names=objective_names,
            constraint_names=constraint_names,
            metadata=metadata
        )
        logger.info("Training data exporter created successfully")
        return exporter
    except Exception as e:
        logger.error(f"Failed to create training data exporter: {e}", exc_info=True)
        return None

View File

@@ -0,0 +1,74 @@
"""
Atomizer Validators
==================
Validation modules for ensuring correct configurations, model setups,
and optimization results.
Available validators:
- config_validator: Validate optimization_config.json files
- model_validator: Validate NX model files and simulation setup
- results_validator: Validate optimization results in study.db
- study_validator: Complete study health check (combines all validators)
"""
from .config_validator import (
validate_config,
validate_config_file,
ValidationResult,
ConfigError,
ConfigWarning
)
from .model_validator import (
validate_model,
validate_model_files,
validate_study_model,
ModelValidationResult
)
from .results_validator import (
validate_results,
validate_study_results,
get_pareto_summary,
ResultsValidationResult,
ResultsError,
ResultsWarning
)
from .study_validator import (
validate_study,
list_studies,
quick_check,
get_study_health,
StudyValidationResult,
StudyStatus
)
__all__ = [
# Config validator
'validate_config',
'validate_config_file',
'ValidationResult',
'ConfigError',
'ConfigWarning',
# Model validator
'validate_model',
'validate_model_files',
'validate_study_model',
'ModelValidationResult',
# Results validator
'validate_results',
'validate_study_results',
'get_pareto_summary',
'ResultsValidationResult',
'ResultsError',
'ResultsWarning',
# Study validator
'validate_study',
'list_studies',
'quick_check',
'get_study_health',
'StudyValidationResult',
'StudyStatus'
]

View File

@@ -0,0 +1,591 @@
"""
Configuration Validator for Atomizer
====================================
Validates optimization_config.json files before running optimizations.
Catches common errors and provides helpful suggestions.
Usage:
from optimization_engine.validators import validate_config, validate_config_file
# Validate from file path
result = validate_config_file("studies/my_study/1_setup/optimization_config.json")
# Validate from dict
result = validate_config(config_dict)
if result.is_valid:
print("Config is valid!")
else:
for error in result.errors:
print(f"ERROR: {error}")
for warning in result.warnings:
print(f"WARNING: {warning}")
"""
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Any, Optional, Union
@dataclass
class ConfigError:
    """A configuration problem severe enough to block execution."""
    field: str
    message: str
    suggestion: Optional[str] = None
    def __str__(self):
        base = f"[{self.field}] {self.message}"
        if not self.suggestion:
            return base
        return f"{base} (Suggestion: {self.suggestion})"
@dataclass
class ConfigWarning:
    """A non-blocking configuration issue worth surfacing to the user."""
    field: str
    message: str
    suggestion: Optional[str] = None
    def __str__(self):
        suffix = f" (Suggestion: {self.suggestion})" if self.suggestion else ""
        return f"[{self.field}] {self.message}{suffix}"
@dataclass
class ValidationResult:
    """Aggregated outcome of configuration validation."""
    errors: List[ConfigError] = field(default_factory=list)
    warnings: List[ConfigWarning] = field(default_factory=list)
    config: Optional[Dict[str, Any]] = None
    @property
    def is_valid(self) -> bool:
        """Config is valid if there are no errors (warnings are OK)."""
        return not self.errors
    def __str__(self):
        parts: List[str] = []
        if self.errors:
            parts.append(f"ERRORS ({len(self.errors)}):")
            parts.extend(f" - {err}" for err in self.errors)
        if self.warnings:
            parts.append(f"WARNINGS ({len(self.warnings)}):")
            parts.extend(f" - {warn}" for warn in self.warnings)
        if self.is_valid and not self.warnings:
            parts.append("Configuration is valid.")
        return "\n".join(parts)
# Valid values for certain fields
# Known optimization protocol identifiers ('legacy' keeps old configs working).
VALID_PROTOCOLS = [
    'protocol_10_single_objective',
    'protocol_11_multi_objective',
    'protocol_12_hybrid_surrogate',
    'legacy'
]
# Accepted Optuna sampler class names.
VALID_SAMPLERS = [
    'TPESampler',
    'NSGAIISampler',
    'CmaEsSampler',
    'RandomSampler',
    'GridSampler'
]
# Objective direction keywords (checked in _validate_objectives).
VALID_GOALS = ['minimize', 'maximize']
# Constraint comparison types (checked in _validate_constraints).
VALID_CONSTRAINT_TYPES = ['less_than', 'greater_than', 'equal_to', 'range']
# Design-variable value types (checked in _validate_design_variables).
VALID_VAR_TYPES = ['float', 'integer', 'categorical']
# Result-extraction action names (presumably checked by the extraction
# validator -- its body is not fully visible here).
VALID_EXTRACTION_ACTIONS = [
    'extract_displacement',
    'extract_solid_stress',
    'extract_frequency',
    'extract_mass_from_expression',
    'extract_mass_from_bdf',
    'extract_mass',
    'extract_stress'
]
def validate_config_file(config_path: Union[str, Path]) -> ValidationResult:
    """
    Validate an optimization_config.json file.

    Args:
        config_path: Path to the configuration file

    Returns:
        ValidationResult with errors, warnings, and parsed config
    """
    result = ValidationResult()
    path = Path(config_path)
    # Missing file is a hard error -- nothing further to check.
    if not path.exists():
        result.errors.append(ConfigError(
            field="file",
            message=f"Configuration file not found: {path}",
            suggestion="Create optimization_config.json using the create-study skill"
        ))
        return result
    # Parse JSON; a syntax error also short-circuits validation.
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            parsed = json.load(fh)
    except json.JSONDecodeError as e:
        result.errors.append(ConfigError(
            field="file",
            message=f"Invalid JSON: {e}",
            suggestion="Check for syntax errors (missing commas, quotes, brackets)"
        ))
        return result
    # Delegate content checks, reusing the same result object.
    return validate_config(parsed, result)
def validate_config(config: Dict[str, Any],
                    result: Optional[ValidationResult] = None) -> ValidationResult:
    """
    Validate an optimization configuration dictionary.

    Args:
        config: Configuration dictionary
        result: Existing ValidationResult to append to (optional)

    Returns:
        ValidationResult with errors, warnings, and config
    """
    if result is None:
        result = ValidationResult()
    result.config = config
    # Required top-level fields first.
    _validate_required_fields(config, result)
    # Each optional section is validated only when present.
    section_checks = (
        ('design_variables', _validate_design_variables),
        ('objectives', _validate_objectives),
        ('constraints', _validate_constraints),
        ('optimization_settings', _validate_optimization_settings),
        ('simulation', _validate_simulation_settings),
        ('surrogate_settings', _validate_surrogate_settings),
    )
    for section, check in section_checks:
        if section in config:
            check(config[section], result)
    # Cross-field validations
    _validate_cross_references(config, result)
    return result
def _validate_required_fields(config: Dict[str, Any], result: ValidationResult):
    """
    Check that required top-level fields exist.

    Appends a ConfigError for each missing required field and a ConfigWarning
    for each missing recommended (documentation-oriented) field.

    Args:
        config: Full configuration dictionary.
        result: ValidationResult collecting errors and warnings.
    """
    # FIX: loop variable renamed from `field` -- that name shadowed
    # `dataclasses.field` imported at module level.
    required = ['study_name', 'design_variables', 'objectives']
    for name in required:
        if name not in config:
            result.errors.append(ConfigError(
                field=name,
                message=f"Required field '{name}' is missing",
                suggestion=f"Add '{name}' to your configuration"
            ))
    # Recommended fields
    recommended = ['description', 'engineering_context', 'optimization_settings', 'simulation']
    for name in recommended:
        if name not in config:
            result.warnings.append(ConfigWarning(
                field=name,
                message=f"Recommended field '{name}' is missing",
                suggestion=f"Consider adding '{name}' for better documentation"
            ))
def _validate_design_variables(variables: List[Dict], result: ValidationResult):
    """
    Validate design variables section.

    Checks that the section is a non-empty list, that every entry has a unique
    'parameter' name and well-formed [min, max] 'bounds', and that the optional
    'type' field is recognized (integer bounds get an extra sanity check).

    Args:
        variables: The 'design_variables' list from the config.
        result: ValidationResult collecting errors and warnings.
    """
    if not isinstance(variables, list):
        result.errors.append(ConfigError(
            field="design_variables",
            message="design_variables must be a list",
            suggestion="Use array format: [{parameter: ..., bounds: ...}, ...]"
        ))
        return
    if len(variables) == 0:
        result.errors.append(ConfigError(
            field="design_variables",
            message="At least one design variable is required",
            suggestion="Add design variables with parameter names and bounds"
        ))
        return
    param_names = set()
    for i, var in enumerate(variables):
        prefix = f"design_variables[{i}]"
        # Required fields
        if 'parameter' not in var:
            result.errors.append(ConfigError(
                field=prefix,
                message="'parameter' name is required",
                suggestion="Add 'parameter': 'your_nx_expression_name'"
            ))
        else:
            param = var['parameter']
            if param in param_names:
                result.errors.append(ConfigError(
                    field=prefix,
                    message=f"Duplicate parameter name: '{param}'",
                    suggestion="Each parameter name must be unique"
                ))
            param_names.add(param)
        if 'bounds' not in var:
            result.errors.append(ConfigError(
                field=prefix,
                message="'bounds' are required",
                suggestion="Add 'bounds': [min_value, max_value]"
            ))
        else:
            bounds = var['bounds']
            if not isinstance(bounds, list) or len(bounds) != 2:
                result.errors.append(ConfigError(
                    field=f"{prefix}.bounds",
                    message="Bounds must be [min, max] array",
                    suggestion="Use format: 'bounds': [1.0, 10.0]"
                ))
            # BUG FIX: the original tested `bounds[0] >= bounds[1]` before the
            # equality case, making the "min equals max" warning unreachable
            # and reporting equal bounds as a hard error instead of a warning.
            elif bounds[0] > bounds[1]:
                result.errors.append(ConfigError(
                    field=f"{prefix}.bounds",
                    message=f"Min ({bounds[0]}) must be less than max ({bounds[1]})",
                    suggestion="Swap values or adjust range"
                ))
            elif bounds[0] == bounds[1]:
                result.warnings.append(ConfigWarning(
                    field=f"{prefix}.bounds",
                    message="Min equals max - variable will be constant",
                    suggestion="If intentional, consider removing this variable"
                ))
        # Type validation
        var_type = var.get('type', 'float')
        if var_type not in VALID_VAR_TYPES:
            result.warnings.append(ConfigWarning(
                field=f"{prefix}.type",
                message=f"Unknown type '{var_type}'",
                suggestion=f"Use one of: {', '.join(VALID_VAR_TYPES)}"
            ))
        # Integer bounds check
        if var_type == 'integer' and 'bounds' in var:
            bounds = var['bounds']
            if isinstance(bounds, list) and len(bounds) == 2:
                if not (isinstance(bounds[0], int) and isinstance(bounds[1], int)):
                    result.warnings.append(ConfigWarning(
                        field=f"{prefix}.bounds",
                        message="Integer variable bounds should be integers",
                        suggestion="Use whole numbers for integer bounds"
                    ))
def _validate_objectives(objectives: List[Dict], result: ValidationResult):
    """
    Validate the objectives section: list shape, uniqueness of names,
    required 'goal' direction, and optional extraction config.
    """
    if not isinstance(objectives, list):
        result.errors.append(ConfigError(
            field="objectives",
            message="objectives must be a list",
            suggestion="Use array format: [{name: ..., goal: ...}, ...]"
        ))
        return
    if not objectives:
        result.errors.append(ConfigError(
            field="objectives",
            message="At least one objective is required",
            suggestion="Add an objective with name and goal (minimize/maximize)"
        ))
        return
    # Many-objective problems are allowed, but flagged as hard to interpret.
    if len(objectives) > 3:
        result.warnings.append(ConfigWarning(
            field="objectives",
            message=f"{len(objectives)} objectives may make optimization difficult",
            suggestion="Consider reducing to 2-3 objectives for clearer trade-offs"
        ))
    seen = set()
    for idx, obj in enumerate(objectives):
        prefix = f"objectives[{idx}]"
        # Required fields
        if 'name' not in obj:
            result.errors.append(ConfigError(
                field=prefix,
                message="'name' is required",
                suggestion="Add 'name': 'mass' or similar"
            ))
        else:
            obj_name = obj['name']
            if obj_name in seen:
                result.errors.append(ConfigError(
                    field=prefix,
                    message=f"Duplicate objective name: '{obj_name}'",
                    suggestion="Each objective name must be unique"
                ))
            seen.add(obj_name)
        if 'goal' not in obj:
            result.errors.append(ConfigError(
                field=prefix,
                message="'goal' is required",
                suggestion="Add 'goal': 'minimize' or 'goal': 'maximize'"
            ))
        elif obj['goal'] not in VALID_GOALS:
            result.errors.append(ConfigError(
                field=f"{prefix}.goal",
                message=f"Invalid goal '{obj['goal']}'",
                suggestion=f"Use one of: {', '.join(VALID_GOALS)}"
            ))
        # Extraction validation
        if 'extraction' in obj:
            _validate_extraction(obj['extraction'], f"{prefix}.extraction", result)
def _validate_constraints(constraints: List[Dict], result: ValidationResult):
    """Validate the 'constraints' section of the configuration.

    Each constraint needs a 'name', a recognized 'type', and a
    'threshold'.  Duplicate names only warn here (unlike objectives,
    where duplicates are hard errors).
    """
    if not isinstance(constraints, list):
        result.errors.append(ConfigError(
            field="constraints",
            message="constraints must be a list",
            suggestion="Use array format: [{name: ..., type: ..., threshold: ...}, ...]"
        ))
        return
    seen_names = set()
    for idx, entry in enumerate(constraints):
        prefix = f"constraints[{idx}]"
        # Name is mandatory; duplicates are tolerated with a warning.
        if 'name' not in entry:
            result.errors.append(ConfigError(
                field=prefix,
                message="'name' is required",
                suggestion="Add 'name': 'max_stress' or similar"
            ))
        else:
            entry_name = entry['name']
            if entry_name in seen_names:
                result.warnings.append(ConfigWarning(
                    field=prefix,
                    message=f"Duplicate constraint name: '{entry_name}'",
                    suggestion="Consider using unique names for clarity"
                ))
            seen_names.add(entry_name)
        # Constraint direction must be one of the known types.
        if 'type' not in entry:
            result.errors.append(ConfigError(
                field=prefix,
                message="'type' is required",
                suggestion="Add 'type': 'less_than' or 'type': 'greater_than'"
            ))
        elif entry['type'] not in VALID_CONSTRAINT_TYPES:
            result.errors.append(ConfigError(
                field=f"{prefix}.type",
                message=f"Invalid constraint type '{entry['type']}'",
                suggestion=f"Use one of: {', '.join(VALID_CONSTRAINT_TYPES)}"
            ))
        # A constraint without a limit value is meaningless.
        if 'threshold' not in entry:
            result.errors.append(ConfigError(
                field=prefix,
                message="'threshold' is required",
                suggestion="Add 'threshold': 200 (the limit value)"
            ))
        # Delegate extraction sub-config checks.
        if 'extraction' in entry:
            _validate_extraction(entry['extraction'], f"{prefix}.extraction", result)
def _validate_extraction(extraction: Dict, prefix: str, result: ValidationResult):
    """Validate an extraction sub-configuration (shared by objectives and constraints)."""
    if not isinstance(extraction, dict):
        result.errors.append(ConfigError(
            field=prefix,
            message="extraction must be an object",
            suggestion="Use format: {action: '...', params: {...}}"
        ))
        return
    if 'action' not in extraction:
        result.errors.append(ConfigError(
            field=prefix,
            message="'action' is required in extraction",
            suggestion="Add 'action': 'extract_displacement' or similar"
        ))
        return
    action = extraction['action']
    # Unknown actions only warn: custom extraction hooks may exist.
    if action not in VALID_EXTRACTION_ACTIONS:
        result.warnings.append(ConfigWarning(
            field=f"{prefix}.action",
            message=f"Unknown extraction action '{action}'",
            suggestion=f"Standard actions: {', '.join(VALID_EXTRACTION_ACTIONS)}"
        ))
def _validate_optimization_settings(settings: Dict, result: ValidationResult):
    """Validate optimization settings: protocol, trial budget, and sampler."""
    # Unknown protocol names only warn (custom protocols are possible).
    if 'protocol' in settings:
        protocol = settings['protocol']
        if protocol not in VALID_PROTOCOLS:
            result.warnings.append(ConfigWarning(
                field="optimization_settings.protocol",
                message=f"Unknown protocol '{protocol}'",
                suggestion=f"Standard protocols: {', '.join(VALID_PROTOCOLS)}"
            ))
    # The trial budget must be a positive integer; tiny budgets warn.
    if 'n_trials' in settings:
        n_trials = settings['n_trials']
        valid_count = isinstance(n_trials, int) and n_trials >= 1
        if not valid_count:
            result.errors.append(ConfigError(
                field="optimization_settings.n_trials",
                message="n_trials must be a positive integer",
                suggestion="Use a value like 30, 50, or 100"
            ))
        elif n_trials < 10:
            result.warnings.append(ConfigWarning(
                field="optimization_settings.n_trials",
                message=f"Only {n_trials} trials may not be enough for good optimization",
                suggestion="Consider at least 20-30 trials for meaningful results"
            ))
    # Unknown sampler names only warn.
    if 'sampler' in settings:
        sampler = settings['sampler']
        if sampler not in VALID_SAMPLERS:
            result.warnings.append(ConfigWarning(
                field="optimization_settings.sampler",
                message=f"Unknown sampler '{sampler}'",
                suggestion=f"Standard samplers: {', '.join(VALID_SAMPLERS)}"
            ))
def _validate_simulation_settings(simulation: Dict, result: ValidationResult):
    """Warn when expected simulation file references are missing from the config."""
    # Both file references are recommended but not strictly required.
    for field in ('model_file', 'sim_file'):
        if field not in simulation:
            result.warnings.append(ConfigWarning(
                field=f"simulation.{field}",
                message=f"'{field}' not specified",
                suggestion="Add file name for better documentation"
            ))
def _validate_surrogate_settings(surrogate: Dict, result: ValidationResult):
    """Validate neural-surrogate settings; checks only apply when enabled."""
    if not surrogate.get('enabled', False):
        return
    # Training budget: too few FEA samples yields a poorly trained surrogate.
    if 'training' in surrogate:
        training_cfg = surrogate['training']
        if training_cfg.get('initial_fea_trials', 0) < 20:
            result.warnings.append(ConfigWarning(
                field="surrogate_settings.training.initial_fea_trials",
                message="Less than 20 initial FEA trials may not provide enough training data",
                suggestion="Recommend at least 20-30 initial trials"
            ))
    # Accuracy gate: a loose MAPE threshold weakens surrogate predictions.
    if 'model' in surrogate:
        model_cfg = surrogate['model']
        if 'min_accuracy_mape' in model_cfg:
            mape = model_cfg['min_accuracy_mape']
            if mape > 20:
                result.warnings.append(ConfigWarning(
                    field="surrogate_settings.model.min_accuracy_mape",
                    message=f"MAPE threshold {mape}% is quite high",
                    suggestion="Consider 5-10% for better surrogate accuracy"
                ))
def _validate_cross_references(config: Dict, result: ValidationResult):
    """Check consistency between objective count, sampler choice, and protocol."""
    objectives = config.get('objectives', [])
    settings = config.get('optimization_settings', {})
    sampler = settings.get('sampler', 'TPESampler')
    n_objectives = len(objectives)
    # Sampler vs. objective count.
    if sampler == 'TPESampler' and n_objectives > 1:
        result.warnings.append(ConfigWarning(
            field="optimization_settings.sampler",
            message="TPESampler with multiple objectives will scalarize them",
            suggestion="Consider NSGAIISampler for true multi-objective optimization"
        ))
    if sampler == 'NSGAIISampler' and n_objectives == 1:
        result.warnings.append(ConfigWarning(
            field="optimization_settings.sampler",
            message="NSGAIISampler is designed for multi-objective; single-objective may be slower",
            suggestion="Consider TPESampler or CmaEsSampler for single-objective"
        ))
    # Protocol vs. objective count.
    protocol = settings.get('protocol', '')
    if n_objectives == 1 and 'multi_objective' in protocol:
        result.warnings.append(ConfigWarning(
            field="optimization_settings.protocol",
            message="Multi-objective protocol with single objective",
            suggestion="Use protocol_10_single_objective instead"
        ))
    if n_objectives > 1 and 'single_objective' in protocol:
        result.warnings.append(ConfigWarning(
            field="optimization_settings.protocol",
            message="Single-objective protocol with multiple objectives",
            suggestion="Use protocol_11_multi_objective for multiple objectives"
        ))
# CLI interface for direct execution
# Usage: python config_validator.py <path_to_config.json>
# Exit code 0 when the config validates cleanly, 1 otherwise.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python config_validator.py <path_to_config.json>")
        sys.exit(1)
    config_path = sys.argv[1]
    # Run the full validation pipeline and print the human-readable report.
    result = validate_config_file(config_path)
    print(result)
    if result.is_valid:
        print("\n✓ Configuration is valid!")
        sys.exit(0)
    else:
        print(f"\n✗ Configuration has {len(result.errors)} error(s)")
        sys.exit(1)

View File

@@ -0,0 +1,557 @@
"""
Model Validator for Atomizer
============================
Validates NX model files and simulation setup before running optimizations.
Checks file existence, structure, and configuration compatibility.
Usage:
from optimization_engine.validators import validate_model, validate_model_files
# Validate model directory
result = validate_model("studies/my_study/1_setup/model")
# Validate specific files
result = validate_model_files(
prt_file="Beam.prt",
sim_file="Beam_sim1.sim",
model_dir=Path("studies/my_study/1_setup/model")
)
if result.is_valid:
print("Model is ready!")
else:
for error in result.errors:
print(f"ERROR: {error}")
"""
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Any, Optional, Union
@dataclass
class ModelError:
    """A blocking model-validation problem: execution cannot proceed."""
    component: str                    # model aspect that failed (e.g. "part")
    message: str                      # human-readable description
    suggestion: Optional[str] = None  # optional remediation hint

    def __str__(self):
        text = f"[{self.component}] {self.message}"
        return text + (f" (Suggestion: {self.suggestion})" if self.suggestion else "")
@dataclass
class ModelWarning:
    """A non-blocking model-validation concern worth reporting."""
    component: str                    # model aspect the warning concerns
    message: str                      # human-readable description
    suggestion: Optional[str] = None  # optional remediation hint

    def __str__(self):
        parts = [f"[{self.component}] {self.message}"]
        if self.suggestion:
            parts.append(f"(Suggestion: {self.suggestion})")
        return " ".join(parts)
@dataclass
class ModelValidationResult:
    """Result of model validation.

    Aggregates errors/warnings and the files discovered during validation;
    __str__ renders a multi-line human-readable report.
    """
    errors: List[ModelError] = field(default_factory=list)
    warnings: List[ModelWarning] = field(default_factory=list)
    # Discovered files
    prt_file: Optional[Path] = None   # NX part file (.prt)
    sim_file: Optional[Path] = None   # simulation file (.sim)
    fem_file: Optional[Path] = None   # FEM mesh file (.fem); may be absent
    # Model info
    model_name: Optional[str] = None  # base name (stem of the part file)
    model_dir: Optional[Path] = None  # directory that was validated
    file_sizes: Dict[str, int] = field(default_factory=dict)  # bytes, keyed 'prt'/'sim'/'fem'
    @property
    def is_valid(self) -> bool:
        """Model is valid if there are no errors (warnings do not block)."""
        return len(self.errors) == 0
    @property
    def has_simulation(self) -> bool:
        """Check if simulation file exists."""
        return self.sim_file is not None
    @property
    def has_fem(self) -> bool:
        """Check if FEM mesh file exists."""
        return self.fem_file is not None
    def __str__(self):
        """Render the report: files found, then errors, then warnings."""
        lines = []
        lines.append(f"Model: {self.model_name or 'Unknown'}")
        lines.append(f"Directory: {self.model_dir or 'Unknown'}")
        lines.append("")
        lines.append("Files:")
        if self.prt_file:
            size = self.file_sizes.get('prt', 0)
            lines.append(f" [OK] Part file: {self.prt_file.name} ({_format_size(size)})")
        else:
            lines.append(" [X] Part file: NOT FOUND")
        if self.sim_file:
            size = self.file_sizes.get('sim', 0)
            lines.append(f" [OK] Simulation: {self.sim_file.name} ({_format_size(size)})")
        else:
            lines.append(" [X] Simulation: NOT FOUND")
        if self.fem_file:
            size = self.file_sizes.get('fem', 0)
            lines.append(f" [OK] FEM mesh: {self.fem_file.name} ({_format_size(size)})")
        else:
            # A missing FEM is informational only: NX regenerates it on solve.
            lines.append(" ? FEM mesh: Not found (will be created on first solve)")
        if self.errors:
            lines.append("")
            lines.append(f"ERRORS ({len(self.errors)}):")
            for e in self.errors:
                lines.append(f" - {e}")
        if self.warnings:
            lines.append("")
            lines.append(f"WARNINGS ({len(self.warnings)}):")
            for w in self.warnings:
                lines.append(f" - {w}")
        if self.is_valid:
            lines.append("")
            lines.append("[OK] Model validation passed!")
        return "\n".join(lines)
def _format_size(size_bytes: int) -> str:
"""Format file size for display."""
if size_bytes < 1024:
return f"{size_bytes} B"
elif size_bytes < 1024 * 1024:
return f"{size_bytes / 1024:.1f} KB"
else:
return f"{size_bytes / (1024 * 1024):.1f} MB"
def validate_model(model_dir: Union[str, Path],
                  expected_model_name: Optional[str] = None) -> ModelValidationResult:
    """
    Validate an NX model directory.

    Scans the directory for .prt, .sim, and .fem files, disambiguates
    when multiple candidates exist, runs per-file sanity checks, and
    cross-validates naming/location relationships.

    Args:
        model_dir: Path to the model directory
        expected_model_name: Expected base name of the model (optional);
            used to pick between multiple .prt files
    Returns:
        ModelValidationResult with errors, warnings, and discovered files
    """
    model_dir = Path(model_dir)
    result = ModelValidationResult(model_dir=model_dir)
    # Check directory exists
    if not model_dir.exists():
        result.errors.append(ModelError(
            component="directory",
            message=f"Model directory not found: {model_dir}",
            suggestion="Create the directory and add NX model files"
        ))
        return result
    if not model_dir.is_dir():
        result.errors.append(ModelError(
            component="directory",
            message=f"Path is not a directory: {model_dir}",
            suggestion="Provide path to the model directory, not a file"
        ))
        return result
    # Find model files
    prt_files = list(model_dir.glob("*.prt"))
    sim_files = list(model_dir.glob("*.sim"))
    fem_files = list(model_dir.glob("*.fem"))
    # Check for part file
    if len(prt_files) == 0:
        result.errors.append(ModelError(
            component="part",
            message="No .prt file found in model directory",
            suggestion="Add your NX part file to the model directory"
        ))
    elif len(prt_files) > 1:
        # Filter out internal files (often have _i suffix)
        main_prt_files = [f for f in prt_files if not f.stem.endswith('_i')]
        if len(main_prt_files) == 1:
            prt_files = main_prt_files
        elif expected_model_name:
            # Fall back to the name supplied by the caller/config.
            matching = [f for f in prt_files if f.stem == expected_model_name]
            if matching:
                prt_files = matching
            else:
                result.warnings.append(ModelWarning(
                    component="part",
                    message=f"Multiple .prt files found, none match expected name '{expected_model_name}'",
                    suggestion="Specify the correct model name in configuration"
                ))
        else:
            result.warnings.append(ModelWarning(
                component="part",
                message=f"Multiple .prt files found: {[f.name for f in prt_files]}",
                suggestion="Consider keeping only the main model file in the directory"
            ))
    if prt_files:
        # First candidate wins after the disambiguation above.
        result.prt_file = prt_files[0]
        result.model_name = result.prt_file.stem
        result.file_sizes['prt'] = result.prt_file.stat().st_size
        # Validate part file
        _validate_prt_file(result.prt_file, result)
    # Check for simulation file
    if len(sim_files) == 0:
        result.errors.append(ModelError(
            component="simulation",
            message="No .sim file found in model directory",
            suggestion="Create a simulation in NX and save it to this directory"
        ))
    elif len(sim_files) > 1:
        if result.model_name:
            # Try to find matching sim file
            expected_sim = f"{result.model_name}_sim1.sim"
            matching = [f for f in sim_files if f.name.lower() == expected_sim.lower()]
            if matching:
                sim_files = matching
            else:
                result.warnings.append(ModelWarning(
                    component="simulation",
                    message=f"Multiple .sim files found: {[f.name for f in sim_files]}",
                    suggestion=f"Expected: {expected_sim}"
                ))
        else:
            result.warnings.append(ModelWarning(
                component="simulation",
                message=f"Multiple .sim files found: {[f.name for f in sim_files]}",
                suggestion="Keep only one simulation file"
            ))
    if sim_files:
        result.sim_file = sim_files[0]
        result.file_sizes['sim'] = result.sim_file.stat().st_size
        # Validate simulation file
        _validate_sim_file(result.sim_file, result)
    # Check for FEM file
    if len(fem_files) == 0:
        # Missing FEM is non-fatal: NX regenerates the mesh on solve.
        result.warnings.append(ModelWarning(
            component="fem",
            message="No .fem file found",
            suggestion="FEM mesh will be created automatically on first solve"
        ))
    else:
        if result.model_name:
            # Prefer the conventionally named mesh when several exist.
            expected_fem = f"{result.model_name}_fem1.fem"
            matching = [f for f in fem_files if f.name.lower() == expected_fem.lower()]
            if matching:
                fem_files = matching
        result.fem_file = fem_files[0]
        result.file_sizes['fem'] = result.fem_file.stat().st_size
    # Cross-validate files
    _validate_file_relationships(result)
    return result
def validate_model_files(prt_file: Union[str, Path],
                        sim_file: Union[str, Path],
                        model_dir: Optional[Union[str, Path]] = None) -> ModelValidationResult:
    """
    Validate specific model files.

    Unlike validate_model(), this checks exactly the two files given
    rather than scanning a directory for candidates.

    Args:
        prt_file: Name or path to the part file
        sim_file: Name or path to the simulation file
        model_dir: Base directory (optional, will be inferred if full paths given)
    Returns:
        ModelValidationResult
    """
    prt_path = Path(prt_file)
    sim_path = Path(sim_file)
    # If paths are relative and model_dir provided, resolve them
    if model_dir:
        model_dir = Path(model_dir)
        if not prt_path.is_absolute():
            prt_path = model_dir / prt_path
        if not sim_path.is_absolute():
            sim_path = model_dir / sim_path
    else:
        # Infer model_dir from prt_file
        if prt_path.is_absolute():
            model_dir = prt_path.parent
        else:
            # Relative path with no base directory: assume cwd.
            model_dir = Path.cwd()
    result = ModelValidationResult(model_dir=model_dir)
    # Check part file
    if not prt_path.exists():
        result.errors.append(ModelError(
            component="part",
            message=f"Part file not found: {prt_path}",
            suggestion="Check the file path and name"
        ))
    else:
        result.prt_file = prt_path
        result.model_name = prt_path.stem
        result.file_sizes['prt'] = prt_path.stat().st_size
        _validate_prt_file(prt_path, result)
    # Check simulation file
    if not sim_path.exists():
        result.errors.append(ModelError(
            component="simulation",
            message=f"Simulation file not found: {sim_path}",
            suggestion="Check the file path and name"
        ))
    else:
        result.sim_file = sim_path
        result.file_sizes['sim'] = sim_path.stat().st_size
        _validate_sim_file(sim_path, result)
    # Check for FEM file: conventional name first (<model>_fem1.fem),
    # then any .fem present in the directory.
    if result.model_name:
        fem_path = model_dir / f"{result.model_name}_fem1.fem"
        if fem_path.exists():
            result.fem_file = fem_path
            result.file_sizes['fem'] = fem_path.stat().st_size
        else:
            # Try alternative naming
            fem_files = list(model_dir.glob("*.fem")) if model_dir.exists() else []
            if fem_files:
                result.fem_file = fem_files[0]
                result.file_sizes['fem'] = result.fem_file.stat().st_size
    _validate_file_relationships(result)
    return result
def _validate_prt_file(prt_path: Path, result: ModelValidationResult):
    """Run basic sanity checks on an NX part file (size and readability)."""
    size = prt_path.stat().st_size
    if size == 0:
        result.errors.append(ModelError(
            component="part",
            message="Part file is empty",
            suggestion="Re-save the part file in NX"
        ))
        return
    if size < 1024:
        result.warnings.append(ModelWarning(
            component="part",
            message=f"Part file is very small ({_format_size(size)})",
            suggestion="Verify the file contains valid geometry"
        ))
    # Basic readability / header-length check.  Real NX headers are more
    # complex; this only catches truncated or unreadable files.
    try:
        with open(prt_path, 'rb') as handle:
            header = handle.read(8)
    except PermissionError:
        result.errors.append(ModelError(
            component="part",
            message="Cannot read part file - permission denied",
            suggestion="Close NX if the file is open, or check file permissions"
        ))
        return
    except Exception as exc:
        result.warnings.append(ModelWarning(
            component="part",
            message=f"Could not verify part file: {exc}",
            suggestion="Ensure file is a valid NX part"
        ))
        return
    if len(header) < 8:
        result.warnings.append(ModelWarning(
            component="part",
            message="Part file appears incomplete",
            suggestion="Re-save the file in NX"
        ))
def _validate_sim_file(sim_path: Path, result: ModelValidationResult):
    """Sanity-check a simulation file's size (empty = error, tiny = warning)."""
    size = sim_path.stat().st_size
    if size == 0:
        result.errors.append(ModelError(
            component="simulation",
            message="Simulation file is empty",
            suggestion="Re-save the simulation in NX"
        ))
    elif size < 512:
        result.warnings.append(ModelWarning(
            component="simulation",
            message=f"Simulation file is very small ({_format_size(size)})",
            suggestion="Verify simulation setup in NX"
        ))
def _validate_file_relationships(result: ModelValidationResult):
    """Warn about naming/location mismatches between part, sim, and FEM files."""
    if result.prt_file is None or result.sim_file is None:
        return
    prt_stem = result.prt_file.stem
    # Simulation should follow the <part>_sim1 convention; any name that
    # at least starts with the part name is accepted.
    sim_stem = result.sim_file.stem
    expected_sim_stem = f"{prt_stem}_sim1"
    if not (sim_stem == expected_sim_stem or sim_stem.startswith(prt_stem)):
        result.warnings.append(ModelWarning(
            component="naming",
            message=f"Simulation name '{sim_stem}' doesn't match part name '{prt_stem}'",
            suggestion=f"Expected simulation name: {expected_sim_stem}.sim"
        ))
    # FEM should follow the <part>_fem1 convention when present.
    if result.fem_file:
        fem_stem = result.fem_file.stem
        expected_fem_stem = f"{prt_stem}_fem1"
        if not (fem_stem == expected_fem_stem or fem_stem.startswith(prt_stem)):
            result.warnings.append(ModelWarning(
                component="naming",
                message=f"FEM name '{fem_stem}' doesn't match part name '{prt_stem}'",
                suggestion=f"Expected FEM name: {expected_fem_stem}.fem"
            ))
    # All model files should live in one directory so NX resolves links.
    if result.prt_file.parent != result.sim_file.parent:
        result.warnings.append(ModelWarning(
            component="directory",
            message="Part and simulation files are in different directories",
            suggestion="Keep all model files in the same directory"
        ))
def validate_study_model(study_name: Union[str, Path],
                        studies_dir: Union[str, Path] = "studies",
                        config: Optional[Dict[str, Any]] = None) -> ModelValidationResult:
    """
    Validate model for a complete study.

    Locates <studies_dir>/<study_name>/1_setup/model, loads the study's
    optimization_config.json when not supplied, and cross-checks the
    discovered model files against that config.

    Args:
        study_name: Name of the study folder (e.g., "uav_arm_optimization")
        studies_dir: Base directory for studies (default: "studies")
        config: Optional optimization_config.json contents (loaded dict, not path)
    Returns:
        ModelValidationResult
    """
    study_path = Path(studies_dir) / study_name
    model_dir = study_path / "1_setup" / "model"
    # Load config if not provided
    if config is None:
        config_path = study_path / "1_setup" / "optimization_config.json"
        if config_path.exists():
            import json
            try:
                with open(config_path, 'r') as f:
                    config = json.load(f)
            except (json.JSONDecodeError, IOError):
                # Best-effort load: an unreadable config just skips cross-checks.
                config = None
    # Get expected file names from config if available
    expected_model_name = None
    if config and isinstance(config, dict) and 'simulation' in config:
        sim_config = config['simulation']
        if 'model_file' in sim_config:
            expected_model_name = Path(sim_config['model_file']).stem
    result = validate_model(model_dir, expected_model_name)
    # Additional study-specific validations
    if config and isinstance(config, dict):
        _validate_config_model_match(config, result)
    return result
def _validate_config_model_match(config: Dict[str, Any], result: ModelValidationResult):
    """Warn when config file names disagree with the files actually found."""
    sim_config = config.get('simulation', {})
    # Compare configured vs. discovered part-file name (case-insensitive).
    if result.prt_file and 'model_file' in sim_config:
        config_model = Path(sim_config['model_file']).name
        actual_model = result.prt_file.name
        if config_model.lower() != actual_model.lower():
            result.warnings.append(ModelWarning(
                component="config",
                message=f"Config specifies '{config_model}' but found '{actual_model}'",
                suggestion="Update config to match actual file name"
            ))
    # Same comparison for the simulation file.
    if result.sim_file and 'sim_file' in sim_config:
        config_sim = Path(sim_config['sim_file']).name
        actual_sim = result.sim_file.name
        if config_sim.lower() != actual_sim.lower():
            result.warnings.append(ModelWarning(
                component="config",
                message=f"Config specifies '{config_sim}' but found '{actual_sim}'",
                suggestion="Update config to match actual file name"
            ))
# CLI interface for direct execution
# Usage:
#   python model_validator.py <path_to_model_directory>
#   python model_validator.py <path_to_study_directory>
# Exit code 0 when validation passes, 1 on errors or bad arguments.
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        print("Usage: python model_validator.py <path_to_model_directory>")
        print("       python model_validator.py <path_to_study_directory>")
        sys.exit(1)
    path = Path(sys.argv[1])
    # Check if it's a study directory or model directory
    if (path / "1_setup" / "model").exists():
        # It's a study directory.  validate_study_model() joins
        # studies_dir/study_name, so split the given path into parent and
        # final component; passing the whole path as study_name would
        # wrongly resolve it under the default "studies/" base.
        study_path = path.resolve()
        result = validate_study_model(study_path.name, studies_dir=str(study_path.parent))
    elif path.is_dir():
        # It's a model directory
        result = validate_model(path)
    else:
        print(f"ERROR: Path not found or not a directory: {path}")
        sys.exit(1)
    print(result)
    if result.is_valid:
        print("\n✓ Model validation passed!")
        sys.exit(0)
    else:
        print(f"\n✗ Model has {len(result.errors)} error(s)")
        sys.exit(1)

View File

@@ -0,0 +1,565 @@
"""
Results Validator for Atomizer Optimization Studies
Validates optimization results stored in study.db and provides
analysis of trial quality, constraint satisfaction, and data integrity.
Usage:
from optimization_engine.validators.results_validator import validate_results
result = validate_results("studies/my_study/2_results/study.db")
if result.is_valid:
print("Results are valid")
else:
for error in result.errors:
print(f"ERROR: {error}")
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import json
@dataclass
class ResultsError:
    """An error detected while validating optimization results."""
    code: str                           # short machine-readable code, e.g. "NAN_OBJECTIVE"
    message: str                        # human-readable description
    trial_number: Optional[int] = None  # offending trial, when applicable

    def __str__(self) -> str:
        location = "" if self.trial_number is None else f"Trial #{self.trial_number}: "
        return f"[{self.code}] {location}{self.message}"
@dataclass
class ResultsWarning:
    """A non-blocking concern found while validating optimization results."""
    code: str                           # short machine-readable code
    message: str                        # human-readable description
    trial_number: Optional[int] = None  # offending trial, when applicable

    def __str__(self) -> str:
        if self.trial_number is None:
            return f"[{self.code}] {self.message}"
        return f"[{self.code}] Trial #{self.trial_number}: {self.message}"
@dataclass
class ResultsInfo:
    """Summary information about the optimization results."""
    study_name: str = ""
    n_trials: int = 0        # total trials recorded (all states)
    n_completed: int = 0     # trials in state COMPLETE
    n_failed: int = 0        # trials in state FAIL
    n_pruned: int = 0        # trials in state PRUNED
    n_pareto: int = 0        # Pareto-optimal trials (multi-objective studies only)
    feasibility_rate: float = 0.0      # percentage of completed trials marked feasible
    is_multi_objective: bool = False   # True when the study optimizes >1 direction
    objective_names: List[str] = field(default_factory=list)   # names taken from config, if provided
    best_values: Dict[str, float] = field(default_factory=dict)  # best objective value(s) found
    parameter_names: List[str] = field(default_factory=list)   # params of first completed trial
@dataclass
class ResultsValidationResult:
    """Complete validation result for optimization results.

    __str__ renders a human-readable report: status line, trial-count
    summary, best objective values, then any errors and warnings.
    """
    is_valid: bool                 # True when no errors were found
    errors: List[ResultsError]
    warnings: List[ResultsWarning]
    info: ResultsInfo              # summary statistics for the study
    def __str__(self) -> str:
        """Render the multi-section validation report."""
        lines = []
        # Status
        status = "[OK] Results validation passed!" if self.is_valid else "[X] Results validation failed!"
        lines.append(status)
        lines.append("")
        # Info
        lines.append("RESULTS SUMMARY")
        lines.append("-" * 40)
        lines.append(f" Study: {self.info.study_name}")
        lines.append(f" Total trials: {self.info.n_trials}")
        lines.append(f" Completed: {self.info.n_completed}")
        lines.append(f" Failed: {self.info.n_failed}")
        if self.info.n_pruned > 0:
            lines.append(f" Pruned: {self.info.n_pruned}")
        lines.append(f" Multi-objective: {'Yes' if self.info.is_multi_objective else 'No'}")
        if self.info.is_multi_objective and self.info.n_pareto > 0:
            lines.append(f" Pareto-optimal: {self.info.n_pareto}")
        if self.info.feasibility_rate > 0:
            lines.append(f" Feasibility rate: {self.info.feasibility_rate:.1f}%")
        lines.append("")
        # Best values
        if self.info.best_values:
            lines.append("BEST VALUES")
            lines.append("-" * 40)
            for name, value in self.info.best_values.items():
                lines.append(f" {name}: {value:.4f}")
            lines.append("")
        # Errors
        if self.errors:
            lines.append("ERRORS")
            lines.append("-" * 40)
            for error in self.errors:
                lines.append(f" {error}")
            lines.append("")
        # Warnings
        if self.warnings:
            lines.append("WARNINGS")
            lines.append("-" * 40)
            for warning in self.warnings:
                lines.append(f" {warning}")
            lines.append("")
        return "\n".join(lines)
def validate_results(
    db_path: str,
    config_path: Optional[str] = None,
    min_trials: int = 1
) -> ResultsValidationResult:
    """
    Validate optimization results stored in study.db.

    Loads the first study found in the Optuna SQLite database, gathers
    trial counts and best values, checks feasibility and per-trial data
    quality, and (optionally) cross-validates against the optimization
    config.  Optuna import failures and storage errors are reported as
    validation errors rather than raised.

    Args:
        db_path: Path to study.db file
        config_path: Optional path to optimization_config.json for cross-validation
        min_trials: Minimum number of completed trials required
    Returns:
        ResultsValidationResult with errors, warnings, and info
    """
    errors: List[ResultsError] = []
    warnings: List[ResultsWarning] = []
    info = ResultsInfo()
    db_path = Path(db_path)
    # Check database exists
    if not db_path.exists():
        errors.append(ResultsError(
            code="DB_NOT_FOUND",
            message=f"Database not found: {db_path}"
        ))
        return ResultsValidationResult(
            is_valid=False,
            errors=errors,
            warnings=warnings,
            info=info
        )
    # Try to load with Optuna
    try:
        import optuna
        storage_url = f"sqlite:///{db_path}"
        # Get all studies in the database
        storage = optuna.storages.RDBStorage(url=storage_url)
        study_summaries = storage.get_all_studies()
        if not study_summaries:
            errors.append(ResultsError(
                code="NO_STUDIES",
                message="Database contains no optimization studies"
            ))
            return ResultsValidationResult(
                is_valid=False,
                errors=errors,
                warnings=warnings,
                info=info
            )
        # Use the first (usually only) study
        study_summary = study_summaries[0]
        info.study_name = study_summary.study_name
        # Load the full study
        study = optuna.load_study(
            study_name=info.study_name,
            storage=storage_url
        )
        # Basic counts
        info.n_trials = len(study.trials)
        info.n_completed = len([t for t in study.trials
                               if t.state == optuna.trial.TrialState.COMPLETE])
        info.n_failed = len([t for t in study.trials
                            if t.state == optuna.trial.TrialState.FAIL])
        info.n_pruned = len([t for t in study.trials
                            if t.state == optuna.trial.TrialState.PRUNED])
        # Check minimum trials
        if info.n_completed < min_trials:
            errors.append(ResultsError(
                code="INSUFFICIENT_TRIALS",
                message=f"Only {info.n_completed} completed trials (minimum: {min_trials})"
            ))
        # Check for multi-objective
        info.is_multi_objective = len(study.directions) > 1
        # Get parameter names from first completed trial
        for trial in study.trials:
            if trial.state == optuna.trial.TrialState.COMPLETE:
                info.parameter_names = list(trial.params.keys())
                break
        # Analyze Pareto front for multi-objective
        if info.is_multi_objective:
            try:
                pareto_trials = study.best_trials
                info.n_pareto = len(pareto_trials)
                if info.n_pareto == 0 and info.n_completed > 0:
                    warnings.append(ResultsWarning(
                        code="NO_PARETO",
                        message="No Pareto-optimal solutions found despite completed trials"
                    ))
            except Exception as e:
                warnings.append(ResultsWarning(
                    code="PARETO_ERROR",
                    message=f"Could not compute Pareto front: {e}"
                ))
        else:
            # Single objective - get best value
            if info.n_completed > 0:
                try:
                    best_trial = study.best_trial
                    info.best_values["objective"] = best_trial.value
                except Exception:
                    # best_trial raises when no completed trial has a value;
                    # simply leave best_values empty in that case.
                    pass
        # Analyze feasibility
        feasible_count = 0
        for trial in study.trials:
            if trial.state == optuna.trial.TrialState.COMPLETE:
                # Check user_attrs for feasibility flag
                is_feasible = trial.user_attrs.get('feasible', True)
                if is_feasible:
                    feasible_count += 1
        if info.n_completed > 0:
            info.feasibility_rate = (feasible_count / info.n_completed) * 100
            if info.feasibility_rate < 50:
                warnings.append(ResultsWarning(
                    code="LOW_FEASIBILITY",
                    message=f"Low feasibility rate ({info.feasibility_rate:.1f}%) - consider relaxing constraints or adjusting bounds"
                ))
            elif info.feasibility_rate < 80:
                warnings.append(ResultsWarning(
                    code="MODERATE_FEASIBILITY",
                    message=f"Moderate feasibility rate ({info.feasibility_rate:.1f}%)"
                ))
        # Check for data quality issues
        _validate_trial_data(study, errors, warnings)
        # Cross-validate with config if provided
        if config_path:
            _cross_validate_with_config(study, config_path, info, errors, warnings)
    except ImportError:
        errors.append(ResultsError(
            code="OPTUNA_NOT_INSTALLED",
            message="Optuna is not installed. Cannot validate results."
        ))
    except Exception as e:
        # Broad catch: any storage/loading failure becomes a LOAD_ERROR
        # entry instead of propagating to the caller.
        errors.append(ResultsError(
            code="LOAD_ERROR",
            message=f"Failed to load study: {e}"
        ))
    return ResultsValidationResult(
        is_valid=len(errors) == 0,
        errors=errors,
        warnings=warnings,
        info=info
    )
def _validate_trial_data(study, errors: List[ResultsError], warnings: List[ResultsWarning]):
    """Validate individual trial data quality.

    Scans every COMPLETE trial for null/NaN/inf objective values, missing
    parameter records, and non-positive geometry parameters.  Hard data
    defects append to `errors`; suspicious-but-usable values to `warnings`.

    Fix: `import math` was previously executed inside the per-value inner
    loop; it is now imported once at function scope.
    """
    import math
    import optuna
    for trial in study.trials:
        if trial.state != optuna.trial.TrialState.COMPLETE:
            continue
        # Objective values must be finite numbers.
        if trial.values:
            for i, val in enumerate(trial.values):
                if val is None:
                    errors.append(ResultsError(
                        code="NULL_OBJECTIVE",
                        message=f"Objective {i} has null value",
                        trial_number=trial.number
                    ))
                elif not isinstance(val, (int, float)):
                    errors.append(ResultsError(
                        code="INVALID_OBJECTIVE_TYPE",
                        message=f"Objective {i} has invalid type: {type(val)}",
                        trial_number=trial.number
                    ))
                elif isinstance(val, float):
                    if math.isnan(val):
                        errors.append(ResultsError(
                            code="NAN_OBJECTIVE",
                            message=f"Objective {i} is NaN",
                            trial_number=trial.number
                        ))
                    elif math.isinf(val):
                        warnings.append(ResultsWarning(
                            code="INF_OBJECTIVE",
                            message=f"Objective {i} is infinite",
                            trial_number=trial.number
                        ))
        # A completed trial with no parameters indicates corrupted storage.
        if not trial.params:
            errors.append(ResultsError(
                code="MISSING_PARAMS",
                message="Trial has no parameters recorded",
                trial_number=trial.number
            ))
        # Geometry parameters must be positive to be physically meaningful.
        for param_name, param_value in trial.params.items():
            if 'thickness' in param_name.lower() and param_value <= 0:
                warnings.append(ResultsWarning(
                    code="INVALID_THICKNESS",
                    message=f"{param_name} = {param_value} (non-positive thickness)",
                    trial_number=trial.number
                ))
            elif 'diameter' in param_name.lower() and param_value <= 0:
                warnings.append(ResultsWarning(
                    code="INVALID_DIAMETER",
                    message=f"{param_name} = {param_value} (non-positive diameter)",
                    trial_number=trial.number
                ))
def _cross_validate_with_config(
    study,
    config_path: str,
    info: ResultsInfo,
    errors: List[ResultsError],
    warnings: List[ResultsWarning]
):
    """Cross-validate results with optimization config."""
    import optuna
    config_path = Path(config_path)
    if not config_path.exists():
        warnings.append(ResultsWarning(
            code="CONFIG_NOT_FOUND",
            message=f"Config file not found for cross-validation: {config_path}"
        ))
        return
    try:
        with open(config_path, 'r') as f:
            cfg = json.load(f)

        def _param_name(dv):
            # Configs have historically used both "parameter" and "name".
            return dv.get('parameter', dv.get('name', ''))

        # Compare the parameter sets recorded in results vs. the config.
        cfg_params = {n for n in (_param_name(dv) for dv in cfg.get('design_variables', [])) if n}
        res_params = set(info.parameter_names)
        missing_in_results = cfg_params - res_params
        extra_in_results = res_params - cfg_params
        if missing_in_results:
            warnings.append(ResultsWarning(
                code="MISSING_PARAMS_IN_RESULTS",
                message=f"Config params not in results: {missing_in_results}"
            ))
        if extra_in_results:
            warnings.append(ResultsWarning(
                code="EXTRA_PARAMS_IN_RESULTS",
                message=f"Results have extra params not in config: {extra_in_results}"
            ))
        # Objective count should agree between config and study directions.
        config_objectives = len(cfg.get('objectives', []))
        result_objectives = len(study.directions)
        if config_objectives != result_objectives:
            warnings.append(ResultsWarning(
                code="OBJECTIVE_COUNT_MISMATCH",
                message=f"Config has {config_objectives} objectives, results have {result_objectives}"
            ))
        # Record the objective names declared in the config.
        for obj in cfg.get('objectives', []):
            obj_name = obj.get('name', '')
            if obj_name:
                info.objective_names.append(obj_name)
        # Flag any completed trial whose parameters fall outside config bounds.
        completed = optuna.trial.TrialState.COMPLETE
        for trial in study.trials:
            if trial.state != completed:
                continue
            for dv in cfg.get('design_variables', []):
                pname = _param_name(dv)
                bounds = dv.get('bounds', [])
                if pname not in trial.params or len(bounds) != 2:
                    continue
                value = trial.params[pname]
                min_val, max_val = bounds
                # Small tolerance for floating point
                tolerance = (max_val - min_val) * 0.001
                if value < min_val - tolerance:
                    warnings.append(ResultsWarning(
                        code="BELOW_MIN_BOUND",
                        message=f"{pname} = {value} < min ({min_val})",
                        trial_number=trial.number
                    ))
                elif value > max_val + tolerance:
                    warnings.append(ResultsWarning(
                        code="ABOVE_MAX_BOUND",
                        message=f"{pname} = {value} > max ({max_val})",
                        trial_number=trial.number
                    ))
    except json.JSONDecodeError as e:
        warnings.append(ResultsWarning(
            code="CONFIG_PARSE_ERROR",
            message=f"Could not parse config JSON: {e}"
        ))
    except Exception as e:
        warnings.append(ResultsWarning(
            code="CONFIG_ERROR",
            message=f"Error reading config: {e}"
        ))
def validate_study_results(study_name: str) -> ResultsValidationResult:
    """
    Convenience function to validate results for a named study.
    Args:
        study_name: Name of the study (folder in studies/)
    Returns:
        ResultsValidationResult
    """
    from pathlib import Path
    # Standard study layout: studies/<name>/{1_setup,2_results}/...
    base = Path("studies") / study_name
    database = base / "2_results" / "study.db"
    cfg = base / "1_setup" / "optimization_config.json"
    return validate_results(
        db_path=str(database),
        config_path=str(cfg) if cfg.exists() else None
    )
def get_pareto_summary(db_path: str) -> Dict[str, Any]:
    """
    Get a summary of Pareto-optimal designs from results.
    Args:
        db_path: Path to study.db
    Returns:
        Dictionary with Pareto front information
    """
    try:
        import optuna
        url = f"sqlite:///{db_path}"
        storage = optuna.storages.RDBStorage(url=url)
        summaries = storage.get_all_studies()
        if not summaries:
            return {"error": "No studies found"}
        study = optuna.load_study(
            study_name=summaries[0].study_name,
            storage=url
        )
        if len(study.directions) < 2:
            # Single-objective study: report the best trial if one exists.
            if study.best_trial:
                return {
                    "type": "single_objective",
                    "best_trial": study.best_trial.number,
                    "best_value": study.best_value,
                    "best_params": study.best_params
                }
            return {"error": "No completed trials"}
        # Multi-objective study: collect every Pareto-optimal trial.
        front = study.best_trials
        designs = [
            {
                "trial_number": t.number,
                "objectives": t.values,
                "parameters": t.params,
                "user_attrs": dict(t.user_attrs)
            }
            for t in front
        ]
        # Per-objective min/max/spread across the front.
        ranges = {}
        if designs:
            for i in range(len(designs[0]["objectives"])):
                vals = [d["objectives"][i] for d in designs]
                lo, hi = min(vals), max(vals)
                ranges[f"objective_{i}"] = {
                    "min": lo,
                    "max": hi,
                    "spread": hi - lo
                }
        return {
            "type": "multi_objective",
            "n_pareto": len(front),
            "designs": designs,
            "ranges": ranges
        }
    except Exception as e:
        return {"error": str(e)}
if __name__ == "__main__":
    import sys
    # CLI entry point: accepts either a study name or a direct study.db path.
    if len(sys.argv) < 2:
        print("Usage: python results_validator.py <study_name_or_db_path>")
        print("Example: python results_validator.py uav_arm_optimization")
        print("Example: python results_validator.py studies/my_study/2_results/study.db")
        sys.exit(1)
    target = sys.argv[1]
    # A .db suffix means the caller handed us the database file directly.
    validator = validate_results if target.endswith('.db') else validate_study_results
    print(validator(target))

View File

@@ -0,0 +1,421 @@
"""
Study Validator for Atomizer Optimization Studies
Comprehensive validation that combines config, model, and results validation
to provide a complete health check for an optimization study.
Usage:
from optimization_engine.validators.study_validator import validate_study
result = validate_study("uav_arm_optimization")
print(result) # Shows complete status with all checks
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Any, Optional
from enum import Enum
class StudyStatus(Enum):
    """Overall status of a study, derived by validate_study()."""
    NOT_FOUND = "not_found"                # study folder does not exist
    SETUP_INCOMPLETE = "setup_incomplete"  # folder exists but structure/config/model checks failed
    READY_TO_RUN = "ready_to_run"          # setup is valid and no results exist yet
    RUNNING = "running"                    # valid results exist and an .optimization_lock file is present
    COMPLETED = "completed"                # valid results exist and no lock file is present
    HAS_ERRORS = "has_errors"              # results exist but failed validation
@dataclass
class StudyCheckResult:
    """Result of a single validation check."""
    name: str      # human-readable check name, e.g. "Configuration file"
    passed: bool   # True when the check passed
    message: str   # "OK" on success, otherwise a description of the problem
    details: Dict[str, Any] = field(default_factory=dict)  # optional structured extras (counts, flags)
@dataclass
class StudyValidationResult:
    """Complete validation result for a study."""
    study_name: str
    status: StudyStatus
    checks: List[StudyCheckResult]
    summary: Dict[str, Any]

    @property
    def is_ready_to_run(self) -> bool:
        """Check if study is ready to run optimization."""
        return self.status in (StudyStatus.READY_TO_RUN, StudyStatus.COMPLETED)

    @property
    def error_count(self) -> int:
        """Count of failed checks."""
        return sum(1 for c in self.checks if not c.passed)

    @property
    def warning_count(self) -> int:
        """Count of warnings (checks that passed with warnings)."""
        return sum(1 for c in self.checks
                   if c.passed and 'warning' in c.message.lower())

    def __str__(self) -> str:
        out: List[str] = []
        bar = "=" * 60
        # Header
        out.extend([bar, f"STUDY VALIDATION: {self.study_name}", bar, ""])
        # Status line
        status_icons = {
            StudyStatus.NOT_FOUND: "[X] NOT FOUND",
            StudyStatus.SETUP_INCOMPLETE: "[!] SETUP INCOMPLETE",
            StudyStatus.READY_TO_RUN: "[OK] READY TO RUN",
            StudyStatus.RUNNING: "[...] RUNNING",
            StudyStatus.COMPLETED: "[OK] COMPLETED",
            StudyStatus.HAS_ERRORS: "[X] HAS ERRORS"
        }
        out.append(f"Status: {status_icons.get(self.status, str(self.status))}")
        out.append("")
        # Summary key/value section (only when non-empty)
        if self.summary:
            out.append("SUMMARY")
            out.append("-" * 40)
            out.extend(f"  {key}: {value}" for key, value in self.summary.items())
            out.append("")
        # Per-check results
        out.append("VALIDATION CHECKS")
        out.append("-" * 40)
        for check in self.checks:
            marker = "[OK]" if check.passed else "[X]"
            out.append(f"  {marker} {check.name}")
            # Failed checks and checks with extra details also show their message.
            if not check.passed or check.details:
                out.append(f"      {check.message}")
        out.append("")
        # Final verdict / suggested next step
        if self.status == StudyStatus.READY_TO_RUN:
            out.append("Ready to run optimization!")
            out.append("  Command: python run_optimization.py --trials 30")
        elif self.status == StudyStatus.COMPLETED:
            out.append("Optimization completed. View results:")
            out.append("  Command: python -m optimization_engine.validators.results_validator " + self.study_name)
        elif self.status == StudyStatus.SETUP_INCOMPLETE:
            out.append("Complete setup before running:")
            out.extend(f"  - Fix: {c.message}" for c in self.checks if not c.passed)
        elif self.status == StudyStatus.HAS_ERRORS:
            out.append("Fix errors before continuing:")
            out.extend(f"  - {c.message}" for c in self.checks if not c.passed)
        return "\n".join(out)
def validate_study(study_name: str, studies_dir: str = "studies") -> StudyValidationResult:
    """
    Validate all aspects of an optimization study.

    Runs six checks (folder, directory structure, configuration, model files,
    run script, results) and derives an overall StudyStatus from them.

    Args:
        study_name: Name of the study folder
        studies_dir: Base directory for studies (default: "studies")
    Returns:
        StudyValidationResult with complete validation status
    """
    checks: List[StudyCheckResult] = []
    summary: Dict[str, Any] = {}
    study_path = Path(studies_dir) / study_name
    # Check 1: Study folder exists (fatal -- nothing else can be checked)
    if not study_path.exists():
        checks.append(StudyCheckResult(
            name="Study folder exists",
            passed=False,
            message=f"Study folder not found: {study_path}"
        ))
        return StudyValidationResult(
            study_name=study_name,
            status=StudyStatus.NOT_FOUND,
            checks=checks,
            summary=summary
        )
    checks.append(StudyCheckResult(
        name="Study folder exists",
        passed=True,
        message="OK"
    ))
    # Check 2: Required directory structure
    setup_dir = study_path / "1_setup"
    results_dir = study_path / "2_results"
    model_dir = setup_dir / "model"
    structure_ok = True
    structure_msg = []
    if not setup_dir.exists():
        structure_ok = False
        structure_msg.append("Missing 1_setup/")
    if not model_dir.exists():
        structure_ok = False
        structure_msg.append("Missing 1_setup/model/")
    checks.append(StudyCheckResult(
        name="Directory structure",
        passed=structure_ok,
        message="OK" if structure_ok else f"Missing: {', '.join(structure_msg)}"
    ))
    # Check 3: Configuration file
    config_path = setup_dir / "optimization_config.json"
    config_valid = False
    config_details = {}
    if config_path.exists():
        from .config_validator import validate_config_file
        config_result = validate_config_file(str(config_path))
        config_valid = config_result.is_valid
        config_details = {
            "errors": len(config_result.errors),
            "warnings": len(config_result.warnings)
        }
        summary["design_variables"] = len(config_result.config.get("design_variables", []))
        summary["objectives"] = len(config_result.config.get("objectives", []))
        summary["constraints"] = len(config_result.config.get("constraints", []))
        if config_valid:
            msg = "Configuration valid"
            if config_result.warnings:
                msg += f" ({len(config_result.warnings)} warnings)"
        else:
            msg = f"{len(config_result.errors)} errors"
    else:
        msg = "optimization_config.json not found"
    checks.append(StudyCheckResult(
        name="Configuration file",
        passed=config_valid,
        message=msg,
        details=config_details
    ))
    # Check 4: Model files
    model_valid = False
    model_details = {}
    if model_dir.exists():
        from .model_validator import validate_study_model
        model_result = validate_study_model(study_name, studies_dir)
        model_valid = model_result.is_valid
        model_details = {
            "prt": model_result.prt_file is not None,
            "sim": model_result.sim_file is not None,
            "fem": model_result.fem_file is not None
        }
        if model_result.model_name:
            summary["model_name"] = model_result.model_name
        if model_valid:
            msg = "Model files valid"
            if model_result.warnings:
                msg += f" ({len(model_result.warnings)} warnings)"
        else:
            msg = f"{len(model_result.errors)} errors"
    else:
        msg = "Model directory not found"
    checks.append(StudyCheckResult(
        name="Model files",
        passed=model_valid,
        message=msg,
        details=model_details
    ))
    # Check 5: Run script
    run_script = study_path / "run_optimization.py"
    run_script_exists = run_script.exists()
    checks.append(StudyCheckResult(
        name="Run script",
        passed=run_script_exists,
        message="OK" if run_script_exists else "run_optimization.py not found"
    ))
    # Check 6: Results (if any)
    db_path = results_dir / "study.db"
    has_results = db_path.exists()
    results_valid = False
    results_details = {}
    if has_results:
        from .results_validator import validate_results
        results_result = validate_results(
            str(db_path),
            str(config_path) if config_path.exists() else None
        )
        results_valid = results_result.is_valid
        results_details = {
            "trials": results_result.info.n_trials,
            "completed": results_result.info.n_completed,
            "failed": results_result.info.n_failed,
            "pareto": results_result.info.n_pareto
        }
        summary["trials_completed"] = results_result.info.n_completed
        summary["trials_failed"] = results_result.info.n_failed
        if results_result.info.n_pareto > 0:
            summary["pareto_designs"] = results_result.info.n_pareto
        if results_valid:
            msg = f"{results_result.info.n_completed} completed trials"
            if results_result.info.is_multi_objective:
                msg += f", {results_result.info.n_pareto} Pareto-optimal"
        else:
            msg = f"{len(results_result.errors)} errors in results"
        checks.append(StudyCheckResult(
            name="Optimization results",
            passed=results_valid,
            message=msg,
            details=results_details
        ))
    else:
        checks.append(StudyCheckResult(
            name="Optimization results",
            passed=True,  # Not having results is OK for a new study
            message="No results yet (study not run)",
            details={"exists": False}
        ))
    # Determine overall status from the named flags computed above.
    # (Previously this indexed checks[0..3] positionally, which silently
    # breaks if checks are ever reordered or inserted; the folder check is
    # always True here because we returned early when the folder was missing.)
    critical_checks_passed = structure_ok and config_valid and model_valid
    if not critical_checks_passed:
        status = StudyStatus.SETUP_INCOMPLETE
    elif has_results and results_valid:
        # A lock file indicates an optimization process is still active.
        lock_file = results_dir / ".optimization_lock"
        status = StudyStatus.RUNNING if lock_file.exists() else StudyStatus.COMPLETED
    elif has_results and not results_valid:
        status = StudyStatus.HAS_ERRORS
    else:
        status = StudyStatus.READY_TO_RUN
    return StudyValidationResult(
        study_name=study_name,
        status=status,
        checks=checks,
        summary=summary
    )
def list_studies(studies_dir: str = "studies") -> List[Dict[str, Any]]:
    """
    List all studies and their validation status.
    Args:
        studies_dir: Base directory for studies
    Returns:
        List of dictionaries with study name and status
    """
    base = Path(studies_dir)
    if not base.exists():
        return []
    entries: List[Dict[str, Any]] = []
    for folder in sorted(base.iterdir()):
        # Skip plain files and hidden folders (e.g. .git, .cache).
        if not folder.is_dir() or folder.name.startswith('.'):
            continue
        validation = validate_study(folder.name, studies_dir)
        entries.append({
            "name": folder.name,
            "status": validation.status.value,
            "is_ready": validation.is_ready_to_run,
            "errors": validation.error_count,
            "trials": validation.summary.get("trials_completed", 0),
            "pareto": validation.summary.get("pareto_designs", 0)
        })
    return entries
def quick_check(study_name: str, studies_dir: str = "studies") -> bool:
    """
    Quick check if a study is ready to run.
    Args:
        study_name: Name of the study
        studies_dir: Base directory for studies
    Returns:
        True if ready to run, False otherwise
    """
    # Delegates to the full validator and reports only the readiness flag.
    return validate_study(study_name, studies_dir).is_ready_to_run
def get_study_health(study_name: str, studies_dir: str = "studies") -> Dict[str, Any]:
    """
    Get a simple health report for a study.
    Args:
        study_name: Name of the study
        studies_dir: Base directory for studies
    Returns:
        Dictionary with health information
    """
    result = validate_study(study_name, studies_dir)
    passed = sum(1 for c in result.checks if c.passed)
    return {
        "name": study_name,
        "status": result.status.value,
        "is_ready": result.is_ready_to_run,
        "checks_passed": passed,
        "checks_total": len(result.checks),
        "error_count": result.error_count,
        "summary": result.summary
    }
if __name__ == "__main__":
    import sys
    if len(sys.argv) < 2:
        # No argument: print a one-line status for every study found.
        print("Available studies:")
        print("-" * 60)
        studies = list_studies()
        if not studies:
            print("  No studies found in studies/")
        else:
            for study in studies:
                status_icon = "[OK]" if study["is_ready"] else "[X]"
                trials_info = f"{study['trials']} trials" if study['trials'] > 0 else "no trials"
                print(f"  {status_icon} {study['name']}: {study['status']} ({trials_info})")
        print()
        print("Usage: python study_validator.py <study_name>")
    else:
        # With a study name: run the full validation and print the report.
        print(validate_study(sys.argv[1]))