feat: Pre-migration checkpoint - updated docs and utilities
Updates before optimization_engine migration: - Updated migration plan to v2.1 with complete file inventory - Added OP_07 disk optimization protocol - Added SYS_16 self-aware turbo protocol - Added study archiver and cleanup utilities - Added ensemble surrogate module - Updated NX solver and session manager - Updated zernike HTML generator - Added context engineering plan - LAC session insights updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -242,19 +242,28 @@ class NXSolver:
|
||||
Format: [unit]name=value
|
||||
Example: [mm]whiffle_min=42.5
|
||||
"""
|
||||
# Default unit mapping (could be extended or made configurable)
|
||||
# Default unit mapping - MUST match NX model expression units exactly
|
||||
# Verified against working turbo V1 runs
|
||||
UNIT_MAPPING = {
|
||||
# Length parameters (mm)
|
||||
'whiffle_min': 'mm',
|
||||
'whiffle_triangle_closeness': 'mm',
|
||||
'inner_circular_rib_dia': 'mm',
|
||||
'outer_circular_rib_offset_from_outer': 'mm',
|
||||
'Pocket_Radius': 'mm',
|
||||
'center_thickness': 'mm',
|
||||
# Lateral pivot/closeness - mm in NX model (verified from V1)
|
||||
'lateral_outer_pivot': 'mm',
|
||||
'lateral_inner_pivot': 'mm',
|
||||
'lateral_middle_pivot': 'mm',
|
||||
'lateral_closeness': 'mm',
|
||||
# Angle parameters (degrees)
|
||||
'whiffle_outer_to_vertical': 'Degrees',
|
||||
# Rib/face thickness parameters (mm)
|
||||
'rib_thickness': 'mm',
|
||||
'ribs_circular_thk': 'mm',
|
||||
'rib_thickness_lateral_truss': 'mm',
|
||||
'mirror_face_thickness': 'mm',
|
||||
# Angle parameters (Degrees) - verified from working V1 runs
|
||||
'whiffle_outer_to_vertical': 'Degrees', # NX expects Degrees (verified V1)
|
||||
'lateral_inner_angle': 'Degrees',
|
||||
'lateral_outer_angle': 'Degrees',
|
||||
'blank_backface_angle': 'Degrees',
|
||||
|
||||
19
optimization_engine/surrogates/__init__.py
Normal file
19
optimization_engine/surrogates/__init__.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
Surrogate models for FEA acceleration.
|
||||
|
||||
Available surrogates:
|
||||
- EnsembleSurrogate: Multiple MLPs with uncertainty quantification
|
||||
- OODDetector: Out-of-distribution detection
|
||||
"""
|
||||
|
||||
from .ensemble_surrogate import (
|
||||
EnsembleSurrogate,
|
||||
OODDetector,
|
||||
create_and_train_ensemble
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
'EnsembleSurrogate',
|
||||
'OODDetector',
|
||||
'create_and_train_ensemble'
|
||||
]
|
||||
540
optimization_engine/surrogates/ensemble_surrogate.py
Normal file
540
optimization_engine/surrogates/ensemble_surrogate.py
Normal file
@@ -0,0 +1,540 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ensemble Surrogate with Uncertainty Quantification
|
||||
|
||||
Addresses the V5 failure mode where single MLPs gave overconfident predictions
|
||||
in out-of-distribution regions, leading L-BFGS to fake optima.
|
||||
|
||||
Key features:
|
||||
1. Ensemble of N MLPs - disagreement = uncertainty
|
||||
2. OOD detection - reject predictions far from training data
|
||||
3. Confidence bounds - never trust point predictions alone
|
||||
4. Active learning - prioritize FEA in uncertain regions
|
||||
|
||||
Author: Atomizer
|
||||
Created: 2025-12-28
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Tuple, List, Dict, Optional
|
||||
from pathlib import Path
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
HAS_TORCH = True
|
||||
except ImportError:
|
||||
HAS_TORCH = False
|
||||
logger.warning("PyTorch not available - ensemble features limited")
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
|
||||
|
||||
class MLP(nn.Module):
    """One feed-forward regressor of the ensemble.

    Architecture: a [Linear -> ReLU -> Dropout(0.1)] group per hidden
    layer, followed by a final Linear projection to ``output_dim``.
    """

    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        widths = hidden_dims or [64, 32]

        stack = []
        prev = input_dim
        for width in widths:
            stack.extend([nn.Linear(prev, width), nn.ReLU(), nn.Dropout(0.1)])
            prev = width
        stack.append(nn.Linear(prev, output_dim))

        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run a batch through the network; shape (B, input_dim) -> (B, output_dim)."""
        return self.net(x)
|
||||
|
||||
|
||||
class OODDetector:
    """
    Out-of-Distribution detector using multiple methods:
    1. Z-score check (is input within N std of training mean)
    2. KNN distance (is input close to training points)
    """

    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        # X_train: 2-D (n_samples, n_features) array of (already-normalized)
        # training inputs — train() passes X_norm here.
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k

        # Compute training statistics
        self.mean = X_train.mean(axis=0)
        # epsilon keeps constant features from causing divide-by-zero
        self.std = X_train.std(axis=0) + 1e-8

        # Fit KNN for local density estimation
        # (cap k at the dataset size so tiny training sets still work)
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)

        # Compute typical KNN distances in training set; this is the scale
        # reference used by knn_distance_check()
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))

        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is within z_threshold std of training mean.

        Returns (ok, max_z). NOTE: the annotated Tuple[bool, float] describes
        the single-point case only; for a batch, both returns are arrays.
        """
        x = np.atleast_2d(x)
        z_scores = np.abs((x - self.mean) / self.std)
        max_z = z_scores.max(axis=1)  # the worst feature decides
        is_ok = max_z < self.z_threshold
        return is_ok[0] if len(is_ok) == 1 else is_ok, max_z[0] if len(max_z) == 1 else max_z

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is close enough to training data.

        Returns (ok, avg_knn_dist); scalars for a single point, arrays for a batch.
        """
        x = np.atleast_2d(x)
        distances, _ = self.knn.kneighbors(x)
        avg_dist = distances.mean(axis=1)
        # Allow up to 3x typical distance
        is_ok = avg_dist < 3 * self.typical_knn_dist
        return is_ok[0] if len(is_ok) == 1 else is_ok, avg_dist[0] if len(avg_dist) == 1 else avg_dist

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Combined OOD check.

        NOTE(review): the `z_ok and knn_ok` combination and the float()/bool()
        casts below assume a SINGLE point; a batched x would make these numpy
        arrays and raise. Callers in this module pass one point at a time.
        """
        z_ok, z_score = self.z_score_check(x)
        knn_ok, knn_dist = self.knn_distance_check(x)

        is_ok = z_ok and knn_ok
        details = {
            'z_score': float(z_score),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_dist),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(is_ok)
        }

        return is_ok, details
|
||||
|
||||
|
||||
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.

    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        """Create an (untrained) ensemble.

        Args:
            input_dim: number of input features.
            output_dim: number of predicted outputs.
            n_models: ensemble size (more models = better uncertainty estimate).
            hidden_dims: hidden layer widths per MLP (default [64, 32]).
            device: 'auto' selects CUDA when available, otherwise any
                torch.device() string ('cpu', 'cuda:0', ...).
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]

        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)

        # Create ensemble (MLP applies its own default when hidden_dims is None)
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]

        # Normalization stats (set by train() / load())
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None

        # OOD detector (set by train(); NOT restored by load() — it needs
        # the training inputs, which are not serialized)
        self.ood_detector = None

        # Training state
        self.is_trained = False

        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) raw (unnormalized) inputs.
            Y: (n_samples, output_dim) raw targets.
            epochs: max full-batch epochs per model.
            lr: AdamW learning rate.
            val_split: fraction held out for early stopping (min 5 samples).
            patience: epochs without val improvement before early stop.

        Returns:
            Validation metrics dict: 'r2', 'mae', 'avg_ensemble_std',
            'n_val', plus 'val_losses' (best val loss per model).
        """

        # Compute normalization (epsilon avoids div-by-zero on constant columns)
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8

        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std

        # Split data
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]

        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]

        # Convert to tensors (full-batch training; datasets here are small)
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)

        # Train each model with a different seed so initializations diverge
        all_val_losses = []
        for i, model in enumerate(self.models):
            # NOTE: this mutates global torch/numpy RNG state per model
            torch.manual_seed(42 + i * 1000)  # Different init per model
            np.random.seed(42 + i * 1000)

            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()

            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None

            for epoch in range(epochs):
                # Train step (full batch)
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()

                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()

                # Early stopping: checkpoint the best weights seen so far
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break

            # Restore the best checkpoint (state was saved on CPU)
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)

            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")

        # Initialize OOD detector on the NORMALIZED inputs (predict paths
        # must therefore normalize before querying it)
        self.ood_detector = OODDetector(X_norm)

        self.is_trained = True

        # Compute ensemble metrics on the held-out split
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses

        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        Note: X_val/Y_val are already normalized (called from train()).
        """
        mean, std = self.predict_normalized(X_val)

        # R² per output column
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)

        # MAE per output column
        mae = np.abs(Y_val - mean).mean(axis=0)

        # Average ensemble disagreement
        avg_std = std.mean()

        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns:
            mean: (n_samples, n_outputs) ensemble mean.
            std: (n_samples, n_outputs) std across models (disagreement).
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)

        preds = []
        for model in self.models:
            model.eval()  # eval mode disables dropout -> deterministic output
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)

        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)

        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.

        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)

        Raises:
            RuntimeError: if called before train()/load(). (Previously this
                crashed with an opaque TypeError on the None norm stats.)
        """
        if not self.is_trained:
            raise RuntimeError("EnsembleSurrogate.predict() called before train()/load()")

        X = np.atleast_2d(X)

        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std

        # Get predictions in normalized space
        mean_norm, std_norm = self.predict_normalized(X_norm)

        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std

        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.

        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result per point
        - recommendation: 'trust', 'verify', or 'reject' per point

        Raises:
            RuntimeError: if the OOD detector is unavailable. load() does NOT
                restore the detector, so this method only works after train().
        """
        if self.ood_detector is None:
            raise RuntimeError(
                "OOD detector unavailable - call train() first (load() does not restore it)"
            )

        X = np.atleast_2d(X)

        mean, std = self.predict(X)

        # OOD check — one point at a time (the detector combines scalar flags)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]

        # Confidence score (0 = none, 1 = high), based on relative std
        # (lower = better) and OOD status (in-distribution = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)

        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty

        # Map confidence to a recommended action
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead

        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected-improvement-style acquisition function.

        High score = worth running FEA (either promising or uncertain).

        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)

        Returns:
            scores: acquisition score per point

        Raises:
            RuntimeError: if the OOD detector is unavailable (see
                predict_with_confidence()).
        """
        if self.ood_detector is None:
            raise RuntimeError(
                "OOD detector unavailable - call train() first (load() does not restore it)"
            )

        X = np.atleast_2d(X)
        mean, std = self.predict(X)

        # For minimization: improvement = best - predicted.
        # Take first objective (weighted sum) for acquisition.
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]

        improvement = best_so_far - mean

        # Simple acquisition: exploitation + exploration bonus
        # (fix: removed an unused z-score intermediate from the original)
        scores = improvement + xi * std

        # Penalize OOD points — their predicted improvement is untrustworthy
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                scores[i] *= 0.1  # Heavy penalty for OOD

        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.

        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)

        Returns:
            selected: indices of selected candidates
            scores: acquisition scores of the selected candidates
        """
        scores = self.acquisition_score(candidates, best_so_far)

        # Greedy selection with a diversity bonus
        selected = []
        remaining = list(range(len(candidates)))

        while len(selected) < n_select and remaining:
            if not selected:
                # First pick: pure highest acquisition score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later picks: balance score with distance to already-selected
                def combined_score(i):
                    # Min Euclidean distance to any already-selected candidate
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist

                best_idx = max(remaining, key=combined_score)

            selected.append(best_idx)
            remaining.remove(best_idx)

        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble weights + normalization config to a directory.

        Note: the OOD detector is not saved (it requires the training inputs).
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Save each model's weights
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")

        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        The returned surrogate can predict(), but its OOD detector is NOT
        restored — predict_with_confidence()/acquisition_score() will raise
        until train() is called again.
        """
        path = Path(path)

        with open(path / "config.json") as f:
            config = json.load(f)

        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )

        # Load normalization stats
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None

        # Load model weights onto the target device
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)

        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")

        return surrogate
|
||||
|
||||
|
||||
# Convenience function for quick usage
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Create and train an ensemble surrogate.

    Infers input/output dims from the data; a 1-D Y is treated as a
    single-output regression target.
    """
    n_outputs = Y.shape[1] if Y.ndim > 1 else 1
    surrogate = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=n_outputs,
        n_models=n_models
    )

    # train() expects a 2-D target array
    targets = Y if Y.ndim > 1 else Y.reshape(-1, 1)

    metrics = surrogate.train(X, targets, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")

    return surrogate
|
||||
@@ -24,6 +24,7 @@ SESSION_LOCK_DIR = Path(os.environ.get('TEMP', '/tmp')) / 'atomizer_nx_sessions'
|
||||
|
||||
# Default NX installation paths (in order of preference)
|
||||
DEFAULT_NX_PATHS = [
|
||||
Path(r"C:\Program Files\Siemens\DesigncenterNX2512\NXBIN\ugraf.exe"), # DesignCenter (preferred)
|
||||
Path(r"C:\Program Files\Siemens\NX2506\NXBIN\ugraf.exe"),
|
||||
Path(r"C:\Program Files\Siemens\NX2412\NXBIN\ugraf.exe"),
|
||||
Path(r"C:\Program Files\Siemens\Simcenter3D_2506\NXBIN\ugraf.exe"),
|
||||
|
||||
438
optimization_engine/utils/study_archiver.py
Normal file
438
optimization_engine/utils/study_archiver.py
Normal file
@@ -0,0 +1,438 @@
|
||||
"""
|
||||
Study Archiver - Disk Space Optimization for Atomizer Studies
|
||||
|
||||
This module provides utilities for:
|
||||
1. Cleaning up completed studies (removing regenerable files)
|
||||
2. Archiving studies to remote storage (dalidou server)
|
||||
3. Restoring archived studies on-demand
|
||||
|
||||
Usage:
|
||||
# Cleanup a completed study (keep only essential files)
|
||||
python -m optimization_engine.utils.study_archiver cleanup studies/M1_Mirror/m1_mirror_V12
|
||||
|
||||
# Archive to remote server
|
||||
python -m optimization_engine.utils.study_archiver archive studies/M1_Mirror/m1_mirror_V12
|
||||
|
||||
# Restore from remote
|
||||
python -m optimization_engine.utils.study_archiver restore m1_mirror_V12
|
||||
|
||||
# Show disk usage analysis
|
||||
python -m optimization_engine.utils.study_archiver analyze studies/M1_Mirror
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import tarfile
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional, Dict, List, Tuple
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration
# Remote archive target (dalidou server). The local-WiFi address is used by
# default; the archive/restore helpers switch to the Tailscale address when
# called with use_tailscale=True.
REMOTE_CONFIG = {
    "host": "192.168.86.50",  # Local WiFi
    "host_tailscale": "100.80.199.40",  # Remote via Tailscale
    "user": "papa",
    "archive_path": "/srv/storage/atomizer-archive",
    "ssh_port": 22,
}

# Files to KEEP per trial (essential for analysis)
ESSENTIAL_EXTENSIONS = {
    '.op2',   # Nastran binary results (Zernike extraction)
    '.json',  # Parameters, results, metadata
    '.npz',   # Pre-computed Zernike coefficients
    '.html',  # Generated reports
    '.png',   # Visualization images
    '.csv',   # Exported data
}

# Files to DELETE per trial (regenerable from master + params)
DELETABLE_EXTENSIONS = {
    '.prt',   # NX part files (copy of master)
    '.fem',   # FEM mesh files (copy of master)
    '.sim',   # Simulation files (copy of master)
    '.afm',   # Assembly FEM files
    '.dat',   # Solver input deck (can regenerate)
    '.f04',   # Nastran output log
    '.f06',   # Nastran printed output
    '.log',   # Generic log files
    '.diag',  # Diagnostic files
    '.txt',   # Temp text files
    '.exp',   # Expression files
    '.bak',   # Backup files
}

# Folders to always keep entirely
# NOTE(review): KEEP_FOLDERS is defined but never consulted by
# cleanup_study(), which only deletes inside 2_iterations — so these folders
# are currently safe by construction. Confirm whether future cleanup logic
# is meant to check this set explicitly.
KEEP_FOLDERS = {
    '1_setup',              # Master model files (source of truth)
    '3_results',            # Final results, database, reports
    'best_design_archive',  # Archived best designs
}
|
||||
|
||||
|
||||
def analyze_study(study_path: Path) -> Dict:
    """Scan one study folder and summarize its disk usage.

    Returns a dict with total size, per-extension and per-top-level-folder
    byte counts, essential vs deletable byte totals (per the module's
    extension sets), and the number of trial folders under 2_iterations.
    """
    study_path = Path(study_path)

    report = {
        "study_name": study_path.name,
        "total_size_bytes": 0,
        "by_extension": {},
        "by_folder": {},
        "essential_size": 0,
        "deletable_size": 0,
        "trial_count": 0,
    }

    for entry in study_path.rglob("*"):
        if not entry.is_file():
            continue

        size = entry.stat().st_size
        ext = entry.suffix.lower()

        report["total_size_bytes"] += size
        report["by_extension"][ext] = report["by_extension"].get(ext, 0) + size

        # Attribute the file to its top-level folder within the study
        parts = entry.relative_to(study_path).parts
        if parts:
            top = parts[0]
            report["by_folder"][top] = report["by_folder"].get(top, 0) + size

        # Classify as essential vs deletable (unlisted extensions are neither)
        if ext in ESSENTIAL_EXTENSIONS:
            report["essential_size"] += size
        elif ext in DELETABLE_EXTENSIONS:
            report["deletable_size"] += size

    # Count trial folders (both "trial_*" and "iter*" naming schemes)
    iterations_dir = study_path / "2_iterations"
    if iterations_dir.exists():
        report["trial_count"] = sum(
            1
            for d in iterations_dir.iterdir()
            if d.is_dir() and (d.name.startswith("trial_") or d.name.startswith("iter"))
        )

    return report
|
||||
|
||||
|
||||
def print_analysis(analysis: Dict):
    """Print a formatted disk-usage report produced by analyze_study().

    Fix: percentage math is guarded so an empty study (0 total bytes) no
    longer raises ZeroDivisionError; percentages simply read 0.0%.
    """
    total_gb = analysis["total_size_bytes"] / 1e9
    essential_gb = analysis["essential_size"] / 1e9
    deletable_gb = analysis["deletable_size"] / 1e9
    # Guard: a 0-byte study would otherwise divide by zero below.
    pct_base = total_gb if total_gb > 0 else 1.0

    print(f"\n{'='*60}")
    print(f"Study: {analysis['study_name']}")
    print(f"{'='*60}")
    print(f"Total size: {total_gb:8.2f} GB")
    print(f"Trials: {analysis['trial_count']:8d}")
    print(f"Essential: {essential_gb:8.2f} GB ({100*essential_gb/pct_base:.1f}%)")
    print(f"Deletable: {deletable_gb:8.2f} GB ({100*deletable_gb/pct_base:.1f}%)")
    print(f"Potential save: {deletable_gb:8.2f} GB")

    # Largest folders first
    print(f"\nBy folder:")
    for folder, size in sorted(analysis["by_folder"].items(), key=lambda x: -x[1]):
        print(f"  {folder:25} {size/1e9:8.2f} GB")

    # Largest extensions first, tagged with their cleanup classification
    print(f"\nTop extensions:")
    for ext, size in sorted(analysis["by_extension"].items(), key=lambda x: -x[1])[:10]:
        status = "[KEEP]" if ext in ESSENTIAL_EXTENSIONS else "[DEL?]" if ext in DELETABLE_EXTENSIONS else "[    ]"
        print(f"  {status} {ext:10} {size/1e9:8.2f} GB")
|
||||
|
||||
|
||||
def cleanup_study(study_path: Path, dry_run: bool = True) -> Tuple[int, int]:
    """
    Clean up a completed study by removing regenerable files from trial folders.

    Only files directly inside trial directories under 2_iterations whose
    extension is in DELETABLE_EXTENSIONS are touched.

    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted

    Returns:
        (files_deleted, bytes_freed) — (0, 0) on a dry run
    """
    study_path = Path(study_path)
    iterations_dir = study_path / "2_iterations"

    if not iterations_dir.exists():
        logger.warning(f"No iterations folder found in {study_path}")
        return 0, 0

    # Gather every deletable file up front so the dry-run report and the
    # real deletion operate on the same list.
    candidates = [
        f
        for trial_dir in iterations_dir.iterdir()
        if trial_dir.is_dir()
        for f in trial_dir.iterdir()
        if f.is_file() and f.suffix.lower() in DELETABLE_EXTENSIONS
    ]
    bytes_to_free = sum(f.stat().st_size for f in candidates)

    if dry_run:
        print(f"\n[DRY RUN] Would delete {len(candidates)} files, freeing {bytes_to_free/1e9:.2f} GB")
        print("\nSample files to delete:")
        for f in candidates[:10]:
            print(f"  {f.relative_to(study_path)}")
        if len(candidates) > 10:
            print(f"  ... and {len(candidates) - 10} more")
        return 0, 0

    # Real deletion: best-effort per file, failures are logged and skipped
    deleted = 0
    freed = 0
    for f in candidates:
        try:
            size = f.stat().st_size
            f.unlink()
            deleted += 1
            freed += size
        except Exception as e:
            logger.error(f"Failed to delete {f}: {e}")

    print(f"Deleted {deleted} files, freed {freed/1e9:.2f} GB")
    return deleted, freed
|
||||
|
||||
|
||||
def archive_to_remote(
    study_path: Path,
    use_tailscale: bool = False,
    dry_run: bool = True
) -> bool:
    """
    Archive a study to the remote dalidou server.

    Creates a local .tar.gz of the whole study, rsyncs it to
    REMOTE_CONFIG["archive_path"], then deletes the local archive.

    Args:
        study_path: Path to study folder
        use_tailscale: Use Tailscale IP (for remote access)
        dry_run: If True, only report what would be done

    Returns:
        True if successful

    NOTE(review): the ssh/rsync commands are built as shell strings with
    shell=True from REMOTE_CONFIG and the study path — fine for this trusted
    internal config, but would be a command-injection risk if paths/hosts
    ever come from untrusted input. Also, the mkdir step uses check=True and
    will raise CalledProcessError if ssh fails.
    """
    study_path = Path(study_path)
    study_name = study_path.name

    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]

    # Create compressed archive locally first (next to the study folder);
    # name is date-stamped so repeated archives don't collide day-to-day
    archive_name = f"{study_name}_{datetime.now().strftime('%Y%m%d')}.tar.gz"
    local_archive = study_path.parent / archive_name

    if dry_run:
        print(f"\n[DRY RUN] Would archive {study_name}")
        print(f"  1. Create {archive_name}")
        print(f"  2. Upload to {user}@{host}:{remote_path}/")
        print(f"  3. Delete local archive")
        return True

    print(f"Creating archive: {archive_name}")
    with tarfile.open(local_archive, "w:gz") as tar:
        tar.add(study_path, arcname=study_name)

    archive_size = local_archive.stat().st_size
    print(f"Archive size: {archive_size/1e9:.2f} GB")

    # Upload via rsync (more reliable than scp for large files)
    print(f"Uploading to {host}...")

    # First ensure remote directory exists (raises on ssh failure)
    ssh_cmd = f'ssh {user}@{host} "mkdir -p {remote_path}"'
    subprocess.run(ssh_cmd, shell=True, check=True)

    # Upload
    rsync_cmd = f'rsync -avz --progress "{local_archive}" {user}@{host}:{remote_path}/'
    result = subprocess.run(rsync_cmd, shell=True)

    if result.returncode == 0:
        print("Upload successful!")
        # Clean up local archive only after a confirmed upload
        local_archive.unlink()
        return True
    else:
        # Local archive is intentionally kept on failure so the upload
        # can be retried without re-tarring
        print(f"Upload failed with code {result.returncode}")
        return False
|
||||
|
||||
|
||||
def restore_from_remote(
    study_name: str,
    target_dir: Path,
    use_tailscale: bool = False
) -> bool:
    """
    Restore a study from the remote server.

    Finds the first archive matching "{study_name}*.tar.gz" on the remote,
    downloads it via rsync, extracts it under target_dir, then removes the
    downloaded tarball.

    Args:
        study_name: Name of the study to restore
        target_dir: Where to extract the study
        use_tailscale: Use Tailscale IP

    Returns:
        True if successful

    NOTE(review): tar.extractall() without a filter trusts the archive's
    member paths (tar path-traversal risk). Acceptable for self-produced
    archives; consider extractall(..., filter="data") on Python 3.12+.
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]

    target_dir = Path(target_dir)

    # Find the archive on remote — picks the FIRST match if several
    # date-stamped archives exist for this study
    print(f"Looking for {study_name} on {host}...")

    ssh_cmd = f'ssh {user}@{host} "ls {remote_path}/{study_name}*.tar.gz 2>/dev/null | head -1"'
    result = subprocess.run(ssh_cmd, shell=True, capture_output=True, text=True)

    if not result.stdout.strip():
        print(f"No archive found for {study_name}")
        return False

    remote_archive = result.stdout.strip()
    local_archive = target_dir / Path(remote_archive).name

    print(f"Downloading: {remote_archive}")
    rsync_cmd = f'rsync -avz --progress {user}@{host}:"{remote_archive}" "{local_archive}"'
    result = subprocess.run(rsync_cmd, shell=True)

    if result.returncode != 0:
        print("Download failed")
        return False

    print("Extracting...")
    with tarfile.open(local_archive, "r:gz") as tar:
        tar.extractall(target_dir)

    # Clean up the downloaded tarball; the extracted folder remains
    local_archive.unlink()
    print(f"Restored to {target_dir / study_name}")
    return True
|
||||
|
||||
|
||||
def list_remote_archives(use_tailscale: bool = False) -> List[Dict]:
    """List all archived studies on the remote server."""
    # Pick the route to the server, then ask it for a long-format listing.
    host_key = "host_tailscale" if use_tailscale else "host"
    host = REMOTE_CONFIG[host_key]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]

    ssh_cmd = f'ssh {user}@{host} "ls -lh {remote_path}/*.tar.gz 2>/dev/null"'
    listing = subprocess.run(ssh_cmd, shell=True, capture_output=True, text=True)

    # Parse `ls -lh` rows: perms links owner group size month day time path.
    archives = []
    for entry in listing.stdout.strip().split('\n'):
        if not entry or '.tar.gz' not in entry:
            continue
        fields = entry.split()
        if len(fields) < 9:
            continue
        archives.append({
            "name": fields[-1].split('/')[-1],
            "size": fields[4],
            "date": f"{fields[5]} {fields[6]} {fields[7]}",
        })

    return archives
|
||||
|
||||
|
||||
def analyze_all_studies(studies_dir: Path) -> Dict:
    """Analyze all studies in a directory."""
    studies_dir = Path(studies_dir)

    summary = {
        "total_size": 0,
        "total_essential": 0,
        "total_deletable": 0,
        "studies": [],
    }

    # Aggregate per-study analyses; hidden folders are not studies.
    candidates = [
        entry for entry in sorted(studies_dir.iterdir())
        if entry.is_dir() and not entry.name.startswith('.')
    ]
    for study in candidates:
        report = analyze_study(study)
        summary["studies"].append(report)
        summary["total_size"] += report["total_size_bytes"]
        summary["total_essential"] += report["essential_size"]
        summary["total_deletable"] += report["deletable_size"]

    return summary
|
||||
|
||||
|
||||
def main():
    """CLI entry point: analyze, clean up, archive, restore, or list studies."""
    import argparse

    parser = argparse.ArgumentParser(description="Atomizer Study Archiver")
    parser.add_argument("command", choices=["analyze", "cleanup", "archive", "restore", "list"])
    parser.add_argument("path", nargs="?", help="Study path or name")
    # NOTE(review): --dry-run is effectively a no-op since default=True; the
    # operative flag is --execute below. Kept for help-text discoverability.
    parser.add_argument("--dry-run", action="store_true", default=True,
                        help="Don't actually delete/transfer (default: True)")
    parser.add_argument("--execute", action="store_true",
                        help="Actually perform the operation")
    parser.add_argument("--tailscale", action="store_true",
                        help="Use Tailscale IP for remote access")

    args = parser.parse_args()

    # Safe by default: destructive actions only happen with --execute.
    dry_run = not args.execute

    if args.command == "analyze":
        if not args.path:
            print("Usage: study_archiver analyze <path>")
            return

        path = Path(args.path)
        if path.is_dir():
            # Check if it's a single study or a collection
            if (path / "optimization_config.json").exists() or (path / "1_setup").exists():
                # Single study
                analysis = analyze_study(path)
                print_analysis(analysis)
            else:
                # Collection of studies: aggregate and print a summary table
                total = analyze_all_studies(path)
                print(f"\n{'='*60}")
                print(f"Summary: {len(total['studies'])} studies")
                print(f"{'='*60}")
                print(f"Total size: {total['total_size']/1e9:8.2f} GB")
                print(f"Essential: {total['total_essential']/1e9:8.2f} GB")
                print(f"Deletable: {total['total_deletable']/1e9:8.2f} GB")
                print(f"Potential save: {total['total_deletable']/1e9:8.2f} GB")
                print(f"\nPer study:")
                for s in total["studies"]:
                    print(f" {s['study_name']:40} {s['total_size_bytes']/1e9:6.2f} GB ({s['trial_count']:3d} trials)")

    elif args.command == "cleanup":
        if not args.path:
            print("Usage: study_archiver cleanup <study_path> [--execute]")
            return
        cleanup_study(Path(args.path), dry_run=dry_run)

    elif args.command == "archive":
        if not args.path:
            print("Usage: study_archiver archive <study_path> [--execute] [--tailscale]")
            return
        archive_to_remote(Path(args.path), use_tailscale=args.tailscale, dry_run=dry_run)

    elif args.command == "restore":
        if not args.path:
            print("Usage: study_archiver restore <study_name> [--tailscale]")
            return
        # Restore into ./studies relative to the current working directory
        target = Path.cwd() / "studies"
        restore_from_remote(args.path, target, use_tailscale=args.tailscale)

    elif args.command == "list":
        archives = list_remote_archives(use_tailscale=args.tailscale)
        if archives:
            print(f"\nArchived studies on dalidou:")
            print(f"{'='*60}")
            for a in archives:
                print(f" {a['name']:40} {a['size']:>8} {a['date']}")
        else:
            print("No archives found (or server not reachable)")


if __name__ == "__main__":
    main()
|
||||
411
optimization_engine/utils/study_cleanup.py
Normal file
411
optimization_engine/utils/study_cleanup.py
Normal file
@@ -0,0 +1,411 @@
|
||||
"""
|
||||
Study Cleanup Utility
|
||||
====================
|
||||
|
||||
Cleans up completed optimization studies to save disk space by removing
|
||||
large intermediate files (NX models, FEM meshes, solver results) while
|
||||
preserving essential data (parameters, extracted results, database).
|
||||
|
||||
Usage:
|
||||
python -m optimization_engine.utils.study_cleanup <study_path> [options]
|
||||
|
||||
Options:
|
||||
--dry-run Show what would be deleted without actually deleting
|
||||
--keep-best N Keep iteration folders for the top N best trials
|
||||
--keep-pareto Keep all Pareto-optimal iterations (for multi-objective)
|
||||
--aggressive Delete ALL iteration data (only keep DB and config)
|
||||
|
||||
The database (study.db) contains all optimization results and can regenerate
|
||||
any analysis. The original NX model in 1_setup is always preserved.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Files to ALWAYS keep in iteration folders (tiny, essential)
# Names are matched case-insensitively by the cleanup routines.
ESSENTIAL_FILES = {
    'params.exp',  # Design parameters for this iteration
    '_temp_mass.txt',  # Extracted mass
    '_temp_part_properties.json',  # Part properties
    '_temp_zernike.json',  # Zernike coefficients (if exists)
    'results.json',  # Any extracted results
}

# Extensions to DELETE (large, regenerable/already extracted)
# Matched against lowercased file suffixes; sizes are typical per iteration.
DELETABLE_EXTENSIONS = {
    '.op2',  # Nastran binary results (~65 MB each)
    '.prt',  # NX Part files (~30-35 MB each)
    '.fem',  # FEM mesh files (~15 MB each)
    '.dat',  # Nastran input deck (~15 MB each)
    '.sim',  # Simulation file (~7 MB each)
    '.afm',  # FEA auxiliary (~4 MB each)
    '.f04',  # Nastran log
    '.f06',  # Nastran output
    '.log',  # Solver log
    '.diag',  # Diagnostics
}
|
||||
|
||||
|
||||
def get_study_info(study_path: Path) -> dict:
    """
    Get study metadata from config and database.

    Args:
        study_path: Path to the study folder.

    Returns:
        dict with keys: name, has_config, has_db, trial_count,
        best_trials (up to 10 trial ids, best/lowest value first),
        pareto_trials, plus 'config' (parsed JSON) when
        optimization_config.json exists.
    """
    config_path = study_path / 'optimization_config.json'
    # Try both possible DB locations (folder layout changed between versions)
    db_path = study_path / '3_results' / 'study.db'
    if not db_path.exists():
        db_path = study_path / '2_results' / 'study.db'

    info = {
        'name': study_path.name,
        'has_config': config_path.exists(),
        'has_db': db_path.exists(),
        'trial_count': 0,
        'best_trials': [],
        'pareto_trials': [],
    }

    if config_path.exists():
        with open(config_path) as f:
            info['config'] = json.load(f)

    if db_path.exists():
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Get trial count
            cursor.execute("SELECT COUNT(*) FROM trials WHERE state = 'COMPLETE'")
            info['trial_count'] = cursor.fetchone()[0]

            # Best trials (single-objective, lowest value first). Some DB
            # schemas lack this table, so only sqlite errors are tolerated --
            # a bare except here would also hide unrelated bugs.
            try:
                cursor.execute("""
                    SELECT trial_id, value FROM trial_values
                    WHERE objective = 0
                    ORDER BY value ASC LIMIT 10
                """)
                info['best_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass

            # Pareto attribute (multi-objective studies only)
            try:
                cursor.execute("""
                    SELECT DISTINCT trial_id FROM trial_system_attrs
                    WHERE key = 'pareto_optimal' AND value = '1'
                """)
                info['pareto_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
        finally:
            # Always close, even if the 'trials' query raises
            conn.close()

    return info
|
||||
|
||||
|
||||
def calculate_cleanup_savings(study_path: Path, keep_iters: Optional[set] = None) -> dict:
    """
    Calculate how much space would be saved by cleanup.

    Args:
        study_path: Path to the study folder.
        keep_iters: Iteration numbers whose folders are kept in full.

    Returns:
        dict with 'total_size', 'deletable_size', 'keep_size' (bytes).
    """
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'  # Legacy structure

    if not iterations_path.exists():
        return {'total_size': 0, 'deletable_size': 0, 'keep_size': 0}

    total_size = 0
    deletable_size = 0
    keep_size = 0
    keep_iters = keep_iters or set()
    # Hoisted: build the case-insensitive essential-name set once instead of
    # recomputing it for every file in every iteration folder.
    essential_names = {e.lower() for e in ESSENTIAL_FILES}

    for iter_folder in iterations_path.iterdir():
        if not iter_folder.is_dir():
            continue

        # Extract iteration number; skip folders not named like 'iterNNN'.
        # (Catch only ValueError -- a bare except would also swallow
        # KeyboardInterrupt and real bugs.)
        try:
            iter_num = int(iter_folder.name.replace('iter', ''))
        except ValueError:
            continue

        for f in iter_folder.iterdir():
            if not f.is_file():
                continue
            size = f.stat().st_size
            total_size += size

            # Keep entire folder if in keep_iters
            if iter_num in keep_iters:
                keep_size += size
                continue

            # Keep essential files
            if f.name.lower() in essential_names:
                keep_size += size
            elif f.suffix.lower() in DELETABLE_EXTENSIONS:
                deletable_size += size
            else:
                keep_size += size  # Keep unknown files by default

    return {
        'total_size': total_size,
        'deletable_size': deletable_size,
        'keep_size': keep_size,
    }
|
||||
|
||||
|
||||
def cleanup_study(
    study_path: Path,
    dry_run: bool = True,
    keep_best: int = 0,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> dict:
    """
    Clean up a study to save disk space.

    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
        keep_best: Number of best iterations to keep completely
        keep_pareto: Keep all Pareto-optimal iterations
        aggressive: Delete ALL iteration folders (only keep DB)

    Returns:
        dict with cleanup statistics; when no iterations folder exists the
        dict contains only 'status', 'deleted_bytes', 'deleted_files'.

    Raises:
        ValueError: If study_path does not exist.
    """
    study_path = Path(study_path)
    if not study_path.exists():
        raise ValueError(f"Study path does not exist: {study_path}")

    # Get study info (trial count, best/Pareto trial ids from the DB)
    info = get_study_info(study_path)

    # Determine which iterations to keep in full
    keep_iters = set()
    if keep_best > 0 and info['best_trials']:
        keep_iters.update(info['best_trials'][:keep_best])
    if keep_pareto and info['pareto_trials']:
        keep_iters.update(info['pareto_trials'])

    # Find iterations folder (legacy studies used '1_working')
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'

    if not iterations_path.exists():
        return {'status': 'no_iterations', 'deleted_bytes': 0, 'deleted_files': 0}

    # Calculate savings up front so dry runs report accurate numbers
    savings = calculate_cleanup_savings(study_path, keep_iters)

    deleted_bytes = 0
    deleted_files = 0
    deleted_folders = 0

    if aggressive:
        # Delete the entire iterations folder; report all bytes either way
        # so a dry run predicts the same savings as an actual run.
        deleted_bytes = savings['total_size']
        if not dry_run:
            shutil.rmtree(iterations_path)
            deleted_folders = 1
    else:
        # Selective cleanup: remove large regenerable files, keeping the
        # tiny essential results and any explicitly kept iterations.
        # Hoisted: lowercase essential-name set built once, not per file.
        essential_names = {e.lower() for e in ESSENTIAL_FILES}
        for iter_folder in iterations_path.iterdir():
            if not iter_folder.is_dir():
                continue

            # Extract iteration number; skip folders not named like 'iterNNN'
            # (narrow except -- bare except would hide KeyboardInterrupt too)
            try:
                iter_num = int(iter_folder.name.replace('iter', ''))
            except ValueError:
                continue

            # Skip kept iterations entirely
            if iter_num in keep_iters:
                continue

            for f in iter_folder.iterdir():
                if not f.is_file():
                    continue

                # Keep essential files
                if f.name.lower() in essential_names:
                    continue

                # Delete deletable extensions (count size before unlinking)
                if f.suffix.lower() in DELETABLE_EXTENSIONS:
                    size = f.stat().st_size
                    if not dry_run:
                        f.unlink()
                    deleted_bytes += size
                    deleted_files += 1

    return {
        'status': 'dry_run' if dry_run else 'completed',
        'study_name': info['name'],
        'trial_count': info['trial_count'],
        'kept_iterations': list(keep_iters),
        'total_size_before': savings['total_size'],
        'deleted_bytes': deleted_bytes,
        'deleted_files': deleted_files,
        'deleted_folders': deleted_folders,
        'space_saved_gb': deleted_bytes / (1024**3),
    }
|
||||
|
||||
|
||||
def cleanup_batch(
    parent_path: Path,
    pattern: str = "*",
    dry_run: bool = True,
    keep_best: int = 3,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> list:
    """
    Clean up multiple studies matching a pattern.

    Args:
        parent_path: Parent directory containing studies
        pattern: Glob pattern to match study folders (e.g., "m1_mirror_*")
        dry_run: If True, only report
        keep_best: Keep N best iterations per study
        keep_pareto: Keep Pareto-optimal iterations
        aggressive: Delete all iteration folders

    Returns:
        List of cleanup results
    """
    parent_path = Path(parent_path)
    results = []

    for candidate in sorted(parent_path.glob(pattern)):
        if not candidate.is_dir():
            continue
        # Only folders that contain an iterations directory count as studies.
        looks_like_study = (
            (candidate / '2_iterations').exists()
            or (candidate / '1_working').exists()
        )
        if not looks_like_study:
            continue

        # A failure in one study must not abort the rest of the batch.
        try:
            outcome = cleanup_study(
                candidate,
                dry_run=dry_run,
                keep_best=keep_best,
                keep_pareto=keep_pareto,
                aggressive=aggressive,
            )
        except Exception as exc:
            outcome = {
                'study_name': candidate.name,
                'status': 'error',
                'error': str(exc),
            }
        results.append(outcome)

    return results
|
||||
|
||||
|
||||
def main():
    """CLI entry point: clean one study, or a batch of studies via --batch."""
    parser = argparse.ArgumentParser(
        description='Clean up completed optimization studies to save disk space.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('study_path', type=Path, help='Path to study folder or parent directory')
    # NOTE(review): --dry-run is effectively a no-op since default=True; the
    # operative flag is --execute below. Kept for help-text discoverability.
    parser.add_argument('--dry-run', action='store_true', default=True,
                        help='Show what would be deleted without deleting (default)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete files (opposite of --dry-run)')
    parser.add_argument('--keep-best', type=int, default=3,
                        help='Keep N best iterations completely (default: 3)')
    parser.add_argument('--keep-pareto', action='store_true',
                        help='Keep all Pareto-optimal iterations')
    parser.add_argument('--aggressive', action='store_true',
                        help='Delete ALL iteration data (only keep DB)')
    parser.add_argument('--batch', type=str, metavar='PATTERN',
                        help='Clean multiple studies matching pattern (e.g., "m1_mirror_*")')

    args = parser.parse_args()

    # Safe by default: files are only deleted when --execute is passed.
    dry_run = not args.execute

    if args.batch:
        # Batch cleanup mode: study_path is the parent directory
        print(f"\n{'='*60}")
        print(f"BATCH CLEANUP: {args.study_path}")
        print(f"Pattern: {args.batch}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")

        results = cleanup_batch(
            args.study_path,
            pattern=args.batch,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )

        print(f"\n{'='*60}")
        print("BATCH RESULTS")
        print(f"{'='*60}")
        print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
        print("-" * 75)

        total_saved = 0
        for r in results:
            if r.get('status') == 'error':
                print(f"{r['study_name']:<45} ERROR: {r.get('error', 'Unknown')}")
            else:
                saved = r.get('space_saved_gb', 0)
                total_saved += saved
                print(f"{r['study_name']:<45} {r.get('trial_count', 0):>7} "
                      f"{r.get('total_size_before', 0)/(1024**3):>7.1f}G {saved:>7.1f}G")

        print("-" * 75)
        print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")

        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")

        return results

    else:
        # Single study cleanup
        print(f"\n{'='*60}")
        print(f"STUDY CLEANUP: {args.study_path.name}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN (no files deleted)' if dry_run else 'EXECUTE (files WILL be deleted)'}")
        print(f"Keep best: {args.keep_best} iterations")
        print(f"Keep Pareto: {args.keep_pareto}")
        print(f"Aggressive: {args.aggressive}")

        result = cleanup_study(
            args.study_path,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )

        # Bug fix: cleanup_study returns a minimal dict (no 'trial_count',
        # 'kept_iterations', ...) when no iterations folder exists; the
        # report below would raise KeyError in that case.
        if result.get('status') == 'no_iterations':
            print("\nNo iterations folder found - nothing to clean.")
            return result

        print(f"\n{'='*60}")
        print("RESULTS")
        print(f"{'='*60}")
        print(f"Trials in study: {result['trial_count']}")
        print(f"Iterations kept: {len(result['kept_iterations'])} {result['kept_iterations'][:5]}{'...' if len(result['kept_iterations']) > 5 else ''}")
        print(f"Total size before: {result['total_size_before'] / (1024**3):.2f} GB")
        print(f"{'Would delete' if dry_run else 'Deleted'}: {result['deleted_files']} files")
        print(f"Space {'to save' if dry_run else 'saved'}: {result['space_saved_gb']:.2f} GB")

        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")

        return result


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user