refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
274
optimization_engine/nx/model_cleanup.py
Normal file
274
optimization_engine/nx/model_cleanup.py
Normal file
@@ -0,0 +1,274 @@
|
||||
"""
|
||||
Model Cleanup System
|
||||
|
||||
Intelligent cleanup of trial model files to save disk space.
|
||||
Keeps top-N trials based on objective value, deletes CAD/FEM files for poor trials.
|
||||
|
||||
Strategy:
|
||||
- Preserve ALL trial results.json files (small, contain critical data)
|
||||
- Delete large CAD/FEM files (.prt, .sim, .fem, .op2, .f06) for non-top-N trials
|
||||
- Keep best trial models + user-specified number of top trials
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
import json
|
||||
import shutil
|
||||
|
||||
|
||||
class ModelCleanup:
    """
    Clean up trial directories to save disk space.

    Deletes large model files (.prt, .sim, .fem, .op2, .f06, ...) from trials
    that are not in the top-N performers, while always preserving the small
    JSON/text files that contain the critical trial data.
    """

    # File extensions to delete (large CAD/FEM/result files).
    # Matching is case-insensitive — the file suffix is lowercased before the
    # lookup — so only lowercase entries are needed (uppercase .MASTER/.DBALL
    # variants were redundant and have been removed).
    CLEANUP_EXTENSIONS = {
        '.prt',     # NX part files
        '.sim',     # NX simulation files
        '.fem',     # FEM mesh files
        '.afm',     # NX assembly FEM
        '.op2',     # Nastran binary results
        '.f06',     # Nastran text results
        '.dat',     # Nastran input deck
        '.bdf',     # Nastran bulk data
        '.pch',     # Nastran punch file
        '.log',     # Nastran log
        '.master',  # Nastran master file
        '.dball',   # Nastran database
    }

    # Files to ALWAYS keep, regardless of extension (small, critical data).
    PRESERVE_FILES = {
        'results.json',
        'trial_metadata.json',
        'extraction_log.txt',
    }

    def __init__(self, substudy_dir: Path):
        """
        Initialize cleanup manager.

        Args:
            substudy_dir: Path to substudy directory containing trial_XXX folders
        """
        self.substudy_dir = Path(substudy_dir)
        self.history_file = self.substudy_dir / 'history.json'
        self.cleanup_log = self.substudy_dir / 'cleanup_log.json'

    def cleanup_models(
        self,
        keep_top_n: int = 10,
        dry_run: bool = False
    ) -> Dict:
        """
        Clean up trial model files, keeping only top-N performers.

        Args:
            keep_top_n: Number of best trials to keep models for
            dry_run: If True, only report what would be deleted without deleting

        Returns:
            Dictionary with cleanup statistics

        Raises:
            FileNotFoundError: If the substudy's history.json does not exist.
        """
        if not self.history_file.exists():
            raise FileNotFoundError(f"History file not found: {self.history_file}")

        with open(self.history_file, 'r') as f:
            history = json.load(f)

        # Rank trials by objective value (minimization: smaller is better).
        # Trials missing 'total_objective' sort last and are never preserved.
        sorted_trials = sorted(
            history, key=lambda t: t.get('total_objective', float('inf'))
        )

        # Top-N trial numbers whose model files should survive cleanup.
        # max(..., 0) keeps the original semantics for non-positive keep_top_n
        # (keep nothing) instead of Python's negative-slice behavior.
        keep_trial_numbers = {
            t['trial_number'] for t in sorted_trials[:max(keep_top_n, 0)]
        }

        # Aggregate cleanup statistics (also persisted to cleanup_log.json).
        stats = {
            'total_trials': len(history),
            'kept_trials': len(keep_trial_numbers),
            'cleaned_trials': 0,
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': [],
            'kept_trial_numbers': sorted(keep_trial_numbers),
            'dry_run': dry_run,
        }

        # Process each trial_XXX directory in deterministic (sorted) order.
        for trial_dir in sorted(self.substudy_dir.glob('trial_*')):
            if not trial_dir.is_dir():
                continue

            # Extract the trial number from the directory name (trial_<N>).
            try:
                trial_num = int(trial_dir.name.split('_')[-1])
            except (ValueError, IndexError):
                continue  # not a trial_<number> directory — skip silently

            # Top-N trials keep all of their files.
            if trial_num in keep_trial_numbers:
                continue

            trial_stats = self._cleanup_trial_directory(trial_dir, dry_run)
            stats['files_deleted'] += trial_stats['files_deleted']
            stats['space_freed_mb'] += trial_stats['space_freed_mb']
            stats['deleted_files'].extend(trial_stats['deleted_files'])
            if trial_stats['files_deleted'] > 0:
                stats['cleaned_trials'] += 1

        # Persist the log only for real runs, so a dry run leaves no trace.
        if not dry_run:
            with open(self.cleanup_log, 'w') as f:
                json.dump(stats, f, indent=2)

        return stats

    def _cleanup_trial_directory(self, trial_dir: Path, dry_run: bool) -> Dict:
        """
        Clean up a single trial directory.

        Args:
            trial_dir: Path to trial directory
            dry_run: If True, don't actually delete files

        Returns:
            Dictionary with cleanup statistics for this trial
        """
        stats = {
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': []
        }

        for file_path in trial_dir.iterdir():
            if not file_path.is_file():
                continue

            # Preserved files are kept even if their extension matches.
            if file_path.name in self.PRESERVE_FILES:
                continue

            # Case-insensitive extension check against the deletion set.
            if file_path.suffix.lower() in self.CLEANUP_EXTENSIONS:
                file_size_mb = file_path.stat().st_size / (1024 * 1024)

                # Stats are counted even on dry runs so the report is accurate.
                stats['files_deleted'] += 1
                stats['space_freed_mb'] += file_size_mb
                stats['deleted_files'].append(
                    str(file_path.relative_to(self.substudy_dir))
                )

                if not dry_run:
                    try:
                        file_path.unlink()
                    except OSError as e:
                        # Best-effort: a locked/permission-denied file should
                        # not abort cleanup of the remaining trials.
                        print(f"Warning: Could not delete {file_path}: {e}")

        return stats

    def print_cleanup_report(self, stats: Dict):
        """
        Print human-readable cleanup report.

        Args:
            stats: Cleanup statistics dictionary as returned by cleanup_models()
        """
        print("\n" + "="*70)
        print("MODEL CLEANUP REPORT")
        print("="*70)

        if stats['dry_run']:
            print("[DRY RUN - No files were actually deleted]")
            print()

        print(f"Total trials: {stats['total_trials']}")
        print(f"Trials kept: {stats['kept_trials']}")
        print(f"Trials cleaned: {stats['cleaned_trials']}")
        print(f"Files deleted: {stats['files_deleted']}")
        print(f"Space freed: {stats['space_freed_mb']:.2f} MB")
        print()
        print(f"Kept trial numbers: {stats['kept_trial_numbers']}")
        print()

        if stats['files_deleted'] > 0:
            # Summarize deletions by extension for a quick sanity check.
            print("Deleted file types:")
            file_types = {}
            for filepath in stats['deleted_files']:
                ext = Path(filepath).suffix.lower()
                file_types[ext] = file_types.get(ext, 0) + 1

            for ext, count in sorted(file_types.items()):
                print(f"  {ext:15s}: {count:4d} files")

        print("="*70 + "\n")
|
||||
|
||||
def cleanup_substudy(
    substudy_dir: Path,
    keep_top_n: int = 10,
    dry_run: bool = False,
    verbose: bool = True
) -> Dict:
    """
    Run model cleanup on a substudy in one call.

    Thin convenience wrapper around ModelCleanup: instantiates the manager,
    performs the cleanup, and optionally prints the human-readable report.

    Args:
        substudy_dir: Path to substudy directory
        keep_top_n: Number of best trials to preserve models for
        dry_run: If True, only report what would be deleted
        verbose: If True, print cleanup report

    Returns:
        Cleanup statistics dictionary
    """
    manager = ModelCleanup(substudy_dir)
    result = manager.cleanup_models(keep_top_n=keep_top_n, dry_run=dry_run)

    if verbose:
        manager.print_cleanup_report(result)

    return result
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: clean up a single substudy directory.
    # (The unused `import sys` from the original has been removed.)
    import argparse

    parser = argparse.ArgumentParser(
        description='Clean up optimization trial model files to save disk space'
    )
    parser.add_argument(
        'substudy_dir',
        type=Path,
        help='Path to substudy directory'
    )
    parser.add_argument(
        '--keep-top-n',
        type=int,
        default=10,
        help='Number of best trials to keep models for (default: 10)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be deleted without actually deleting'
    )

    args = parser.parse_args()

    cleanup_substudy(
        args.substudy_dir,
        keep_top_n=args.keep_top_n,
        dry_run=args.dry_run
    )
Reference in New Issue
Block a user