Files
Atomizer/optimization_engine/nx/model_cleanup.py
Anto01 eabcc4c3ca refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 12:30:59 -05:00

275 lines
8.3 KiB
Python

"""
Model Cleanup System
Intelligent cleanup of trial model files to save disk space.
Keeps top-N trials based on objective value, deletes CAD/FEM files for poor trials.
Strategy:
- Preserve ALL trial results.json files (small, contain critical data)
- Delete large CAD/FEM files (.prt, .sim, .fem, .op2, .f06) for non-top-N trials
- Keep best trial models + user-specified number of top trials
"""
from pathlib import Path
from typing import Dict, List, Optional
import json
import shutil
class ModelCleanup:
    """
    Clean up trial directories to save disk space.

    Deletes large model files (.prt, .sim, .fem, .op2, .f06, ...) from trials
    that are not in the top-N performers, while always preserving the small
    JSON/metadata files that hold the critical trial results.
    """

    # File extensions to delete (large CAD/FEM/result files).
    # NOTE: suffixes are compared case-insensitively (the cleanup code
    # lowercases each file's suffix before the membership test), so only
    # lowercase entries belong here — uppercase duplicates would be dead.
    CLEANUP_EXTENSIONS = {
        '.prt',     # NX part files
        '.sim',     # NX simulation files
        '.fem',     # FEM mesh files
        '.afm',     # NX assembly FEM
        '.op2',     # Nastran binary results
        '.f06',     # Nastran text results
        '.dat',     # Nastran input deck
        '.bdf',     # Nastran bulk data
        '.pch',     # Nastran punch file
        '.log',     # Nastran log
        '.master',  # Nastran master file (also matches .MASTER)
        '.dball',   # Nastran database (also matches .DBALL)
    }

    # Files to ALWAYS keep (small, critical data), regardless of extension.
    PRESERVE_FILES = {
        'results.json',
        'trial_metadata.json',
        'extraction_log.txt',
    }

    def __init__(self, substudy_dir: Path):
        """
        Initialize cleanup manager.

        Args:
            substudy_dir: Path to substudy directory containing trial_XXX folders
        """
        self.substudy_dir = Path(substudy_dir)
        self.history_file = self.substudy_dir / 'history.json'
        self.cleanup_log = self.substudy_dir / 'cleanup_log.json'

    def cleanup_models(
        self,
        keep_top_n: int = 10,
        dry_run: bool = False
    ) -> Dict:
        """
        Clean up trial model files, keeping only top-N performers.

        Args:
            keep_top_n: Number of best trials to keep models for
            dry_run: If True, only report what would be deleted without deleting

        Returns:
            Dictionary with cleanup statistics

        Raises:
            FileNotFoundError: If the substudy's history.json is missing.
        """
        if not self.history_file.exists():
            raise FileNotFoundError(f"History file not found: {self.history_file}")

        # Load trial history (list of per-trial dicts).
        with open(self.history_file, 'r') as f:
            history = json.load(f)

        # Sort trials by objective value (minimize); entries with no recorded
        # objective sort last, so they are candidates for cleanup first.
        sorted_trials = sorted(
            history, key=lambda x: x.get('total_objective', float('inf'))
        )

        # Identify top-N trials to keep. Skip malformed history entries that
        # lack a 'trial_number' instead of raising KeyError mid-cleanup.
        keep_trial_numbers = set()
        for trial in sorted_trials[:keep_top_n]:
            if 'trial_number' in trial:
                keep_trial_numbers.add(trial['trial_number'])

        # Cleanup statistics, returned to the caller and written to disk below.
        stats = {
            'total_trials': len(history),
            'kept_trials': len(keep_trial_numbers),
            'cleaned_trials': 0,
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': [],
            'kept_trial_numbers': sorted(keep_trial_numbers),
            'dry_run': dry_run
        }

        # Process each trial directory (trial_000, trial_001, ...).
        for trial_dir in sorted(self.substudy_dir.glob('trial_*')):
            if not trial_dir.is_dir():
                continue

            # Extract trial number from directory name; skip unexpected names.
            try:
                trial_num = int(trial_dir.name.split('_')[-1])
            except (ValueError, IndexError):
                continue

            # Skip if this trial should be kept.
            if trial_num in keep_trial_numbers:
                continue

            # Clean up this trial and fold its stats into the totals.
            trial_stats = self._cleanup_trial_directory(trial_dir, dry_run)
            stats['files_deleted'] += trial_stats['files_deleted']
            stats['space_freed_mb'] += trial_stats['space_freed_mb']
            stats['deleted_files'].extend(trial_stats['deleted_files'])
            if trial_stats['files_deleted'] > 0:
                stats['cleaned_trials'] += 1

        # Persist the cleanup log so later runs can audit what was removed.
        if not dry_run:
            with open(self.cleanup_log, 'w') as f:
                json.dump(stats, f, indent=2)

        return stats

    def _cleanup_trial_directory(self, trial_dir: Path, dry_run: bool) -> Dict:
        """
        Clean up a single trial directory.

        Args:
            trial_dir: Path to trial directory
            dry_run: If True, don't actually delete files

        Returns:
            Dictionary with cleanup statistics for this trial
        """
        stats = {
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': []
        }

        for file_path in trial_dir.iterdir():
            if not file_path.is_file():
                continue

            # Skip preserved files (small, critical data).
            if file_path.name in self.PRESERVE_FILES:
                continue

            # Case-insensitive extension match against the cleanup set.
            if file_path.suffix.lower() in self.CLEANUP_EXTENSIONS:
                file_size_mb = file_path.stat().st_size / (1024 * 1024)
                stats['files_deleted'] += 1
                stats['space_freed_mb'] += file_size_mb
                stats['deleted_files'].append(
                    str(file_path.relative_to(self.substudy_dir))
                )

                # Delete file (unless dry run). Best-effort: a locked or
                # already-removed file is reported but does not abort cleanup.
                if not dry_run:
                    try:
                        file_path.unlink()
                    except Exception as e:
                        print(f"Warning: Could not delete {file_path}: {e}")

        return stats

    def print_cleanup_report(self, stats: Dict):
        """
        Print human-readable cleanup report.

        Args:
            stats: Cleanup statistics dictionary
        """
        print("\n" + "="*70)
        print("MODEL CLEANUP REPORT")
        print("="*70)
        if stats['dry_run']:
            print("[DRY RUN - No files were actually deleted]")
            print()
        print(f"Total trials: {stats['total_trials']}")
        print(f"Trials kept: {stats['kept_trials']}")
        print(f"Trials cleaned: {stats['cleaned_trials']}")
        print(f"Files deleted: {stats['files_deleted']}")
        print(f"Space freed: {stats['space_freed_mb']:.2f} MB")
        print()
        print(f"Kept trial numbers: {stats['kept_trial_numbers']}")
        print()
        if stats['files_deleted'] > 0:
            # Aggregate deleted files by extension for a compact summary.
            print("Deleted file types:")
            file_types = {}
            for filepath in stats['deleted_files']:
                ext = Path(filepath).suffix.lower()
                file_types[ext] = file_types.get(ext, 0) + 1
            for ext, count in sorted(file_types.items()):
                print(f" {ext:15s}: {count:4d} files")
        print("="*70 + "\n")
def cleanup_substudy(
    substudy_dir: Path,
    keep_top_n: int = 10,
    dry_run: bool = False,
    verbose: bool = True
) -> Dict:
    """
    Convenience wrapper: run model cleanup on one substudy directory.

    Args:
        substudy_dir: Path to substudy directory
        keep_top_n: Number of best trials to preserve models for
        dry_run: If True, only report what would be deleted
        verbose: If True, print cleanup report

    Returns:
        Cleanup statistics dictionary
    """
    manager = ModelCleanup(substudy_dir)
    report = manager.cleanup_models(keep_top_n=keep_top_n, dry_run=dry_run)

    # Optionally echo a human-readable summary to stdout.
    if verbose:
        manager.print_cleanup_report(report)

    return report
if __name__ == '__main__':
    # Command-line entry point: clean up a single substudy directory.
    # (Removed unused `import sys` — nothing in this block referenced it.)
    import argparse

    parser = argparse.ArgumentParser(
        description='Clean up optimization trial model files to save disk space'
    )
    parser.add_argument(
        'substudy_dir',
        type=Path,
        help='Path to substudy directory'
    )
    parser.add_argument(
        '--keep-top-n',
        type=int,
        default=10,
        help='Number of best trials to keep models for (default: 10)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be deleted without actually deleting'
    )
    args = parser.parse_args()

    cleanup_substudy(
        args.substudy_dir,
        keep_top_n=args.keep_top_n,
        dry_run=args.dry_run
    )