275 lines
8.3 KiB
Python
275 lines
8.3 KiB
Python
|
|
"""
|
||
|
|
Model Cleanup System
|
||
|
|
|
||
|
|
Intelligent cleanup of trial model files to save disk space.
|
||
|
|
Keeps top-N trials based on objective value, deletes CAD/FEM files for poor trials.
|
||
|
|
|
||
|
|
Strategy:
|
||
|
|
- Preserve ALL trial results.json files (small, contain critical data)
|
||
|
|
- Delete large CAD/FEM files (.prt, .sim, .fem, .op2, .f06) for non-top-N trials
|
||
|
|
- Keep best trial models + user-specified number of top trials
|
||
|
|
"""
|
||
|
|
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Dict, List, Optional
|
||
|
|
import json
|
||
|
|
import shutil
|
||
|
|
|
||
|
|
|
||
|
|
class ModelCleanup:
    """
    Clean up trial directories to save disk space.

    Deletes large model files (CAD, FEM and solver output; see
    ``CLEANUP_EXTENSIONS``) from trials that are not among the top-N
    performers, where a lower ``total_objective`` is better. Files named in
    ``PRESERVE_FILES`` are never deleted.
    """

    # File extensions to delete (large CAD/FEM/result files).
    # Suffixes are lower-cased before lookup, so matching is case-insensitive;
    # the uppercase spellings below are kept only for reference.
    CLEANUP_EXTENSIONS = {
        '.prt',     # NX part files
        '.sim',     # NX simulation files
        '.fem',     # FEM mesh files
        '.afm',     # NX assembly FEM
        '.op2',     # Nastran binary results
        '.f06',     # Nastran text results
        '.dat',     # Nastran input deck
        '.bdf',     # Nastran bulk data
        '.pch',     # Nastran punch file
        '.log',     # Nastran log
        '.master',  # Nastran master file
        '.dball',   # Nastran database
        '.MASTER',  # Nastran master (uppercase)
        '.DBALL',   # Nastran database (uppercase)
    }

    # Files to ALWAYS keep (small, critical data)
    PRESERVE_FILES = {
        'results.json',
        'trial_metadata.json',
        'extraction_log.txt',
    }

    def __init__(self, substudy_dir: Path):
        """
        Initialize cleanup manager.

        Args:
            substudy_dir: Path to substudy directory containing trial_XXX folders
        """
        self.substudy_dir = Path(substudy_dir)
        self.history_file = self.substudy_dir / 'history.json'
        self.cleanup_log = self.substudy_dir / 'cleanup_log.json'

    @staticmethod
    def _objective_sort_key(trial: Dict):
        """
        Return the value used to rank a trial (lower is better).

        A missing, ``None``, non-numeric or NaN ``total_objective`` ranks as
        worst (+inf) so that such trials never crash the sort and are never
        preferred over trials with a real objective value.
        """
        value = trial.get('total_objective')
        if not isinstance(value, (int, float)) or value != value:  # NaN != NaN
            return float('inf')
        return value

    def cleanup_models(
        self,
        keep_top_n: int = 10,
        dry_run: bool = False
    ) -> Dict:
        """
        Clean up trial model files, keeping only top-N performers.

        Trials are ranked by ascending ``total_objective`` as recorded in
        ``history.json``. Every ``trial_*`` directory whose number is not in
        the top-N set is cleaned, including directories that have no entry
        in the history file.

        Args:
            keep_top_n: Number of best trials to keep models for. Zero or a
                negative value keeps none.
            dry_run: If True, only report what would be deleted without deleting

        Returns:
            Dictionary with cleanup statistics

        Raises:
            FileNotFoundError: If ``history.json`` is missing from the substudy.
            KeyError: If a top-N history entry has no ``trial_number``.
        """
        if not self.history_file.exists():
            raise FileNotFoundError(f"History file not found: {self.history_file}")

        # Load history
        with open(self.history_file, 'r', encoding='utf-8') as f:
            history = json.load(f)

        # Sort trials by objective value (minimize)
        sorted_trials = sorted(history, key=self._objective_sort_key)

        # Identify top-N trials to keep. Clamp at zero: a negative slice bound
        # would keep "all but the last N" instead of nothing.
        keep_count = max(keep_top_n, 0)
        keep_trial_numbers = {
            trial['trial_number'] for trial in sorted_trials[:keep_count]
        }

        # Cleanup statistics
        stats = {
            'total_trials': len(history),
            'kept_trials': len(keep_trial_numbers),
            'cleaned_trials': 0,
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': [],
            'kept_trial_numbers': sorted(keep_trial_numbers),
            'dry_run': dry_run
        }

        # Process each trial directory
        for trial_dir in sorted(self.substudy_dir.glob('trial_*')):
            if not trial_dir.is_dir():
                continue

            # Extract trial number from directory name (trial_007 -> 7)
            try:
                trial_num = int(trial_dir.name.split('_')[-1])
            except (ValueError, IndexError):
                continue

            # Skip if this trial should be kept
            if trial_num in keep_trial_numbers:
                continue

            # Clean up this trial
            trial_stats = self._cleanup_trial_directory(trial_dir, dry_run)
            stats['files_deleted'] += trial_stats['files_deleted']
            stats['space_freed_mb'] += trial_stats['space_freed_mb']
            stats['deleted_files'].extend(trial_stats['deleted_files'])

            if trial_stats['files_deleted'] > 0:
                stats['cleaned_trials'] += 1

        # Save cleanup log (a dry run leaves the substudy untouched)
        if not dry_run:
            with open(self.cleanup_log, 'w', encoding='utf-8') as f:
                json.dump(stats, f, indent=2)

        return stats

    def _cleanup_trial_directory(self, trial_dir: Path, dry_run: bool) -> Dict:
        """
        Clean up a single trial directory.

        Only regular files directly inside ``trial_dir`` are considered;
        subdirectories are not descended into. A file is counted in the
        returned statistics only if it was actually removed (or, in a dry
        run, would be removed); files that fail to delete are reported with
        a warning and left out of the counts.

        Args:
            trial_dir: Path to trial directory
            dry_run: If True, don't actually delete files

        Returns:
            Dictionary with cleanup statistics for this trial
        """
        stats = {
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': []
        }

        # Sorted so the order of 'deleted_files' is reproducible across runs.
        for file_path in sorted(trial_dir.iterdir()):
            if not file_path.is_file():
                continue

            # Skip preserved files
            if file_path.name in self.PRESERVE_FILES:
                continue

            # Only files with a cleanup extension are candidates
            if file_path.suffix.lower() not in self.CLEANUP_EXTENSIONS:
                continue

            try:
                file_size_mb = file_path.stat().st_size / (1024 * 1024)
                # Delete file (unless dry run)
                if not dry_run:
                    file_path.unlink()
            except OSError as e:
                print(f"Warning: Could not delete {file_path}: {e}")
                continue

            # Record the file only once the deletion has succeeded.
            stats['files_deleted'] += 1
            stats['space_freed_mb'] += file_size_mb
            stats['deleted_files'].append(str(file_path.relative_to(self.substudy_dir)))

        return stats

    def print_cleanup_report(self, stats: Dict):
        """
        Print human-readable cleanup report.

        Args:
            stats: Cleanup statistics dictionary as returned by ``cleanup_models``
        """
        print("\n" + "="*70)
        print("MODEL CLEANUP REPORT")
        print("="*70)

        if stats['dry_run']:
            print("[DRY RUN - No files were actually deleted]")
            print()

        print(f"Total trials: {stats['total_trials']}")
        print(f"Trials kept: {stats['kept_trials']}")
        print(f"Trials cleaned: {stats['cleaned_trials']}")
        print(f"Files deleted: {stats['files_deleted']}")
        print(f"Space freed: {stats['space_freed_mb']:.2f} MB")
        print()
        print(f"Kept trial numbers: {stats['kept_trial_numbers']}")
        print()

        if stats['files_deleted'] > 0:
            print("Deleted file types:")
            # Tally deleted files per (lower-cased) extension
            file_types = {}
            for filepath in stats['deleted_files']:
                ext = Path(filepath).suffix.lower()
                file_types[ext] = file_types.get(ext, 0) + 1

            for ext, count in sorted(file_types.items()):
                print(f" {ext:15s}: {count:4d} files")

        print("="*70 + "\n")
|
||
|
|
|
||
|
|
|
||
|
|
def cleanup_substudy(
    substudy_dir: Path,
    keep_top_n: int = 10,
    dry_run: bool = False,
    verbose: bool = True
) -> Dict:
    """
    Run a model cleanup on one substudy in a single call.

    Builds a ModelCleanup for the directory, performs the cleanup and,
    when requested, prints the report for it.

    Args:
        substudy_dir: Path to substudy directory
        keep_top_n: Number of best trials to preserve models for
        dry_run: If True, only report what would be deleted
        verbose: If True, print cleanup report

    Returns:
        Cleanup statistics dictionary
    """
    manager = ModelCleanup(substudy_dir)
    report = manager.cleanup_models(keep_top_n=keep_top_n, dry_run=dry_run)

    # Quiet mode: hand back the statistics without printing anything.
    if not verbose:
        return report

    manager.print_cleanup_report(report)
    return report
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
    # Command-line entry point: clean one substudy directory.
    import argparse

    parser = argparse.ArgumentParser(
        description='Clean up optimization trial model files to save disk space'
    )
    parser.add_argument(
        'substudy_dir',
        type=Path,
        help='Path to substudy directory'
    )
    parser.add_argument(
        '--keep-top-n',
        type=int,
        default=10,
        help='Number of best trials to keep models for (default: 10)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be deleted without actually deleting'
    )

    args = parser.parse_args()

    # A negative count keeps no trials at all, so every trial's models would
    # be removed; treat it as a usage error instead of silently deleting.
    if args.keep_top_n < 0:
        parser.error('--keep-top-n must be zero or greater')

    try:
        cleanup_substudy(
            args.substudy_dir,
            keep_top_n=args.keep_top_n,
            dry_run=args.dry_run
        )
    except FileNotFoundError as e:
        # Missing history.json: report it as a CLI error, not a traceback.
        parser.error(str(e))
|