refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
274
optimization_engine/nx/model_cleanup.py
Normal file
274
optimization_engine/nx/model_cleanup.py
Normal file
@@ -0,0 +1,274 @@
|
||||
"""
|
||||
Model Cleanup System
|
||||
|
||||
Intelligent cleanup of trial model files to save disk space.
|
||||
Keeps top-N trials based on objective value, deletes CAD/FEM files for poor trials.
|
||||
|
||||
Strategy:
|
||||
- Preserve ALL trial results.json files (small, contain critical data)
|
||||
- Delete large CAD/FEM files (.prt, .sim, .fem, .op2, .f06) for non-top-N trials
|
||||
- Keep best trial models + user-specified number of top trials
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
import json
|
||||
import shutil
|
||||
|
||||
|
||||
class ModelCleanup:
    """
    Clean up trial directories to save disk space.

    Deletes large model files (.prt, .sim, .fem, .op2, .f06, ...) from trials
    that are not in the top-N performers, while always preserving the small
    JSON/text files that contain the critical trial data.
    """

    # File extensions to delete (large CAD/FEM/result files).
    # Matching is case-insensitive — the file suffix is lowercased before the
    # lookup — so only lowercase entries are needed (uppercase .MASTER/.DBALL
    # variants were redundant and have been removed).
    CLEANUP_EXTENSIONS = {
        '.prt',     # NX part files
        '.sim',     # NX simulation files
        '.fem',     # FEM mesh files
        '.afm',     # NX assembly FEM
        '.op2',     # Nastran binary results
        '.f06',     # Nastran text results
        '.dat',     # Nastran input deck
        '.bdf',     # Nastran bulk data
        '.pch',     # Nastran punch file
        '.log',     # Nastran log
        '.master',  # Nastran master file
        '.dball',   # Nastran database
    }

    # Files to ALWAYS keep, regardless of extension (small, critical data).
    PRESERVE_FILES = {
        'results.json',
        'trial_metadata.json',
        'extraction_log.txt',
    }

    def __init__(self, substudy_dir: Path):
        """
        Initialize cleanup manager.

        Args:
            substudy_dir: Path to substudy directory containing trial_XXX folders
        """
        self.substudy_dir = Path(substudy_dir)
        self.history_file = self.substudy_dir / 'history.json'
        self.cleanup_log = self.substudy_dir / 'cleanup_log.json'

    def cleanup_models(
        self,
        keep_top_n: int = 10,
        dry_run: bool = False
    ) -> Dict:
        """
        Clean up trial model files, keeping only top-N performers.

        Args:
            keep_top_n: Number of best trials to keep models for
            dry_run: If True, only report what would be deleted without deleting

        Returns:
            Dictionary with cleanup statistics

        Raises:
            FileNotFoundError: If the substudy's history.json does not exist.
        """
        if not self.history_file.exists():
            raise FileNotFoundError(f"History file not found: {self.history_file}")

        with open(self.history_file, 'r') as f:
            history = json.load(f)

        # Rank trials by objective value (minimization: smaller is better).
        # Trials missing 'total_objective' sort last and are never preserved.
        sorted_trials = sorted(
            history, key=lambda t: t.get('total_objective', float('inf'))
        )

        # Top-N trial numbers whose model files should survive cleanup.
        # max(..., 0) keeps the original semantics for non-positive keep_top_n
        # (keep nothing) instead of Python's negative-slice behavior.
        keep_trial_numbers = {
            t['trial_number'] for t in sorted_trials[:max(keep_top_n, 0)]
        }

        # Aggregate cleanup statistics (also persisted to cleanup_log.json).
        stats = {
            'total_trials': len(history),
            'kept_trials': len(keep_trial_numbers),
            'cleaned_trials': 0,
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': [],
            'kept_trial_numbers': sorted(keep_trial_numbers),
            'dry_run': dry_run,
        }

        # Process each trial_XXX directory in deterministic (sorted) order.
        for trial_dir in sorted(self.substudy_dir.glob('trial_*')):
            if not trial_dir.is_dir():
                continue

            # Extract the trial number from the directory name (trial_<N>).
            try:
                trial_num = int(trial_dir.name.split('_')[-1])
            except (ValueError, IndexError):
                continue  # not a trial_<number> directory — skip silently

            # Top-N trials keep all of their files.
            if trial_num in keep_trial_numbers:
                continue

            trial_stats = self._cleanup_trial_directory(trial_dir, dry_run)
            stats['files_deleted'] += trial_stats['files_deleted']
            stats['space_freed_mb'] += trial_stats['space_freed_mb']
            stats['deleted_files'].extend(trial_stats['deleted_files'])
            if trial_stats['files_deleted'] > 0:
                stats['cleaned_trials'] += 1

        # Persist the log only for real runs, so a dry run leaves no trace.
        if not dry_run:
            with open(self.cleanup_log, 'w') as f:
                json.dump(stats, f, indent=2)

        return stats

    def _cleanup_trial_directory(self, trial_dir: Path, dry_run: bool) -> Dict:
        """
        Clean up a single trial directory.

        Args:
            trial_dir: Path to trial directory
            dry_run: If True, don't actually delete files

        Returns:
            Dictionary with cleanup statistics for this trial
        """
        stats = {
            'files_deleted': 0,
            'space_freed_mb': 0.0,
            'deleted_files': []
        }

        for file_path in trial_dir.iterdir():
            if not file_path.is_file():
                continue

            # Preserved files are kept even if their extension matches.
            if file_path.name in self.PRESERVE_FILES:
                continue

            # Case-insensitive extension check against the deletion set.
            if file_path.suffix.lower() in self.CLEANUP_EXTENSIONS:
                file_size_mb = file_path.stat().st_size / (1024 * 1024)

                # Stats are counted even on dry runs so the report is accurate.
                stats['files_deleted'] += 1
                stats['space_freed_mb'] += file_size_mb
                stats['deleted_files'].append(
                    str(file_path.relative_to(self.substudy_dir))
                )

                if not dry_run:
                    try:
                        file_path.unlink()
                    except OSError as e:
                        # Best-effort: a locked/permission-denied file should
                        # not abort cleanup of the remaining trials.
                        print(f"Warning: Could not delete {file_path}: {e}")

        return stats

    def print_cleanup_report(self, stats: Dict):
        """
        Print human-readable cleanup report.

        Args:
            stats: Cleanup statistics dictionary as returned by cleanup_models()
        """
        print("\n" + "="*70)
        print("MODEL CLEANUP REPORT")
        print("="*70)

        if stats['dry_run']:
            print("[DRY RUN - No files were actually deleted]")
            print()

        print(f"Total trials: {stats['total_trials']}")
        print(f"Trials kept: {stats['kept_trials']}")
        print(f"Trials cleaned: {stats['cleaned_trials']}")
        print(f"Files deleted: {stats['files_deleted']}")
        print(f"Space freed: {stats['space_freed_mb']:.2f} MB")
        print()
        print(f"Kept trial numbers: {stats['kept_trial_numbers']}")
        print()

        if stats['files_deleted'] > 0:
            # Summarize deletions by extension for a quick sanity check.
            print("Deleted file types:")
            file_types = {}
            for filepath in stats['deleted_files']:
                ext = Path(filepath).suffix.lower()
                file_types[ext] = file_types.get(ext, 0) + 1

            for ext, count in sorted(file_types.items()):
                print(f"  {ext:15s}: {count:4d} files")

        print("="*70 + "\n")
|
||||
|
||||
def cleanup_substudy(
    substudy_dir: Path,
    keep_top_n: int = 10,
    dry_run: bool = False,
    verbose: bool = True
) -> Dict:
    """
    Run model cleanup on a substudy in one call.

    Thin convenience wrapper around ModelCleanup: instantiates the manager,
    performs the cleanup, and optionally prints the human-readable report.

    Args:
        substudy_dir: Path to substudy directory
        keep_top_n: Number of best trials to preserve models for
        dry_run: If True, only report what would be deleted
        verbose: If True, print cleanup report

    Returns:
        Cleanup statistics dictionary
    """
    manager = ModelCleanup(substudy_dir)
    result = manager.cleanup_models(keep_top_n=keep_top_n, dry_run=dry_run)

    if verbose:
        manager.print_cleanup_report(result)

    return result
|
||||
|
||||
if __name__ == '__main__':
    # CLI entry point: clean up a single substudy directory.
    # (The unused `import sys` from the original has been removed.)
    import argparse

    parser = argparse.ArgumentParser(
        description='Clean up optimization trial model files to save disk space'
    )
    parser.add_argument(
        'substudy_dir',
        type=Path,
        help='Path to substudy directory'
    )
    parser.add_argument(
        '--keep-top-n',
        type=int,
        default=10,
        help='Number of best trials to keep models for (default: 10)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be deleted without actually deleting'
    )

    args = parser.parse_args()

    cleanup_substudy(
        args.substudy_dir,
        keep_top_n=args.keep_top_n,
        dry_run=args.dry_run
    )
Reference in New Issue
Block a user