feat: Pre-migration checkpoint - updated docs and utilities

Updates before optimization_engine migration:
- Updated migration plan to v2.1 with complete file inventory
- Added OP_07 disk optimization protocol
- Added SYS_16 self-aware turbo protocol
- Added study archiver and cleanup utilities
- Added ensemble surrogate module
- Updated NX solver and session manager
- Updated zernike HTML generator
- Added context engineering plan
- LAC session insights updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 10:22:45 -05:00
parent faa7779a43
commit 82f36689b7
21 changed files with 6304 additions and 890 deletions

View File

@@ -24,6 +24,7 @@ SESSION_LOCK_DIR = Path(os.environ.get('TEMP', '/tmp')) / 'atomizer_nx_sessions'
# Default NX installation paths (in order of preference)
DEFAULT_NX_PATHS = [
Path(r"C:\Program Files\Siemens\DesigncenterNX2512\NXBIN\ugraf.exe"), # DesignCenter (preferred)
Path(r"C:\Program Files\Siemens\NX2506\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\NX2412\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\Simcenter3D_2506\NXBIN\ugraf.exe"),

View File

@@ -0,0 +1,438 @@
"""
Study Archiver - Disk Space Optimization for Atomizer Studies
This module provides utilities for:
1. Cleaning up completed studies (removing regenerable files)
2. Archiving studies to remote storage (dalidou server)
3. Restoring archived studies on-demand
Usage:
# Cleanup a completed study (keep only essential files)
python -m optimization_engine.utils.study_archiver cleanup studies/M1_Mirror/m1_mirror_V12
# Archive to remote server
python -m optimization_engine.utils.study_archiver archive studies/M1_Mirror/m1_mirror_V12
# Restore from remote
python -m optimization_engine.utils.study_archiver restore m1_mirror_V12
# Show disk usage analysis
python -m optimization_engine.utils.study_archiver analyze studies/M1_Mirror
"""
import os
import json
import shutil
import tarfile
import subprocess
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Tuple
import logging
logger = logging.getLogger(__name__)
# Configuration
# Remote archive target. Two addresses for the same machine: the LAN IP for
# local WiFi access, the Tailscale IP when connecting from outside the network.
REMOTE_CONFIG = {
    "host": "192.168.86.50",  # Local WiFi
    "host_tailscale": "100.80.199.40",  # Remote via Tailscale
    "user": "papa",
    "archive_path": "/srv/storage/atomizer-archive",
    "ssh_port": 22,  # NOTE(review): defined but not read by the visible code — confirm use
}
# Files to KEEP per trial (essential for analysis).
# Compared against Path.suffix.lower() in analyze_study/cleanup_study.
ESSENTIAL_EXTENSIONS = {
    '.op2',   # Nastran binary results (Zernike extraction)
    '.json',  # Parameters, results, metadata
    '.npz',   # Pre-computed Zernike coefficients
    '.html',  # Generated reports
    '.png',   # Visualization images
    '.csv',   # Exported data
}
# Files to DELETE per trial (regenerable from master + params)
DELETABLE_EXTENSIONS = {
    '.prt',   # NX part files (copy of master)
    '.fem',   # FEM mesh files (copy of master)
    '.sim',   # Simulation files (copy of master)
    '.afm',   # Assembly FEM files
    '.dat',   # Solver input deck (can regenerate)
    '.f04',   # Nastran output log
    '.f06',   # Nastran printed output
    '.log',   # Generic log files
    '.diag',  # Diagnostic files
    '.txt',   # Temp text files
    '.exp',   # Expression files
    '.bak',   # Backup files
}
# Folders to always keep entirely.
# NOTE(review): KEEP_FOLDERS is not referenced by any function shown in this
# module — presumably consumed elsewhere or reserved for future use; confirm.
KEEP_FOLDERS = {
    '1_setup',              # Master model files (source of truth)
    '3_results',            # Final results, database, reports
    'best_design_archive',  # Archived best designs
}
def analyze_study(study_path: Path) -> Dict:
    """Summarize disk usage for a single study folder.

    Walks every file under *study_path*, tallying sizes overall, per
    extension, per top-level folder, and into essential/deletable buckets
    based on the module-level extension sets. Also counts trial folders
    under ``2_iterations`` (names starting with ``trial_`` or ``iter``).

    Args:
        study_path: Root folder of one study.
    Returns:
        Dict with total/essential/deletable byte counts, per-extension and
        per-folder breakdowns, and the trial count.
    """
    study_path = Path(study_path)
    report = {
        "study_name": study_path.name,
        "total_size_bytes": 0,
        "by_extension": {},
        "by_folder": {},
        "essential_size": 0,
        "deletable_size": 0,
        "trial_count": 0,
    }
    by_ext = report["by_extension"]
    by_folder = report["by_folder"]
    for entry in study_path.rglob("*"):
        if not entry.is_file():
            continue
        size = entry.stat().st_size
        suffix = entry.suffix.lower()
        report["total_size_bytes"] += size
        by_ext[suffix] = by_ext.get(suffix, 0) + size
        # Attribute the file to its top-level folder within the study.
        parts = entry.relative_to(study_path).parts
        if parts:
            top = parts[0]
            by_folder[top] = by_folder.get(top, 0) + size
        # Bucket into essential vs. deletable (anything else is uncategorized).
        if suffix in ESSENTIAL_EXTENSIONS:
            report["essential_size"] += size
        elif suffix in DELETABLE_EXTENSIONS:
            report["deletable_size"] += size
    trials_dir = study_path / "2_iterations"
    if trials_dir.exists():
        report["trial_count"] = sum(
            1
            for child in trials_dir.iterdir()
            if child.is_dir()
            and (child.name.startswith("trial_") or child.name.startswith("iter"))
        )
    return report
def print_analysis(analysis: Dict):
    """Print a formatted disk-usage report for one study.

    Args:
        analysis: Dict produced by :func:`analyze_study`.
    """
    total_gb = analysis["total_size_bytes"] / 1e9
    essential_gb = analysis["essential_size"] / 1e9
    deletable_gb = analysis["deletable_size"] / 1e9
    # BUGFIX: an empty study has total_gb == 0; guard the percentage math so
    # the report prints 0.0% instead of raising ZeroDivisionError.
    essential_pct = 100 * essential_gb / total_gb if total_gb else 0.0
    deletable_pct = 100 * deletable_gb / total_gb if total_gb else 0.0
    print(f"\n{'='*60}")
    print(f"Study: {analysis['study_name']}")
    print(f"{'='*60}")
    print(f"Total size: {total_gb:8.2f} GB")
    print(f"Trials: {analysis['trial_count']:8d}")
    print(f"Essential: {essential_gb:8.2f} GB ({essential_pct:.1f}%)")
    print(f"Deletable: {deletable_gb:8.2f} GB ({deletable_pct:.1f}%)")
    print(f"Potential save: {deletable_gb:8.2f} GB")
    print(f"\nBy folder:")
    # Largest folders first.
    for folder, size in sorted(analysis["by_folder"].items(), key=lambda x: -x[1]):
        print(f" {folder:25} {size/1e9:8.2f} GB")
    print(f"\nTop extensions:")
    # Ten largest extensions, tagged with their cleanup category.
    for ext, size in sorted(analysis["by_extension"].items(), key=lambda x: -x[1])[:10]:
        status = "[KEEP]" if ext in ESSENTIAL_EXTENSIONS else "[DEL?]" if ext in DELETABLE_EXTENSIONS else "[ ]"
        print(f" {status} {ext:10} {size/1e9:8.2f} GB")
def cleanup_study(study_path: Path, dry_run: bool = True) -> Tuple[int, int]:
    """
    Clean up a completed study by removing regenerable files from trial folders.

    Only files directly inside each trial directory under ``2_iterations``
    are considered; a file is removed when its extension (lowercased) is in
    DELETABLE_EXTENSIONS.

    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
    Returns:
        (files_deleted, bytes_freed) — both 0 in dry-run mode
    """
    study_path = Path(study_path)
    iterations_dir = study_path / "2_iterations"
    if not iterations_dir.exists():
        logger.warning(f"No iterations folder found in {study_path}")
        return 0, 0

    # Collect deletion candidates before touching anything.
    candidates = []
    pending_bytes = 0
    for trial_dir in iterations_dir.iterdir():
        if not trial_dir.is_dir():
            continue
        for candidate in trial_dir.iterdir():
            if candidate.is_file() and candidate.suffix.lower() in DELETABLE_EXTENSIONS:
                candidates.append(candidate)
                pending_bytes += candidate.stat().st_size

    if dry_run:
        print(f"\n[DRY RUN] Would delete {len(candidates)} files, freeing {pending_bytes/1e9:.2f} GB")
        print("\nSample files to delete:")
        for candidate in candidates[:10]:
            print(f" {candidate.relative_to(study_path)}")
        if len(candidates) > 10:
            print(f" ... and {len(candidates) - 10} more")
        return 0, 0

    # Execute the deletion; individual failures are logged and skipped.
    deleted = 0
    freed = 0
    for candidate in candidates:
        try:
            size = candidate.stat().st_size
            candidate.unlink()
        except Exception as e:
            logger.error(f"Failed to delete {candidate}: {e}")
        else:
            deleted += 1
            freed += size
    print(f"Deleted {deleted} files, freed {freed/1e9:.2f} GB")
    return deleted, freed
def archive_to_remote(
    study_path: Path,
    use_tailscale: bool = False,
    dry_run: bool = True
) -> bool:
    """
    Archive a study to the remote dalidou server.

    Creates a local .tar.gz of the study, uploads it with rsync, and deletes
    the local archive on success.

    Args:
        study_path: Path to study folder
        use_tailscale: Use Tailscale IP (for remote access)
        dry_run: If True, only report what would be done
    Returns:
        True if successful
    """
    study_path = Path(study_path)
    study_name = study_path.name
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    # Date-stamped archive name so repeated archives of one study coexist.
    archive_name = f"{study_name}_{datetime.now().strftime('%Y%m%d')}.tar.gz"
    local_archive = study_path.parent / archive_name
    if dry_run:
        print(f"\n[DRY RUN] Would archive {study_name}")
        print(f" 1. Create {archive_name}")
        print(f" 2. Upload to {user}@{host}:{remote_path}/")
        print(f" 3. Delete local archive")
        return True
    print(f"Creating archive: {archive_name}")
    with tarfile.open(local_archive, "w:gz") as tar:
        tar.add(study_path, arcname=study_name)
    archive_size = local_archive.stat().st_size
    print(f"Archive size: {archive_size/1e9:.2f} GB")
    # Upload via rsync (more reliable than scp for large files)
    print(f"Uploading to {host}...")
    # FIX: pass argument lists with shell=False so study names/paths containing
    # spaces or shell metacharacters cannot break or inject into the command.
    subprocess.run(["ssh", f"{user}@{host}", f"mkdir -p {remote_path}"], check=True)
    result = subprocess.run(
        ["rsync", "-avz", "--progress", str(local_archive), f"{user}@{host}:{remote_path}/"]
    )
    if result.returncode == 0:
        print("Upload successful!")
        # Local copy is no longer needed once it lives on the server.
        local_archive.unlink()
        return True
    else:
        print(f"Upload failed with code {result.returncode}")
        return False
def restore_from_remote(
    study_name: str,
    target_dir: Path,
    use_tailscale: bool = False
) -> bool:
    """
    Restore a study from the remote server.

    Finds the newest matching ``<study_name>*.tar.gz`` on the server,
    downloads it with rsync, extracts it into *target_dir*, and removes the
    downloaded archive.

    Args:
        study_name: Name of the study to restore
        target_dir: Where to extract the study
        use_tailscale: Use Tailscale IP
    Returns:
        True if successful
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    target_dir = Path(target_dir)
    # Find the archive on remote.
    print(f"Looking for {study_name} on {host}...")
    # FIX: invoke ssh with an argument list (shell=False) so study_name cannot
    # inject into a local shell; the glob is still expanded by the REMOTE shell,
    # which receives the quoted command string.
    result = subprocess.run(
        ["ssh", f"{user}@{host}",
         f"ls {remote_path}/{study_name}*.tar.gz 2>/dev/null | head -1"],
        capture_output=True, text=True,
    )
    if not result.stdout.strip():
        print(f"No archive found for {study_name}")
        return False
    remote_archive = result.stdout.strip()
    local_archive = target_dir / Path(remote_archive).name
    print(f"Downloading: {remote_archive}")
    result = subprocess.run(
        ["rsync", "-avz", "--progress", f"{user}@{host}:{remote_archive}", str(local_archive)]
    )
    if result.returncode != 0:
        print("Download failed")
        return False
    print("Extracting...")
    # NOTE(review): extractall trusts member paths inside the archive. Archives
    # come from our own server, but consider tarfile's filter="data" (3.12+)
    # to harden against path-traversal members.
    with tarfile.open(local_archive, "r:gz") as tar:
        tar.extractall(target_dir)
    # Clean up the downloaded tarball once extracted.
    local_archive.unlink()
    print(f"Restored to {target_dir / study_name}")
    return True
def list_remote_archives(use_tailscale: bool = False) -> List[Dict]:
    """List all archived studies on the remote server.

    Runs ``ls -lh`` over ssh and parses each long-listing line into a dict
    with ``name``, ``size``, and ``date`` keys. Returns an empty list when
    the server is unreachable or holds no archives.
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    ssh_cmd = f'ssh {user}@{host} "ls -lh {remote_path}/*.tar.gz 2>/dev/null"'
    listing = subprocess.run(ssh_cmd, shell=True, capture_output=True, text=True)
    entries: List[Dict] = []
    for raw in listing.stdout.strip().split('\n'):
        if not raw or '.tar.gz' not in raw:
            continue
        fields = raw.split()
        # A full `ls -l` line has at least 9 whitespace-separated fields.
        if len(fields) < 9:
            continue
        entries.append({
            "name": fields[-1].split('/')[-1],
            "size": fields[4],
            "date": f"{fields[5]} {fields[6]} {fields[7]}",
        })
    return entries
def analyze_all_studies(studies_dir: Path) -> Dict:
    """Analyze every study folder directly under *studies_dir*.

    Hidden directories (dot-prefixed) and plain files are skipped. Per-study
    reports from analyze_study() are collected alongside grand totals.
    """
    studies_dir = Path(studies_dir)
    summary = {
        "total_size": 0,
        "total_essential": 0,
        "total_deletable": 0,
        "studies": [],
    }
    candidates = [
        child for child in sorted(studies_dir.iterdir())
        if child.is_dir() and not child.name.startswith('.')
    ]
    for candidate in candidates:
        per_study = analyze_study(candidate)
        summary["studies"].append(per_study)
        summary["total_size"] += per_study["total_size_bytes"]
        summary["total_essential"] += per_study["essential_size"]
        summary["total_deletable"] += per_study["deletable_size"]
    return summary
def main():
    """Command-line entry point: analyze / cleanup / archive / restore / list."""
    import argparse
    parser = argparse.ArgumentParser(description="Atomizer Study Archiver")
    parser.add_argument("command", choices=["analyze", "cleanup", "archive", "restore", "list"])
    parser.add_argument("path", nargs="?", help="Study path or name")
    # NOTE(review): --dry-run is accepted but never read; dry_run below is
    # derived solely from --execute. Kept as-is for CLI compatibility.
    parser.add_argument("--dry-run", action="store_true", default=True,
                        help="Don't actually delete/transfer (default: True)")
    parser.add_argument("--execute", action="store_true",
                        help="Actually perform the operation")
    parser.add_argument("--tailscale", action="store_true",
                        help="Use Tailscale IP for remote access")
    args = parser.parse_args()
    # Destructive/transfer operations only run when --execute is given.
    dry_run = not args.execute
    if args.command == "analyze":
        if not args.path:
            print("Usage: study_archiver analyze <path>")
            return
        path = Path(args.path)
        if path.is_dir():
            # Check if it's a single study or a collection: a single study has
            # a config file or a 1_setup folder at its root.
            if (path / "optimization_config.json").exists() or (path / "1_setup").exists():
                # Single study
                analysis = analyze_study(path)
                print_analysis(analysis)
            else:
                # Collection of studies
                total = analyze_all_studies(path)
                print(f"\n{'='*60}")
                print(f"Summary: {len(total['studies'])} studies")
                print(f"{'='*60}")
                print(f"Total size: {total['total_size']/1e9:8.2f} GB")
                print(f"Essential: {total['total_essential']/1e9:8.2f} GB")
                print(f"Deletable: {total['total_deletable']/1e9:8.2f} GB")
                print(f"Potential save: {total['total_deletable']/1e9:8.2f} GB")
                print(f"\nPer study:")
                for s in total["studies"]:
                    print(f" {s['study_name']:40} {s['total_size_bytes']/1e9:6.2f} GB ({s['trial_count']:3d} trials)")
    elif args.command == "cleanup":
        if not args.path:
            print("Usage: study_archiver cleanup <study_path> [--execute]")
            return
        cleanup_study(Path(args.path), dry_run=dry_run)
    elif args.command == "archive":
        if not args.path:
            print("Usage: study_archiver archive <study_path> [--execute] [--tailscale]")
            return
        archive_to_remote(Path(args.path), use_tailscale=args.tailscale, dry_run=dry_run)
    elif args.command == "restore":
        if not args.path:
            print("Usage: study_archiver restore <study_name> [--tailscale]")
            return
        # Restores always land under ./studies relative to the current dir.
        target = Path.cwd() / "studies"
        restore_from_remote(args.path, target, use_tailscale=args.tailscale)
    elif args.command == "list":
        archives = list_remote_archives(use_tailscale=args.tailscale)
        if archives:
            print(f"\nArchived studies on dalidou:")
            print(f"{'='*60}")
            for a in archives:
                print(f" {a['name']:40} {a['size']:>8} {a['date']}")
        else:
            print("No archives found (or server not reachable)")
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,411 @@
"""
Study Cleanup Utility
====================
Cleans up completed optimization studies to save disk space by removing
large intermediate files (NX models, FEM meshes, solver results) while
preserving essential data (parameters, extracted results, database).
Usage:
python -m optimization_engine.utils.study_cleanup <study_path> [options]
Options:
--dry-run Show what would be deleted without actually deleting
--keep-best N Keep iteration folders for the top N best trials
--keep-pareto Keep all Pareto-optimal iterations (for multi-objective)
--aggressive Delete ALL iteration data (only keep DB and config)
The database (study.db) contains all optimization results and can regenerate
any analysis. The original NX model in 1_setup is always preserved.
"""
import argparse
import json
import shutil
import sqlite3
from pathlib import Path
from typing import Optional
# Files to ALWAYS keep in iteration folders (tiny, essential).
# Matched case-insensitively against Path.name by the cleanup functions below.
ESSENTIAL_FILES = {
    'params.exp',                   # Design parameters for this iteration
    '_temp_mass.txt',               # Extracted mass
    '_temp_part_properties.json',   # Part properties
    '_temp_zernike.json',           # Zernike coefficients (if exists)
    'results.json',                 # Any extracted results
}
# Extensions to DELETE (large, regenerable/already extracted).
# Matched against Path.suffix.lower() by the cleanup functions below.
DELETABLE_EXTENSIONS = {
    '.op2',   # Nastran binary results (~65 MB each)
    '.prt',   # NX Part files (~30-35 MB each)
    '.fem',   # FEM mesh files (~15 MB each)
    '.dat',   # Nastran input deck (~15 MB each)
    '.sim',   # Simulation file (~7 MB each)
    '.afm',   # FEA auxiliary (~4 MB each)
    '.f04',   # Nastran log
    '.f06',   # Nastran output
    '.log',   # Solver log
    '.diag',  # Diagnostics
}
def get_study_info(study_path: Path) -> dict:
    """Collect study metadata from its config file and SQLite results DB.

    Reads ``optimization_config.json`` (if present) and the study database
    (``3_results/study.db``, falling back to the legacy ``2_results``
    location) to report the completed-trial count, the best trials for the
    first objective, and any Pareto-optimal trials.

    Args:
        study_path: Root folder of the study.
    Returns:
        dict with 'name', 'has_config', 'has_db', 'trial_count',
        'best_trials', 'pareto_trials', and (when present) 'config'.
    """
    config_path = study_path / 'optimization_config.json'
    # Try both possible DB locations ('2_results' is the legacy layout).
    db_path = study_path / '3_results' / 'study.db'
    if not db_path.exists():
        db_path = study_path / '2_results' / 'study.db'
    info = {
        'name': study_path.name,
        'has_config': config_path.exists(),
        'has_db': db_path.exists(),
        'trial_count': 0,
        'best_trials': [],
        'pareto_trials': [],
    }
    if config_path.exists():
        with open(config_path) as f:
            info['config'] = json.load(f)
    if db_path.exists():
        conn = sqlite3.connect(db_path)
        # FIX: close the connection even when a query raises (previously a
        # missing 'trials' table leaked the connection).
        try:
            cursor = conn.cursor()
            # Get trial count
            cursor.execute("SELECT COUNT(*) FROM trials WHERE state = 'COMPLETE'")
            info['trial_count'] = cursor.fetchone()[0]
            # Best trials for the first objective (single-objective studies);
            # the table may not exist, in which case the default [] stands.
            try:
                cursor.execute("""
                    SELECT trial_id, value FROM trial_values
                    WHERE objective = 0
                    ORDER BY value ASC LIMIT 10
                """)
                info['best_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
            # Pareto attribute (multi-objective studies); same best-effort rule.
            try:
                cursor.execute("""
                    SELECT DISTINCT trial_id FROM trial_system_attrs
                    WHERE key = 'pareto_optimal' AND value = '1'
                """)
                info['pareto_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
        finally:
            conn.close()
    return info
def calculate_cleanup_savings(study_path: Path, keep_iters: Optional[set] = None) -> dict:
    """Calculate how much space a cleanup would save.

    Args:
        study_path: Root folder of the study.
        keep_iters: Iteration numbers whose folders are kept entirely.
    Returns:
        dict with 'total_size', 'deletable_size', and 'keep_size' in bytes.
    """
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'  # Legacy structure
    if not iterations_path.exists():
        return {'total_size': 0, 'deletable_size': 0, 'keep_size': 0}
    total_size = 0
    deletable_size = 0
    keep_size = 0
    keep_iters = keep_iters or set()
    # PERF: build the lowercase essential-name set once instead of per file.
    essential_names = {e.lower() for e in ESSENTIAL_FILES}
    for iter_folder in iterations_path.iterdir():
        if not iter_folder.is_dir():
            continue
        # Extract iteration number from the folder name ('iterNNN').
        try:
            iter_num = int(iter_folder.name.replace('iter', ''))
        except ValueError:  # FIX: was a bare except — only skip bad names
            continue
        for f in iter_folder.iterdir():
            if not f.is_file():
                continue
            size = f.stat().st_size
            total_size += size
            # Keep the entire folder if this iteration is protected.
            if iter_num in keep_iters:
                keep_size += size
                continue
            # Keep essential files; unknown extensions are kept by default.
            if f.name.lower() in essential_names:
                keep_size += size
            elif f.suffix.lower() in DELETABLE_EXTENSIONS:
                deletable_size += size
            else:
                keep_size += size
    return {
        'total_size': total_size,
        'deletable_size': deletable_size,
        'keep_size': keep_size,
    }
def cleanup_study(
    study_path: Path,
    dry_run: bool = True,
    keep_best: int = 0,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> dict:
    """
    Clean up a study to save disk space.

    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
        keep_best: Number of best iterations to keep completely
        keep_pareto: Keep all Pareto-optimal iterations
        aggressive: Delete ALL iteration folders (only keep DB)
    Returns:
        dict with cleanup statistics
    Raises:
        ValueError: if *study_path* does not exist.
    """
    study_path = Path(study_path)
    if not study_path.exists():
        raise ValueError(f"Study path does not exist: {study_path}")
    # Get study info
    info = get_study_info(study_path)
    # Determine which iterations survive untouched.
    keep_iters = set()
    if keep_best > 0 and info['best_trials']:
        keep_iters.update(info['best_trials'][:keep_best])
    if keep_pareto and info['pareto_trials']:
        keep_iters.update(info['pareto_trials'])
    # Find iterations folder ('1_working' is the legacy layout).
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'
    if not iterations_path.exists():
        return {'status': 'no_iterations', 'deleted_bytes': 0, 'deleted_files': 0}
    # Calculate savings
    savings = calculate_cleanup_savings(study_path, keep_iters)
    deleted_bytes = 0
    deleted_files = 0
    deleted_folders = 0
    if aggressive:
        # Remove the whole iterations tree. The reported byte count is the
        # same in a dry run; only the rmtree itself is gated.
        deleted_bytes = savings['total_size']
        if not dry_run:
            shutil.rmtree(iterations_path)
            deleted_folders = 1
    else:
        # Selective cleanup.
        # PERF: build the lowercase essential-name set once, not per file.
        essential_names = {e.lower() for e in ESSENTIAL_FILES}
        for iter_folder in iterations_path.iterdir():
            if not iter_folder.is_dir():
                continue
            # Extract iteration number from the folder name ('iterNNN').
            try:
                iter_num = int(iter_folder.name.replace('iter', ''))
            except ValueError:  # FIX: was a bare except — only skip bad names
                continue
            # Skip kept iterations
            if iter_num in keep_iters:
                continue
            for f in iter_folder.iterdir():
                if not f.is_file():
                    continue
                # Keep essential files
                if f.name.lower() in essential_names:
                    continue
                # Delete deletable extensions
                if f.suffix.lower() in DELETABLE_EXTENSIONS:
                    size = f.stat().st_size
                    if not dry_run:
                        f.unlink()
                    deleted_bytes += size
                    deleted_files += 1
    return {
        'status': 'dry_run' if dry_run else 'completed',
        'study_name': info['name'],
        'trial_count': info['trial_count'],
        'kept_iterations': list(keep_iters),
        'total_size_before': savings['total_size'],
        'deleted_bytes': deleted_bytes,
        'deleted_files': deleted_files,
        'deleted_folders': deleted_folders,
        'space_saved_gb': deleted_bytes / (1024**3),
    }
def cleanup_batch(
    parent_path: Path,
    pattern: str = "*",
    dry_run: bool = True,
    keep_best: int = 3,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> list:
    """
    Clean up multiple studies matching a pattern.

    Directories that lack an iterations folder (neither ``2_iterations`` nor
    legacy ``1_working``) are skipped. A failure in one study is recorded as
    an error entry and does not stop the batch.

    Args:
        parent_path: Parent directory containing studies
        pattern: Glob pattern to match study folders (e.g., "m1_mirror_*")
        dry_run: If True, only report
        keep_best: Keep N best iterations per study
        keep_pareto: Keep Pareto-optimal iterations
        aggressive: Delete all iteration folders
    Returns:
        List of cleanup results
    """
    parent_path = Path(parent_path)
    outcomes = []
    for candidate in sorted(parent_path.glob(pattern)):
        if not candidate.is_dir():
            continue
        looks_like_study = (
            (candidate / '2_iterations').exists()
            or (candidate / '1_working').exists()
        )
        if not looks_like_study:
            continue
        try:
            outcomes.append(
                cleanup_study(
                    candidate,
                    dry_run=dry_run,
                    keep_best=keep_best,
                    keep_pareto=keep_pareto,
                    aggressive=aggressive,
                )
            )
        except Exception as e:
            # Record the failure and keep going with the remaining studies.
            outcomes.append({
                'study_name': candidate.name,
                'status': 'error',
                'error': str(e),
            })
    return outcomes
def main():
    """CLI entry point: single-study or --batch cleanup, dry-run by default."""
    parser = argparse.ArgumentParser(
        description='Clean up completed optimization studies to save disk space.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('study_path', type=Path, help='Path to study folder or parent directory')
    # NOTE(review): --dry-run is accepted but never read; dry_run below is
    # derived solely from --execute. Kept as-is for CLI compatibility.
    parser.add_argument('--dry-run', action='store_true', default=True,
                        help='Show what would be deleted without deleting (default)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete files (opposite of --dry-run)')
    parser.add_argument('--keep-best', type=int, default=3,
                        help='Keep N best iterations completely (default: 3)')
    parser.add_argument('--keep-pareto', action='store_true',
                        help='Keep all Pareto-optimal iterations')
    parser.add_argument('--aggressive', action='store_true',
                        help='Delete ALL iteration data (only keep DB)')
    parser.add_argument('--batch', type=str, metavar='PATTERN',
                        help='Clean multiple studies matching pattern (e.g., "m1_mirror_*")')
    args = parser.parse_args()
    # Files are only deleted when --execute is given.
    dry_run = not args.execute
    if args.batch:
        # Batch cleanup mode: study_path is the parent dir, --batch the glob.
        print(f"\n{'='*60}")
        print(f"BATCH CLEANUP: {args.study_path}")
        print(f"Pattern: {args.batch}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
        results = cleanup_batch(
            args.study_path,
            pattern=args.batch,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("BATCH RESULTS")
        print(f"{'='*60}")
        print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
        print("-" * 75)
        total_saved = 0
        for r in results:
            if r.get('status') == 'error':
                print(f"{r['study_name']:<45} ERROR: {r.get('error', 'Unknown')}")
            else:
                saved = r.get('space_saved_gb', 0)
                total_saved += saved
                print(f"{r['study_name']:<45} {r.get('trial_count', 0):>7} "
                      f"{r.get('total_size_before', 0)/(1024**3):>7.1f}G {saved:>7.1f}G")
        print("-" * 75)
        print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return results
    else:
        # Single study cleanup
        print(f"\n{'='*60}")
        print(f"STUDY CLEANUP: {args.study_path.name}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN (no files deleted)' if dry_run else 'EXECUTE (files WILL be deleted)'}")
        print(f"Keep best: {args.keep_best} iterations")
        print(f"Keep Pareto: {args.keep_pareto}")
        print(f"Aggressive: {args.aggressive}")
        result = cleanup_study(
            args.study_path,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("RESULTS")
        print(f"{'='*60}")
        print(f"Trials in study: {result['trial_count']}")
        print(f"Iterations kept: {len(result['kept_iterations'])} {result['kept_iterations'][:5]}{'...' if len(result['kept_iterations']) > 5 else ''}")
        print(f"Total size before: {result['total_size_before'] / (1024**3):.2f} GB")
        print(f"{'Would delete' if dry_run else 'Deleted'}: {result['deleted_files']} files")
        print(f"Space {'to save' if dry_run else 'saved'}: {result['space_saved_gb']:.2f} GB")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return result
if __name__ == '__main__':
    main()