feat: Pre-migration checkpoint - updated docs and utilities

Updates before optimization_engine migration: - Updated migration plan to v2.1 with complete file inventory - Added OP_07 disk optimization protocol - Added SYS_16 self-aware turbo protocol - Added study archiver and cleanup utilities - Added ensemble surrogate module - Updated NX solver and session manager - Updated zernike HTML generator - Added context engineering plan - LAC session insights updates 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 10:22:45 -05:00
parent faa7779a43
commit 82f36689b7
21 changed files with 6304 additions and 890 deletions
--- a/optimization_engine/utils/study_archiver.py
+++ b/optimization_engine/utils/study_archiver.py
@@ -0,0 +1,438 @@
+"""
+Study Archiver - Disk Space Optimization for Atomizer Studies
+
+This module provides utilities for:
+1. Cleaning up completed studies (removing regenerable files)
+2. Archiving studies to remote storage (dalidou server)
+3. Restoring archived studies on-demand
+
+Usage:
+    # NOTE: cleanup and archive only preview what they would do unless
+    # --execute is passed on the command line.
+
+    # Cleanup a completed study (keep only essential files)
+    python -m optimization_engine.utils.study_archiver cleanup studies/M1_Mirror/m1_mirror_V12
+
+    # Archive to remote server
+    python -m optimization_engine.utils.study_archiver archive studies/M1_Mirror/m1_mirror_V12
+
+    # Restore from remote
+    python -m optimization_engine.utils.study_archiver restore m1_mirror_V12
+
+    # List archives stored on the remote server
+    python -m optimization_engine.utils.study_archiver list
+
+    # Show disk usage analysis
+    python -m optimization_engine.utils.study_archiver analyze studies/M1_Mirror
+"""
+
+import os
+import json
+import shutil
+import tarfile
+import subprocess
+from pathlib import Path
+from datetime import datetime
+from typing import Optional, Dict, List, Tuple
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Connection settings for the remote archive server
+REMOTE_CONFIG = dict(
+    host="192.168.86.50",            # address on the local WiFi
+    host_tailscale="100.80.199.40",  # address for remote access via Tailscale
+    user="papa",
+    archive_path="/srv/storage/atomizer-archive",
+    ssh_port=22,
+)
+
+# Per-trial file types that are KEPT (essential for analysis)
+ESSENTIAL_EXTENSIONS = {
+    ".op2",   # Nastran binary results (used for Zernike extraction)
+    ".json",  # parameters, results, metadata
+    ".npz",   # pre-computed Zernike coefficients
+    ".html",  # generated reports
+    ".png",   # visualization images
+    ".csv",   # exported data
+}
+
+# Per-trial file types that are DELETED (regenerable from master + params)
+DELETABLE_EXTENSIONS = {
+    ".prt",   # NX part files (copy of master)
+    ".fem",   # FEM mesh files (copy of master)
+    ".sim",   # simulation files (copy of master)
+    ".afm",   # assembly FEM files
+    ".dat",   # solver input deck (can be regenerated)
+    ".f04",   # Nastran output log
+    ".f06",   # Nastran printed output
+    ".log",   # generic log files
+    ".diag",  # diagnostic files
+    ".txt",   # temporary text files
+    ".exp",   # expression files
+    ".bak",   # backup files
+}
+
+# Folders that are always kept in their entirety
+KEEP_FOLDERS = {
+    "1_setup",              # master model files (source of truth)
+    "3_results",            # final results, database, reports
+    "best_design_archive",  # archived best designs
+}
+
+
+def analyze_study(study_path: Path) -> Dict:
+    """
+    Analyze disk usage of a study folder.
+
+    Every file below ``study_path`` is visited once and its size is added to
+    the overall total, to a per-extension total and to a total for the first
+    path component it lives under.
+
+    Args:
+        study_path: Path to study folder
+
+    Returns:
+        Dict with the keys:
+            study_name: name of the study folder
+            total_size_bytes: combined size of all files
+            by_extension: lower-cased file suffix -> bytes
+            by_folder: first path component below the study -> bytes
+            essential_size: bytes in files whose suffix is in ESSENTIAL_EXTENSIONS
+            deletable_size: bytes in files that cleanup_study() would remove
+            trial_count: number of "trial_*" / "iter*" folders in 2_iterations
+    """
+    study_path = Path(study_path)
+
+    analysis = {
+        "study_name": study_path.name,
+        "total_size_bytes": 0,
+        "by_extension": {},
+        "by_folder": {},
+        "essential_size": 0,
+        "deletable_size": 0,
+        "trial_count": 0,
+    }
+
+    for f in study_path.rglob("*"):
+        if not f.is_file():
+            continue
+
+        sz = f.stat().st_size
+        ext = f.suffix.lower()
+
+        analysis["total_size_bytes"] += sz
+        analysis["by_extension"][ext] = analysis["by_extension"].get(ext, 0) + sz
+
+        # Categorize by folder
+        rel_parts = f.relative_to(study_path).parts
+        if rel_parts:
+            folder = rel_parts[0]
+            analysis["by_folder"][folder] = analysis["by_folder"].get(folder, 0) + sz
+
+        # cleanup_study() only removes files that sit directly inside a
+        # sub-folder of 2_iterations. Counting the same suffixes elsewhere
+        # (e.g. the master .prt/.fem/.sim files in 1_setup) would overstate
+        # the space that can actually be reclaimed.
+        in_cleanup_scope = len(rel_parts) == 3 and rel_parts[0] == "2_iterations"
+
+        # Essential vs deletable
+        if ext in ESSENTIAL_EXTENSIONS:
+            analysis["essential_size"] += sz
+        elif in_cleanup_scope and ext in DELETABLE_EXTENSIONS:
+            analysis["deletable_size"] += sz
+
+    # Count trials
+    iterations_dir = study_path / "2_iterations"
+    if iterations_dir.exists():
+        analysis["trial_count"] = sum(
+            1
+            for d in iterations_dir.iterdir()
+            if d.is_dir() and d.name.startswith(("trial_", "iter"))
+        )
+
+    return analysis
+
+
+def print_analysis(analysis: Dict):
+    """
+    Print formatted analysis results.
+
+    Args:
+        analysis: Result dict as returned by analyze_study(). The keys read
+            are study_name, total_size_bytes, essential_size, deletable_size,
+            trial_count, by_folder and by_extension.
+    """
+    total_gb = analysis["total_size_bytes"] / 1e9
+    essential_gb = analysis["essential_size"] / 1e9
+    deletable_gb = analysis["deletable_size"] / 1e9
+
+    # An empty study has a total of 0 bytes; report 0% instead of dividing
+    # by zero.
+    essential_pct = 100*essential_gb/total_gb if total_gb else 0.0
+    deletable_pct = 100*deletable_gb/total_gb if total_gb else 0.0
+
+    print(f"\n{'='*60}")
+    print(f"Study: {analysis['study_name']}")
+    print(f"{'='*60}")
+    print(f"Total size:     {total_gb:8.2f} GB")
+    print(f"Trials:         {analysis['trial_count']:8d}")
+    print(f"Essential:      {essential_gb:8.2f} GB ({essential_pct:.1f}%)")
+    print(f"Deletable:      {deletable_gb:8.2f} GB ({deletable_pct:.1f}%)")
+    print(f"Potential save: {deletable_gb:8.2f} GB")
+
+    # Largest folders first
+    print(f"\nBy folder:")
+    for folder, size in sorted(analysis["by_folder"].items(), key=lambda x: -x[1]):
+        print(f"  {folder:25} {size/1e9:8.2f} GB")
+
+    # Ten largest extensions, tagged with how cleanup treats them
+    print(f"\nTop extensions:")
+    for ext, size in sorted(analysis["by_extension"].items(), key=lambda x: -x[1])[:10]:
+        status = "[KEEP]" if ext in ESSENTIAL_EXTENSIONS else "[DEL?]" if ext in DELETABLE_EXTENSIONS else "[    ]"
+        print(f"  {status} {ext:10} {size/1e9:8.2f} GB")
+
+
+def cleanup_study(study_path: Path, dry_run: bool = True) -> Tuple[int, int]:
+    """
+    Clean up a completed study by removing regenerable files from trial folders.
+
+    Only files that sit directly inside a sub-folder of ``2_iterations`` and
+    whose suffix is in DELETABLE_EXTENSIONS are considered; nested folders
+    and everything outside ``2_iterations`` are left untouched.
+
+    Args:
+        study_path: Path to study folder
+        dry_run: If True, only report what would be deleted
+
+    Returns:
+        (files_deleted, bytes_freed); always (0, 0) for a dry run or when the
+        study has no ``2_iterations`` folder
+    """
+    study_path = Path(study_path)
+    iterations_dir = study_path / "2_iterations"
+
+    if not iterations_dir.exists():
+        logger.warning("No iterations folder found in %s", study_path)
+        return 0, 0
+
+    # (path, size) pairs. The size is recorded once during the scan so the
+    # dry-run report and the real run use the same numbers.
+    # Sorted so the dry-run sample is reproducible between runs.
+    candidates = []
+    for trial_dir in sorted(iterations_dir.iterdir()):
+        if not trial_dir.is_dir():
+            continue
+
+        for f in sorted(trial_dir.iterdir()):
+            if f.is_file() and f.suffix.lower() in DELETABLE_EXTENSIONS:
+                candidates.append((f, f.stat().st_size))
+
+    if dry_run:
+        bytes_to_free = sum(sz for _, sz in candidates)
+        print(f"\n[DRY RUN] Would delete {len(candidates)} files, freeing {bytes_to_free/1e9:.2f} GB")
+        print("\nSample files to delete:")
+        for f, _ in candidates[:10]:
+            print(f"  {f.relative_to(study_path)}")
+        if len(candidates) > 10:
+            print(f"  ... and {len(candidates) - 10} more")
+        return 0, 0
+
+    # Actually delete
+    deleted = 0
+    freed = 0
+    for f, sz in candidates:
+        try:
+            f.unlink()
+        except OSError as e:
+            # Keep going: one locked or vanished file must not abort the
+            # whole cleanup.
+            logger.error("Failed to delete %s: %s", f, e)
+        else:
+            deleted += 1
+            freed += sz
+
+    print(f"Deleted {deleted} files, freed {freed/1e9:.2f} GB")
+    return deleted, freed
+
+
+def archive_to_remote(
+    study_path: Path,
+    use_tailscale: bool = False,
+    dry_run: bool = True
+) -> bool:
+    """
+    Archive a study to the remote dalidou server.
+
+    The study folder is packed into ``<study_name>_<YYYYMMDD>.tar.gz`` next to
+    the study folder, uploaded with rsync over ssh, and the local archive is
+    removed once the upload succeeded. If the upload fails the local archive
+    is left in place.
+
+    Args:
+        study_path: Path to study folder
+        use_tailscale: Use Tailscale IP (for remote access)
+        dry_run: If True, only report what would be done
+
+    Returns:
+        True if successful (a dry run always returns True)
+
+    Raises:
+        subprocess.CalledProcessError: If the remote archive directory
+            cannot be created.
+        FileNotFoundError: If the ssh executable is not installed.
+    """
+    import shlex  # only needed here, to quote the path for the remote shell
+
+    study_path = Path(study_path)
+    study_name = study_path.name
+
+    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
+    user = REMOTE_CONFIG["user"]
+    remote_path = REMOTE_CONFIG["archive_path"]
+    port = str(REMOTE_CONFIG["ssh_port"])
+    remote = f"{user}@{host}"
+
+    # Create compressed archive locally first
+    archive_name = f"{study_name}_{datetime.now().strftime('%Y%m%d')}.tar.gz"
+    local_archive = study_path.parent / archive_name
+
+    if dry_run:
+        print(f"\n[DRY RUN] Would archive {study_name}")
+        print(f"  1. Create {archive_name}")
+        print(f"  2. Upload to {user}@{host}:{remote_path}/")
+        print(f"  3. Delete local archive")
+        return True
+
+    # Check before tarfile.open() creates an (empty) archive file on disk.
+    if not study_path.is_dir():
+        print(f"Study folder not found: {study_path}")
+        return False
+
+    print(f"Creating archive: {archive_name}")
+    with tarfile.open(local_archive, "w:gz") as tar:
+        tar.add(study_path, arcname=study_name)
+
+    archive_size = local_archive.stat().st_size
+    print(f"Archive size: {archive_size/1e9:.2f} GB")
+
+    # Upload via rsync (more reliable than scp for large files)
+    print(f"Uploading to {host}...")
+
+    # Commands are passed as argument lists (no local shell), so local paths
+    # need no quoting. The mkdir string is still interpreted by the remote
+    # shell, hence shlex.quote().
+    mkdir_cmd = ["ssh", "-p", port, remote, f"mkdir -p {shlex.quote(remote_path)}"]
+    subprocess.run(mkdir_cmd, check=True)
+
+    # -e makes rsync use the configured ssh port as well
+    rsync_cmd = [
+        "rsync", "-avz", "--progress",
+        "-e", f"ssh -p {port}",
+        str(local_archive),
+        f"{remote}:{remote_path}/",
+    ]
+    try:
+        result = subprocess.run(rsync_cmd)
+    except FileNotFoundError:
+        print("Upload failed: rsync executable not found")
+        return False
+
+    if result.returncode == 0:
+        print("Upload successful!")
+        # Clean up local archive
+        local_archive.unlink()
+        return True
+
+    print(f"Upload failed with code {result.returncode}")
+    return False
+
+
+def restore_from_remote(
+    study_name: str,
+    target_dir: Path,
+    use_tailscale: bool = False
+) -> bool:
+    """
+    Restore a study from the remote server.
+
+    Looks for archives named ``<study_name>_<YYYYMMDD>.tar.gz`` (the naming
+    used by archive_to_remote), downloads the most recent one into
+    ``target_dir``, extracts it there and removes the downloaded archive.
+
+    Args:
+        study_name: Name of the study to restore
+        target_dir: Where to extract the study (created if missing)
+        use_tailscale: Use Tailscale IP
+
+    Returns:
+        True if successful
+    """
+    import shlex  # only needed here, to quote values for the remote shell
+
+    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
+    user = REMOTE_CONFIG["user"]
+    remote_path = REMOTE_CONFIG["archive_path"]
+    port = str(REMOTE_CONFIG["ssh_port"])
+    remote = f"{user}@{host}"
+
+    target_dir = Path(target_dir)
+
+    # Find the archive on remote
+    print(f"Looking for {study_name} on {host}...")
+
+    # Match "<name>_<8 digits>.tar.gz" exactly. A bare "<name>*" would also
+    # match other studies sharing the prefix (m1_mirror_V1 -> m1_mirror_V12_...).
+    # The name is quoted because it comes from the command line; the date
+    # glob stays unquoted so the remote shell expands it.
+    date_glob = "[0-9]" * 8
+    pattern = f"{shlex.quote(remote_path)}/{shlex.quote(study_name)}_{date_glob}.tar.gz"
+    # ls sorts by name, and the date stamp sorts chronologically, so the
+    # last line is the newest archive.
+    find_cmd = ["ssh", "-p", port, remote, f"ls -1 {pattern} 2>/dev/null | tail -n 1"]
+    try:
+        result = subprocess.run(find_cmd, capture_output=True, text=True)
+    except FileNotFoundError:
+        print("ssh executable not found")
+        return False
+
+    remote_archive = result.stdout.strip()
+    if not remote_archive:
+        print(f"No archive found for {study_name}")
+        return False
+
+    # rsync cannot write into a directory that does not exist yet
+    target_dir.mkdir(parents=True, exist_ok=True)
+    local_archive = target_dir / Path(remote_archive).name
+
+    print(f"Downloading: {remote_archive}")
+    rsync_cmd = [
+        "rsync", "-avz", "--progress",
+        "-e", f"ssh -p {port}",
+        f"{remote}:{remote_archive}",
+        str(local_archive),
+    ]
+    try:
+        result = subprocess.run(rsync_cmd)
+    except FileNotFoundError:
+        print("Download failed")
+        return False
+
+    if result.returncode != 0:
+        print("Download failed")
+        return False
+
+    print("Extracting...")
+    with tarfile.open(local_archive, "r:gz") as tar:
+        if hasattr(tarfile, "data_filter"):
+            # Refuse members that would be written outside target_dir
+            # (absolute paths, "..", links pointing outside).
+            tar.extractall(target_dir, filter="data")
+        else:
+            # Older Python without extraction filters
+            tar.extractall(target_dir)
+
+    # Clean up
+    local_archive.unlink()
+    print(f"Restored to {target_dir / study_name}")
+    return True
+
+
+def list_remote_archives(use_tailscale: bool = False) -> List[Dict]:
+    """
+    List all archived studies on the remote server.
+
+    Runs ``ls -lh`` over ssh on the archive directory and parses its output.
+
+    Args:
+        use_tailscale: Use Tailscale IP
+
+    Returns:
+        One dict per ``*.tar.gz`` file with the keys "name" (file name
+        without directory), "size" and "date" (both as printed by ``ls -lh``).
+        The list is empty when nothing is found or the listing could not be
+        obtained.
+    """
+    import shlex  # only needed here, to quote the path for the remote shell
+
+    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
+    user = REMOTE_CONFIG["user"]
+    remote_path = REMOTE_CONFIG["archive_path"]
+    port = str(REMOTE_CONFIG["ssh_port"])
+
+    # Argument list (no local shell); the glob is expanded by the remote shell.
+    ssh_cmd = [
+        "ssh", "-p", port, f"{user}@{host}",
+        f"ls -lh {shlex.quote(remote_path)}/*.tar.gz 2>/dev/null",
+    ]
+    try:
+        result = subprocess.run(ssh_cmd, capture_output=True, text=True)
+    except FileNotFoundError:
+        # No ssh executable available: same outcome as an unreachable server
+        return []
+
+    archives = []
+    for line in result.stdout.splitlines():
+        if '.tar.gz' not in line:
+            continue
+        parts = line.split()
+        # Need at least 9 whitespace-separated columns: size is read from
+        # column 5, the date from columns 6-8 and the path from the last one
+        if len(parts) < 9:
+            continue
+        archives.append({
+            "name": parts[-1].split('/')[-1],
+            "size": parts[4],
+            "date": f"{parts[5]} {parts[6]} {parts[7]}",
+        })
+
+    return archives
+
+
+def analyze_all_studies(studies_dir: Path) -> Dict:
+    """
+    Run analyze_study() on every study folder inside a directory.
+
+    Sub-folders are processed in sorted order; entries that are not
+    directories and directories whose name starts with "." are skipped.
+
+    Args:
+        studies_dir: Directory containing the study folders
+
+    Returns:
+        Dict with the summed "total_size", "total_essential" and
+        "total_deletable" byte counts plus "studies", the list of the
+        individual analyze_study() results.
+    """
+    root = Path(studies_dir)
+
+    per_study = [
+        analyze_study(entry)
+        for entry in sorted(root.iterdir())
+        if entry.is_dir() and not entry.name.startswith('.')
+    ]
+
+    return {
+        "total_size": sum(a["total_size_bytes"] for a in per_study),
+        "total_essential": sum(a["essential_size"] for a in per_study),
+        "total_deletable": sum(a["deletable_size"] for a in per_study),
+        "studies": per_study,
+    }
+
+
+def main():
+    """
+    Command-line entry point for the study archiver.
+
+    Supported commands are analyze, cleanup, archive, restore and list.
+    cleanup and archive only preview their actions unless --execute is
+    given; an explicit --dry-run takes precedence over --execute.
+
+    Returns:
+        Process exit code: 0 on success, 1 if the operation failed,
+        2 if a required argument is missing.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Atomizer Study Archiver")
+    parser.add_argument("command", choices=["analyze", "cleanup", "archive", "restore", "list"])
+    parser.add_argument("path", nargs="?", help="Study path or name")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Don't actually delete/transfer (this is the default; "
+                             "takes precedence over --execute)")
+    parser.add_argument("--execute", action="store_true",
+                        help="Actually perform the operation")
+    parser.add_argument("--tailscale", action="store_true",
+                        help="Use Tailscale IP for remote access")
+
+    args = parser.parse_args()
+
+    # Previewing is the safe default. If both flags are given, the
+    # non-destructive one wins.
+    dry_run = args.dry_run or not args.execute
+
+    if args.command == "analyze":
+        if not args.path:
+            print("Usage: study_archiver analyze <path>")
+            return 2
+
+        path = Path(args.path)
+        if not path.is_dir():
+            print(f"Not a directory: {path}")
+            return 1
+
+        # Check if it's a single study or a collection
+        if (path / "optimization_config.json").exists() or (path / "1_setup").exists():
+            # Single study
+            analysis = analyze_study(path)
+            print_analysis(analysis)
+        else:
+            # Collection of studies
+            total = analyze_all_studies(path)
+            print(f"\n{'='*60}")
+            print(f"Summary: {len(total['studies'])} studies")
+            print(f"{'='*60}")
+            print(f"Total size:     {total['total_size']/1e9:8.2f} GB")
+            print(f"Essential:      {total['total_essential']/1e9:8.2f} GB")
+            print(f"Deletable:      {total['total_deletable']/1e9:8.2f} GB")
+            print(f"Potential save: {total['total_deletable']/1e9:8.2f} GB")
+            print(f"\nPer study:")
+            for s in total["studies"]:
+                print(f"  {s['study_name']:40} {s['total_size_bytes']/1e9:6.2f} GB ({s['trial_count']:3d} trials)")
+        return 0
+
+    if args.command == "cleanup":
+        if not args.path:
+            print("Usage: study_archiver cleanup <study_path> [--execute]")
+            return 2
+        cleanup_study(Path(args.path), dry_run=dry_run)
+        return 0
+
+    if args.command == "archive":
+        if not args.path:
+            print("Usage: study_archiver archive <study_path> [--execute] [--tailscale]")
+            return 2
+        ok = archive_to_remote(Path(args.path), use_tailscale=args.tailscale, dry_run=dry_run)
+        return 0 if ok else 1
+
+    if args.command == "restore":
+        if not args.path:
+            print("Usage: study_archiver restore <study_name> [--tailscale]")
+            return 2
+        # Restored studies are extracted into ./studies
+        target = Path.cwd() / "studies"
+        ok = restore_from_remote(args.path, target, use_tailscale=args.tailscale)
+        return 0 if ok else 1
+
+    # argparse restricts "command" to the listed choices, so this is "list"
+    archives = list_remote_archives(use_tailscale=args.tailscale)
+    if archives:
+        print(f"\nArchived studies on dalidou:")
+        print(f"{'='*60}")
+        for a in archives:
+            print(f"  {a['name']:40} {a['size']:>8}  {a['date']}")
+    else:
+        print("No archives found (or server not reachable)")
+    return 0
+
+
+if __name__ == "__main__":
+    # Propagate main()'s result as the process exit status
+    raise SystemExit(main())