feat: Pre-migration checkpoint - updated docs and utilities

Updates before optimization_engine migration:
- Updated migration plan to v2.1 with complete file inventory
- Added OP_07 disk optimization protocol
- Added SYS_16 self-aware turbo protocol
- Added study archiver and cleanup utilities
- Added ensemble surrogate module
- Updated NX solver and session manager
- Updated zernike HTML generator
- Added context engineering plan
- LAC session insights updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 10:22:45 -05:00
parent faa7779a43
commit 82f36689b7
21 changed files with 6304 additions and 890 deletions

View File

@@ -2,110 +2,42 @@
"permissions": {
"allow": [
"Bash(dir:*)",
"Bash(sqlite3:*)",
"Bash(timeout /t 30 /nobreak)",
"Bash(npm install:*)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(git push:*)",
"Bash(powershell -Command:*)",
"Bash(python:*)",
"Bash(conda activate:*)",
"Bash(C:/Users/Antoine/miniconda3/envs/atomizer/python.exe:*)",
"Bash(cat:*)",
"Bash(C:UsersAntoineminiconda3envsatomizerpython.exe run_adaptive_mirror_optimization.py --fea-budget 100 --batch-size 5 --strategy hybrid)",
"Bash(/c/Users/Antoine/miniconda3/envs/atomizer/python.exe:*)",
"Bash(npm run build:*)",
"Bash(npm uninstall:*)",
"Bash(git:*)",
"Bash(npm:*)",
"Bash(conda:*)",
"Bash(pip:*)",
"Bash(cmd /c:*)",
"Bash(tasklist:*)",
"Bash(taskkill:*)",
"Bash(robocopy:*)",
"Bash(xcopy:*)",
"Bash(del:*)",
"Bash(type:*)",
"Bash(where:*)",
"Bash(netstat:*)",
"Bash(findstr:*)",
"Bash(curl:*)",
"Bash(npx tsc:*)",
"Bash(atomizer-dashboard/README.md )",
"Bash(atomizer-dashboard/backend/api/main.py )",
"Bash(atomizer-dashboard/backend/api/routes/optimization.py )",
"Bash(atomizer-dashboard/backend/api/routes/claude.py )",
"Bash(atomizer-dashboard/backend/api/routes/terminal.py )",
"Bash(atomizer-dashboard/backend/api/services/ )",
"Bash(atomizer-dashboard/backend/requirements.txt )",
"Bash(atomizer-dashboard/frontend/package.json )",
"Bash(atomizer-dashboard/frontend/package-lock.json )",
"Bash(atomizer-dashboard/frontend/src/components/ClaudeChat.tsx )",
"Bash(atomizer-dashboard/frontend/src/components/ClaudeTerminal.tsx )",
"Bash(atomizer-dashboard/frontend/src/components/dashboard/ControlPanel.tsx )",
"Bash(atomizer-dashboard/frontend/src/pages/Dashboard.tsx )",
"Bash(atomizer-dashboard/frontend/src/context/ )",
"Bash(atomizer-dashboard/frontend/src/pages/Home.tsx )",
"Bash(atomizer-dashboard/frontend/src/App.tsx )",
"Bash(atomizer-dashboard/frontend/src/api/client.ts )",
"Bash(atomizer-dashboard/frontend/src/components/layout/Sidebar.tsx )",
"Bash(atomizer-dashboard/frontend/src/index.css )",
"Bash(atomizer-dashboard/frontend/src/pages/Results.tsx )",
"Bash(atomizer-dashboard/frontend/tailwind.config.js )",
"Bash(docs/07_DEVELOPMENT/DASHBOARD_IMPROVEMENT_PLAN.md)",
"Bash(taskkill:*)",
"Bash(xargs:*)",
"Bash(cmd.exe /c:*)",
"Bash(powershell.exe -Command:*)",
"Bash(where:*)",
"Bash(type %USERPROFILE%.claude*)",
"Bash(conda create:*)",
"Bash(cmd /c \"conda create -n atomizer python=3.10 -y\")",
"Bash(cmd /c \"where conda\")",
"Bash(cmd /c \"dir /b C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\Users\\antoi\\miniconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\ProgramData\\anaconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\ProgramData\\miniconda3\\Scripts\\conda.exe 2>nul || echo NOT_FOUND\")",
"Bash(cmd /c \"if exist C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe (echo FOUND: anaconda3) else if exist C:\\Users\\antoi\\miniconda3\\Scripts\\conda.exe (echo FOUND: miniconda3) else if exist C:\\ProgramData\\anaconda3\\Scripts\\conda.exe (echo FOUND: ProgramData\\anaconda3) else (echo NOT_FOUND)\")",
"Bash(powershell:*)",
"Bash(C:Usersantoianaconda3Scriptsconda.exe create -n atomizer python=3.10 -y)",
"Bash(cmd /c \"C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe create -n atomizer python=3.10 -y\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\optimization_engine\\solve_simulation.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_adaptive_V15\\2_iterations\\iter2\\ASSY_M1_assyfem1_sim1.sim\"\" \"\"Solution 1\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"C:Program FilesSiemensDesigncenterNX2512NXBINrun_journal.exe\" \"C:UsersantoiAtomizernx_journalsextract_part_mass_material.py\" -args \"C:UsersantoiAtomizerstudiesm1_mirror_cost_reduction1_setupmodelM1_Blank.prt\" \"C:UsersantoiAtomizerstudiesm1_mirror_cost_reduction1_setupmodel\" 2>&1\")",
"Bash(npm run dev:*)",
"Bash(cmd /c \"cd /d C:\\Users\\antoi\\Atomizer\\atomizer-dashboard\\frontend && npm run dev\")",
"Bash(cmd /c \"cd /d C:\\Users\\antoi\\Atomizer\\atomizer-dashboard\\frontend && dir package.json && npm --version\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_part_mass_material.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_expressions.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_expressions.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\"\")",
"Bash(cmd /c:*)",
"Bash(taskkill /F /FI \"WINDOWTITLE eq *uvicorn*\")",
"Bash(python -m uvicorn:*)",
"Bash(conda run:*)",
"Bash(/c/Users/antoi/miniconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(/c/Users/antoi/anaconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(/c/Users/antoi/anaconda3/envs/atomizer/python.exe:*)",
"Bash(tasklist:*)",
"Bash(wmic process where \"ProcessId=147068\" delete)",
"Bash(cmd.exe //c \"taskkill /F /PID 147068\")",
"Bash(pip show:*)",
"Bash(python3:*)",
"Bash(python extract_all_mirror_data.py:*)",
"Bash(C:Usersantoiminiconda3envsatomizerpython.exe extract_all_mirror_data.py)",
"Bash(/c/Users/antoi/miniconda3/envs/atomizer/python.exe:*)",
"Bash(grep:*)",
"Bash(python -c:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe -c \"\nimport pandas as pd\ndf = pd.read_csv(r''c:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_all_trials_export.csv'')\n\n# Check which columns have data\nprint(''=== Column data availability ==='')\nfor col in df.columns:\n non_null = df[col].notna().sum()\n print(f''{col}: {non_null}/{len(df)} ({100*non_null/len(df):.1f}%)'')\n\nprint(''\\n=== Studies in dataset ==='')\nprint(df[''study''].value_counts())\n\")",
"Bash(cmd /c \"C:\\Users\\antoi\\anaconda3\\envs\\atomizer\\python.exe -c \"\"import pandas as pd; df = pd.read_csv(r''c:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_all_trials_export.csv''); print(''Rows:'', len(df)); print(df.columns.tolist())\"\"\")",
"Bash(robocopy:*)",
"Bash(xcopy:*)",
"Bash(ls:*)",
"Bash(dir \"c:\\Users\\antoi\\Atomizer\\studies\\*.png\")",
"Bash(powershell -Command \"Get-Process | Where-Object { $_Modules.FileName -like ''*study.db*'' } | Select-Object Id, ProcessName\")",
"Bash(powershell -Command:*)",
"Bash(C:/Users/antoi/miniconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(dir /s /b \"C:\\Users\\antoi\\*conda*\")",
"Bash(conda run -n atomizer python:*)",
"Bash(C:/ProgramData/anaconda3/condabin/conda.bat run -n atomizer python -c \"\nimport sqlite3\n\ndb_path = ''studies/M1_Mirror/m1_mirror_cost_reduction_V6/3_results/study.db''\nconn = sqlite3.connect(db_path)\ncursor = conn.cursor()\n\n# Get counts\ncursor.execute(''SELECT COUNT(*) FROM trials'')\ntotal = cursor.fetchone()[0]\n\ncursor.execute(\"\"SELECT COUNT(*) FROM trials WHERE state = ''COMPLETE''\"\")\ncomplete = cursor.fetchone()[0]\n\nprint(f''=== V6 Study Status ==='')\nprint(f''Total trials: {total}'')\nprint(f''Completed: {complete}'')\nprint(f''Failed/Pruned: {total - complete}'')\nprint(f''Progress: {complete}/200 ({100*complete/200:.1f}%)'')\n\n# Get objectives stats\nobjs = [''rel_filtered_rms_40_vs_20'', ''rel_filtered_rms_60_vs_20'', ''mfg_90_optician_workload'', ''mass_kg'']\nprint(f''\\n=== Objectives Stats ==='')\nfor obj in objs:\n cursor.execute(f\"\"SELECT MIN({obj}), MAX({obj}), AVG({obj}) FROM trials WHERE state = ''COMPLETE'' AND {obj} IS NOT NULL\"\")\n result = cursor.fetchone()\n if result and result[0] is not None:\n print(f''{obj}: min={result[0]:.4f}, max={result[1]:.4f}, mean={result[2]:.4f}'')\n\n# Design variables stats \ndvs = [''whiffle_min'', ''whiffle_outer_to_vertical'', ''whiffle_triangle_closeness'', ''blank_backface_angle'', ''Pocket_Radius'']\nprint(f''\\n=== Design Variables Explored ==='')\nfor dv in dvs:\n try:\n cursor.execute(f\"\"SELECT MIN({dv}), MAX({dv}), AVG({dv}) FROM trials WHERE state = ''COMPLETE''\"\")\n result = cursor.fetchone()\n if result and result[0] is not None:\n print(f''{dv}: min={result[0]:.3f}, max={result[1]:.3f}, mean={result[2]:.3f}'')\n except Exception as e:\n print(f''{dv}: error - {e}'')\n\nconn.close()\n\")",
"Bash(/c/Users/antoi/anaconda3/python.exe:*)",
"Bash(C:UsersantoiAtomizertemp_extract.bat)",
"Bash(dir /b \"C:\\Users\\antoi\\Atomizer\\knowledge_base\\lac\")",
"Bash(pip install:*)",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\")",
"Bash(call \"%USERPROFILE%\\anaconda3\\Scripts\\activate.bat\" atomizer)",
"Bash(cmd /c \"cd /d c:\\Users\\antoi\\Atomizer && call %USERPROFILE%\\anaconda3\\Scripts\\activate.bat atomizer && python -c \"\"import sys; sys.path.insert(0, ''.''); from optimization_engine.extractors import ZernikeExtractor; print(''OK''); import inspect; print(inspect.signature(ZernikeExtractor.extract_relative))\"\"\")",
"Bash(cmd /c \"cd /d c:\\Users\\antoi\\Atomizer && c:\\Users\\antoi\\anaconda3\\envs\\atomizer\\python.exe -c \"\"import sys; sys.path.insert(0, ''.''); from optimization_engine.extractors import ZernikeExtractor; print(''Import OK''); import inspect; sig = inspect.signature(ZernikeExtractor.extract_relative); print(''Signature:'', sig)\"\"\")",
"Bash(c:Usersantoianaconda3envsatomizerpython.exe c:UsersantoiAtomizertoolstest_zernike_import.py)",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\\best_design_archive\")",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\\best_design_archive\\20251220_010128\")",
"Bash(dir /s /b \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V8\")",
"Bash(c:/Users/antoi/anaconda3/envs/atomizer/python.exe:*)",
"Read",
"Skill(dashboard:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe:*)",
"Bash(del \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V5\\\\3_results\\\\study.db\")",
"Bash(C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe -c:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe run_optimization.py --trials 1)",
"Bash(C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe -m py_compile:*)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver analyze \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V12\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V2\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V11\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V11\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V3\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V3\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V6\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V1\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V5\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V12\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction\" --execute)"
],
"deny": [],
"ask": []

View File

@@ -84,6 +84,10 @@ User Request
│ ├─ "error", "failed", "not working", "crashed"
│ └─► Load: OP_06_TROUBLESHOOT.md
├─► MANAGE disk space?
│ ├─ "disk", "space", "cleanup", "archive", "storage"
│ └─► Load: OP_07_DISK_OPTIMIZATION.md
├─► CONFIGURE settings?
│ ├─ "change", "modify", "settings", "parameters"
│ └─► Load relevant SYS_* protocol
@@ -109,6 +113,7 @@ User Request
| Analyze results | "results", "best", "compare", "pareto" | OP_04 | - | user |
| Export training data | "export", "training data", "neural" | OP_05 | modules/neural-acceleration.md | user |
| Debug issues | "error", "failed", "not working", "help" | OP_06 | - | user |
| **Disk management** | "disk", "space", "cleanup", "archive" | **OP_07** | modules/study-disk-optimization.md | user |
| Understand IMSO | "protocol 10", "IMSO", "adaptive" | SYS_10 | - | user |
| Multi-objective | "pareto", "NSGA", "multi-objective" | SYS_11 | - | user |
| Extractors | "extractor", "displacement", "stress" | SYS_12 | modules/extractors-catalog.md | user |

View File

@@ -30,6 +30,7 @@ requires_skills:
| See best results | OP_04 | `optuna-dashboard sqlite:///study.db` or dashboard |
| Export neural training data | OP_05 | `python run_optimization.py --export-training` |
| Fix an error | OP_06 | Read error log → follow diagnostic tree |
| **Free disk space** | **OP_07** | `archive_study.bat cleanup <study> --execute` |
| Add custom physics extractor | EXT_01 | Create in `optimization_engine/extractors/` |
| Add lifecycle hook | EXT_02 | Create in `optimization_engine/plugins/` |
| Generate physics insight | SYS_16 | `python -m optimization_engine.insights generate <study>` |
@@ -219,6 +220,48 @@ python -c "import optuna; s=optuna.load_study('my_study', 'sqlite:///3_results/s
---
## Disk Space Management (OP_07)
FEA studies consume massive disk space. After completion, clean up regenerable files:
### Quick Commands
```bash
# Analyze disk usage
archive_study.bat analyze studies\M1_Mirror
# Cleanup completed study (dry run first!)
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
# Archive to dalidou server
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
# List remote archives
archive_study.bat list
```
### What Gets Deleted vs Kept
| KEEP | DELETE |
|------|--------|
| `.op2` (Nastran results) | `.prt, .fem, .sim` (copies of master) |
| `.json` (params/metadata) | `.dat` (solver input) |
| `1_setup/` (master files) | `.f04, .f06, .log` (solver logs) |
| `3_results/` (database) | `.afm, .diag, .bak` (temp files) |
### Typical Savings
| Stage | M1_Mirror Example |
|-------|-------------------|
| Full | 194 GB |
| After cleanup | 114 GB (41% saved) |
| Archived to server | 5 GB local (97% saved) |
**Full details**: `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md`
---
## LAC (Learning Atomizer Core) Commands
```bash

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,464 @@
# Study Disk Optimization Module
## Atomizer Disk Space Management System
**Version:** 1.0
**Created:** 2025-12-29
**Status:** PRODUCTION READY
**Impact:** Reduced M1_Mirror from 194 GB → 114 GB (80 GB freed, 41% reduction)
---
## Executive Summary
FEA optimization studies consume massive disk space due to per-trial file copying. This module provides:
1. **Local Cleanup** - Remove regenerable files from completed studies (50%+ savings)
2. **Remote Archival** - Archive to dalidou server (~16 TB available: 3.6 TB SSD + 12.7 TB HDD)
3. **On-Demand Restore** - Pull archived studies when needed
### Key Insight
Each trial folder contains ~150 MB, but only **~70 MB is essential** (OP2 results + metadata). The rest are copies of master files that can be regenerated.
---
## Part 1: File Classification
### Essential Files (KEEP)
| Extension | Purpose | Typical Size |
|-----------|---------|--------------|
| `.op2` | Nastran binary results | 68 MB |
| `.json` | Parameters, results, metadata | <1 MB |
| `.npz` | Pre-computed Zernike coefficients | <1 MB |
| `.html` | Generated reports | <1 MB |
| `.png` | Visualization images | <1 MB |
| `.csv` | Exported data tables | <1 MB |
### Deletable Files (REGENERABLE)
| Extension | Purpose | Why Deletable |
|-----------|---------|---------------|
| `.prt` | NX part files | Copy of master in `1_setup/` |
| `.fem` | FEM mesh files | Copy of master |
| `.sim` | Simulation files | Copy of master |
| `.afm` | Assembly FEM | Regenerable |
| `.dat` | Solver input deck | Regenerable from params |
| `.f04` | Nastran output log | Diagnostic only |
| `.f06` | Nastran printed output | Diagnostic only |
| `.log` | Generic logs | Diagnostic only |
| `.diag` | Diagnostic files | Diagnostic only |
| `.txt` | Temp text files | Intermediate data |
| `.exp` | Expression files | Regenerable |
| `.bak` | Backup files | Not needed |
### Protected Folders (NEVER TOUCH)
| Folder | Reason |
|--------|--------|
| `1_setup/` | Master model files (source of truth) |
| `3_results/` | Final database, reports, best designs |
| `best_design_archive/` | Archived optimal configurations |
---
## Part 2: Disk Usage Analysis
### M1_Mirror Project Baseline (Dec 2025)
```
Total: 194 GB across 28 studies, 2000+ trials
By File Type:
.op2 94 GB (48.5%) - Nastran results [ESSENTIAL]
.prt 41 GB (21.4%) - NX parts [DELETABLE]
.fem 22 GB (11.5%) - FEM mesh [DELETABLE]
.dat 22 GB (11.3%) - Solver input [DELETABLE]
.sim 9 GB (4.5%) - Simulation [DELETABLE]
.afm 5 GB (2.5%) - Assembly FEM [DELETABLE]
Other <1 GB (<1%) - Logs, configs [MIXED]
By Folder:
2_iterations/ 168 GB (87%) - Per-trial data
3_results/ 22 GB (11%) - Final results
1_setup/ 4 GB (2%) - Master models
```
### Per-Trial Breakdown (Typical V11+ Structure)
```
iter1/
assy_m1_assyfem1_sim1-solution_1.op2 68.15 MB [KEEP]
M1_Blank.prt 29.94 MB [DELETE]
assy_m1_assyfem1_sim1-solution_1.dat 15.86 MB [DELETE]
M1_Blank_fem1.fem 14.07 MB [DELETE]
ASSY_M1_assyfem1_sim1.sim 7.47 MB [DELETE]
M1_Blank_fem1_i.prt 5.20 MB [DELETE]
ASSY_M1_assyfem1.afm 4.13 MB [DELETE]
M1_Vertical_Support_Skeleton_fem1.fem 3.76 MB [DELETE]
... (logs, temps) <1.00 MB [DELETE]
_temp_part_properties.json 0.00 MB [KEEP]
-------------------------------------------------------
TOTAL: 149.67 MB
Essential only: 68.15 MB
Savings: 54.5%
```
---
## Part 3: Implementation
### Core Utility
**Location:** `optimization_engine/utils/study_archiver.py`
```python
from optimization_engine.utils.study_archiver import (
analyze_study, # Get disk usage analysis
cleanup_study, # Remove deletable files
archive_to_remote, # Archive to dalidou
restore_from_remote, # Restore from dalidou
list_remote_archives, # List server archives
)
```
### Command Line Interface
**Batch Script:** `tools/archive_study.bat`
```bash
# Analyze disk usage
archive_study.bat analyze studies\M1_Mirror
archive_study.bat analyze studies\M1_Mirror\m1_mirror_V12
# Cleanup completed study (dry run by default)
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
# Archive to remote server
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute --tailscale
# List remote archives
archive_study.bat list
archive_study.bat list --tailscale
# Restore from remote
archive_study.bat restore m1_mirror_V12
archive_study.bat restore m1_mirror_V12 --tailscale
```
### Python API
```python
from pathlib import Path
from optimization_engine.utils.study_archiver import (
analyze_study,
cleanup_study,
archive_to_remote,
)
# Analyze
study_path = Path("studies/M1_Mirror/m1_mirror_V12")
analysis = analyze_study(study_path)
print(f"Total: {analysis['total_size_bytes']/1e9:.2f} GB")
print(f"Essential: {analysis['essential_size']/1e9:.2f} GB")
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup (dry_run=False to execute)
deleted, freed = cleanup_study(study_path, dry_run=False)
print(f"Freed {freed/1e9:.2f} GB")
# Archive to server
success = archive_to_remote(study_path, use_tailscale=False, dry_run=False)
```
---
## Part 4: Remote Server Configuration
### dalidou Server Specs
| Property | Value |
|----------|-------|
| Hostname | dalidou |
| Local IP | 192.168.86.50 |
| Tailscale IP | 100.80.199.40 |
| SSH User | papa |
| Archive Path | /srv/storage/atomizer-archive/ |
| Available Storage | 3.6 TB (SSD) + 12.7 TB (HDD) |
### First-Time Setup
```bash
# 1. SSH into server and create archive directory
ssh papa@192.168.86.50
mkdir -p /srv/storage/atomizer-archive
# 2. Set up passwordless SSH (on Windows)
ssh-keygen -t ed25519 # If you don't have a key
ssh-copy-id papa@192.168.86.50
# 3. Test connection
ssh papa@192.168.86.50 "echo 'Connection OK'"
```
### Archive Structure on Server
```
/srv/storage/atomizer-archive/
├── m1_mirror_V11_20251229.tar.gz # Compressed study archive
├── m1_mirror_V12_20251229.tar.gz
├── m1_mirror_flat_back_V3_20251229.tar.gz
└── manifest.json # Index of all archives
```
---
## Part 5: Recommended Workflows
### During Active Optimization
**Keep all files** - You may need to:
- Re-run specific failed trials
- Debug mesh issues
- Analyze intermediate results
### After Study Completion
1. **Generate final report** (STUDY_REPORT.md)
2. **Archive best design** to `3_results/best_design_archive/`
3. **Run cleanup:**
```bash
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
4. **Verify results still accessible:**
- Database queries work
- Best design files intact
- OP2 files for Zernike extraction present
### For Long-Term Storage
1. **After cleanup**, archive to server:
```bash
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
```
2. **Optionally delete local** study folder
3. **Keep only** `3_results/best_design_archive/` locally if needed
### When Revisiting Old Study
1. **Check if archived:**
```bash
archive_study.bat list
```
2. **Restore:**
```bash
archive_study.bat restore m1_mirror_V12
```
3. **If re-running trials needed**, master files in `1_setup/` allow full regeneration
---
## Part 6: Disk Space Targets
### Per-Project Guidelines
| Stage | Expected Size | Notes |
|-------|---------------|-------|
| Active (full) | 100% | All files present |
| Completed (cleaned) | ~50% | Deletables removed |
| Archived (minimal) | ~3% | Best design only locally |
### M1_Mirror Specific
| Stage | Size | Notes |
|-------|------|-------|
| Full | 194 GB | 28 studies, 2000+ trials |
| After cleanup | 114 GB | OP2 + metadata only |
| Minimal local | 5-10 GB | Best designs + database |
| Server archive | ~50 GB | Compressed |
---
## Part 7: Safety Features
### Built-in Protections
1. **Dry run by default** - Must explicitly add `--execute`
2. **Master files untouched** - `1_setup/` is never modified
3. **Results preserved** - `3_results/` is never touched
4. **Essential files preserved** - OP2, JSON, NPZ always kept
5. **Archive verification** - rsync checks integrity
### What Cannot Be Recovered After Cleanup
| File Type | Recovery Method |
|-----------|-----------------|
| `.prt` | Copy from `1_setup/` + update params |
| `.fem` | Regenerate from `.prt` in NX |
| `.sim` | Recreate simulation setup |
| `.dat` | Regenerate from params.json + model |
| `.f04/.f06` | Re-run solver (if needed) |
**Note:** With `1_setup/` master files and `params.json`, ANY trial can be fully reconstructed. The only irreplaceable data is the OP2 results (which we keep).
---
## Part 8: Troubleshooting
### SSH Connection Failed
```bash
# Test connectivity
ping 192.168.86.50
# Test SSH
ssh papa@192.168.86.50 "echo connected"
# If on different network, use Tailscale
ssh papa@100.80.199.40 "echo connected"
```
### Archive Upload Slow
Large studies (50+ GB) take time. Options:
- Run overnight
- Use wired LAN connection
- Pre-cleanup to reduce size
### Out of Disk Space During Archive
Archive is created locally first. Need ~1.5x study size free:
- 20 GB study = ~30 GB temp space required
### Cleanup Removed Wrong Files
If accidentally executed without dry run:
- OP2 files preserved (can still extract results)
- Master files in `1_setup/` intact
- Regenerate other files by re-running trial
---
## Part 9: Integration with Atomizer
### Protocol Reference
**Related Protocol:** `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md`
### Claude Commands
When user says:
- "analyze disk usage" → Run `analyze_study()`
- "clean up study" → Run `cleanup_study()` with confirmation
- "archive to server" → Run `archive_to_remote()`
- "restore study" → Run `restore_from_remote()`
### Automatic Suggestions
After optimization completion, suggest:
```
Optimization complete! The study is using X GB.
Would you like me to clean up regenerable files to save Y GB?
(This keeps all results but removes intermediate model copies)
```
---
## Part 10: File Inventory
### Files Created
| File | Purpose |
|------|---------|
| `optimization_engine/utils/study_archiver.py` | Core utility module |
| `tools/archive_study.bat` | Windows batch script |
| `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md` | Full protocol |
| `.claude/skills/modules/study-disk-optimization.md` | This document |
### Dependencies
- Python 3.8+
- rsync (for remote operations, usually pre-installed)
- SSH client (for remote operations)
- Tailscale (optional, for remote access outside LAN)
---
## Appendix A: Cleanup Results Log (Dec 2025)
### Initial Cleanup Run
| Study | Before | After | Freed | Files Deleted |
|-------|--------|-------|-------|---------------|
| m1_mirror_cost_reduction_V11 | 32.24 GB | 15.94 GB | 16.30 GB | 3,403 |
| m1_mirror_cost_reduction_flat_back_V3 | 52.50 GB | 26.87 GB | 25.63 GB | 5,084 |
| m1_mirror_cost_reduction_flat_back_V6 | 33.71 GB | 16.64 GB | 17.08 GB | 3,391 |
| m1_mirror_cost_reduction_V12 | 22.68 GB | 10.60 GB | 12.08 GB | 2,508 |
| m1_mirror_cost_reduction_flat_back_V1 | 8.76 GB | 4.54 GB | 4.22 GB | 813 |
| m1_mirror_cost_reduction_flat_back_V5 | 8.01 GB | 4.09 GB | 3.92 GB | 765 |
| m1_mirror_cost_reduction | 3.58 GB | 3.08 GB | 0.50 GB | 267 |
| **TOTAL** | **161.48 GB** | **81.76 GB** | **79.73 GB** | **16,231** |
### Project-Wide Summary
```
Before cleanup: 193.75 GB
After cleanup: 114.03 GB
Total freed: 79.72 GB (41% reduction)
```
---
## Appendix B: Quick Reference Card
### Commands
```bash
# Analyze
archive_study.bat analyze <path>
# Cleanup (always dry-run first!)
archive_study.bat cleanup <study> # Dry run
archive_study.bat cleanup <study> --execute # Execute
# Archive
archive_study.bat archive <study> --execute
archive_study.bat archive <study> --execute --tailscale
# Remote
archive_study.bat list
archive_study.bat restore <name>
```
### Python
```python
from optimization_engine.utils.study_archiver import *
# Quick analysis
analysis = analyze_study(Path("studies/M1_Mirror"))
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup
cleanup_study(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
```
### Server Access
```bash
# Local
ssh papa@192.168.86.50
# Remote (Tailscale)
ssh papa@100.80.199.40
# Archive location
/srv/storage/atomizer-archive/
```
---
*This module enables efficient disk space management for large-scale FEA optimization studies.*

View File

@@ -90,6 +90,7 @@ The Protocol Operating System (POS) provides layered documentation:
| Analyze results | OP_04 | `docs/protocols/operations/OP_04_ANALYZE_RESULTS.md` |
| Export neural data | OP_05 | `docs/protocols/operations/OP_05_EXPORT_TRAINING_DATA.md` |
| Debug issues | OP_06 | `docs/protocols/operations/OP_06_TROUBLESHOOT.md` |
| **Free disk space** | OP_07 | `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md` |
## System Protocols (Technical Specs)
@@ -135,14 +136,15 @@ C:\Users\antoi\anaconda3\envs\atomizer\python.exe your_script.py
Atomizer/
├── .claude/skills/ # LLM skills (Bootstrap + Core + Modules)
├── docs/protocols/ # Protocol Operating System
│ ├── operations/ # OP_01 - OP_06
│ ├── operations/ # OP_01 - OP_07
│ ├── system/ # SYS_10 - SYS_15
│ └── extensions/ # EXT_01 - EXT_04
├── optimization_engine/ # Core Python modules
│ ├── extractors/ # Physics extraction library
│ ├── gnn/ # GNN surrogate module (Zernike)
│ └── utils/ # Utilities (dashboard_db, trial_manager)
│ └── utils/ # Utilities (dashboard_db, trial_manager, study_archiver)
├── studies/ # User studies
├── tools/ # CLI tools (archive_study.bat, zernike_html_generator.py)
├── archive/ # Deprecated code (for reference)
└── atomizer-dashboard/ # React dashboard
```

View File

@@ -0,0 +1,132 @@
# NXOpen Documentation MCP Server - Setup TODO
**Created:** 2025-12-29
**Status:** PENDING - Waiting for manual configuration
---
## Current State
The NXOpen documentation MCP server exists on **dalidou** (192.168.86.50) but is not accessible from this Windows machine due to hostname resolution issues.
### What's Working
- ✅ Dalidou server is online and reachable at `192.168.86.50`
- ✅ Port 5000 (Documentation Proxy) is responding
- ✅ Port 3000 (Gitea) is responding
- ✅ MCP server code exists at `/srv/claude-assistant/` on dalidou
### What's NOT Working
- ❌ `dalidou.local` hostname doesn't resolve (mDNS not configured on this machine)
- ❌ MCP tools not integrated with Claude Code
---
## Steps to Complete
### Step 1: Fix Hostname Resolution (Manual - requires Admin)
**Option A: Run the script as Administrator**
```powershell
# Open PowerShell as Administrator, then:
C:\Users\antoi\Atomizer\add_dalidou_host.ps1
```
**Option B: Manually edit hosts file**
1. Open Notepad as Administrator
2. Open `C:\Windows\System32\drivers\etc\hosts`
3. Add this line at the end:
```
192.168.86.50 dalidou.local dalidou
```
4. Save the file
**Verify:**
```powershell
ping dalidou.local
```
### Step 2: Verify MCP Server is Running on Dalidou
SSH into dalidou and check:
```bash
ssh root@dalidou
# Check documentation proxy
systemctl status siemensdocumentationproxyserver
# Check MCP server (if it's a service)
# Or check what's running on port 5000
ss -tlnp | grep 5000
```
### Step 3: Configure Claude Code MCP Integration
The MCP server on dalidou uses **stdio-based MCP protocol**, not HTTP. To connect from Claude Code, you'll need one of:
**Option A: SSH-based MCP (if supported)**
Configure in `.claude/settings.json` or MCP config to connect via SSH tunnel.
**Option B: Local Proxy**
Run a local MCP proxy that connects to dalidou's MCP server.
**Option C: HTTP Wrapper**
The current port 5000 service may already expose HTTP endpoints - need to verify once hostname is fixed.
---
## Server Documentation Reference
Full documentation is in the SERVtomaste repo:
- **URL:** http://192.168.86.50:3000/Antoine/SERVtomaste
- **File:** `docs/SIEMENS-DOCS-SERVER.md`
### Key Server Paths (on dalidou)
```
/srv/siemens-docs/proxy/ # Documentation Proxy (port 5000)
/srv/claude-assistant/ # MCP Server
/srv/claude-assistant/mcp-server/ # MCP server code
/srv/claude-assistant/tools/ # Tool implementations
├── siemens-auth.js # Puppeteer authentication
├── siemens-docs.js # Documentation fetching
└── ...
/srv/claude-assistant/vault/ # Credentials (secured)
```
### Available MCP Tools (once connected)
| Tool | Description |
|------|-------------|
| `siemens_docs_search` | Search NX Open, Simcenter docs |
| `siemens_docs_fetch` | Fetch specific documentation page |
| `siemens_auth_status` | Check if auth session is active |
| `siemens_login` | Re-login if session expired |
| `siemens_docs_list` | List documentation categories |
---
## Files Created During Investigation
- `C:\Users\antoi\Atomizer\add_dalidou_host.ps1` - Script to add hosts entry (run as Admin)
- `C:\Users\antoi\Atomizer\test_mcp.py` - Test script for probing MCP server (can be deleted)
---
## Related Documentation
- `.claude/skills/modules/nx-docs-lookup.md` - How to use MCP tools once configured
- `docs/08_ARCHIVE/historical/NXOPEN_DOCUMENTATION_INTEGRATION_STRATEGY.md` - Full strategy doc
- `docs/05_API_REFERENCE/NXOPEN_RESOURCES.md` - Alternative NXOpen resources
---
## Workaround Until Fixed
Without the MCP server, you can still look up NXOpen documentation by:
1. **Using web search** - I can search for NXOpen API documentation online
2. **Using local stub files** - Python stubs at `C:\Program Files\Siemens\NX2412\UGOPEN\pythonStubs\`
3. **Using existing extractors** - Check `optimization_engine/extractors/` for patterns
4. **Recording NX journals** - Record operations in NX to learn the API calls
---
*To continue setup, run the hosts file fix and let me know when ready.*

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,239 @@
# OP_07: Disk Space Optimization
**Version:** 1.0
**Last Updated:** 2025-12-29
## Overview
This protocol manages disk space for Atomizer studies through:
1. **Local cleanup** - Remove regenerable files from completed studies
2. **Remote archival** - Archive to dalidou server (14TB available)
3. **On-demand restore** - Pull archived studies when needed
## Disk Usage Analysis
### Typical Study Breakdown
| File Type | Size/Trial | Purpose | Keep? |
|-----------|------------|---------|-------|
| `.op2` | 68 MB | Nastran results | **YES** - Needed for analysis |
| `.prt` | 30 MB | NX parts | NO - Copy of master |
| `.dat` | 16 MB | Solver input | NO - Regenerable |
| `.fem` | 14 MB | FEM mesh | NO - Copy of master |
| `.sim` | 7 MB | Simulation | NO - Copy of master |
| `.afm` | 4 MB | Assembly FEM | NO - Regenerable |
| `.json` | <1 MB | Params/results | **YES** - Metadata |
| Logs | <1 MB | F04/F06/log | NO - Diagnostic only |
**Per-trial overhead:** ~150 MB total, only ~70 MB essential
### M1_Mirror Example
```
Current: 194 GB (28 studies, 2000+ trials)
After cleanup: 95 GB (51% reduction)
After archive: 5 GB (keep best_design_archive only)
```
## Commands
### 1. Analyze Disk Usage
```bash
# Single study
archive_study.bat analyze studies\M1_Mirror\m1_mirror_V12
# All studies in a project
archive_study.bat analyze studies\M1_Mirror
```
Output shows:
- Total size
- Essential vs deletable breakdown
- Trial count per study
- Per-extension analysis
### 2. Cleanup Completed Study
```bash
# Dry run (default) - see what would be deleted
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
# Actually delete
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
**What gets deleted:**
- `.prt`, `.fem`, `.sim`, `.afm` in trial folders
- `.dat`, `.f04`, `.f06`, `.log`, `.diag` solver files
- Temp files (`.txt`, `.exp`, `.bak`)
**What is preserved:**
- `1_setup/` folder (master model)
- `3_results/` folder (database, reports)
- All `.op2` files (Nastran results)
- All `.json` files (params, metadata)
- All `.npz` files (Zernike coefficients)
- `best_design_archive/` folder
### 3. Archive to Remote Server
```bash
# Dry run
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12
# Actually archive
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
# Use Tailscale (when not on local network)
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute --tailscale
```
**Process:**
1. Creates compressed `.tar.gz` archive
2. Uploads to `papa@192.168.86.50:/srv/storage/atomizer-archive/`
3. Deletes local archive after successful upload
### 4. List Remote Archives
```bash
archive_study.bat list
# Via Tailscale
archive_study.bat list --tailscale
```
### 5. Restore from Remote
```bash
# Restore to studies/ folder
archive_study.bat restore m1_mirror_V12
# Via Tailscale
archive_study.bat restore m1_mirror_V12 --tailscale
```
## Remote Server Setup
**Server:** dalidou (Lenovo W520)
- Local IP: `192.168.86.50`
- Tailscale IP: `100.80.199.40`
- SSH user: `papa`
- Archive path: `/srv/storage/atomizer-archive/`
### First-Time Setup
SSH into dalidou and create the archive directory:
```bash
ssh papa@192.168.86.50
mkdir -p /srv/storage/atomizer-archive
```
Ensure SSH key authentication is set up for passwordless transfers:
```bash
# On Windows (PowerShell)
ssh-copy-id papa@192.168.86.50
```
## Recommended Workflow
### During Active Optimization
Keep all files - you may need to re-run specific trials.
### After Study Completion
1. **Generate final report** (`STUDY_REPORT.md`)
2. **Archive best design** to `3_results/best_design_archive/`
3. **Cleanup:**
```bash
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
### For Long-Term Storage
1. **After cleanup**, archive to server:
```bash
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
```
2. **Optionally delete local** (keep only `3_results/best_design_archive/`)
### When Revisiting Old Study
1. **Restore:**
```bash
archive_study.bat restore m1_mirror_V12
```
2. If you need to re-run trials, the `1_setup/` master files allow regenerating everything
## Safety Features
- **Dry run by default** - Must add `--execute` to actually delete/transfer
- **Master files preserved** - `1_setup/` is never touched
- **Results preserved** - `3_results/` is never touched
- **Essential files preserved** - OP2, JSON, NPZ always kept
## Disk Space Targets
| Stage | M1_Mirror Target |
|-------|------------------|
| Active development | 200 GB (full) |
| Completed studies | 95 GB (after cleanup) |
| Archived (minimal local) | 5 GB (best only) |
| Server archive | 50 GB compressed |
## Troubleshooting
### SSH Connection Failed
```bash
# Test connectivity
ping 192.168.86.50
# Test SSH
ssh papa@192.168.86.50 "echo connected"
# If on different network, use Tailscale
ssh papa@100.80.199.40 "echo connected"
```
### Archive Upload Slow
Large studies (50+ GB) take time. The tool uses `rsync` with progress display.
For very large archives, consider running overnight or using direct LAN connection.
### Out of Disk Space During Archive
The archive is created locally first. Ensure you have ~1.5x the study size free:
- 20 GB study = ~30 GB temp space needed
## Python API
```python
from optimization_engine.utils.study_archiver import (
analyze_study,
cleanup_study,
archive_to_remote,
restore_from_remote,
list_remote_archives,
)
# Analyze
analysis = analyze_study(Path("studies/M1_Mirror/m1_mirror_V12"))
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup (dry_run=False to actually delete)
cleanup_study(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
# Archive
archive_to_remote(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
# List remote
archives = list_remote_archives()
for a in archives:
print(f"{a['name']}: {a['size']}")
```

View File

@@ -0,0 +1,262 @@
# SYS_16: Self-Aware Turbo (SAT) Optimization
## Version: 1.0
## Status: PROPOSED
## Created: 2025-12-28
---
## Problem Statement
V5 surrogate + L-BFGS failed catastrophically because:
1. MLP predicted WS=280 but actual was WS=376 (30%+ error)
2. L-BFGS descended to regions **outside training distribution**
3. Surrogate had no way to signal uncertainty
4. All L-BFGS solutions converged to the same "fake optimum"
**Root cause:** The surrogate is overconfident in regions where it has no data.
---
## Solution: Uncertainty-Aware Surrogate with Active Learning
### Core Principles
1. **Never trust a point prediction** - Always require uncertainty bounds
2. **High uncertainty = run FEA** - Don't optimize where you don't know
3. **Actively fill gaps** - Prioritize FEA in high-uncertainty regions
4. **Validate gradient solutions** - Check L-BFGS results against FEA before trusting
---
## Architecture
### 1. Ensemble Surrogate (Epistemic Uncertainty)
Instead of one MLP, train **N independent models** with different initializations:
```python
class EnsembleSurrogate:
def __init__(self, n_models=5):
self.models = [MLP() for _ in range(n_models)]
def predict(self, x):
preds = [m.predict(x) for m in self.models]
mean = np.mean(preds, axis=0)
std = np.std(preds, axis=0) # Epistemic uncertainty
return mean, std
def is_confident(self, x, threshold=0.1):
mean, std = self.predict(x)
# Confident if std < 10% of mean
return (std / (mean + 1e-6)) < threshold
```
**Why this works:** Models trained on different random seeds will agree in well-sampled regions but disagree wildly in extrapolation regions.
### 2. Distance-Based OOD Detection
Track training data distribution and flag points that are "too far":
```python
class OODDetector:
def __init__(self, X_train):
self.X_train = X_train
self.mean = X_train.mean(axis=0)
self.std = X_train.std(axis=0)
# Fit KNN for local density
self.knn = NearestNeighbors(n_neighbors=5)
self.knn.fit(X_train)
def distance_to_training(self, x):
"""Return distance to nearest training points."""
distances, _ = self.knn.kneighbors(x.reshape(1, -1))
return distances.mean()
def is_in_distribution(self, x, threshold=2.0):
"""Check if point is within 2 std of training data."""
z_scores = np.abs((x - self.mean) / (self.std + 1e-6))
return z_scores.max() < threshold
```
### 3. Trust-Region L-BFGS
Constrain L-BFGS to stay within training distribution:
```python
def trust_region_lbfgs(surrogate, ood_detector, x0, max_iter=100):
"""L-BFGS that respects training data boundaries."""
def constrained_objective(x):
# If OOD, return large penalty
if not ood_detector.is_in_distribution(x):
return 1e9
mean, std = surrogate.predict(x)
# If uncertain, return upper confidence bound (pessimistic)
if std > 0.1 * mean:
return mean + 2 * std # Be conservative
return mean
result = minimize(constrained_objective, x0, method='L-BFGS-B')
return result.x
```
### 4. Acquisition Function with Uncertainty
Use **Expected Improvement with Uncertainty** (like Bayesian Optimization):
```python
def acquisition_score(x, surrogate, best_so_far):
"""Score = potential improvement weighted by confidence."""
mean, std = surrogate.predict(x)
# Expected improvement (lower is better for minimization)
improvement = best_so_far - mean
# Exploration bonus for uncertain regions
exploration = 0.5 * std
# High score = worth evaluating with FEA
return improvement + exploration
def select_next_fea_candidates(surrogate, candidates, best_so_far, n=5):
"""Select candidates balancing exploitation and exploration."""
scores = [acquisition_score(c, surrogate, best_so_far) for c in candidates]
# Pick top candidates by acquisition score
top_indices = np.argsort(scores)[-n:]
return [candidates[i] for i in top_indices]
```
---
## Algorithm: Self-Aware Turbo (SAT)
```
INITIALIZE:
- Load existing FEA data (X_train, Y_train)
- Train ensemble surrogate on data
- Fit OOD detector on X_train
- Set best_ws = min(Y_train)
PHASE 1: UNCERTAINTY MAPPING (10% of budget)
FOR i in 1..N_mapping:
- Sample random point x
- Get uncertainty: mean, std = surrogate.predict(x)
- If std > threshold: run FEA, add to training data
- Retrain ensemble periodically
This fills in the "holes" in the surrogate's knowledge.
PHASE 2: EXPLOITATION WITH VALIDATION (80% of budget)
FOR i in 1..N_exploit:
- Generate 1000 TPE samples
- Filter to keep only confident predictions (std < 10% of mean)
- Filter to keep only in-distribution (OOD check)
- Rank by predicted WS
- Take top 5 candidates
- Run FEA on all 5
- For each FEA result:
- Compare predicted vs actual
- If error > 20%: mark region as "unreliable", force exploration there
- If error < 10%: update best, retrain surrogate
- Every 10 iterations: retrain ensemble with new data
PHASE 3: L-BFGS REFINEMENT (10% of budget)
- Only run L-BFGS if ensemble R² > 0.95 on validation set
- Use trust-region L-BFGS (stay within training distribution)
FOR each L-BFGS solution:
- Check ensemble disagreement
- If models agree (std < 5%): run FEA to validate
- If models disagree: skip, too uncertain
- Compare L-BFGS prediction vs FEA
- If error > 15%: ABORT L-BFGS phase, return to Phase 2
- If error < 10%: accept as candidate
FINAL:
- Return best FEA-validated design
- Report uncertainty bounds for all objectives
```
---
## Key Differences from V5
| Aspect | V5 (Failed) | SAT (Proposed) |
|--------|-------------|----------------|
| **Model** | Single MLP | Ensemble of 5 MLPs |
| **Uncertainty** | None | Ensemble disagreement + OOD detection |
| **L-BFGS** | Trust blindly | Trust-region, validate every step |
| **Extrapolation** | Accept | Reject or penalize |
| **Active learning** | No | Yes - prioritize uncertain regions |
| **Validation** | After L-BFGS | Throughout |
---
## Implementation Checklist
1. [ ] `EnsembleSurrogate` class with N=5 MLPs
2. [ ] `OODDetector` with KNN + z-score checks
3. [ ] `acquisition_score()` balancing exploitation/exploration
4. [ ] Trust-region L-BFGS with OOD penalties
5. [ ] Automatic retraining when new FEA data arrives
6. [ ] Logging of prediction errors to track surrogate quality
7. [ ] Early abort if L-BFGS predictions consistently wrong
---
## Expected Behavior
**In well-sampled regions:**
- Ensemble agrees → Low uncertainty → Trust predictions
- L-BFGS finds valid optima → FEA confirms → Success
**In poorly-sampled regions:**
- Ensemble disagrees → High uncertainty → Run FEA instead
- L-BFGS penalized → Stays in trusted zone → No fake optima
**At distribution boundaries:**
- OOD detector flags → Reject predictions
- Acquisition prioritizes → Active learning fills gaps
---
## Metrics to Track
1. **Surrogate R² on validation set** - Target > 0.95 before L-BFGS
2. **Prediction error histogram** - Should be centered at 0
3. **OOD rejection rate** - How often we refuse to predict
4. **Ensemble disagreement** - Average std across predictions
5. **L-BFGS success rate** - % of L-BFGS solutions that validate
---
## When to Use SAT vs Pure TPE
| Scenario | Recommendation |
|----------|----------------|
| < 100 existing samples | Pure TPE (not enough for good surrogate) |
| 100-500 samples | SAT Phase 1-2 only (no L-BFGS) |
| > 500 samples | Full SAT with L-BFGS refinement |
| High-dimensional (>20 params) | Pure TPE (curse of dimensionality) |
| Noisy FEA | Pure TPE (surrogates struggle with noise) |
---
## References
- Gaussian Process literature on uncertainty quantification
- Deep Ensembles: Lakshminarayanan et al. (2017)
- Bayesian Optimization with Expected Improvement
- Trust-region methods for constrained optimization
---
*The key insight: A surrogate that knows when it doesn't know is infinitely more valuable than one that's confidently wrong.*

View File

@@ -3,3 +3,5 @@
{"timestamp":"2025-12-19T10:00:00","category":"workaround","context":"NX journal execution via cmd /c with environment variables fails silently or produces garbled output. Multiple attempts with cmd /c SET and && chaining failed to capture run_journal.exe output.","insight":"CRITICAL WORKAROUND: When executing NX journals from Claude Code on Windows, use PowerShell with [Environment]::SetEnvironmentVariable() method instead of cmd /c or $env: syntax. The correct pattern is: powershell -Command \"[Environment]::SetEnvironmentVariable('SPLM_LICENSE_SERVER', '28000@dalidou;28000@100.80.199.40', 'Process'); & 'C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe' 'journal.py' -args 'arg1' 'arg2' 2>&1\". The $env: syntax gets corrupted when passed through bash (colon gets interpreted). The cmd /c SET syntax often fails to capture output. This PowerShell pattern reliably sets license server and captures all output.","confidence":1.0,"tags":["nx","powershell","run_journal","license-server","windows","cmd-workaround"],"severity":"high","rule":"ALWAYS use PowerShell with [Environment]::SetEnvironmentVariable() for NX journal execution. NEVER use cmd /c SET or $env: syntax for setting SPLM_LICENSE_SERVER."}
{"timestamp":"2025-12-19T15:30:00","category":"failure","context":"CMA-ES optimization V7 started with random sample instead of baseline. First trial had whiffle_min=45.73 instead of baseline 62.75, resulting in WS=329 instead of expected ~281.","insight":"CMA-ES with Optuna CmaEsSampler does NOT evaluate x0 (baseline) first - it samples AROUND x0 with sigma0 step size. The x0 parameter only sets the CENTER of the initial sampling distribution, not the first trial. To ensure baseline is evaluated first, use study.enqueue_trial(x0) after creating the study. This is critical for refinement studies where you need to compare against a known-good baseline. Pattern: if len(study.trials) == 0: study.enqueue_trial(x0)","confidence":1.0,"tags":["cma-es","optuna","baseline","x0","enqueue","optimization"],"severity":"high","rule":"When using CmaEsSampler with a known baseline, ALWAYS enqueue the baseline as trial 0 using study.enqueue_trial(x0). The x0 parameter alone does NOT guarantee baseline evaluation."}
{"timestamp":"2025-12-22T14:00:00","category":"failure","context":"V10 mirror optimization reported impossibly good relative WFE values (40-20=1.99nm instead of ~6nm, 60-20=6.82nm instead of ~13nm). User noticed results were 'too good to be true'.","insight":"CRITICAL BUG IN RELATIVE WFE CALCULATION: The V10 run_optimization.py computed relative WFE as abs(RMS_target - RMS_ref) instead of RMS(WFE_target - WFE_ref). This is mathematically WRONG because |RMS(A) - RMS(B)| ≠ RMS(A - B). The correct approach is to compute the node-by-node WFE difference FIRST, then fit Zernike to the difference field, then compute RMS. The bug gave values 3-4x lower than correct values because the 20° reference had HIGHER absolute WFE than 40°/60°, so the subtraction gave negative values, and abs() hid the problem. The fix is to use extractor.extract_relative() which correctly computes node-by-node differences. Both ZernikeExtractor and ZernikeOPDExtractor now have extract_relative() methods.","confidence":1.0,"tags":["zernike","wfe","relative-wfe","extract_relative","critical-bug","v10"],"severity":"critical","rule":"NEVER compute relative WFE as abs(RMS_target - RMS_ref). ALWAYS use extract_relative() which computes RMS(WFE_target - WFE_ref) by doing node-by-node subtraction first, then Zernike fitting, then RMS."}
{"timestamp":"2025-12-28T17:30:00","category":"failure","context":"V5 turbo optimization created from scratch instead of copying V4. Multiple critical components were missing or wrong: no license server, wrong extraction keys (filtered_rms_nm vs relative_filtered_rms_nm), wrong mfg_90 key, missing figure_path parameter, incomplete version regex.","insight":"STUDY DERIVATION FAILURE: When creating a new study version (V5 from V4), NEVER rewrite the run_optimization.py from scratch. ALWAYS copy the working version first, then add/modify only the new feature (e.g., L-BFGS polish). Rewriting caused 5 independent bugs: (1) missing LICENSE_SERVER setup, (2) wrong extraction key filtered_rms_nm instead of relative_filtered_rms_nm, (3) wrong mfg_90 key, (4) missing figure_path=None in extractor call, (5) incomplete version regex missing DesigncenterNX pattern. The FEA/extraction pipeline is PROVEN CODE - never rewrite it. Only add new optimization strategies as modules on top.","confidence":1.0,"tags":["study-creation","copy-dont-rewrite","extraction","license-server","v5","critical"],"severity":"critical","rule":"When deriving a new study version, COPY the entire working run_optimization.py first. Add new features as ADDITIONS, not rewrites. The FEA pipeline (license, NXSolver setup, extraction) is proven - never rewrite it."}
{"timestamp":"2025-12-28T21:30:00","category":"failure","context":"V5 flat back turbo optimization with MLP surrogate + L-BFGS polish. Surrogate predicted WS~280 but actual FEA gave WS~365-377. Error of 85-96 (30%+ relative error). All L-BFGS solutions converged to same fake optimum that didn't exist in reality.","insight":"SURROGATE + L-BFGS FAILURE MODE: Gradient-based optimization on MLP surrogates finds 'fake optima' that don't exist in real FEA. The surrogate has smooth gradients everywhere, but L-BFGS descends to regions OUTSIDE the training distribution where predictions are wildly wrong. V5 results: (1) Best TPE trial: WS=290.18, (2) Best L-BFGS trial: WS=325.27, (3) Worst L-BFGS trials: WS=376.52. The fancy L-BFGS polish made results WORSE than random TPE. Key issues: (a) No uncertainty quantification - can't detect out-of-distribution, (b) No mass constraint in surrogate - L-BFGS finds infeasible designs (122-124kg vs 120kg limit), (c) L-BFGS converges to same bad point from multiple starting locations (trials 31-44 all gave WS=376.52).","confidence":1.0,"tags":["surrogate","mlp","lbfgs","gradient-descent","fake-optima","out-of-distribution","v5","turbo"],"severity":"critical","rule":"NEVER trust gradient descent on surrogates without: (1) Uncertainty quantification to reject OOD predictions, (2) Mass/constraint prediction to enforce feasibility, (3) Trust-region to stay within training distribution. Pure TPE with real FEA often beats surrogate+gradient methods."}

View File

@@ -5,3 +5,5 @@
{"timestamp": "2025-12-28T10:15:00", "category": "success_pattern", "context": "Unified trial management with TrialManager and DashboardDB", "insight": "TRIAL MANAGEMENT PATTERN: Use TrialManager for consistent trial_NNNN naming across all optimization methods (Optuna, Turbo, GNN, manual). Key principles: (1) Trial numbers NEVER reset (monotonic), (2) Folders NEVER get overwritten, (3) Database always synced with filesystem, (4) Surrogate predictions are NOT trials - only FEA results. DashboardDB provides Optuna-compatible schema for dashboard integration. Path: optimization_engine/utils/trial_manager.py", "confidence": 0.95, "tags": ["trial_manager", "dashboard_db", "optuna", "trial_naming", "turbo"]}
{"timestamp": "2025-12-28T10:15:00", "category": "success_pattern", "context": "GNN Turbo training data loading from multiple studies", "insight": "MULTI-STUDY TRAINING: When loading training data from multiple prior studies for GNN surrogate training, param names may have unit prefixes like '[mm]rib_thickness' or '[Degrees]angle'. Strip prefixes: if ']' in name: name = name.split(']', 1)[1]. Also, objective attribute names vary between studies (rel_filtered_rms_40_vs_20 vs obj_rel_filtered_rms_40_vs_20) - use fallback chain with 'or'. V5 successfully trained on 316 samples (V3: 297, V4: 19) with R²=[0.94, 0.94, 0.89, 0.95].", "confidence": 0.9, "tags": ["gnn", "turbo", "training_data", "multi_study", "param_naming"]}
{"timestamp": "2025-12-28T12:28:04.706624", "category": "success_pattern", "context": "Implemented L-BFGS gradient optimizer for surrogate polish phase", "insight": "L-BFGS on trained MLP surrogates provides 100-1000x faster convergence than derivative-free methods (TPE, CMA-ES) for local refinement. Key: use multi-start from top FEA candidates, not random initialization. Integration: GradientOptimizer class in optimization_engine/gradient_optimizer.py.", "confidence": 0.9, "tags": ["optimization", "lbfgs", "surrogate", "gradient", "polish"]}
{"timestamp": "2025-12-29T09:30:00", "category": "success_pattern", "context": "V6 pure TPE outperformed V5 surrogate+L-BFGS by 22%", "insight": "SIMPLE BEATS COMPLEX: V6 Pure TPE achieved WS=225.41 vs V5's WS=290.18 (22.3% better). Key insight: surrogates fail when gradient methods descend to OOD regions. Fix: EnsembleSurrogate with (1) N=5 MLPs for disagreement-based uncertainty, (2) OODDetector with KNN+z-score, (3) acquisition_score balancing exploitation+exploration, (4) trust-region L-BFGS that stays in training distribution. Never trust point predictions - always require uncertainty bounds. Protocol: SYS_16_SELF_AWARE_TURBO.md. Code: optimization_engine/surrogates/ensemble_surrogate.py", "confidence": 1.0, "tags": ["ensemble", "uncertainty", "ood", "surrogate", "v6", "tpe", "self-aware"]}
{"timestamp": "2025-12-29T09:47:47.612485", "category": "success_pattern", "context": "Disk space optimization for FEA studies", "insight": "Per-trial FEA files are ~150MB but only OP2+JSON (~70MB) are essential. PRT/FEM/SIM/DAT are copies of master files and can be deleted after study completion. Archive to dalidou server for long-term storage.", "confidence": 0.95, "tags": ["disk_optimization", "archival", "study_management", "dalidou"], "related_files": ["optimization_engine/utils/study_archiver.py", "docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md"]}

View File

@@ -242,19 +242,28 @@ class NXSolver:
Format: [unit]name=value
Example: [mm]whiffle_min=42.5
"""
# Default unit mapping (could be extended or made configurable)
# Default unit mapping - MUST match NX model expression units exactly
# Verified against working turbo V1 runs
UNIT_MAPPING = {
# Length parameters (mm)
'whiffle_min': 'mm',
'whiffle_triangle_closeness': 'mm',
'inner_circular_rib_dia': 'mm',
'outer_circular_rib_offset_from_outer': 'mm',
'Pocket_Radius': 'mm',
'center_thickness': 'mm',
# Lateral pivot/closeness - mm in NX model (verified from V1)
'lateral_outer_pivot': 'mm',
'lateral_inner_pivot': 'mm',
'lateral_middle_pivot': 'mm',
'lateral_closeness': 'mm',
# Angle parameters (degrees)
'whiffle_outer_to_vertical': 'Degrees',
# Rib/face thickness parameters (mm)
'rib_thickness': 'mm',
'ribs_circular_thk': 'mm',
'rib_thickness_lateral_truss': 'mm',
'mirror_face_thickness': 'mm',
# Angle parameters (Degrees) - verified from working V1 runs
'whiffle_outer_to_vertical': 'Degrees', # NX expects Degrees (verified V1)
'lateral_inner_angle': 'Degrees',
'lateral_outer_angle': 'Degrees',
'blank_backface_angle': 'Degrees',

View File

@@ -0,0 +1,19 @@
"""
Surrogate models for FEA acceleration.
Available surrogates:
- EnsembleSurrogate: Multiple MLPs with uncertainty quantification
- OODDetector: Out-of-distribution detection
"""
from .ensemble_surrogate import (
EnsembleSurrogate,
OODDetector,
create_and_train_ensemble
)
__all__ = [
'EnsembleSurrogate',
'OODDetector',
'create_and_train_ensemble'
]

View File

@@ -0,0 +1,540 @@
#!/usr/bin/env python3
"""
Ensemble Surrogate with Uncertainty Quantification
Addresses the V5 failure mode where single MLPs gave overconfident predictions
in out-of-distribution regions, leading L-BFGS to fake optima.
Key features:
1. Ensemble of N MLPs - disagreement = uncertainty
2. OOD detection - reject predictions far from training data
3. Confidence bounds - never trust point predictions alone
4. Active learning - prioritize FEA in uncertain regions
Author: Atomizer
Created: 2025-12-28
"""
import numpy as np
from typing import Tuple, List, Dict, Optional
from pathlib import Path
import json
import logging
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
HAS_TORCH = True
except ImportError:
HAS_TORCH = False
logger.warning("PyTorch not available - ensemble features limited")
from sklearn.neighbors import NearestNeighbors
class MLP(nn.Module):
    """One feed-forward regressor used as a member of the ensemble.

    Architecture: for each hidden width, a Linear -> ReLU -> Dropout(0.1)
    stage; then a final Linear projection to ``output_dim``.
    Default hidden widths are [64, 32] when none are supplied.
    """
    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        widths = [input_dim] + (hidden_dims or [64, 32])
        stages = []
        # Pair consecutive widths to build each hidden stage.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend([nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.1)])
        # Output head: plain linear, no activation (regression target).
        stages.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        # Pure delegation to the sequential stack.
        return self.net(x)
class OODDetector:
    """
    Flags query points that fall outside the training distribution.

    Two complementary signals are combined:
    1. Per-feature z-score against the training mean/std
    2. Mean distance to the k nearest training points (local density)
    """
    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k
        # Feature-wise statistics; epsilon guards zero-variance columns.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8
        # KNN model for local density estimation (k capped by sample count).
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)
        # Baseline scale: median of mean-KNN-distance over the training set.
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))
        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is within z_threshold std of training mean."""
        pts = np.atleast_2d(x)
        worst_z = np.abs((pts - self.mean) / self.std).max(axis=1)
        passed = worst_z < self.z_threshold
        # Collapse to scalars for single-point queries, keep arrays for batches.
        if len(passed) == 1:
            return passed[0], worst_z[0]
        return passed, worst_z

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is close enough to training data."""
        pts = np.atleast_2d(x)
        dists, _ = self.knn.kneighbors(pts)
        mean_dist = dists.mean(axis=1)
        # Tolerate up to 3x the typical training-set KNN distance.
        passed = mean_dist < 3 * self.typical_knn_dist
        if len(passed) == 1:
            return passed[0], mean_dist[0]
        return passed, mean_dist

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Combined OOD check: both z-score and KNN tests must pass."""
        z_ok, z_val = self.z_score_check(x)
        knn_ok, knn_val = self.knn_distance_check(x)
        verdict = z_ok and knn_ok
        # NOTE: the float()/bool() casts assume a single query point;
        # batch inputs would need per-row details.
        report = {
            'z_score': float(z_val),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_val),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }
        return verdict, report
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.
    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        """
        Args:
            input_dim: number of input features.
            output_dim: number of outputs.
            n_models: ensemble size.
            hidden_dims: hidden widths; None falls through to MLP's [64, 32] default.
            device: 'auto' picks CUDA when available, otherwise a torch device string.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]
        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)
        # Create ensemble
        # NOTE: passes the raw hidden_dims (possibly None); MLP applies the
        # same [64, 32] default, so behavior matches self.hidden_dims.
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]
        # Normalization stats (set by train() / load())
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None
        # OOD detector (set by train() only — see NOTE on load())
        self.ood_detector = None
        # Training state
        self.is_trained = False
        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) raw inputs.
            Y: (n_samples, output_dim) raw targets.
            epochs: max full-batch epochs per model.
            lr: AdamW learning rate.
            val_split: fraction held out for validation (minimum 5 samples).
            patience: early-stopping patience in epochs.
        Returns:
            metrics dict from _compute_metrics() plus per-model 'val_losses'.
        """
        # Compute normalization
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8
        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std
        # Split data
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]
        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]
        # Convert to tensors
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)
        # Train each model with different seed
        # NOTE(review): seeds are set AFTER model construction, so they vary
        # dropout masks here, not the initial weights (those come from the
        # RNG state at __init__ time). np.random.seed also mutates the
        # global NumPy RNG as a side effect.
        all_val_losses = []
        for i, model in enumerate(self.models):
            torch.manual_seed(42 + i * 1000)  # Different init per model
            np.random.seed(42 + i * 1000)
            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()
            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None
            for epoch in range(epochs):
                # Train (full-batch gradient step)
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()
                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()
                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot on CPU so the copy survives device moves.
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break
            # Restore best
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)
            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")
        # Initialize OOD detector (in normalized input space)
        self.ood_detector = OODDetector(X_norm)
        self.is_trained = True
        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses
        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        X_val/Y_val are already normalized (called from train()).
        """
        mean, std = self.predict_normalized(X_val)
        # R² for each output
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)
        # MAE
        mae = np.abs(Y_val - mean).mean(axis=0)
        # Average ensemble disagreement
        avg_std = std.mean()
        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns (mean, std) across the ensemble; std is the per-output
        disagreement between member models.
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)
        preds = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)
        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)
        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.
        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)
        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std
        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)
        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std
        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.
        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result
        - recommendation: 'trust', 'verify', or 'reject'

        NOTE(review): requires self.ood_detector, which only train() sets;
        calling this on a load()-ed ensemble raises AttributeError on None.
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # OOD check (one point at a time — OODDetector expects single rows)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]
        # Compute confidence score (0 = no confidence, 1 = high confidence)
        # Based on: relative std (lower = better) and OOD (in = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)
        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty
        # Recommendations
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead
        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected Improvement acquisition function.
        High score = worth running FEA (either promising or uncertain).
        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)
        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]
        improvement = best_so_far - mean
        # Expected improvement with exploration bonus
        # Higher std = more exploration value
        # NOTE(review): z is computed but unused — classical EI would feed it
        # through the normal CDF/PDF; the score below is a simpler linear form.
        z = improvement / (std + 1e-8)
        # Simple acquisition: exploitation + exploration
        scores = improvement + xi * std
        # Penalize OOD points (requires ood_detector from train())
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                scores[i] *= 0.1  # Heavy penalty for OOD
        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.
        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)
        Returns:
            selected: indices of selected candidates
            scores: acquisition scores
        """
        scores = self.acquisition_score(candidates, best_so_far)
        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))
        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to selected
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist
                best_idx = max(remaining, key=combined_score)
            selected.append(best_idx)
            remaining.remove(best_idx)
        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble to disk: one state_dict per model plus config.json."""
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")
        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)
        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        NOTE(review): the OOD detector is NOT persisted/rebuilt here, so a
        loaded ensemble supports predict() but not predict_with_confidence()
        or acquisition_score() until train() is called again.
        """
        path = Path(path)
        with open(path / "config.json") as f:
            config = json.load(f)
        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )
        # Load normalization
        # NOTE(review): truthiness check — treats an empty list like None;
        # fine for non-degenerate stats but `is not None` would be stricter.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None
        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)
        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")
        return surrogate
# Convenience function for quick usage
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Create and train an ensemble surrogate.

    Accepts a 1-D Y (single objective) and reshapes it to a column vector
    before training.
    """
    n_outputs = Y.shape[1] if Y.ndim > 1 else 1
    ensemble = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=n_outputs,
        n_models=n_models
    )
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)
    metrics = ensemble.train(X, Y, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")
    return ensemble

View File

@@ -24,6 +24,7 @@ SESSION_LOCK_DIR = Path(os.environ.get('TEMP', '/tmp')) / 'atomizer_nx_sessions'
# Default NX installation paths (in order of preference)
DEFAULT_NX_PATHS = [
Path(r"C:\Program Files\Siemens\DesigncenterNX2512\NXBIN\ugraf.exe"), # DesignCenter (preferred)
Path(r"C:\Program Files\Siemens\NX2506\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\NX2412\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\Simcenter3D_2506\NXBIN\ugraf.exe"),

View File

@@ -0,0 +1,438 @@
"""
Study Archiver - Disk Space Optimization for Atomizer Studies
This module provides utilities for:
1. Cleaning up completed studies (removing regenerable files)
2. Archiving studies to remote storage (dalidou server)
3. Restoring archived studies on-demand
Usage:
# Cleanup a completed study (keep only essential files)
python -m optimization_engine.utils.study_archiver cleanup studies/M1_Mirror/m1_mirror_V12
# Archive to remote server
python -m optimization_engine.utils.study_archiver archive studies/M1_Mirror/m1_mirror_V12
# Restore from remote
python -m optimization_engine.utils.study_archiver restore m1_mirror_V12
# Show disk usage analysis
python -m optimization_engine.utils.study_archiver analyze studies/M1_Mirror
"""
import os
import json
import shutil
import tarfile
import subprocess
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Tuple
import logging
logger = logging.getLogger(__name__)

# Configuration
# Remote archive target (the "dalidou" server from the module docstring).
# NOTE(review): host IPs and user are hard-coded for this workstation —
# confirm before reusing elsewhere.
REMOTE_CONFIG = {
    "host": "192.168.86.50",  # Local WiFi
    "host_tailscale": "100.80.199.40",  # Remote via Tailscale
    "user": "papa",
    "archive_path": "/srv/storage/atomizer-archive",
    "ssh_port": 22,
}

# Files to KEEP per trial (essential for analysis)
ESSENTIAL_EXTENSIONS = {
    '.op2',  # Nastran binary results (Zernike extraction)
    '.json',  # Parameters, results, metadata
    '.npz',  # Pre-computed Zernike coefficients
    '.html',  # Generated reports
    '.png',  # Visualization images
    '.csv',  # Exported data
}

# Files to DELETE per trial (regenerable from master + params)
DELETABLE_EXTENSIONS = {
    '.prt',  # NX part files (copy of master)
    '.fem',  # FEM mesh files (copy of master)
    '.sim',  # Simulation files (copy of master)
    '.afm',  # Assembly FEM files
    '.dat',  # Solver input deck (can regenerate)
    '.f04',  # Nastran output log
    '.f06',  # Nastran printed output
    '.log',  # Generic log files
    '.diag',  # Diagnostic files
    '.txt',  # Temp text files
    '.exp',  # Expression files
    '.bak',  # Backup files
}

# Folders to always keep entirely
KEEP_FOLDERS = {
    '1_setup',  # Master model files (source of truth)
    '3_results',  # Final results, database, reports
    'best_design_archive',  # Archived best designs
}
def analyze_study(study_path: Path) -> Dict:
    """Analyze disk usage of a study folder.

    Returns a dict with the total byte count, per-extension and
    per-top-level-folder breakdowns, the essential/deletable split, and
    the number of trial folders under 2_iterations.
    """
    study_path = Path(study_path)
    report = {
        "study_name": study_path.name,
        "total_size_bytes": 0,
        "by_extension": {},
        "by_folder": {},
        "essential_size": 0,
        "deletable_size": 0,
        "trial_count": 0,
    }
    for entry in study_path.rglob("*"):
        if not entry.is_file():
            continue
        size = entry.stat().st_size
        suffix = entry.suffix.lower()
        report["total_size_bytes"] += size
        report["by_extension"][suffix] = report["by_extension"].get(suffix, 0) + size
        # Attribute the file to its top-level folder within the study.
        rel_parts = entry.relative_to(study_path).parts
        if rel_parts:
            top_folder = rel_parts[0]
            report["by_folder"][top_folder] = report["by_folder"].get(top_folder, 0) + size
        # Split into keep vs. delete categories.
        if suffix in ESSENTIAL_EXTENSIONS:
            report["essential_size"] += size
        elif suffix in DELETABLE_EXTENSIONS:
            report["deletable_size"] += size
    # Count trial folders, if the iterations directory exists.
    iterations_dir = study_path / "2_iterations"
    if iterations_dir.exists():
        report["trial_count"] = sum(
            1 for d in iterations_dir.iterdir()
            if d.is_dir() and (d.name.startswith("trial_") or d.name.startswith("iter"))
        )
    return report
def print_analysis(analysis: Dict):
    """Print formatted analysis results.

    Args:
        analysis: dict produced by analyze_study().
    """
    total_gb = analysis["total_size_bytes"] / 1e9
    essential_gb = analysis["essential_size"] / 1e9
    deletable_gb = analysis["deletable_size"] / 1e9
    # Guard against empty studies: total size 0 previously raised
    # ZeroDivisionError in the percentage columns.
    pct_essential = 100 * essential_gb / total_gb if total_gb else 0.0
    pct_deletable = 100 * deletable_gb / total_gb if total_gb else 0.0
    print(f"\n{'='*60}")
    print(f"Study: {analysis['study_name']}")
    print(f"{'='*60}")
    print(f"Total size: {total_gb:8.2f} GB")
    print(f"Trials: {analysis['trial_count']:8d}")
    print(f"Essential: {essential_gb:8.2f} GB ({pct_essential:.1f}%)")
    print(f"Deletable: {deletable_gb:8.2f} GB ({pct_deletable:.1f}%)")
    print(f"Potential save: {deletable_gb:8.2f} GB")
    print(f"\nBy folder:")
    for folder, size in sorted(analysis["by_folder"].items(), key=lambda x: -x[1]):
        print(f" {folder:25} {size/1e9:8.2f} GB")
    print(f"\nTop extensions:")
    for ext, size in sorted(analysis["by_extension"].items(), key=lambda x: -x[1])[:10]:
        status = "[KEEP]" if ext in ESSENTIAL_EXTENSIONS else "[DEL?]" if ext in DELETABLE_EXTENSIONS else "[ ]"
        print(f" {status} {ext:10} {size/1e9:8.2f} GB")
def cleanup_study(study_path: Path, dry_run: bool = True) -> Tuple[int, int]:
    """
    Clean up a completed study by removing regenerable files from trial folders.
    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
    Returns:
        (files_deleted, bytes_freed)
    """
    study_path = Path(study_path)
    iterations_dir = study_path / "2_iterations"
    if not iterations_dir.exists():
        logger.warning(f"No iterations folder found in {study_path}")
        return 0, 0
    # Collect every regenerable file under the trial folders first.
    targets = []
    target_bytes = 0
    for trial_dir in iterations_dir.iterdir():
        if not trial_dir.is_dir():
            continue
        for item in trial_dir.iterdir():
            if item.is_file() and item.suffix.lower() in DELETABLE_EXTENSIONS:
                targets.append(item)
                target_bytes += item.stat().st_size
    if dry_run:
        print(f"\n[DRY RUN] Would delete {len(targets)} files, freeing {target_bytes/1e9:.2f} GB")
        print("\nSample files to delete:")
        for item in targets[:10]:
            print(f" {item.relative_to(study_path)}")
        if len(targets) > 10:
            print(f" ... and {len(targets) - 10} more")
        return 0, 0
    # Actually delete; failures are logged and skipped.
    deleted = 0
    freed = 0
    for item in targets:
        try:
            size = item.stat().st_size
            item.unlink()
            deleted += 1
            freed += size
        except Exception as e:
            logger.error(f"Failed to delete {item}: {e}")
    print(f"Deleted {deleted} files, freed {freed/1e9:.2f} GB")
    return deleted, freed
def archive_to_remote(
    study_path: Path,
    use_tailscale: bool = False,
    dry_run: bool = True
) -> bool:
    """
    Archive a study to the remote dalidou server.
    Args:
        study_path: Path to study folder
        use_tailscale: Use Tailscale IP (for remote access)
        dry_run: If True, only report what would be done
    Returns:
        True if successful
    """
    study_path = Path(study_path)
    study_name = study_path.name
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    # Create compressed archive locally first
    archive_name = f"{study_name}_{datetime.now().strftime('%Y%m%d')}.tar.gz"
    local_archive = study_path.parent / archive_name
    if dry_run:
        print(f"\n[DRY RUN] Would archive {study_name}")
        print(f" 1. Create {archive_name}")
        print(f" 2. Upload to {user}@{host}:{remote_path}/")
        print(f" 3. Delete local archive")
        return True
    print(f"Creating archive: {archive_name}")
    with tarfile.open(local_archive, "w:gz") as tar:
        tar.add(study_path, arcname=study_name)
    archive_size = local_archive.stat().st_size
    print(f"Archive size: {archive_size/1e9:.2f} GB")
    # Upload via rsync (more reliable than scp for large files)
    print(f"Uploading to {host}...")
    # First ensure remote directory exists.
    # Argument lists (shell=False) avoid local shell quoting/injection
    # problems with paths containing spaces or metacharacters; the quoted
    # command still runs in the remote shell via ssh.
    subprocess.run(["ssh", f"{user}@{host}", f"mkdir -p {remote_path}"], check=True)
    # Upload
    result = subprocess.run(
        ["rsync", "-avz", "--progress", str(local_archive), f"{user}@{host}:{remote_path}/"]
    )
    if result.returncode == 0:
        print("Upload successful!")
        # Clean up local archive
        local_archive.unlink()
        return True
    else:
        print(f"Upload failed with code {result.returncode}")
        return False
def restore_from_remote(
    study_name: str,
    target_dir: Path,
    use_tailscale: bool = False
) -> bool:
    """
    Restore a study from the remote server.
    Args:
        study_name: Name of the study to restore
        target_dir: Where to extract the study
        use_tailscale: Use Tailscale IP
    Returns:
        True if successful
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    target_dir = Path(target_dir)
    # Find the archive on remote.
    # The glob and pipe run in the REMOTE shell; locally we pass an argument
    # list (shell=False) so a study_name containing shell metacharacters
    # cannot inject into the local shell.
    print(f"Looking for {study_name} on {host}...")
    find_cmd = ["ssh", f"{user}@{host}",
                f"ls {remote_path}/{study_name}*.tar.gz 2>/dev/null | head -1"]
    result = subprocess.run(find_cmd, capture_output=True, text=True)
    if not result.stdout.strip():
        print(f"No archive found for {study_name}")
        return False
    remote_archive = result.stdout.strip()
    local_archive = target_dir / Path(remote_archive).name
    print(f"Downloading: {remote_archive}")
    result = subprocess.run(
        ["rsync", "-avz", "--progress", f"{user}@{host}:{remote_archive}", str(local_archive)]
    )
    if result.returncode != 0:
        print("Download failed")
        return False
    print("Extracting...")
    with tarfile.open(local_archive, "r:gz") as tar:
        # NOTE(review): archives here are self-produced; if an untrusted tar
        # could ever reach this path, use an extraction filter to prevent
        # path traversal.
        tar.extractall(target_dir)
    # Clean up
    local_archive.unlink()
    print(f"Restored to {target_dir / study_name}")
    return True
def list_remote_archives(use_tailscale: bool = False) -> List[Dict]:
    """List all archived studies on the remote server.

    Returns a list of dicts with 'name', 'size', and 'date' parsed from the
    remote `ls -lh` output; empty when the server is unreachable.
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    # Remote shell expands the glob; the local side uses an argument list
    # (shell=False) to avoid local shell interpretation of the command.
    cmd = ["ssh", f"{user}@{host}", f"ls -lh {remote_path}/*.tar.gz 2>/dev/null"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    archives = []
    for line in result.stdout.strip().split('\n'):
        if line and '.tar.gz' in line:
            parts = line.split()
            # `ls -l` rows have at least 9 whitespace-separated fields:
            # perms links owner group size month day time name.
            if len(parts) >= 9:
                archives.append({
                    "name": parts[-1].split('/')[-1],
                    "size": parts[4],
                    "date": f"{parts[5]} {parts[6]} {parts[7]}",
                })
    return archives
def analyze_all_studies(studies_dir: Path) -> Dict:
    """Analyze all studies in a directory.

    Aggregates per-study analyses plus total/essential/deletable byte counts.
    Hidden folders (leading '.') and plain files are skipped.
    """
    studies_dir = Path(studies_dir)
    summary = {
        "total_size": 0,
        "total_essential": 0,
        "total_deletable": 0,
        "studies": [],
    }
    candidates = [
        entry for entry in sorted(studies_dir.iterdir())
        if entry.is_dir() and not entry.name.startswith('.')
    ]
    for study in candidates:
        result = analyze_study(study)
        summary["studies"].append(result)
        summary["total_size"] += result["total_size_bytes"]
        summary["total_essential"] += result["essential_size"]
        summary["total_deletable"] += result["deletable_size"]
    return summary
def main():
    """CLI entry point: analyze / cleanup / archive / restore / list commands."""
    import argparse
    parser = argparse.ArgumentParser(description="Atomizer Study Archiver")
    parser.add_argument("command", choices=["analyze", "cleanup", "archive", "restore", "list"])
    parser.add_argument("path", nargs="?", help="Study path or name")
    # Destructive operations default to a dry run; --execute flips it.
    parser.add_argument("--dry-run", action="store_true", default=True,
                        help="Don't actually delete/transfer (default: True)")
    parser.add_argument("--execute", action="store_true",
                        help="Actually perform the operation")
    parser.add_argument("--tailscale", action="store_true",
                        help="Use Tailscale IP for remote access")
    args = parser.parse_args()
    # --execute is the real switch; --dry-run exists for explicitness.
    dry_run = not args.execute
    if args.command == "analyze":
        if not args.path:
            print("Usage: study_archiver analyze <path>")
            return
        path = Path(args.path)
        if path.is_dir():
            # Check if it's a single study or a collection
            if (path / "optimization_config.json").exists() or (path / "1_setup").exists():
                # Single study
                analysis = analyze_study(path)
                print_analysis(analysis)
            else:
                # Collection of studies
                total = analyze_all_studies(path)
                print(f"\n{'='*60}")
                print(f"Summary: {len(total['studies'])} studies")
                print(f"{'='*60}")
                print(f"Total size: {total['total_size']/1e9:8.2f} GB")
                print(f"Essential: {total['total_essential']/1e9:8.2f} GB")
                print(f"Deletable: {total['total_deletable']/1e9:8.2f} GB")
                print(f"Potential save: {total['total_deletable']/1e9:8.2f} GB")
                print(f"\nPer study:")
                for s in total["studies"]:
                    print(f" {s['study_name']:40} {s['total_size_bytes']/1e9:6.2f} GB ({s['trial_count']:3d} trials)")
    elif args.command == "cleanup":
        if not args.path:
            print("Usage: study_archiver cleanup <study_path> [--execute]")
            return
        cleanup_study(Path(args.path), dry_run=dry_run)
    elif args.command == "archive":
        if not args.path:
            print("Usage: study_archiver archive <study_path> [--execute] [--tailscale]")
            return
        archive_to_remote(Path(args.path), use_tailscale=args.tailscale, dry_run=dry_run)
    elif args.command == "restore":
        if not args.path:
            print("Usage: study_archiver restore <study_name> [--tailscale]")
            return
        # Restores are always extracted under ./studies of the CWD.
        target = Path.cwd() / "studies"
        restore_from_remote(args.path, target, use_tailscale=args.tailscale)
    elif args.command == "list":
        archives = list_remote_archives(use_tailscale=args.tailscale)
        if archives:
            print(f"\nArchived studies on dalidou:")
            print(f"{'='*60}")
            for a in archives:
                print(f" {a['name']:40} {a['size']:>8} {a['date']}")
        else:
            print("No archives found (or server not reachable)")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,411 @@
"""
Study Cleanup Utility
====================
Cleans up completed optimization studies to save disk space by removing
large intermediate files (NX models, FEM meshes, solver results) while
preserving essential data (parameters, extracted results, database).
Usage:
python -m optimization_engine.utils.study_cleanup <study_path> [options]
Options:
--dry-run Show what would be deleted without actually deleting
--keep-best N Keep iteration folders for the top N best trials
--keep-pareto Keep all Pareto-optimal iterations (for multi-objective)
--aggressive Delete ALL iteration data (only keep DB and config)
The database (study.db) contains all optimization results and can regenerate
any analysis. The original NX model in 1_setup is always preserved.
"""
import argparse
import json
import shutil
import sqlite3
from pathlib import Path
from typing import Optional
# Files to ALWAYS keep in iteration folders (tiny, essential)
# Matched case-insensitively against the file NAME (not extension).
ESSENTIAL_FILES = {
    'params.exp',  # Design parameters for this iteration
    '_temp_mass.txt',  # Extracted mass
    '_temp_part_properties.json',  # Part properties
    '_temp_zernike.json',  # Zernike coefficients (if exists)
    'results.json',  # Any extracted results
}

# Extensions to DELETE (large, regenerable/already extracted)
# Matched case-insensitively against the file SUFFIX.
DELETABLE_EXTENSIONS = {
    '.op2',  # Nastran binary results (~65 MB each)
    '.prt',  # NX Part files (~30-35 MB each)
    '.fem',  # FEM mesh files (~15 MB each)
    '.dat',  # Nastran input deck (~15 MB each)
    '.sim',  # Simulation file (~7 MB each)
    '.afm',  # FEA auxiliary (~4 MB each)
    '.f04',  # Nastran log
    '.f06',  # Nastran output
    '.log',  # Solver log
    '.diag',  # Diagnostics
}
def get_study_info(study_path: Path) -> dict:
    """Get study metadata from config and database.

    Args:
        study_path: study folder; looks for optimization_config.json and a
            study.db under 3_results (or legacy 2_results).
    Returns:
        dict with 'name', 'has_config', 'has_db', 'trial_count',
        'best_trials', 'pareto_trials', and 'config' (only when present).
    """
    config_path = study_path / 'optimization_config.json'
    # Try both possible DB locations
    db_path = study_path / '3_results' / 'study.db'
    if not db_path.exists():
        db_path = study_path / '2_results' / 'study.db'
    info = {
        'name': study_path.name,
        'has_config': config_path.exists(),
        'has_db': db_path.exists(),
        'trial_count': 0,
        'best_trials': [],
        'pareto_trials': [],
    }
    if config_path.exists():
        with open(config_path) as f:
            info['config'] = json.load(f)
    if db_path.exists():
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            # Get trial count
            cursor.execute("SELECT COUNT(*) FROM trials WHERE state = 'COMPLETE'")
            info['trial_count'] = cursor.fetchone()[0]
            # Best trials (single objective). The table may not exist in
            # every schema version, so DB errors here are non-fatal —
            # narrowed from the previous bare except.
            try:
                cursor.execute("""
                    SELECT trial_id, value FROM trial_values
                    WHERE objective = 0
                    ORDER BY value ASC LIMIT 10
                """)
                info['best_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
            # Pareto attribute (multi-objective studies only)
            try:
                cursor.execute("""
                    SELECT DISTINCT trial_id FROM trial_system_attrs
                    WHERE key = 'pareto_optimal' AND value = '1'
                """)
                info['pareto_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
        finally:
            # Close the connection even if the trials query itself fails.
            conn.close()
    return info
def calculate_cleanup_savings(study_path: Path, keep_iters: set = None) -> dict:
    """Calculate how much space would be saved by cleanup.

    Args:
        study_path: study folder (expects '2_iterations' or legacy '1_working').
        keep_iters: iteration numbers whose folders are kept entirely.
    Returns:
        dict with 'total_size', 'deletable_size', 'keep_size' in bytes.
    """
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'  # Legacy structure
    if not iterations_path.exists():
        return {'total_size': 0, 'deletable_size': 0, 'keep_size': 0}
    total_size = 0
    deletable_size = 0
    keep_size = 0
    keep_iters = keep_iters or set()
    # Hoist the case-folded essential-name set out of the per-file loop
    # (previously rebuilt for every single file).
    essential_names = {e.lower() for e in ESSENTIAL_FILES}
    for iter_folder in iterations_path.iterdir():
        if not iter_folder.is_dir():
            continue
        # Extract iteration number; skip folders not named like 'iterNNN'
        # (narrowed from a bare except to ValueError from int()).
        try:
            iter_num = int(iter_folder.name.replace('iter', ''))
        except ValueError:
            continue
        for f in iter_folder.iterdir():
            if not f.is_file():
                continue
            size = f.stat().st_size
            total_size += size
            # Keep entire folder if in keep_iters
            if iter_num in keep_iters:
                keep_size += size
                continue
            # Keep essential files
            if f.name.lower() in essential_names:
                keep_size += size
            elif f.suffix.lower() in DELETABLE_EXTENSIONS:
                deletable_size += size
            else:
                keep_size += size  # Keep unknown files by default
    return {
        'total_size': total_size,
        'deletable_size': deletable_size,
        'keep_size': keep_size,
    }
def cleanup_study(
    study_path: Path,
    dry_run: bool = True,
    keep_best: int = 0,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> dict:
    """
    Clean up a study to save disk space.
    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
        keep_best: Number of best iterations to keep completely
        keep_pareto: Keep all Pareto-optimal iterations
        aggressive: Delete ALL iteration folders (only keep DB)
    Returns:
        dict with cleanup statistics
    Raises:
        ValueError: if study_path does not exist.
    """
    study_path = Path(study_path)
    if not study_path.exists():
        raise ValueError(f"Study path does not exist: {study_path}")
    # Get study info
    info = get_study_info(study_path)
    # Determine which iterations to keep
    keep_iters = set()
    if keep_best > 0 and info['best_trials']:
        keep_iters.update(info['best_trials'][:keep_best])
    if keep_pareto and info['pareto_trials']:
        keep_iters.update(info['pareto_trials'])
    # Find iterations folder
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'
    if not iterations_path.exists():
        return {'status': 'no_iterations', 'deleted_bytes': 0, 'deleted_files': 0}
    # Calculate savings
    savings = calculate_cleanup_savings(study_path, keep_iters)
    deleted_bytes = 0
    deleted_files = 0
    deleted_folders = 0
    if aggressive:
        # Delete entire iterations folder; dry run still reports the size.
        deleted_bytes = savings['total_size']
        if not dry_run:
            shutil.rmtree(iterations_path)
            deleted_folders = 1
    else:
        # Selective cleanup.
        # Hoist the case-folded essential-name set out of the per-file loop.
        essential_names = {e.lower() for e in ESSENTIAL_FILES}
        for iter_folder in iterations_path.iterdir():
            if not iter_folder.is_dir():
                continue
            # Extract iteration number; skip folders not named like 'iterNNN'
            # (narrowed from a bare except to ValueError from int()).
            try:
                iter_num = int(iter_folder.name.replace('iter', ''))
            except ValueError:
                continue
            # Skip kept iterations
            if iter_num in keep_iters:
                continue
            for f in iter_folder.iterdir():
                if not f.is_file():
                    continue
                # Keep essential files
                if f.name.lower() in essential_names:
                    continue
                # Delete deletable extensions
                if f.suffix.lower() in DELETABLE_EXTENSIONS:
                    size = f.stat().st_size
                    if not dry_run:
                        f.unlink()
                    deleted_bytes += size
                    deleted_files += 1
    return {
        'status': 'dry_run' if dry_run else 'completed',
        'study_name': info['name'],
        'trial_count': info['trial_count'],
        'kept_iterations': list(keep_iters),
        'total_size_before': savings['total_size'],
        'deleted_bytes': deleted_bytes,
        'deleted_files': deleted_files,
        'deleted_folders': deleted_folders,
        'space_saved_gb': deleted_bytes / (1024**3),
    }
def cleanup_batch(
    parent_path: Path,
    pattern: str = "*",
    dry_run: bool = True,
    keep_best: int = 3,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> list:
    """
    Clean up multiple studies matching a pattern.
    Args:
        parent_path: Parent directory containing studies
        pattern: Glob pattern to match study folders (e.g., "m1_mirror_*")
        dry_run: If True, only report
        keep_best: Keep N best iterations per study
        keep_pareto: Keep Pareto-optimal iterations
        aggressive: Delete all iteration folders
    Returns:
        List of cleanup results
    """
    parent_path = Path(parent_path)
    outcomes = []
    for candidate in sorted(parent_path.glob(pattern)):
        if not candidate.is_dir():
            continue
        # Only folders with an iterations directory count as studies.
        looks_like_study = (
            (candidate / '2_iterations').exists()
            or (candidate / '1_working').exists()
        )
        if not looks_like_study:
            continue
        try:
            outcome = cleanup_study(
                candidate,
                dry_run=dry_run,
                keep_best=keep_best,
                keep_pareto=keep_pareto,
                aggressive=aggressive,
            )
        except Exception as e:
            outcome = {
                'study_name': candidate.name,
                'status': 'error',
                'error': str(e),
            }
        outcomes.append(outcome)
    return outcomes
def main():
    """CLI entry point for study cleanup.

    Parses command-line arguments, then either cleans one study folder
    (default) or, with --batch PATTERN, every matching study under the
    given parent directory. Prints a human-readable report and returns
    the underlying result: a dict in single-study mode, a list of dicts
    in batch mode.
    """
    parser = argparse.ArgumentParser(
        description='Clean up completed optimization studies to save disk space.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('study_path', type=Path, help='Path to study folder or parent directory')
    # NOTE(review): --dry-run is accepted for discoverability but its value is
    # never read; dry-run mode is derived solely from the absence of --execute.
    parser.add_argument('--dry-run', action='store_true', default=True,
                        help='Show what would be deleted without deleting (default)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete files (opposite of --dry-run)')
    parser.add_argument('--keep-best', type=int, default=3,
                        help='Keep N best iterations completely (default: 3)')
    parser.add_argument('--keep-pareto', action='store_true',
                        help='Keep all Pareto-optimal iterations')
    parser.add_argument('--aggressive', action='store_true',
                        help='Delete ALL iteration data (only keep DB)')
    parser.add_argument('--batch', type=str, metavar='PATTERN',
                        help='Clean multiple studies matching pattern (e.g., "m1_mirror_*")')
    args = parser.parse_args()
    # Safe by default: files are only deleted when --execute is given.
    dry_run = not args.execute
    if args.batch:
        # Batch cleanup mode: study_path is the parent dir, --batch the glob.
        print(f"\n{'='*60}")
        print(f"BATCH CLEANUP: {args.study_path}")
        print(f"Pattern: {args.batch}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
        results = cleanup_batch(
            args.study_path,
            pattern=args.batch,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("BATCH RESULTS")
        print(f"{'='*60}")
        print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
        print("-" * 75)
        total_saved = 0
        for r in results:
            # Failed studies are reported inline; .get() guards keep a partial
            # result dict from crashing the summary table.
            if r.get('status') == 'error':
                print(f"{r['study_name']:<45} ERROR: {r.get('error', 'Unknown')}")
            else:
                saved = r.get('space_saved_gb', 0)
                total_saved += saved
                print(f"{r['study_name']:<45} {r.get('trial_count', 0):>7} "
                      f"{r.get('total_size_before', 0)/(1024**3):>7.1f}G {saved:>7.1f}G")
        print("-" * 75)
        print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return results
    else:
        # Single study cleanup
        print(f"\n{'='*60}")
        print(f"STUDY CLEANUP: {args.study_path.name}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN (no files deleted)' if dry_run else 'EXECUTE (files WILL be deleted)'}")
        print(f"Keep best: {args.keep_best} iterations")
        print(f"Keep Pareto: {args.keep_pareto}")
        print(f"Aggressive: {args.aggressive}")
        result = cleanup_study(
            args.study_path,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("RESULTS")
        print(f"{'='*60}")
        print(f"Trials in study: {result['trial_count']}")
        # Only the first 5 kept iteration numbers are shown, with an ellipsis.
        print(f"Iterations kept: {len(result['kept_iterations'])} {result['kept_iterations'][:5]}{'...' if len(result['kept_iterations']) > 5 else ''}")
        print(f"Total size before: {result['total_size_before'] / (1024**3):.2f} GB")
        print(f"{'Would delete' if dry_run else 'Deleted'}: {result['deleted_files']} files")
        print(f"Space {'to save' if dry_run else 'saved'}: {result['space_saved_gb']:.2f} GB")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return result
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()

67
run_cleanup.py Normal file
View File

@@ -0,0 +1,67 @@
"""Run cleanup excluding protected studies."""
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent))
from optimization_engine.utils.study_cleanup import cleanup_study, get_study_info
m1_dir = Path(r"C:\Users\antoi\Atomizer\studies\M1_Mirror")
# Studies to SKIP (user requested)
skip_patterns = [
"cost_reduction_V10",
"cost_reduction_V11",
"cost_reduction_V12",
"flat_back",
]
# Parse args
dry_run = "--execute" not in sys.argv
keep_best = 5
total_saved = 0
studies_to_clean = []
print("=" * 75)
print(f"CLEANUP (excluding V10-V12 and flat_back studies)")
print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
print("=" * 75)
print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
print("-" * 75)
for study_path in sorted(m1_dir.iterdir()):
if not study_path.is_dir():
continue
# Check if has iterations
if not (study_path / "2_iterations").exists():
continue
# Skip protected studies
skip = False
for pattern in skip_patterns:
if pattern in study_path.name:
skip = True
break
if skip:
info = get_study_info(study_path)
print(f"{study_path.name:<45} {info['trial_count']:>7} SKIPPED")
continue
# This study will be cleaned
result = cleanup_study(study_path, dry_run=dry_run, keep_best=keep_best)
saved = result["space_saved_gb"]
total_saved += saved
status = "would save" if dry_run else "saved"
print(f"{study_path.name:<45} {result['trial_count']:>7} {result['total_size_before']/(1024**3):>7.1f}G {saved:>7.1f}G")
studies_to_clean.append(study_path.name)
print("-" * 75)
print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")
if dry_run:
print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
else:
print(f"\n[OK] Cleanup complete! Freed {total_saved:.1f} GB")

32
tools/archive_study.bat Normal file
View File

@@ -0,0 +1,32 @@
@echo off
REM Atomizer Study Archiver - Convenience Script
REM Usage: archive_study.bat <command> [study_path]
REM
REM Commands:
REM analyze - Show disk usage analysis
REM cleanup - Remove regenerable files (dry run by default)
REM archive - Archive to dalidou server
REM list - List archived studies on server
REM
REM Examples:
REM archive_study.bat analyze studies\M1_Mirror
REM archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
REM archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
REM Run from the project root so relative study paths resolve regardless of
REM the caller's current directory.
cd /d C:\Users\antoi\Atomizer
REM No command given: print usage (with escaped angle brackets) and fail.
if "%1"=="" (
echo Usage: archive_study.bat ^<command^> [path] [options]
echo.
echo Commands:
echo analyze ^<path^> - Analyze disk usage
echo cleanup ^<study^> [--execute] - Remove regenerable files
echo archive ^<study^> [--execute] - Archive to dalidou
echo restore ^<name^> - Restore from dalidou
echo list - List remote archives
echo.
echo Add --tailscale for remote access via Tailscale
exit /b 1
)
REM Forward all arguments unchanged to the Python archiver module.
REM NOTE(review): uses the anaconda3 env python; other project scripts reference
REM a miniconda3 env - confirm which interpreter is intended on this machine.
C:\Users\antoi\anaconda3\envs\atomizer\python.exe -m optimization_engine.utils.study_archiver %*

View File

@@ -8,6 +8,11 @@ Generates 3 interactive HTML reports for Zernike wavefront analysis:
2. 60° vs 20° (relative) - Operational angle comparison
3. 90° (Manufacturing) - Absolute with manufacturing metrics
Uses the rigorous OPD method from extract_zernike_figure.py which:
- Accounts for lateral (X, Y) displacement via interpolation
- Uses actual mesh geometry as reference (no shape assumptions)
- Provides more accurate WFE for mirror optimization
Usage:
conda activate atomizer
python zernike_html_generator.py "path/to/solution.op2"
@@ -23,6 +28,7 @@ Output:
Author: Atomizer
Created: 2025-12-19
Updated: 2025-12-28 - Upgraded to use rigorous OPD method
"""
import sys
@@ -49,6 +55,15 @@ except ImportError as e:
print("Run: conda activate atomizer")
sys.exit(1)
# Import the rigorous OPD extractor
try:
from optimization_engine.extractors.extract_zernike_figure import ZernikeOPDExtractor
USE_OPD_METHOD = True
print("[INFO] Using rigorous OPD method (accounts for lateral displacement)")
except ImportError:
USE_OPD_METHOD = False
print("[WARN] OPD extractor not available, falling back to simple Z-only method")
# ============================================================================
# Configuration
@@ -278,13 +293,31 @@ def compute_rms_metrics(X, Y, W_nm):
def compute_mfg_metrics(coeffs):
"""Manufacturing aberration magnitudes."""
"""Manufacturing aberration magnitudes from Zernike coefficients.
Noll indexing (1-based): J1=Piston, J2=TiltX, J3=TiltY, J4=Defocus,
J5=Astig45, J6=Astig0, J7=ComaX, J8=ComaY, J9=TrefoilX, J10=TrefoilY, J11=Spherical
Python 0-indexed: coeffs[0]=J1, coeffs[3]=J4, etc.
"""
# Individual mode magnitudes (RSS for paired modes)
defocus = float(abs(coeffs[3])) # J4
astigmatism = float(np.sqrt(coeffs[4]**2 + coeffs[5]**2)) # RSS(J5, J6)
coma = float(np.sqrt(coeffs[6]**2 + coeffs[7]**2)) # RSS(J7, J8)
trefoil = float(np.sqrt(coeffs[8]**2 + coeffs[9]**2)) # RSS(J9, J10)
spherical = float(abs(coeffs[10])) if len(coeffs) > 10 else 0.0 # J11
# RMS of higher-order terms (J4+): sqrt(sum of squares of coefficients)
# This is the proper Zernike-coefficient-based RMS excluding piston/tip/tilt
higher_order_rms = float(np.sqrt(np.sum(coeffs[3:]**2)))
return {
'defocus_nm': float(abs(coeffs[3])),
'astigmatism_rms': float(np.sqrt(coeffs[4]**2 + coeffs[5]**2)),
'coma_rms': float(np.sqrt(coeffs[6]**2 + coeffs[7]**2)),
'trefoil_rms': float(np.sqrt(coeffs[8]**2 + coeffs[9]**2)),
'spherical_nm': float(abs(coeffs[10])) if len(coeffs) > 10 else 0.0,
'defocus_nm': defocus,
'astigmatism_rms': astigmatism,
'coma_rms': coma,
'trefoil_rms': trefoil,
'spherical_nm': spherical,
'higher_order_rms': higher_order_rms, # RMS of all J4+ coefficients
}
@@ -502,19 +535,22 @@ def generate_html(
], align="left", fill_color='#374151', font=dict(color='white'))
), row=3, col=1)
# Pre-correction (row 4)
# Pre-correction (row 4) - Aberrations to polish out (90° - 20°)
# Shows what correction is needed when manufacturing at 90° to achieve 20° figure
fig.add_trace(go.Table(
header=dict(values=["<b>Mode</b>", "<b>Correction (nm)</b>"],
header=dict(values=["<b>Aberration</b>", "<b>Magnitude (nm)</b>"],
align="left", fill_color='#1f2937', font=dict(color='white')),
cells=dict(values=[
["Total RMS (J1-J3 filter)",
"Defocus (J4)",
["Defocus (J4)",
"Astigmatism (J5+J6)",
"Coma (J7+J8)"],
[f"{correction_metrics['rms_filter_j1to3']:.2f}",
f"{correction_metrics['defocus_nm']:.2f}",
"Coma (J7+J8)",
"Trefoil (J9+J10)",
"Spherical (J11)"],
[f"{correction_metrics['defocus_nm']:.2f}",
f"{correction_metrics['astigmatism_rms']:.2f}",
f"{correction_metrics['coma_rms']:.2f}"]
f"{correction_metrics['coma_rms']:.2f}",
f"{correction_metrics['trefoil_rms']:.2f}",
f"{correction_metrics['spherical_nm']:.2f}"]
], align="left", fill_color='#374151', font=dict(color='white'))
), row=4, col=1)
else:
@@ -595,8 +631,248 @@ def find_op2_file(working_dir=None):
return max(op2_files, key=lambda p: p.stat().st_mtime)
def _relative_wfe(opd, ref_wfe_map):
    """Build relative (angle minus reference) WFE arrays on shared nodes.

    Args:
        opd: dict from ZernikeOPDExtractor._build_figure_opd_data with keys
            'node_ids', 'x_deformed', 'y_deformed', 'wfe_nm'.
        ref_wfe_map: {node_id: wfe_nm} for the reference subcase.

    Returns:
        (X, Y, dWFE) numpy arrays restricted to nodes present in both
        fields; nodes missing from the reference are dropped.
    """
    xs, ys, dws = [], [], []
    for i, nid in enumerate(opd['node_ids']):
        nid = int(nid)
        if nid in ref_wfe_map:
            xs.append(opd['x_deformed'][i])
            ys.append(opd['y_deformed'][i])
            dws.append(opd['wfe_nm'][i] - ref_wfe_map[nid])
    return np.array(xs), np.array(ys), np.array(dws)


def main_opd(op2_path: Path):
    """Generate all 3 HTML files using rigorous OPD method.

    Writes next to the OP2 file:
      - <base>_<ts>_40_vs_20.html  (40 deg relative to 20 deg reference)
      - <base>_<ts>_60_vs_20.html  (60 deg relative to 20 deg reference)
      - <base>_<ts>_90_mfg.html    (90 deg absolute, manufacturing metrics)

    Returns:
        List of written HTML Paths, or None if the OP2 file does not
        contain the four expected subcases.
    """
    print("=" * 70)
    print(" ATOMIZER ZERNIKE HTML GENERATOR (OPD METHOD)")
    print("=" * 70)
    print(f"\nOP2 File: {op2_path.name}")
    print(f"Directory: {op2_path.parent}")
    print("\n[INFO] Using OPD method: accounts for lateral (X,Y) displacement")
    # Initialize extractor
    extractor = ZernikeOPDExtractor(
        op2_path,
        displacement_unit='mm',
        n_modes=N_MODES,
        filter_orders=FILTER_LOW_ORDERS
    )
    print(f"\nAvailable subcases: {list(extractor.displacements.keys())}")
    # Map angle labels to subcase IDs. Try common patterns: sequential IDs
    # 1-4, literal angle labels, or (last resort) positional order.
    displacements = extractor.displacements
    subcase_map = {}
    if '1' in displacements and '2' in displacements:
        subcase_map = {'90': '1', '20': '2', '40': '3', '60': '4'}
    elif '90' in displacements and '20' in displacements:
        subcase_map = {'90': '90', '20': '20', '40': '40', '60': '60'}
    else:
        available = sorted(displacements.keys(), key=lambda x: int(x) if x.isdigit() else 0)
        if len(available) >= 4:
            # assumes subcase order is 90, 20, 40, 60 — TODO confirm with solver deck
            subcase_map = {'90': available[0], '20': available[1], '40': available[2], '60': available[3]}
            print(f"[WARN] Using mapped subcases: {subcase_map}")
        else:
            print(f"[ERROR] Need 4 subcases, found: {available}")
            return
    output_dir = op2_path.parent
    base = op2_path.stem
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    html_files = []
    # ========================================================================
    # Extract absolute metrics for each subcase
    # ========================================================================
    print("\nExtracting absolute metrics (OPD method)...")
    results_abs = {}
    for angle, label in subcase_map.items():
        result = extractor.extract_subcase(label, include_coefficients=True)
        results_abs[angle] = result
        lat_disp = result.get('max_lateral_displacement_um', 0)
        print(f"  {angle} deg: Filtered RMS = {result['filtered_rms_nm']:.2f} nm, "
              f"Lateral disp max = {lat_disp:.3f} um")
    # ========================================================================
    # Extract relative metrics (40-20, 60-20, 90-20)
    # ========================================================================
    print("\nExtracting relative metrics (OPD method)...")
    # 40 vs 20
    result_40_rel = extractor.extract_relative(subcase_map['40'], subcase_map['20'], include_coefficients=True)
    print(f"  40-20: Relative Filtered RMS = {result_40_rel['relative_filtered_rms_nm']:.2f} nm")
    # 60 vs 20
    result_60_rel = extractor.extract_relative(subcase_map['60'], subcase_map['20'], include_coefficients=True)
    print(f"  60-20: Relative Filtered RMS = {result_60_rel['relative_filtered_rms_nm']:.2f} nm")
    # 90 vs 20 (for correction metrics)
    result_90_rel = extractor.extract_relative(subcase_map['90'], subcase_map['20'], include_coefficients=True)
    print(f"  90-20: Relative Filtered RMS = {result_90_rel['relative_filtered_rms_nm']:.2f} nm")
    # ========================================================================
    # Generate HTML files
    # ========================================================================
    # Visualization needs per-node WFE arrays; the 20 deg case is the shared
    # reference for all three relative maps, so build its lookup once.
    print("\nGenerating HTML reports...")
    # 40 vs 20
    print("  Generating 40 deg vs 20 deg...")
    opd_40 = extractor._build_figure_opd_data(subcase_map['40'])
    opd_20 = extractor._build_figure_opd_data(subcase_map['20'])
    ref_wfe_map = {int(nid): wfe for nid, wfe in zip(opd_20['node_ids'], opd_20['wfe_nm'])}
    X_40_rel, Y_40_rel, WFE_40_rel = _relative_wfe(opd_40, ref_wfe_map)
    rms_40_rel = compute_rms_metrics(X_40_rel, Y_40_rel, WFE_40_rel)
    rms_40_abs = compute_rms_metrics(opd_40['x_deformed'], opd_40['y_deformed'], opd_40['wfe_nm'])
    html_40 = generate_html(
        title="40 deg (OPD)",
        X=X_40_rel, Y=Y_40_rel, W_nm=WFE_40_rel,
        rms_data=rms_40_rel,
        is_relative=True,
        ref_title="20 deg",
        abs_pair=(rms_40_abs['global_rms'], rms_40_abs['filtered_rms'])
    )
    path_40 = output_dir / f"{base}_{timestamp}_40_vs_20.html"
    path_40.write_text(html_40, encoding='utf-8')
    html_files.append(path_40)
    print(f"  Created: {path_40.name}")
    # 60 vs 20
    print("  Generating 60 deg vs 20 deg...")
    opd_60 = extractor._build_figure_opd_data(subcase_map['60'])
    X_60_rel, Y_60_rel, WFE_60_rel = _relative_wfe(opd_60, ref_wfe_map)
    rms_60_rel = compute_rms_metrics(X_60_rel, Y_60_rel, WFE_60_rel)
    rms_60_abs = compute_rms_metrics(opd_60['x_deformed'], opd_60['y_deformed'], opd_60['wfe_nm'])
    html_60 = generate_html(
        title="60 deg (OPD)",
        X=X_60_rel, Y=Y_60_rel, W_nm=WFE_60_rel,
        rms_data=rms_60_rel,
        is_relative=True,
        ref_title="20 deg",
        abs_pair=(rms_60_abs['global_rms'], rms_60_abs['filtered_rms'])
    )
    path_60 = output_dir / f"{base}_{timestamp}_60_vs_20.html"
    path_60.write_text(html_60, encoding='utf-8')
    html_files.append(path_60)
    print(f"  Created: {path_60.name}")
    # 90 deg Manufacturing
    print("  Generating 90 deg Manufacturing...")
    opd_90 = extractor._build_figure_opd_data(subcase_map['90'])
    rms_90 = compute_rms_metrics(opd_90['x_deformed'], opd_90['y_deformed'], opd_90['wfe_nm'])
    mfg_metrics = compute_mfg_metrics(rms_90['coefficients'])
    # 90-20 relative for correction metrics
    X_90_rel, Y_90_rel, WFE_90_rel = _relative_wfe(opd_90, ref_wfe_map)
    rms_90_rel = compute_rms_metrics(X_90_rel, Y_90_rel, WFE_90_rel)
    # Get all correction metrics from Zernike coefficients (90° - 20°)
    correction_metrics = compute_mfg_metrics(rms_90_rel['coefficients'])
    html_90 = generate_html(
        title="90 deg Manufacturing (OPD)",
        X=opd_90['x_deformed'], Y=opd_90['y_deformed'], W_nm=opd_90['wfe_nm'],
        rms_data=rms_90,
        is_relative=False,
        is_manufacturing=True,
        mfg_metrics=mfg_metrics,
        correction_metrics=correction_metrics
    )
    path_90 = output_dir / f"{base}_{timestamp}_90_mfg.html"
    path_90.write_text(html_90, encoding='utf-8')
    html_files.append(path_90)
    print(f"  Created: {path_90.name}")
    # ========================================================================
    # Summary
    # ========================================================================
    print("\n" + "=" * 70)
    print("SUMMARY (OPD Method)")
    print("=" * 70)
    print(f"\nGenerated {len(html_files)} HTML files:")
    for f in html_files:
        print(f"  - {f.name}")
    print("\n" + "-" * 70)
    print("OPTIMIZATION OBJECTIVES (OPD Method)")
    print("-" * 70)
    print(f"  40-20 Filtered RMS: {rms_40_rel['filtered_rms']:.2f} nm")
    print(f"  60-20 Filtered RMS: {rms_60_rel['filtered_rms']:.2f} nm")
    print(f"  MFG 90 (J1-J3): {rms_90_rel['rms_filter_j1to3']:.2f} nm")
    # Weighted sums
    ws_v4 = 5*rms_40_rel['filtered_rms'] + 5*rms_60_rel['filtered_rms'] + 2*rms_90_rel['rms_filter_j1to3']
    ws_v5 = 5*rms_40_rel['filtered_rms'] + 5*rms_60_rel['filtered_rms'] + 3*rms_90_rel['rms_filter_j1to3']
    print(f"\n  V4 Weighted Sum (5/5/2): {ws_v4:.2f}")
    print(f"  V5 Weighted Sum (5/5/3): {ws_v5:.2f}")
    # Lateral displacement summary
    print("\n" + "-" * 70)
    print("LATERAL DISPLACEMENT SUMMARY")
    print("-" * 70)
    for angle in ['20', '40', '60', '90']:
        lat = results_abs[angle].get('max_lateral_displacement_um', 0)
        print(f"  {angle} deg: max {lat:.3f} um")
    print("\n" + "=" * 70)
    print("DONE")
    print("=" * 70)
    return html_files
def main(op2_path: Path):
"""Generate all 3 HTML files."""
"""Generate all 3 HTML files (legacy Z-only method)."""
print("=" * 70)
print(" ATOMIZER ZERNIKE HTML GENERATOR")
print("=" * 70)
@@ -753,12 +1029,8 @@ def main(op2_path: Path):
X_ref, Y_ref, WFE_ref, ref_data['node_ids']
)
rms_90_rel = compute_rms_metrics(X_90_rel, Y_90_rel, WFE_90_rel)
correction_metrics = {
'rms_filter_j1to3': rms_90_rel['rms_filter_j1to3'],
'defocus_nm': compute_mfg_metrics(rms_90_rel['coefficients'])['defocus_nm'],
'astigmatism_rms': compute_mfg_metrics(rms_90_rel['coefficients'])['astigmatism_rms'],
'coma_rms': compute_mfg_metrics(rms_90_rel['coefficients'])['coma_rms'],
}
# Get all correction metrics from Zernike coefficients (90° - 20°)
correction_metrics = compute_mfg_metrics(rms_90_rel['coefficients'])
html_90 = generate_html(
title="90 deg (Manufacturing)",
@@ -822,8 +1094,16 @@ if __name__ == '__main__':
sys.exit(1)
print(f"Found: {op2_path}")
# Check for --legacy flag to use old Z-only method
use_legacy = '--legacy' in sys.argv or '--z-only' in sys.argv
try:
main(op2_path)
if USE_OPD_METHOD and not use_legacy:
main_opd(op2_path)
else:
if use_legacy:
print("[INFO] Using legacy Z-only method (--legacy flag)")
main(op2_path)
except Exception as e:
print(f"\nERROR: {e}")
import traceback