feat: Pre-migration checkpoint - updated docs and utilities

Updates before optimization_engine migration:
- Updated migration plan to v2.1 with complete file inventory
- Added OP_07 disk optimization protocol
- Added SYS_16 self-aware turbo protocol
- Added study archiver and cleanup utilities
- Added ensemble surrogate module
- Updated NX solver and session manager
- Updated zernike HTML generator
- Added context engineering plan
- LAC session insights updates

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-29 10:22:45 -05:00
parent faa7779a43
commit 82f36689b7
21 changed files with 6304 additions and 890 deletions

View File

@@ -2,110 +2,42 @@
"permissions": {
"allow": [
"Bash(dir:*)",
"Bash(sqlite3:*)",
"Bash(timeout /t 30 /nobreak)",
"Bash(npm install:*)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(git push:*)",
"Bash(powershell -Command:*)",
"Bash(python:*)",
"Bash(conda activate:*)",
"Bash(C:/Users/Antoine/miniconda3/envs/atomizer/python.exe:*)",
"Bash(cat:*)",
"Bash(C:UsersAntoineminiconda3envsatomizerpython.exe run_adaptive_mirror_optimization.py --fea-budget 100 --batch-size 5 --strategy hybrid)",
"Bash(/c/Users/Antoine/miniconda3/envs/atomizer/python.exe:*)",
"Bash(npm run build:*)",
"Bash(npm uninstall:*)",
"Bash(git:*)",
"Bash(npm:*)",
"Bash(conda:*)",
"Bash(pip:*)",
"Bash(cmd /c:*)",
"Bash(tasklist:*)",
"Bash(taskkill:*)",
"Bash(robocopy:*)",
"Bash(xcopy:*)",
"Bash(del:*)",
"Bash(type:*)",
"Bash(where:*)",
"Bash(netstat:*)",
"Bash(findstr:*)",
"Bash(curl:*)",
"Bash(npx tsc:*)",
"Bash(atomizer-dashboard/README.md )",
"Bash(atomizer-dashboard/backend/api/main.py )",
"Bash(atomizer-dashboard/backend/api/routes/optimization.py )",
"Bash(atomizer-dashboard/backend/api/routes/claude.py )",
"Bash(atomizer-dashboard/backend/api/routes/terminal.py )",
"Bash(atomizer-dashboard/backend/api/services/ )",
"Bash(atomizer-dashboard/backend/requirements.txt )",
"Bash(atomizer-dashboard/frontend/package.json )",
"Bash(atomizer-dashboard/frontend/package-lock.json )",
"Bash(atomizer-dashboard/frontend/src/components/ClaudeChat.tsx )",
"Bash(atomizer-dashboard/frontend/src/components/ClaudeTerminal.tsx )",
"Bash(atomizer-dashboard/frontend/src/components/dashboard/ControlPanel.tsx )",
"Bash(atomizer-dashboard/frontend/src/pages/Dashboard.tsx )",
"Bash(atomizer-dashboard/frontend/src/context/ )",
"Bash(atomizer-dashboard/frontend/src/pages/Home.tsx )",
"Bash(atomizer-dashboard/frontend/src/App.tsx )",
"Bash(atomizer-dashboard/frontend/src/api/client.ts )",
"Bash(atomizer-dashboard/frontend/src/components/layout/Sidebar.tsx )",
"Bash(atomizer-dashboard/frontend/src/index.css )",
"Bash(atomizer-dashboard/frontend/src/pages/Results.tsx )",
"Bash(atomizer-dashboard/frontend/tailwind.config.js )",
"Bash(docs/07_DEVELOPMENT/DASHBOARD_IMPROVEMENT_PLAN.md)",
"Bash(taskkill:*)",
"Bash(xargs:*)",
"Bash(cmd.exe /c:*)",
"Bash(powershell.exe -Command:*)",
"Bash(where:*)",
"Bash(type %USERPROFILE%.claude*)",
"Bash(conda create:*)",
"Bash(cmd /c \"conda create -n atomizer python=3.10 -y\")",
"Bash(cmd /c \"where conda\")",
"Bash(cmd /c \"dir /b C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\Users\\antoi\\miniconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\ProgramData\\anaconda3\\Scripts\\conda.exe 2>nul || dir /b C:\\ProgramData\\miniconda3\\Scripts\\conda.exe 2>nul || echo NOT_FOUND\")",
"Bash(cmd /c \"if exist C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe (echo FOUND: anaconda3) else if exist C:\\Users\\antoi\\miniconda3\\Scripts\\conda.exe (echo FOUND: miniconda3) else if exist C:\\ProgramData\\anaconda3\\Scripts\\conda.exe (echo FOUND: ProgramData\\anaconda3) else (echo NOT_FOUND)\")",
"Bash(powershell:*)",
"Bash(C:Usersantoianaconda3Scriptsconda.exe create -n atomizer python=3.10 -y)",
"Bash(cmd /c \"C:\\Users\\antoi\\anaconda3\\Scripts\\conda.exe create -n atomizer python=3.10 -y\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\optimization_engine\\solve_simulation.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_adaptive_V15\\2_iterations\\iter2\\ASSY_M1_assyfem1_sim1.sim\"\" \"\"Solution 1\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"C:Program FilesSiemensDesigncenterNX2512NXBINrun_journal.exe\" \"C:UsersantoiAtomizernx_journalsextract_part_mass_material.py\" -args \"C:UsersantoiAtomizerstudiesm1_mirror_cost_reduction1_setupmodelM1_Blank.prt\" \"C:UsersantoiAtomizerstudiesm1_mirror_cost_reduction1_setupmodel\" 2>&1\")",
"Bash(npm run dev:*)",
"Bash(cmd /c \"cd /d C:\\Users\\antoi\\Atomizer\\atomizer-dashboard\\frontend && npm run dev\")",
"Bash(cmd /c \"cd /d C:\\Users\\antoi\\Atomizer\\atomizer-dashboard\\frontend && dir package.json && npm --version\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_part_mass_material.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_expressions.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\" 2>&1\")",
"Bash(cmd /c \"set SPLM_LICENSE_SERVER=28000@dalidou;28000@100.80.199.40 && \"\"C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe\"\" \"\"C:\\Users\\antoi\\Atomizer\\nx_journals\\extract_expressions.py\"\" -args \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\\M1_Blank.prt\"\" \"\"C:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_cost_reduction\\1_setup\\model\"\"\")",
"Bash(cmd /c:*)",
"Bash(taskkill /F /FI \"WINDOWTITLE eq *uvicorn*\")",
"Bash(python -m uvicorn:*)",
"Bash(conda run:*)",
"Bash(/c/Users/antoi/miniconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(/c/Users/antoi/anaconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(/c/Users/antoi/anaconda3/envs/atomizer/python.exe:*)",
"Bash(tasklist:*)",
"Bash(wmic process where \"ProcessId=147068\" delete)",
"Bash(cmd.exe //c \"taskkill /F /PID 147068\")",
"Bash(pip show:*)",
"Bash(python3:*)",
"Bash(python extract_all_mirror_data.py:*)",
"Bash(C:Usersantoiminiconda3envsatomizerpython.exe extract_all_mirror_data.py)",
"Bash(/c/Users/antoi/miniconda3/envs/atomizer/python.exe:*)",
"Bash(grep:*)",
"Bash(python -c:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe -c \"\nimport pandas as pd\ndf = pd.read_csv(r''c:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_all_trials_export.csv'')\n\n# Check which columns have data\nprint(''=== Column data availability ==='')\nfor col in df.columns:\n non_null = df[col].notna().sum()\n print(f''{col}: {non_null}/{len(df)} ({100*non_null/len(df):.1f}%)'')\n\nprint(''\\n=== Studies in dataset ==='')\nprint(df[''study''].value_counts())\n\")",
"Bash(cmd /c \"C:\\Users\\antoi\\anaconda3\\envs\\atomizer\\python.exe -c \"\"import pandas as pd; df = pd.read_csv(r''c:\\Users\\antoi\\Atomizer\\studies\\m1_mirror_all_trials_export.csv''); print(''Rows:'', len(df)); print(df.columns.tolist())\"\"\")",
"Bash(robocopy:*)",
"Bash(xcopy:*)",
"Bash(ls:*)",
"Bash(dir \"c:\\Users\\antoi\\Atomizer\\studies\\*.png\")",
"Bash(powershell -Command \"Get-Process | Where-Object { $_Modules.FileName -like ''*study.db*'' } | Select-Object Id, ProcessName\")",
"Bash(powershell -Command:*)",
"Bash(C:/Users/antoi/miniconda3/envs/atomizer/python.exe -m uvicorn:*)",
"Bash(dir /s /b \"C:\\Users\\antoi\\*conda*\")",
"Bash(conda run -n atomizer python:*)",
"Bash(C:/ProgramData/anaconda3/condabin/conda.bat run -n atomizer python -c \"\nimport sqlite3\n\ndb_path = ''studies/M1_Mirror/m1_mirror_cost_reduction_V6/3_results/study.db''\nconn = sqlite3.connect(db_path)\ncursor = conn.cursor()\n\n# Get counts\ncursor.execute(''SELECT COUNT(*) FROM trials'')\ntotal = cursor.fetchone()[0]\n\ncursor.execute(\"\"SELECT COUNT(*) FROM trials WHERE state = ''COMPLETE''\"\")\ncomplete = cursor.fetchone()[0]\n\nprint(f''=== V6 Study Status ==='')\nprint(f''Total trials: {total}'')\nprint(f''Completed: {complete}'')\nprint(f''Failed/Pruned: {total - complete}'')\nprint(f''Progress: {complete}/200 ({100*complete/200:.1f}%)'')\n\n# Get objectives stats\nobjs = [''rel_filtered_rms_40_vs_20'', ''rel_filtered_rms_60_vs_20'', ''mfg_90_optician_workload'', ''mass_kg'']\nprint(f''\\n=== Objectives Stats ==='')\nfor obj in objs:\n cursor.execute(f\"\"SELECT MIN({obj}), MAX({obj}), AVG({obj}) FROM trials WHERE state = ''COMPLETE'' AND {obj} IS NOT NULL\"\")\n result = cursor.fetchone()\n if result and result[0] is not None:\n print(f''{obj}: min={result[0]:.4f}, max={result[1]:.4f}, mean={result[2]:.4f}'')\n\n# Design variables stats \ndvs = [''whiffle_min'', ''whiffle_outer_to_vertical'', ''whiffle_triangle_closeness'', ''blank_backface_angle'', ''Pocket_Radius'']\nprint(f''\\n=== Design Variables Explored ==='')\nfor dv in dvs:\n try:\n cursor.execute(f\"\"SELECT MIN({dv}), MAX({dv}), AVG({dv}) FROM trials WHERE state = ''COMPLETE''\"\")\n result = cursor.fetchone()\n if result and result[0] is not None:\n print(f''{dv}: min={result[0]:.3f}, max={result[1]:.3f}, mean={result[2]:.3f}'')\n except Exception as e:\n print(f''{dv}: error - {e}'')\n\nconn.close()\n\")",
"Bash(/c/Users/antoi/anaconda3/python.exe:*)",
"Bash(C:UsersantoiAtomizertemp_extract.bat)",
"Bash(dir /b \"C:\\Users\\antoi\\Atomizer\\knowledge_base\\lac\")",
"Bash(pip install:*)",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\")",
"Bash(call \"%USERPROFILE%\\anaconda3\\Scripts\\activate.bat\" atomizer)",
"Bash(cmd /c \"cd /d c:\\Users\\antoi\\Atomizer && call %USERPROFILE%\\anaconda3\\Scripts\\activate.bat atomizer && python -c \"\"import sys; sys.path.insert(0, ''.''); from optimization_engine.extractors import ZernikeExtractor; print(''OK''); import inspect; print(inspect.signature(ZernikeExtractor.extract_relative))\"\"\")",
"Bash(cmd /c \"cd /d c:\\Users\\antoi\\Atomizer && c:\\Users\\antoi\\anaconda3\\envs\\atomizer\\python.exe -c \"\"import sys; sys.path.insert(0, ''.''); from optimization_engine.extractors import ZernikeExtractor; print(''Import OK''); import inspect; sig = inspect.signature(ZernikeExtractor.extract_relative); print(''Signature:'', sig)\"\"\")",
"Bash(c:Usersantoianaconda3envsatomizerpython.exe c:UsersantoiAtomizertoolstest_zernike_import.py)",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\\best_design_archive\")",
"Bash(dir \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V7\\3_results\\best_design_archive\\20251220_010128\")",
"Bash(dir /s /b \"C:\\Users\\antoi\\Atomizer\\studies\\M1_Mirror\\m1_mirror_cost_reduction_V8\")",
"Bash(c:/Users/antoi/anaconda3/envs/atomizer/python.exe:*)",
"Read",
"Skill(dashboard:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe:*)",
"Bash(del \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V5\\\\3_results\\\\study.db\")",
"Bash(C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe -c:*)",
"Bash(C:Usersantoianaconda3envsatomizerpython.exe run_optimization.py --trials 1)",
"Bash(C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe -m py_compile:*)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver analyze \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V12\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V2\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V11\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V11\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V3\")",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V3\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V6\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V1\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_flat_back_V5\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction_V12\" --execute)",
"Bash(\"C:\\\\Users\\\\antoi\\\\anaconda3\\\\envs\\\\atomizer\\\\python.exe\" -m optimization_engine.utils.study_archiver cleanup \"C:\\\\Users\\\\antoi\\\\Atomizer\\\\studies\\\\M1_Mirror\\\\m1_mirror_cost_reduction\" --execute)"
],
"deny": [],
"ask": []

View File

@@ -84,6 +84,10 @@ User Request
│ ├─ "error", "failed", "not working", "crashed"
│ └─► Load: OP_06_TROUBLESHOOT.md
├─► MANAGE disk space?
│ ├─ "disk", "space", "cleanup", "archive", "storage"
│ └─► Load: OP_07_DISK_OPTIMIZATION.md
├─► CONFIGURE settings?
│ ├─ "change", "modify", "settings", "parameters"
│ └─► Load relevant SYS_* protocol
@@ -109,6 +113,7 @@ User Request
| Analyze results | "results", "best", "compare", "pareto" | OP_04 | - | user |
| Export training data | "export", "training data", "neural" | OP_05 | modules/neural-acceleration.md | user |
| Debug issues | "error", "failed", "not working", "help" | OP_06 | - | user |
| **Disk management** | "disk", "space", "cleanup", "archive" | **OP_07** | modules/study-disk-optimization.md | user |
| Understand IMSO | "protocol 10", "IMSO", "adaptive" | SYS_10 | - | user |
| Multi-objective | "pareto", "NSGA", "multi-objective" | SYS_11 | - | user |
| Extractors | "extractor", "displacement", "stress" | SYS_12 | modules/extractors-catalog.md | user |

View File

@@ -30,6 +30,7 @@ requires_skills:
| See best results | OP_04 | `optuna-dashboard sqlite:///study.db` or dashboard |
| Export neural training data | OP_05 | `python run_optimization.py --export-training` |
| Fix an error | OP_06 | Read error log → follow diagnostic tree |
| **Free disk space** | **OP_07** | `archive_study.bat cleanup <study> --execute` |
| Add custom physics extractor | EXT_01 | Create in `optimization_engine/extractors/` |
| Add lifecycle hook | EXT_02 | Create in `optimization_engine/plugins/` |
| Generate physics insight | SYS_16 | `python -m optimization_engine.insights generate <study>` |
@@ -219,6 +220,48 @@ python -c "import optuna; s=optuna.load_study('my_study', 'sqlite:///3_results/s
---
## Disk Space Management (OP_07)
FEA studies consume massive disk space. After completion, clean up regenerable files:
### Quick Commands
```bash
# Analyze disk usage
archive_study.bat analyze studies\M1_Mirror
# Cleanup completed study (dry run first!)
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
# Archive to dalidou server
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
# List remote archives
archive_study.bat list
```
### What Gets Deleted vs Kept
| KEEP | DELETE |
|------|--------|
| `.op2` (Nastran results) | `.prt, .fem, .sim` (copies of master) |
| `.json` (params/metadata) | `.dat` (solver input) |
| `1_setup/` (master files) | `.f04, .f06, .log` (solver logs) |
| `3_results/` (database) | `.afm, .diag, .bak` (temp files) |
### Typical Savings
| Stage | M1_Mirror Example |
|-------|-------------------|
| Full | 194 GB |
| After cleanup | 114 GB (41% saved) |
| Archived to server | 5 GB local (97% saved) |
**Full details**: `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md`
---
## LAC (Learning Atomizer Core) Commands
```bash

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,464 @@
# Study Disk Optimization Module
## Atomizer Disk Space Management System
**Version:** 1.0
**Created:** 2025-12-29
**Status:** PRODUCTION READY
**Impact:** Reduced M1_Mirror from 194 GB → 114 GB (80 GB freed, 41% reduction)
---
## Executive Summary
FEA optimization studies consume massive disk space due to per-trial file copying. This module provides:
1. **Local Cleanup** - Remove regenerable files from completed studies (50%+ savings)
2. **Remote Archival** - Archive to dalidou server (~16 TB available: 3.6 TB SSD + 12.7 TB HDD)
3. **On-Demand Restore** - Pull archived studies when needed
### Key Insight
Each trial folder contains ~150 MB, but only **~70 MB is essential** (OP2 results + metadata). The rest are copies of master files that can be regenerated.
---
## Part 1: File Classification
### Essential Files (KEEP)
| Extension | Purpose | Typical Size |
|-----------|---------|--------------|
| `.op2` | Nastran binary results | 68 MB |
| `.json` | Parameters, results, metadata | <1 MB |
| `.npz` | Pre-computed Zernike coefficients | <1 MB |
| `.html` | Generated reports | <1 MB |
| `.png` | Visualization images | <1 MB |
| `.csv` | Exported data tables | <1 MB |
### Deletable Files (REGENERABLE)
| Extension | Purpose | Why Deletable |
|-----------|---------|---------------|
| `.prt` | NX part files | Copy of master in `1_setup/` |
| `.fem` | FEM mesh files | Copy of master |
| `.sim` | Simulation files | Copy of master |
| `.afm` | Assembly FEM | Regenerable |
| `.dat` | Solver input deck | Regenerable from params |
| `.f04` | Nastran output log | Diagnostic only |
| `.f06` | Nastran printed output | Diagnostic only |
| `.log` | Generic logs | Diagnostic only |
| `.diag` | Diagnostic files | Diagnostic only |
| `.txt` | Temp text files | Intermediate data |
| `.exp` | Expression files | Regenerable |
| `.bak` | Backup files | Not needed |
### Protected Folders (NEVER TOUCH)
| Folder | Reason |
|--------|--------|
| `1_setup/` | Master model files (source of truth) |
| `3_results/` | Final database, reports, best designs |
| `best_design_archive/` | Archived optimal configurations |
---
## Part 2: Disk Usage Analysis
### M1_Mirror Project Baseline (Dec 2025)
```
Total: 194 GB across 28 studies, 2000+ trials
By File Type:
.op2 94 GB (48.5%) - Nastran results [ESSENTIAL]
.prt 41 GB (21.4%) - NX parts [DELETABLE]
.fem 22 GB (11.5%) - FEM mesh [DELETABLE]
.dat 22 GB (11.3%) - Solver input [DELETABLE]
.sim 9 GB (4.5%) - Simulation [DELETABLE]
.afm 5 GB (2.5%) - Assembly FEM [DELETABLE]
Other <1 GB (<1%) - Logs, configs [MIXED]
By Folder:
2_iterations/ 168 GB (87%) - Per-trial data
3_results/ 22 GB (11%) - Final results
1_setup/ 4 GB (2%) - Master models
```
### Per-Trial Breakdown (Typical V11+ Structure)
```
iter1/
assy_m1_assyfem1_sim1-solution_1.op2 68.15 MB [KEEP]
M1_Blank.prt 29.94 MB [DELETE]
assy_m1_assyfem1_sim1-solution_1.dat 15.86 MB [DELETE]
M1_Blank_fem1.fem 14.07 MB [DELETE]
ASSY_M1_assyfem1_sim1.sim 7.47 MB [DELETE]
M1_Blank_fem1_i.prt 5.20 MB [DELETE]
ASSY_M1_assyfem1.afm 4.13 MB [DELETE]
M1_Vertical_Support_Skeleton_fem1.fem 3.76 MB [DELETE]
... (logs, temps) <1.00 MB [DELETE]
_temp_part_properties.json 0.00 MB [KEEP]
-------------------------------------------------------
TOTAL: 149.67 MB
Essential only: 68.15 MB
Savings: 54.5%
```
---
## Part 3: Implementation
### Core Utility
**Location:** `optimization_engine/utils/study_archiver.py`
```python
from optimization_engine.utils.study_archiver import (
analyze_study, # Get disk usage analysis
cleanup_study, # Remove deletable files
archive_to_remote, # Archive to dalidou
restore_from_remote, # Restore from dalidou
list_remote_archives, # List server archives
)
```
### Command Line Interface
**Batch Script:** `tools/archive_study.bat`
```bash
# Analyze disk usage
archive_study.bat analyze studies\M1_Mirror
archive_study.bat analyze studies\M1_Mirror\m1_mirror_V12
# Cleanup completed study (dry run by default)
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
# Archive to remote server
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute --tailscale
# List remote archives
archive_study.bat list
archive_study.bat list --tailscale
# Restore from remote
archive_study.bat restore m1_mirror_V12
archive_study.bat restore m1_mirror_V12 --tailscale
```
### Python API
```python
from pathlib import Path
from optimization_engine.utils.study_archiver import (
analyze_study,
cleanup_study,
archive_to_remote,
)
# Analyze
study_path = Path("studies/M1_Mirror/m1_mirror_V12")
analysis = analyze_study(study_path)
print(f"Total: {analysis['total_size_bytes']/1e9:.2f} GB")
print(f"Essential: {analysis['essential_size']/1e9:.2f} GB")
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup (dry_run=False to execute)
deleted, freed = cleanup_study(study_path, dry_run=False)
print(f"Freed {freed/1e9:.2f} GB")
# Archive to server
success = archive_to_remote(study_path, use_tailscale=False, dry_run=False)
```
---
## Part 4: Remote Server Configuration
### dalidou Server Specs
| Property | Value |
|----------|-------|
| Hostname | dalidou |
| Local IP | 192.168.86.50 |
| Tailscale IP | 100.80.199.40 |
| SSH User | papa |
| Archive Path | /srv/storage/atomizer-archive/ |
| Available Storage | 3.6 TB (SSD) + 12.7 TB (HDD) |
### First-Time Setup
```bash
# 1. SSH into server and create archive directory
ssh papa@192.168.86.50
mkdir -p /srv/storage/atomizer-archive
# 2. Set up passwordless SSH (on Windows)
ssh-keygen -t ed25519 # If you don't have a key
ssh-copy-id papa@192.168.86.50
# 3. Test connection
ssh papa@192.168.86.50 "echo 'Connection OK'"
```
### Archive Structure on Server
```
/srv/storage/atomizer-archive/
├── m1_mirror_V11_20251229.tar.gz # Compressed study archive
├── m1_mirror_V12_20251229.tar.gz
├── m1_mirror_flat_back_V3_20251229.tar.gz
└── manifest.json # Index of all archives
```
---
## Part 5: Recommended Workflows
### During Active Optimization
**Keep all files** - You may need to:
- Re-run specific failed trials
- Debug mesh issues
- Analyze intermediate results
### After Study Completion
1. **Generate final report** (STUDY_REPORT.md)
2. **Archive best design** to `3_results/best_design_archive/`
3. **Run cleanup:**
```bash
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
4. **Verify results still accessible:**
- Database queries work
- Best design files intact
- OP2 files for Zernike extraction present
### For Long-Term Storage
1. **After cleanup**, archive to server:
```bash
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
```
2. **Optionally delete local** study folder
3. **Keep only** `3_results/best_design_archive/` locally if needed
### When Revisiting Old Study
1. **Check if archived:**
```bash
archive_study.bat list
```
2. **Restore:**
```bash
archive_study.bat restore m1_mirror_V12
```
3. **If re-running trials needed**, master files in `1_setup/` allow full regeneration
---
## Part 6: Disk Space Targets
### Per-Project Guidelines
| Stage | Expected Size | Notes |
|-------|---------------|-------|
| Active (full) | 100% | All files present |
| Completed (cleaned) | ~50% | Deletables removed |
| Archived (minimal) | ~3% | Best design only locally |
### M1_Mirror Specific
| Stage | Size | Notes |
|-------|------|-------|
| Full | 194 GB | 28 studies, 2000+ trials |
| After cleanup | 114 GB | OP2 + metadata only |
| Minimal local | 5-10 GB | Best designs + database |
| Server archive | ~50 GB | Compressed |
---
## Part 7: Safety Features
### Built-in Protections
1. **Dry run by default** - Must explicitly add `--execute`
2. **Master files untouched** - `1_setup/` is never modified
3. **Results preserved** - `3_results/` is never touched
4. **Essential files preserved** - OP2, JSON, NPZ always kept
5. **Archive verification** - rsync checks integrity
### What Cannot Be Recovered After Cleanup
| File Type | Recovery Method |
|-----------|-----------------|
| `.prt` | Copy from `1_setup/` + update params |
| `.fem` | Regenerate from `.prt` in NX |
| `.sim` | Recreate simulation setup |
| `.dat` | Regenerate from params.json + model |
| `.f04/.f06` | Re-run solver (if needed) |
**Note:** With `1_setup/` master files and `params.json`, ANY trial can be fully reconstructed. The only irreplaceable data is the OP2 results (which we keep).
---
## Part 8: Troubleshooting
### SSH Connection Failed
```bash
# Test connectivity
ping 192.168.86.50
# Test SSH
ssh papa@192.168.86.50 "echo connected"
# If on different network, use Tailscale
ssh papa@100.80.199.40 "echo connected"
```
### Archive Upload Slow
Large studies (50+ GB) take time. Options:
- Run overnight
- Use wired LAN connection
- Pre-cleanup to reduce size
### Out of Disk Space During Archive
Archive is created locally first. Need ~1.5x study size free:
- 20 GB study = ~30 GB temp space required
### Cleanup Removed Wrong Files
If accidentally executed without dry run:
- OP2 files preserved (can still extract results)
- Master files in `1_setup/` intact
- Regenerate other files by re-running trial
---
## Part 9: Integration with Atomizer
### Protocol Reference
**Related Protocol:** `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md`
### Claude Commands
When user says:
- "analyze disk usage" → Run `analyze_study()`
- "clean up study" → Run `cleanup_study()` with confirmation
- "archive to server" → Run `archive_to_remote()`
- "restore study" → Run `restore_from_remote()`
### Automatic Suggestions
After optimization completion, suggest:
```
Optimization complete! The study is using X GB.
Would you like me to clean up regenerable files to save Y GB?
(This keeps all results but removes intermediate model copies)
```
---
## Part 10: File Inventory
### Files Created
| File | Purpose |
|------|---------|
| `optimization_engine/utils/study_archiver.py` | Core utility module |
| `tools/archive_study.bat` | Windows batch script |
| `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md` | Full protocol |
| `.claude/skills/modules/study-disk-optimization.md` | This document |
### Dependencies
- Python 3.8+
- rsync (for remote operations, usually pre-installed)
- SSH client (for remote operations)
- Tailscale (optional, for remote access outside LAN)
---
## Appendix A: Cleanup Results Log (Dec 2025)
### Initial Cleanup Run
| Study | Before | After | Freed | Files Deleted |
|-------|--------|-------|-------|---------------|
| m1_mirror_cost_reduction_V11 | 32.24 GB | 15.94 GB | 16.30 GB | 3,403 |
| m1_mirror_cost_reduction_flat_back_V3 | 52.50 GB | 26.87 GB | 25.63 GB | 5,084 |
| m1_mirror_cost_reduction_flat_back_V6 | 33.71 GB | 16.64 GB | 17.08 GB | 3,391 |
| m1_mirror_cost_reduction_V12 | 22.68 GB | 10.60 GB | 12.08 GB | 2,508 |
| m1_mirror_cost_reduction_flat_back_V1 | 8.76 GB | 4.54 GB | 4.22 GB | 813 |
| m1_mirror_cost_reduction_flat_back_V5 | 8.01 GB | 4.09 GB | 3.92 GB | 765 |
| m1_mirror_cost_reduction | 3.58 GB | 3.08 GB | 0.50 GB | 267 |
| **TOTAL** | **161.48 GB** | **81.76 GB** | **79.73 GB** | **16,231** |
### Project-Wide Summary
```
Before cleanup: 193.75 GB
After cleanup: 114.03 GB
Total freed: 79.72 GB (41% reduction)
```
---
## Appendix B: Quick Reference Card
### Commands
```bash
# Analyze
archive_study.bat analyze <path>
# Cleanup (always dry-run first!)
archive_study.bat cleanup <study> # Dry run
archive_study.bat cleanup <study> --execute # Execute
# Archive
archive_study.bat archive <study> --execute
archive_study.bat archive <study> --execute --tailscale
# Remote
archive_study.bat list
archive_study.bat restore <name>
```
### Python
```python
from optimization_engine.utils.study_archiver import *
# Quick analysis
analysis = analyze_study(Path("studies/M1_Mirror"))
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup
cleanup_study(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
```
### Server Access
```bash
# Local
ssh papa@192.168.86.50
# Remote (Tailscale)
ssh papa@100.80.199.40
# Archive location
/srv/storage/atomizer-archive/
```
---
*This module enables efficient disk space management for large-scale FEA optimization studies.*

View File

@@ -90,6 +90,7 @@ The Protocol Operating System (POS) provides layered documentation:
| Analyze results | OP_04 | `docs/protocols/operations/OP_04_ANALYZE_RESULTS.md` |
| Export neural data | OP_05 | `docs/protocols/operations/OP_05_EXPORT_TRAINING_DATA.md` |
| Debug issues | OP_06 | `docs/protocols/operations/OP_06_TROUBLESHOOT.md` |
| **Free disk space** | OP_07 | `docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md` |
## System Protocols (Technical Specs)
@@ -135,14 +136,15 @@ C:\Users\antoi\anaconda3\envs\atomizer\python.exe your_script.py
Atomizer/
├── .claude/skills/ # LLM skills (Bootstrap + Core + Modules)
├── docs/protocols/ # Protocol Operating System
│ ├── operations/ # OP_01 - OP_06
│ ├── operations/ # OP_01 - OP_07
│ ├── system/ # SYS_10 - SYS_15
│ └── extensions/ # EXT_01 - EXT_04
├── optimization_engine/ # Core Python modules
│ ├── extractors/ # Physics extraction library
│ ├── gnn/ # GNN surrogate module (Zernike)
│ └── utils/ # Utilities (dashboard_db, trial_manager)
│ └── utils/ # Utilities (dashboard_db, trial_manager, study_archiver)
├── studies/ # User studies
├── tools/ # CLI tools (archive_study.bat, zernike_html_generator.py)
├── archive/ # Deprecated code (for reference)
└── atomizer-dashboard/ # React dashboard
```

View File

@@ -0,0 +1,132 @@
# NXOpen Documentation MCP Server - Setup TODO
**Created:** 2025-12-29
**Status:** PENDING - Waiting for manual configuration
---
## Current State
The NXOpen documentation MCP server exists on **dalidou** (192.168.86.50) but is not accessible from this Windows machine due to hostname resolution issues.
### What's Working
- ✅ Dalidou server is online and reachable at `192.168.86.50`
- ✅ Port 5000 (Documentation Proxy) is responding
- ✅ Port 3000 (Gitea) is responding
- ✅ MCP server code exists at `/srv/claude-assistant/` on dalidou
### What's NOT Working
- ❌ `dalidou.local` hostname doesn't resolve (mDNS not configured on this machine)
- ❌ MCP tools not integrated with Claude Code
---
## Steps to Complete
### Step 1: Fix Hostname Resolution (Manual - requires Admin)
**Option A: Run the script as Administrator**
```powershell
# Open PowerShell as Administrator, then:
C:\Users\antoi\Atomizer\add_dalidou_host.ps1
```
**Option B: Manually edit hosts file**
1. Open Notepad as Administrator
2. Open `C:\Windows\System32\drivers\etc\hosts`
3. Add this line at the end:
```
192.168.86.50 dalidou.local dalidou
```
4. Save the file
**Verify:**
```powershell
ping dalidou.local
```
### Step 2: Verify MCP Server is Running on Dalidou
SSH into dalidou and check:
```bash
ssh root@dalidou
# Check documentation proxy
systemctl status siemensdocumentationproxyserver
# Check MCP server (if it's a service)
# Or check what's running on port 5000
ss -tlnp | grep 5000
```
### Step 3: Configure Claude Code MCP Integration
The MCP server on dalidou uses **stdio-based MCP protocol**, not HTTP. To connect from Claude Code, you'll need one of:
**Option A: SSH-based MCP (if supported)**
Configure in `.claude/settings.json` or MCP config to connect via SSH tunnel.
**Option B: Local Proxy**
Run a local MCP proxy that connects to dalidou's MCP server.
**Option C: HTTP Wrapper**
The current port 5000 service may already expose HTTP endpoints - need to verify once hostname is fixed.
---
## Server Documentation Reference
Full documentation is in the SERVtomaste repo:
- **URL:** http://192.168.86.50:3000/Antoine/SERVtomaste
- **File:** `docs/SIEMENS-DOCS-SERVER.md`
### Key Server Paths (on dalidou)
```
/srv/siemens-docs/proxy/ # Documentation Proxy (port 5000)
/srv/claude-assistant/ # MCP Server
/srv/claude-assistant/mcp-server/ # MCP server code
/srv/claude-assistant/tools/ # Tool implementations
├── siemens-auth.js # Puppeteer authentication
├── siemens-docs.js # Documentation fetching
└── ...
/srv/claude-assistant/vault/ # Credentials (secured)
```
### Available MCP Tools (once connected)
| Tool | Description |
|------|-------------|
| `siemens_docs_search` | Search NX Open, Simcenter docs |
| `siemens_docs_fetch` | Fetch specific documentation page |
| `siemens_auth_status` | Check if auth session is active |
| `siemens_login` | Re-login if session expired |
| `siemens_docs_list` | List documentation categories |
---
## Files Created During Investigation
- `C:\Users\antoi\Atomizer\add_dalidou_host.ps1` - Script to add hosts entry (run as Admin)
- `C:\Users\antoi\Atomizer\test_mcp.py` - Test script for probing MCP server (can be deleted)
---
## Related Documentation
- `.claude/skills/modules/nx-docs-lookup.md` - How to use MCP tools once configured
- `docs/08_ARCHIVE/historical/NXOPEN_DOCUMENTATION_INTEGRATION_STRATEGY.md` - Full strategy doc
- `docs/05_API_REFERENCE/NXOPEN_RESOURCES.md` - Alternative NXOpen resources
---
## Workaround Until Fixed
Without the MCP server, you can still look up NXOpen documentation by:
1. **Using web search** - I can search for NXOpen API documentation online
2. **Using local stub files** - Python stubs at `C:\Program Files\Siemens\NX2412\UGOPEN\pythonStubs\`
3. **Using existing extractors** - Check `optimization_engine/extractors/` for patterns
4. **Recording NX journals** - Record operations in NX to learn the API calls
---
*To continue setup, run the hosts file fix and let me know when ready.*

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,239 @@
# OP_07: Disk Space Optimization
**Version:** 1.0
**Last Updated:** 2025-12-29
## Overview
This protocol manages disk space for Atomizer studies through:
1. **Local cleanup** - Remove regenerable files from completed studies
2. **Remote archival** - Archive to dalidou server (14TB available)
3. **On-demand restore** - Pull archived studies when needed
## Disk Usage Analysis
### Typical Study Breakdown
| File Type | Size/Trial | Purpose | Keep? |
|-----------|------------|---------|-------|
| `.op2` | 68 MB | Nastran results | **YES** - Needed for analysis |
| `.prt` | 30 MB | NX parts | NO - Copy of master |
| `.dat` | 16 MB | Solver input | NO - Regenerable |
| `.fem` | 14 MB | FEM mesh | NO - Copy of master |
| `.sim` | 7 MB | Simulation | NO - Copy of master |
| `.afm` | 4 MB | Assembly FEM | NO - Regenerable |
| `.json` | <1 MB | Params/results | **YES** - Metadata |
| Logs | <1 MB | F04/F06/log | NO - Diagnostic only |
**Per-trial overhead:** ~150 MB total, only ~70 MB essential
### M1_Mirror Example
```
Current: 194 GB (28 studies, 2000+ trials)
After cleanup: 95 GB (51% reduction)
After archive: 5 GB (keep best_design_archive only)
```
## Commands
### 1. Analyze Disk Usage
```bash
# Single study
archive_study.bat analyze studies\M1_Mirror\m1_mirror_V12
# All studies in a project
archive_study.bat analyze studies\M1_Mirror
```
Output shows:
- Total size
- Essential vs deletable breakdown
- Trial count per study
- Per-extension analysis
### 2. Cleanup Completed Study
```bash
# Dry run (default) - see what would be deleted
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12
# Actually delete
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
**What gets deleted:**
- `.prt`, `.fem`, `.sim`, `.afm` in trial folders
- `.dat`, `.f04`, `.f06`, `.log`, `.diag` solver files
- Temp files (`.txt`, `.exp`, `.bak`)
**What is preserved:**
- `1_setup/` folder (master model)
- `3_results/` folder (database, reports)
- All `.op2` files (Nastran results)
- All `.json` files (params, metadata)
- All `.npz` files (Zernike coefficients)
- `best_design_archive/` folder
### 3. Archive to Remote Server
```bash
# Dry run
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12
# Actually archive
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
# Use Tailscale (when not on local network)
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute --tailscale
```
**Process:**
1. Creates compressed `.tar.gz` archive
2. Uploads to `papa@192.168.86.50:/srv/storage/atomizer-archive/`
3. Deletes local archive after successful upload
### 4. List Remote Archives
```bash
archive_study.bat list
# Via Tailscale
archive_study.bat list --tailscale
```
### 5. Restore from Remote
```bash
# Restore to studies/ folder
archive_study.bat restore m1_mirror_V12
# Via Tailscale
archive_study.bat restore m1_mirror_V12 --tailscale
```
## Remote Server Setup
**Server:** dalidou (Lenovo W520)
- Local IP: `192.168.86.50`
- Tailscale IP: `100.80.199.40`
- SSH user: `papa`
- Archive path: `/srv/storage/atomizer-archive/`
### First-Time Setup
SSH into dalidou and create the archive directory:
```bash
ssh papa@192.168.86.50
mkdir -p /srv/storage/atomizer-archive
```
Ensure SSH key authentication is set up for passwordless transfers:
```bash
# On Windows (PowerShell)
ssh-copy-id papa@192.168.86.50
```
## Recommended Workflow
### During Active Optimization
Keep all files - you may need to re-run specific trials.
### After Study Completion
1. **Generate final report** (`STUDY_REPORT.md`)
2. **Archive best design** to `3_results/best_design_archive/`
3. **Cleanup:**
```bash
archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
```
### For Long-Term Storage
1. **After cleanup**, archive to server:
```bash
archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
```
2. **Optionally delete local** (keep only `3_results/best_design_archive/`)
### When Revisiting Old Study
1. **Restore:**
```bash
archive_study.bat restore m1_mirror_V12
```
2. If you need to re-run trials, the `1_setup/` master files allow regenerating everything
## Safety Features
- **Dry run by default** - Must add `--execute` to actually delete/transfer
- **Master files preserved** - `1_setup/` is never touched
- **Results preserved** - `3_results/` is never touched
- **Essential files preserved** - OP2, JSON, NPZ always kept
## Disk Space Targets
| Stage | M1_Mirror Target |
|-------|------------------|
| Active development | 200 GB (full) |
| Completed studies | 95 GB (after cleanup) |
| Archived (minimal local) | 5 GB (best only) |
| Server archive | 50 GB compressed |
## Troubleshooting
### SSH Connection Failed
```bash
# Test connectivity
ping 192.168.86.50
# Test SSH
ssh papa@192.168.86.50 "echo connected"
# If on different network, use Tailscale
ssh papa@100.80.199.40 "echo connected"
```
### Archive Upload Slow
Large studies (50+ GB) take time. The tool uses `rsync` with progress display.
For very large archives, consider running overnight or using direct LAN connection.
### Out of Disk Space During Archive
The archive is created locally first. Ensure you have ~1.5x the study size free:
- 20 GB study = ~30 GB temp space needed
## Python API
```python
from optimization_engine.utils.study_archiver import (
analyze_study,
cleanup_study,
archive_to_remote,
restore_from_remote,
list_remote_archives,
)
# Analyze
analysis = analyze_study(Path("studies/M1_Mirror/m1_mirror_V12"))
print(f"Deletable: {analysis['deletable_size']/1e9:.2f} GB")
# Cleanup (dry_run=False to actually delete)
cleanup_study(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
# Archive
archive_to_remote(Path("studies/M1_Mirror/m1_mirror_V12"), dry_run=False)
# List remote
archives = list_remote_archives()
for a in archives:
print(f"{a['name']}: {a['size']}")
```

View File

@@ -0,0 +1,262 @@
# SYS_16: Self-Aware Turbo (SAT) Optimization
## Version: 1.0
## Status: PROPOSED
## Created: 2025-12-28
---
## Problem Statement
V5 surrogate + L-BFGS failed catastrophically because:
1. MLP predicted WS=280 but actual was WS=376 (30%+ error)
2. L-BFGS descended to regions **outside training distribution**
3. Surrogate had no way to signal uncertainty
4. All L-BFGS solutions converged to the same "fake optimum"
**Root cause:** The surrogate is overconfident in regions where it has no data.
---
## Solution: Uncertainty-Aware Surrogate with Active Learning
### Core Principles
1. **Never trust a point prediction** - Always require uncertainty bounds
2. **High uncertainty = run FEA** - Don't optimize where you don't know
3. **Actively fill gaps** - Prioritize FEA in high-uncertainty regions
4. **Validate gradient solutions** - Check L-BFGS results against FEA before trusting
---
## Architecture
### 1. Ensemble Surrogate (Epistemic Uncertainty)
Instead of one MLP, train **N independent models** with different initializations:
```python
class EnsembleSurrogate:
def __init__(self, n_models=5):
self.models = [MLP() for _ in range(n_models)]
def predict(self, x):
preds = [m.predict(x) for m in self.models]
mean = np.mean(preds, axis=0)
std = np.std(preds, axis=0) # Epistemic uncertainty
return mean, std
def is_confident(self, x, threshold=0.1):
mean, std = self.predict(x)
# Confident if std < 10% of mean
return (std / (mean + 1e-6)) < threshold
```
**Why this works:** Models trained on different random seeds will agree in well-sampled regions but disagree wildly in extrapolation regions.
### 2. Distance-Based OOD Detection
Track training data distribution and flag points that are "too far":
```python
class OODDetector:
def __init__(self, X_train):
self.X_train = X_train
self.mean = X_train.mean(axis=0)
self.std = X_train.std(axis=0)
# Fit KNN for local density
self.knn = NearestNeighbors(n_neighbors=5)
self.knn.fit(X_train)
def distance_to_training(self, x):
"""Return distance to nearest training points."""
distances, _ = self.knn.kneighbors(x.reshape(1, -1))
return distances.mean()
def is_in_distribution(self, x, threshold=2.0):
"""Check if point is within 2 std of training data."""
z_scores = np.abs((x - self.mean) / (self.std + 1e-6))
return z_scores.max() < threshold
```
### 3. Trust-Region L-BFGS
Constrain L-BFGS to stay within training distribution:
```python
def trust_region_lbfgs(surrogate, ood_detector, x0, max_iter=100):
"""L-BFGS that respects training data boundaries."""
def constrained_objective(x):
# If OOD, return large penalty
if not ood_detector.is_in_distribution(x):
return 1e9
mean, std = surrogate.predict(x)
# If uncertain, return upper confidence bound (pessimistic)
if std > 0.1 * mean:
return mean + 2 * std # Be conservative
return mean
result = minimize(constrained_objective, x0, method='L-BFGS-B')
return result.x
```
### 4. Acquisition Function with Uncertainty
Use **Expected Improvement with Uncertainty** (like Bayesian Optimization):
```python
def acquisition_score(x, surrogate, best_so_far):
"""Score = potential improvement weighted by confidence."""
mean, std = surrogate.predict(x)
# Expected improvement (lower is better for minimization)
improvement = best_so_far - mean
# Exploration bonus for uncertain regions
exploration = 0.5 * std
# High score = worth evaluating with FEA
return improvement + exploration
def select_next_fea_candidates(surrogate, candidates, best_so_far, n=5):
"""Select candidates balancing exploitation and exploration."""
scores = [acquisition_score(c, surrogate, best_so_far) for c in candidates]
# Pick top candidates by acquisition score
top_indices = np.argsort(scores)[-n:]
return [candidates[i] for i in top_indices]
```
---
## Algorithm: Self-Aware Turbo (SAT)
```
INITIALIZE:
- Load existing FEA data (X_train, Y_train)
- Train ensemble surrogate on data
- Fit OOD detector on X_train
- Set best_ws = min(Y_train)
PHASE 1: UNCERTAINTY MAPPING (10% of budget)
FOR i in 1..N_mapping:
- Sample random point x
- Get uncertainty: mean, std = surrogate.predict(x)
- If std > threshold: run FEA, add to training data
- Retrain ensemble periodically
This fills in the "holes" in the surrogate's knowledge.
PHASE 2: EXPLOITATION WITH VALIDATION (80% of budget)
FOR i in 1..N_exploit:
- Generate 1000 TPE samples
- Filter to keep only confident predictions (std < 10% of mean)
- Filter to keep only in-distribution (OOD check)
- Rank by predicted WS
- Take top 5 candidates
- Run FEA on all 5
- For each FEA result:
- Compare predicted vs actual
- If error > 20%: mark region as "unreliable", force exploration there
- If error < 10%: update best, retrain surrogate
- Every 10 iterations: retrain ensemble with new data
PHASE 3: L-BFGS REFINEMENT (10% of budget)
- Only run L-BFGS if ensemble R² > 0.95 on validation set
- Use trust-region L-BFGS (stay within training distribution)
FOR each L-BFGS solution:
- Check ensemble disagreement
- If models agree (std < 5%): run FEA to validate
- If models disagree: skip, too uncertain
- Compare L-BFGS prediction vs FEA
- If error > 15%: ABORT L-BFGS phase, return to Phase 2
- If error < 10%: accept as candidate
FINAL:
- Return best FEA-validated design
- Report uncertainty bounds for all objectives
```
---
## Key Differences from V5
| Aspect | V5 (Failed) | SAT (Proposed) |
|--------|-------------|----------------|
| **Model** | Single MLP | Ensemble of 5 MLPs |
| **Uncertainty** | None | Ensemble disagreement + OOD detection |
| **L-BFGS** | Trust blindly | Trust-region, validate every step |
| **Extrapolation** | Accept | Reject or penalize |
| **Active learning** | No | Yes - prioritize uncertain regions |
| **Validation** | After L-BFGS | Throughout |
---
## Implementation Checklist
1. [ ] `EnsembleSurrogate` class with N=5 MLPs
2. [ ] `OODDetector` with KNN + z-score checks
3. [ ] `acquisition_score()` balancing exploitation/exploration
4. [ ] Trust-region L-BFGS with OOD penalties
5. [ ] Automatic retraining when new FEA data arrives
6. [ ] Logging of prediction errors to track surrogate quality
7. [ ] Early abort if L-BFGS predictions consistently wrong
---
## Expected Behavior
**In well-sampled regions:**
- Ensemble agrees → Low uncertainty → Trust predictions
- L-BFGS finds valid optima → FEA confirms → Success
**In poorly-sampled regions:**
- Ensemble disagrees → High uncertainty → Run FEA instead
- L-BFGS penalized → Stays in trusted zone → No fake optima
**At distribution boundaries:**
- OOD detector flags → Reject predictions
- Acquisition prioritizes → Active learning fills gaps
---
## Metrics to Track
1. **Surrogate R² on validation set** - Target > 0.95 before L-BFGS
2. **Prediction error histogram** - Should be centered at 0
3. **OOD rejection rate** - How often we refuse to predict
4. **Ensemble disagreement** - Average std across predictions
5. **L-BFGS success rate** - % of L-BFGS solutions that validate
---
## When to Use SAT vs Pure TPE
| Scenario | Recommendation |
|----------|----------------|
| < 100 existing samples | Pure TPE (not enough for good surrogate) |
| 100-500 samples | SAT Phase 1-2 only (no L-BFGS) |
| > 500 samples | Full SAT with L-BFGS refinement |
| High-dimensional (>20 params) | Pure TPE (curse of dimensionality) |
| Noisy FEA | Pure TPE (surrogates struggle with noise) |
---
## References
- Gaussian Process literature on uncertainty quantification
- Deep Ensembles: Lakshminarayanan et al. (2017)
- Bayesian Optimization with Expected Improvement
- Trust-region methods for constrained optimization
---
*The key insight: A surrogate that knows when it doesn't know is infinitely more valuable than one that's confidently wrong.*

View File

@@ -3,3 +3,5 @@
{"timestamp":"2025-12-19T10:00:00","category":"workaround","context":"NX journal execution via cmd /c with environment variables fails silently or produces garbled output. Multiple attempts with cmd /c SET and && chaining failed to capture run_journal.exe output.","insight":"CRITICAL WORKAROUND: When executing NX journals from Claude Code on Windows, use PowerShell with [Environment]::SetEnvironmentVariable() method instead of cmd /c or $env: syntax. The correct pattern is: powershell -Command \"[Environment]::SetEnvironmentVariable('SPLM_LICENSE_SERVER', '28000@dalidou;28000@100.80.199.40', 'Process'); & 'C:\\Program Files\\Siemens\\DesigncenterNX2512\\NXBIN\\run_journal.exe' 'journal.py' -args 'arg1' 'arg2' 2>&1\". The $env: syntax gets corrupted when passed through bash (colon gets interpreted). The cmd /c SET syntax often fails to capture output. This PowerShell pattern reliably sets license server and captures all output.","confidence":1.0,"tags":["nx","powershell","run_journal","license-server","windows","cmd-workaround"],"severity":"high","rule":"ALWAYS use PowerShell with [Environment]::SetEnvironmentVariable() for NX journal execution. NEVER use cmd /c SET or $env: syntax for setting SPLM_LICENSE_SERVER."}
{"timestamp":"2025-12-19T15:30:00","category":"failure","context":"CMA-ES optimization V7 started with random sample instead of baseline. First trial had whiffle_min=45.73 instead of baseline 62.75, resulting in WS=329 instead of expected ~281.","insight":"CMA-ES with Optuna CmaEsSampler does NOT evaluate x0 (baseline) first - it samples AROUND x0 with sigma0 step size. The x0 parameter only sets the CENTER of the initial sampling distribution, not the first trial. To ensure baseline is evaluated first, use study.enqueue_trial(x0) after creating the study. This is critical for refinement studies where you need to compare against a known-good baseline. Pattern: if len(study.trials) == 0: study.enqueue_trial(x0)","confidence":1.0,"tags":["cma-es","optuna","baseline","x0","enqueue","optimization"],"severity":"high","rule":"When using CmaEsSampler with a known baseline, ALWAYS enqueue the baseline as trial 0 using study.enqueue_trial(x0). The x0 parameter alone does NOT guarantee baseline evaluation."}
{"timestamp":"2025-12-22T14:00:00","category":"failure","context":"V10 mirror optimization reported impossibly good relative WFE values (40-20=1.99nm instead of ~6nm, 60-20=6.82nm instead of ~13nm). User noticed results were 'too good to be true'.","insight":"CRITICAL BUG IN RELATIVE WFE CALCULATION: The V10 run_optimization.py computed relative WFE as abs(RMS_target - RMS_ref) instead of RMS(WFE_target - WFE_ref). This is mathematically WRONG because |RMS(A) - RMS(B)| ≠ RMS(A - B). The correct approach is to compute the node-by-node WFE difference FIRST, then fit Zernike to the difference field, then compute RMS. The bug gave values 3-4x lower than correct values because the 20° reference had HIGHER absolute WFE than 40°/60°, so the subtraction gave negative values, and abs() hid the problem. The fix is to use extractor.extract_relative() which correctly computes node-by-node differences. Both ZernikeExtractor and ZernikeOPDExtractor now have extract_relative() methods.","confidence":1.0,"tags":["zernike","wfe","relative-wfe","extract_relative","critical-bug","v10"],"severity":"critical","rule":"NEVER compute relative WFE as abs(RMS_target - RMS_ref). ALWAYS use extract_relative() which computes RMS(WFE_target - WFE_ref) by doing node-by-node subtraction first, then Zernike fitting, then RMS."}
{"timestamp":"2025-12-28T17:30:00","category":"failure","context":"V5 turbo optimization created from scratch instead of copying V4. Multiple critical components were missing or wrong: no license server, wrong extraction keys (filtered_rms_nm vs relative_filtered_rms_nm), wrong mfg_90 key, missing figure_path parameter, incomplete version regex.","insight":"STUDY DERIVATION FAILURE: When creating a new study version (V5 from V4), NEVER rewrite the run_optimization.py from scratch. ALWAYS copy the working version first, then add/modify only the new feature (e.g., L-BFGS polish). Rewriting caused 5 independent bugs: (1) missing LICENSE_SERVER setup, (2) wrong extraction key filtered_rms_nm instead of relative_filtered_rms_nm, (3) wrong mfg_90 key, (4) missing figure_path=None in extractor call, (5) incomplete version regex missing DesigncenterNX pattern. The FEA/extraction pipeline is PROVEN CODE - never rewrite it. Only add new optimization strategies as modules on top.","confidence":1.0,"tags":["study-creation","copy-dont-rewrite","extraction","license-server","v5","critical"],"severity":"critical","rule":"When deriving a new study version, COPY the entire working run_optimization.py first. Add new features as ADDITIONS, not rewrites. The FEA pipeline (license, NXSolver setup, extraction) is proven - never rewrite it."}
{"timestamp":"2025-12-28T21:30:00","category":"failure","context":"V5 flat back turbo optimization with MLP surrogate + L-BFGS polish. Surrogate predicted WS~280 but actual FEA gave WS~365-377. Error of 85-96 (30%+ relative error). All L-BFGS solutions converged to same fake optimum that didn't exist in reality.","insight":"SURROGATE + L-BFGS FAILURE MODE: Gradient-based optimization on MLP surrogates finds 'fake optima' that don't exist in real FEA. The surrogate has smooth gradients everywhere, but L-BFGS descends to regions OUTSIDE the training distribution where predictions are wildly wrong. V5 results: (1) Best TPE trial: WS=290.18, (2) Best L-BFGS trial: WS=325.27, (3) Worst L-BFGS trials: WS=376.52. The fancy L-BFGS polish made results WORSE than random TPE. Key issues: (a) No uncertainty quantification - can't detect out-of-distribution, (b) No mass constraint in surrogate - L-BFGS finds infeasible designs (122-124kg vs 120kg limit), (c) L-BFGS converges to same bad point from multiple starting locations (trials 31-44 all gave WS=376.52).","confidence":1.0,"tags":["surrogate","mlp","lbfgs","gradient-descent","fake-optima","out-of-distribution","v5","turbo"],"severity":"critical","rule":"NEVER trust gradient descent on surrogates without: (1) Uncertainty quantification to reject OOD predictions, (2) Mass/constraint prediction to enforce feasibility, (3) Trust-region to stay within training distribution. Pure TPE with real FEA often beats surrogate+gradient methods."}

View File

@@ -5,3 +5,5 @@
{"timestamp": "2025-12-28T10:15:00", "category": "success_pattern", "context": "Unified trial management with TrialManager and DashboardDB", "insight": "TRIAL MANAGEMENT PATTERN: Use TrialManager for consistent trial_NNNN naming across all optimization methods (Optuna, Turbo, GNN, manual). Key principles: (1) Trial numbers NEVER reset (monotonic), (2) Folders NEVER get overwritten, (3) Database always synced with filesystem, (4) Surrogate predictions are NOT trials - only FEA results. DashboardDB provides Optuna-compatible schema for dashboard integration. Path: optimization_engine/utils/trial_manager.py", "confidence": 0.95, "tags": ["trial_manager", "dashboard_db", "optuna", "trial_naming", "turbo"]}
{"timestamp": "2025-12-28T10:15:00", "category": "success_pattern", "context": "GNN Turbo training data loading from multiple studies", "insight": "MULTI-STUDY TRAINING: When loading training data from multiple prior studies for GNN surrogate training, param names may have unit prefixes like '[mm]rib_thickness' or '[Degrees]angle'. Strip prefixes: if ']' in name: name = name.split(']', 1)[1]. Also, objective attribute names vary between studies (rel_filtered_rms_40_vs_20 vs obj_rel_filtered_rms_40_vs_20) - use fallback chain with 'or'. V5 successfully trained on 316 samples (V3: 297, V4: 19) with R²=[0.94, 0.94, 0.89, 0.95].", "confidence": 0.9, "tags": ["gnn", "turbo", "training_data", "multi_study", "param_naming"]}
{"timestamp": "2025-12-28T12:28:04.706624", "category": "success_pattern", "context": "Implemented L-BFGS gradient optimizer for surrogate polish phase", "insight": "L-BFGS on trained MLP surrogates provides 100-1000x faster convergence than derivative-free methods (TPE, CMA-ES) for local refinement. Key: use multi-start from top FEA candidates, not random initialization. Integration: GradientOptimizer class in optimization_engine/gradient_optimizer.py.", "confidence": 0.9, "tags": ["optimization", "lbfgs", "surrogate", "gradient", "polish"]}
{"timestamp": "2025-12-29T09:30:00", "category": "success_pattern", "context": "V6 pure TPE outperformed V5 surrogate+L-BFGS by 22%", "insight": "SIMPLE BEATS COMPLEX: V6 Pure TPE achieved WS=225.41 vs V5's WS=290.18 (22.3% better). Key insight: surrogates fail when gradient methods descend to OOD regions. Fix: EnsembleSurrogate with (1) N=5 MLPs for disagreement-based uncertainty, (2) OODDetector with KNN+z-score, (3) acquisition_score balancing exploitation+exploration, (4) trust-region L-BFGS that stays in training distribution. Never trust point predictions - always require uncertainty bounds. Protocol: SYS_16_SELF_AWARE_TURBO.md. Code: optimization_engine/surrogates/ensemble_surrogate.py", "confidence": 1.0, "tags": ["ensemble", "uncertainty", "ood", "surrogate", "v6", "tpe", "self-aware"]}
{"timestamp": "2025-12-29T09:47:47.612485", "category": "success_pattern", "context": "Disk space optimization for FEA studies", "insight": "Per-trial FEA files are ~150MB but only OP2+JSON (~70MB) are essential. PRT/FEM/SIM/DAT are copies of master files and can be deleted after study completion. Archive to dalidou server for long-term storage.", "confidence": 0.95, "tags": ["disk_optimization", "archival", "study_management", "dalidou"], "related_files": ["optimization_engine/utils/study_archiver.py", "docs/protocols/operations/OP_07_DISK_OPTIMIZATION.md"]}

View File

@@ -242,19 +242,28 @@ class NXSolver:
Format: [unit]name=value
Example: [mm]whiffle_min=42.5
"""
# Default unit mapping (could be extended or made configurable)
# Default unit mapping - MUST match NX model expression units exactly
# Verified against working turbo V1 runs
UNIT_MAPPING = {
# Length parameters (mm)
'whiffle_min': 'mm',
'whiffle_triangle_closeness': 'mm',
'inner_circular_rib_dia': 'mm',
'outer_circular_rib_offset_from_outer': 'mm',
'Pocket_Radius': 'mm',
'center_thickness': 'mm',
# Lateral pivot/closeness - mm in NX model (verified from V1)
'lateral_outer_pivot': 'mm',
'lateral_inner_pivot': 'mm',
'lateral_middle_pivot': 'mm',
'lateral_closeness': 'mm',
# Angle parameters (degrees)
'whiffle_outer_to_vertical': 'Degrees',
# Rib/face thickness parameters (mm)
'rib_thickness': 'mm',
'ribs_circular_thk': 'mm',
'rib_thickness_lateral_truss': 'mm',
'mirror_face_thickness': 'mm',
# Angle parameters (Degrees) - verified from working V1 runs
'whiffle_outer_to_vertical': 'Degrees', # NX expects Degrees (verified V1)
'lateral_inner_angle': 'Degrees',
'lateral_outer_angle': 'Degrees',
'blank_backface_angle': 'Degrees',

View File

@@ -0,0 +1,19 @@
"""
Surrogate models for FEA acceleration.
Available surrogates:
- EnsembleSurrogate: Multiple MLPs with uncertainty quantification
- OODDetector: Out-of-distribution detection
"""
from .ensemble_surrogate import (
EnsembleSurrogate,
OODDetector,
create_and_train_ensemble
)
__all__ = [
'EnsembleSurrogate',
'OODDetector',
'create_and_train_ensemble'
]

View File

@@ -0,0 +1,540 @@
#!/usr/bin/env python3
"""
Ensemble Surrogate with Uncertainty Quantification
Addresses the V5 failure mode where single MLPs gave overconfident predictions
in out-of-distribution regions, leading L-BFGS to fake optima.
Key features:
1. Ensemble of N MLPs - disagreement = uncertainty
2. OOD detection - reject predictions far from training data
3. Confidence bounds - never trust point predictions alone
4. Active learning - prioritize FEA in uncertain regions
Author: Atomizer
Created: 2025-12-28
"""
import numpy as np
from typing import Tuple, List, Dict, Optional
from pathlib import Path
import json
import logging
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
HAS_TORCH = True
except ImportError:
HAS_TORCH = False
logger.warning("PyTorch not available - ensemble features limited")
from sklearn.neighbors import NearestNeighbors
class MLP(nn.Module):
    """One feed-forward regressor used as a member of the ensemble.

    Architecture: for each hidden width, a Linear -> ReLU -> Dropout(0.1)
    stage; then a final Linear projection to ``output_dim``.
    Default hidden widths are [64, 32] when none are supplied.
    """
    def __init__(self, input_dim: int, output_dim: int, hidden_dims: List[int] = None):
        super().__init__()
        widths = [input_dim] + (hidden_dims or [64, 32])
        stages = []
        # Pair consecutive widths to build each hidden stage.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend([nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.1)])
        # Output head: plain linear, no activation (regression target).
        stages.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        # Pure delegation to the sequential stack.
        return self.net(x)
class OODDetector:
    """
    Flags query points that fall outside the training distribution.

    Two complementary signals are combined:
    1. Per-feature z-score against the training mean/std
    2. Mean distance to the k nearest training points (local density)
    """
    def __init__(self, X_train: np.ndarray, z_threshold: float = 3.0, knn_k: int = 5):
        self.X_train = X_train
        self.z_threshold = z_threshold
        self.knn_k = knn_k
        # Feature-wise statistics; epsilon guards zero-variance columns.
        self.mean = X_train.mean(axis=0)
        self.std = X_train.std(axis=0) + 1e-8
        # KNN model for local density estimation (k capped by sample count).
        self.knn = NearestNeighbors(n_neighbors=min(knn_k, len(X_train)))
        self.knn.fit(X_train)
        # Baseline scale: median of mean-KNN-distance over the training set.
        train_distances, _ = self.knn.kneighbors(X_train)
        self.typical_knn_dist = np.median(train_distances.mean(axis=1))
        logger.info(f"[OOD] Initialized with {len(X_train)} training points")
        logger.info(f"[OOD] Typical KNN distance: {self.typical_knn_dist:.4f}")

    def z_score_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is within z_threshold std of training mean."""
        pts = np.atleast_2d(x)
        worst_z = np.abs((pts - self.mean) / self.std).max(axis=1)
        passed = worst_z < self.z_threshold
        # Collapse to scalars for single-point queries, keep arrays for batches.
        if len(passed) == 1:
            return passed[0], worst_z[0]
        return passed, worst_z

    def knn_distance_check(self, x: np.ndarray) -> Tuple[bool, float]:
        """Check if point is close enough to training data."""
        pts = np.atleast_2d(x)
        dists, _ = self.knn.kneighbors(pts)
        mean_dist = dists.mean(axis=1)
        # Tolerate up to 3x the typical training-set KNN distance.
        passed = mean_dist < 3 * self.typical_knn_dist
        if len(passed) == 1:
            return passed[0], mean_dist[0]
        return passed, mean_dist

    def is_in_distribution(self, x: np.ndarray) -> Tuple[bool, Dict]:
        """Combined OOD check: both z-score and KNN tests must pass."""
        z_ok, z_val = self.z_score_check(x)
        knn_ok, knn_val = self.knn_distance_check(x)
        verdict = z_ok and knn_ok
        # NOTE: the float()/bool() casts assume a single query point;
        # batch inputs would need per-row details.
        report = {
            'z_score': float(z_val),
            'z_ok': bool(z_ok),
            'knn_dist': float(knn_val),
            'knn_ok': bool(knn_ok),
            'in_distribution': bool(verdict)
        }
        return verdict, report
class EnsembleSurrogate:
    """
    Ensemble of MLPs with uncertainty quantification.
    Key insight: Models trained with different random seeds will agree
    in well-sampled regions but disagree in extrapolation regions.
    Disagreement = epistemic uncertainty.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        n_models: int = 5,
        hidden_dims: List[int] = None,
        device: str = 'auto'
    ):
        """
        Args:
            input_dim: number of input features.
            output_dim: number of outputs.
            n_models: ensemble size.
            hidden_dims: hidden widths; None falls through to MLP's [64, 32] default.
            device: 'auto' picks CUDA when available, otherwise a torch device string.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_models = n_models
        self.hidden_dims = hidden_dims or [64, 32]
        if device == 'auto':
            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        else:
            self.device = torch.device(device)
        # Create ensemble
        # NOTE: passes the raw hidden_dims (possibly None); MLP applies the
        # same [64, 32] default, so behavior matches self.hidden_dims.
        self.models = [
            MLP(input_dim, output_dim, hidden_dims).to(self.device)
            for _ in range(n_models)
        ]
        # Normalization stats (set by train() / load())
        self.x_mean = None
        self.x_std = None
        self.y_mean = None
        self.y_std = None
        # OOD detector (set by train() only — see NOTE on load())
        self.ood_detector = None
        # Training state
        self.is_trained = False
        logger.info(f"[ENSEMBLE] Created {n_models} MLPs on {self.device}")

    def train(
        self,
        X: np.ndarray,
        Y: np.ndarray,
        epochs: int = 500,
        lr: float = 0.001,
        val_split: float = 0.1,
        patience: int = 50
    ) -> Dict:
        """Train all models in ensemble with different random seeds.

        Args:
            X: (n_samples, input_dim) raw inputs.
            Y: (n_samples, output_dim) raw targets.
            epochs: max full-batch epochs per model.
            lr: AdamW learning rate.
            val_split: fraction held out for validation (minimum 5 samples).
            patience: early-stopping patience in epochs.
        Returns:
            metrics dict from _compute_metrics() plus per-model 'val_losses'.
        """
        # Compute normalization
        self.x_mean = X.mean(axis=0)
        self.x_std = X.std(axis=0) + 1e-8
        self.y_mean = Y.mean(axis=0)
        self.y_std = Y.std(axis=0) + 1e-8
        X_norm = (X - self.x_mean) / self.x_std
        Y_norm = (Y - self.y_mean) / self.y_std
        # Split data
        n_val = max(int(len(X) * val_split), 5)
        indices = np.random.permutation(len(X))
        val_idx, train_idx = indices[:n_val], indices[n_val:]
        X_train, Y_train = X_norm[train_idx], Y_norm[train_idx]
        X_val, Y_val = X_norm[val_idx], Y_norm[val_idx]
        # Convert to tensors
        X_t = torch.FloatTensor(X_train).to(self.device)
        Y_t = torch.FloatTensor(Y_train).to(self.device)
        X_v = torch.FloatTensor(X_val).to(self.device)
        Y_v = torch.FloatTensor(Y_val).to(self.device)
        # Train each model with different seed
        # NOTE(review): seeds are set AFTER model construction, so they vary
        # dropout masks here, not the initial weights (those come from the
        # RNG state at __init__ time). np.random.seed also mutates the
        # global NumPy RNG as a side effect.
        all_val_losses = []
        for i, model in enumerate(self.models):
            torch.manual_seed(42 + i * 1000)  # Different init per model
            np.random.seed(42 + i * 1000)
            optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
            criterion = nn.MSELoss()
            best_val_loss = float('inf')
            patience_counter = 0
            best_state = None
            for epoch in range(epochs):
                # Train (full-batch gradient step)
                model.train()
                optimizer.zero_grad()
                pred = model(X_t)
                loss = criterion(pred, Y_t)
                loss.backward()
                optimizer.step()
                # Validate
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_v)
                    val_loss = criterion(val_pred, Y_v).item()
                # Early stopping
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    patience_counter = 0
                    # Snapshot on CPU so the copy survives device moves.
                    best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        break
            # Restore best
            if best_state:
                model.load_state_dict(best_state)
                model.to(self.device)
            all_val_losses.append(best_val_loss)
            logger.info(f"[ENSEMBLE] Model {i+1}/{self.n_models} trained, val_loss={best_val_loss:.4f}")
        # Initialize OOD detector (in normalized input space)
        self.ood_detector = OODDetector(X_norm)
        self.is_trained = True
        # Compute ensemble metrics
        metrics = self._compute_metrics(X_val, Y_val)
        metrics['val_losses'] = all_val_losses
        return metrics

    def _compute_metrics(self, X_val: np.ndarray, Y_val: np.ndarray) -> Dict:
        """Compute R², MAE, and ensemble disagreement on validation set.

        X_val/Y_val are already normalized (called from train()).
        """
        mean, std = self.predict_normalized(X_val)
        # R² for each output
        ss_res = np.sum((Y_val - mean) ** 2, axis=0)
        ss_tot = np.sum((Y_val - Y_val.mean(axis=0)) ** 2, axis=0)
        r2 = 1 - ss_res / (ss_tot + 1e-8)
        # MAE
        mae = np.abs(Y_val - mean).mean(axis=0)
        # Average ensemble disagreement
        avg_std = std.mean()
        return {
            'r2': r2.tolist(),
            'mae': mae.tolist(),
            'avg_ensemble_std': float(avg_std),
            'n_val': len(X_val)
        }

    def predict_normalized(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Predict on normalized inputs, return normalized outputs.

        Returns (mean, std) across the ensemble; std is the per-output
        disagreement between member models.
        """
        X = np.atleast_2d(X)
        X_t = torch.FloatTensor(X).to(self.device)
        preds = []
        for model in self.models:
            model.eval()
            with torch.no_grad():
                pred = model(X_t).cpu().numpy()
            preds.append(pred)
        preds = np.array(preds)  # (n_models, n_samples, n_outputs)
        mean = preds.mean(axis=0)
        std = preds.std(axis=0)
        return mean, std

    def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Predict with uncertainty.
        Returns:
            mean: (n_samples, n_outputs) predicted values
            std: (n_samples, n_outputs) uncertainty (ensemble disagreement)
        """
        X = np.atleast_2d(X)
        # Normalize input
        X_norm = (X - self.x_mean) / self.x_std
        # Get predictions
        mean_norm, std_norm = self.predict_normalized(X_norm)
        # Denormalize
        mean = mean_norm * self.y_std + self.y_mean
        std = std_norm * self.y_std  # Std scales with y_std
        return mean, std

    def predict_with_confidence(self, X: np.ndarray) -> Dict:
        """
        Full prediction with confidence assessment.
        Returns dict with:
        - mean: predicted values
        - std: uncertainty
        - confidence: 0-1 score (higher = more reliable)
        - in_distribution: OOD check result
        - recommendation: 'trust', 'verify', or 'reject'

        NOTE(review): requires self.ood_detector, which only train() sets;
        calling this on a load()-ed ensemble raises AttributeError on None.
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # OOD check (one point at a time — OODDetector expects single rows)
        X_norm = (X - self.x_mean) / self.x_std
        ood_results = [self.ood_detector.is_in_distribution(x) for x in X_norm]
        in_distribution = [r[0] for r in ood_results]
        # Compute confidence score (0 = no confidence, 1 = high confidence)
        # Based on: relative std (lower = better) and OOD (in = better)
        relative_std = std / (np.abs(mean) + 1e-6)
        avg_rel_std = relative_std.mean(axis=1)
        confidence = np.zeros(len(X))
        for i in range(len(X)):
            if not in_distribution[i]:
                confidence[i] = 0.0  # OOD = no confidence
            elif avg_rel_std[i] > 0.3:
                confidence[i] = 0.2  # High uncertainty
            elif avg_rel_std[i] > 0.1:
                confidence[i] = 0.5  # Medium uncertainty
            else:
                confidence[i] = 0.9  # Low uncertainty
        # Recommendations
        recommendations = []
        for i in range(len(X)):
            if confidence[i] >= 0.7:
                recommendations.append('trust')
            elif confidence[i] >= 0.3:
                recommendations.append('verify')  # Run FEA to check
            else:
                recommendations.append('reject')  # Don't use, run FEA instead
        return {
            'mean': mean,
            'std': std,
            'confidence': confidence,
            'in_distribution': in_distribution,
            'recommendation': recommendations
        }

    def acquisition_score(self, X: np.ndarray, best_so_far: float, xi: float = 0.01) -> np.ndarray:
        """
        Expected Improvement acquisition function.
        High score = worth running FEA (either promising or uncertain).
        Args:
            X: candidate points
            best_so_far: current best objective value
            xi: exploration-exploitation tradeoff (higher = more exploration)
        Returns:
            scores: acquisition score per point
        """
        X = np.atleast_2d(X)
        mean, std = self.predict(X)
        # For minimization: improvement = best - predicted
        # Take first objective (weighted sum) for acquisition
        if mean.ndim > 1:
            mean = mean[:, 0]
            std = std[:, 0]
        improvement = best_so_far - mean
        # Expected improvement with exploration bonus
        # Higher std = more exploration value
        # NOTE(review): z is computed but unused — classical EI would feed it
        # through the normal CDF/PDF; the score below is a simpler linear form.
        z = improvement / (std + 1e-8)
        # Simple acquisition: exploitation + exploration
        scores = improvement + xi * std
        # Penalize OOD points (requires ood_detector from train())
        X_norm = (X - self.x_mean) / self.x_std
        for i, x in enumerate(X_norm):
            is_ok, _ = self.ood_detector.is_in_distribution(x)
            if not is_ok:
                scores[i] *= 0.1  # Heavy penalty for OOD
        return scores

    def select_candidates_for_fea(
        self,
        candidates: np.ndarray,
        best_so_far: float,
        n_select: int = 5,
        diversity_weight: float = 0.3
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select diverse, high-acquisition candidates for FEA validation.
        Balances:
        1. High acquisition score (exploitation + exploration)
        2. Diversity (don't cluster all candidates together)
        3. In-distribution (avoid OOD predictions)
        Returns:
            selected: indices of selected candidates
            scores: acquisition scores
        """
        scores = self.acquisition_score(candidates, best_so_far)
        # Greedy selection with diversity
        selected = []
        remaining = list(range(len(candidates)))
        while len(selected) < n_select and remaining:
            if not selected:
                # First: pick highest score
                best_idx = max(remaining, key=lambda i: scores[i])
            else:
                # Later: balance score with distance to selected
                def combined_score(i):
                    # Min distance to already selected
                    min_dist = min(
                        np.linalg.norm(candidates[i] - candidates[j])
                        for j in selected
                    )
                    # Combine acquisition + diversity
                    return scores[i] + diversity_weight * min_dist
                best_idx = max(remaining, key=combined_score)
            selected.append(best_idx)
            remaining.remove(best_idx)
        return np.array(selected), scores[selected]

    def save(self, path: Path):
        """Save ensemble to disk: one state_dict per model plus config.json."""
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        # Save each model
        for i, model in enumerate(self.models):
            torch.save(model.state_dict(), path / f"model_{i}.pt")
        # Save normalization stats and config
        config = {
            'input_dim': self.input_dim,
            'output_dim': self.output_dim,
            'n_models': self.n_models,
            'hidden_dims': self.hidden_dims,
            'x_mean': self.x_mean.tolist() if self.x_mean is not None else None,
            'x_std': self.x_std.tolist() if self.x_std is not None else None,
            'y_mean': self.y_mean.tolist() if self.y_mean is not None else None,
            'y_std': self.y_std.tolist() if self.y_std is not None else None,
        }
        with open(path / "config.json", 'w') as f:
            json.dump(config, f, indent=2)
        logger.info(f"[ENSEMBLE] Saved to {path}")

    @classmethod
    def load(cls, path: Path, device: str = 'auto') -> 'EnsembleSurrogate':
        """Load ensemble from disk.

        NOTE(review): the OOD detector is NOT persisted/rebuilt here, so a
        loaded ensemble supports predict() but not predict_with_confidence()
        or acquisition_score() until train() is called again.
        """
        path = Path(path)
        with open(path / "config.json") as f:
            config = json.load(f)
        surrogate = cls(
            input_dim=config['input_dim'],
            output_dim=config['output_dim'],
            n_models=config['n_models'],
            hidden_dims=config['hidden_dims'],
            device=device
        )
        # Load normalization
        # NOTE(review): truthiness check — treats an empty list like None;
        # fine for non-degenerate stats but `is not None` would be stricter.
        surrogate.x_mean = np.array(config['x_mean']) if config['x_mean'] else None
        surrogate.x_std = np.array(config['x_std']) if config['x_std'] else None
        surrogate.y_mean = np.array(config['y_mean']) if config['y_mean'] else None
        surrogate.y_std = np.array(config['y_std']) if config['y_std'] else None
        # Load models
        for i, model in enumerate(surrogate.models):
            model.load_state_dict(torch.load(path / f"model_{i}.pt", map_location=surrogate.device))
            model.to(surrogate.device)
        surrogate.is_trained = True
        logger.info(f"[ENSEMBLE] Loaded from {path}")
        return surrogate
# Convenience function for quick usage
def create_and_train_ensemble(
    X: np.ndarray,
    Y: np.ndarray,
    n_models: int = 5,
    epochs: int = 500
) -> EnsembleSurrogate:
    """Create and train an ensemble surrogate.

    Accepts a 1-D Y (single objective) and reshapes it to a column vector
    before training.
    """
    n_outputs = Y.shape[1] if Y.ndim > 1 else 1
    ensemble = EnsembleSurrogate(
        input_dim=X.shape[1],
        output_dim=n_outputs,
        n_models=n_models
    )
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)
    metrics = ensemble.train(X, Y, epochs=epochs)
    logger.info(f"[ENSEMBLE] Training complete: R²={metrics['r2']}, avg_std={metrics['avg_ensemble_std']:.4f}")
    return ensemble

View File

@@ -24,6 +24,7 @@ SESSION_LOCK_DIR = Path(os.environ.get('TEMP', '/tmp')) / 'atomizer_nx_sessions'
# Default NX installation paths (in order of preference)
DEFAULT_NX_PATHS = [
Path(r"C:\Program Files\Siemens\DesigncenterNX2512\NXBIN\ugraf.exe"), # DesignCenter (preferred)
Path(r"C:\Program Files\Siemens\NX2506\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\NX2412\NXBIN\ugraf.exe"),
Path(r"C:\Program Files\Siemens\Simcenter3D_2506\NXBIN\ugraf.exe"),

View File

@@ -0,0 +1,438 @@
"""
Study Archiver - Disk Space Optimization for Atomizer Studies
This module provides utilities for:
1. Cleaning up completed studies (removing regenerable files)
2. Archiving studies to remote storage (dalidou server)
3. Restoring archived studies on-demand
Usage:
# Cleanup a completed study (keep only essential files)
python -m optimization_engine.utils.study_archiver cleanup studies/M1_Mirror/m1_mirror_V12
# Archive to remote server
python -m optimization_engine.utils.study_archiver archive studies/M1_Mirror/m1_mirror_V12
# Restore from remote
python -m optimization_engine.utils.study_archiver restore m1_mirror_V12
# Show disk usage analysis
python -m optimization_engine.utils.study_archiver analyze studies/M1_Mirror
"""
import os
import json
import shutil
import tarfile
import subprocess
from pathlib import Path
from datetime import datetime
from typing import Optional, Dict, List, Tuple
import logging
logger = logging.getLogger(__name__)

# Configuration
# Remote archive target (the "dalidou" server from the module docstring).
# NOTE(review): host IPs and user are hard-coded for this workstation —
# confirm before reusing elsewhere.
REMOTE_CONFIG = {
    "host": "192.168.86.50",  # Local WiFi
    "host_tailscale": "100.80.199.40",  # Remote via Tailscale
    "user": "papa",
    "archive_path": "/srv/storage/atomizer-archive",
    "ssh_port": 22,
}

# Files to KEEP per trial (essential for analysis)
ESSENTIAL_EXTENSIONS = {
    '.op2',  # Nastran binary results (Zernike extraction)
    '.json',  # Parameters, results, metadata
    '.npz',  # Pre-computed Zernike coefficients
    '.html',  # Generated reports
    '.png',  # Visualization images
    '.csv',  # Exported data
}

# Files to DELETE per trial (regenerable from master + params)
DELETABLE_EXTENSIONS = {
    '.prt',  # NX part files (copy of master)
    '.fem',  # FEM mesh files (copy of master)
    '.sim',  # Simulation files (copy of master)
    '.afm',  # Assembly FEM files
    '.dat',  # Solver input deck (can regenerate)
    '.f04',  # Nastran output log
    '.f06',  # Nastran printed output
    '.log',  # Generic log files
    '.diag',  # Diagnostic files
    '.txt',  # Temp text files
    '.exp',  # Expression files
    '.bak',  # Backup files
}

# Folders to always keep entirely
KEEP_FOLDERS = {
    '1_setup',  # Master model files (source of truth)
    '3_results',  # Final results, database, reports
    'best_design_archive',  # Archived best designs
}
def analyze_study(study_path: Path) -> Dict:
    """Analyze disk usage of a study folder.

    Returns a dict with the total byte count, per-extension and
    per-top-level-folder breakdowns, the essential/deletable split, and
    the number of trial folders under 2_iterations.
    """
    study_path = Path(study_path)
    report = {
        "study_name": study_path.name,
        "total_size_bytes": 0,
        "by_extension": {},
        "by_folder": {},
        "essential_size": 0,
        "deletable_size": 0,
        "trial_count": 0,
    }
    for entry in study_path.rglob("*"):
        if not entry.is_file():
            continue
        size = entry.stat().st_size
        suffix = entry.suffix.lower()
        report["total_size_bytes"] += size
        report["by_extension"][suffix] = report["by_extension"].get(suffix, 0) + size
        # Attribute the file to its top-level folder within the study.
        rel_parts = entry.relative_to(study_path).parts
        if rel_parts:
            top_folder = rel_parts[0]
            report["by_folder"][top_folder] = report["by_folder"].get(top_folder, 0) + size
        # Split into keep vs. delete categories.
        if suffix in ESSENTIAL_EXTENSIONS:
            report["essential_size"] += size
        elif suffix in DELETABLE_EXTENSIONS:
            report["deletable_size"] += size
    # Count trial folders, if the iterations directory exists.
    iterations_dir = study_path / "2_iterations"
    if iterations_dir.exists():
        report["trial_count"] = sum(
            1 for d in iterations_dir.iterdir()
            if d.is_dir() and (d.name.startswith("trial_") or d.name.startswith("iter"))
        )
    return report
def print_analysis(analysis: Dict):
    """Print formatted analysis results.

    Args:
        analysis: dict produced by analyze_study().
    """
    total_gb = analysis["total_size_bytes"] / 1e9
    essential_gb = analysis["essential_size"] / 1e9
    deletable_gb = analysis["deletable_size"] / 1e9
    # Guard against empty studies: total size 0 previously raised
    # ZeroDivisionError in the percentage columns.
    pct_essential = 100 * essential_gb / total_gb if total_gb else 0.0
    pct_deletable = 100 * deletable_gb / total_gb if total_gb else 0.0
    print(f"\n{'='*60}")
    print(f"Study: {analysis['study_name']}")
    print(f"{'='*60}")
    print(f"Total size: {total_gb:8.2f} GB")
    print(f"Trials: {analysis['trial_count']:8d}")
    print(f"Essential: {essential_gb:8.2f} GB ({pct_essential:.1f}%)")
    print(f"Deletable: {deletable_gb:8.2f} GB ({pct_deletable:.1f}%)")
    print(f"Potential save: {deletable_gb:8.2f} GB")
    print(f"\nBy folder:")
    for folder, size in sorted(analysis["by_folder"].items(), key=lambda x: -x[1]):
        print(f" {folder:25} {size/1e9:8.2f} GB")
    print(f"\nTop extensions:")
    for ext, size in sorted(analysis["by_extension"].items(), key=lambda x: -x[1])[:10]:
        status = "[KEEP]" if ext in ESSENTIAL_EXTENSIONS else "[DEL?]" if ext in DELETABLE_EXTENSIONS else "[ ]"
        print(f" {status} {ext:10} {size/1e9:8.2f} GB")
def cleanup_study(study_path: Path, dry_run: bool = True) -> Tuple[int, int]:
    """
    Clean up a completed study by removing regenerable files from trial folders.
    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
    Returns:
        (files_deleted, bytes_freed)
    """
    study_path = Path(study_path)
    iterations_dir = study_path / "2_iterations"
    if not iterations_dir.exists():
        logger.warning(f"No iterations folder found in {study_path}")
        return 0, 0
    # Collect every regenerable file under the trial folders first.
    targets = []
    target_bytes = 0
    for trial_dir in iterations_dir.iterdir():
        if not trial_dir.is_dir():
            continue
        for item in trial_dir.iterdir():
            if item.is_file() and item.suffix.lower() in DELETABLE_EXTENSIONS:
                targets.append(item)
                target_bytes += item.stat().st_size
    if dry_run:
        print(f"\n[DRY RUN] Would delete {len(targets)} files, freeing {target_bytes/1e9:.2f} GB")
        print("\nSample files to delete:")
        for item in targets[:10]:
            print(f" {item.relative_to(study_path)}")
        if len(targets) > 10:
            print(f" ... and {len(targets) - 10} more")
        return 0, 0
    # Actually delete; failures are logged and skipped.
    deleted = 0
    freed = 0
    for item in targets:
        try:
            size = item.stat().st_size
            item.unlink()
            deleted += 1
            freed += size
        except Exception as e:
            logger.error(f"Failed to delete {item}: {e}")
    print(f"Deleted {deleted} files, freed {freed/1e9:.2f} GB")
    return deleted, freed
def archive_to_remote(
    study_path: Path,
    use_tailscale: bool = False,
    dry_run: bool = True
) -> bool:
    """
    Archive a study to the remote dalidou server.
    Args:
        study_path: Path to study folder
        use_tailscale: Use Tailscale IP (for remote access)
        dry_run: If True, only report what would be done
    Returns:
        True if successful
    """
    study_path = Path(study_path)
    study_name = study_path.name
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    # Create compressed archive locally first
    archive_name = f"{study_name}_{datetime.now().strftime('%Y%m%d')}.tar.gz"
    local_archive = study_path.parent / archive_name
    if dry_run:
        print(f"\n[DRY RUN] Would archive {study_name}")
        print(f" 1. Create {archive_name}")
        print(f" 2. Upload to {user}@{host}:{remote_path}/")
        print(f" 3. Delete local archive")
        return True
    print(f"Creating archive: {archive_name}")
    with tarfile.open(local_archive, "w:gz") as tar:
        tar.add(study_path, arcname=study_name)
    archive_size = local_archive.stat().st_size
    print(f"Archive size: {archive_size/1e9:.2f} GB")
    # Upload via rsync (more reliable than scp for large files)
    print(f"Uploading to {host}...")
    # First ensure remote directory exists.
    # Argument lists (shell=False) avoid local shell quoting/injection
    # problems with paths containing spaces or metacharacters; the quoted
    # command still runs in the remote shell via ssh.
    subprocess.run(["ssh", f"{user}@{host}", f"mkdir -p {remote_path}"], check=True)
    # Upload
    result = subprocess.run(
        ["rsync", "-avz", "--progress", str(local_archive), f"{user}@{host}:{remote_path}/"]
    )
    if result.returncode == 0:
        print("Upload successful!")
        # Clean up local archive
        local_archive.unlink()
        return True
    else:
        print(f"Upload failed with code {result.returncode}")
        return False
def restore_from_remote(
    study_name: str,
    target_dir: Path,
    use_tailscale: bool = False
) -> bool:
    """
    Restore a study from the remote server.
    Args:
        study_name: Name of the study to restore
        target_dir: Where to extract the study
        use_tailscale: Use Tailscale IP
    Returns:
        True if successful
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    target_dir = Path(target_dir)
    # Find the archive on remote.
    # The glob and pipe run in the REMOTE shell; locally we pass an argument
    # list (shell=False) so a study_name containing shell metacharacters
    # cannot inject into the local shell.
    print(f"Looking for {study_name} on {host}...")
    find_cmd = ["ssh", f"{user}@{host}",
                f"ls {remote_path}/{study_name}*.tar.gz 2>/dev/null | head -1"]
    result = subprocess.run(find_cmd, capture_output=True, text=True)
    if not result.stdout.strip():
        print(f"No archive found for {study_name}")
        return False
    remote_archive = result.stdout.strip()
    local_archive = target_dir / Path(remote_archive).name
    print(f"Downloading: {remote_archive}")
    result = subprocess.run(
        ["rsync", "-avz", "--progress", f"{user}@{host}:{remote_archive}", str(local_archive)]
    )
    if result.returncode != 0:
        print("Download failed")
        return False
    print("Extracting...")
    with tarfile.open(local_archive, "r:gz") as tar:
        # NOTE(review): archives here are self-produced; if an untrusted tar
        # could ever reach this path, use an extraction filter to prevent
        # path traversal.
        tar.extractall(target_dir)
    # Clean up
    local_archive.unlink()
    print(f"Restored to {target_dir / study_name}")
    return True
def list_remote_archives(use_tailscale: bool = False) -> List[Dict]:
    """List all archived studies on the remote server.

    Returns a list of dicts with 'name', 'size', and 'date' parsed from the
    remote `ls -lh` output; empty when the server is unreachable.
    """
    host = REMOTE_CONFIG["host_tailscale"] if use_tailscale else REMOTE_CONFIG["host"]
    user = REMOTE_CONFIG["user"]
    remote_path = REMOTE_CONFIG["archive_path"]
    # Remote shell expands the glob; the local side uses an argument list
    # (shell=False) to avoid local shell interpretation of the command.
    cmd = ["ssh", f"{user}@{host}", f"ls -lh {remote_path}/*.tar.gz 2>/dev/null"]
    result = subprocess.run(cmd, capture_output=True, text=True)
    archives = []
    for line in result.stdout.strip().split('\n'):
        if line and '.tar.gz' in line:
            parts = line.split()
            # `ls -l` rows have at least 9 whitespace-separated fields:
            # perms links owner group size month day time name.
            if len(parts) >= 9:
                archives.append({
                    "name": parts[-1].split('/')[-1],
                    "size": parts[4],
                    "date": f"{parts[5]} {parts[6]} {parts[7]}",
                })
    return archives
def analyze_all_studies(studies_dir: Path) -> Dict:
    """Analyze all studies in a directory.

    Aggregates per-study analyses plus total/essential/deletable byte counts.
    Hidden folders (leading '.') and plain files are skipped.
    """
    studies_dir = Path(studies_dir)
    summary = {
        "total_size": 0,
        "total_essential": 0,
        "total_deletable": 0,
        "studies": [],
    }
    candidates = [
        entry for entry in sorted(studies_dir.iterdir())
        if entry.is_dir() and not entry.name.startswith('.')
    ]
    for study in candidates:
        result = analyze_study(study)
        summary["studies"].append(result)
        summary["total_size"] += result["total_size_bytes"]
        summary["total_essential"] += result["essential_size"]
        summary["total_deletable"] += result["deletable_size"]
    return summary
def main():
    """CLI entry point: analyze / cleanup / archive / restore / list commands."""
    import argparse
    parser = argparse.ArgumentParser(description="Atomizer Study Archiver")
    parser.add_argument("command", choices=["analyze", "cleanup", "archive", "restore", "list"])
    parser.add_argument("path", nargs="?", help="Study path or name")
    # Destructive operations default to a dry run; --execute flips it.
    parser.add_argument("--dry-run", action="store_true", default=True,
                        help="Don't actually delete/transfer (default: True)")
    parser.add_argument("--execute", action="store_true",
                        help="Actually perform the operation")
    parser.add_argument("--tailscale", action="store_true",
                        help="Use Tailscale IP for remote access")
    args = parser.parse_args()
    # --execute is the real switch; --dry-run exists for explicitness.
    dry_run = not args.execute
    if args.command == "analyze":
        if not args.path:
            print("Usage: study_archiver analyze <path>")
            return
        path = Path(args.path)
        if path.is_dir():
            # Check if it's a single study or a collection
            if (path / "optimization_config.json").exists() or (path / "1_setup").exists():
                # Single study
                analysis = analyze_study(path)
                print_analysis(analysis)
            else:
                # Collection of studies
                total = analyze_all_studies(path)
                print(f"\n{'='*60}")
                print(f"Summary: {len(total['studies'])} studies")
                print(f"{'='*60}")
                print(f"Total size: {total['total_size']/1e9:8.2f} GB")
                print(f"Essential: {total['total_essential']/1e9:8.2f} GB")
                print(f"Deletable: {total['total_deletable']/1e9:8.2f} GB")
                print(f"Potential save: {total['total_deletable']/1e9:8.2f} GB")
                print(f"\nPer study:")
                for s in total["studies"]:
                    print(f" {s['study_name']:40} {s['total_size_bytes']/1e9:6.2f} GB ({s['trial_count']:3d} trials)")
    elif args.command == "cleanup":
        if not args.path:
            print("Usage: study_archiver cleanup <study_path> [--execute]")
            return
        cleanup_study(Path(args.path), dry_run=dry_run)
    elif args.command == "archive":
        if not args.path:
            print("Usage: study_archiver archive <study_path> [--execute] [--tailscale]")
            return
        archive_to_remote(Path(args.path), use_tailscale=args.tailscale, dry_run=dry_run)
    elif args.command == "restore":
        if not args.path:
            print("Usage: study_archiver restore <study_name> [--tailscale]")
            return
        # Restores are always extracted under ./studies of the CWD.
        target = Path.cwd() / "studies"
        restore_from_remote(args.path, target, use_tailscale=args.tailscale)
    elif args.command == "list":
        archives = list_remote_archives(use_tailscale=args.tailscale)
        if archives:
            print(f"\nArchived studies on dalidou:")
            print(f"{'='*60}")
            for a in archives:
                print(f" {a['name']:40} {a['size']:>8} {a['date']}")
        else:
            print("No archives found (or server not reachable)")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,411 @@
"""
Study Cleanup Utility
====================
Cleans up completed optimization studies to save disk space by removing
large intermediate files (NX models, FEM meshes, solver results) while
preserving essential data (parameters, extracted results, database).
Usage:
python -m optimization_engine.utils.study_cleanup <study_path> [options]
Options:
--dry-run Show what would be deleted without actually deleting
--keep-best N Keep iteration folders for the top N best trials
--keep-pareto Keep all Pareto-optimal iterations (for multi-objective)
--aggressive Delete ALL iteration data (only keep DB and config)
The database (study.db) contains all optimization results and can regenerate
any analysis. The original NX model in 1_setup is always preserved.
"""
import argparse
import json
import shutil
import sqlite3
from pathlib import Path
from typing import Optional
# Files to ALWAYS keep in iteration folders (tiny, essential)
# Matched case-insensitively against the file NAME (not extension).
ESSENTIAL_FILES = {
    'params.exp',  # Design parameters for this iteration
    '_temp_mass.txt',  # Extracted mass
    '_temp_part_properties.json',  # Part properties
    '_temp_zernike.json',  # Zernike coefficients (if exists)
    'results.json',  # Any extracted results
}

# Extensions to DELETE (large, regenerable/already extracted)
# Matched case-insensitively against the file SUFFIX.
DELETABLE_EXTENSIONS = {
    '.op2',  # Nastran binary results (~65 MB each)
    '.prt',  # NX Part files (~30-35 MB each)
    '.fem',  # FEM mesh files (~15 MB each)
    '.dat',  # Nastran input deck (~15 MB each)
    '.sim',  # Simulation file (~7 MB each)
    '.afm',  # FEA auxiliary (~4 MB each)
    '.f04',  # Nastran log
    '.f06',  # Nastran output
    '.log',  # Solver log
    '.diag',  # Diagnostics
}
def get_study_info(study_path: Path) -> dict:
    """Get study metadata from config and database.

    Args:
        study_path: study folder; looks for optimization_config.json and a
            study.db under 3_results (or legacy 2_results).
    Returns:
        dict with 'name', 'has_config', 'has_db', 'trial_count',
        'best_trials', 'pareto_trials', and 'config' (only when present).
    """
    config_path = study_path / 'optimization_config.json'
    # Try both possible DB locations
    db_path = study_path / '3_results' / 'study.db'
    if not db_path.exists():
        db_path = study_path / '2_results' / 'study.db'
    info = {
        'name': study_path.name,
        'has_config': config_path.exists(),
        'has_db': db_path.exists(),
        'trial_count': 0,
        'best_trials': [],
        'pareto_trials': [],
    }
    if config_path.exists():
        with open(config_path) as f:
            info['config'] = json.load(f)
    if db_path.exists():
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            # Get trial count
            cursor.execute("SELECT COUNT(*) FROM trials WHERE state = 'COMPLETE'")
            info['trial_count'] = cursor.fetchone()[0]
            # Best trials (single objective). The table may not exist in
            # every schema version, so DB errors here are non-fatal —
            # narrowed from the previous bare except.
            try:
                cursor.execute("""
                    SELECT trial_id, value FROM trial_values
                    WHERE objective = 0
                    ORDER BY value ASC LIMIT 10
                """)
                info['best_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
            # Pareto attribute (multi-objective studies only)
            try:
                cursor.execute("""
                    SELECT DISTINCT trial_id FROM trial_system_attrs
                    WHERE key = 'pareto_optimal' AND value = '1'
                """)
                info['pareto_trials'] = [row[0] for row in cursor.fetchall()]
            except sqlite3.Error:
                pass
        finally:
            # Close the connection even if the trials query itself fails.
            conn.close()
    return info
def calculate_cleanup_savings(study_path: Path, keep_iters: set = None) -> dict:
    """Calculate how much space would be saved by cleanup.

    Args:
        study_path: study folder (expects '2_iterations' or legacy '1_working').
        keep_iters: iteration numbers whose folders are kept entirely.
    Returns:
        dict with 'total_size', 'deletable_size', 'keep_size' in bytes.
    """
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'  # Legacy structure
    if not iterations_path.exists():
        return {'total_size': 0, 'deletable_size': 0, 'keep_size': 0}
    total_size = 0
    deletable_size = 0
    keep_size = 0
    keep_iters = keep_iters or set()
    # Hoist the case-folded essential-name set out of the per-file loop
    # (previously rebuilt for every single file).
    essential_names = {e.lower() for e in ESSENTIAL_FILES}
    for iter_folder in iterations_path.iterdir():
        if not iter_folder.is_dir():
            continue
        # Extract iteration number; skip folders not named like 'iterNNN'
        # (narrowed from a bare except to ValueError from int()).
        try:
            iter_num = int(iter_folder.name.replace('iter', ''))
        except ValueError:
            continue
        for f in iter_folder.iterdir():
            if not f.is_file():
                continue
            size = f.stat().st_size
            total_size += size
            # Keep entire folder if in keep_iters
            if iter_num in keep_iters:
                keep_size += size
                continue
            # Keep essential files
            if f.name.lower() in essential_names:
                keep_size += size
            elif f.suffix.lower() in DELETABLE_EXTENSIONS:
                deletable_size += size
            else:
                keep_size += size  # Keep unknown files by default
    return {
        'total_size': total_size,
        'deletable_size': deletable_size,
        'keep_size': keep_size,
    }
def cleanup_study(
    study_path: Path,
    dry_run: bool = True,
    keep_best: int = 0,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> dict:
    """
    Clean up a study to save disk space.
    Args:
        study_path: Path to study folder
        dry_run: If True, only report what would be deleted
        keep_best: Number of best iterations to keep completely
        keep_pareto: Keep all Pareto-optimal iterations
        aggressive: Delete ALL iteration folders (only keep DB)
    Returns:
        dict with cleanup statistics
    Raises:
        ValueError: if study_path does not exist.
    """
    study_path = Path(study_path)
    if not study_path.exists():
        raise ValueError(f"Study path does not exist: {study_path}")
    # Get study info
    info = get_study_info(study_path)
    # Determine which iterations to keep
    keep_iters = set()
    if keep_best > 0 and info['best_trials']:
        keep_iters.update(info['best_trials'][:keep_best])
    if keep_pareto and info['pareto_trials']:
        keep_iters.update(info['pareto_trials'])
    # Find iterations folder
    iterations_path = study_path / '2_iterations'
    if not iterations_path.exists():
        iterations_path = study_path / '1_working'
    if not iterations_path.exists():
        return {'status': 'no_iterations', 'deleted_bytes': 0, 'deleted_files': 0}
    # Calculate savings
    savings = calculate_cleanup_savings(study_path, keep_iters)
    deleted_bytes = 0
    deleted_files = 0
    deleted_folders = 0
    if aggressive:
        # Delete entire iterations folder; dry run still reports the size.
        deleted_bytes = savings['total_size']
        if not dry_run:
            shutil.rmtree(iterations_path)
            deleted_folders = 1
    else:
        # Selective cleanup.
        # Hoist the case-folded essential-name set out of the per-file loop.
        essential_names = {e.lower() for e in ESSENTIAL_FILES}
        for iter_folder in iterations_path.iterdir():
            if not iter_folder.is_dir():
                continue
            # Extract iteration number; skip folders not named like 'iterNNN'
            # (narrowed from a bare except to ValueError from int()).
            try:
                iter_num = int(iter_folder.name.replace('iter', ''))
            except ValueError:
                continue
            # Skip kept iterations
            if iter_num in keep_iters:
                continue
            for f in iter_folder.iterdir():
                if not f.is_file():
                    continue
                # Keep essential files
                if f.name.lower() in essential_names:
                    continue
                # Delete deletable extensions
                if f.suffix.lower() in DELETABLE_EXTENSIONS:
                    size = f.stat().st_size
                    if not dry_run:
                        f.unlink()
                    deleted_bytes += size
                    deleted_files += 1
    return {
        'status': 'dry_run' if dry_run else 'completed',
        'study_name': info['name'],
        'trial_count': info['trial_count'],
        'kept_iterations': list(keep_iters),
        'total_size_before': savings['total_size'],
        'deleted_bytes': deleted_bytes,
        'deleted_files': deleted_files,
        'deleted_folders': deleted_folders,
        'space_saved_gb': deleted_bytes / (1024**3),
    }
def cleanup_batch(
    parent_path: Path,
    pattern: str = "*",
    dry_run: bool = True,
    keep_best: int = 3,
    keep_pareto: bool = False,
    aggressive: bool = False,
) -> list:
    """
    Clean up multiple studies matching a pattern.
    Args:
        parent_path: Parent directory containing studies
        pattern: Glob pattern to match study folders (e.g., "m1_mirror_*")
        dry_run: If True, only report
        keep_best: Keep N best iterations per study
        keep_pareto: Keep Pareto-optimal iterations
        aggressive: Delete all iteration folders
    Returns:
        List of cleanup results
    """
    parent_path = Path(parent_path)
    outcomes = []
    for candidate in sorted(parent_path.glob(pattern)):
        if not candidate.is_dir():
            continue
        # Only folders with an iterations directory count as studies.
        looks_like_study = (
            (candidate / '2_iterations').exists()
            or (candidate / '1_working').exists()
        )
        if not looks_like_study:
            continue
        try:
            outcome = cleanup_study(
                candidate,
                dry_run=dry_run,
                keep_best=keep_best,
                keep_pareto=keep_pareto,
                aggressive=aggressive,
            )
        except Exception as e:
            outcome = {
                'study_name': candidate.name,
                'status': 'error',
                'error': str(e),
            }
        outcomes.append(outcome)
    return outcomes
def main():
    """CLI entry point for study cleanup.

    Parses command-line arguments, then either cleans one study folder
    (default) or, with --batch PATTERN, every matching study under the
    given parent directory. Prints a human-readable report and returns
    the underlying result: a dict in single-study mode, a list of dicts
    in batch mode.
    """
    parser = argparse.ArgumentParser(
        description='Clean up completed optimization studies to save disk space.',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('study_path', type=Path, help='Path to study folder or parent directory')
    # NOTE(review): --dry-run is accepted for discoverability but its value is
    # never read; dry-run mode is derived solely from the absence of --execute.
    parser.add_argument('--dry-run', action='store_true', default=True,
                        help='Show what would be deleted without deleting (default)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete files (opposite of --dry-run)')
    parser.add_argument('--keep-best', type=int, default=3,
                        help='Keep N best iterations completely (default: 3)')
    parser.add_argument('--keep-pareto', action='store_true',
                        help='Keep all Pareto-optimal iterations')
    parser.add_argument('--aggressive', action='store_true',
                        help='Delete ALL iteration data (only keep DB)')
    parser.add_argument('--batch', type=str, metavar='PATTERN',
                        help='Clean multiple studies matching pattern (e.g., "m1_mirror_*")')
    args = parser.parse_args()
    # Safe by default: files are only deleted when --execute is given.
    dry_run = not args.execute
    if args.batch:
        # Batch cleanup mode: study_path is the parent dir, --batch the glob.
        print(f"\n{'='*60}")
        print(f"BATCH CLEANUP: {args.study_path}")
        print(f"Pattern: {args.batch}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
        results = cleanup_batch(
            args.study_path,
            pattern=args.batch,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("BATCH RESULTS")
        print(f"{'='*60}")
        print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
        print("-" * 75)
        total_saved = 0
        for r in results:
            # Failed studies are reported inline; .get() guards keep a partial
            # result dict from crashing the summary table.
            if r.get('status') == 'error':
                print(f"{r['study_name']:<45} ERROR: {r.get('error', 'Unknown')}")
            else:
                saved = r.get('space_saved_gb', 0)
                total_saved += saved
                print(f"{r['study_name']:<45} {r.get('trial_count', 0):>7} "
                      f"{r.get('total_size_before', 0)/(1024**3):>7.1f}G {saved:>7.1f}G")
        print("-" * 75)
        print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return results
    else:
        # Single study cleanup
        print(f"\n{'='*60}")
        print(f"STUDY CLEANUP: {args.study_path.name}")
        print(f"{'='*60}")
        print(f"Mode: {'DRY RUN (no files deleted)' if dry_run else 'EXECUTE (files WILL be deleted)'}")
        print(f"Keep best: {args.keep_best} iterations")
        print(f"Keep Pareto: {args.keep_pareto}")
        print(f"Aggressive: {args.aggressive}")
        result = cleanup_study(
            args.study_path,
            dry_run=dry_run,
            keep_best=args.keep_best,
            keep_pareto=args.keep_pareto,
            aggressive=args.aggressive,
        )
        print(f"\n{'='*60}")
        print("RESULTS")
        print(f"{'='*60}")
        print(f"Trials in study: {result['trial_count']}")
        # Only the first 5 kept iteration numbers are shown, with an ellipsis.
        print(f"Iterations kept: {len(result['kept_iterations'])} {result['kept_iterations'][:5]}{'...' if len(result['kept_iterations']) > 5 else ''}")
        print(f"Total size before: {result['total_size_before'] / (1024**3):.2f} GB")
        print(f"{'Would delete' if dry_run else 'Deleted'}: {result['deleted_files']} files")
        print(f"Space {'to save' if dry_run else 'saved'}: {result['space_saved_gb']:.2f} GB")
        if dry_run:
            print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
        return result
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()

67
run_cleanup.py Normal file
View File

@@ -0,0 +1,67 @@
"""Run cleanup excluding protected studies."""
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent))
from optimization_engine.utils.study_cleanup import cleanup_study, get_study_info
m1_dir = Path(r"C:\Users\antoi\Atomizer\studies\M1_Mirror")
# Studies to SKIP (user requested)
skip_patterns = [
"cost_reduction_V10",
"cost_reduction_V11",
"cost_reduction_V12",
"flat_back",
]
# Parse args
dry_run = "--execute" not in sys.argv
keep_best = 5
total_saved = 0
studies_to_clean = []
print("=" * 75)
print(f"CLEANUP (excluding V10-V12 and flat_back studies)")
print(f"Mode: {'DRY RUN' if dry_run else 'EXECUTE'}")
print("=" * 75)
print(f"{'Study':<45} {'Trials':>7} {'Size':>8} {'Savings':>8}")
print("-" * 75)
for study_path in sorted(m1_dir.iterdir()):
if not study_path.is_dir():
continue
# Check if has iterations
if not (study_path / "2_iterations").exists():
continue
# Skip protected studies
skip = False
for pattern in skip_patterns:
if pattern in study_path.name:
skip = True
break
if skip:
info = get_study_info(study_path)
print(f"{study_path.name:<45} {info['trial_count']:>7} SKIPPED")
continue
# This study will be cleaned
result = cleanup_study(study_path, dry_run=dry_run, keep_best=keep_best)
saved = result["space_saved_gb"]
total_saved += saved
status = "would save" if dry_run else "saved"
print(f"{study_path.name:<45} {result['trial_count']:>7} {result['total_size_before']/(1024**3):>7.1f}G {saved:>7.1f}G")
studies_to_clean.append(study_path.name)
print("-" * 75)
print(f"{'TOTAL SAVINGS:':<45} {' '*15} {total_saved:>7.1f}G")
if dry_run:
print(f"\n[!] This was a dry run. Run with --execute to actually delete files.")
else:
print(f"\n[OK] Cleanup complete! Freed {total_saved:.1f} GB")

32
tools/archive_study.bat Normal file
View File

@@ -0,0 +1,32 @@
@echo off
REM Atomizer Study Archiver - Convenience Script
REM Usage: archive_study.bat <command> [study_path]
REM
REM Commands:
REM analyze - Show disk usage analysis
REM cleanup - Remove regenerable files (dry run by default)
REM archive - Archive to dalidou server
REM list - List archived studies on server
REM
REM Examples:
REM archive_study.bat analyze studies\M1_Mirror
REM archive_study.bat cleanup studies\M1_Mirror\m1_mirror_V12 --execute
REM archive_study.bat archive studies\M1_Mirror\m1_mirror_V12 --execute
REM Run from the project root so relative study paths resolve regardless of
REM the caller's current directory.
cd /d C:\Users\antoi\Atomizer
REM No command given: print usage (with escaped angle brackets) and fail.
if "%1"=="" (
echo Usage: archive_study.bat ^<command^> [path] [options]
echo.
echo Commands:
echo analyze ^<path^> - Analyze disk usage
echo cleanup ^<study^> [--execute] - Remove regenerable files
echo archive ^<study^> [--execute] - Archive to dalidou
echo restore ^<name^> - Restore from dalidou
echo list - List remote archives
echo.
echo Add --tailscale for remote access via Tailscale
exit /b 1
)
REM Forward all arguments unchanged to the Python archiver module.
REM NOTE(review): uses the anaconda3 env python; other project scripts reference
REM a miniconda3 env - confirm which interpreter is intended on this machine.
C:\Users\antoi\anaconda3\envs\atomizer\python.exe -m optimization_engine.utils.study_archiver %*

View File

@@ -8,6 +8,11 @@ Generates 3 interactive HTML reports for Zernike wavefront analysis:
2. 60° vs 20° (relative) - Operational angle comparison
3. 90° (Manufacturing) - Absolute with manufacturing metrics
Uses the rigorous OPD method from extract_zernike_figure.py which:
- Accounts for lateral (X, Y) displacement via interpolation
- Uses actual mesh geometry as reference (no shape assumptions)
- Provides more accurate WFE for mirror optimization
Usage:
conda activate atomizer
python zernike_html_generator.py "path/to/solution.op2"
@@ -23,6 +28,7 @@ Output:
Author: Atomizer
Created: 2025-12-19
Updated: 2025-12-28 - Upgraded to use rigorous OPD method
"""
import sys
@@ -49,6 +55,15 @@ except ImportError as e:
print("Run: conda activate atomizer")
sys.exit(1)
# Import the rigorous OPD extractor
try:
from optimization_engine.extractors.extract_zernike_figure import ZernikeOPDExtractor
USE_OPD_METHOD = True
print("[INFO] Using rigorous OPD method (accounts for lateral displacement)")
except ImportError:
USE_OPD_METHOD = False
print("[WARN] OPD extractor not available, falling back to simple Z-only method")
# ============================================================================
# Configuration
@@ -278,13 +293,31 @@ def compute_rms_metrics(X, Y, W_nm):
def compute_mfg_metrics(coeffs):
"""Manufacturing aberration magnitudes."""
"""Manufacturing aberration magnitudes from Zernike coefficients.
Noll indexing (1-based): J1=Piston, J2=TiltX, J3=TiltY, J4=Defocus,
J5=Astig45, J6=Astig0, J7=ComaX, J8=ComaY, J9=TrefoilX, J10=TrefoilY, J11=Spherical
Python 0-indexed: coeffs[0]=J1, coeffs[3]=J4, etc.
"""
# Individual mode magnitudes (RSS for paired modes)
defocus = float(abs(coeffs[3])) # J4
astigmatism = float(np.sqrt(coeffs[4]**2 + coeffs[5]**2)) # RSS(J5, J6)
coma = float(np.sqrt(coeffs[6]**2 + coeffs[7]**2)) # RSS(J7, J8)
trefoil = float(np.sqrt(coeffs[8]**2 + coeffs[9]**2)) # RSS(J9, J10)
spherical = float(abs(coeffs[10])) if len(coeffs) > 10 else 0.0 # J11
# RMS of higher-order terms (J4+): sqrt(sum of squares of coefficients)
# This is the proper Zernike-coefficient-based RMS excluding piston/tip/tilt
higher_order_rms = float(np.sqrt(np.sum(coeffs[3:]**2)))
return {
'defocus_nm': float(abs(coeffs[3])),
'astigmatism_rms': float(np.sqrt(coeffs[4]**2 + coeffs[5]**2)),
'coma_rms': float(np.sqrt(coeffs[6]**2 + coeffs[7]**2)),
'trefoil_rms': float(np.sqrt(coeffs[8]**2 + coeffs[9]**2)),
'spherical_nm': float(abs(coeffs[10])) if len(coeffs) > 10 else 0.0,
'defocus_nm': defocus,
'astigmatism_rms': astigmatism,
'coma_rms': coma,
'trefoil_rms': trefoil,
'spherical_nm': spherical,
'higher_order_rms': higher_order_rms, # RMS of all J4+ coefficients
}
@@ -502,19 +535,22 @@ def generate_html(
], align="left", fill_color='#374151', font=dict(color='white'))
), row=3, col=1)
# Pre-correction (row 4)
# Pre-correction (row 4) - Aberrations to polish out (90° - 20°)
# Shows what correction is needed when manufacturing at 90° to achieve 20° figure
fig.add_trace(go.Table(
header=dict(values=["<b>Mode</b>", "<b>Correction (nm)</b>"],
header=dict(values=["<b>Aberration</b>", "<b>Magnitude (nm)</b>"],
align="left", fill_color='#1f2937', font=dict(color='white')),
cells=dict(values=[
["Total RMS (J1-J3 filter)",
"Defocus (J4)",
["Defocus (J4)",
"Astigmatism (J5+J6)",
"Coma (J7+J8)"],
[f"{correction_metrics['rms_filter_j1to3']:.2f}",
f"{correction_metrics['defocus_nm']:.2f}",
"Coma (J7+J8)",
"Trefoil (J9+J10)",
"Spherical (J11)"],
[f"{correction_metrics['defocus_nm']:.2f}",
f"{correction_metrics['astigmatism_rms']:.2f}",
f"{correction_metrics['coma_rms']:.2f}"]
f"{correction_metrics['coma_rms']:.2f}",
f"{correction_metrics['trefoil_rms']:.2f}",
f"{correction_metrics['spherical_nm']:.2f}"]
], align="left", fill_color='#374151', font=dict(color='white'))
), row=4, col=1)
else:
@@ -595,8 +631,248 @@ def find_op2_file(working_dir=None):
return max(op2_files, key=lambda p: p.stat().st_mtime)
def _relative_wfe(opd, ref_wfe_map):
    """Build relative (angle minus reference) WFE arrays on shared nodes.

    Args:
        opd: dict from ZernikeOPDExtractor._build_figure_opd_data with keys
            'node_ids', 'x_deformed', 'y_deformed', 'wfe_nm'.
        ref_wfe_map: {node_id: wfe_nm} for the reference subcase.

    Returns:
        (X, Y, dWFE) numpy arrays restricted to nodes present in both
        fields; nodes missing from the reference are dropped.
    """
    xs, ys, dws = [], [], []
    for i, nid in enumerate(opd['node_ids']):
        nid = int(nid)
        if nid in ref_wfe_map:
            xs.append(opd['x_deformed'][i])
            ys.append(opd['y_deformed'][i])
            dws.append(opd['wfe_nm'][i] - ref_wfe_map[nid])
    return np.array(xs), np.array(ys), np.array(dws)


def main_opd(op2_path: Path):
    """Generate all 3 HTML files using rigorous OPD method.

    Writes next to the OP2 file:
      - <base>_<ts>_40_vs_20.html  (40 deg relative to 20 deg reference)
      - <base>_<ts>_60_vs_20.html  (60 deg relative to 20 deg reference)
      - <base>_<ts>_90_mfg.html    (90 deg absolute, manufacturing metrics)

    Returns:
        List of written HTML Paths, or None if the OP2 file does not
        contain the four expected subcases.
    """
    print("=" * 70)
    print(" ATOMIZER ZERNIKE HTML GENERATOR (OPD METHOD)")
    print("=" * 70)
    print(f"\nOP2 File: {op2_path.name}")
    print(f"Directory: {op2_path.parent}")
    print("\n[INFO] Using OPD method: accounts for lateral (X,Y) displacement")
    # Initialize extractor
    extractor = ZernikeOPDExtractor(
        op2_path,
        displacement_unit='mm',
        n_modes=N_MODES,
        filter_orders=FILTER_LOW_ORDERS
    )
    print(f"\nAvailable subcases: {list(extractor.displacements.keys())}")
    # Map angle labels to subcase IDs. Try common patterns: sequential IDs
    # 1-4, literal angle labels, or (last resort) positional order.
    displacements = extractor.displacements
    subcase_map = {}
    if '1' in displacements and '2' in displacements:
        subcase_map = {'90': '1', '20': '2', '40': '3', '60': '4'}
    elif '90' in displacements and '20' in displacements:
        subcase_map = {'90': '90', '20': '20', '40': '40', '60': '60'}
    else:
        available = sorted(displacements.keys(), key=lambda x: int(x) if x.isdigit() else 0)
        if len(available) >= 4:
            # assumes subcase order is 90, 20, 40, 60 — TODO confirm with solver deck
            subcase_map = {'90': available[0], '20': available[1], '40': available[2], '60': available[3]}
            print(f"[WARN] Using mapped subcases: {subcase_map}")
        else:
            print(f"[ERROR] Need 4 subcases, found: {available}")
            return
    output_dir = op2_path.parent
    base = op2_path.stem
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    html_files = []
    # ========================================================================
    # Extract absolute metrics for each subcase
    # ========================================================================
    print("\nExtracting absolute metrics (OPD method)...")
    results_abs = {}
    for angle, label in subcase_map.items():
        result = extractor.extract_subcase(label, include_coefficients=True)
        results_abs[angle] = result
        lat_disp = result.get('max_lateral_displacement_um', 0)
        print(f"  {angle} deg: Filtered RMS = {result['filtered_rms_nm']:.2f} nm, "
              f"Lateral disp max = {lat_disp:.3f} um")
    # ========================================================================
    # Extract relative metrics (40-20, 60-20, 90-20)
    # ========================================================================
    print("\nExtracting relative metrics (OPD method)...")
    # 40 vs 20
    result_40_rel = extractor.extract_relative(subcase_map['40'], subcase_map['20'], include_coefficients=True)
    print(f"  40-20: Relative Filtered RMS = {result_40_rel['relative_filtered_rms_nm']:.2f} nm")
    # 60 vs 20
    result_60_rel = extractor.extract_relative(subcase_map['60'], subcase_map['20'], include_coefficients=True)
    print(f"  60-20: Relative Filtered RMS = {result_60_rel['relative_filtered_rms_nm']:.2f} nm")
    # 90 vs 20 (for correction metrics)
    result_90_rel = extractor.extract_relative(subcase_map['90'], subcase_map['20'], include_coefficients=True)
    print(f"  90-20: Relative Filtered RMS = {result_90_rel['relative_filtered_rms_nm']:.2f} nm")
    # ========================================================================
    # Generate HTML files
    # ========================================================================
    # Visualization needs per-node WFE arrays; the 20 deg case is the shared
    # reference for all three relative maps, so build its lookup once.
    print("\nGenerating HTML reports...")
    # 40 vs 20
    print("  Generating 40 deg vs 20 deg...")
    opd_40 = extractor._build_figure_opd_data(subcase_map['40'])
    opd_20 = extractor._build_figure_opd_data(subcase_map['20'])
    ref_wfe_map = {int(nid): wfe for nid, wfe in zip(opd_20['node_ids'], opd_20['wfe_nm'])}
    X_40_rel, Y_40_rel, WFE_40_rel = _relative_wfe(opd_40, ref_wfe_map)
    rms_40_rel = compute_rms_metrics(X_40_rel, Y_40_rel, WFE_40_rel)
    rms_40_abs = compute_rms_metrics(opd_40['x_deformed'], opd_40['y_deformed'], opd_40['wfe_nm'])
    html_40 = generate_html(
        title="40 deg (OPD)",
        X=X_40_rel, Y=Y_40_rel, W_nm=WFE_40_rel,
        rms_data=rms_40_rel,
        is_relative=True,
        ref_title="20 deg",
        abs_pair=(rms_40_abs['global_rms'], rms_40_abs['filtered_rms'])
    )
    path_40 = output_dir / f"{base}_{timestamp}_40_vs_20.html"
    path_40.write_text(html_40, encoding='utf-8')
    html_files.append(path_40)
    print(f"  Created: {path_40.name}")
    # 60 vs 20
    print("  Generating 60 deg vs 20 deg...")
    opd_60 = extractor._build_figure_opd_data(subcase_map['60'])
    X_60_rel, Y_60_rel, WFE_60_rel = _relative_wfe(opd_60, ref_wfe_map)
    rms_60_rel = compute_rms_metrics(X_60_rel, Y_60_rel, WFE_60_rel)
    rms_60_abs = compute_rms_metrics(opd_60['x_deformed'], opd_60['y_deformed'], opd_60['wfe_nm'])
    html_60 = generate_html(
        title="60 deg (OPD)",
        X=X_60_rel, Y=Y_60_rel, W_nm=WFE_60_rel,
        rms_data=rms_60_rel,
        is_relative=True,
        ref_title="20 deg",
        abs_pair=(rms_60_abs['global_rms'], rms_60_abs['filtered_rms'])
    )
    path_60 = output_dir / f"{base}_{timestamp}_60_vs_20.html"
    path_60.write_text(html_60, encoding='utf-8')
    html_files.append(path_60)
    print(f"  Created: {path_60.name}")
    # 90 deg Manufacturing
    print("  Generating 90 deg Manufacturing...")
    opd_90 = extractor._build_figure_opd_data(subcase_map['90'])
    rms_90 = compute_rms_metrics(opd_90['x_deformed'], opd_90['y_deformed'], opd_90['wfe_nm'])
    mfg_metrics = compute_mfg_metrics(rms_90['coefficients'])
    # 90-20 relative for correction metrics
    X_90_rel, Y_90_rel, WFE_90_rel = _relative_wfe(opd_90, ref_wfe_map)
    rms_90_rel = compute_rms_metrics(X_90_rel, Y_90_rel, WFE_90_rel)
    # Get all correction metrics from Zernike coefficients (90° - 20°)
    correction_metrics = compute_mfg_metrics(rms_90_rel['coefficients'])
    html_90 = generate_html(
        title="90 deg Manufacturing (OPD)",
        X=opd_90['x_deformed'], Y=opd_90['y_deformed'], W_nm=opd_90['wfe_nm'],
        rms_data=rms_90,
        is_relative=False,
        is_manufacturing=True,
        mfg_metrics=mfg_metrics,
        correction_metrics=correction_metrics
    )
    path_90 = output_dir / f"{base}_{timestamp}_90_mfg.html"
    path_90.write_text(html_90, encoding='utf-8')
    html_files.append(path_90)
    print(f"  Created: {path_90.name}")
    # ========================================================================
    # Summary
    # ========================================================================
    print("\n" + "=" * 70)
    print("SUMMARY (OPD Method)")
    print("=" * 70)
    print(f"\nGenerated {len(html_files)} HTML files:")
    for f in html_files:
        print(f"  - {f.name}")
    print("\n" + "-" * 70)
    print("OPTIMIZATION OBJECTIVES (OPD Method)")
    print("-" * 70)
    print(f"  40-20 Filtered RMS: {rms_40_rel['filtered_rms']:.2f} nm")
    print(f"  60-20 Filtered RMS: {rms_60_rel['filtered_rms']:.2f} nm")
    print(f"  MFG 90 (J1-J3): {rms_90_rel['rms_filter_j1to3']:.2f} nm")
    # Weighted sums
    ws_v4 = 5*rms_40_rel['filtered_rms'] + 5*rms_60_rel['filtered_rms'] + 2*rms_90_rel['rms_filter_j1to3']
    ws_v5 = 5*rms_40_rel['filtered_rms'] + 5*rms_60_rel['filtered_rms'] + 3*rms_90_rel['rms_filter_j1to3']
    print(f"\n  V4 Weighted Sum (5/5/2): {ws_v4:.2f}")
    print(f"  V5 Weighted Sum (5/5/3): {ws_v5:.2f}")
    # Lateral displacement summary
    print("\n" + "-" * 70)
    print("LATERAL DISPLACEMENT SUMMARY")
    print("-" * 70)
    for angle in ['20', '40', '60', '90']:
        lat = results_abs[angle].get('max_lateral_displacement_um', 0)
        print(f"  {angle} deg: max {lat:.3f} um")
    print("\n" + "=" * 70)
    print("DONE")
    print("=" * 70)
    return html_files
def main(op2_path: Path):
"""Generate all 3 HTML files."""
"""Generate all 3 HTML files (legacy Z-only method)."""
print("=" * 70)
print(" ATOMIZER ZERNIKE HTML GENERATOR")
print("=" * 70)
@@ -753,12 +1029,8 @@ def main(op2_path: Path):
X_ref, Y_ref, WFE_ref, ref_data['node_ids']
)
rms_90_rel = compute_rms_metrics(X_90_rel, Y_90_rel, WFE_90_rel)
correction_metrics = {
'rms_filter_j1to3': rms_90_rel['rms_filter_j1to3'],
'defocus_nm': compute_mfg_metrics(rms_90_rel['coefficients'])['defocus_nm'],
'astigmatism_rms': compute_mfg_metrics(rms_90_rel['coefficients'])['astigmatism_rms'],
'coma_rms': compute_mfg_metrics(rms_90_rel['coefficients'])['coma_rms'],
}
# Get all correction metrics from Zernike coefficients (90° - 20°)
correction_metrics = compute_mfg_metrics(rms_90_rel['coefficients'])
html_90 = generate_html(
title="90 deg (Manufacturing)",
@@ -822,8 +1094,16 @@ if __name__ == '__main__':
sys.exit(1)
print(f"Found: {op2_path}")
# Check for --legacy flag to use old Z-only method
use_legacy = '--legacy' in sys.argv or '--z-only' in sys.argv
try:
main(op2_path)
if USE_OPD_METHOD and not use_legacy:
main_opd(op2_path)
else:
if use_legacy:
print("[INFO] Using legacy Z-only method (--legacy flag)")
main(op2_path)
except Exception as e:
print(f"\nERROR: {e}")
import traceback