Files
Atomizer/studies/m1_mirror_adaptive_V12/compute_full_calibration.py
Antoine 96b196de58 feat: Add Zernike GNN surrogate module and M1 mirror V12/V13 studies
This commit introduces the GNN-based surrogate for Zernike mirror optimization
and the M1 mirror study progression from V12 (GNN validation) to V13 (pure NSGA-II).

## GNN Surrogate Module (optimization_engine/gnn/)

New module for Graph Neural Network surrogate prediction of mirror deformations:

- `polar_graph.py`: PolarMirrorGraph - fixed 3000-node polar grid structure
- `zernike_gnn.py`: ZernikeGNN with design-conditioned message passing
- `differentiable_zernike.py`: GPU-accelerated Zernike fitting and objectives
- `train_zernike_gnn.py`: ZernikeGNNTrainer with multi-task loss
- `gnn_optimizer.py`: ZernikeGNNOptimizer for turbo mode (~900k trials/hour)
- `extract_displacement_field.py`: OP2 to HDF5 field extraction
- `backfill_field_data.py`: Extract fields from existing FEA trials

Key innovation: Design-conditioned convolutions that modulate message passing
based on structural design parameters, enabling accurate field prediction.

## M1 Mirror Studies

### V12: GNN Field Prediction + FEA Validation
- Zernike GNN trained on V10/V11 FEA data (238 samples)
- Turbo mode: 5000 GNN predictions → top candidates → FEA validation
- Calibration workflow for GNN-to-FEA error correction
- Scripts: run_gnn_turbo.py, validate_gnn_best.py, compute_full_calibration.py

### V13: Pure NSGA-II FEA (Ground Truth)
- Seeds 217 FEA trials from V11+V12
- Pure multi-objective NSGA-II without any surrogate
- Establishes ground-truth Pareto front for GNN accuracy evaluation
- Narrowed blank_backface_angle range to [4.0, 5.0]

## Documentation Updates

- SYS_14: Added Zernike GNN section with architecture diagrams
- CLAUDE.md: Added GNN module reference and quick start
- V13 README: Study documentation with seeding strategy

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-10 08:44:04 -05:00

215 lines
7.4 KiB
Python

#!/usr/bin/env python3
"""
Compute Calibration Factors from Full FEA Dataset
==================================================
Uses ALL 153 FEA training samples to compute robust calibration factors.
This is much better than calibrating only on the GNN's "best" designs,
which are clustered in a narrow region of the design space.
"""
import sys
import json
import numpy as np
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
import torch
from optimization_engine.gnn.gnn_optimizer import ZernikeGNNOptimizer
# --- Filesystem locations -------------------------------------------------
STUDY_DIR = Path(__file__).parent
CONFIG_PATH = STUDY_DIR / "1_setup" / "optimization_config.json"
CHECKPOINT_PATH = Path("C:/Users/Antoine/Atomizer/zernike_gnn_checkpoint.pt")

# --- Objectives this script calibrates (GNN prediction vs FEA truth) ------
OBJECTIVES = [
    'rel_filtered_rms_40_vs_20',
    'rel_filtered_rms_60_vs_20',
    'mfg_90_optician_workload',
]
def main():
print("="*60)
print("FULL DATASET CALIBRATION")
print("="*60)
# Load GNN optimizer (includes trained model and config)
print("\nLoading GNN model...")
optimizer = ZernikeGNNOptimizer.from_checkpoint(CHECKPOINT_PATH, CONFIG_PATH)
print(f" Design variables: {len(optimizer.design_names)}")
# Load training data from gnn_data folder
print("\nLoading training data from gnn_data folder...")
gnn_data_dir = STUDY_DIR / "gnn_data"
training_data = []
if gnn_data_dir.exists():
import h5py
for trial_dir in sorted(gnn_data_dir.iterdir()):
if trial_dir.is_dir() and trial_dir.name.startswith('trial_'):
metadata_path = trial_dir / "metadata.json"
field_path = trial_dir / "displacement_field.h5"
if metadata_path.exists():
with open(metadata_path) as f:
metadata = json.load(f)
if 'objectives' in metadata and metadata.get('objectives'):
training_data.append({
'design_vars': metadata['params'],
'objectives': metadata['objectives'],
})
if not training_data:
# Fallback: load from V11 database
print(" No gnn_data with objectives found, loading from V11 database...")
import sqlite3
v11_db = STUDY_DIR.parent / "m1_mirror_adaptive_V11" / "3_results" / "study.db"
if v11_db.exists():
conn = sqlite3.connect(str(v11_db))
cursor = conn.cursor()
# Get completed trials - filter for FEA trials only (source='fea' or no source means early trials)
cursor.execute("""
SELECT t.trial_id, t.number
FROM trials t
WHERE t.state = 'COMPLETE'
""")
trial_ids = cursor.fetchall()
for trial_id, trial_num in trial_ids:
# Get user attributes
cursor.execute("""
SELECT key, value_json FROM trial_user_attributes
WHERE trial_id = ?
""", (trial_id,))
attrs = {row[0]: json.loads(row[1]) for row in cursor.fetchall()}
# Check if this is an FEA trial (source contains 'FEA' - matches "FEA" and "V10_FEA")
source = attrs.get('source', 'FEA') # Default to 'FEA' for old trials without source tag
if 'FEA' not in source:
continue # Skip NN trials
# Get params
cursor.execute("""
SELECT param_name, param_value FROM trial_params
WHERE trial_id = ?
""", (trial_id,))
params = {row[0]: float(row[1]) for row in cursor.fetchall()}
# Check if objectives exist (stored as individual attributes)
if all(obj in attrs for obj in OBJECTIVES):
training_data.append({
'design_vars': params,
'objectives': {obj: attrs[obj] for obj in OBJECTIVES},
})
conn.close()
print(f" Found {len(training_data)} FEA trials in V11 database")
print(f" Loaded {len(training_data)} training samples")
if not training_data:
print("\n ERROR: No training data found!")
return 1
# Compute GNN predictions for all training samples
print("\nComputing GNN predictions for all training samples...")
gnn_predictions = []
fea_ground_truth = []
for i, sample in enumerate(training_data):
# Get design variables
design_vars = sample['design_vars']
# Get FEA ground truth objectives
fea_obj = sample['objectives']
# Predict with GNN
gnn_pred = optimizer.predict(design_vars)
gnn_obj = gnn_pred.objectives
gnn_predictions.append(gnn_obj)
fea_ground_truth.append(fea_obj)
if (i + 1) % 25 == 0:
print(f" Processed {i+1}/{len(training_data)} samples")
print(f"\n Total: {len(gnn_predictions)} samples")
# Compute calibration factors for each objective
print("\n" + "="*60)
print("CALIBRATION RESULTS")
print("="*60)
calibration = {}
for obj_name in OBJECTIVES:
gnn_vals = np.array([p[obj_name] for p in gnn_predictions])
fea_vals = np.array([f[obj_name] for f in fea_ground_truth])
# Calibration factor = mean(FEA / GNN)
# This gives the multiplicative correction
ratios = fea_vals / gnn_vals
factor = np.mean(ratios)
factor_std = np.std(ratios)
factor_cv = 100 * factor_std / factor # Coefficient of variation
# Also compute after-calibration errors
calibrated_gnn = gnn_vals * factor
abs_errors = np.abs(calibrated_gnn - fea_vals)
pct_errors = 100 * abs_errors / fea_vals
calibration[obj_name] = {
'factor': float(factor),
'std': float(factor_std),
'cv_pct': float(factor_cv),
'calibrated_mean_error_pct': float(np.mean(pct_errors)),
'calibrated_max_error_pct': float(np.max(pct_errors)),
'raw_mean_error_pct': float(np.mean(100 * np.abs(gnn_vals - fea_vals) / fea_vals)),
}
print(f"\n{obj_name}:")
print(f" Calibration factor: {factor:.4f} ± {factor_std:.4f} (CV: {factor_cv:.1f}%)")
print(f" Raw GNN error: {calibration[obj_name]['raw_mean_error_pct']:.1f}%")
print(f" Calibrated error: {np.mean(pct_errors):.1f}% (max: {np.max(pct_errors):.1f}%)")
# Summary
print("\n" + "="*60)
print("SUMMARY")
print("="*60)
print(f"\nCalibration factors (multiply GNN predictions by these):")
for obj_name in OBJECTIVES:
print(f" {obj_name}: {calibration[obj_name]['factor']:.4f}")
print(f"\nExpected error reduction:")
for obj_name in OBJECTIVES:
raw = calibration[obj_name]['raw_mean_error_pct']
cal = calibration[obj_name]['calibrated_mean_error_pct']
print(f" {obj_name}: {raw:.1f}% → {cal:.1f}%")
# Save calibration
output_path = STUDY_DIR / "full_calibration.json"
result = {
'timestamp': str(np.datetime64('now')),
'n_samples': len(training_data),
'calibration': calibration,
'objectives': OBJECTIVES,
}
with open(output_path, 'w') as f:
json.dump(result, f, indent=2)
print(f"\nCalibration saved to: {output_path}")
return 0
# Script entry point: propagate main()'s status code to the shell.
if __name__ == "__main__":
    raise SystemExit(main())