#!/usr/bin/env python3
"""
Compute Calibration Factors from Full FEA Dataset
==================================================

Uses ALL 153 FEA training samples to compute robust calibration factors.
This is much better than calibrating only on the GNN's "best" designs,
which are clustered in a narrow region of the design space.
"""
import sys
import json
import numpy as np
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent.parent))

import torch

# Paths
STUDY_DIR = Path(__file__).parent
CONFIG_PATH = STUDY_DIR / "1_setup" / "optimization_config.json"
CHECKPOINT_PATH = Path("C:/Users/Antoine/Atomizer/zernike_gnn_checkpoint.pt")

# Objective names
OBJECTIVES = [
    'rel_filtered_rms_40_vs_20',
    'rel_filtered_rms_60_vs_20',
    'mfg_90_optician_workload'
]


def load_gnn_data_samples(gnn_data_dir):
    """Load training samples from per-trial gnn_data folders.

    Scans ``trial_*`` subdirectories for a ``metadata.json`` carrying a
    non-empty ``objectives`` entry; trials without objectives are skipped.

    Args:
        gnn_data_dir: Path to the gnn_data folder (may not exist).

    Returns:
        List of ``{'design_vars': ..., 'objectives': ...}`` dicts
        (empty when the folder is missing or holds no usable trials).
    """
    samples = []
    if not gnn_data_dir.exists():
        return samples
    for trial_dir in sorted(gnn_data_dir.iterdir()):
        if not (trial_dir.is_dir() and trial_dir.name.startswith('trial_')):
            continue
        metadata_path = trial_dir / "metadata.json"
        if not metadata_path.exists():
            continue
        with open(metadata_path) as f:
            metadata = json.load(f)
        # Truthiness check alone suffices: missing key and empty dict
        # both evaluate falsy, so the original `'objectives' in metadata
        # and ...` double test was redundant.
        if metadata.get('objectives'):
            samples.append({
                'design_vars': metadata['params'],
                'objectives': metadata['objectives'],
            })
    return samples


def load_v11_db_samples(v11_db):
    """Fallback loader: completed FEA trials from the V11 Optuna SQLite DB.

    Keeps only trials whose 'source' user attribute contains 'FEA'
    (matches both "FEA" and "V10_FEA"; old trials with no source tag are
    assumed to be FEA) and that carry all OBJECTIVES as user attributes.

    Args:
        v11_db: Path to the V11 ``study.db`` file (may not exist).

    Returns:
        List of ``{'design_vars': ..., 'objectives': ...}`` dicts.
    """
    import sqlite3

    samples = []
    if not v11_db.exists():
        return samples
    conn = sqlite3.connect(str(v11_db))
    try:
        cursor = conn.cursor()
        # Completed trials only; FEA filtering happens per-trial below.
        cursor.execute("""
            SELECT t.trial_id, t.number FROM trials t
            WHERE t.state = 'COMPLETE'
        """)
        trial_rows = cursor.fetchall()
        for trial_id, _trial_num in trial_rows:
            # Get user attributes
            cursor.execute("""
                SELECT key, value_json FROM trial_user_attributes
                WHERE trial_id = ?
            """, (trial_id,))
            attrs = {row[0]: json.loads(row[1]) for row in cursor.fetchall()}
            # Default to 'FEA' for old trials without a source tag.
            source = attrs.get('source', 'FEA')
            if 'FEA' not in source:
                continue  # Skip NN trials
            # Get params
            cursor.execute("""
                SELECT param_name, param_value FROM trial_params
                WHERE trial_id = ?
            """, (trial_id,))
            params = {row[0]: float(row[1]) for row in cursor.fetchall()}
            # Objectives are stored as individual user attributes.
            if all(obj in attrs for obj in OBJECTIVES):
                samples.append({
                    'design_vars': params,
                    'objectives': {obj: attrs[obj] for obj in OBJECTIVES},
                })
    finally:
        # Close even if a query raises (original leaked the connection
        # on error).
        conn.close()
    print(f" Found {len(samples)} FEA trials in V11 database")
    return samples


def collect_predictions(optimizer, training_data):
    """Run the GNN on every training sample.

    Args:
        optimizer: Object exposing ``predict(design_vars)`` returning a
            result with an ``objectives`` mapping.
        training_data: List of sample dicts from the loaders above.

    Returns:
        ``(gnn_predictions, fea_ground_truth)`` — parallel lists of
        objective mappings.
    """
    gnn_predictions = []
    fea_ground_truth = []
    for i, sample in enumerate(training_data):
        prediction = optimizer.predict(sample['design_vars'])
        gnn_predictions.append(prediction.objectives)
        fea_ground_truth.append(sample['objectives'])
        if (i + 1) % 25 == 0:
            print(f" Processed {i+1}/{len(training_data)} samples")
    return gnn_predictions, fea_ground_truth


def calibrate_objective(gnn_vals, fea_vals):
    """Compute a multiplicative calibration factor for one objective.

    The factor is ``mean(FEA / GNN)``, i.e. the average multiplicative
    correction that maps GNN predictions onto FEA ground truth.

    Args:
        gnn_vals: 1-D numpy array of raw GNN predictions.
        fea_vals: 1-D numpy array of matching FEA ground-truth values.

    Returns:
        Dict with 'factor', 'std', 'cv_pct', and mean/max percentage
        errors before ('raw_mean_error_pct') and after calibration.

    NOTE(review): assumes both arrays are strictly nonzero — a zero GNN
    prediction or FEA value would yield inf/nan stats. Confirm upstream.
    """
    ratios = fea_vals / gnn_vals
    factor = np.mean(ratios)
    factor_std = np.std(ratios)
    calibrated = gnn_vals * factor
    pct_errors = 100 * np.abs(calibrated - fea_vals) / fea_vals
    return {
        'factor': float(factor),
        'std': float(factor_std),
        # Coefficient of variation: spread of ratios relative to the mean.
        'cv_pct': float(100 * factor_std / factor),
        'calibrated_mean_error_pct': float(np.mean(pct_errors)),
        'calibrated_max_error_pct': float(np.max(pct_errors)),
        'raw_mean_error_pct': float(
            np.mean(100 * np.abs(gnn_vals - fea_vals) / fea_vals)),
    }


def main():
    """Entry point: load model and data, calibrate, report, save JSON.

    Returns:
        0 on success, 1 when no training data could be found.
    """
    # Deferred import: keeps this module importable (e.g. to reuse the
    # pure helpers above) without the full optimization_engine package.
    from optimization_engine.gnn.gnn_optimizer import ZernikeGNNOptimizer

    print("="*60)
    print("FULL DATASET CALIBRATION")
    print("="*60)

    # Load GNN optimizer (includes trained model and config)
    print("\nLoading GNN model...")
    optimizer = ZernikeGNNOptimizer.from_checkpoint(CHECKPOINT_PATH, CONFIG_PATH)
    print(f" Design variables: {len(optimizer.design_names)}")

    # Load training data from gnn_data folder
    print("\nLoading training data from gnn_data folder...")
    training_data = load_gnn_data_samples(STUDY_DIR / "gnn_data")

    if not training_data:
        # Fallback: load from V11 database
        print(" No gnn_data with objectives found, loading from V11 database...")
        v11_db = (STUDY_DIR.parent / "m1_mirror_adaptive_V11" /
                  "3_results" / "study.db")
        training_data = load_v11_db_samples(v11_db)

    print(f" Loaded {len(training_data)} training samples")

    if not training_data:
        print("\n ERROR: No training data found!")
        return 1

    # Compute GNN predictions for all training samples
    print("\nComputing GNN predictions for all training samples...")
    gnn_predictions, fea_ground_truth = collect_predictions(
        optimizer, training_data)
    print(f"\n Total: {len(gnn_predictions)} samples")

    # Compute calibration factors for each objective
    print("\n" + "="*60)
    print("CALIBRATION RESULTS")
    print("="*60)

    calibration = {}
    for obj_name in OBJECTIVES:
        gnn_vals = np.array([p[obj_name] for p in gnn_predictions])
        fea_vals = np.array([f[obj_name] for f in fea_ground_truth])
        stats = calibrate_objective(gnn_vals, fea_vals)
        calibration[obj_name] = stats
        print(f"\n{obj_name}:")
        print(f" Calibration factor: {stats['factor']:.4f} ± {stats['std']:.4f} (CV: {stats['cv_pct']:.1f}%)")
        print(f" Raw GNN error: {stats['raw_mean_error_pct']:.1f}%")
        print(f" Calibrated error: {stats['calibrated_mean_error_pct']:.1f}% (max: {stats['calibrated_max_error_pct']:.1f}%)")

    # Summary
    print("\n" + "="*60)
    print("SUMMARY")
    print("="*60)
    print(f"\nCalibration factors (multiply GNN predictions by these):")
    for obj_name in OBJECTIVES:
        print(f" {obj_name}: {calibration[obj_name]['factor']:.4f}")
    print(f"\nExpected error reduction:")
    for obj_name in OBJECTIVES:
        raw = calibration[obj_name]['raw_mean_error_pct']
        cal = calibration[obj_name]['calibrated_mean_error_pct']
        print(f" {obj_name}: {raw:.1f}% → {cal:.1f}%")

    # Save calibration
    output_path = STUDY_DIR / "full_calibration.json"
    result = {
        'timestamp': str(np.datetime64('now')),
        'n_samples': len(training_data),
        'calibration': calibration,
        'objectives': OBJECTIVES,
    }
    with open(output_path, 'w') as f:
        json.dump(result, f, indent=2)
    print(f"\nCalibration saved to: {output_path}")

    return 0


if __name__ == "__main__":
    sys.exit(main())