#!/usr/bin/env python3
"""Analyze all flat back campaign data to design optimal SAT V9."""

import json
import sqlite3
from collections import defaultdict
from contextlib import closing
from pathlib import Path

import numpy as np

STUDIES_DIR = Path(__file__).parent

# All flat back databases
STUDIES = [
    ('V3', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V3' / '3_results' / 'study.db'),
    ('V4', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V4' / '3_results' / 'study.db'),
    ('V5', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V5' / '3_results' / 'study.db'),
    ('V6', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V6' / '3_results' / 'study.db'),
    ('V7', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V7' / '3_results' / 'study.db'),
    ('V8', STUDIES_DIR / 'm1_mirror_cost_reduction_flat_back_V8' / '3_results' / 'study.db'),
]

# Trials whose mass_kg attribute exceeds this value are flagged infeasible.
MAX_MASS = 120.0


def _first_present(attrs, *keys):
    """Return the value of the first key in *keys* whose value is not None.

    Unlike an ``a or b`` chain, a stored value of ``0`` / ``0.0`` is kept
    instead of being treated as missing.
    """
    for key in keys:
        value = attrs.get(key)
        if value is not None:
            return value
    return None


def _load_study(name, db_path):
    """Load every COMPLETE trial of one study database as a list of dicts."""
    records = []
    # closing() guarantees the connection is released even if a query or
    # json.loads raises part-way through the study.
    with closing(sqlite3.connect(db_path)) as conn:
        cursor = conn.cursor()
        # The state is bound as a parameter: a double-quoted "COMPLETE" is an
        # identifier in standard SQL and only works in SQLite via a legacy
        # fallback that some builds disable.
        cursor.execute('SELECT trial_id FROM trials WHERE state = ?', ('COMPLETE',))
        trial_ids = [row[0] for row in cursor.fetchall()]

        for tid in trial_ids:
            # Get params
            cursor.execute(
                'SELECT param_name, param_value FROM trial_params WHERE trial_id = ?',
                (tid,),
            )
            # Drop everything up to and including the first ']' of a name,
            # so '[prefix]foo' and 'foo' map to the same key.
            params = {
                (pname.split(']', 1)[1] if ']' in pname else pname): pvalue
                for pname, pvalue in cursor.fetchall()
            }

            # Get attributes
            cursor.execute(
                'SELECT key, value_json FROM trial_user_attributes WHERE trial_id = ?',
                (tid,),
            )
            attrs = {key: json.loads(raw) for key, raw in cursor.fetchall()}

            # Get WS (first stored value row; None when the trial has none)
            cursor.execute('SELECT value FROM trial_values WHERE trial_id = ?', (tid,))
            ws_row = cursor.fetchone()
            ws = ws_row[0] if ws_row else None

            # A missing or null mass is replaced by a sentinel far above
            # MAX_MASS so that the trial is reported as infeasible.
            mass = attrs.get('mass_kg')
            if mass is None:
                mass = 999.0

            wfe_40 = _first_present(attrs, 'obj_wfe_40_20', 'wfe_40_20')
            wfe_60 = _first_present(attrs, 'obj_wfe_60_20', 'wfe_60_20')
            mfg_90 = _first_present(attrs, 'obj_mfg_90', 'mfg_90')

            # Trials lacking any of the three metrics are skipped.
            if wfe_40 is None or wfe_60 is None or mfg_90 is None:
                continue

            records.append({
                'study': name,
                'trial_id': tid,
                'params': params,
                'mass': mass,
                'wfe_40': wfe_40,
                'wfe_60': wfe_60,
                'mfg_90': mfg_90,
                'ws': ws,
                'feasible': mass <= MAX_MASS,
            })
    return records


def load_all_data(studies=None):
    """Load all trial data from all studies.

    Args:
        studies: Optional iterable of ``(name, db_path)`` pairs. Defaults to
            the module-level ``STUDIES`` list. Databases that do not exist
            on disk are silently skipped.

    Returns:
        A list with one dict per COMPLETE trial that has all three metrics,
        with keys ``study``, ``trial_id``, ``params``, ``mass``, ``wfe_40``,
        ``wfe_60``, ``mfg_90``, ``ws`` (may be None) and ``feasible``.
    """
    if studies is None:
        studies = STUDIES

    all_data = []
    for name, db_path in studies:
        if not Path(db_path).exists():
            continue
        all_data.extend(_load_study(name, db_path))
    return all_data


def main():
    """Print the campaign inventory, rankings and the V9 recommendation."""
    data = load_all_data()

    print("=" * 70)
    print("FLAT BACK CAMPAIGN - COMPLETE DATA ANALYSIS")
    print("=" * 70)
    print()

    # Summary by study
    print("1. DATA INVENTORY BY STUDY")
    print("-" * 70)

    by_study = defaultdict(list)
    for d in data:
        by_study[d['study']].append(d)

    total = 0
    total_feasible = 0
    for name, _ in STUDIES:
        trials = by_study.get(name, [])
        feasible = [t for t in trials if t['feasible']]
        # Only trials with a recorded objective can compete for "best".
        scored = [t['ws'] for t in feasible if t['ws'] is not None]
        best = min(scored) if scored else None
        total += len(trials)
        total_feasible += len(feasible)
        # `is not None` so that a best WS of exactly 0.0 is still reported.
        if best is not None:
            print(f" {name}: {len(trials):4d} trials, {len(feasible):4d} feasible, best WS = {best:.2f}")
        else:
            print(f" {name}: {len(trials):4d} trials, {len(feasible):4d} feasible")

    print(f"\n TOTAL: {total} trials, {total_feasible} feasible")

    # Feasible trials that can be ranked, best (lowest WS) first.
    feasible_data = [d for d in data if d['feasible'] and d['ws'] is not None]
    ranked = sorted(feasible_data, key=lambda x: x['ws'])
    if not ranked:
        # Every later section needs at least one ranked design.
        print()
        print(" No feasible trials with a recorded WS value; nothing to rank.")
        print()
        print("=" * 70)
        return

    # Global best analysis
    print()
    print("2. TOP 10 DESIGNS (ALL STUDIES)")
    print("-" * 70)

    top10 = ranked[:10]

    print(f" {'Rank':<5} {'Study':<6} {'WS':<10} {'40-20':<8} {'60-20':<8} {'Mfg90':<8} {'Mass':<8}")
    print(" " + "-" * 60)
    for i, d in enumerate(top10, 1):
        print(f" {i:<5} {d['study']:<6} {d['ws']:<10.2f} {d['wfe_40']:<8.2f} {d['wfe_60']:<8.2f} {d['mfg_90']:<8.2f} {d['mass']:<8.2f}")

    # Analyze optimal region
    print()
    print("3. OPTIMAL PARAMETER REGION (Top 20 designs)")
    print("-" * 70)

    top20 = ranked[:20]

    # Union over all top designs, so a parameter that exists only in some
    # studies is still reported.
    param_names = sorted({pname for d in top20 for pname in d['params']})

    print("\n Parameter ranges in top 20 designs:")
    print(f" {'Parameter':<35} {'Min':<10} {'Max':<10} {'Mean':<10}")
    print(" " + "-" * 65)

    for pname in param_names:
        values = [d['params'][pname] for d in top20 if pname in d['params']]
        if values and all(v is not None for v in values):
            print(f" {pname:<35} {min(values):<10.2f} {max(values):<10.2f} {np.mean(values):<10.2f}")

    # Mass analysis
    print()
    print("4. MASS VS WS CORRELATION")
    print("-" * 70)

    # Bin by mass (each bin is half-open: low <= mass < high)
    bins = [(105, 110), (110, 115), (115, 118), (118, 120)]
    print(f"\n {'Mass Range':<15} {'Count':<8} {'Best WS':<10} {'Mean WS':<10}")
    print(" " + "-" * 45)

    for low, high in bins:
        in_bin = [d['ws'] for d in feasible_data if low <= d['mass'] < high]
        if in_bin:
            best = min(in_bin)
            mean = np.mean(in_bin)
            print(f" {low}-{high} kg{'':<5} {len(in_bin):<8} {best:<10.2f} {mean:<10.2f}")

    # Find sweet spot
    print()
    print("5. RECOMMENDED SAT V9 STRATEGY")
    print("-" * 70)

    best_design = top10[0]

    print(f"""
 A. USE ALL {total_feasible} FEASIBLE SAMPLES FOR TRAINING
 - V8 only used V6 data (196 samples)
 - With {total_feasible} samples, surrogate will be much more accurate

 B. FOCUS ON OPTIMAL MASS REGION
 - Best designs have mass 115-119 kg
 - V8's threshold at 115 kg was too conservative
 - Recommendation: soft threshold at 118 kg

 C. ADAPTIVE EXPLORATION SCHEDULE
 - Phase 1 (trials 1-30): exploration_weight = 0.2
 - Phase 2 (trials 31-80): exploration_weight = 0.1
 - Phase 3 (trials 81+): exploration_weight = 0.05 (pure exploitation)

 D. EXPLOIT BEST REGION
 - Best design: WS={best_design['ws']:.2f} from {best_design['study']}
 - Sample 70% of candidates within 5% of best params
 - Only 30% random exploration

 E. L-BFGS POLISH (last 10 trials)
 - Start from best found design
 - Trust region around current best
 - Gradient descent with surrogate
""")

    # Output best params for V9 seeding
    print("6. BEST DESIGN PARAMS (FOR V9 SEEDING)")
    print("-" * 70)
    print()
    for pname, value in sorted(best_design['params'].items()):
        print(f" {pname}: {value}")

    print()
    print("=" * 70)


if __name__ == "__main__":
    main()