"""
Benchmarking Substudy - Mandatory Discovery & Validation System

The benchmarking substudy is a mandatory first step for all optimization
studies. It performs model introspection, validation, and configuration
proposal before any optimization trials are run.

Purpose:
- Discover available expressions, OP2 contents, baseline performance
- Validate that model can be simulated and results extracted
- Propose initial optimization configuration
- Act as gatekeeper before full optimization

This substudy ALWAYS runs before any other substudy and auto-updates when
new substudies are created.

Author: Antoine Letarte
Date: 2025-11-17
Version: 1.0.0
"""

import json
import logging
from pathlib import Path
from typing import Dict, Any, List, Optional
from dataclasses import dataclass, asdict
from datetime import datetime

from optimization_engine.config.setup_wizard import (
    OptimizationSetupWizard,
    ModelIntrospection,
    OP2Introspection,
)

logger = logging.getLogger(__name__)


@dataclass
class BenchmarkResults:
    """Results from benchmarking analysis."""

    timestamp: str

    # Model introspection
    expressions: Dict[str, Dict[str, Any]]  # name -> {value, units, formula}
    expression_count: int

    # OP2 introspection
    element_types: List[str]
    result_types: List[str]
    subcases: List[int]
    node_count: int
    element_count: int

    # Baseline simulation results
    baseline_op2_path: str
    baseline_results: Dict[str, float]  # e.g., max_stress, max_displacement, mass

    # Validation status
    simulation_works: bool
    extraction_works: bool
    validation_passed: bool

    # Proposals
    proposed_design_variables: List[Dict[str, Any]]
    proposed_extractors: List[Dict[str, Any]]
    proposed_objectives: List[str]

    # Issues found
    warnings: List[str]
    errors: List[str]


class BenchmarkingSubstudy:
    """
    Mandatory benchmarking substudy for discovery and validation.

    This runs before any optimization to:
    1. Discover what's in the model
    2. Validate the pipeline works
    3. Propose configuration
    4. Gate-keep before optimization
    """

    def __init__(self, study_dir: Path, prt_file: Path, sim_file: Path):
        """
        Initialize benchmarking substudy.

        Creates ``<study_dir>/substudies/benchmarking`` if it does not exist.

        Args:
            study_dir: Root study directory
            prt_file: Path to NX part file
            sim_file: Path to NX simulation file
        """
        self.study_dir = Path(study_dir)
        self.prt_file = Path(prt_file)
        self.sim_file = Path(sim_file)

        # Benchmarking substudy directory
        self.benchmark_dir = self.study_dir / "substudies" / "benchmarking"
        self.benchmark_dir.mkdir(parents=True, exist_ok=True)

        # Results file
        self.results_file = self.benchmark_dir / "benchmark_results.json"

        # Use Phase 3.3 wizard for introspection
        self.wizard = OptimizationSetupWizard(prt_file, sim_file)

        # Log from the normalized Path so a plain-string study_dir also works.
        logger.info(f"Benchmarking substudy initialized for: {self.study_dir.name}")

    def run_discovery(self) -> BenchmarkResults:
        """
        Run complete discovery and validation.

        Steps: model introspection, baseline simulation, OP2 introspection,
        baseline result extraction, proposal generation. A failure in model
        or OP2 introspection aborts early and returns the partial results
        without writing them to disk; failures in the baseline simulation or
        in extraction are recorded and the run continues.

        Returns:
            BenchmarkResults with all discovery information
        """
        logger.info("=" * 80)
        logger.info("BENCHMARKING SUBSTUDY - Discovery & Validation")
        logger.info("=" * 80)
        logger.info("")

        results = BenchmarkResults(
            timestamp=datetime.now().isoformat(),
            expressions={},
            expression_count=0,
            element_types=[],
            result_types=[],
            subcases=[],
            node_count=0,
            element_count=0,
            baseline_op2_path="",
            baseline_results={},
            simulation_works=False,
            extraction_works=False,
            validation_passed=False,
            proposed_design_variables=[],
            proposed_extractors=[],
            proposed_objectives=[],
            warnings=[],
            errors=[],
        )

        # Step 1: Model Introspection
        logger.info("Step 1: Model Introspection")
        logger.info("-" * 40)
        try:
            model_info = self.wizard.introspect_model()
            results.expressions = model_info.expressions
            results.expression_count = len(model_info.expressions)

            logger.info(f"Found {results.expression_count} expressions:")
            for name, info in model_info.expressions.items():
                logger.info(f" - {name}: {info['value']} {info['units']}")
            logger.info("")
        except Exception as e:
            error_msg = f"Model introspection failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            results.validation_passed = False
            return results

        # Step 2: Baseline Simulation
        logger.info("Step 2: Baseline Simulation")
        logger.info("-" * 40)
        try:
            baseline_op2 = self.wizard.run_baseline_simulation()
            if baseline_op2:
                results.baseline_op2_path = str(baseline_op2)
                results.simulation_works = True
                logger.info(f"Baseline simulation complete: {baseline_op2.name}")
                logger.info("")
            else:
                warning_msg = "Baseline simulation returned no OP2 file"
                logger.warning(warning_msg)
                results.warnings.append(warning_msg)
                logger.info("")
        except Exception as e:
            error_msg = f"Baseline simulation failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            logger.info("Continuing with available information...")
            logger.info("")

        # Step 3: OP2 Introspection
        logger.info("Step 3: OP2 Introspection")
        logger.info("-" * 40)
        try:
            op2_info = self.wizard.introspect_op2()
            results.element_types = op2_info.element_types
            results.result_types = op2_info.result_types
            results.subcases = op2_info.subcases
            results.node_count = op2_info.node_count
            results.element_count = op2_info.element_count

            logger.info("OP2 Analysis:")
            logger.info(f" - Element types: {', '.join(results.element_types)}")
            logger.info(f" - Result types: {', '.join(results.result_types)}")
            logger.info(f" - Subcases: {results.subcases}")
            logger.info(f" - Nodes: {results.node_count}")
            logger.info(f" - Elements: {results.element_count}")
            logger.info("")
        except Exception as e:
            error_msg = f"OP2 introspection failed: {e}"
            logger.error(error_msg)
            results.errors.append(error_msg)
            results.validation_passed = False
            return results

        # Step 4: Extract Baseline Results
        logger.info("Step 4: Extract Baseline Results")
        logger.info("-" * 40)
        if not results.baseline_op2_path:
            # Step 2 already recorded why there is no OP2; Path("") would
            # resolve to the current directory, so do not try to read it.
            logger.warning("Baseline extraction skipped: no baseline OP2 file available")
            logger.info("")
        else:
            try:
                # Try to extract common results
                baseline_results = self._extract_baseline_results(
                    Path(results.baseline_op2_path)
                )
                results.baseline_results = baseline_results
                # The extractor swallows its own errors and may return {},
                # so only report success when something was extracted.
                results.extraction_works = bool(baseline_results)

                if baseline_results:
                    logger.info("Baseline performance:")
                    for key, value in baseline_results.items():
                        logger.info(f" - {key}: {value}")
                else:
                    warning_msg = "Baseline extraction produced no results"
                    logger.warning(warning_msg)
                    results.warnings.append(warning_msg)
                logger.info("")
            except Exception as e:
                warning_msg = f"Baseline extraction partially failed: {e}"
                logger.warning(warning_msg)
                results.warnings.append(warning_msg)
                # Not a hard failure - continue

        # Step 5: Generate Proposals
        logger.info("Step 5: Generate Configuration Proposals")
        logger.info("-" * 40)
        proposals = self._generate_proposals(model_info, op2_info, results.baseline_results)
        results.proposed_design_variables = proposals['design_variables']
        results.proposed_extractors = proposals['extractors']
        results.proposed_objectives = proposals['objectives']

        logger.info(f"Proposed design variables ({len(results.proposed_design_variables)}):")
        for var in results.proposed_design_variables:
            logger.info(f" - {var['parameter']}: {var.get('suggested_range', 'range needed')}")

        logger.info(f"\nProposed extractors ({len(results.proposed_extractors)}):")
        for ext in results.proposed_extractors:
            logger.info(f" - {ext['action']}: {ext['description']}")

        logger.info(f"\nProposed objectives ({len(results.proposed_objectives)}):")
        for obj in results.proposed_objectives:
            logger.info(f" - {obj}")
        logger.info("")

        # Validation passes when the baseline simulation produced an OP2 and
        # OP2 introspection reported at least one element type.
        results.validation_passed = results.simulation_works and bool(results.element_types)

        # Save results
        self._save_results(results)

        logger.info("=" * 80)
        if results.validation_passed:
            logger.info("BENCHMARKING COMPLETE - Validation PASSED")
        else:
            logger.info("BENCHMARKING COMPLETE - Validation FAILED")
        logger.info("=" * 80)
        logger.info("")

        return results

    def _extract_baseline_results(self, op2_file: Path) -> Dict[str, float]:
        """
        Extract baseline results from OP2 file.

        Best-effort: any exception raised while reading or scanning the OP2
        is logged as a warning and whatever was collected so far is returned
        (possibly an empty dict).

        Args:
            op2_file: Path to the OP2 file to read.

        Returns:
            Dict that may contain 'max_displacement' and/or 'max_von_mises'.
        """
        from pyNastran.op2.op2 import OP2

        results = {}

        try:
            op2 = OP2()
            op2.read_op2(str(op2_file), load_geometry=False)

            # Try to extract displacement (first entry of the result table only)
            displacements = getattr(op2, 'displacements', None)
            if displacements:
                disp_data = next(iter(displacements.values()))
                if hasattr(disp_data, 'data'):
                    max_disp = float(abs(disp_data.data).max())
                    results['max_displacement'] = round(max_disp, 6)

            # Try to extract stress: CTETRA takes precedence; CHEXA is only
            # consulted when no CTETRA stress table is present.
            # NOTE(review): this takes the max absolute value over the whole
            # `.data` array, which presumably holds more than the von Mises
            # component - confirm against pyNastran's column layout.
            for stress_attr in ('ctetra_stress', 'chexa_stress'):
                stress_table = getattr(op2, stress_attr, None)
                if not stress_table:
                    continue
                stress_data = next(iter(stress_table.values()))
                if hasattr(stress_data, 'data'):
                    max_stress = float(abs(stress_data.data).max())
                    results['max_von_mises'] = round(max_stress, 3)
                break

        except Exception as e:
            logger.warning(f"Could not extract all baseline results: {e}")

        return results

    def _generate_proposals(self, model_info: ModelIntrospection, op2_info: OP2Introspection,
                            baseline_results: Dict[str, float]) -> Dict[str, Any]:
        """
        Generate configuration proposals based on discovery.

        Args:
            model_info: Model introspection; its ``expressions`` mapping is read.
            op2_info: OP2 introspection; ``result_types`` and ``element_types``
                are read.
            baseline_results: Baseline values (currently not used here).

        Returns:
            Dict with 'design_variables', 'extractors' and 'objectives' lists.
        """
        proposals = {
            'design_variables': [],
            'extractors': [],
            'objectives': []
        }

        # Propose design variables from expressions
        # Filter out likely constants (e.g., material properties, loads)
        constant_keywords = ('modulus', 'poisson', 'density', 'load', 'force', 'pressure')

        for name, info in model_info.expressions.items():
            # Skip if likely a constant
            lowered = name.lower()
            if any(keyword in lowered for keyword in constant_keywords):
                continue

            # Propose as design variable
            proposals['design_variables'].append({
                'parameter': name,
                'current_value': info['value'],
                'units': info['units'],
                'suggested_range': f"±20% of {info['value']} {info['units']}"
            })

        # Propose extractors based on OP2 contents
        if 'displacement' in op2_info.result_types or 'DISPLACEMENT' in op2_info.result_types:
            proposals['extractors'].append({
                'action': 'extract_displacement',
                'description': 'Extract displacement results from OP2 file',
                'params': {'result_type': 'displacement'}
            })
            proposals['objectives'].append('max_displacement (minimize or maximize)')

        if op2_info.element_types:
            # Only the first reported element type is used for the proposal.
            element_type = op2_info.element_types[0].lower()
            proposals['extractors'].append({
                'action': 'extract_solid_stress',
                'description': f'Extract stress from {element_type.upper()} elements',
                'params': {
                    'result_type': 'stress',
                    'element_type': element_type
                }
            })
            proposals['objectives'].append('max_von_mises (minimize for safety)')

        return proposals

    def _save_results(self, results: BenchmarkResults):
        """
        Save benchmark results to JSON file.

        NumPy scalars and arrays anywhere in the results are converted to
        native Python values first, because ``json`` cannot serialize them.

        Args:
            results: Results to write to ``self.results_file``.
        """
        import numpy as np

        results_dict = asdict(results)

        # Convert numpy types to native Python types for JSON serialization
        def convert_numpy(obj):
            if isinstance(obj, np.bool_):
                # np.bool_ is not an np.integer, so it needs its own branch.
                return bool(obj)
            if isinstance(obj, np.integer):
                return int(obj)
            if isinstance(obj, np.floating):
                return float(obj)
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            if isinstance(obj, dict):
                return {k: convert_numpy(v) for k, v in obj.items()}
            if isinstance(obj, (list, tuple)):
                # Tuples become JSON arrays anyway; recurse so their items
                # are converted too.
                return [convert_numpy(item) for item in obj]
            return obj

        results_dict = convert_numpy(results_dict)

        with open(self.results_file, 'w', encoding='utf-8') as f:
            json.dump(results_dict, f, indent=2)

        logger.info(f"Benchmark results saved to: {self.results_file}")

    def load_results(self) -> Optional[BenchmarkResults]:
        """
        Load previous benchmark results if they exist.

        Returns:
            The stored BenchmarkResults, or None when no results file exists.
        """
        if not self.results_file.exists():
            return None

        with open(self.results_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        return BenchmarkResults(**data)

    def generate_report(self, results: BenchmarkResults) -> str:
        """
        Generate human-readable benchmark report.

        Args:
            results: Benchmark results to render.

        Returns:
            Markdown formatted report
        """
        report = []
        report.append("# Benchmarking Report")
        report.append("")
        report.append(f"**Study**: {self.study_dir.name}")
        report.append(f"**Date**: {results.timestamp}")
        report.append(f"**Validation**: {'✅ PASSED' if results.validation_passed else '❌ FAILED'}")
        report.append("")

        report.append("## Model Introspection")
        report.append("")
        report.append(f"**Expressions Found**: {results.expression_count}")
        report.append("")
        report.append("| Expression | Value | Units |")
        report.append("|------------|-------|-------|")
        for name, info in results.expressions.items():
            report.append(f"| {name} | {info['value']} | {info['units']} |")
        report.append("")

        report.append("## OP2 Analysis")
        report.append("")
        report.append(f"- **Element Types**: {', '.join(results.element_types)}")
        report.append(f"- **Result Types**: {', '.join(results.result_types)}")
        report.append(f"- **Subcases**: {results.subcases}")
        report.append(f"- **Nodes**: {results.node_count}")
        report.append(f"- **Elements**: {results.element_count}")
        report.append("")

        report.append("## Baseline Performance")
        report.append("")
        if results.baseline_results:
            for key, value in results.baseline_results.items():
                report.append(f"- **{key}**: {value}")
        else:
            report.append("*No baseline results extracted*")
        report.append("")

        report.append("## Configuration Proposals")
        report.append("")
        report.append("### Proposed Design Variables")
        report.append("")
        for var in results.proposed_design_variables:
            # Same fallback as the log output in run_discovery.
            report.append(f"- **{var['parameter']}**: {var.get('suggested_range', 'range needed')}")
        report.append("")

        report.append("### Proposed Extractors")
        report.append("")
        for ext in results.proposed_extractors:
            report.append(f"- **{ext['action']}**: {ext['description']}")
        report.append("")

        report.append("### Proposed Objectives")
        report.append("")
        for obj in results.proposed_objectives:
            report.append(f"- {obj}")
        report.append("")

        if results.warnings:
            report.append("## Warnings")
            report.append("")
            for warning in results.warnings:
                report.append(f"⚠️ {warning}")
            report.append("")

        if results.errors:
            report.append("## Errors")
            report.append("")
            for error in results.errors:
                report.append(f"❌ {error}")
            report.append("")

        return "\n".join(report)


def main():
    """Test benchmarking substudy."""
    print("Benchmarking Substudy Test")
    print("=" * 80)
    print()
    print("This module provides mandatory discovery and validation for all studies.")
    print("Use it via the study setup workflow.")
    print()


if __name__ == '__main__':
    main()