"""
Training Data Exporter for AtomizerField

This module exports training data from Atomizer optimization runs for
AtomizerField neural network training. It saves NX Nastran input (.dat) and
output (.op2) files along with metadata for each trial.

Usage:
    from optimization_engine.training_data_exporter import create_exporter_from_config

    exporter = create_exporter_from_config(config)
    if exporter:
        exporter.export_trial(trial_number, design_vars, results, simulation_files)
        exporter.finalize()
"""

import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List
import logging

logger = logging.getLogger(__name__)

# Result keys that are copied into the trial metadata whenever they are
# present in ``results``, regardless of the type of their value.
_NAMED_RESULT_KEYS = ("max_stress", "max_displacement", "mass", "volume")

# Result keys that hold nested structures and are handled explicitly.
_STRUCTURED_RESULT_KEYS = ("objectives", "constraints")


def _json_default(value: Any) -> Any:
    """
    Convert a value the ``json`` module cannot encode natively.

    Only two conversions are defined on purpose: ``Path`` objects become
    strings, and objects exposing a ``tolist()`` method (``array.array``,
    NumPy scalars/arrays, ...) are replaced by the result of that call.
    Anything else still raises ``TypeError`` so unexpected types are not
    silently stringified.

    Args:
        value: Object that the JSON encoder could not serialize

    Returns:
        A JSON-serializable replacement for ``value``

    Raises:
        TypeError: If no conversion is defined for ``value``
    """
    if isinstance(value, Path):
        return str(value)

    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()

    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def _markdown_bullets(names: List[str]) -> str:
    """Render ``names`` as a Markdown bullet list, or ``- None`` when empty."""
    if not names:
        return "- None"
    return "\n".join(f"- {name}" for name in names)


def _entry_names(
    entries: Any,
    prefix: str,
    fallback_key: Optional[str] = None
) -> List[str]:
    """
    Extract display names from a list of configuration entries.

    Args:
        entries: Iterable of entries, or None (treated as empty). A dict
            entry is named by its ``"name"`` key; a plain string entry is
            used as the name itself.
        prefix: Prefix of the generated ``{prefix}_{index}`` default name
        fallback_key: Optional second key consulted when ``"name"`` is absent

    Returns:
        One name per entry, in order
    """
    names: List[str] = []
    for index, entry in enumerate(entries or []):
        if isinstance(entry, str):
            names.append(entry)
            continue

        default_name = f"{prefix}_{index}"
        if fallback_key is not None:
            default_name = entry.get(fallback_key, default_name)
        names.append(entry.get("name", default_name))
    return names


class TrainingDataExporter:
    """
    Exports training data for AtomizerField neural network training.

    After each FEA solve, saves:
    - Input: NX Nastran .dat file (BDF format)
    - Output: NX Nastran .op2 file (binary results)
    - Metadata: JSON with design parameters, objectives, constraints

    Directory structure:
        {export_dir}/
        ├── trial_0001/
        │   ├── input/
        │   │   └── model.bdf
        │   ├── output/
        │   │   └── model.op2
        │   └── metadata.json
        ├── trial_0002/
        │   └── ...
        ├── study_summary.json
        └── README.md
    """

    def __init__(
        self,
        export_dir: Path,
        study_name: str,
        design_variable_names: List[str],
        objective_names: List[str],
        constraint_names: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize the training data exporter.

        Creates ``export_dir`` (including parents) and writes ``README.md``
        into it.

        Args:
            export_dir: Root directory for exported training data
            study_name: Name of the optimization study
            design_variable_names: List of design variable names
            objective_names: List of objective function names
            constraint_names: List of constraint names (optional)
            metadata: Additional study metadata (optional)
        """
        self.export_dir = Path(export_dir)
        self.study_name = study_name
        self.design_variable_names = design_variable_names
        self.objective_names = objective_names
        self.constraint_names = constraint_names or []
        self.study_metadata = metadata or {}
        # Number of successful export_trial() calls.
        self.trial_count = 0
        # Metadata dictionaries of the successfully exported trials.
        self.exported_trials: List[Dict[str, Any]] = []

        # Create root export directory
        self.export_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Training data exporter initialized: {self.export_dir}")

        # Create README
        self._create_readme()

    def export_trial(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any],
        simulation_files: Dict[str, Path]
    ) -> bool:
        """
        Export training data for a single trial.

        Both simulation files are checked and the metadata is serialized
        before anything is written, so a trial that cannot be exported does
        not leave a partially filled ``trial_NNNN`` directory behind.

        Args:
            trial_number: Optuna trial number
            design_variables: Dict of design parameter names and values
            results: Dict containing objectives, constraints, and other results
            simulation_files: Dict with paths to 'dat_file' and 'op2_file'

        Returns:
            True if export successful, False otherwise. Any exception is
            logged and reported as False rather than propagated.
        """
        try:
            # Validate both inputs up front (Nastran input deck first).
            dat_file = simulation_files.get('dat_file')
            if not (dat_file and Path(dat_file).exists()):
                logger.warning(f"Trial {trial_number}: .dat file not found at {dat_file}")
                return False

            op2_file = simulation_files.get('op2_file')
            if not (op2_file and Path(op2_file).exists()):
                logger.warning(f"Trial {trial_number}: .op2 file not found at {op2_file}")
                return False

            # Serialize before touching the disk: if a value cannot be
            # encoded we fail here instead of leaving a truncated
            # metadata.json next to already-copied files.
            metadata = self._create_trial_metadata(
                trial_number,
                design_variables,
                results
            )
            metadata_text = json.dumps(metadata, indent=2, default=_json_default)

            # Create trial directory
            trial_dir = self.export_dir / f"trial_{trial_number:04d}"
            input_dir = trial_dir / "input"
            output_dir = trial_dir / "output"
            input_dir.mkdir(parents=True, exist_ok=True)
            output_dir.mkdir(parents=True, exist_ok=True)

            # Copy .dat file (Nastran input deck)
            shutil.copy2(dat_file, input_dir / "model.bdf")
            logger.debug(f"Copied .dat file: {dat_file} -> {input_dir / 'model.bdf'}")

            # Copy .op2 file (Nastran binary results)
            shutil.copy2(op2_file, output_dir / "model.op2")
            logger.debug(f"Copied .op2 file: {op2_file} -> {output_dir / 'model.op2'}")

            # Create metadata.json
            metadata_path = trial_dir / "metadata.json"
            metadata_path.write_text(metadata_text, encoding='utf-8')

            logger.info(f"Exported training data for trial {trial_number}")
            self.trial_count += 1
            self.exported_trials.append(metadata)

            return True

        except Exception as e:
            logger.error(f"Failed to export trial {trial_number}: {e}", exc_info=True)
            return False

    def _create_trial_metadata(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Create metadata dictionary for a trial.

        Args:
            trial_number: Optuna trial number
            design_variables: Design parameter values
            results: Optimization results

        Returns:
            Metadata dictionary with the keys ``trial_number``, ``timestamp``,
            ``atomizer_study``, ``design_parameters`` and ``results``
        """
        exported_results: Dict[str, Any] = {}

        # Nested objective/constraint structures are passed through as-is.
        for key in _STRUCTURED_RESULT_KEYS:
            if key in results:
                exported_results[key] = results[key]

        # Well-known result fields are kept whatever their value type is.
        for key in _NAMED_RESULT_KEYS:
            if key in results:
                exported_results[key] = results[key]

        # Any other field is kept only when it is a plain scalar
        # (bool is a subclass of int, so it is covered as well).
        for key, value in results.items():
            if key in _STRUCTURED_RESULT_KEYS:
                continue
            if isinstance(value, (int, float, str)):
                exported_results[key] = value

        return {
            "trial_number": trial_number,
            "timestamp": datetime.now().isoformat(),
            "atomizer_study": self.study_name,
            "design_parameters": design_variables,
            "results": exported_results
        }

    def finalize(self) -> None:
        """
        Finalize the training data export.

        Creates study_summary.json with metadata about the entire study.
        Failures are logged and not propagated.
        """
        try:
            summary = {
                "study_name": self.study_name,
                "total_trials": self.trial_count,
                "design_variables": self.design_variable_names,
                "objectives": self.objective_names,
                "constraints": self.constraint_names,
                "export_timestamp": datetime.now().isoformat(),
                "metadata": self.study_metadata
            }

            # Serialize first so an encoding error cannot truncate the file.
            summary_text = json.dumps(summary, indent=2, default=_json_default)
            summary_path = self.export_dir / "study_summary.json"
            summary_path.write_text(summary_text, encoding='utf-8')

            logger.info(f"Training data export finalized: {self.trial_count} trials exported")
            logger.info(f"Summary saved to: {summary_path}")

        except Exception as e:
            logger.error(f"Failed to finalize training data export: {e}", exc_info=True)

    def _create_readme(self) -> None:
        """Create README.md explaining the training data structure."""
        generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        design_variable_list = _markdown_bullets(self.design_variable_names)
        objective_list = _markdown_bullets(self.objective_names)
        constraint_list = _markdown_bullets(self.constraint_names)

        readme_content = f"""# AtomizerField Training Data

**Study Name**: {self.study_name}
**Generated**: {generated}

## Directory Structure

```
{self.export_dir.name}/
├── trial_0001/
│   ├── input/
│   │   └── model.bdf           # NX Nastran input deck (BDF format)
│   ├── output/
│   │   └── model.op2           # NX Nastran binary results (OP2 format)
│   └── metadata.json           # Design parameters, objectives, constraints
├── trial_0002/
│   └── ...
├── study_summary.json          # Overall study metadata
└── README.md                   # This file
```

## Design Variables

{design_variable_list}

## Objectives

{objective_list}

## Constraints

{constraint_list}

## Usage with AtomizerField

### 1. Parse Training Data

```bash
cd Atomizer-Field
python batch_parser.py --data-dir "{self.export_dir}"
```

This converts BDF/OP2 files to PyTorch Geometric format.

### 2. Validate Parsed Data

```bash
python validate_parsed_data.py
```

### 3. Train Neural Network

```bash
python train.py --data-dir "training_data/parsed/" --epochs 200
```

### 4. Use Trained Model in Atomizer

```bash
cd ../Atomizer
python run_optimization.py --config studies/{self.study_name}/workflow_config.json --use-neural
```

## File Formats

- **BDF (.bdf)**: Nastran Bulk Data File - contains mesh, materials, loads, BCs
- **OP2 (.op2)**: Nastran Output2 - binary results with displacements, stresses, etc.
- **metadata.json**: Human-readable trial metadata

## AtomizerField Documentation

See `Atomizer-Field/docs/` for complete documentation on:
- Neural network architecture
- Training procedures
- Integration with Atomizer
- Uncertainty quantification

---

*Generated by Atomizer Training Data Exporter*
"""

        readme_path = self.export_dir / "README.md"
        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(readme_content)

        logger.debug(f"Created README: {readme_path}")


def create_exporter_from_config(config: Dict[str, Any]) -> Optional[TrainingDataExporter]:
    """
    Factory function to create TrainingDataExporter from workflow configuration.

    Sections that are present but empty (for example a YAML key without a
    value, which loads as None) are treated the same as missing sections.

    Args:
        config: Workflow configuration dictionary

    Returns:
        TrainingDataExporter instance if enabled, None otherwise (export
        disabled, 'export_dir' missing, or the exporter could not be created)

    Example config YAML:
        training_data_export:
          enabled: true
          export_dir: "atomizer_field_training_data/beam_study_001"
    """
    # ``or {}`` rather than a .get() default: the default is not used when
    # the key exists with a None value.
    export_config = config.get("training_data_export") or {}
    if not export_config.get("enabled", False):
        logger.info("Training data export is disabled")
        return None

    # Get export directory
    export_dir = export_config.get("export_dir")
    if not export_dir:
        logger.error("Training data export enabled but 'export_dir' not specified")
        return None

    # Get study name
    study_name = config.get("study_name", "unnamed_study")

    # Get design variable, objective and constraint names
    design_var_names = _entry_names(
        config.get("design_variables"), "var", fallback_key="parameter"
    )
    objective_names = _entry_names(config.get("objectives"), "obj")
    constraint_names = _entry_names(config.get("constraints"), "constraint")

    # Additional metadata
    optimization = config.get("optimization") or {}
    metadata = {
        "atomizer_version": config.get("version", "unknown"),
        "optimization_algorithm": optimization.get("algorithm", "unknown"),
        "n_trials": optimization.get("n_trials", 0)
    }

    try:
        exporter = TrainingDataExporter(
            export_dir=Path(export_dir),
            study_name=study_name,
            design_variable_names=design_var_names,
            objective_names=objective_names,
            constraint_names=constraint_names,
            metadata=metadata
        )
        logger.info("Training data exporter created successfully")
        return exporter

    except Exception as e:
        logger.error(f"Failed to create training data exporter: {e}", exc_info=True)
        return None