Files
Atomizer/optimization_engine/training_data_exporter.py
Anto01 e3bdb08a22 feat: Major update with validators, skills, dashboard, and docs reorganization
- Add validation framework (config, model, results, study validators)
- Add Claude Code skills (create-study, run-optimization, generate-report,
  troubleshoot, analyze-model)
- Add Atomizer Dashboard (React frontend + FastAPI backend)
- Reorganize docs into structured directories (00-09)
- Add neural surrogate modules and training infrastructure
- Add multi-objective optimization support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 19:23:58 -05:00

386 lines
12 KiB
Python

"""
Training Data Exporter for AtomizerField
This module exports training data from Atomizer optimization runs for AtomizerField neural network training.
It saves NX Nastran input (.dat) and output (.op2) files along with metadata for each trial.
Usage:
from optimization_engine.training_data_exporter import create_exporter_from_config
exporter = create_exporter_from_config(config)
if exporter:
exporter.export_trial(trial_number, design_vars, results, simulation_files)
exporter.finalize()
"""
import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional, List
import logging
logger = logging.getLogger(__name__)
class TrainingDataExporter:
    """
    Exports training data for AtomizerField neural network training.

    After each FEA solve, saves:
    - Input: NX Nastran .dat file (BDF format)
    - Output: NX Nastran .op2 file (binary results)
    - Metadata: JSON with design parameters, objectives, constraints

    Directory structure::

        {export_dir}/
        ├── trial_0001/
        │   ├── input/
        │   │   └── model.bdf
        │   ├── output/
        │   │   └── model.op2
        │   └── metadata.json
        ├── trial_0002/
        │   └── ...
        ├── study_summary.json
        └── README.md
    """

    def __init__(
        self,
        export_dir: Path,
        study_name: str,
        design_variable_names: List[str],
        objective_names: List[str],
        constraint_names: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize the training data exporter.

        Args:
            export_dir: Root directory for exported training data
            study_name: Name of the optimization study
            design_variable_names: List of design variable names
            objective_names: List of objective function names
            constraint_names: List of constraint names (optional)
            metadata: Additional study metadata (optional)
        """
        self.export_dir = Path(export_dir)
        self.study_name = study_name
        self.design_variable_names = design_variable_names
        self.objective_names = objective_names
        self.constraint_names = constraint_names or []
        self.study_metadata = metadata or {}
        # Count of successfully exported trials (not attempted trials).
        self.trial_count = 0
        # One metadata dict per successfully exported trial, in export order.
        self.exported_trials: List[Dict[str, Any]] = []

        # Create root export directory up front so the README can be written.
        self.export_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Training data exporter initialized: {self.export_dir}")

        self._create_readme()

    def export_trial(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any],
        simulation_files: Dict[str, Path]
    ) -> bool:
        """
        Export training data for a single trial.

        Args:
            trial_number: Optuna trial number
            design_variables: Dict of design parameter names and values
            results: Dict containing objectives, constraints, and other results
            simulation_files: Dict with paths to 'dat_file' and 'op2_file'

        Returns:
            True if export successful, False otherwise
        """
        try:
            # Validate BOTH simulation files before creating any directories,
            # so a failed trial does not leave empty trial_NNNN/ folders behind.
            dat_file = simulation_files.get('dat_file')
            if not (dat_file and Path(dat_file).exists()):
                logger.warning(f"Trial {trial_number}: .dat file not found at {dat_file}")
                return False

            op2_file = simulation_files.get('op2_file')
            if not (op2_file and Path(op2_file).exists()):
                logger.warning(f"Trial {trial_number}: .op2 file not found at {op2_file}")
                return False

            # Create trial directory layout: trial_NNNN/{input,output}
            trial_dir = self.export_dir / f"trial_{trial_number:04d}"
            trial_dir.mkdir(parents=True, exist_ok=True)
            input_dir = trial_dir / "input"
            output_dir = trial_dir / "output"
            input_dir.mkdir(exist_ok=True)
            output_dir.mkdir(exist_ok=True)

            # Copy .dat file (Nastran input deck); copy2 preserves timestamps.
            shutil.copy2(dat_file, input_dir / "model.bdf")
            logger.debug(f"Copied .dat file: {dat_file} -> {input_dir / 'model.bdf'}")

            # Copy .op2 file (Nastran binary results)
            shutil.copy2(op2_file, output_dir / "model.op2")
            logger.debug(f"Copied .op2 file: {op2_file} -> {output_dir / 'model.op2'}")

            # Create metadata.json describing this trial.
            metadata = self._create_trial_metadata(
                trial_number,
                design_variables,
                results
            )
            metadata_path = trial_dir / "metadata.json"
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2)

            logger.info(f"Exported training data for trial {trial_number}")
            self.trial_count += 1
            self.exported_trials.append(metadata)
            return True
        except Exception as e:
            # Best-effort export: a failed trial must never abort the study.
            logger.error(f"Failed to export trial {trial_number}: {e}", exc_info=True)
            return False

    def _create_trial_metadata(
        self,
        trial_number: int,
        design_variables: Dict[str, float],
        results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Create metadata dictionary for a trial.

        Args:
            trial_number: Optuna trial number
            design_variables: Design parameter values
            results: Optimization results

        Returns:
            Metadata dictionary
        """
        metadata = {
            "trial_number": trial_number,
            "timestamp": datetime.now().isoformat(),
            "atomizer_study": self.study_name,
            "design_parameters": design_variables,
            "results": {}
        }
        # Extract objectives
        if "objectives" in results:
            metadata["results"]["objectives"] = results["objectives"]
        # Extract constraints
        if "constraints" in results:
            metadata["results"]["constraints"] = results["constraints"]
        # Well-known result keys are copied verbatim, even if non-scalar
        # (e.g. a list of stresses) — unlike the generic loop below.
        for key in ["max_stress", "max_displacement", "mass", "volume"]:
            if key in results:
                metadata["results"][key] = results[key]
        # Any other result field is kept only if it is a JSON-friendly scalar.
        for key, value in results.items():
            if key not in ["objectives", "constraints"] and isinstance(value, (int, float, str, bool)):
                metadata["results"][key] = value
        return metadata

    def finalize(self) -> None:
        """
        Finalize the training data export.

        Creates study_summary.json with metadata about the entire study.
        """
        try:
            summary = {
                "study_name": self.study_name,
                "total_trials": self.trial_count,
                "design_variables": self.design_variable_names,
                "objectives": self.objective_names,
                "constraints": self.constraint_names,
                "export_timestamp": datetime.now().isoformat(),
                "metadata": self.study_metadata
            }
            summary_path = self.export_dir / "study_summary.json"
            with open(summary_path, 'w', encoding='utf-8') as f:
                json.dump(summary, f, indent=2)
            logger.info(f"Training data export finalized: {self.trial_count} trials exported")
            logger.info(f"Summary saved to: {summary_path}")
        except Exception as e:
            logger.error(f"Failed to finalize training data export: {e}", exc_info=True)

    def _create_readme(self) -> None:
        """Create README.md explaining the training data structure."""
        # Pre-build the bullet lists so the f-string below stays readable.
        newline = "\n"
        dv_bullets = newline.join(f'- {name}' for name in self.design_variable_names)
        obj_bullets = newline.join(f'- {name}' for name in self.objective_names)
        con_bullets = (newline.join(f'- {name}' for name in self.constraint_names)
                       if self.constraint_names else '- None')
        readme_content = f"""# AtomizerField Training Data

**Study Name**: {self.study_name}
**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Directory Structure

```
{self.export_dir.name}/
├── trial_0001/
│   ├── input/
│   │   └── model.bdf       # NX Nastran input deck (BDF format)
│   ├── output/
│   │   └── model.op2       # NX Nastran binary results (OP2 format)
│   └── metadata.json       # Design parameters, objectives, constraints
├── trial_0002/
│   └── ...
├── study_summary.json      # Overall study metadata
└── README.md               # This file
```

## Design Variables
{dv_bullets}

## Objectives
{obj_bullets}

## Constraints
{con_bullets}

## Usage with AtomizerField

### 1. Parse Training Data
```bash
cd Atomizer-Field
python batch_parser.py --data-dir "{self.export_dir}"
```
This converts BDF/OP2 files to PyTorch Geometric format.

### 2. Validate Parsed Data
```bash
python validate_parsed_data.py
```

### 3. Train Neural Network
```bash
python train.py --data-dir "training_data/parsed/" --epochs 200
```

### 4. Use Trained Model in Atomizer
```bash
cd ../Atomizer
python run_optimization.py --config studies/{self.study_name}/workflow_config.json --use-neural
```

## File Formats
- **BDF (.bdf)**: Nastran Bulk Data File - contains mesh, materials, loads, BCs
- **OP2 (.op2)**: Nastran Output2 - binary results with displacements, stresses, etc.
- **metadata.json**: Human-readable trial metadata

## AtomizerField Documentation
See `Atomizer-Field/docs/` for complete documentation on:
- Neural network architecture
- Training procedures
- Integration with Atomizer
- Uncertainty quantification

---
*Generated by Atomizer Training Data Exporter*
"""
        readme_path = self.export_dir / "README.md"
        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(readme_content)
        logger.debug(f"Created README: {readme_path}")
def create_exporter_from_config(config: Dict[str, Any]) -> Optional[TrainingDataExporter]:
    """
    Factory function to create a TrainingDataExporter from a workflow configuration.

    Args:
        config: Workflow configuration dictionary

    Returns:
        TrainingDataExporter instance if enabled, None otherwise

    Example config YAML::

        training_data_export:
          enabled: true
          export_dir: "atomizer_field_training_data/beam_study_001"
    """
    export_cfg = config.get("training_data_export", {})

    # Exporting is strictly opt-in.
    if not export_cfg.get("enabled", False):
        logger.info("Training data export is disabled")
        return None

    # An enabled exporter must say where to write.
    export_dir = export_cfg.get("export_dir")
    if not export_dir:
        logger.error("Training data export enabled but 'export_dir' not specified")
        return None

    study_name = config.get("study_name", "unnamed_study")

    # Derive display names, falling back to positional placeholders.
    design_var_names: List[str] = []
    for idx, dv in enumerate(config.get("design_variables", [])):
        design_var_names.append(dv.get("name", dv.get("parameter", f"var_{idx}")))

    objective_names: List[str] = []
    for idx, obj in enumerate(config.get("objectives", [])):
        objective_names.append(obj.get("name", f"obj_{idx}"))

    constraint_names: List[str] = []
    for idx, con in enumerate(config.get("constraints", [])):
        constraint_names.append(con.get("name", f"constraint_{idx}"))

    # Carry a few study-level facts along with the exported data.
    opt_cfg = config.get("optimization", {})
    extra_metadata = {
        "atomizer_version": config.get("version", "unknown"),
        "optimization_algorithm": opt_cfg.get("algorithm", "unknown"),
        "n_trials": opt_cfg.get("n_trials", 0)
    }

    try:
        exporter = TrainingDataExporter(
            export_dir=Path(export_dir),
            study_name=study_name,
            design_variable_names=design_var_names,
            objective_names=objective_names,
            constraint_names=constraint_names,
            metadata=extra_metadata
        )
        logger.info("Training data exporter created successfully")
        return exporter
    except Exception as e:
        logger.error(f"Failed to create training data exporter: {e}", exc_info=True)
        return None