Files
Atomizer/atomizer-field/batch_parser.py
Antoine d5ffba099e feat: Merge Atomizer-Field neural network module into main repository
Permanently integrates the Atomizer-Field GNN surrogate system:
- neural_models/: Graph Neural Network for FEA field prediction
- batch_parser.py: Parse training data from FEA exports
- train.py: Neural network training pipeline
- predict.py: Inference engine for fast predictions

This enables 600x-2200x speedup over traditional FEA by replacing
expensive simulations with millisecond neural network predictions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 15:31:33 -05:00

361 lines
11 KiB
Python

"""
batch_parser.py
Parse multiple NX Nastran cases in batch
AtomizerField Batch Parser v1.0.0
Efficiently processes multiple FEA cases for neural network training dataset creation.
Usage:
python batch_parser.py <root_directory>
Example:
python batch_parser.py ./training_data
Directory structure expected:
training_data/
├── case_001/
│ ├── input/model.bdf
│ └── output/model.op2
├── case_002/
│ ├── input/model.bdf
│ └── output/model.op2
└── ...
"""
import os
import sys
import json
from pathlib import Path
from datetime import datetime
import traceback
from neural_field_parser import NastranToNeuralFieldParser
from validate_parsed_data import NeuralFieldDataValidator
class BatchParser:
    """
    Batch parser for processing multiple FEA cases.

    Walks a root directory of case subdirectories (each holding an
    ``input/`` Nastran deck and an ``output/`` OP2 result file), parses
    each case with NastranToNeuralFieldParser, optionally validates the
    parsed data, and writes a JSON summary report next to the cases.
    This enables rapid dataset creation for neural network training.
    Cases are processed sequentially.
    """

    def __init__(self, root_directory, validate=True, continue_on_error=True):
        """
        Initialize batch parser.

        Args:
            root_directory (str or Path): Root directory containing case
                subdirectories.
            validate (bool): Run validation after parsing each case.
            continue_on_error (bool): Continue processing if a case fails;
                when False, the first parsing exception is re-raised.
        """
        self.root_dir = Path(root_directory)
        self.validate = validate
        self.continue_on_error = continue_on_error
        # One result dict per processed case; populated by batch_parse().
        self.results = []

    def find_cases(self):
        """
        Find all case directories in the root directory.

        A valid case directory contains:
            - input/ subdirectory with a .bdf or .dat file
            - output/ subdirectory with a .op2 file

        Returns:
            list: Sorted list of Path objects for valid case directories.
        """
        cases = []
        for item in self.root_dir.iterdir():
            if not item.is_dir():
                continue
            # Check for required subdirectories and files
            input_dir = item / "input"
            output_dir = item / "output"
            if not input_dir.exists() or not output_dir.exists():
                continue
            # Check for BDF file (Nastran decks may use either extension)
            bdf_files = list(input_dir.glob("*.bdf")) + list(input_dir.glob("*.dat"))
            if not bdf_files:
                continue
            # Check for OP2 file
            op2_files = list(output_dir.glob("*.op2"))
            if not op2_files:
                continue
            cases.append(item)
        return sorted(cases)

    def parse_case(self, case_dir):
        """
        Parse (and optionally validate) a single case.

        Args:
            case_dir (Path): Path to case directory.

        Returns:
            dict: Result dictionary with at least "case", "status" and
                "timestamp" keys. Final "status" values: "success",
                "failed", "validation_failed" (or "parsed" if an error
                occurs after parsing but before validation completes).
        """
        result = {
            "case": case_dir.name,
            "status": "unknown",
            "timestamp": datetime.now().isoformat()
        }
        try:
            print(f"\n{'='*60}")
            print(f"Processing: {case_dir.name}")
            print(f"{'='*60}")
            # Parse
            parser = NastranToNeuralFieldParser(case_dir)
            data = parser.parse_all()
            result["status"] = "parsed"
            result["nodes"] = data["mesh"]["statistics"]["n_nodes"]
            result["elements"] = data["mesh"]["statistics"]["n_elements"]
            # Get max displacement and stress if available
            if "displacement" in data.get("results", {}):
                result["max_displacement"] = data["results"]["displacement"].get("max_translation")
            if "stress" in data.get("results", {}):
                # Record the first stress type that reports a von Mises max.
                for stress_type, stress_data in data["results"]["stress"].items():
                    if "max_von_mises" in stress_data and stress_data["max_von_mises"] is not None:
                        result["max_stress"] = stress_data["max_von_mises"]
                        break
            # Validate if requested
            if self.validate:
                print(f"\nValidating {case_dir.name}...")
                validator = NeuralFieldDataValidator(case_dir)
                validation_passed = validator.validate()
                result["validated"] = validation_passed
                if validation_passed:
                    result["status"] = "success"
                else:
                    result["status"] = "validation_failed"
                    result["message"] = "Validation failed (see output above)"
            else:
                result["status"] = "success"
        except Exception as e:
            # Record the failure so the batch summary can report it; the
            # traceback is kept for post-mortem debugging of the case.
            result["status"] = "failed"
            result["error"] = str(e)
            result["traceback"] = traceback.format_exc()
            print(f"\n✗ ERROR: {e}")
            if not self.continue_on_error:
                raise
        return result

    def batch_parse(self):
        """
        Parse all cases found under the root directory.

        Prints a banner, per-case progress, and a final summary, then
        writes the summary JSON into the root directory.

        Returns:
            list: List of per-case result dictionaries (empty if no
                valid cases were found).
        """
        print("\n" + "="*60)
        print("AtomizerField Batch Parser v1.0")
        print("="*60)
        print(f"\nRoot directory: {self.root_dir}")
        # Find all cases
        cases = self.find_cases()
        if not cases:
            print(f"\n✗ No valid cases found in {self.root_dir}")
            print("\nCase directories should contain:")
            print(" input/model.bdf (or model.dat)")
            print(" output/model.op2")
            return []
        print(f"\nFound {len(cases)} case(s) to process:")
        for case in cases:
            print(f" - {case.name}")
        # Process each case
        self.results = []
        start_time = datetime.now()
        for i, case in enumerate(cases, 1):
            print(f"\n[{i}/{len(cases)}] Processing {case.name}...")
            result = self.parse_case(case)
            self.results.append(result)
            # Show progress
            success_count = sum(1 for r in self.results if r["status"] == "success")
            print(f"\nProgress: {i}/{len(cases)} processed, {success_count} successful")
        end_time = datetime.now()
        elapsed = (end_time - start_time).total_seconds()
        # Print summary
        self._print_summary(elapsed)
        # Save summary to JSON
        self._save_summary()
        return self.results

    def _print_summary(self, elapsed_time):
        """Print batch processing summary (counts, timing, per-case lines)."""
        print("\n" + "="*60)
        print("BATCH PROCESSING COMPLETE")
        print("="*60)
        success_count = sum(1 for r in self.results if r["status"] == "success")
        failed_count = sum(1 for r in self.results if r["status"] == "failed")
        validation_failed = sum(1 for r in self.results if r["status"] == "validation_failed")
        print(f"\nTotal cases: {len(self.results)}")
        print(f" ✓ Successful: {success_count}")
        if validation_failed > 0:
            print(f" ⚠ Validation failed: {validation_failed}")
        if failed_count > 0:
            print(f" ✗ Failed: {failed_count}")
        print(f"\nProcessing time: {elapsed_time:.1f} seconds")
        if len(self.results) > 0:
            print(f"Average time per case: {elapsed_time/len(self.results):.1f} seconds")
        # Detailed results
        print("\nDetailed Results:")
        print("-" * 60)
        for result in self.results:
            # Symbols restored to match the ✓/✗/⚠ markers used in the
            # counts above (the map previously held empty strings).
            status_symbol = {
                "success": "✓",
                "failed": "✗",
                "validation_failed": "⚠",
                "parsed": "·"
            }.get(result["status"], "?")
            case_info = f"{status_symbol} {result['case']}: {result['status']}"
            if "nodes" in result and "elements" in result:
                case_info += f" ({result['nodes']:,} nodes, {result['elements']:,} elements)"
            if "max_stress" in result:
                # NOTE(review): assumes upstream stresses are in MPa — confirm
                # against NastranToNeuralFieldParser's unit convention.
                case_info += f" | Max VM: {result['max_stress']:.2f} MPa"
            if result["status"] == "failed" and "error" in result:
                case_info += f"\n Error: {result['error']}"
            print(f" {case_info}")
        print("\n" + "="*60)
        if success_count == len(self.results):
            print("✓ ALL CASES PROCESSED SUCCESSFULLY")
        elif success_count > 0:
            print(f"{success_count}/{len(self.results)} CASES SUCCESSFUL")
        else:
            print("✗ ALL CASES FAILED")
        print("="*60 + "\n")

    def _save_summary(self):
        """Save batch processing summary to batch_processing_summary.json."""
        summary_file = self.root_dir / "batch_processing_summary.json"
        summary = {
            "batch_info": {
                "root_directory": str(self.root_dir),
                "timestamp": datetime.now().isoformat(),
                "total_cases": len(self.results),
                "successful_cases": sum(1 for r in self.results if r["status"] == "success"),
                "failed_cases": sum(1 for r in self.results if r["status"] == "failed"),
                # Counted separately so the JSON matches the printed summary.
                "validation_failed_cases": sum(1 for r in self.results if r["status"] == "validation_failed"),
                "validation_enabled": self.validate
            },
            "cases": self.results
        }
        with open(summary_file, 'w') as f:
            json.dump(summary, f, indent=2)
        print(f"Summary saved to: {summary_file}\n")
def _print_usage():
    """Print the CLI usage banner, options, and expected directory layout."""
    print("\nAtomizerField Batch Parser v1.0")
    print("="*60)
    print("\nUsage:")
    print(" python batch_parser.py <root_directory> [options]")
    print("\nOptions:")
    print(" --no-validate Skip validation step")
    print(" --stop-on-error Stop processing if a case fails")
    print("\nExample:")
    print(" python batch_parser.py ./training_data")
    print("\nDirectory structure:")
    print(" training_data/")
    print(" ├── case_001/")
    print(" │ ├── input/model.bdf")
    print(" │ └── output/model.op2")
    print(" ├── case_002/")
    print(" │ ├── input/model.bdf")
    print(" │ └── output/model.op2")
    print(" └── ...")
    print()


def _exit_code(results):
    """Map batch results to a process exit code: 0 all ok, 2 partial, 1 none/empty."""
    if not results:
        return 1
    ok = sum(1 for r in results if r["status"] == "success")
    if ok == len(results):
        return 0
    return 2 if ok > 0 else 1


def main():
    """
    Command-line entry point for the batch parser.

    Exit codes: 0 = all cases succeeded, 2 = partial success,
    1 = usage error / missing directory / all failed, 130 = interrupted.
    """
    argv = sys.argv
    if len(argv) < 2:
        _print_usage()
        sys.exit(1)

    target = argv[1]
    # Flags are detected anywhere on the command line, as before.
    flags = set(argv)
    run_validation = "--no-validate" not in flags
    keep_going = "--stop-on-error" not in flags

    if not Path(target).exists():
        print(f"ERROR: Directory not found: {target}")
        sys.exit(1)

    # Create batch parser and process all cases.
    runner = BatchParser(
        target,
        validate=run_validation,
        continue_on_error=keep_going
    )
    try:
        results = runner.batch_parse()
    except KeyboardInterrupt:
        print("\n\nBatch processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n\nFATAL ERROR: {e}")
        traceback.print_exc()
        sys.exit(1)
    # SystemExit is a BaseException, so computing the code outside the
    # try block is equivalent to the original exit-inside-try flow.
    sys.exit(_exit_code(results))


if __name__ == "__main__":
    main()