feat: Merge Atomizer-Field neural network module into main repository
Permanently integrates the Atomizer-Field GNN surrogate system:
- neural_models/: Graph Neural Network for FEA field prediction
- batch_parser.py: Parse training data from FEA exports
- train.py: Neural network training pipeline
- predict.py: Inference engine for fast predictions

This enables 600x-2200x speedup over traditional FEA by replacing expensive simulations with millisecond neural network predictions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
454
atomizer-field/validate_parsed_data.py
Normal file
454
atomizer-field/validate_parsed_data.py
Normal file
@@ -0,0 +1,454 @@
|
||||
"""
|
||||
validate_parsed_data.py
|
||||
Validates the parsed neural field data for completeness and physics consistency
|
||||
|
||||
AtomizerField Data Validator v1.0.0
|
||||
Ensures parsed data meets quality standards for neural network training.
|
||||
|
||||
Usage:
|
||||
python validate_parsed_data.py <case_directory>
|
||||
|
||||
Example:
|
||||
python validate_parsed_data.py training_case_001
|
||||
"""
|
||||
|
||||
import json
|
||||
import h5py
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
|
||||
class NeuralFieldDataValidator:
    """
    Validates parsed neural field data before it is used for training.

    Checks cover four areas:
    - File existence and format
    - Data completeness
    - Physics consistency
    - Data quality

    This ensures that data fed to neural networks is reliable and consistent.
    """

    def __init__(self, case_directory):
        """
        Set up validator state for one parsed case.

        Args:
            case_directory (str or Path): Path to case containing parsed data
                (expects neural_field_data.json and neural_field_data.h5 inside).
        """
        self.case_dir = Path(case_directory)
        # Companion files produced by the parser: JSON sidecar + HDF5 arrays.
        self.json_file = self.case_dir / "neural_field_data.json"
        self.h5_file = self.case_dir / "neural_field_data.h5"
        # Accumulators for the final report.
        self.errors, self.warnings, self.info = [], [], []
|
||||
|
||||
def validate(self):
|
||||
"""
|
||||
Run all validation checks
|
||||
|
||||
Returns:
|
||||
bool: True if validation passed, False otherwise
|
||||
"""
|
||||
print("\n" + "="*60)
|
||||
print("AtomizerField Data Validator v1.0")
|
||||
print("="*60)
|
||||
print(f"\nValidating: {self.case_dir.name}\n")
|
||||
|
||||
# Check file existence
|
||||
if not self._check_files_exist():
|
||||
return False
|
||||
|
||||
# Load data
|
||||
try:
|
||||
with open(self.json_file, 'r') as f:
|
||||
self.data = json.load(f)
|
||||
self.h5_data = h5py.File(self.h5_file, 'r')
|
||||
except Exception as e:
|
||||
self._add_error(f"Failed to load data files: {e}")
|
||||
return False
|
||||
|
||||
# Run validation checks
|
||||
self._validate_structure()
|
||||
self._validate_metadata()
|
||||
self._validate_mesh()
|
||||
self._validate_materials()
|
||||
self._validate_boundary_conditions()
|
||||
self._validate_loads()
|
||||
self._validate_results()
|
||||
self._validate_physics_consistency()
|
||||
self._validate_data_quality()
|
||||
|
||||
# Close HDF5 file
|
||||
self.h5_data.close()
|
||||
|
||||
# Print results
|
||||
self._print_results()
|
||||
|
||||
return len(self.errors) == 0
|
||||
|
||||
def _check_files_exist(self):
|
||||
"""Check that required files exist"""
|
||||
if not self.json_file.exists():
|
||||
self._add_error(f"JSON file not found: {self.json_file}")
|
||||
return False
|
||||
|
||||
if not self.h5_file.exists():
|
||||
self._add_error(f"HDF5 file not found: {self.h5_file}")
|
||||
return False
|
||||
|
||||
self._add_info(f"Found JSON: {self.json_file.name}")
|
||||
self._add_info(f"Found HDF5: {self.h5_file.name}")
|
||||
return True
|
||||
|
||||
def _validate_structure(self):
|
||||
"""Validate data structure has all required fields"""
|
||||
required_fields = [
|
||||
"metadata",
|
||||
"mesh",
|
||||
"materials",
|
||||
"boundary_conditions",
|
||||
"loads",
|
||||
"results"
|
||||
]
|
||||
|
||||
for field in required_fields:
|
||||
if field not in self.data:
|
||||
self._add_error(f"Missing required field: {field}")
|
||||
else:
|
||||
self._add_info(f"Found field: {field}")
|
||||
|
||||
def _validate_metadata(self):
|
||||
"""Validate metadata completeness"""
|
||||
if "metadata" not in self.data:
|
||||
return
|
||||
|
||||
meta = self.data["metadata"]
|
||||
|
||||
# Check version
|
||||
if "version" in meta:
|
||||
if meta["version"] != "1.0.0":
|
||||
self._add_warning(f"Data version {meta['version']} may not be compatible")
|
||||
else:
|
||||
self._add_info(f"Data version: {meta['version']}")
|
||||
|
||||
# Check required metadata fields
|
||||
required = ["created_at", "source", "analysis_type", "units"]
|
||||
for field in required:
|
||||
if field not in meta:
|
||||
self._add_warning(f"Missing metadata field: {field}")
|
||||
|
||||
if "analysis_type" in meta:
|
||||
self._add_info(f"Analysis type: {meta['analysis_type']}")
|
||||
|
||||
    def _validate_mesh(self):
        """Validate mesh data from the JSON summary and the HDF5 node array.

        Checks node/element counts, per-type element tallies, and screens
        the raw coordinates for NaN/inf and degenerate bounding boxes.
        """
        if "mesh" not in self.data:
            # Absence is already reported by _validate_structure.
            return

        mesh = self.data["mesh"]

        # Check statistics (JSON-side summary written by the parser)
        if "statistics" in mesh:
            stats = mesh["statistics"]
            n_nodes = stats.get("n_nodes", 0)
            n_elements = stats.get("n_elements", 0)

            self._add_info(f"Mesh: {n_nodes:,} nodes, {n_elements:,} elements")

            # An empty mesh is unusable for training.
            if n_nodes == 0:
                self._add_error("Mesh has no nodes")
            if n_elements == 0:
                self._add_error("Mesh has no elements")

            # Check element types: per-type counts should sum to the total.
            if "element_types" in stats:
                elem_types = stats["element_types"]
                total_by_type = sum(elem_types.values())
                if total_by_type != n_elements:
                    self._add_warning(
                        f"Element type count ({total_by_type}) doesn't match "
                        f"total elements ({n_elements})"
                    )

                for etype, count in elem_types.items():
                    if count > 0:
                        self._add_info(f"  {etype}: {count:,} elements")

        # Validate HDF5 mesh data (the actual coordinate array).
        if 'mesh' in self.h5_data:
            mesh_grp = self.h5_data['mesh']

            if 'node_coordinates' in mesh_grp:
                # [:] materializes the dataset as a NumPy array.
                coords = mesh_grp['node_coordinates'][:]
                self._add_info(f"Node coordinates: shape {coords.shape}")

                # Check for NaN or inf — either poisons gradient-based training.
                if np.any(np.isnan(coords)):
                    self._add_error("Node coordinates contain NaN values")
                if np.any(np.isinf(coords)):
                    self._add_error("Node coordinates contain infinite values")

                # Check bounding box reasonableness: zero extent along any axis
                # means the mesh is flat (or collapsed) in that dimension.
                bbox_size = np.max(coords, axis=0) - np.min(coords, axis=0)
                if np.any(bbox_size == 0):
                    self._add_warning("Mesh is planar or degenerate in one dimension")
|
||||
|
||||
def _validate_materials(self):
|
||||
"""Validate material data"""
|
||||
if "materials" not in self.data:
|
||||
return
|
||||
|
||||
materials = self.data["materials"]
|
||||
|
||||
if len(materials) == 0:
|
||||
self._add_warning("No materials defined")
|
||||
return
|
||||
|
||||
self._add_info(f"Materials: {len(materials)} defined")
|
||||
|
||||
for mat in materials:
|
||||
mat_id = mat.get("id", "unknown")
|
||||
mat_type = mat.get("type", "unknown")
|
||||
|
||||
if mat_type == "MAT1":
|
||||
# Check required properties
|
||||
E = mat.get("E")
|
||||
nu = mat.get("nu")
|
||||
|
||||
if E is None:
|
||||
self._add_error(f"Material {mat_id}: Missing Young's modulus (E)")
|
||||
elif E <= 0:
|
||||
self._add_error(f"Material {mat_id}: Invalid E = {E} (must be > 0)")
|
||||
|
||||
if nu is None:
|
||||
self._add_error(f"Material {mat_id}: Missing Poisson's ratio (nu)")
|
||||
elif nu < 0 or nu >= 0.5:
|
||||
self._add_error(f"Material {mat_id}: Invalid nu = {nu} (must be 0 <= nu < 0.5)")
|
||||
|
||||
def _validate_boundary_conditions(self):
|
||||
"""Validate boundary conditions"""
|
||||
if "boundary_conditions" not in self.data:
|
||||
return
|
||||
|
||||
bcs = self.data["boundary_conditions"]
|
||||
|
||||
spc_count = len(bcs.get("spc", []))
|
||||
mpc_count = len(bcs.get("mpc", []))
|
||||
|
||||
self._add_info(f"Boundary conditions: {spc_count} SPCs, {mpc_count} MPCs")
|
||||
|
||||
if spc_count == 0:
|
||||
self._add_warning("No SPCs defined - model may be unconstrained")
|
||||
|
||||
def _validate_loads(self):
|
||||
"""Validate load data"""
|
||||
if "loads" not in self.data:
|
||||
return
|
||||
|
||||
loads = self.data["loads"]
|
||||
|
||||
force_count = len(loads.get("point_forces", []))
|
||||
pressure_count = len(loads.get("pressure", []))
|
||||
gravity_count = len(loads.get("gravity", []))
|
||||
thermal_count = len(loads.get("thermal", []))
|
||||
|
||||
total_loads = force_count + pressure_count + gravity_count + thermal_count
|
||||
|
||||
self._add_info(
|
||||
f"Loads: {force_count} forces, {pressure_count} pressures, "
|
||||
f"{gravity_count} gravity, {thermal_count} thermal"
|
||||
)
|
||||
|
||||
if total_loads == 0:
|
||||
self._add_warning("No loads defined")
|
||||
|
||||
# Validate force magnitudes
|
||||
for force in loads.get("point_forces", []):
|
||||
mag = force.get("magnitude")
|
||||
if mag == 0:
|
||||
self._add_warning(f"Force at node {force.get('node')} has zero magnitude")
|
||||
|
||||
    def _validate_results(self):
        """Validate FEA result fields (displacement and stress).

        Verifies displacement results exist, reports extrema, warns on
        suspicious magnitudes, and screens the HDF5 arrays for NaN/inf.
        """
        if "results" not in self.data:
            self._add_error("No results data found")
            return

        results = self.data["results"]

        # Check displacement (required output for field prediction).
        if "displacement" not in results:
            self._add_error("No displacement results found")
        else:
            disp = results["displacement"]
            n_nodes = len(disp.get("node_ids", []))
            max_disp = disp.get("max_translation")

            self._add_info(f"Displacement: {n_nodes:,} nodes")
            if max_disp is not None:
                # NOTE(review): messages assume millimetres — confirm the parser's units.
                self._add_info(f"  Max displacement: {max_disp:.6f} mm")

                if max_disp == 0:
                    self._add_warning("Maximum displacement is zero - check loads")
                elif max_disp > 1000:
                    self._add_warning(f"Very large displacement ({max_disp:.2f} mm) - check units or model")

        # Check stress (optional; only a warning when absent).
        if "stress" not in results or len(results["stress"]) == 0:
            self._add_warning("No stress results found")
        else:
            for stress_type, stress_data in results["stress"].items():
                n_elem = len(stress_data.get("element_ids", []))
                max_vm = stress_data.get("max_von_mises")

                self._add_info(f"Stress ({stress_type}): {n_elem:,} elements")
                if max_vm is not None:
                    self._add_info(f"  Max von Mises: {max_vm:.2f} MPa")

                    if max_vm == 0:
                        self._add_warning(f"{stress_type}: Zero stress - check loads")

        # Validate HDF5 results: raw arrays must be finite for training.
        if 'results' in self.h5_data:
            results_grp = self.h5_data['results']

            if 'displacement' in results_grp:
                # [:] materializes the dataset as a NumPy array.
                disp_data = results_grp['displacement'][:]

                # Check for NaN or inf
                if np.any(np.isnan(disp_data)):
                    self._add_error("Displacement results contain NaN values")
                if np.any(np.isinf(disp_data)):
                    self._add_error("Displacement results contain infinite values")
|
||||
|
||||
def _validate_physics_consistency(self):
|
||||
"""Validate physics consistency of results"""
|
||||
if "results" not in self.data or "mesh" not in self.data:
|
||||
return
|
||||
|
||||
results = self.data["results"]
|
||||
mesh = self.data["mesh"]
|
||||
|
||||
# Check node count consistency
|
||||
mesh_nodes = mesh.get("statistics", {}).get("n_nodes", 0)
|
||||
|
||||
if "displacement" in results:
|
||||
disp_nodes = len(results["displacement"].get("node_ids", []))
|
||||
if disp_nodes != mesh_nodes:
|
||||
self._add_warning(
|
||||
f"Displacement nodes ({disp_nodes:,}) != mesh nodes ({mesh_nodes:,})"
|
||||
)
|
||||
|
||||
# Check for rigid body motion (if no constraints)
|
||||
if "boundary_conditions" in self.data:
|
||||
spc_count = len(self.data["boundary_conditions"].get("spc", []))
|
||||
if spc_count == 0 and "displacement" in results:
|
||||
max_disp = results["displacement"].get("max_translation", 0)
|
||||
if max_disp > 1e6:
|
||||
self._add_error("Unconstrained model with very large displacements - likely rigid body motion")
|
||||
|
||||
def _validate_data_quality(self):
|
||||
"""Validate data quality for neural network training"""
|
||||
|
||||
# Check HDF5 data types and shapes
|
||||
if 'results' in self.h5_data:
|
||||
results_grp = self.h5_data['results']
|
||||
|
||||
# Check displacement shape
|
||||
if 'displacement' in results_grp:
|
||||
disp = results_grp['displacement'][:]
|
||||
if len(disp.shape) != 2:
|
||||
self._add_error(f"Displacement has wrong shape: {disp.shape} (expected 2D)")
|
||||
elif disp.shape[1] != 6:
|
||||
self._add_error(f"Displacement has {disp.shape[1]} DOFs (expected 6)")
|
||||
|
||||
# Check file sizes
|
||||
json_size = self.json_file.stat().st_size / 1024 # KB
|
||||
h5_size = self.h5_file.stat().st_size / 1024 # KB
|
||||
|
||||
self._add_info(f"File sizes: JSON={json_size:.1f} KB, HDF5={h5_size:.1f} KB")
|
||||
|
||||
if json_size > 10000: # 10 MB
|
||||
self._add_warning("JSON file is very large - consider moving more data to HDF5")
|
||||
|
||||
    def _add_error(self, message: str) -> None:
        """Record a validation failure; any error makes validate() return False."""
        self.errors.append(message)

    def _add_warning(self, message: str) -> None:
        """Record a non-fatal data-quality concern."""
        self.warnings.append(message)

    def _add_info(self, message: str) -> None:
        """Record an informational note for the final report."""
        self.info.append(message)
|
||||
|
||||
def _print_results(self):
|
||||
"""Print validation results"""
|
||||
print("\n" + "="*60)
|
||||
print("VALIDATION RESULTS")
|
||||
print("="*60)
|
||||
|
||||
# Print info
|
||||
if self.info:
|
||||
print("\nInformation:")
|
||||
for msg in self.info:
|
||||
print(f" [INFO] {msg}")
|
||||
|
||||
# Print warnings
|
||||
if self.warnings:
|
||||
print("\nWarnings:")
|
||||
for msg in self.warnings:
|
||||
print(f" [WARN] {msg}")
|
||||
|
||||
# Print errors
|
||||
if self.errors:
|
||||
print("\nErrors:")
|
||||
for msg in self.errors:
|
||||
print(f" [X] {msg}")
|
||||
|
||||
# Summary
|
||||
print("\n" + "="*60)
|
||||
if len(self.errors) == 0:
|
||||
print("[OK] VALIDATION PASSED")
|
||||
print("="*60)
|
||||
print("\nData is ready for neural network training!")
|
||||
else:
|
||||
print("[X] VALIDATION FAILED")
|
||||
print("="*60)
|
||||
print(f"\nFound {len(self.errors)} error(s), {len(self.warnings)} warning(s)")
|
||||
print("Please fix errors before using this data for training.")
|
||||
|
||||
print()
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: validate one case directory.

    Exits 0 on a clean validation, 1 on missing arguments, a missing
    directory, or any validation error.
    """
    # No case directory given: show usage and bail out.
    if len(sys.argv) < 2:
        print("\nAtomizerField Data Validator v1.0")
        print("=" * 60)
        print("\nUsage:")
        print("  python validate_parsed_data.py <case_directory>")
        print("\nExample:")
        print("  python validate_parsed_data.py training_case_001")
        print()
        sys.exit(1)

    case_dir = sys.argv[1]
    if not Path(case_dir).exists():
        print(f"ERROR: Directory not found: {case_dir}")
        sys.exit(1)

    validator = NeuralFieldDataValidator(case_dir)
    sys.exit(0 if validator.validate() else 1)
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed directly (not on import).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user