""" validate_parsed_data.py Validates the parsed neural field data for completeness and physics consistency AtomizerField Data Validator v1.0.0 Ensures parsed data meets quality standards for neural network training. Usage: python validate_parsed_data.py Example: python validate_parsed_data.py training_case_001 """ import json import h5py import numpy as np from pathlib import Path import sys class NeuralFieldDataValidator: """ Validates parsed neural field data for: - File existence and format - Data completeness - Physics consistency - Data quality This ensures that data fed to neural networks is reliable and consistent. """ def __init__(self, case_directory): """ Initialize validator Args: case_directory (str or Path): Path to case containing parsed data """ self.case_dir = Path(case_directory) self.json_file = self.case_dir / "neural_field_data.json" self.h5_file = self.case_dir / "neural_field_data.h5" self.errors = [] self.warnings = [] self.info = [] def validate(self): """ Run all validation checks Returns: bool: True if validation passed, False otherwise """ print("\n" + "="*60) print("AtomizerField Data Validator v1.0") print("="*60) print(f"\nValidating: {self.case_dir.name}\n") # Check file existence if not self._check_files_exist(): return False # Load data try: with open(self.json_file, 'r') as f: self.data = json.load(f) self.h5_data = h5py.File(self.h5_file, 'r') except Exception as e: self._add_error(f"Failed to load data files: {e}") return False # Run validation checks self._validate_structure() self._validate_metadata() self._validate_mesh() self._validate_materials() self._validate_boundary_conditions() self._validate_loads() self._validate_results() self._validate_physics_consistency() self._validate_data_quality() # Close HDF5 file self.h5_data.close() # Print results self._print_results() return len(self.errors) == 0 def _check_files_exist(self): """Check that required files exist""" if not self.json_file.exists(): self._add_error(f"JSON file not found: {self.json_file}") return False if not self.h5_file.exists(): self._add_error(f"HDF5 file not found: {self.h5_file}") return False self._add_info(f"Found JSON: {self.json_file.name}") self._add_info(f"Found HDF5: {self.h5_file.name}") return True def _validate_structure(self): """Validate data structure has all required fields""" required_fields = [ "metadata", "mesh", "materials", "boundary_conditions", "loads", "results" ] for field in required_fields: if field not in self.data: self._add_error(f"Missing required field: {field}") else: self._add_info(f"Found field: {field}") def _validate_metadata(self): """Validate metadata completeness""" if "metadata" not in self.data: return meta = self.data["metadata"] # Check version if "version" in meta: if meta["version"] != "1.0.0": self._add_warning(f"Data version {meta['version']} may not be compatible") else: self._add_info(f"Data version: {meta['version']}") # Check required metadata fields required = ["created_at", "source", "analysis_type", "units"] for field in required: if field not in meta: self._add_warning(f"Missing metadata field: {field}") if "analysis_type" in meta: self._add_info(f"Analysis type: {meta['analysis_type']}") def _validate_mesh(self): """Validate mesh data""" if "mesh" not in self.data: return mesh = self.data["mesh"] # Check statistics if "statistics" in mesh: stats = mesh["statistics"] n_nodes = stats.get("n_nodes", 0) n_elements = stats.get("n_elements", 0) self._add_info(f"Mesh: {n_nodes:,} nodes, {n_elements:,} elements") if n_nodes == 0: self._add_error("Mesh has no nodes") if n_elements == 0: self._add_error("Mesh has no elements") # Check element types if "element_types" in stats: elem_types = stats["element_types"] total_by_type = sum(elem_types.values()) if total_by_type != n_elements: self._add_warning( f"Element type count ({total_by_type}) doesn't match " f"total elements ({n_elements})" ) for etype, count in elem_types.items(): if count > 0: self._add_info(f" {etype}: {count:,} elements") # Validate HDF5 mesh data if 'mesh' in self.h5_data: mesh_grp = self.h5_data['mesh'] if 'node_coordinates' in mesh_grp: coords = mesh_grp['node_coordinates'][:] self._add_info(f"Node coordinates: shape {coords.shape}") # Check for NaN or inf if np.any(np.isnan(coords)): self._add_error("Node coordinates contain NaN values") if np.any(np.isinf(coords)): self._add_error("Node coordinates contain infinite values") # Check bounding box reasonableness bbox_size = np.max(coords, axis=0) - np.min(coords, axis=0) if np.any(bbox_size == 0): self._add_warning("Mesh is planar or degenerate in one dimension") def _validate_materials(self): """Validate material data""" if "materials" not in self.data: return materials = self.data["materials"] if len(materials) == 0: self._add_warning("No materials defined") return self._add_info(f"Materials: {len(materials)} defined") for mat in materials: mat_id = mat.get("id", "unknown") mat_type = mat.get("type", "unknown") if mat_type == "MAT1": # Check required properties E = mat.get("E") nu = mat.get("nu") if E is None: self._add_error(f"Material {mat_id}: Missing Young's modulus (E)") elif E <= 0: self._add_error(f"Material {mat_id}: Invalid E = {E} (must be > 0)") if nu is None: self._add_error(f"Material {mat_id}: Missing Poisson's ratio (nu)") elif nu < 0 or nu >= 0.5: self._add_error(f"Material {mat_id}: Invalid nu = {nu} (must be 0 <= nu < 0.5)") def _validate_boundary_conditions(self): """Validate boundary conditions""" if "boundary_conditions" not in self.data: return bcs = self.data["boundary_conditions"] spc_count = len(bcs.get("spc", [])) mpc_count = len(bcs.get("mpc", [])) self._add_info(f"Boundary conditions: {spc_count} SPCs, {mpc_count} MPCs") if spc_count == 0: self._add_warning("No SPCs defined - model may be unconstrained") def _validate_loads(self): """Validate load data""" if "loads" not in self.data: return loads = self.data["loads"] force_count = len(loads.get("point_forces", [])) pressure_count = len(loads.get("pressure", [])) gravity_count = len(loads.get("gravity", [])) thermal_count = len(loads.get("thermal", [])) total_loads = force_count + pressure_count + gravity_count + thermal_count self._add_info( f"Loads: {force_count} forces, {pressure_count} pressures, " f"{gravity_count} gravity, {thermal_count} thermal" ) if total_loads == 0: self._add_warning("No loads defined") # Validate force magnitudes for force in loads.get("point_forces", []): mag = force.get("magnitude") if mag == 0: self._add_warning(f"Force at node {force.get('node')} has zero magnitude") def _validate_results(self): """Validate results data""" if "results" not in self.data: self._add_error("No results data found") return results = self.data["results"] # Check displacement if "displacement" not in results: self._add_error("No displacement results found") else: disp = results["displacement"] n_nodes = len(disp.get("node_ids", [])) max_disp = disp.get("max_translation") self._add_info(f"Displacement: {n_nodes:,} nodes") if max_disp is not None: self._add_info(f" Max displacement: {max_disp:.6f} mm") if max_disp == 0: self._add_warning("Maximum displacement is zero - check loads") elif max_disp > 1000: self._add_warning(f"Very large displacement ({max_disp:.2f} mm) - check units or model") # Check stress if "stress" not in results or len(results["stress"]) == 0: self._add_warning("No stress results found") else: for stress_type, stress_data in results["stress"].items(): n_elem = len(stress_data.get("element_ids", [])) max_vm = stress_data.get("max_von_mises") self._add_info(f"Stress ({stress_type}): {n_elem:,} elements") if max_vm is not None: self._add_info(f" Max von Mises: {max_vm:.2f} MPa") if max_vm == 0: self._add_warning(f"{stress_type}: Zero stress - check loads") # Validate HDF5 results if 'results' in self.h5_data: results_grp = self.h5_data['results'] if 'displacement' in results_grp: disp_data = results_grp['displacement'][:] # Check for NaN or inf if np.any(np.isnan(disp_data)): self._add_error("Displacement results contain NaN values") if np.any(np.isinf(disp_data)): self._add_error("Displacement results contain infinite values") def _validate_physics_consistency(self): """Validate physics consistency of results""" if "results" not in self.data or "mesh" not in self.data: return results = self.data["results"] mesh = self.data["mesh"] # Check node count consistency mesh_nodes = mesh.get("statistics", {}).get("n_nodes", 0) if "displacement" in results: disp_nodes = len(results["displacement"].get("node_ids", [])) if disp_nodes != mesh_nodes: self._add_warning( f"Displacement nodes ({disp_nodes:,}) != mesh nodes ({mesh_nodes:,})" ) # Check for rigid body motion (if no constraints) if "boundary_conditions" in self.data: spc_count = len(self.data["boundary_conditions"].get("spc", [])) if spc_count == 0 and "displacement" in results: max_disp = results["displacement"].get("max_translation", 0) if max_disp > 1e6: self._add_error("Unconstrained model with very large displacements - likely rigid body motion") def _validate_data_quality(self): """Validate data quality for neural network training""" # Check HDF5 data types and shapes if 'results' in self.h5_data: results_grp = self.h5_data['results'] # Check displacement shape if 'displacement' in results_grp: disp = results_grp['displacement'][:] if len(disp.shape) != 2: self._add_error(f"Displacement has wrong shape: {disp.shape} (expected 2D)") elif disp.shape[1] != 6: self._add_error(f"Displacement has {disp.shape[1]} DOFs (expected 6)") # Check file sizes json_size = self.json_file.stat().st_size / 1024 # KB h5_size = self.h5_file.stat().st_size / 1024 # KB self._add_info(f"File sizes: JSON={json_size:.1f} KB, HDF5={h5_size:.1f} KB") if json_size > 10000: # 10 MB self._add_warning("JSON file is very large - consider moving more data to HDF5") def _add_error(self, message): """Add error message""" self.errors.append(message) def _add_warning(self, message): """Add warning message""" self.warnings.append(message) def _add_info(self, message): """Add info message""" self.info.append(message) def _print_results(self): """Print validation results""" print("\n" + "="*60) print("VALIDATION RESULTS") print("="*60) # Print info if self.info: print("\nInformation:") for msg in self.info: print(f" [INFO] {msg}") # Print warnings if self.warnings: print("\nWarnings:") for msg in self.warnings: print(f" [WARN] {msg}") # Print errors if self.errors: print("\nErrors:") for msg in self.errors: print(f" [X] {msg}") # Summary print("\n" + "="*60) if len(self.errors) == 0: print("[OK] VALIDATION PASSED") print("="*60) print("\nData is ready for neural network training!") else: print("[X] VALIDATION FAILED") print("="*60) print(f"\nFound {len(self.errors)} error(s), {len(self.warnings)} warning(s)") print("Please fix errors before using this data for training.") print() def main(): """ Main entry point for validation script """ if len(sys.argv) < 2: print("\nAtomizerField Data Validator v1.0") print("="*60) print("\nUsage:") print(" python validate_parsed_data.py ") print("\nExample:") print(" python validate_parsed_data.py training_case_001") print() sys.exit(1) case_dir = sys.argv[1] if not Path(case_dir).exists(): print(f"ERROR: Directory not found: {case_dir}") sys.exit(1) validator = NeuralFieldDataValidator(case_dir) success = validator.validate() sys.exit(0 if success else 1) if __name__ == "__main__": main()