"""
validate_parsed_data.py

Validates the parsed neural field data for completeness and physics consistency

AtomizerField Data Validator v1.0.0

Ensures parsed data meets quality standards for neural network training.

Usage:
    python validate_parsed_data.py <case_directory>

Example:
    python validate_parsed_data.py training_case_001
"""

import json
import h5py  # third-party: HDF5 container holding the bulk numeric arrays
import numpy as np
from pathlib import Path
import sys
class NeuralFieldDataValidator:
    """
    Validates parsed neural field data for:
    - File existence and format
    - Data completeness
    - Physics consistency
    - Data quality

    This ensures that data fed to neural networks is reliable and consistent.

    Findings are accumulated in three severity buckets (`errors`, `warnings`,
    `info`); validation passes only when `errors` is empty.
    """

    def __init__(self, case_directory):
        """
        Initialize validator

        Args:
            case_directory (str or Path): Path to case containing parsed data
        """
        self.case_dir = Path(case_directory)
        # Expected companion files produced by the parser: a JSON summary
        # plus an HDF5 file with the large numeric arrays.
        self.json_file = self.case_dir / "neural_field_data.json"
        self.h5_file = self.case_dir / "neural_field_data.h5"
        self.errors = []    # fatal findings: validation fails if non-empty
        self.warnings = []  # suspicious but non-fatal findings
        self.info = []      # purely informational messages

    def validate(self):
        """
        Run all validation checks

        Returns:
            bool: True if validation passed, False otherwise
        """
        print("\n" + "=" * 60)
        print("AtomizerField Data Validator v1.0")
        print("=" * 60)
        print(f"\nValidating: {self.case_dir.name}\n")

        # Check file existence
        if not self._check_files_exist():
            return False

        # Load data
        try:
            with open(self.json_file, 'r') as f:
                self.data = json.load(f)
            self.h5_data = h5py.File(self.h5_file, 'r')
        except Exception as e:
            self._add_error(f"Failed to load data files: {e}")
            return False

        # FIX: close the HDF5 handle even when a check raises — the original
        # flow leaked the open file on any exception inside the checks.
        try:
            self._validate_structure()
            self._validate_metadata()
            self._validate_mesh()
            self._validate_materials()
            self._validate_boundary_conditions()
            self._validate_loads()
            self._validate_results()
            self._validate_physics_consistency()
            self._validate_data_quality()
        finally:
            self.h5_data.close()

        # Print results
        self._print_results()

        return len(self.errors) == 0

    def _check_files_exist(self):
        """Check that required files exist"""
        if not self.json_file.exists():
            self._add_error(f"JSON file not found: {self.json_file}")
            return False

        if not self.h5_file.exists():
            self._add_error(f"HDF5 file not found: {self.h5_file}")
            return False

        self._add_info(f"Found JSON: {self.json_file.name}")
        self._add_info(f"Found HDF5: {self.h5_file.name}")
        return True

    def _validate_structure(self):
        """Validate data structure has all required fields"""
        required_fields = [
            "metadata",
            "mesh",
            "materials",
            "boundary_conditions",
            "loads",
            "results",
        ]

        for field in required_fields:
            if field not in self.data:
                self._add_error(f"Missing required field: {field}")
            else:
                self._add_info(f"Found field: {field}")

    def _validate_metadata(self):
        """Validate metadata completeness"""
        if "metadata" not in self.data:
            # Already reported by _validate_structure; nothing more to check.
            return

        meta = self.data["metadata"]

        # Check version — only 1.0.0 is known-compatible with this validator.
        if "version" in meta:
            if meta["version"] != "1.0.0":
                self._add_warning(f"Data version {meta['version']} may not be compatible")
            else:
                self._add_info(f"Data version: {meta['version']}")

        # Check required metadata fields
        required = ["created_at", "source", "analysis_type", "units"]
        for field in required:
            if field not in meta:
                self._add_warning(f"Missing metadata field: {field}")

        if "analysis_type" in meta:
            self._add_info(f"Analysis type: {meta['analysis_type']}")

    def _validate_mesh(self):
        """Validate mesh data"""
        if "mesh" not in self.data:
            return

        mesh = self.data["mesh"]

        # Check statistics (counts reported by the parser)
        if "statistics" in mesh:
            stats = mesh["statistics"]
            n_nodes = stats.get("n_nodes", 0)
            n_elements = stats.get("n_elements", 0)

            self._add_info(f"Mesh: {n_nodes:,} nodes, {n_elements:,} elements")

            if n_nodes == 0:
                self._add_error("Mesh has no nodes")
            if n_elements == 0:
                self._add_error("Mesh has no elements")

            # Check element types: per-type counts must sum to the total.
            if "element_types" in stats:
                elem_types = stats["element_types"]
                total_by_type = sum(elem_types.values())
                if total_by_type != n_elements:
                    self._add_warning(
                        f"Element type count ({total_by_type}) doesn't match "
                        f"total elements ({n_elements})"
                    )

                for etype, count in elem_types.items():
                    if count > 0:
                        self._add_info(f"  {etype}: {count:,} elements")

        # Validate HDF5 mesh data (the actual coordinate array)
        if 'mesh' in self.h5_data:
            mesh_grp = self.h5_data['mesh']

            if 'node_coordinates' in mesh_grp:
                coords = mesh_grp['node_coordinates'][:]
                self._add_info(f"Node coordinates: shape {coords.shape}")

                # Check for NaN or inf
                if np.any(np.isnan(coords)):
                    self._add_error("Node coordinates contain NaN values")
                if np.any(np.isinf(coords)):
                    self._add_error("Node coordinates contain infinite values")

                # Check bounding box reasonableness.
                # FIX: guard the empty case — np.max/np.min raise ValueError
                # on an empty array.
                if coords.size:
                    bbox_size = np.max(coords, axis=0) - np.min(coords, axis=0)
                    if np.any(bbox_size == 0):
                        self._add_warning("Mesh is planar or degenerate in one dimension")
                else:
                    self._add_error("Node coordinate dataset is empty")

    def _validate_materials(self):
        """Validate material data"""
        if "materials" not in self.data:
            return

        materials = self.data["materials"]

        if len(materials) == 0:
            self._add_warning("No materials defined")
            return

        self._add_info(f"Materials: {len(materials)} defined")

        for mat in materials:
            mat_id = mat.get("id", "unknown")
            mat_type = mat.get("type", "unknown")

            if mat_type == "MAT1":
                # Check required isotropic properties
                E = mat.get("E")
                nu = mat.get("nu")

                if E is None:
                    self._add_error(f"Material {mat_id}: Missing Young's modulus (E)")
                elif E <= 0:
                    self._add_error(f"Material {mat_id}: Invalid E = {E} (must be > 0)")

                if nu is None:
                    self._add_error(f"Material {mat_id}: Missing Poisson's ratio (nu)")
                elif nu < 0 or nu >= 0.5:
                    # nu == 0.5 is perfectly incompressible and singular for
                    # standard elasticity formulations, hence excluded.
                    self._add_error(f"Material {mat_id}: Invalid nu = {nu} (must be 0 <= nu < 0.5)")

    def _validate_boundary_conditions(self):
        """Validate boundary conditions"""
        if "boundary_conditions" not in self.data:
            return

        bcs = self.data["boundary_conditions"]

        spc_count = len(bcs.get("spc", []))
        mpc_count = len(bcs.get("mpc", []))

        self._add_info(f"Boundary conditions: {spc_count} SPCs, {mpc_count} MPCs")

        if spc_count == 0:
            self._add_warning("No SPCs defined - model may be unconstrained")

    def _validate_loads(self):
        """Validate load data"""
        if "loads" not in self.data:
            return

        loads = self.data["loads"]

        force_count = len(loads.get("point_forces", []))
        pressure_count = len(loads.get("pressure", []))
        gravity_count = len(loads.get("gravity", []))
        thermal_count = len(loads.get("thermal", []))

        total_loads = force_count + pressure_count + gravity_count + thermal_count

        self._add_info(
            f"Loads: {force_count} forces, {pressure_count} pressures, "
            f"{gravity_count} gravity, {thermal_count} thermal"
        )

        if total_loads == 0:
            self._add_warning("No loads defined")

        # Validate force magnitudes (zero-magnitude forces are likely mistakes)
        for force in loads.get("point_forces", []):
            mag = force.get("magnitude")
            if mag == 0:
                self._add_warning(f"Force at node {force.get('node')} has zero magnitude")

    def _validate_results(self):
        """Validate results data"""
        if "results" not in self.data:
            self._add_error("No results data found")
            return

        results = self.data["results"]

        # Check displacement
        if "displacement" not in results:
            self._add_error("No displacement results found")
        else:
            disp = results["displacement"]
            n_nodes = len(disp.get("node_ids", []))
            max_disp = disp.get("max_translation")

            self._add_info(f"Displacement: {n_nodes:,} nodes")
            # FIX: all magnitude checks are nested under the None guard —
            # comparing None with > raises TypeError in Python 3.
            if max_disp is not None:
                self._add_info(f"  Max displacement: {max_disp:.6f} mm")

                if max_disp == 0:
                    self._add_warning("Maximum displacement is zero - check loads")
                elif max_disp > 1000:
                    self._add_warning(f"Very large displacement ({max_disp:.2f} mm) - check units or model")

        # Check stress
        if "stress" not in results or len(results["stress"]) == 0:
            self._add_warning("No stress results found")
        else:
            for stress_type, stress_data in results["stress"].items():
                n_elem = len(stress_data.get("element_ids", []))
                max_vm = stress_data.get("max_von_mises")

                self._add_info(f"Stress ({stress_type}): {n_elem:,} elements")
                if max_vm is not None:
                    self._add_info(f"  Max von Mises: {max_vm:.2f} MPa")

                    if max_vm == 0:
                        self._add_warning(f"{stress_type}: Zero stress - check loads")

        # Validate HDF5 results (the full displacement array)
        if 'results' in self.h5_data:
            results_grp = self.h5_data['results']

            if 'displacement' in results_grp:
                disp_data = results_grp['displacement'][:]

                # Check for NaN or inf
                if np.any(np.isnan(disp_data)):
                    self._add_error("Displacement results contain NaN values")
                if np.any(np.isinf(disp_data)):
                    self._add_error("Displacement results contain infinite values")

    def _validate_physics_consistency(self):
        """Validate physics consistency of results"""
        if "results" not in self.data or "mesh" not in self.data:
            return

        results = self.data["results"]
        mesh = self.data["mesh"]

        # Check node count consistency between mesh and displacement results
        mesh_nodes = mesh.get("statistics", {}).get("n_nodes", 0)

        if "displacement" in results:
            disp_nodes = len(results["displacement"].get("node_ids", []))
            if disp_nodes != mesh_nodes:
                self._add_warning(
                    f"Displacement nodes ({disp_nodes:,}) != mesh nodes ({mesh_nodes:,})"
                )

        # Check for rigid body motion (if no constraints): an unconstrained
        # static model with huge displacements almost certainly drifted.
        if "boundary_conditions" in self.data:
            spc_count = len(self.data["boundary_conditions"].get("spc", []))
            if spc_count == 0 and "displacement" in results:
                max_disp = results["displacement"].get("max_translation", 0)
                if max_disp > 1e6:
                    self._add_error("Unconstrained model with very large displacements - likely rigid body motion")

    def _validate_data_quality(self):
        """Validate data quality for neural network training"""

        # Check HDF5 data types and shapes
        if 'results' in self.h5_data:
            results_grp = self.h5_data['results']

            # Check displacement shape: (n_nodes, 6 DOFs) expected
            if 'displacement' in results_grp:
                disp = results_grp['displacement'][:]
                if len(disp.shape) != 2:
                    self._add_error(f"Displacement has wrong shape: {disp.shape} (expected 2D)")
                elif disp.shape[1] != 6:
                    self._add_error(f"Displacement has {disp.shape[1]} DOFs (expected 6)")

        # Check file sizes
        json_size = self.json_file.stat().st_size / 1024  # KB
        h5_size = self.h5_file.stat().st_size / 1024  # KB

        self._add_info(f"File sizes: JSON={json_size:.1f} KB, HDF5={h5_size:.1f} KB")

        if json_size > 10000:  # 10 MB
            self._add_warning("JSON file is very large - consider moving more data to HDF5")

    def _add_error(self, message):
        """Add error message"""
        self.errors.append(message)

    def _add_warning(self, message):
        """Add warning message"""
        self.warnings.append(message)

    def _add_info(self, message):
        """Add info message"""
        self.info.append(message)

    def _print_results(self):
        """Print validation results"""
        print("\n" + "=" * 60)
        print("VALIDATION RESULTS")
        print("=" * 60)

        # Print info
        if self.info:
            print("\nInformation:")
            for msg in self.info:
                print(f"  [INFO] {msg}")

        # Print warnings
        if self.warnings:
            print("\nWarnings:")
            for msg in self.warnings:
                print(f"  [WARN] {msg}")

        # Print errors
        if self.errors:
            print("\nErrors:")
            for msg in self.errors:
                print(f"  [X] {msg}")

        # Summary
        print("\n" + "=" * 60)
        if len(self.errors) == 0:
            print("[OK] VALIDATION PASSED")
            print("=" * 60)
            print("\nData is ready for neural network training!")
        else:
            print("[X] VALIDATION FAILED")
            print("=" * 60)
            print(f"\nFound {len(self.errors)} error(s), {len(self.warnings)} warning(s)")
            print("Please fix errors before using this data for training.")

        print()
def main():
    """Command-line entry point for the validation script.

    Exits with status 0 on a clean validation; status 1 on missing
    arguments, a missing case directory, or a failed validation.
    """
    args = sys.argv[1:]

    # No case directory given: show usage and bail out.
    if not args:
        banner = "=" * 60
        print("\nAtomizerField Data Validator v1.0")
        print(banner)
        print("\nUsage:")
        print("  python validate_parsed_data.py <case_directory>")
        print("\nExample:")
        print("  python validate_parsed_data.py training_case_001")
        print()
        sys.exit(1)

    case_dir = args[0]

    if not Path(case_dir).exists():
        print(f"ERROR: Directory not found: {case_dir}")
        sys.exit(1)

    passed = NeuralFieldDataValidator(case_dir).validate()
    sys.exit(0 if passed else 1)
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()