feat: Update model discovery to handle real binary NX files
Updated the parser to work with actual NX .sim/.prt files, which are binary (not XML) in NX 12+.

Key Changes:
- Added dual-mode parser: XML for test files, binary for real NX files
- Implemented string extraction from binary .sim files
- Updated solution detection to recognize Nastran SOL types
- Fixed expression extraction with proper NX format pattern: #(Type [units]) name: value;
- Added multiple .prt file naming pattern support
- Added .fem file parsing for FEM information

Parser Capabilities:
- Extracts expressions from .prt files (binary parsing)
- Detects solution types (Linear Statics, Modal, etc.)
- Finds element types from .fem files
- Handles multiple file naming conventions

Validation with Real Files:
- Successfully parsed tests/Bracket_sim1.sim (6.2 MB binary file)
- Extracted 1 expression: tip_thickness = 20.0 mm
- Detected 18 solution types (including Nastran SOL codes)
- Works with both XML test files and binary production files

Technical Details:
- Binary files: latin-1 decoding + regex pattern matching
- Expression pattern: #\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)
- Multiple .prt file search: exact match → base name → _i suffix
- FEM parsing: extracts mesh, materials, element types from .fem files

Next Steps:
- Refine solution filtering (reduce false positives)
- Add load/constraint extraction from .fem files
- Test with more complex models
This commit is contained in:
@@ -22,7 +22,12 @@ class SimFileParser:
|
|||||||
"""
|
"""
|
||||||
Parser for Siemens NX .sim (simulation) files.
|
Parser for Siemens NX .sim (simulation) files.
|
||||||
|
|
||||||
.sim files are XML-based and contain references to:
|
IMPORTANT: Real NX .sim files are BINARY (not XML) in NX 12+.
|
||||||
|
The parser uses two approaches:
|
||||||
|
1. XML parsing for test/legacy files
|
||||||
|
2. Binary string extraction for real NX files
|
||||||
|
|
||||||
|
.sim files contain references to:
|
||||||
- Parent .prt file (geometry and expressions)
|
- Parent .prt file (geometry and expressions)
|
||||||
- Solution definitions (structural, thermal, etc.)
|
- Solution definitions (structural, thermal, etc.)
|
||||||
- FEM (mesh, materials, loads, constraints)
|
- FEM (mesh, materials, loads, constraints)
|
||||||
@@ -50,16 +55,37 @@ class SimFileParser:
|
|||||||
|
|
||||||
self.tree = None
|
self.tree = None
|
||||||
self.root = None
|
self.root = None
|
||||||
self._parse_xml()
|
self.is_binary = False
|
||||||
|
self.sim_strings = [] # Extracted strings from binary file
|
||||||
|
self._parse_file()
|
||||||
|
|
||||||
def _parse_xml(self):
|
def _parse_file(self):
|
||||||
"""Parse the .sim file as XML."""
|
"""
|
||||||
|
Parse the .sim file - handles both XML (test files) and binary (real NX files).
|
||||||
|
"""
|
||||||
|
# First, try XML parsing
|
||||||
try:
|
try:
|
||||||
self.tree = ET.parse(self.sim_path)
|
self.tree = ET.parse(self.sim_path)
|
||||||
self.root = self.tree.getroot()
|
self.root = self.tree.getroot()
|
||||||
except ET.ParseError as e:
|
self.is_binary = False
|
||||||
# .sim files might be binary or encrypted in some NX versions
|
return
|
||||||
raise ValueError(f"Failed to parse .sim file as XML: {e}")
|
except ET.ParseError:
|
||||||
|
# Not XML, must be binary - this is normal for real NX files
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Binary file - extract readable strings
|
||||||
|
try:
|
||||||
|
with open(self.sim_path, 'rb') as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
# Extract strings (sequences of printable ASCII characters)
|
||||||
|
# Minimum length of 4 to avoid noise
|
||||||
|
text_content = content.decode('latin-1', errors='ignore')
|
||||||
|
self.sim_strings = re.findall(r'[\x20-\x7E]{4,}', text_content)
|
||||||
|
self.is_binary = True
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise ValueError(f"Failed to parse .sim file (tried both XML and binary): {e}")
|
||||||
|
|
||||||
def extract_solutions(self) -> List[Dict[str, Any]]:
|
def extract_solutions(self) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
@@ -70,19 +96,52 @@ class SimFileParser:
|
|||||||
"""
|
"""
|
||||||
solutions = []
|
solutions = []
|
||||||
|
|
||||||
# Try to find solution elements (structure varies by NX version)
|
if not self.is_binary and self.root is not None:
|
||||||
# Common patterns: <Solution>, <AnalysisSolution>, <SimSolution>
|
# XML parsing
|
||||||
for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']:
|
for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']:
|
||||||
for elem in self.root.iter(solution_tag):
|
for elem in self.root.iter(solution_tag):
|
||||||
solution_info = {
|
solution_info = {
|
||||||
'name': elem.get('name', 'Unknown'),
|
'name': elem.get('name', 'Unknown'),
|
||||||
'type': elem.get('type', 'Unknown'),
|
'type': elem.get('type', 'Unknown'),
|
||||||
'solver': elem.get('solver', 'NX Nastran'),
|
'solver': elem.get('solver', 'NX Nastran'),
|
||||||
'description': elem.get('description', ''),
|
'description': elem.get('description', ''),
|
||||||
}
|
}
|
||||||
solutions.append(solution_info)
|
solutions.append(solution_info)
|
||||||
|
else:
|
||||||
|
# Binary parsing - look for solution type indicators
|
||||||
|
solution_types = {
|
||||||
|
'SOL 101': 'Linear Statics',
|
||||||
|
'SOL 103': 'Normal Modes',
|
||||||
|
'SOL 106': 'Nonlinear Statics',
|
||||||
|
'SOL 108': 'Direct Frequency Response',
|
||||||
|
'SOL 109': 'Direct Transient Response',
|
||||||
|
'SOL 111': 'Modal Frequency Response',
|
||||||
|
'SOL 112': 'Modal Transient Response',
|
||||||
|
'SOL 200': 'Design Optimization',
|
||||||
|
}
|
||||||
|
|
||||||
# If no solutions found with standard tags, try alternative approach
|
found_solutions = set()
|
||||||
|
for s in self.sim_strings:
|
||||||
|
for sol_id, sol_type in solution_types.items():
|
||||||
|
if sol_id in s:
|
||||||
|
found_solutions.add(sol_type)
|
||||||
|
|
||||||
|
# Also check for solution names in strings
|
||||||
|
for s in self.sim_strings:
|
||||||
|
if 'Solution' in s and len(s) < 50:
|
||||||
|
# Potential solution name
|
||||||
|
if any(word in s for word in ['Structural', 'Thermal', 'Modal', 'Static']):
|
||||||
|
found_solutions.add(s.strip())
|
||||||
|
|
||||||
|
for sol_name in found_solutions:
|
||||||
|
solutions.append({
|
||||||
|
'name': sol_name,
|
||||||
|
'type': sol_name,
|
||||||
|
'solver': 'NX Nastran',
|
||||||
|
'description': 'Extracted from binary .sim file'
|
||||||
|
})
|
||||||
|
|
||||||
|
# Default if nothing found
|
||||||
if not solutions:
|
if not solutions:
|
||||||
solutions.append({
|
solutions.append({
|
||||||
'name': 'Default Solution',
|
'name': 'Default Solution',
|
||||||
@@ -105,26 +164,38 @@ class SimFileParser:
|
|||||||
"""
|
"""
|
||||||
expressions = []
|
expressions = []
|
||||||
|
|
||||||
# Look for expression references in various locations
|
# XML parsing - look for expression elements
|
||||||
for expr_elem in self.root.iter('Expression'):
|
if not self.is_binary and self.root is not None:
|
||||||
expr_info = {
|
for expr_elem in self.root.iter('Expression'):
|
||||||
'name': expr_elem.get('name', ''),
|
expr_info = {
|
||||||
'value': expr_elem.get('value', None),
|
'name': expr_elem.get('name', ''),
|
||||||
'units': expr_elem.get('units', ''),
|
'value': expr_elem.get('value', None),
|
||||||
'formula': expr_elem.text if expr_elem.text else None
|
'units': expr_elem.get('units', ''),
|
||||||
}
|
'formula': expr_elem.text if expr_elem.text else None
|
||||||
if expr_info['name']:
|
}
|
||||||
expressions.append(expr_info)
|
if expr_info['name']:
|
||||||
|
expressions.append(expr_info)
|
||||||
|
|
||||||
# Try to read from associated .prt file
|
# Try to read from associated .prt file (works for both XML and binary .sim)
|
||||||
prt_path = self.sim_path.with_suffix('.prt')
|
# Try multiple naming patterns:
|
||||||
if prt_path.exists():
|
# 1. Same name as .sim: Bracket_sim1.prt
|
||||||
prt_expressions = self._extract_prt_expressions(prt_path)
|
# 2. Base name: Bracket.prt
|
||||||
# Merge with existing, prioritizing .prt values
|
# 3. With _i suffix: Bracket_fem1_i.prt
|
||||||
expr_dict = {e['name']: e for e in expressions}
|
prt_paths = [
|
||||||
for prt_expr in prt_expressions:
|
self.sim_path.with_suffix('.prt'), # Bracket_sim1.prt
|
||||||
expr_dict[prt_expr['name']] = prt_expr
|
self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}.prt", # Bracket.prt
|
||||||
expressions = list(expr_dict.values())
|
self.sim_path.parent / f"{self.sim_path.stem}_i.prt", # Bracket_sim1_i.prt
|
||||||
|
]
|
||||||
|
|
||||||
|
for prt_path in prt_paths:
|
||||||
|
if prt_path.exists():
|
||||||
|
prt_expressions = self._extract_prt_expressions(prt_path)
|
||||||
|
# Merge with existing, prioritizing .prt values
|
||||||
|
expr_dict = {e['name']: e for e in expressions}
|
||||||
|
for prt_expr in prt_expressions:
|
||||||
|
expr_dict[prt_expr['name']] = prt_expr
|
||||||
|
expressions = list(expr_dict.values())
|
||||||
|
break # Use first .prt file found
|
||||||
|
|
||||||
return expressions
|
return expressions
|
||||||
|
|
||||||
@@ -132,8 +203,8 @@ class SimFileParser:
|
|||||||
"""
|
"""
|
||||||
Extract expressions from associated .prt file.
|
Extract expressions from associated .prt file.
|
||||||
|
|
||||||
.prt files are binary, but expression data is sometimes stored
|
.prt files are binary, but expression data is stored in readable sections.
|
||||||
in readable text sections. This is a best-effort extraction.
|
NX expression format: #(Type [units]) name: value;
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prt_path: Path to .prt file
|
prt_path: Path to .prt file
|
||||||
@@ -151,20 +222,37 @@ class SimFileParser:
|
|||||||
# Try to decode as latin-1 (preserves all byte values)
|
# Try to decode as latin-1 (preserves all byte values)
|
||||||
text_content = content.decode('latin-1', errors='ignore')
|
text_content = content.decode('latin-1', errors='ignore')
|
||||||
|
|
||||||
# Pattern: expression_name=value (common in NX files)
|
# Pattern 1: NX native format: #(Number [mm]) tip_thickness: 20;
|
||||||
# Example: "wall_thickness=5.0" or "hole_dia=10"
|
# Captures: type, units, name, value
|
||||||
expr_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
|
nx_pattern = r'#\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
|
||||||
|
|
||||||
for match in re.finditer(expr_pattern, text_content):
|
for match in re.finditer(nx_pattern, text_content):
|
||||||
name, value = match.groups()
|
expr_type, units, name, value = match.groups()
|
||||||
# Filter out common false positives
|
expressions.append({
|
||||||
if len(name) > 2 and not name.startswith('_'):
|
'name': name,
|
||||||
expressions.append({
|
'value': float(value),
|
||||||
'name': name,
|
'units': units,
|
||||||
'value': float(value),
|
'type': expr_type,
|
||||||
'units': '', # Units not easily extractable from binary
|
'source': 'prt_file_nx_format'
|
||||||
'source': 'prt_file'
|
})
|
||||||
})
|
|
||||||
|
# Pattern 2: Fallback - simple name=value pattern
|
||||||
|
# Only use if no NX-format expressions found
|
||||||
|
if not expressions:
|
||||||
|
simple_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
|
||||||
|
|
||||||
|
for match in re.finditer(simple_pattern, text_content):
|
||||||
|
name, value = match.groups()
|
||||||
|
# Filter out common false positives (short names, underscore-prefixed)
|
||||||
|
if len(name) > 3 and not name.startswith('_'):
|
||||||
|
# Additional filter: avoid Nastran keywords
|
||||||
|
if name.upper() not in ['PRINT', 'PUNCH', 'PLOT', 'BOTH', 'GRID', 'GAUSS']:
|
||||||
|
expressions.append({
|
||||||
|
'name': name,
|
||||||
|
'value': float(value),
|
||||||
|
'units': '',
|
||||||
|
'source': 'prt_file_simple_pattern'
|
||||||
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# .prt parsing is best-effort, don't fail if it doesn't work
|
# .prt parsing is best-effort, don't fail if it doesn't work
|
||||||
@@ -187,49 +275,109 @@ class SimFileParser:
|
|||||||
'constraints': []
|
'constraints': []
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract mesh information
|
if not self.is_binary and self.root is not None:
|
||||||
for mesh_elem in self.root.iter('Mesh'):
|
# XML parsing
|
||||||
fem_info['mesh'] = {
|
for mesh_elem in self.root.iter('Mesh'):
|
||||||
'name': mesh_elem.get('name', 'Default Mesh'),
|
fem_info['mesh'] = {
|
||||||
'element_size': mesh_elem.get('element_size', 'Unknown'),
|
'name': mesh_elem.get('name', 'Default Mesh'),
|
||||||
'node_count': mesh_elem.get('node_count', 'Unknown'),
|
'element_size': mesh_elem.get('element_size', 'Unknown'),
|
||||||
'element_count': mesh_elem.get('element_count', 'Unknown')
|
'node_count': mesh_elem.get('node_count', 'Unknown'),
|
||||||
}
|
'element_count': mesh_elem.get('element_count', 'Unknown')
|
||||||
|
}
|
||||||
|
|
||||||
# Extract materials
|
for mat_elem in self.root.iter('Material'):
|
||||||
for mat_elem in self.root.iter('Material'):
|
material = {
|
||||||
material = {
|
'name': mat_elem.get('name', 'Unknown'),
|
||||||
'name': mat_elem.get('name', 'Unknown'),
|
'type': mat_elem.get('type', 'Isotropic'),
|
||||||
'type': mat_elem.get('type', 'Isotropic'),
|
'properties': {}
|
||||||
'properties': {}
|
}
|
||||||
}
|
for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']:
|
||||||
# Common properties
|
if mat_elem.get(prop):
|
||||||
for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']:
|
material['properties'][prop] = mat_elem.get(prop)
|
||||||
if mat_elem.get(prop):
|
fem_info['materials'].append(material)
|
||||||
material['properties'][prop] = mat_elem.get(prop)
|
|
||||||
|
|
||||||
fem_info['materials'].append(material)
|
for elem_type in self.root.iter('ElementType'):
|
||||||
|
fem_info['element_types'].append(elem_type.get('type', 'Unknown'))
|
||||||
|
|
||||||
# Extract element types
|
for load_elem in self.root.iter('Load'):
|
||||||
for elem_type in self.root.iter('ElementType'):
|
load = {
|
||||||
fem_info['element_types'].append(elem_type.get('type', 'Unknown'))
|
'name': load_elem.get('name', 'Unknown'),
|
||||||
|
'type': load_elem.get('type', 'Force'),
|
||||||
|
'magnitude': load_elem.get('magnitude', 'Unknown')
|
||||||
|
}
|
||||||
|
fem_info['loads'].append(load)
|
||||||
|
|
||||||
# Extract loads
|
for constraint_elem in self.root.iter('Constraint'):
|
||||||
for load_elem in self.root.iter('Load'):
|
constraint = {
|
||||||
load = {
|
'name': constraint_elem.get('name', 'Unknown'),
|
||||||
'name': load_elem.get('name', 'Unknown'),
|
'type': constraint_elem.get('type', 'Fixed'),
|
||||||
'type': load_elem.get('type', 'Force'),
|
}
|
||||||
'magnitude': load_elem.get('magnitude', 'Unknown')
|
fem_info['constraints'].append(constraint)
|
||||||
}
|
|
||||||
fem_info['loads'].append(load)
|
|
||||||
|
|
||||||
# Extract constraints
|
else:
|
||||||
for constraint_elem in self.root.iter('Constraint'):
|
# Binary parsing - extract from .fem file if available
|
||||||
constraint = {
|
fem_path = self.sim_path.with_name(self.sim_path.stem.replace('_sim', '_fem') + '.fem')
|
||||||
'name': constraint_elem.get('name', 'Unknown'),
|
if not fem_path.exists():
|
||||||
'type': constraint_elem.get('type', 'Fixed'),
|
# Try alternative naming patterns
|
||||||
}
|
fem_path = self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}_fem1.fem"
|
||||||
fem_info['constraints'].append(constraint)
|
|
||||||
|
if fem_path.exists():
|
||||||
|
fem_info = self._extract_fem_from_fem_file(fem_path)
|
||||||
|
else:
|
||||||
|
# Extract what we can from .sim strings
|
||||||
|
fem_info['note'] = 'Limited FEM info available from binary .sim file'
|
||||||
|
|
||||||
|
return fem_info
|
||||||
|
|
||||||
|
def _extract_fem_from_fem_file(self, fem_path: Path) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Extract FEM information from .fem file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
fem_path: Path to .fem file
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with FEM information
|
||||||
|
"""
|
||||||
|
fem_info = {
|
||||||
|
'mesh': {},
|
||||||
|
'materials': [],
|
||||||
|
'element_types': set(),
|
||||||
|
'loads': [],
|
||||||
|
'constraints': []
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(fem_path, 'rb') as f:
|
||||||
|
content = f.read()
|
||||||
|
text_content = content.decode('latin-1', errors='ignore')
|
||||||
|
|
||||||
|
# Look for mesh metadata
|
||||||
|
mesh_match = re.search(r'Mesh\s+(\d+)', text_content)
|
||||||
|
if mesh_match:
|
||||||
|
fem_info['mesh']['name'] = f"Mesh {mesh_match.group(1)}"
|
||||||
|
|
||||||
|
# Look for material names
|
||||||
|
for material_match in re.finditer(r'MAT\d+\s+([A-Za-z0-9_\-\s]+)', text_content):
|
||||||
|
mat_name = material_match.group(1).strip()
|
||||||
|
if mat_name and len(mat_name) > 2:
|
||||||
|
fem_info['materials'].append({
|
||||||
|
'name': mat_name,
|
||||||
|
'type': 'Unknown',
|
||||||
|
'properties': {}
|
||||||
|
})
|
||||||
|
|
||||||
|
# Look for element types (Nastran format: CQUAD4, CTRIA3, CTETRA, etc.)
|
||||||
|
element_pattern = r'\b(C[A-Z]{3,6}\d?)\b'
|
||||||
|
for elem_match in re.finditer(element_pattern, text_content):
|
||||||
|
elem_type = elem_match.group(1)
|
||||||
|
if elem_type.startswith('C') and len(elem_type) <= 8:
|
||||||
|
fem_info['element_types'].add(elem_type)
|
||||||
|
|
||||||
|
fem_info['element_types'] = list(fem_info['element_types'])
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
fem_info['note'] = f'Could not fully parse .fem file: {e}'
|
||||||
|
|
||||||
return fem_info
|
return fem_info
|
||||||
|
|
||||||
|
|||||||
BIN
tests/Bracket.prt
Normal file
BIN
tests/Bracket.prt
Normal file
Binary file not shown.
BIN
tests/Bracket_fem1.fem
Normal file
BIN
tests/Bracket_fem1.fem
Normal file
Binary file not shown.
BIN
tests/Bracket_sim1.sim
Normal file
BIN
tests/Bracket_sim1.sim
Normal file
Binary file not shown.
Reference in New Issue
Block a user