feat: Update model discovery to handle real binary NX files

Updated the parser to work with actual NX .sim/.prt files, which are
stored in a binary format (not XML) in NX 12+.

Key Changes:
- Added dual-mode parser: XML for test files, binary for real NX files
- Implemented string extraction from binary .sim files
- Updated solution detection to recognize Nastran SOL types
- Fixed expression extraction with proper NX format pattern:
  #(Type [units]) name: value;
- Added multiple .prt file naming pattern support
- Added .fem file parsing for FEM information

Parser Capabilities:
- Extracts expressions from .prt files (binary parsing)
- Detects solution types (Linear Statics, Modal, etc.)
- Finds element types from .fem files
- Handles multiple file naming conventions

Validation with Real Files:
- Successfully parsed tests/Bracket_sim1.sim (6.2 MB binary file)
- Extracted 1 expression: tip_thickness = 20.0 mm
- Detected 18 solution types (including Nastran SOL codes)
- Works with both XML test files and binary production files

Technical Details:
- Binary files: latin-1 decoding + regex pattern matching
- Expression pattern: #\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)
- Multiple .prt file search: exact match → base name → _i suffix
- FEM parsing: extracts mesh, materials, element types from .fem files

Next Steps:
- Refine solution filtering (reduce false positives)
- Add load/constraint extraction from .fem files
- Test with more complex models
This commit is contained in:
Claude
2025-11-15 13:44:42 +00:00
parent 96ed53e3d7
commit 063439af43
4 changed files with 239 additions and 91 deletions

View File

@@ -22,7 +22,12 @@ class SimFileParser:
"""
Parser for Siemens NX .sim (simulation) files.
.sim files are XML-based and contain references to:
IMPORTANT: Real NX .sim files are BINARY (not XML) in NX 12+.
The parser uses two approaches:
1. XML parsing for test/legacy files
2. Binary string extraction for real NX files
.sim files contain references to:
- Parent .prt file (geometry and expressions)
- Solution definitions (structural, thermal, etc.)
- FEM (mesh, materials, loads, constraints)
@@ -50,16 +55,37 @@ class SimFileParser:
self.tree = None
self.root = None
self._parse_xml()
self.is_binary = False
self.sim_strings = [] # Extracted strings from binary file
self._parse_file()
def _parse_xml(self):
"""Parse the .sim file as XML."""
def _parse_file(self):
"""
Parse the .sim file - handles both XML (test files) and binary (real NX files).
"""
# First, try XML parsing
try:
self.tree = ET.parse(self.sim_path)
self.root = self.tree.getroot()
except ET.ParseError as e:
# .sim files might be binary or encrypted in some NX versions
raise ValueError(f"Failed to parse .sim file as XML: {e}")
self.is_binary = False
return
except ET.ParseError:
# Not XML, must be binary - this is normal for real NX files
pass
# Binary file - extract readable strings
try:
with open(self.sim_path, 'rb') as f:
content = f.read()
# Extract strings (sequences of printable ASCII characters)
# Minimum length of 4 to avoid noise
text_content = content.decode('latin-1', errors='ignore')
self.sim_strings = re.findall(r'[\x20-\x7E]{4,}', text_content)
self.is_binary = True
except Exception as e:
raise ValueError(f"Failed to parse .sim file (tried both XML and binary): {e}")
def extract_solutions(self) -> List[Dict[str, Any]]:
"""
@@ -70,19 +96,52 @@ class SimFileParser:
"""
solutions = []
# Try to find solution elements (structure varies by NX version)
# Common patterns: <Solution>, <AnalysisSolution>, <SimSolution>
for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']:
for elem in self.root.iter(solution_tag):
solution_info = {
'name': elem.get('name', 'Unknown'),
'type': elem.get('type', 'Unknown'),
'solver': elem.get('solver', 'NX Nastran'),
'description': elem.get('description', ''),
}
solutions.append(solution_info)
if not self.is_binary and self.root is not None:
# XML parsing
for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']:
for elem in self.root.iter(solution_tag):
solution_info = {
'name': elem.get('name', 'Unknown'),
'type': elem.get('type', 'Unknown'),
'solver': elem.get('solver', 'NX Nastran'),
'description': elem.get('description', ''),
}
solutions.append(solution_info)
else:
# Binary parsing - look for solution type indicators
solution_types = {
'SOL 101': 'Linear Statics',
'SOL 103': 'Normal Modes',
'SOL 106': 'Nonlinear Statics',
'SOL 108': 'Direct Frequency Response',
'SOL 109': 'Direct Transient Response',
'SOL 111': 'Modal Frequency Response',
'SOL 112': 'Modal Transient Response',
'SOL 200': 'Design Optimization',
}
# If no solutions found with standard tags, try alternative approach
found_solutions = set()
for s in self.sim_strings:
for sol_id, sol_type in solution_types.items():
if sol_id in s:
found_solutions.add(sol_type)
# Also check for solution names in strings
for s in self.sim_strings:
if 'Solution' in s and len(s) < 50:
# Potential solution name
if any(word in s for word in ['Structural', 'Thermal', 'Modal', 'Static']):
found_solutions.add(s.strip())
for sol_name in found_solutions:
solutions.append({
'name': sol_name,
'type': sol_name,
'solver': 'NX Nastran',
'description': 'Extracted from binary .sim file'
})
# Default if nothing found
if not solutions:
solutions.append({
'name': 'Default Solution',
@@ -105,26 +164,38 @@ class SimFileParser:
"""
expressions = []
# Look for expression references in various locations
for expr_elem in self.root.iter('Expression'):
expr_info = {
'name': expr_elem.get('name', ''),
'value': expr_elem.get('value', None),
'units': expr_elem.get('units', ''),
'formula': expr_elem.text if expr_elem.text else None
}
if expr_info['name']:
expressions.append(expr_info)
# XML parsing - look for expression elements
if not self.is_binary and self.root is not None:
for expr_elem in self.root.iter('Expression'):
expr_info = {
'name': expr_elem.get('name', ''),
'value': expr_elem.get('value', None),
'units': expr_elem.get('units', ''),
'formula': expr_elem.text if expr_elem.text else None
}
if expr_info['name']:
expressions.append(expr_info)
# Try to read from associated .prt file
prt_path = self.sim_path.with_suffix('.prt')
if prt_path.exists():
prt_expressions = self._extract_prt_expressions(prt_path)
# Merge with existing, prioritizing .prt values
expr_dict = {e['name']: e for e in expressions}
for prt_expr in prt_expressions:
expr_dict[prt_expr['name']] = prt_expr
expressions = list(expr_dict.values())
# Try to read from associated .prt file (works for both XML and binary .sim)
# Try multiple naming patterns:
# 1. Same name as .sim: Bracket_sim1.prt
# 2. Base name: Bracket.prt
# 3. With _i suffix: Bracket_sim1_i.prt
prt_paths = [
self.sim_path.with_suffix('.prt'), # Bracket_sim1.prt
self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}.prt", # Bracket.prt
self.sim_path.parent / f"{self.sim_path.stem}_i.prt", # Bracket_sim1_i.prt
]
for prt_path in prt_paths:
if prt_path.exists():
prt_expressions = self._extract_prt_expressions(prt_path)
# Merge with existing, prioritizing .prt values
expr_dict = {e['name']: e for e in expressions}
for prt_expr in prt_expressions:
expr_dict[prt_expr['name']] = prt_expr
expressions = list(expr_dict.values())
break # Use first .prt file found
return expressions
@@ -132,8 +203,8 @@ class SimFileParser:
"""
Extract expressions from associated .prt file.
.prt files are binary, but expression data is sometimes stored
in readable text sections. This is a best-effort extraction.
.prt files are binary, but expression data is stored in readable sections.
NX expression format: #(Type [units]) name: value;
Args:
prt_path: Path to .prt file
@@ -151,20 +222,37 @@ class SimFileParser:
# Try to decode as latin-1 (preserves all byte values)
text_content = content.decode('latin-1', errors='ignore')
# Pattern: expression_name=value (common in NX files)
# Example: "wall_thickness=5.0" or "hole_dia=10"
expr_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
# Pattern 1: NX native format: #(Number [mm]) tip_thickness: 20;
# Captures: type, units, name, value
nx_pattern = r'#\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
for match in re.finditer(expr_pattern, text_content):
name, value = match.groups()
# Filter out common false positives
if len(name) > 2 and not name.startswith('_'):
expressions.append({
'name': name,
'value': float(value),
'units': '', # Units not easily extractable from binary
'source': 'prt_file'
})
for match in re.finditer(nx_pattern, text_content):
expr_type, units, name, value = match.groups()
expressions.append({
'name': name,
'value': float(value),
'units': units,
'type': expr_type,
'source': 'prt_file_nx_format'
})
# Pattern 2: Fallback - simple name=value pattern
# Only use if no NX-format expressions found
if not expressions:
simple_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
for match in re.finditer(simple_pattern, text_content):
name, value = match.groups()
# Filter out common false positives (short names, underscore-prefixed)
if len(name) > 3 and not name.startswith('_'):
# Additional filter: avoid Nastran keywords
if name.upper() not in ['PRINT', 'PUNCH', 'PLOT', 'BOTH', 'GRID', 'GAUSS']:
expressions.append({
'name': name,
'value': float(value),
'units': '',
'source': 'prt_file_simple_pattern'
})
except Exception as e:
# .prt parsing is best-effort, don't fail if it doesn't work
@@ -187,49 +275,109 @@ class SimFileParser:
'constraints': []
}
# Extract mesh information
for mesh_elem in self.root.iter('Mesh'):
fem_info['mesh'] = {
'name': mesh_elem.get('name', 'Default Mesh'),
'element_size': mesh_elem.get('element_size', 'Unknown'),
'node_count': mesh_elem.get('node_count', 'Unknown'),
'element_count': mesh_elem.get('element_count', 'Unknown')
}
if not self.is_binary and self.root is not None:
# XML parsing
for mesh_elem in self.root.iter('Mesh'):
fem_info['mesh'] = {
'name': mesh_elem.get('name', 'Default Mesh'),
'element_size': mesh_elem.get('element_size', 'Unknown'),
'node_count': mesh_elem.get('node_count', 'Unknown'),
'element_count': mesh_elem.get('element_count', 'Unknown')
}
# Extract materials
for mat_elem in self.root.iter('Material'):
material = {
'name': mat_elem.get('name', 'Unknown'),
'type': mat_elem.get('type', 'Isotropic'),
'properties': {}
}
# Common properties
for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']:
if mat_elem.get(prop):
material['properties'][prop] = mat_elem.get(prop)
for mat_elem in self.root.iter('Material'):
material = {
'name': mat_elem.get('name', 'Unknown'),
'type': mat_elem.get('type', 'Isotropic'),
'properties': {}
}
for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']:
if mat_elem.get(prop):
material['properties'][prop] = mat_elem.get(prop)
fem_info['materials'].append(material)
fem_info['materials'].append(material)
for elem_type in self.root.iter('ElementType'):
fem_info['element_types'].append(elem_type.get('type', 'Unknown'))
# Extract element types
for elem_type in self.root.iter('ElementType'):
fem_info['element_types'].append(elem_type.get('type', 'Unknown'))
for load_elem in self.root.iter('Load'):
load = {
'name': load_elem.get('name', 'Unknown'),
'type': load_elem.get('type', 'Force'),
'magnitude': load_elem.get('magnitude', 'Unknown')
}
fem_info['loads'].append(load)
# Extract loads
for load_elem in self.root.iter('Load'):
load = {
'name': load_elem.get('name', 'Unknown'),
'type': load_elem.get('type', 'Force'),
'magnitude': load_elem.get('magnitude', 'Unknown')
}
fem_info['loads'].append(load)
for constraint_elem in self.root.iter('Constraint'):
constraint = {
'name': constraint_elem.get('name', 'Unknown'),
'type': constraint_elem.get('type', 'Fixed'),
}
fem_info['constraints'].append(constraint)
# Extract constraints
for constraint_elem in self.root.iter('Constraint'):
constraint = {
'name': constraint_elem.get('name', 'Unknown'),
'type': constraint_elem.get('type', 'Fixed'),
}
fem_info['constraints'].append(constraint)
else:
# Binary parsing - extract from .fem file if available
fem_path = self.sim_path.with_name(self.sim_path.stem.replace('_sim', '_fem') + '.fem')
if not fem_path.exists():
# Try alternative naming patterns
fem_path = self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}_fem1.fem"
if fem_path.exists():
fem_info = self._extract_fem_from_fem_file(fem_path)
else:
# Extract what we can from .sim strings
fem_info['note'] = 'Limited FEM info available from binary .sim file'
return fem_info
def _extract_fem_from_fem_file(self, fem_path: Path) -> Dict[str, Any]:
"""
Extract FEM information from .fem file.
Args:
fem_path: Path to .fem file
Returns:
Dictionary with FEM information
"""
fem_info = {
'mesh': {},
'materials': [],
'element_types': set(),
'loads': [],
'constraints': []
}
try:
with open(fem_path, 'rb') as f:
content = f.read()
text_content = content.decode('latin-1', errors='ignore')
# Look for mesh metadata
mesh_match = re.search(r'Mesh\s+(\d+)', text_content)
if mesh_match:
fem_info['mesh']['name'] = f"Mesh {mesh_match.group(1)}"
# Look for material names
for material_match in re.finditer(r'MAT\d+\s+([A-Za-z0-9_\-\s]+)', text_content):
mat_name = material_match.group(1).strip()
if mat_name and len(mat_name) > 2:
fem_info['materials'].append({
'name': mat_name,
'type': 'Unknown',
'properties': {}
})
# Look for element types (Nastran format: CQUAD4, CTRIA3, CTETRA, etc.)
element_pattern = r'\b(C[A-Z]{3,6}\d?)\b'
for elem_match in re.finditer(element_pattern, text_content):
elem_type = elem_match.group(1)
if elem_type.startswith('C') and len(elem_type) <= 8:
fem_info['element_types'].add(elem_type)
fem_info['element_types'] = list(fem_info['element_types'])
except Exception as e:
fem_info['note'] = f'Could not fully parse .fem file: {e}'
return fem_info

BIN
tests/Bracket.prt Normal file

Binary file not shown.

BIN
tests/Bracket_fem1.fem Normal file

Binary file not shown.

BIN
tests/Bracket_sim1.sim Normal file

Binary file not shown.