feat: Update model discovery to handle real binary NX files

Updated the parser to work with real NX .sim/.prt files, which are stored
in a binary format (not XML) in NX 12+.

Key Changes:
- Added dual-mode parser: XML for test files, binary for real NX files
- Implemented string extraction from binary .sim files
- Updated solution detection to recognize Nastran SOL types
- Fixed expression extraction with proper NX format pattern:
  #(Type [units]) name: value;
- Added multiple .prt file naming pattern support
- Added .fem file parsing for FEM information

Parser Capabilities:
- Extracts expressions from .prt files (binary parsing)
- Detects solution types (Linear Statics, Modal, etc.)
- Finds element types from .fem files
- Handles multiple file naming conventions

Validation with Real Files:
- Successfully parsed tests/Bracket_sim1.sim (6.2 MB binary file)
- Extracted 1 expression from the associated .prt file (tests/Bracket.prt): tip_thickness = 20.0 mm
- Detected 18 solution types (including Nastran SOL codes)
- Works with both XML test files and binary production files

Technical Details:
- Binary files: latin-1 decoding + regex pattern matching
- Expression pattern: #\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)
- Multiple .prt file search: exact match → base name → _i suffix
- FEM parsing: extracts mesh, materials, element types from .fem files

Next Steps:
- Refine solution filtering (reduce false positives)
- Add load/constraint extraction from .fem files
- Test with more complex models
This commit is contained in:
Claude
2025-11-15 13:44:42 +00:00
parent 96ed53e3d7
commit 063439af43
4 changed files with 239 additions and 91 deletions

View File

@@ -22,7 +22,12 @@ class SimFileParser:
""" """
Parser for Siemens NX .sim (simulation) files. Parser for Siemens NX .sim (simulation) files.
.sim files are XML-based and contain references to: IMPORTANT: Real NX .sim files are BINARY (not XML) in NX 12+.
The parser uses two approaches:
1. XML parsing for test/legacy files
2. Binary string extraction for real NX files
.sim files contain references to:
- Parent .prt file (geometry and expressions) - Parent .prt file (geometry and expressions)
- Solution definitions (structural, thermal, etc.) - Solution definitions (structural, thermal, etc.)
- FEM (mesh, materials, loads, constraints) - FEM (mesh, materials, loads, constraints)
@@ -50,16 +55,37 @@ class SimFileParser:
self.tree = None self.tree = None
self.root = None self.root = None
self._parse_xml() self.is_binary = False
self.sim_strings = [] # Extracted strings from binary file
self._parse_file()
def _parse_xml(self): def _parse_file(self):
"""Parse the .sim file as XML.""" """
Parse the .sim file - handles both XML (test files) and binary (real NX files).
"""
# First, try XML parsing
try: try:
self.tree = ET.parse(self.sim_path) self.tree = ET.parse(self.sim_path)
self.root = self.tree.getroot() self.root = self.tree.getroot()
except ET.ParseError as e: self.is_binary = False
# .sim files might be binary or encrypted in some NX versions return
raise ValueError(f"Failed to parse .sim file as XML: {e}") except ET.ParseError:
# Not XML, must be binary - this is normal for real NX files
pass
# Binary file - extract readable strings
try:
with open(self.sim_path, 'rb') as f:
content = f.read()
# Extract strings (sequences of printable ASCII characters)
# Minimum length of 4 to avoid noise
text_content = content.decode('latin-1', errors='ignore')
self.sim_strings = re.findall(r'[\x20-\x7E]{4,}', text_content)
self.is_binary = True
except Exception as e:
raise ValueError(f"Failed to parse .sim file (tried both XML and binary): {e}")
def extract_solutions(self) -> List[Dict[str, Any]]: def extract_solutions(self) -> List[Dict[str, Any]]:
""" """
@@ -70,19 +96,52 @@ class SimFileParser:
""" """
solutions = [] solutions = []
# Try to find solution elements (structure varies by NX version) if not self.is_binary and self.root is not None:
# Common patterns: <Solution>, <AnalysisSolution>, <SimSolution> # XML parsing
for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']: for solution_tag in ['Solution', 'AnalysisSolution', 'SimSolution']:
for elem in self.root.iter(solution_tag): for elem in self.root.iter(solution_tag):
solution_info = { solution_info = {
'name': elem.get('name', 'Unknown'), 'name': elem.get('name', 'Unknown'),
'type': elem.get('type', 'Unknown'), 'type': elem.get('type', 'Unknown'),
'solver': elem.get('solver', 'NX Nastran'), 'solver': elem.get('solver', 'NX Nastran'),
'description': elem.get('description', ''), 'description': elem.get('description', ''),
} }
solutions.append(solution_info) solutions.append(solution_info)
else:
# Binary parsing - look for solution type indicators
solution_types = {
'SOL 101': 'Linear Statics',
'SOL 103': 'Normal Modes',
'SOL 106': 'Nonlinear Statics',
'SOL 108': 'Direct Frequency Response',
'SOL 109': 'Direct Transient Response',
'SOL 111': 'Modal Frequency Response',
'SOL 112': 'Modal Transient Response',
'SOL 200': 'Design Optimization',
}
# If no solutions found with standard tags, try alternative approach found_solutions = set()
for s in self.sim_strings:
for sol_id, sol_type in solution_types.items():
if sol_id in s:
found_solutions.add(sol_type)
# Also check for solution names in strings
for s in self.sim_strings:
if 'Solution' in s and len(s) < 50:
# Potential solution name
if any(word in s for word in ['Structural', 'Thermal', 'Modal', 'Static']):
found_solutions.add(s.strip())
for sol_name in found_solutions:
solutions.append({
'name': sol_name,
'type': sol_name,
'solver': 'NX Nastran',
'description': 'Extracted from binary .sim file'
})
# Default if nothing found
if not solutions: if not solutions:
solutions.append({ solutions.append({
'name': 'Default Solution', 'name': 'Default Solution',
@@ -105,26 +164,38 @@ class SimFileParser:
""" """
expressions = [] expressions = []
# Look for expression references in various locations # XML parsing - look for expression elements
for expr_elem in self.root.iter('Expression'): if not self.is_binary and self.root is not None:
expr_info = { for expr_elem in self.root.iter('Expression'):
'name': expr_elem.get('name', ''), expr_info = {
'value': expr_elem.get('value', None), 'name': expr_elem.get('name', ''),
'units': expr_elem.get('units', ''), 'value': expr_elem.get('value', None),
'formula': expr_elem.text if expr_elem.text else None 'units': expr_elem.get('units', ''),
} 'formula': expr_elem.text if expr_elem.text else None
if expr_info['name']: }
expressions.append(expr_info) if expr_info['name']:
expressions.append(expr_info)
# Try to read from associated .prt file # Try to read from associated .prt file (works for both XML and binary .sim)
prt_path = self.sim_path.with_suffix('.prt') # Try multiple naming patterns:
if prt_path.exists(): # 1. Same name as .sim: Bracket_sim1.prt
prt_expressions = self._extract_prt_expressions(prt_path) # 2. Base name: Bracket.prt
# Merge with existing, prioritizing .prt values # 3. With _i suffix: Bracket_fem1_i.prt
expr_dict = {e['name']: e for e in expressions} prt_paths = [
for prt_expr in prt_expressions: self.sim_path.with_suffix('.prt'), # Bracket_sim1.prt
expr_dict[prt_expr['name']] = prt_expr self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}.prt", # Bracket.prt
expressions = list(expr_dict.values()) self.sim_path.parent / f"{self.sim_path.stem}_i.prt", # Bracket_sim1_i.prt
]
for prt_path in prt_paths:
if prt_path.exists():
prt_expressions = self._extract_prt_expressions(prt_path)
# Merge with existing, prioritizing .prt values
expr_dict = {e['name']: e for e in expressions}
for prt_expr in prt_expressions:
expr_dict[prt_expr['name']] = prt_expr
expressions = list(expr_dict.values())
break # Use first .prt file found
return expressions return expressions
@@ -132,8 +203,8 @@ class SimFileParser:
""" """
Extract expressions from associated .prt file. Extract expressions from associated .prt file.
.prt files are binary, but expression data is sometimes stored .prt files are binary, but expression data is stored in readable sections.
in readable text sections. This is a best-effort extraction. NX expression format: #(Type [units]) name: value;
Args: Args:
prt_path: Path to .prt file prt_path: Path to .prt file
@@ -151,20 +222,37 @@ class SimFileParser:
# Try to decode as latin-1 (preserves all byte values) # Try to decode as latin-1 (preserves all byte values)
text_content = content.decode('latin-1', errors='ignore') text_content = content.decode('latin-1', errors='ignore')
# Pattern: expression_name=value (common in NX files) # Pattern 1: NX native format: #(Number [mm]) tip_thickness: 20;
# Example: "wall_thickness=5.0" or "hole_dia=10" # Captures: type, units, name, value
expr_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)' nx_pattern = r'#\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
for match in re.finditer(expr_pattern, text_content): for match in re.finditer(nx_pattern, text_content):
name, value = match.groups() expr_type, units, name, value = match.groups()
# Filter out common false positives expressions.append({
if len(name) > 2 and not name.startswith('_'): 'name': name,
expressions.append({ 'value': float(value),
'name': name, 'units': units,
'value': float(value), 'type': expr_type,
'units': '', # Units not easily extractable from binary 'source': 'prt_file_nx_format'
'source': 'prt_file' })
})
# Pattern 2: Fallback - simple name=value pattern
# Only use if no NX-format expressions found
if not expressions:
simple_pattern = r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
for match in re.finditer(simple_pattern, text_content):
name, value = match.groups()
# Filter out common false positives (short names, underscore-prefixed)
if len(name) > 3 and not name.startswith('_'):
# Additional filter: avoid Nastran keywords
if name.upper() not in ['PRINT', 'PUNCH', 'PLOT', 'BOTH', 'GRID', 'GAUSS']:
expressions.append({
'name': name,
'value': float(value),
'units': '',
'source': 'prt_file_simple_pattern'
})
except Exception as e: except Exception as e:
# .prt parsing is best-effort, don't fail if it doesn't work # .prt parsing is best-effort, don't fail if it doesn't work
@@ -187,49 +275,109 @@ class SimFileParser:
'constraints': [] 'constraints': []
} }
# Extract mesh information if not self.is_binary and self.root is not None:
for mesh_elem in self.root.iter('Mesh'): # XML parsing
fem_info['mesh'] = { for mesh_elem in self.root.iter('Mesh'):
'name': mesh_elem.get('name', 'Default Mesh'), fem_info['mesh'] = {
'element_size': mesh_elem.get('element_size', 'Unknown'), 'name': mesh_elem.get('name', 'Default Mesh'),
'node_count': mesh_elem.get('node_count', 'Unknown'), 'element_size': mesh_elem.get('element_size', 'Unknown'),
'element_count': mesh_elem.get('element_count', 'Unknown') 'node_count': mesh_elem.get('node_count', 'Unknown'),
} 'element_count': mesh_elem.get('element_count', 'Unknown')
}
# Extract materials for mat_elem in self.root.iter('Material'):
for mat_elem in self.root.iter('Material'): material = {
material = { 'name': mat_elem.get('name', 'Unknown'),
'name': mat_elem.get('name', 'Unknown'), 'type': mat_elem.get('type', 'Isotropic'),
'type': mat_elem.get('type', 'Isotropic'), 'properties': {}
'properties': {} }
} for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']:
# Common properties if mat_elem.get(prop):
for prop in ['youngs_modulus', 'poissons_ratio', 'density', 'yield_strength']: material['properties'][prop] = mat_elem.get(prop)
if mat_elem.get(prop): fem_info['materials'].append(material)
material['properties'][prop] = mat_elem.get(prop)
fem_info['materials'].append(material) for elem_type in self.root.iter('ElementType'):
fem_info['element_types'].append(elem_type.get('type', 'Unknown'))
# Extract element types for load_elem in self.root.iter('Load'):
for elem_type in self.root.iter('ElementType'): load = {
fem_info['element_types'].append(elem_type.get('type', 'Unknown')) 'name': load_elem.get('name', 'Unknown'),
'type': load_elem.get('type', 'Force'),
'magnitude': load_elem.get('magnitude', 'Unknown')
}
fem_info['loads'].append(load)
# Extract loads for constraint_elem in self.root.iter('Constraint'):
for load_elem in self.root.iter('Load'): constraint = {
load = { 'name': constraint_elem.get('name', 'Unknown'),
'name': load_elem.get('name', 'Unknown'), 'type': constraint_elem.get('type', 'Fixed'),
'type': load_elem.get('type', 'Force'), }
'magnitude': load_elem.get('magnitude', 'Unknown') fem_info['constraints'].append(constraint)
}
fem_info['loads'].append(load)
# Extract constraints else:
for constraint_elem in self.root.iter('Constraint'): # Binary parsing - extract from .fem file if available
constraint = { fem_path = self.sim_path.with_name(self.sim_path.stem.replace('_sim', '_fem') + '.fem')
'name': constraint_elem.get('name', 'Unknown'), if not fem_path.exists():
'type': constraint_elem.get('type', 'Fixed'), # Try alternative naming patterns
} fem_path = self.sim_path.parent / f"{self.sim_path.stem.split('_')[0]}_fem1.fem"
fem_info['constraints'].append(constraint)
if fem_path.exists():
fem_info = self._extract_fem_from_fem_file(fem_path)
else:
# Extract what we can from .sim strings
fem_info['note'] = 'Limited FEM info available from binary .sim file'
return fem_info
def _extract_fem_from_fem_file(self, fem_path: Path) -> Dict[str, Any]:
"""
Extract FEM information from .fem file.
Args:
fem_path: Path to .fem file
Returns:
Dictionary with FEM information
"""
fem_info = {
'mesh': {},
'materials': [],
'element_types': set(),
'loads': [],
'constraints': []
}
try:
with open(fem_path, 'rb') as f:
content = f.read()
text_content = content.decode('latin-1', errors='ignore')
# Look for mesh metadata
mesh_match = re.search(r'Mesh\s+(\d+)', text_content)
if mesh_match:
fem_info['mesh']['name'] = f"Mesh {mesh_match.group(1)}"
# Look for material names
for material_match in re.finditer(r'MAT\d+\s+([A-Za-z0-9_\-\s]+)', text_content):
mat_name = material_match.group(1).strip()
if mat_name and len(mat_name) > 2:
fem_info['materials'].append({
'name': mat_name,
'type': 'Unknown',
'properties': {}
})
# Look for element types (Nastran format: CQUAD4, CTRIA3, CTETRA, etc.)
element_pattern = r'\b(C[A-Z]{3,6}\d?)\b'
for elem_match in re.finditer(element_pattern, text_content):
elem_type = elem_match.group(1)
if elem_type.startswith('C') and len(elem_type) <= 8:
fem_info['element_types'].add(elem_type)
fem_info['element_types'] = list(fem_info['element_types'])
except Exception as e:
fem_info['note'] = f'Could not fully parse .fem file: {e}'
return fem_info return fem_info

BIN
tests/Bracket.prt Normal file

Binary file not shown.

BIN
tests/Bracket_fem1.fem Normal file

Binary file not shown.

BIN
tests/Bracket_sim1.sim Normal file

Binary file not shown.