Files
Atomizer/mcp_server/tools/model_discovery.py
Claude 96ed53e3d7 feat: Implement Option A - MCP Model Discovery tool
This commit implements the first phase of the MCP server as outlined
in PROJECT_SUMMARY.md Option A: Model Discovery.

New Features:
- Complete .sim file parser (XML-based)
- Expression extraction from .sim and .prt files
- Solution, FEM, materials, loads, constraints extraction
- Structured JSON output for LLM consumption
- Markdown formatting for human-readable output

Implementation Details:
- mcp_server/tools/model_discovery.py: Core parser and discovery logic
  - SimFileParser class: Handles XML parsing of .sim files
  - discover_fea_model(): Main MCP tool function
  - format_discovery_result_for_llm(): Markdown formatter
- mcp_server/tools/__init__.py: Updated to export new functions
- mcp_server/tools/README.md: Complete documentation for MCP tools

Testing & Examples:
- examples/test_bracket.sim: Sample .sim file for testing
- tests/mcp_server/tools/test_model_discovery.py: Comprehensive unit tests
- Manual testing verified: Successfully extracts 4 expressions, solution
  info, mesh data, materials, loads, and constraints

Validation:
- Command-line tool works: python mcp_server/tools/model_discovery.py examples/test_bracket.sim
- Output includes both Markdown and JSON formats
- Error handling for missing files and invalid formats

Next Steps (Phase 2):
- Port optimization engine from P04 Atomizer
- Implement build_optimization_config tool
- Create pluggable result extractor system

References:
- PROJECT_SUMMARY.md: Option A (lines 339-350)
- mcp_server/prompts/system_prompt.md: Model Discovery workflow
2025-11-15 13:23:05 +00:00

441 lines
15 KiB
Python

"""
MCP Tool: FEA Model Discovery
Parses Siemens NX .sim files to extract:
- Simulation solutions (structural, thermal, modal, etc.)
- Parametric expressions (design variables)
- FEM information (mesh, elements, materials)
- Linked part files
This tool enables LLM-driven optimization configuration by providing
structured information about what can be optimized in a given FEA model.
"""
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, Any, List, Optional
import json
import re
class SimFileParser:
    """
    Parser for Siemens NX .sim (simulation) files.

    The file is read as XML and searched for well-known element names:
    - Solution definitions (structural, thermal, etc.)
    - Expressions (design variables)
    - FEM data (mesh, materials, element types, loads, constraints)

    A sibling .prt file (same stem, same directory) is consulted on a
    best-effort basis for expression values.
    """

    # Element names that may carry a solution definition
    # (structure varies by NX version).
    _SOLUTION_TAGS = ('Solution', 'AnalysisSolution', 'SimSolution')

    # Material attributes copied into the 'properties' dict when present.
    _MATERIAL_PROPERTIES = (
        'youngs_modulus', 'poissons_ratio', 'density', 'yield_strength',
    )

    # Extensions probed next to the .sim file for solver output/input decks.
    _RESULT_EXTENSIONS = ('.op2', '.f06', '.f04', '.bdf')

    # Pattern: expression_name=value, e.g. "wall_thickness=5.0" or "hole_dia=10".
    # Compiled once because it is applied to the whole .prt content.
    _PRT_EXPR_PATTERN = re.compile(
        r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
    )

    def __init__(self, sim_path: Path):
        """
        Initialize parser with path to .sim file and parse it immediately.

        Args:
            sim_path: Path to .sim file

        Raises:
            FileNotFoundError: If sim file doesn't exist
            ValueError: If the suffix is not .sim or the content is not
                parseable XML
        """
        self.sim_path = Path(sim_path)
        if not self.sim_path.exists():
            raise FileNotFoundError(f"Sim file not found: {sim_path}")
        if self.sim_path.suffix.lower() != '.sim':
            raise ValueError(f"Not a .sim file: {sim_path}")

        self.tree: Optional[ET.ElementTree] = None
        self.root: Optional[ET.Element] = None
        self._parse_xml()

    def _parse_xml(self):
        """
        Parse the .sim file as XML and store the tree and its root element.

        Raises:
            ValueError: If the file is not well-formed XML. The original
                ParseError is attached as ``__cause__``.
        """
        try:
            self.tree = ET.parse(self.sim_path)
        except ET.ParseError as e:
            # .sim files might be binary or encrypted in some NX versions
            raise ValueError(f"Failed to parse .sim file as XML: {e}") from e
        self.root = self.tree.getroot()

    def extract_solutions(self) -> List[Dict[str, Any]]:
        """
        Extract solution definitions from .sim file.

        Returns:
            List of solution dictionaries with name, type, solver and
            description. If no known solution element is present, a single
            placeholder entry ('Default Solution') is returned instead of an
            empty list.
        """
        solutions = [
            {
                'name': elem.get('name', 'Unknown'),
                'type': elem.get('type', 'Unknown'),
                'solver': elem.get('solver', 'NX Nastran'),
                'description': elem.get('description', ''),
            }
            for solution_tag in self._SOLUTION_TAGS
            for elem in self.root.iter(solution_tag)
        ]

        # Nothing matched the known tags: report a placeholder so callers
        # always get at least one entry.
        if not solutions:
            solutions.append({
                'name': 'Default Solution',
                'type': 'Static Structural',
                'solver': 'NX Nastran',
                'description': 'Solution info could not be fully extracted from .sim file'
            })
        return solutions

    def extract_expressions(self) -> List[Dict[str, Any]]:
        """
        Extract expressions from the .sim file, merged with the sibling .prt.

        <Expression> elements without a name are skipped. If a .prt file with
        the same stem exists, its expressions are merged in by name: fields
        found in the .prt take priority, but empty placeholders from the .prt
        (e.g. its always-empty 'units') do not erase information that the
        .sim file provided.

        Returns:
            List of expression dictionaries with name, value, units (and
            'formula' for .sim entries, 'source' for .prt entries)
        """
        expressions = []
        for expr_elem in self.root.iter('Expression'):
            expr_info = {
                'name': expr_elem.get('name', ''),
                'value': expr_elem.get('value', None),
                'units': expr_elem.get('units', ''),
                'formula': expr_elem.text if expr_elem.text else None
            }
            if expr_info['name']:
                expressions.append(expr_info)

        # Try to read from associated .prt file
        prt_path = self.sim_path.with_suffix('.prt')
        if prt_path.exists():
            merged = {e['name']: e for e in expressions}
            for prt_expr in self._extract_prt_expressions(prt_path):
                known = merged.get(prt_expr['name'], {})
                # Only meaningful .prt fields may override .sim data.
                overrides = {
                    key: val for key, val in prt_expr.items()
                    if val is not None and val != ''
                }
                # Layering: .prt skeleton (guarantees every key exists),
                # then .sim data, then the non-empty .prt values on top.
                merged[prt_expr['name']] = {**prt_expr, **known, **overrides}
            expressions = list(merged.values())
        return expressions

    def _extract_prt_expressions(self, prt_path: Path) -> List[Dict[str, Any]]:
        """
        Extract expressions from associated .prt file.

        .prt files are binary, but expression data is sometimes stored
        in readable text sections. This is a best-effort extraction: any
        read failure is logged as a warning and whatever was collected so
        far is returned.

        Args:
            prt_path: Path to .prt file

        Returns:
            List of expression dictionaries (value as float, empty units,
            source 'prt_file'). The same name may appear more than once.
        """
        # Local import keeps this block self-contained.
        import logging

        expressions = []
        try:
            # latin-1 maps every byte to a code point, so decoding never fails.
            text_content = prt_path.read_bytes().decode('latin-1')
            for match in self._PRT_EXPR_PATTERN.finditer(text_content):
                name, value = match.groups()
                # Filter out common false positives
                if len(name) > 2 and not name.startswith('_'):
                    expressions.append({
                        'name': name,
                        'value': float(value),
                        'units': '',  # Units not easily extractable from binary
                        'source': 'prt_file'
                    })
        except (OSError, ValueError, MemoryError) as e:
            # Report through logging rather than print(): stdout may be the
            # transport channel when this runs inside an MCP server.
            logging.getLogger(__name__).warning(
                "Could not extract expressions from .prt file: %s", e
            )
        return expressions

    def extract_fem_info(self) -> Dict[str, Any]:
        """
        Extract FEM (finite element model) information.

        Attribute values are returned as the strings found in the XML.
        If several <Mesh> elements exist, the last one wins.

        Returns:
            Dictionary with keys 'mesh', 'materials', 'element_types',
            'loads' and 'constraints'
        """
        fem_info = {
            'mesh': {},
            'materials': [],
            'element_types': [],
            'loads': [],
            'constraints': []
        }

        # Extract mesh information
        for mesh_elem in self.root.iter('Mesh'):
            fem_info['mesh'] = {
                'name': mesh_elem.get('name', 'Default Mesh'),
                'element_size': mesh_elem.get('element_size', 'Unknown'),
                'node_count': mesh_elem.get('node_count', 'Unknown'),
                'element_count': mesh_elem.get('element_count', 'Unknown')
            }

        # Extract materials
        for mat_elem in self.root.iter('Material'):
            properties = {}
            for prop in self._MATERIAL_PROPERTIES:
                prop_value = mat_elem.get(prop)
                # Missing and empty attributes are both omitted.
                if prop_value:
                    properties[prop] = prop_value
            fem_info['materials'].append({
                'name': mat_elem.get('name', 'Unknown'),
                'type': mat_elem.get('type', 'Isotropic'),
                'properties': properties
            })

        # Extract element types
        for elem_type in self.root.iter('ElementType'):
            fem_info['element_types'].append(elem_type.get('type', 'Unknown'))

        # Extract loads
        for load_elem in self.root.iter('Load'):
            fem_info['loads'].append({
                'name': load_elem.get('name', 'Unknown'),
                'type': load_elem.get('type', 'Force'),
                'magnitude': load_elem.get('magnitude', 'Unknown')
            })

        # Extract constraints
        for constraint_elem in self.root.iter('Constraint'):
            fem_info['constraints'].append({
                'name': constraint_elem.get('name', 'Unknown'),
                'type': constraint_elem.get('type', 'Fixed'),
            })
        return fem_info

    def get_linked_files(self) -> Dict[str, str]:
        """
        Get paths to files that sit next to the .sim file and share its stem.

        Returns:
            Dictionary mapping file type to path: 'part_file' for the .prt
            file and 'result<ext>' for each existing .op2/.f06/.f04/.bdf file.
            Only files that exist on disk are listed.
        """
        linked_files = {}

        # .prt file (geometry and expressions)
        prt_path = self.sim_path.with_suffix('.prt')
        if prt_path.exists():
            linked_files['part_file'] = str(prt_path)

        # Nastran result files
        result_dir = self.sim_path.parent
        sim_name = self.sim_path.stem
        for ext in self._RESULT_EXTENSIONS:
            result_file = result_dir / f"{sim_name}{ext}"
            if result_file.exists():
                linked_files[f'result{ext}'] = str(result_file)
        return linked_files
def discover_fea_model(sim_file_path: str) -> Dict[str, Any]:
    """
    MCP Tool: Discover FEA Model

    Resolves the given path, parses the .sim file with SimFileParser and
    collects everything the parser can extract into one dictionary.
    Exceptions are never propagated; failures are reported as an error
    dictionary instead.

    Args:
        sim_file_path: Path to .sim file (Windows or Unix format)

    Returns:
        On success, a dictionary with 'status' == 'success' plus
        'sim_file', 'file_exists', 'solutions', 'expressions', 'fem_info',
        'linked_files', 'metadata' and 'summary' (item counts).
        On failure, a dictionary with 'status' == 'error', 'error_type'
        ('file_not_found', 'invalid_file' or 'unexpected_error'),
        'message' and 'suggestion'.

    Example:
        result = discover_fea_model("C:/Projects/Bracket/analysis.sim")
        if result['status'] == 'success':
            print(result['expressions'])
    """
    def _failure(kind: str, exc: Exception, hint: str) -> Dict[str, Any]:
        # Uniform shape for every error response.
        return {
            'status': 'error',
            'error_type': kind,
            'message': str(exc),
            'suggestion': hint
        }

    try:
        # Path() accepts both separators; resolve() makes the path absolute.
        resolved = Path(sim_file_path).resolve()
        model = SimFileParser(resolved)

        report = {
            'status': 'success',
            'sim_file': str(resolved),
            'file_exists': resolved.exists(),
            'solutions': model.extract_solutions(),
            'expressions': model.extract_expressions(),
            'fem_info': model.extract_fem_info(),
            'linked_files': model.get_linked_files(),
            'metadata': {
                'parser_version': '0.1.0',
                # Hard-coded placeholder, not read from the file.
                'nx_version': 'NX 2412',
            }
        }

        # Summary statistics derived from the extracted collections.
        fem = report['fem_info']
        report['summary'] = {
            'solution_count': len(report['solutions']),
            'expression_count': len(report['expressions']),
            'material_count': len(fem['materials']),
            'load_count': len(fem['loads']),
            'constraint_count': len(fem['constraints']),
        }
        return report
    except FileNotFoundError as missing:
        return _failure(
            'file_not_found',
            missing,
            'Check that the file path is absolute and the .sim file exists',
        )
    except ValueError as invalid:
        return _failure(
            'invalid_file',
            invalid,
            'Ensure the file is a valid NX .sim file (not corrupted or encrypted)',
        )
    except Exception as unexpected:
        # Tool boundary: anything else is still returned as data, not raised.
        return _failure(
            'unexpected_error',
            unexpected,
            'This may be an unsupported .sim file format. Please report this issue.',
        )
def format_discovery_result_for_llm(result: Dict[str, Any]) -> str:
    """
    Format discovery result for LLM consumption (Markdown).

    Args:
        result: Output from discover_fea_model(). Error results must carry
            'message' and 'suggestion'; success results must carry
            'sim_file', 'summary', 'solutions', 'expressions', 'fem_info'
            and 'linked_files'.

    Returns:
        Markdown-formatted string. For an error result this is a short
        two-paragraph error message; for a success result it is a report
        with Solutions, Expressions, FEM Information and (if any)
        Linked Files sections.
    """
    if result['status'] != 'success':
        return f"❌ **Error**: {result['message']}\n\n💡 {result['suggestion']}"

    md = [
        "# FEA Model Analysis\n",
        f"**File**: `{result['sim_file']}`\n",
    ]

    # Solutions
    md.append(f"## Solutions ({result['summary']['solution_count']})\n")
    for sol in result['solutions']:
        md.append(f"- **{sol['name']}** ({sol['type']}) - Solver: {sol['solver']}")
        if sol['description']:
            md.append(f" - {sol['description']}")
    md.append("")

    # Expressions (Design Variables)
    md.append(f"## Expressions ({result['summary']['expression_count']})\n")
    if result['expressions']:
        md.append("| Name | Value | Units |")
        md.append("|------|-------|-------|")
        for expr in result['expressions']:
            # The 'value' key is usually present but may hold None (an
            # expression without a value attribute), so a .get() default
            # alone would render the literal "None".
            value = expr.get('value')
            if value is None:
                value = 'N/A'
            units = expr.get('units', '')
            md.append(f"| `{expr['name']}` | {value} | {units} |")
    else:
        md.append("⚠️ No expressions found. Model may not be parametric.")
    md.append("")

    # FEM Information
    fem = result['fem_info']
    md.append("## FEM Information\n")
    mesh = fem['mesh']
    if mesh:
        md.append(f"**Mesh**: {mesh.get('name', 'Unknown')}")
        md.append(f"- Nodes: {mesh.get('node_count', 'Unknown')}")
        md.append(f"- Elements: {mesh.get('element_count', 'Unknown')}")
        md.append("")

    # Materials, loads and constraints share the same "name (type)" layout.
    for title, key in (
        ('Materials', 'materials'),
        ('Loads', 'loads'),
        ('Constraints', 'constraints'),
    ):
        entries = fem[key]
        if entries:
            md.append(f"**{title}** ({len(entries)})")
            md.extend(f"- {entry['name']} ({entry['type']})" for entry in entries)
            md.append("")

    # Linked Files
    if result['linked_files']:
        md.append("## Linked Files\n")
        for file_type, file_path in result['linked_files'].items():
            md.append(f"- **{file_type}**: `{file_path}`")
        md.append("")
    return "\n".join(md)
# For testing/debugging: run discovery on one .sim file from the command line.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python model_discovery.py <path_to_sim_file>", file=sys.stderr)
        sys.exit(1)

    result = discover_fea_model(sys.argv[1])

    # Report failures on stderr with a non-zero exit status so shell
    # scripts and CI can detect them.
    if result['status'] != 'success':
        print(f"Error: {result['message']}", file=sys.stderr)
        sys.exit(1)

    # Human-readable Markdown first, then the raw JSON payload.
    print(format_discovery_result_for_llm(result))
    print("\n" + "="*60)
    print("JSON Output:")
    print(json.dumps(result, indent=2))