# Atomizer/mcp_server/tools/model_discovery.py

"""
MCP Tool: FEA Model Discovery

Parses Siemens NX .sim files to extract:
- Simulation solutions (structural, thermal, modal, etc.)
- Parametric expressions (design variables)
- FEM information (mesh, elements, materials)
- Linked part files

This tool enables LLM-driven optimization configuration by providing
structured information about what can be optimized in a given FEA model.
"""

import json
import logging
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Dict, Any, List, Optional


class SimFileParser:
    """
    Parser for Siemens NX .sim (simulation) files.

    IMPORTANT: Real NX .sim files are BINARY (not XML) in NX 12+.
    The parser uses two approaches:
    1. XML parsing for test/legacy files
    2. Binary string extraction for real NX files

    .sim files contain references to:
    - Parent .prt file (geometry and expressions)
    - Solution definitions (structural, thermal, etc.)
    - FEM (mesh, materials, loads, constraints)
    - Solver settings

    Attributes:
        sim_path: Path to the .sim file being parsed.
        tree: Parsed ElementTree when the file is XML, otherwise None.
        root: Root element of ``tree`` when the file is XML, otherwise None.
        is_binary: True when the file could not be parsed as XML.
        sim_strings: Printable ASCII runs (4+ characters) taken from a
            binary file; empty for XML files.
    """

    # Nastran solution-sequence markers searched for in binary files, mapped
    # to the analysis type reported for each.
    _SOLUTION_TYPES = {
        'SOL 101': 'Linear Statics',
        'SOL 103': 'Normal Modes',
        'SOL 106': 'Nonlinear Statics',
        'SOL 108': 'Direct Frequency Response',
        'SOL 109': 'Direct Transient Response',
        'SOL 111': 'Modal Frequency Response',
        'SOL 112': 'Modal Transient Response',
        'SOL 200': 'Design Optimization',
    }

    # A short string containing "Solution" plus one of these words is treated
    # as a solution name.
    _SOLUTION_NAME_KEYWORDS = ('Structural', 'Thermal', 'Modal', 'Static')

    # Extensions probed next to the .sim file by get_linked_files().
    _RESULT_EXTENSIONS = ('.op2', '.f06', '.f04', '.bdf')

    # Names rejected by the name=value fallback pattern.
    _NASTRAN_KEYWORDS = frozenset(
        ['PRINT', 'PUNCH', 'PLOT', 'BOTH', 'GRID', 'GAUSS']
    )

    # Names that show up in "Root:<name>:" references but are not expressions.
    # ('%%'-prefixed names can never match _ROOT_REFERENCE_RE, so they need
    # no explicit filter.)
    _INTERNAL_ROOT_NAMES = frozenset(['index', 'WorldModifier'])

    # Runs of printable ASCII; minimum length of 4 to avoid noise.
    _PRINTABLE_RUN_RE = re.compile(r'[\x20-\x7E]{4,}')

    # NX native expression format with prefix variations:
    #   #(Number [mm]) tip_thickness: 20;
    #   (Number [mm]) p3: 10;
    #   *(Number [mm]) support_blend_radius: 10;
    #   ((Number [degrees]) support_angle: 30;
    _NX_EXPRESSION_RE = re.compile(
        r'[#*\(]*\((\w+)\s*\[([^\]]*)\]\)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
    )

    # Expression references of the form "Root:expression_name:".
    _ROOT_REFERENCE_RE = re.compile(r'Root:([a-zA-Z_][a-zA-Z0-9_]{2,}):')

    # Fallback "name = value" pattern.
    _SIMPLE_ASSIGNMENT_RE = re.compile(
        r'([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*([-+]?\d*\.?\d+(?:[eE][-+]?\d+)?)'
    )

    _MESH_RE = re.compile(r'Mesh\s+(\d+)')
    _MATERIAL_RE = re.compile(r'MAT\d+\s+([A-Za-z0-9_\-\s]+)')
    # Nastran element names such as CQUAD4, CTRIA3, CTETRA.
    _ELEMENT_TYPE_RE = re.compile(r'\b(C[A-Z]{3,6}\d?)\b')

    def __init__(self, sim_path: Path):
        """
        Initialize parser with path to .sim file and parse it immediately.

        Args:
            sim_path: Absolute path to .sim file

        Raises:
            FileNotFoundError: If sim file doesn't exist
            ValueError: If the path does not end in .sim, is not a regular
                file, or cannot be read
        """
        self.sim_path = Path(sim_path)

        if not self.sim_path.exists():
            raise FileNotFoundError(f"Sim file not found: {sim_path}")

        if self.sim_path.suffix.lower() != '.sim':
            raise ValueError(f"Not a .sim file: {sim_path}")

        # A directory named "*.sim" would otherwise surface as a raw OSError
        # from the XML parser instead of the documented ValueError.
        if not self.sim_path.is_file():
            raise ValueError(f"Not a regular file: {sim_path}")

        self.tree = None
        self.root = None
        self.is_binary = False
        self.sim_strings = []  # Extracted strings from binary file
        self._parse_file()

    def _parse_file(self):
        """
        Parse the .sim file - handles both XML (test files) and binary (real NX files).

        XML is tried first. If that fails with a parse error, the file is read
        as bytes and its printable ASCII runs are stored in ``sim_strings``.

        Raises:
            ValueError: If the binary fallback fails as well.
        """
        try:
            self.tree = ET.parse(self.sim_path)
            self.root = self.tree.getroot()
            self.is_binary = False
            return
        except ET.ParseError:
            # Not XML, must be binary - this is normal for real NX files
            pass

        try:
            with open(self.sim_path, 'rb') as f:
                content = f.read()

            # latin-1 maps every byte to a character, so decoding cannot fail
            # and no bytes are lost before the printable-run search.
            text_content = content.decode('latin-1', errors='ignore')
            self.sim_strings = self._PRINTABLE_RUN_RE.findall(text_content)
            self.is_binary = True
        except Exception as e:
            # Chain the cause so the original traceback stays available.
            raise ValueError(
                f"Failed to parse .sim file (tried both XML and binary): {e}"
            ) from e

    def extract_solutions(self) -> List[Dict[str, Any]]:
        """
        Extract solution definitions from .sim file.

        For XML files, every ``Solution``, ``AnalysisSolution`` and
        ``SimSolution`` element is reported. For binary files, solutions are
        inferred from ``SOL nnn`` markers and from short strings that look
        like solution names. Binary results are de-duplicated and returned in
        a stable order: SOL-derived types first, then names, each in order of
        first appearance.

        Returns:
            List of solution dictionaries with name, type, solver and
            description. A single placeholder entry is returned when nothing
            could be found.
        """
        solutions = []

        if not self.is_binary and self.root is not None:
            for solution_tag in ('Solution', 'AnalysisSolution', 'SimSolution'):
                for elem in self.root.iter(solution_tag):
                    solutions.append({
                        'name': elem.get('name', 'Unknown'),
                        'type': elem.get('type', 'Unknown'),
                        'solver': elem.get('solver', 'NX Nastran'),
                        'description': elem.get('description', ''),
                    })
        else:
            # A dict is used as an insertion-ordered set so the output does
            # not depend on hash ordering.
            found_solutions: Dict[str, None] = {}

            for s in self.sim_strings:
                for sol_id, sol_type in self._SOLUTION_TYPES.items():
                    if sol_id in s:
                        found_solutions[sol_type] = None

            for s in self.sim_strings:
                if 'Solution' in s and len(s) < 50:
                    if any(word in s for word in self._SOLUTION_NAME_KEYWORDS):
                        found_solutions[s.strip()] = None

            for sol_name in found_solutions:
                solutions.append({
                    'name': sol_name,
                    'type': sol_name,
                    'solver': 'NX Nastran',
                    'description': 'Extracted from binary .sim file'
                })

        # Default if nothing found
        if not solutions:
            solutions.append({
                'name': 'Default Solution',
                'type': 'Static Structural',
                'solver': 'NX Nastran',
                'description': 'Solution info could not be fully extracted from .sim file'
            })

        return solutions

    def _candidate_prt_paths(self) -> List[Path]:
        """
        Return the .prt locations to probe, in priority order, without duplicates.

        For ``Bracket_sim1.sim`` the candidates are:
        1. ``Bracket_sim1.prt``   (same name as the .sim)
        2. ``Bracket.prt``        (text before the first underscore)
        3. ``Bracket_sim1_i.prt`` (stem with ``_i`` suffix)
        4. ``Bracket.prt``        (stem with a trailing ``_sim<N>`` removed;
           differs from 2 when the base name itself contains underscores)
        5. ``Bracket_fem1_i.prt`` (``_sim`` replaced by ``_fem``, ``_i`` suffix)
        """
        folder = self.sim_path.parent
        stem = self.sim_path.stem
        base_name = re.sub(r'_sim\d*$', '', stem)

        candidates = [
            self.sim_path.with_suffix('.prt'),
            folder / f"{stem.split('_')[0]}.prt",
            folder / f"{stem}_i.prt",
            folder / f"{base_name}.prt",
            folder / f"{stem.replace('_sim', '_fem')}_i.prt",
        ]
        # dict.fromkeys keeps first occurrences in order.
        return list(dict.fromkeys(candidates))

    def _find_prt_file(self) -> Optional[Path]:
        """Return the first candidate .prt path that is an existing file, or None."""
        for candidate in self._candidate_prt_paths():
            if candidate.is_file():
                return candidate
        return None

    def extract_expressions(self) -> List[Dict[str, Any]]:
        """
        Extract expression references from .sim file.

        Note: Actual expression values are stored in the .prt file.
        This method collects ``Expression`` elements from an XML .sim file and
        then, if an associated .prt file is found, merges in the expressions
        read from it. Entries from the .prt replace same-named .sim entries.

        Returns:
            List of expression dictionaries with name, value, units
        """
        expressions = []

        if not self.is_binary and self.root is not None:
            for expr_elem in self.root.iter('Expression'):
                expr_info = {
                    'name': expr_elem.get('name', ''),
                    'value': expr_elem.get('value', None),
                    'units': expr_elem.get('units', ''),
                    'formula': expr_elem.text if expr_elem.text else None
                }
                if expr_info['name']:
                    expressions.append(expr_info)

        # Works for both XML and binary .sim files; only the first .prt found
        # is used.
        prt_path = self._find_prt_file()
        if prt_path is not None:
            merged = {e['name']: e for e in expressions}
            for prt_expr in self._extract_prt_expressions(prt_path):
                merged[prt_expr['name']] = prt_expr
            expressions = list(merged.values())

        return expressions

    def _extract_prt_expressions(self, prt_path: Path) -> List[Dict[str, Any]]:
        """
        Extract expressions from associated .prt file.

        .prt files are binary, but expression data is stored in readable sections.
        NX expression format: #(Type [units]) name: value;

        Three sources are used:
        1. NX-format expressions (name, value, units, type); first occurrence
           of each name wins.
        2. ``Root:<name>:`` references whose value was not found by 1; these
           are reported with ``value=None``.
        3. A plain ``name = value`` fallback, only when 1 and 2 found nothing.

        Extraction is best-effort: any failure is logged as a warning and
        whatever was collected so far is returned.

        Args:
            prt_path: Path to .prt file

        Returns:
            List of expression dictionaries
        """
        expressions = []

        try:
            with open(prt_path, 'rb') as f:
                content = f.read()

            # latin-1 preserves all byte values
            text_content = content.decode('latin-1', errors='ignore')

            expr_names_seen = set()
            for match in self._NX_EXPRESSION_RE.finditer(text_content):
                expr_type, units, name, value = match.groups()
                if name in expr_names_seen:
                    continue
                expr_names_seen.add(name)
                expressions.append({
                    'name': name,
                    'value': float(value),
                    'units': units,
                    'type': expr_type,
                    'source': 'prt_file_nx_format'
                })

            # Insertion-ordered so reference-only entries come out in file
            # order rather than hash order.
            referenced_names: Dict[str, None] = {}
            for match in self._ROOT_REFERENCE_RE.finditer(text_content):
                name = match.group(1)
                if name not in self._INTERNAL_ROOT_NAMES:
                    referenced_names[name] = None

            for name in referenced_names:
                if name not in expr_names_seen:
                    expressions.append({
                        'name': name,
                        'value': None,
                        'units': '',
                        'type': 'Unknown',
                        'source': 'prt_file_reference_only'
                    })

            # Fallback - only used if nothing was found above
            if not expressions:
                for match in self._SIMPLE_ASSIGNMENT_RE.finditer(text_content):
                    name, value = match.groups()
                    # Filter out common false positives (short names,
                    # underscore-prefixed, Nastran keywords)
                    if len(name) <= 3 or name.startswith('_'):
                        continue
                    if name.upper() in self._NASTRAN_KEYWORDS:
                        continue
                    expressions.append({
                        'name': name,
                        'value': float(value),
                        'units': '',
                        'source': 'prt_file_simple_pattern'
                    })

        except Exception as e:
            # .prt parsing is best-effort, don't fail if it doesn't work.
            # Logged instead of printed: stdout may carry protocol traffic
            # when this runs inside an MCP server.
            logging.getLogger(__name__).warning(
                "Could not extract expressions from .prt file %s: %s", prt_path, e
            )

        return expressions

    def extract_fem_info(self) -> Dict[str, Any]:
        """
        Extract FEM (finite element model) information.

        XML files are read directly. For binary files the information is
        taken from a sibling .fem file when one exists; otherwise an empty
        structure with an explanatory ``note`` is returned.

        Returns:
            Dictionary with mesh, materials, element_types, loads and
            constraints (plus an optional ``note``)
        """
        fem_info = {
            'mesh': {},
            'materials': [],
            'element_types': [],
            'loads': [],
            'constraints': []
        }

        if not self.is_binary and self.root is not None:
            # If several Mesh elements exist, the last one wins.
            for mesh_elem in self.root.iter('Mesh'):
                fem_info['mesh'] = {
                    'name': mesh_elem.get('name', 'Default Mesh'),
                    'element_size': mesh_elem.get('element_size', 'Unknown'),
                    'node_count': mesh_elem.get('node_count', 'Unknown'),
                    'element_count': mesh_elem.get('element_count', 'Unknown')
                }

            for mat_elem in self.root.iter('Material'):
                material = {
                    'name': mat_elem.get('name', 'Unknown'),
                    'type': mat_elem.get('type', 'Isotropic'),
                    'properties': {}
                }
                for prop in ('youngs_modulus', 'poissons_ratio', 'density', 'yield_strength'):
                    prop_value = mat_elem.get(prop)
                    if prop_value:
                        material['properties'][prop] = prop_value
                fem_info['materials'].append(material)

            for elem_type in self.root.iter('ElementType'):
                fem_info['element_types'].append(elem_type.get('type', 'Unknown'))

            for load_elem in self.root.iter('Load'):
                fem_info['loads'].append({
                    'name': load_elem.get('name', 'Unknown'),
                    'type': load_elem.get('type', 'Force'),
                    'magnitude': load_elem.get('magnitude', 'Unknown')
                })

            for constraint_elem in self.root.iter('Constraint'):
                fem_info['constraints'].append({
                    'name': constraint_elem.get('name', 'Unknown'),
                    'type': constraint_elem.get('type', 'Fixed'),
                })

        else:
            # Binary parsing - extract from .fem file if available
            stem = self.sim_path.stem
            fem_path = self.sim_path.with_name(stem.replace('_sim', '_fem') + '.fem')
            if not fem_path.is_file():
                # Try alternative naming pattern
                fem_path = self.sim_path.parent / f"{stem.split('_')[0]}_fem1.fem"

            if fem_path.is_file():
                fem_info = self._extract_fem_from_fem_file(fem_path)
            else:
                fem_info['note'] = 'Limited FEM info available from binary .sim file'

        return fem_info

    def _extract_fem_from_fem_file(self, fem_path: Path) -> Dict[str, Any]:
        """
        Extract FEM information from .fem file.

        The file is scanned as text for a mesh number, material names and
        Nastran-style element type names. Materials are de-duplicated by name
        and element types are returned sorted. The result contains only lists
        and dicts, even when reading fails, so it can always be serialised
        to JSON.

        Args:
            fem_path: Path to .fem file

        Returns:
            Dictionary with FEM information; a ``note`` key describes any
            failure to read or parse the file.
        """
        fem_info = {
            'mesh': {},
            'materials': [],
            'element_types': [],
            'loads': [],
            'constraints': []
        }

        try:
            with open(fem_path, 'rb') as f:
                content = f.read()
            text_content = content.decode('latin-1', errors='ignore')

            mesh_match = self._MESH_RE.search(text_content)
            if mesh_match:
                fem_info['mesh']['name'] = f"Mesh {mesh_match.group(1)}"

            seen_materials = set()
            for material_match in self._MATERIAL_RE.finditer(text_content):
                mat_name = material_match.group(1).strip()
                if len(mat_name) > 2 and mat_name not in seen_materials:
                    seen_materials.add(mat_name)
                    fem_info['materials'].append({
                        'name': mat_name,
                        'type': 'Unknown',
                        'properties': {}
                    })

            # Collected in a local set and stored as a sorted list so the
            # returned structure never holds a set.
            element_types = {
                elem_match.group(1)
                for elem_match in self._ELEMENT_TYPE_RE.finditer(text_content)
            }
            fem_info['element_types'] = sorted(element_types)

        except Exception as e:
            fem_info['note'] = f'Could not fully parse .fem file: {e}'

        return fem_info

    def get_linked_files(self) -> Dict[str, str]:
        """
        Get paths to linked files (.prt, result files, etc.)

        The part file is located with the same candidate list that
        ``extract_expressions`` uses, so the file reported here is the one
        expressions were read from. Result files are looked up next to the
        .sim file under the same stem.

        Returns:
            Dictionary mapping file type to path
        """
        linked_files = {}

        # .prt file (geometry and expressions)
        prt_path = self._find_prt_file()
        if prt_path is not None:
            linked_files['part_file'] = str(prt_path)

        # Nastran result files
        result_dir = self.sim_path.parent
        sim_name = self.sim_path.stem
        for ext in self._RESULT_EXTENSIONS:
            result_file = result_dir / f"{sim_name}{ext}"
            if result_file.exists():
                linked_files[f'result{ext}'] = str(result_file)

        return linked_files


def discover_fea_model(sim_file_path: str) -> Dict[str, Any]:
    """
    MCP Tool: Discover FEA Model

    Parses a Siemens NX .sim file and reports:
    - Solutions (analysis types)
    - Expressions (potential design variables)
    - FEM information (mesh, materials, loads)
    - Linked files

    This is the primary tool for LLM-driven optimization setup. It never
    raises: every failure is converted into an error dictionary.

    Args:
        sim_file_path: Absolute path to .sim file (Windows or Unix format)

    Returns:
        On success, a dictionary with ``status='success'`` plus the
        ``sim_file``, ``file_exists``, ``solutions``, ``expressions``,
        ``fem_info``, ``linked_files``, ``metadata`` and ``summary`` keys.
        On failure, a dictionary with ``status='error'`` plus ``error_type``,
        ``message`` and ``suggestion``.

    Example:
        >>> result = discover_fea_model("C:/Projects/Bracket/analysis.sim")
        >>> print(result['expressions'])
        [{'name': 'wall_thickness', 'value': 5.0, 'units': 'mm'}, ...]
    """

    def _failure(error_type: str, exc: Exception, suggestion: str) -> Dict[str, Any]:
        # Shared shape of every error response.
        return {
            'status': 'error',
            'error_type': error_type,
            'message': str(exc),
            'suggestion': suggestion
        }

    try:
        # Normalize path (handle both Windows and Unix)
        resolved = Path(sim_file_path).resolve()
        model = SimFileParser(resolved)

        # Keys are added in the order they should appear in the output.
        report: Dict[str, Any] = {
            'status': 'success',
            'sim_file': str(resolved),
        }
        report['file_exists'] = resolved.exists()
        report['solutions'] = model.extract_solutions()
        report['expressions'] = model.extract_expressions()
        report['fem_info'] = model.extract_fem_info()
        report['linked_files'] = model.get_linked_files()
        report['metadata'] = {
            'parser_version': '0.1.0',
            'nx_version': 'NX 2412',  # Can be extracted from .sim file in future
        }

        # Summary statistics derived from the sections above.
        fem_section = report['fem_info']
        report['summary'] = {
            'solution_count': len(report['solutions']),
            'expression_count': len(report['expressions']),
            'material_count': len(fem_section['materials']),
            'load_count': len(fem_section['loads']),
            'constraint_count': len(fem_section['constraints']),
        }
        return report

    except FileNotFoundError as exc:
        return _failure(
            'file_not_found',
            exc,
            'Check that the file path is absolute and the .sim file exists',
        )

    except ValueError as exc:
        return _failure(
            'invalid_file',
            exc,
            'Ensure the file is a valid NX .sim file (not corrupted or encrypted)',
        )

    except Exception as exc:
        return _failure(
            'unexpected_error',
            exc,
            'This may be an unsupported .sim file format. Please report this issue.',
        )


def format_discovery_result_for_llm(result: Dict[str, Any]) -> str:
    """
    Format discovery result for LLM consumption (Markdown).

    This is used by the MCP server to present results to the LLM
    in a clear, structured format.

    Expressions whose value is unknown (``None`` or missing) are shown as
    ``N/A``. Optional keys (``description``, ``summary`` counts, individual
    ``fem_info`` sections, ``linked_files``) may be absent; missing counts
    fall back to the length of the corresponding list.

    Args:
        result: Output from discover_fea_model()

    Returns:
        Markdown-formatted string. For a non-success result this is a short
        error message followed by the suggestion.
    """
    if result.get('status') != 'success':
        message = result.get('message', 'Unknown error')
        suggestion = result.get('suggestion', '')
        return f"❌ **Error**: {message}\n\n💡 {suggestion}"

    summary = result.get('summary', {})
    solutions = result.get('solutions', [])
    expressions = result.get('expressions', [])
    fem = result.get('fem_info', {})
    linked_files = result.get('linked_files', {})

    md = []
    md.append("# FEA Model Analysis\n")
    md.append(f"**File**: `{result.get('sim_file', '')}`\n")

    # Solutions
    md.append(f"## Solutions ({summary.get('solution_count', len(solutions))})\n")
    for sol in solutions:
        md.append(
            f"- **{sol.get('name', 'Unknown')}** ({sol.get('type', 'Unknown')})"
            f" - Solver: {sol.get('solver', 'Unknown')}"
        )
        if sol.get('description'):
            md.append(f"  - {sol['description']}")
    md.append("")

    # Expressions (Design Variables)
    md.append(f"## Expressions ({summary.get('expression_count', len(expressions))})\n")
    if expressions:
        md.append("| Name | Value | Units |")
        md.append("|------|-------|-------|")
        for expr in expressions:
            # Reference-only expressions carry an explicit value of None, so a
            # .get() default alone would print "None".
            value = expr.get('value')
            if value is None:
                value = 'N/A'
            units = expr.get('units', '')
            md.append(f"| `{expr.get('name', '')}` | {value} | {units} |")
    else:
        md.append("⚠️ No expressions found. Model may not be parametric.")
    md.append("")

    # FEM Information
    md.append("## FEM Information\n")

    mesh = fem.get('mesh')
    if mesh:
        md.append(f"**Mesh**: {mesh.get('name', 'Unknown')}")
        md.append(f"- Nodes: {mesh.get('node_count', 'Unknown')}")
        md.append(f"- Elements: {mesh.get('element_count', 'Unknown')}")
        md.append("")

    # Materials, loads and constraints share one "name (type)" list layout.
    for heading, key in (
        ('Materials', 'materials'),
        ('Loads', 'loads'),
        ('Constraints', 'constraints'),
    ):
        items = fem.get(key)
        if not items:
            continue
        md.append(f"**{heading}** ({len(items)})")
        for item in items:
            md.append(f"- {item.get('name', 'Unknown')} ({item.get('type', 'Unknown')})")
        md.append("")

    # Linked Files
    if linked_files:
        md.append("## Linked Files\n")
        for file_type, file_path in linked_files.items():
            md.append(f"- **{file_type}**: `{file_path}`")
        md.append("")

    return "\n".join(md)


# For testing/debugging: run discovery on one .sim file and print the
# Markdown report followed by the raw JSON. Exits with status 1 on a usage
# error or when discovery fails, so shell callers can detect failure.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python model_discovery.py <path_to_sim_file>")
        sys.exit(1)

    sim_path = sys.argv[1]
    result = discover_fea_model(sim_path)

    if result['status'] != 'success':
        # Errors go to stderr and include the suggestion from the error result.
        print(f"Error: {result['message']}", file=sys.stderr)
        if result.get('suggestion'):
            print(f"Suggestion: {result['suggestion']}", file=sys.stderr)
        sys.exit(1)

    print(format_discovery_result_for_llm(result))
    print("\n" + "="*60)
    print("JSON Output:")
    print(json.dumps(result, indent=2))