feat(config): AtomizerSpec v2.0 Pydantic models, validators, and tests

Config Layer: - spec_models.py: Pydantic models for AtomizerSpec v2.0 - spec_validator.py: Semantic validation with detailed error reporting Extractors: - custom_extractor_loader.py: Runtime custom extractor loading - spec_extractor_builder.py: Build extractors from spec definitions Tools: - migrate_to_spec_v2.py: CLI tool for batch migration Tests: - test_migrator.py: Migration tests - test_spec_manager.py: SpecManager service tests - test_spec_api.py: REST API tests - test_mcp_tools.py: MCP tool tests - test_e2e_unified_config.py: End-to-end config tests
2026-01-20 13:12:03 -05:00
parent 27e78d3d56
commit 6c30224341
10 changed files with 4705 additions and 0 deletions
--- a/optimization_engine/extractors/custom_extractor_loader.py
+++ b/optimization_engine/extractors/custom_extractor_loader.py
@@ -0,0 +1,541 @@
+"""
+Custom Extractor Loader
+
+Dynamically loads and executes custom Python extractors defined in AtomizerSpec v2.0.
+Provides sandboxed execution with access to FEA results and common analysis libraries.
+
+P3.9: Custom extractor runtime loader
+"""
+
+import ast
+import hashlib
+import importlib
+import logging
+import re
+import sys
+import traceback
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+
+# Lazy imports for optional dependencies
+# _PYOP2 caches the pyNastran OP2 class the first time
+# CustomExtractorContext.op2_result needs it, so importing this module does not
+# import pyNastran.
+_PYOP2 = None
+# NOTE(review): _SCIPY is not referenced anywhere else in the visible part of
+# this module; presumably reserved for a matching lazy scipy import -- confirm.
+_SCIPY = None
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+# ============================================================================
+# Allowed modules for custom extractors (sandboxed environment)
+# ============================================================================
+
+# Module names that custom extractor code is meant to be able to import.
+# NOTE(review): this set is not referenced anywhere else in the visible part of
+# this module -- validate_extractor_code() only consults BLOCKED_MODULES -- so it
+# currently documents intent rather than enforcing an allowlist.
+ALLOWED_MODULES = {
+    # Core Python
+    "math",
+    "statistics",
+    "collections",
+    "itertools",
+    "functools",
+    # Scientific computing
+    "numpy",
+    "scipy",
+    "scipy.interpolate",
+    "scipy.optimize",
+    "scipy.integrate",
+    "scipy.linalg",
+    # FEA result parsing
+    "pyNastran",
+    "pyNastran.op2",
+    "pyNastran.op2.op2",
+    "pyNastran.bdf",
+    "pyNastran.bdf.bdf",
+    # Atomizer extractors
+    "optimization_engine.extractors",
+}
+
+# Names rejected by the import check in validate_extractor_code(): the first
+# dotted component of every ``import x`` / ``from x import ...`` is looked up in
+# this set, so e.g. ``import os.path`` is rejected through the "os" entry.
+# NOTE(review): "eval", "exec", "compile", "open", "file" and "__builtins__" are
+# builtin names rather than modules, so these entries only matter if code tries
+# to import something with that exact name; calls to them are handled by the
+# regex patterns in validate_extractor_code() instead.
+BLOCKED_MODULES = {
+    "os",
+    "subprocess",
+    "shutil",
+    "sys",
+    "builtins",
+    "__builtins__",
+    "importlib",
+    "eval",
+    "exec",
+    "compile",
+    "open",
+    "file",
+    "socket",
+    "requests",
+    "urllib",
+    "http",
+}
+
+
+# ============================================================================
+# Code Validation
+# ============================================================================
+
+class ExtractorSecurityError(Exception):
+    """Raised when custom extractor code contains disallowed patterns."""
+    pass
+
+
+class ExtractorValidationError(Exception):
+    """Raised when custom extractor code is invalid."""
+    pass
+
+
+def validate_extractor_code(code: str, function_name: str) -> Tuple[bool, List[str]]:
+    """
+    Validate custom extractor code for security and correctness.
+
+    Args:
+        code: Python source code string
+        function_name: Expected function name to find in code
+
+    Returns:
+        Tuple of (is_valid, list of error messages)
+
+    Raises:
+        ExtractorSecurityError: If dangerous patterns detected
+    """
+    errors = []
+
+    # Check for syntax errors first
+    try:
+        tree = ast.parse(code)
+    except SyntaxError as e:
+        return False, [f"Syntax error: {e}"]
+
+    # Check for disallowed patterns
+    dangerous_patterns = [
+        (r'\bexec\s*\(', 'exec() is not allowed'),
+        (r'\beval\s*\(', 'eval() is not allowed'),
+        (r'\bcompile\s*\(', 'compile() is not allowed'),
+        (r'\b__import__\s*\(', '__import__() is not allowed'),
+        (r'\bopen\s*\(', 'open() is not allowed - use op2_path parameter'),
+        (r'\bos\.(system|popen|spawn|exec)', 'os.system/popen/spawn/exec is not allowed'),
+        (r'\bsubprocess\.', 'subprocess module is not allowed'),
+        (r'\bshutil\.', 'shutil module is not allowed'),
+        (r'import\s+os\b', 'import os is not allowed'),
+        (r'from\s+os\b', 'from os import is not allowed'),
+        (r'import\s+subprocess', 'import subprocess is not allowed'),
+        (r'import\s+sys\b', 'import sys is not allowed'),
+    ]
+
+    for pattern, message in dangerous_patterns:
+        if re.search(pattern, code):
+            raise ExtractorSecurityError(message)
+
+    # Check that the expected function exists
+    function_found = False
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef) and node.name == function_name:
+            function_found = True
+
+            # Check function signature
+            args = node.args
+            arg_names = [arg.arg for arg in args.args]
+
+            # Must have op2_path as first argument (or op2_result/results)
+            valid_first_args = {'op2_path', 'op2_result', 'results', 'data'}
+            if not arg_names or arg_names[0] not in valid_first_args:
+                errors.append(
+                    f"Function {function_name} must have first argument from: "
+                    f"{valid_first_args}, got: {arg_names[0] if arg_names else 'none'}"
+                )
+            break
+
+    if not function_found:
+        errors.append(f"Function '{function_name}' not found in code")
+
+    # Check imports
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Import):
+            for alias in node.names:
+                module = alias.name.split('.')[0]
+                if module in BLOCKED_MODULES:
+                    errors.append(f"Import of '{alias.name}' is not allowed")
+        elif isinstance(node, ast.ImportFrom):
+            if node.module:
+                module = node.module.split('.')[0]
+                if module in BLOCKED_MODULES:
+                    errors.append(f"Import from '{node.module}' is not allowed")
+
+    return len(errors) == 0, errors
+
+
+# ============================================================================
+# Extractor Compilation and Execution
+# ============================================================================
+
+class CustomExtractorContext:
+    """
+    Execution context for custom extractors.
+    Provides safe access to FEA results and common utilities.
+    """
+
+    def __init__(self, op2_path: Optional[Path] = None,
+                 bdf_path: Optional[Path] = None,
+                 working_dir: Optional[Path] = None,
+                 params: Optional[Dict[str, float]] = None):
+        """
+        Initialize extractor context.
+
+        Args:
+            op2_path: Path to OP2 results file
+            bdf_path: Path to BDF model file
+            working_dir: Working directory for the trial
+            params: Current design parameters
+        """
+        self.op2_path = Path(op2_path) if op2_path else None
+        self.bdf_path = Path(bdf_path) if bdf_path else None
+        self.working_dir = Path(working_dir) if working_dir else None
+        self.params = params or {}
+
+        # Lazy-loaded results
+        self._op2_result = None
+        self._bdf_model = None
+
+    @property
+    def op2_result(self):
+        """Lazy-load OP2 results."""
+        if self._op2_result is None and self.op2_path and self.op2_path.exists():
+            global _PYOP2
+            if _PYOP2 is None:
+                from pyNastran.op2.op2 import OP2
+                _PYOP2 = OP2
+            self._op2_result = _PYOP2(str(self.op2_path), debug=False)
+        return self._op2_result
+
+    @property
+    def bdf_model(self):
+        """Lazy-load BDF model."""
+        if self._bdf_model is None and self.bdf_path and self.bdf_path.exists():
+            from pyNastran.bdf.bdf import BDF
+            self._bdf_model = BDF(debug=False)
+            self._bdf_model.read_bdf(str(self.bdf_path))
+        return self._bdf_model
+
+
+class CustomExtractor:
+    """
+    Compiled custom extractor ready for execution.
+    """
+
+    def __init__(self, extractor_id: str, name: str, function_name: str,
+                 code: str, outputs: List[Dict[str, Any]], dependencies: List[str] = None):
+        """
+        Initialize custom extractor.
+
+        Args:
+            extractor_id: Unique extractor ID
+            name: Human-readable name
+            function_name: Name of the extraction function
+            code: Python source code
+            outputs: List of output definitions
+            dependencies: Optional list of required pip packages
+        """
+        self.extractor_id = extractor_id
+        self.name = name
+        self.function_name = function_name
+        self.code = code
+        self.outputs = outputs
+        self.dependencies = dependencies or []
+
+        # Compiled function
+        self._compiled_func: Optional[Callable] = None
+        self._code_hash: Optional[str] = None
+
+    def compile(self) -> None:
+        """
+        Compile the extractor code and extract the function.
+
+        Raises:
+            ExtractorValidationError: If code is invalid
+            ExtractorSecurityError: If code contains dangerous patterns
+        """
+        # Validate code
+        is_valid, errors = validate_extractor_code(self.code, self.function_name)
+        if not is_valid:
+            raise ExtractorValidationError(f"Validation failed: {'; '.join(errors)}")
+
+        # Compute code hash for caching
+        self._code_hash = hashlib.sha256(self.code.encode()).hexdigest()[:12]
+
+        # Create execution namespace with allowed imports
+        namespace = {
+            'np': np,
+            'numpy': np,
+            'math': __import__('math'),
+            'statistics': __import__('statistics'),
+            'collections': __import__('collections'),
+            'itertools': __import__('itertools'),
+            'functools': __import__('functools'),
+        }
+
+        # Add scipy if available
+        try:
+            import scipy
+            namespace['scipy'] = scipy
+            from scipy import interpolate, optimize, integrate, linalg
+            namespace['interpolate'] = interpolate
+            namespace['optimize'] = optimize
+            namespace['integrate'] = integrate
+            namespace['linalg'] = linalg
+        except ImportError:
+            pass
+
+        # Add pyNastran if available
+        try:
+            from pyNastran.op2.op2 import OP2
+            from pyNastran.bdf.bdf import BDF
+            namespace['OP2'] = OP2
+            namespace['BDF'] = BDF
+        except ImportError:
+            pass
+
+        # Add Atomizer extractors
+        try:
+            from optimization_engine import extractors
+            namespace['extractors'] = extractors
+        except ImportError:
+            pass
+
+        # Execute the code to define the function
+        try:
+            exec(self.code, namespace)
+        except Exception as e:
+            raise ExtractorValidationError(f"Failed to compile: {e}")
+
+        # Extract the function
+        if self.function_name not in namespace:
+            raise ExtractorValidationError(f"Function '{self.function_name}' not defined")
+
+        self._compiled_func = namespace[self.function_name]
+        logger.info(f"Compiled custom extractor: {self.name} ({self._code_hash})")
+
+    def execute(self, context: CustomExtractorContext) -> Dict[str, float]:
+        """
+        Execute the extractor and return results.
+
+        Args:
+            context: Execution context with FEA results
+
+        Returns:
+            Dictionary of output_name -> value
+
+        Raises:
+            RuntimeError: If execution fails
+        """
+        if self._compiled_func is None:
+            self.compile()
+
+        try:
+            # Call the function with appropriate arguments
+            result = self._compiled_func(
+                op2_path=str(context.op2_path) if context.op2_path else None,
+                bdf_path=str(context.bdf_path) if context.bdf_path else None,
+                params=context.params,
+                working_dir=str(context.working_dir) if context.working_dir else None,
+            )
+
+            # Normalize result to dict
+            if isinstance(result, dict):
+                return result
+            elif isinstance(result, (int, float)):
+                # Single value - use first output name
+                if self.outputs:
+                    return {self.outputs[0]['name']: float(result)}
+                return {'value': float(result)}
+            elif isinstance(result, (list, tuple)):
+                # Multiple values - map to output names
+                output_dict = {}
+                for i, val in enumerate(result):
+                    if i < len(self.outputs):
+                        output_dict[self.outputs[i]['name']] = float(val)
+                    else:
+                        output_dict[f'output_{i}'] = float(val)
+                return output_dict
+            else:
+                raise RuntimeError(f"Unexpected result type: {type(result)}")
+
+        except Exception as e:
+            logger.error(f"Custom extractor {self.name} failed: {e}")
+            logger.debug(traceback.format_exc())
+            raise RuntimeError(f"Extractor {self.name} failed: {e}")
+
+
+# ============================================================================
+# Extractor Loader
+# ============================================================================
+
+class CustomExtractorLoader:
+    """
+    Loads and manages custom extractors from AtomizerSpec.
+    """
+
+    def __init__(self):
+        """Initialize loader with empty cache."""
+        self._cache: Dict[str, CustomExtractor] = {}
+
+    def load_from_spec(self, spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
+        """
+        Load all custom extractors from an AtomizerSpec.
+
+        Args:
+            spec: AtomizerSpec dictionary
+
+        Returns:
+            Dictionary of extractor_id -> CustomExtractor
+        """
+        extractors = {}
+
+        for ext_def in spec.get('extractors', []):
+            # Skip builtin extractors
+            if ext_def.get('builtin', True):
+                continue
+
+            # Custom extractor must have function definition
+            func_def = ext_def.get('function', {})
+            if not func_def.get('source'):
+                logger.warning(f"Custom extractor {ext_def.get('id')} has no source code")
+                continue
+
+            extractor = CustomExtractor(
+                extractor_id=ext_def.get('id', 'custom'),
+                name=ext_def.get('name', 'Custom Extractor'),
+                function_name=func_def.get('name', 'extract'),
+                code=func_def.get('source', ''),
+                outputs=ext_def.get('outputs', []),
+                dependencies=func_def.get('dependencies', []),
+            )
+
+            try:
+                extractor.compile()
+                extractors[extractor.extractor_id] = extractor
+                self._cache[extractor.extractor_id] = extractor
+            except (ExtractorValidationError, ExtractorSecurityError) as e:
+                logger.error(f"Failed to load extractor {extractor.name}: {e}")
+
+        return extractors
+
+    def get(self, extractor_id: str) -> Optional[CustomExtractor]:
+        """Get a cached extractor by ID."""
+        return self._cache.get(extractor_id)
+
+    def execute_all(self, extractors: Dict[str, CustomExtractor],
+                    context: CustomExtractorContext) -> Dict[str, Dict[str, float]]:
+        """
+        Execute all custom extractors and collect results.
+
+        Args:
+            extractors: Dictionary of extractor_id -> CustomExtractor
+            context: Execution context
+
+        Returns:
+            Dictionary of extractor_id -> {output_name: value}
+        """
+        results = {}
+
+        for ext_id, extractor in extractors.items():
+            try:
+                results[ext_id] = extractor.execute(context)
+            except Exception as e:
+                logger.error(f"Extractor {ext_id} failed: {e}")
+                # Return NaN for failed extractors
+                results[ext_id] = {
+                    out['name']: float('nan')
+                    for out in extractor.outputs
+                }
+
+        return results
+
+    def clear_cache(self) -> None:
+        """Clear the extractor cache."""
+        self._cache.clear()
+
+
+# ============================================================================
+# Convenience Functions
+# ============================================================================
+
+# Global loader instance
+# Shared by load_custom_extractors() and execute_custom_extractor() below, so
+# extractors loaded through the former are found in the cache by the latter.
+_loader = CustomExtractorLoader()
+
+
+def load_custom_extractors(spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
+    """
+    Load custom extractors from an AtomizerSpec.
+
+    Args:
+        spec: AtomizerSpec dictionary
+
+    Returns:
+        Dictionary of extractor_id -> CustomExtractor
+    """
+    return _loader.load_from_spec(spec)
+
+
+def execute_custom_extractor(extractor_id: str,
+                              op2_path: Union[str, Path],
+                              bdf_path: Optional[Union[str, Path]] = None,
+                              working_dir: Optional[Union[str, Path]] = None,
+                              params: Optional[Dict[str, float]] = None) -> Dict[str, float]:
+    """
+    Execute a single cached custom extractor.
+
+    Args:
+        extractor_id: ID of the extractor to run
+        op2_path: Path to OP2 results file
+        bdf_path: Optional path to BDF file
+        working_dir: Optional working directory
+        params: Optional design parameters
+
+    Returns:
+        Dictionary of output_name -> value
+
+    Raises:
+        KeyError: If extractor not found in cache
+    """
+    extractor = _loader.get(extractor_id)
+    if extractor is None:
+        raise KeyError(f"Extractor '{extractor_id}' not found in cache")
+
+    context = CustomExtractorContext(
+        op2_path=op2_path,
+        bdf_path=bdf_path,
+        working_dir=working_dir,
+        params=params
+    )
+
+    return extractor.execute(context)
+
+
+def validate_custom_extractor(code: str, function_name: str = "extract") -> Tuple[bool, List[str]]:
+    """
+    Validate custom extractor code without executing it.
+
+    Args:
+        code: Python source code
+        function_name: Expected function name
+
+    Returns:
+        Tuple of (is_valid, list of error/warning messages)
+    """
+    return validate_extractor_code(code, function_name)
+
+
+# Public API of this module. validate_extractor_code, ALLOWED_MODULES and
+# BLOCKED_MODULES are intentionally or incidentally absent from this list, so
+# ``from ... import *`` does not expose them.
+__all__ = [
+    'CustomExtractor',
+    'CustomExtractorLoader',
+    'CustomExtractorContext',
+    'ExtractorSecurityError',
+    'ExtractorValidationError',
+    'load_custom_extractors',
+    'execute_custom_extractor',
+    'validate_custom_extractor',
+]