feat(config): AtomizerSpec v2.0 Pydantic models, validators, and tests

Config Layer:
- spec_models.py: Pydantic models for AtomizerSpec v2.0
- spec_validator.py: Semantic validation with detailed error reporting

Extractors:
- custom_extractor_loader.py: Runtime custom extractor loading
- spec_extractor_builder.py: Build extractors from spec definitions

Tools:
- migrate_to_spec_v2.py: CLI tool for batch migration

Tests:
- test_migrator.py: Migration tests
- test_spec_manager.py: SpecManager service tests
- test_spec_api.py: REST API tests
- test_mcp_tools.py: MCP tool tests
- test_e2e_unified_config.py: End-to-end config tests
This commit is contained in:
2026-01-20 13:12:03 -05:00
parent 27e78d3d56
commit 6c30224341
10 changed files with 4705 additions and 0 deletions

View File

@@ -0,0 +1,541 @@
"""
Custom Extractor Loader
Dynamically loads and executes custom Python extractors defined in AtomizerSpec v2.0.
Provides sandboxed execution with access to FEA results and common analysis libraries.
P3.9: Custom extractor runtime loader
"""
import ast
import hashlib
import importlib
import logging
import re
import sys
import traceback
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
# Lazy imports for optional dependencies
_PYOP2 = None
_SCIPY = None
logger = logging.getLogger(__name__)
# ============================================================================
# Allowed modules for custom extractors (sandboxed environment)
# ============================================================================
# Modules custom extractors are expected to use.
# NOTE(review): this set appears to be informational only — the import check
# in validate_extractor_code() below tests against BLOCKED_MODULES (a
# denylist), not this allowlist. Confirm whether an allowlist check was
# intended before relying on this set for enforcement.
ALLOWED_MODULES = {
    # Core Python
    "math",
    "statistics",
    "collections",
    "itertools",
    "functools",
    # Scientific computing
    "numpy",
    "scipy",
    "scipy.interpolate",
    "scipy.optimize",
    "scipy.integrate",
    "scipy.linalg",
    # FEA result parsing
    "pyNastran",
    "pyNastran.op2",
    "pyNastran.op2.op2",
    "pyNastran.bdf",
    "pyNastran.bdf.bdf",
    # Atomizer extractors
    "optimization_engine.extractors",
}

# Top-level module names whose import is rejected by validate_extractor_code().
# (Some entries — e.g. "eval", "exec", "open" — are builtins, not modules;
# they are kept here for completeness but are caught by the name scan, not
# the import scan.)
BLOCKED_MODULES = {
    "os",
    "subprocess",
    "shutil",
    "sys",
    "builtins",
    "__builtins__",
    "importlib",
    "eval",
    "exec",
    "compile",
    "open",
    "file",
    "socket",
    "requests",
    "urllib",
    "http",
}
# ============================================================================
# Code Validation
# ============================================================================
class ExtractorSecurityError(Exception):
    """Signals that extractor source references a forbidden name or module."""
class ExtractorValidationError(Exception):
    """Signals that extractor source failed syntactic or structural validation."""
def validate_extractor_code(code: str, function_name: str) -> Tuple[bool, List[str]]:
    """
    Validate custom extractor code for security and correctness.

    Security screening is performed on the parsed AST rather than with
    regular expressions, so banned names appearing only inside comments or
    string literals, or as attribute names on other objects (e.g.
    ``model.compile(...)``), no longer trigger false positives — while any
    actual reference to a banned builtin or module name is still rejected.

    Args:
        code: Python source code string
        function_name: Expected function name to find in code

    Returns:
        Tuple of (is_valid, list of error messages)

    Raises:
        ExtractorSecurityError: If a banned name or module is referenced
    """
    errors: List[str] = []

    # Syntax must be valid before any further analysis.
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        return False, [f"Syntax error: {e}"]

    # Identifiers that must never be referenced or imported in extractor code.
    banned_names = {
        'exec': 'exec() is not allowed',
        'eval': 'eval() is not allowed',
        'compile': 'compile() is not allowed',
        '__import__': '__import__() is not allowed',
        'open': 'open() is not allowed - use op2_path parameter',
        'os': 'os module is not allowed',
        'subprocess': 'subprocess module is not allowed',
        'shutil': 'shutil module is not allowed',
        'sys': 'sys module is not allowed',
    }
    # First positional argument the extraction function must accept.
    valid_first_args = {'op2_path', 'op2_result', 'results', 'data'}

    function_found = False
    for node in ast.walk(tree):
        # Any live reference to a hard-banned name is an immediate failure.
        if isinstance(node, ast.Name) and node.id in banned_names:
            raise ExtractorSecurityError(banned_names[node.id])
        if isinstance(node, ast.Import):
            for alias in node.names:
                root = alias.name.split('.')[0]
                if root in banned_names:
                    raise ExtractorSecurityError(banned_names[root])
                if root in BLOCKED_MODULES:
                    errors.append(f"Import of '{alias.name}' is not allowed")
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                root = node.module.split('.')[0]
                if root in banned_names:
                    raise ExtractorSecurityError(banned_names[root])
                if root in BLOCKED_MODULES:
                    errors.append(f"Import from '{node.module}' is not allowed")
        elif (isinstance(node, ast.FunctionDef) and node.name == function_name
              and not function_found):
            function_found = True
            arg_names = [arg.arg for arg in node.args.args]
            if not arg_names or arg_names[0] not in valid_first_args:
                errors.append(
                    f"Function {function_name} must have first argument from: "
                    f"{valid_first_args}, got: {arg_names[0] if arg_names else 'none'}"
                )

    if not function_found:
        errors.append(f"Function '{function_name}' not found in code")

    return len(errors) == 0, errors
# ============================================================================
# Extractor Compilation and Execution
# ============================================================================
class CustomExtractorContext:
    """
    Execution context handed to custom extractors.

    Bundles the trial's file paths and design parameters, and lazily parses
    the OP2 results / BDF model the first time they are accessed.
    """

    def __init__(self, op2_path: Optional[Path] = None,
                 bdf_path: Optional[Path] = None,
                 working_dir: Optional[Path] = None,
                 params: Optional[Dict[str, float]] = None):
        """
        Initialize extractor context.

        Args:
            op2_path: Path to OP2 results file
            bdf_path: Path to BDF model file
            working_dir: Working directory for the trial
            params: Current design parameters
        """
        self.op2_path = None if not op2_path else Path(op2_path)
        self.bdf_path = None if not bdf_path else Path(bdf_path)
        self.working_dir = None if not working_dir else Path(working_dir)
        self.params = params if params else {}
        # Parsed results are cached here after the first property access.
        self._op2_result = None
        self._bdf_model = None

    @property
    def op2_result(self):
        """OP2 results, parsed on first access (None if no/missing file)."""
        if self._op2_result is not None:
            return self._op2_result
        path = self.op2_path
        if path is None or not path.exists():
            return None
        global _PYOP2
        if _PYOP2 is None:
            from pyNastran.op2.op2 import OP2
            _PYOP2 = OP2
        self._op2_result = _PYOP2(str(path), debug=False)
        return self._op2_result

    @property
    def bdf_model(self):
        """BDF model, read on first access (None if no/missing file)."""
        if self._bdf_model is not None:
            return self._bdf_model
        path = self.bdf_path
        if path is None or not path.exists():
            return None
        from pyNastran.bdf.bdf import BDF
        model = BDF(debug=False)
        model.read_bdf(str(path))
        self._bdf_model = model
        return self._bdf_model
class CustomExtractor:
    """
    Compiled custom extractor ready for execution.

    Holds the validated source of a user-defined extraction function,
    compiles it into a pre-populated namespace, and normalizes whatever the
    function returns into a ``{output_name: float}`` mapping.
    """

    def __init__(self, extractor_id: str, name: str, function_name: str,
                 code: str, outputs: List[Dict[str, Any]],
                 dependencies: Optional[List[str]] = None):
        """
        Initialize custom extractor.

        Args:
            extractor_id: Unique extractor ID
            name: Human-readable name
            function_name: Name of the extraction function
            code: Python source code
            outputs: List of output definitions
            dependencies: Optional list of required pip packages
        """
        self.extractor_id = extractor_id
        self.name = name
        self.function_name = function_name
        self.code = code
        self.outputs = outputs
        self.dependencies = dependencies or []
        # Set by compile(): the extracted callable and a short source hash.
        self._compiled_func: Optional[Callable] = None
        self._code_hash: Optional[str] = None

    def compile(self) -> None:
        """
        Compile the extractor code and extract the function.

        Raises:
            ExtractorValidationError: If code is invalid
            ExtractorSecurityError: If code contains dangerous patterns
        """
        # Validate before executing anything.
        is_valid, errors = validate_extractor_code(self.code, self.function_name)
        if not is_valid:
            raise ExtractorValidationError(f"Validation failed: {'; '.join(errors)}")

        # Short hash identifies this source version in logs.
        self._code_hash = hashlib.sha256(self.code.encode()).hexdigest()[:12]

        # Execution namespace pre-populated with allowed libraries.
        # NOTE(review): '__builtins__' is not restricted here, so exec()
        # below still exposes the full set of builtins — the sandbox relies
        # entirely on the static validation above. Confirm whether specs can
        # come from untrusted users before relaxing that validation.
        namespace = {
            'np': np,
            'numpy': np,
            'math': __import__('math'),
            'statistics': __import__('statistics'),
            'collections': __import__('collections'),
            'itertools': __import__('itertools'),
            'functools': __import__('functools'),
        }
        # Optional scientific stack.
        try:
            import scipy
            namespace['scipy'] = scipy
            from scipy import interpolate, optimize, integrate, linalg
            namespace['interpolate'] = interpolate
            namespace['optimize'] = optimize
            namespace['integrate'] = integrate
            namespace['linalg'] = linalg
        except ImportError:
            pass
        # Optional FEA result parsers.
        try:
            from pyNastran.op2.op2 import OP2
            from pyNastran.bdf.bdf import BDF
            namespace['OP2'] = OP2
            namespace['BDF'] = BDF
        except ImportError:
            pass
        # Optional Atomizer builtin extractors.
        try:
            from optimization_engine import extractors
            namespace['extractors'] = extractors
        except ImportError:
            pass

        # Execute the source so the extraction function is defined.
        try:
            exec(self.code, namespace)
        except Exception as e:
            # Chain the original error so the root cause stays visible.
            raise ExtractorValidationError(f"Failed to compile: {e}") from e

        if self.function_name not in namespace:
            raise ExtractorValidationError(f"Function '{self.function_name}' not defined")
        self._compiled_func = namespace[self.function_name]
        logger.info(f"Compiled custom extractor: {self.name} ({self._code_hash})")

    def execute(self, context: CustomExtractorContext) -> Dict[str, float]:
        """
        Execute the extractor and return results.

        Args:
            context: Execution context with FEA results

        Returns:
            Dictionary of output_name -> value

        Raises:
            RuntimeError: If execution fails
        """
        if self._compiled_func is None:
            self.compile()
        try:
            # The extractor function receives paths as strings (or None).
            result = self._compiled_func(
                op2_path=str(context.op2_path) if context.op2_path else None,
                bdf_path=str(context.bdf_path) if context.bdf_path else None,
                params=context.params,
                working_dir=str(context.working_dir) if context.working_dir else None,
            )
            # Normalize the return value to {name: float}.
            if isinstance(result, dict):
                return result
            elif isinstance(result, (int, float)):
                # Single scalar — attach it to the first declared output name.
                if self.outputs:
                    return {self.outputs[0]['name']: float(result)}
                return {'value': float(result)}
            elif isinstance(result, (list, tuple)):
                # Sequence — map positionally onto declared output names;
                # extras get synthetic 'output_i' keys.
                output_dict = {}
                for i, val in enumerate(result):
                    if i < len(self.outputs):
                        output_dict[self.outputs[i]['name']] = float(val)
                    else:
                        output_dict[f'output_{i}'] = float(val)
                return output_dict
            else:
                raise RuntimeError(f"Unexpected result type: {type(result)}")
        except Exception as e:
            logger.error(f"Custom extractor {self.name} failed: {e}")
            logger.debug(traceback.format_exc())
            # Chain so the original traceback is preserved for callers.
            raise RuntimeError(f"Extractor {self.name} failed: {e}") from e
# ============================================================================
# Extractor Loader
# ============================================================================
class CustomExtractorLoader:
    """Loads, compiles, and caches custom extractors from an AtomizerSpec."""

    def __init__(self):
        """Start with an empty extractor cache."""
        self._cache: Dict[str, CustomExtractor] = {}

    def load_from_spec(self, spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
        """
        Load all custom extractors from an AtomizerSpec.

        Args:
            spec: AtomizerSpec dictionary

        Returns:
            Dictionary of extractor_id -> CustomExtractor
        """
        loaded: Dict[str, CustomExtractor] = {}
        for ext_def in spec.get('extractors', []):
            # Builtin extractors are handled elsewhere.
            if ext_def.get('builtin', True):
                continue
            func_def = ext_def.get('function', {})
            # A custom extractor without source code cannot be compiled.
            if not func_def.get('source'):
                logger.warning(f"Custom extractor {ext_def.get('id')} has no source code")
                continue
            candidate = CustomExtractor(
                extractor_id=ext_def.get('id', 'custom'),
                name=ext_def.get('name', 'Custom Extractor'),
                function_name=func_def.get('name', 'extract'),
                code=func_def.get('source', ''),
                outputs=ext_def.get('outputs', []),
                dependencies=func_def.get('dependencies', []),
            )
            try:
                candidate.compile()
            except (ExtractorValidationError, ExtractorSecurityError) as e:
                logger.error(f"Failed to load extractor {candidate.name}: {e}")
                continue
            loaded[candidate.extractor_id] = candidate
            self._cache[candidate.extractor_id] = candidate
        return loaded

    def get(self, extractor_id: str) -> Optional[CustomExtractor]:
        """Get a cached extractor by ID."""
        return self._cache.get(extractor_id)

    def execute_all(self, extractors: Dict[str, CustomExtractor],
                    context: CustomExtractorContext) -> Dict[str, Dict[str, float]]:
        """
        Execute all custom extractors and collect results.

        Args:
            extractors: Dictionary of extractor_id -> CustomExtractor
            context: Execution context

        Returns:
            Dictionary of extractor_id -> {output_name: value}
        """
        results: Dict[str, Dict[str, float]] = {}
        for ext_id, extractor in extractors.items():
            try:
                values = extractor.execute(context)
            except Exception as e:
                logger.error(f"Extractor {ext_id} failed: {e}")
                # Failed extractors report NaN for every declared output.
                values = {out['name']: float('nan') for out in extractor.outputs}
            results[ext_id] = values
        return results

    def clear_cache(self) -> None:
        """Clear the extractor cache."""
        self._cache.clear()
# ============================================================================
# Convenience Functions
# ============================================================================
# Module-level singleton loader backing the convenience functions below;
# its cache persists for the lifetime of the process.
_loader = CustomExtractorLoader()
def load_custom_extractors(spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
    """
    Load every custom extractor defined in *spec* via the module-level loader.

    Args:
        spec: AtomizerSpec dictionary

    Returns:
        Dictionary of extractor_id -> CustomExtractor
    """
    loaded = _loader.load_from_spec(spec)
    return loaded
def execute_custom_extractor(extractor_id: str,
                             op2_path: Union[str, Path],
                             bdf_path: Optional[Union[str, Path]] = None,
                             working_dir: Optional[Union[str, Path]] = None,
                             params: Optional[Dict[str, float]] = None) -> Dict[str, float]:
    """
    Run one extractor previously loaded into the module-level cache.

    Args:
        extractor_id: ID of the extractor to run
        op2_path: Path to OP2 results file
        bdf_path: Optional path to BDF file
        working_dir: Optional working directory
        params: Optional design parameters

    Returns:
        Dictionary of output_name -> value

    Raises:
        KeyError: If extractor not found in cache
    """
    extractor = _loader.get(extractor_id)
    if extractor is None:
        raise KeyError(f"Extractor '{extractor_id}' not found in cache")
    return extractor.execute(
        CustomExtractorContext(
            op2_path=op2_path,
            bdf_path=bdf_path,
            working_dir=working_dir,
            params=params,
        )
    )
def validate_custom_extractor(code: str, function_name: str = "extract") -> Tuple[bool, List[str]]:
    """
    Statically check extractor source without executing it.

    Thin public wrapper around :func:`validate_extractor_code`.

    Args:
        code: Python source code
        function_name: Expected function name

    Returns:
        Tuple of (is_valid, list of error/warning messages)
    """
    return validate_extractor_code(code, function_name)
# Public API of this module.
__all__ = [
    'CustomExtractor',
    'CustomExtractorLoader',
    'CustomExtractorContext',
    'ExtractorSecurityError',
    'ExtractorValidationError',
    'load_custom_extractors',
    'execute_custom_extractor',
    'validate_custom_extractor',
]

View File

@@ -0,0 +1,328 @@
"""
Spec Extractor Builder
Builds result extractors from AtomizerSpec v2.0 configuration.
Combines builtin extractors with custom Python extractors.
P3.10: Integration with optimization runner
"""
import json
import logging
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from optimization_engine.extractors.custom_extractor_loader import (
CustomExtractor,
CustomExtractorContext,
CustomExtractorLoader,
load_custom_extractors,
)
logger = logging.getLogger(__name__)
# ============================================================================
# Builtin Extractor Registry
# ============================================================================
# Map of builtin extractor types to their extraction functions/classes.
# Stays empty until _register_builtin_extractors() populates it lazily.
BUILTIN_EXTRACTORS = {}

def _register_builtin_extractors():
    """Lazily register builtin extractors to avoid circular imports.

    Populates BUILTIN_EXTRACTORS in place; safe to call repeatedly (returns
    immediately once non-empty). NOTE(review): a single failed import aborts
    the remaining registrations, since one try block wraps them all.
    """
    global BUILTIN_EXTRACTORS
    if BUILTIN_EXTRACTORS:
        return
    try:
        # Zernike OPD (recommended for mirrors) — class-based extractor
        from optimization_engine.extractors.extract_zernike_figure import (
            ZernikeOPDExtractor,
        )
        BUILTIN_EXTRACTORS['zernike_opd'] = ZernikeOPDExtractor
        # Mass extractors
        from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf
        BUILTIN_EXTRACTORS['mass'] = extract_mass_from_bdf
        from optimization_engine.extractors.extract_mass_from_expression import (
            extract_mass_from_expression,
        )
        BUILTIN_EXTRACTORS['mass_expression'] = extract_mass_from_expression
        # Displacement
        from optimization_engine.extractors.extract_displacement import extract_displacement
        BUILTIN_EXTRACTORS['displacement'] = extract_displacement
        # Stress
        from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress
        BUILTIN_EXTRACTORS['stress'] = extract_solid_stress
        from optimization_engine.extractors.extract_principal_stress import (
            extract_principal_stress,
        )
        BUILTIN_EXTRACTORS['principal_stress'] = extract_principal_stress
        # Frequency
        from optimization_engine.extractors.extract_frequency import extract_frequency
        BUILTIN_EXTRACTORS['frequency'] = extract_frequency
        # Temperature
        from optimization_engine.extractors.extract_temperature import extract_temperature
        BUILTIN_EXTRACTORS['temperature'] = extract_temperature
        # Strain energy
        from optimization_engine.extractors.extract_strain_energy import (
            extract_strain_energy,
            extract_total_strain_energy,
        )
        BUILTIN_EXTRACTORS['strain_energy'] = extract_strain_energy
        BUILTIN_EXTRACTORS['total_strain_energy'] = extract_total_strain_energy
        # SPC forces
        from optimization_engine.extractors.extract_spc_forces import (
            extract_spc_forces,
            extract_total_reaction_force,
        )
        BUILTIN_EXTRACTORS['spc_forces'] = extract_spc_forces
        BUILTIN_EXTRACTORS['reaction_force'] = extract_total_reaction_force
        logger.debug(f"Registered {len(BUILTIN_EXTRACTORS)} builtin extractors")
    except ImportError as e:
        # Best-effort: missing optional extractor modules are logged, not fatal.
        logger.warning(f"Some builtin extractors unavailable: {e}")
# ============================================================================
# Spec Extractor Builder
# ============================================================================
class SpecExtractorBuilder:
    """
    Builds extraction functions from AtomizerSpec extractor definitions.

    Each definition becomes a callable
    ``f(op2_path, **kwargs) -> {output_name: float}``: builtin definitions
    wrap entries from BUILTIN_EXTRACTORS, custom definitions compile
    user-supplied source via CustomExtractor.
    """
    def __init__(self, spec: Dict[str, Any]):
        """
        Initialize builder with an AtomizerSpec.

        Args:
            spec: AtomizerSpec dictionary
        """
        self.spec = spec
        self.custom_loader = CustomExtractorLoader()
        # extractor_id -> ready-to-call extraction function
        self._extractors: Dict[str, Callable] = {}
        # extractor_id -> compiled CustomExtractor (custom definitions only)
        self._custom_extractors: Dict[str, CustomExtractor] = {}
        # Register builtin extractors (no-op if already registered).
        _register_builtin_extractors()
    def build(self) -> Dict[str, Callable]:
        """
        Build all extractors from the spec.

        Definitions that fail to build are logged and omitted from the result.

        Returns:
            Dictionary of extractor_id -> extraction_function
        """
        for ext_def in self.spec.get('extractors', []):
            ext_id = ext_def.get('id', 'unknown')
            if ext_def.get('builtin', True):
                # Builtin extractor
                extractor_func = self._build_builtin_extractor(ext_def)
            else:
                # Custom extractor
                extractor_func = self._build_custom_extractor(ext_def)
            if extractor_func:
                self._extractors[ext_id] = extractor_func
            else:
                logger.warning(f"Failed to build extractor: {ext_id}")
        return self._extractors
    def _build_builtin_extractor(self, ext_def: Dict[str, Any]) -> Optional[Callable]:
        """
        Build a builtin extractor function.

        Args:
            ext_def: Extractor definition from spec

        Returns:
            Callable extraction function or None
        """
        ext_type = ext_def.get('type', '')
        ext_id = ext_def.get('id', '')  # NOTE(review): currently unused here
        config = ext_def.get('config', {})
        outputs = ext_def.get('outputs', [])
        # Look up the registered implementation for this type.
        base_extractor = BUILTIN_EXTRACTORS.get(ext_type)
        if base_extractor is None:
            logger.warning(f"Unknown builtin extractor type: {ext_type}")
            return None
        # Closure factory: binds base/cfg/outs so the wrapper is self-contained.
        def create_extractor_wrapper(base, cfg, outs):
            """Create a wrapper that applies config and extracts specified outputs."""
            def wrapper(op2_path: str, **kwargs) -> Dict[str, float]:
                """Execute extractor and return outputs dict; NaN outputs on failure."""
                try:
                    # Handle class-based extractors (like ZernikeOPDExtractor)
                    if isinstance(base, type):
                        # Instantiate with config.
                        # NOTE(review): assumes every class-based extractor
                        # accepts inner_radius/n_modes kwargs — confirm if
                        # more class-based types are registered.
                        instance = base(
                            inner_radius=cfg.get('inner_radius_mm', 0),
                            n_modes=cfg.get('n_modes', 21),
                            **{k: v for k, v in cfg.items()
                               if k not in ['inner_radius_mm', 'n_modes']}
                        )
                        raw_result = instance.extract(op2_path, **kwargs)
                    else:
                        # Function-based extractor: config passed as kwargs.
                        raw_result = base(op2_path, **cfg, **kwargs)
                    # Map the raw result onto the declared output names.
                    result = {}
                    if isinstance(raw_result, dict):
                        # Each output may name a 'source' key to pull from.
                        for out_def in outs:
                            out_name = out_def.get('name', '')
                            source = out_def.get('source', out_name)
                            if source in raw_result:
                                result[out_name] = float(raw_result[source])
                            elif out_name in raw_result:
                                result[out_name] = float(raw_result[out_name])
                        # If no outputs defined, return all numeric values.
                        if not outs:
                            result = {k: float(v) for k, v in raw_result.items()
                                      if isinstance(v, (int, float))}
                    elif isinstance(raw_result, (int, float)):
                        # Single value - use first output name or 'value'
                        out_name = outs[0]['name'] if outs else 'value'
                        result[out_name] = float(raw_result)
                    return result
                except Exception as e:
                    # Best-effort: a failed extraction yields NaN per output.
                    logger.error(f"Extractor failed: {e}")
                    return {out['name']: float('nan') for out in outs}
            return wrapper
        return create_extractor_wrapper(base_extractor, config, outputs)
    def _build_custom_extractor(self, ext_def: Dict[str, Any]) -> Optional[Callable]:
        """
        Build a custom Python extractor function.

        Args:
            ext_def: Extractor definition with function source

        Returns:
            Callable extraction function or None
        """
        ext_id = ext_def.get('id', 'custom')
        func_def = ext_def.get('function', {})
        if not func_def.get('source'):
            logger.error(f"Custom extractor {ext_id} has no source code")
            return None
        try:
            custom_ext = CustomExtractor(
                extractor_id=ext_id,
                name=ext_def.get('name', 'Custom'),
                function_name=func_def.get('name', 'extract'),
                code=func_def.get('source', ''),
                outputs=ext_def.get('outputs', []),
                dependencies=func_def.get('dependencies', []),
            )
            # Compile (validates + exec's the source) before wrapping.
            custom_ext.compile()
            self._custom_extractors[ext_id] = custom_ext
            # Closure factory binding the compiled extractor.
            def create_custom_wrapper(extractor):
                def wrapper(op2_path: str, bdf_path: Optional[str] = None,
                            params: Optional[Dict[str, float]] = None,
                            working_dir: Optional[str] = None, **kwargs) -> Dict[str, float]:
                    context = CustomExtractorContext(
                        op2_path=op2_path,
                        bdf_path=bdf_path,
                        working_dir=working_dir,
                        params=params or {}
                    )
                    return extractor.execute(context)
                return wrapper
            return create_custom_wrapper(custom_ext)
        except Exception as e:
            logger.error(f"Failed to build custom extractor {ext_id}: {e}")
            return None
# ============================================================================
# Convenience Functions
# ============================================================================
def build_extractors_from_spec(spec: Union[Dict[str, Any], Path, str]) -> Dict[str, Callable]:
    """
    Build extraction functions from an AtomizerSpec.

    Args:
        spec: AtomizerSpec dict, or path to spec JSON file

    Returns:
        Dictionary of extractor_id -> extraction_function

    Example:
        extractors = build_extractors_from_spec("atomizer_spec.json")
        results = extractors['E1']("model.op2")
    """
    if isinstance(spec, (str, Path)):
        # JSON text is defined as UTF-8 (RFC 8259); read explicitly rather
        # than relying on the platform default encoding (e.g. cp1252 on
        # Windows), which can mis-decode non-ASCII spec content.
        with open(spec, encoding="utf-8") as f:
            spec = json.load(f)
    builder = SpecExtractorBuilder(spec)
    return builder.build()
def get_extractor_outputs(spec: Dict[str, Any], extractor_id: str) -> List[Dict[str, Any]]:
    """
    Get output definitions for an extractor.

    Args:
        spec: AtomizerSpec dictionary
        extractor_id: ID of the extractor

    Returns:
        List of output definitions [{name, units, description}, ...];
        empty list if the extractor is not found.
    """
    matches = (e for e in spec.get('extractors', [])
               if e.get('id') == extractor_id)
    found = next(matches, None)
    return found.get('outputs', []) if found is not None else []
def list_available_builtin_extractors() -> List[str]:
    """
    List every builtin extractor type that registered successfully.

    Returns:
        List of extractor type names
    """
    # Ensure the lazy registry has been populated first.
    _register_builtin_extractors()
    return [*BUILTIN_EXTRACTORS]
# Public API of this module.
__all__ = [
    'SpecExtractorBuilder',
    'build_extractors_from_spec',
    'get_extractor_outputs',
    'list_available_builtin_extractors',
    'BUILTIN_EXTRACTORS',
]