feat(config): AtomizerSpec v2.0 Pydantic models, validators, and tests
Config Layer:
- spec_models.py: Pydantic models for AtomizerSpec v2.0
- spec_validator.py: Semantic validation with detailed error reporting

Extractors:
- custom_extractor_loader.py: Runtime custom extractor loading
- spec_extractor_builder.py: Build extractors from spec definitions

Tools:
- migrate_to_spec_v2.py: CLI tool for batch migration

Tests:
- test_migrator.py: Migration tests
- test_spec_manager.py: SpecManager service tests
- test_spec_api.py: REST API tests
- test_mcp_tools.py: MCP tool tests
- test_e2e_unified_config.py: End-to-end config tests
This commit is contained in:
541
optimization_engine/extractors/custom_extractor_loader.py
Normal file
541
optimization_engine/extractors/custom_extractor_loader.py
Normal file
@@ -0,0 +1,541 @@
|
||||
"""
|
||||
Custom Extractor Loader

Dynamically loads and executes custom Python extractors defined in AtomizerSpec v2.0.
Provides sandboxed execution with access to FEA results and common analysis libraries.

P3.9: Custom extractor runtime loader
|
||||
"""
|
||||
|
||||
import ast
|
||||
import hashlib
|
||||
import importlib
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Lazy imports for optional dependencies
|
||||
# Holds the pyNastran OP2 class once CustomExtractorContext.op2_result has
# imported it; stays None until that first lazy import happens.
_PYOP2 = None
|
||||
# Slot for a lazily imported scipy module. NOTE(review): nothing in the code
# visible in this file reads or assigns it after this line.
_SCIPY = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Allowed modules for custom extractors (sandboxed environment)
|
||||
# ============================================================================
|
||||
|
||||
# Modules that custom extractor code is expected to rely on.
ALLOWED_MODULES = {
    # Standard library helpers
    "collections", "functools", "itertools", "math", "statistics",
    # NumPy / SciPy numerical stack
    "numpy",
    "scipy",
    "scipy.integrate", "scipy.interpolate", "scipy.linalg", "scipy.optimize",
    # pyNastran readers for FEA results and models
    "pyNastran",
    "pyNastran.bdf", "pyNastran.bdf.bdf",
    "pyNastran.op2", "pyNastran.op2.op2",
    # Atomizer's own extractor package
    "optimization_engine.extractors",
}
|
||||
|
||||
# Top-level names whose import is rejected by validate_extractor_code().
BLOCKED_MODULES = {
    # Process, filesystem and interpreter access
    "os", "subprocess", "shutil", "sys",
    # Interpreter internals and dynamic import machinery
    "builtins", "__builtins__", "importlib",
    # Names of dangerous builtins, listed alongside the modules
    "eval", "exec", "compile", "open", "file",
    # Network access
    "socket", "requests", "urllib", "http",
}
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Code Validation
|
||||
# ============================================================================
|
||||
|
||||
class ExtractorSecurityError(Exception):
    """Signals that custom extractor source matched a forbidden construct."""
|
||||
|
||||
|
||||
class ExtractorValidationError(Exception):
    """Signals that custom extractor source failed validation or compilation."""
|
||||
|
||||
|
||||
# Source-text patterns that are rejected outright, each paired with the message
# carried by the raised ExtractorSecurityError. Compiled once at import time.
#
# SECURITY NOTE: these are matched against the raw text, so they also fire on
# comments, string literals and attribute calls such as ``re.compile(``, and
# they can be side-stepped by indirect access (e.g. ``getattr``). They are a
# guard rail, not a sandbox; do not rely on them for untrusted code.
_DANGEROUS_PATTERNS = tuple(
    (re.compile(pattern), message)
    for pattern, message in (
        (r'\bexec\s*\(', 'exec() is not allowed'),
        (r'\beval\s*\(', 'eval() is not allowed'),
        (r'\bcompile\s*\(', 'compile() is not allowed'),
        (r'\b__import__\s*\(', '__import__() is not allowed'),
        (r'\bopen\s*\(', 'open() is not allowed - use op2_path parameter'),
        (r'\bos\.(system|popen|spawn|exec)', 'os.system/popen/spawn/exec is not allowed'),
        (r'\bsubprocess\.', 'subprocess module is not allowed'),
        (r'\bshutil\.', 'shutil module is not allowed'),
        (r'import\s+os\b', 'import os is not allowed'),
        (r'from\s+os\b', 'from os import is not allowed'),
        (r'import\s+subprocess', 'import subprocess is not allowed'),
        (r'import\s+sys\b', 'import sys is not allowed'),
    )
)

# Node types whose bodies open a new scope; names defined inside them are not
# bound at module level when the source is exec()'d.
_NEW_SCOPE_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)


def _module_scope_nodes(tree: ast.AST):
    """Yield nodes that run in module scope, without entering def/class/lambda bodies."""
    from collections import deque

    pending = deque(ast.iter_child_nodes(tree))
    while pending:
        node = pending.popleft()
        yield node
        if not isinstance(node, _NEW_SCOPE_NODES):
            # Descend into if/try/with/for blocks: defs inside them still
            # bind at module level.
            pending.extend(ast.iter_child_nodes(node))


def validate_extractor_code(code: str, function_name: str) -> Tuple[bool, List[str]]:
    """
    Validate custom extractor code for security and correctness.

    The checks run in this order:

    1. The source must parse.
    2. The raw text must not match any denylisted pattern.
    3. A function called ``function_name`` must be defined in module scope
       (functions nested in a class or another function do not count, because
       exec() would not bind them under that name) and its first argument
       must be one of ``op2_path``, ``op2_result``, ``results`` or ``data``.
    4. No ``import`` / ``from ... import`` anywhere in the code may target a
       top-level name listed in BLOCKED_MODULES.

    Args:
        code: Python source code string
        function_name: Expected function name to find in code

    Returns:
        Tuple of (is_valid, list of error messages). Parse failures and
        failures of checks 3 and 4 are reported here.

    Raises:
        ExtractorSecurityError: If dangerous patterns detected (check 2)
    """
    errors: List[str] = []

    # Check for syntax errors first. ValueError is caught as well because
    # older Python versions raise it (not SyntaxError) for NUL bytes.
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError) as e:
        return False, [f"Syntax error: {e}"]

    # Check for disallowed patterns
    for pattern, message in _DANGEROUS_PATTERNS:
        if pattern.search(code):
            raise ExtractorSecurityError(message)

    # Check that the expected function exists at module scope
    target = next(
        (
            node for node in _module_scope_nodes(tree)
            if isinstance(node, ast.FunctionDef) and node.name == function_name
        ),
        None,
    )

    if target is None:
        errors.append(f"Function '{function_name}' not found in code")
    else:
        # Check function signature
        arg_names = [arg.arg for arg in target.args.args]

        # Must have op2_path as first argument (or op2_result/results)
        valid_first_args = {'op2_path', 'op2_result', 'results', 'data'}
        if not arg_names or arg_names[0] not in valid_first_args:
            errors.append(
                f"Function {function_name} must have first argument from: "
                f"{valid_first_args}, got: {arg_names[0] if arg_names else 'none'}"
            )

    # Check imports (anywhere in the code, including inside functions)
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                module = alias.name.split('.')[0]
                if module in BLOCKED_MODULES:
                    errors.append(f"Import of '{alias.name}' is not allowed")
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for ``from . import x``; nothing to check.
            if node.module:
                module = node.module.split('.')[0]
                if module in BLOCKED_MODULES:
                    errors.append(f"Import from '{node.module}' is not allowed")

    return len(errors) == 0, errors
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Extractor Compilation and Execution
|
||||
# ============================================================================
|
||||
|
||||
class CustomExtractorContext:
    """
    Execution context for custom extractors.

    Holds the file locations and design parameters for one extraction run and
    exposes the parsed OP2 results / BDF model as lazily loaded properties.
    """

    def __init__(self, op2_path: Optional[Union[str, Path]] = None,
                 bdf_path: Optional[Union[str, Path]] = None,
                 working_dir: Optional[Union[str, Path]] = None,
                 params: Optional[Dict[str, float]] = None):
        """
        Initialize extractor context.

        Args:
            op2_path: Path to OP2 results file
            bdf_path: Path to BDF model file
            working_dir: Working directory for the trial
            params: Current design parameters
        """
        # Falsy inputs (None, "") are stored as None; everything else is
        # normalized to a Path.
        self.op2_path = Path(op2_path) if op2_path else None
        self.bdf_path = Path(bdf_path) if bdf_path else None
        self.working_dir = Path(working_dir) if working_dir else None
        self.params = params or {}

        # Lazy-loaded results
        self._op2_result = None
        self._bdf_model = None

    @property
    def op2_result(self):
        """
        Lazy-load OP2 results.

        Returns:
            The pyNastran OP2 object read from ``op2_path``, or None when no
            path was given or the file does not exist. The object is cached
            after the first successful read.
        """
        global _PYOP2

        if self._op2_result is None and self.op2_path and self.op2_path.exists():
            if _PYOP2 is None:
                from pyNastran.op2.op2 import OP2
                _PYOP2 = OP2
            # The OP2 constructor takes options, not a filename; the file is
            # read by read_op2(). Cache only after the read succeeded so a
            # failed read is retried on the next access.
            model = _PYOP2(debug=False)
            model.read_op2(str(self.op2_path))
            self._op2_result = model
        return self._op2_result

    @property
    def bdf_model(self):
        """
        Lazy-load BDF model.

        Returns:
            The pyNastran BDF object read from ``bdf_path``, or None when no
            path was given or the file does not exist. The object is cached
            after the first successful read.
        """
        if self._bdf_model is None and self.bdf_path and self.bdf_path.exists():
            from pyNastran.bdf.bdf import BDF
            # Build into a local first: caching the empty model before
            # read_bdf() would leave an unread model behind if the read fails.
            model = BDF(debug=False)
            model.read_bdf(str(self.bdf_path))
            self._bdf_model = model
        return self._bdf_model
|
||||
|
||||
|
||||
class CustomExtractor:
    """
    Compiled custom extractor ready for execution.

    Wraps one user-supplied extraction function: ``compile()`` validates and
    exec()s the source to obtain the function, ``execute()`` calls it and
    normalizes whatever it returns into a ``{output_name: value}`` dict.
    """

    # First-parameter names the validator accepts besides ``op2_path``.
    _RESULT_FIRST_ARGS = ('op2_result', 'results', 'data')

    def __init__(self, extractor_id: str, name: str, function_name: str,
                 code: str, outputs: List[Dict[str, Any]],
                 dependencies: Optional[List[str]] = None):
        """
        Initialize custom extractor.

        Args:
            extractor_id: Unique extractor ID
            name: Human-readable name
            function_name: Name of the extraction function
            code: Python source code
            outputs: List of output definitions
            dependencies: Optional list of required pip packages
        """
        self.extractor_id = extractor_id
        self.name = name
        self.function_name = function_name
        self.code = code
        self.outputs = outputs
        self.dependencies = dependencies or []

        # Compiled function
        self._compiled_func: Optional[Callable] = None
        self._code_hash: Optional[str] = None

    def compile(self) -> None:
        """
        Compile the extractor code and extract the function.

        Raises:
            ExtractorValidationError: If code is invalid
            ExtractorSecurityError: If code contains dangerous patterns
        """
        # Validate code
        is_valid, errors = validate_extractor_code(self.code, self.function_name)
        if not is_valid:
            raise ExtractorValidationError(f"Validation failed: {'; '.join(errors)}")

        # Compute code hash for caching
        self._code_hash = hashlib.sha256(self.code.encode()).hexdigest()[:12]

        # Create execution namespace with allowed imports
        namespace: Dict[str, Any] = {'np': np, 'numpy': np}
        for module_name in ('math', 'statistics', 'collections', 'itertools', 'functools'):
            namespace[module_name] = importlib.import_module(module_name)

        # Add scipy if available
        try:
            import scipy
            namespace['scipy'] = scipy
            from scipy import interpolate, optimize, integrate, linalg
            namespace['interpolate'] = interpolate
            namespace['optimize'] = optimize
            namespace['integrate'] = integrate
            namespace['linalg'] = linalg
        except ImportError:
            pass

        # Add pyNastran if available
        try:
            from pyNastran.op2.op2 import OP2
            from pyNastran.bdf.bdf import BDF
            namespace['OP2'] = OP2
            namespace['BDF'] = BDF
        except ImportError:
            pass

        # Add Atomizer extractors
        try:
            from optimization_engine import extractors
            namespace['extractors'] = extractors
        except ImportError:
            pass

        # Execute the code to define the function.
        # SECURITY NOTE: exec() adds the full default builtins to ``namespace``,
        # so the only barrier between this code and the host process is the
        # validation above. Only run extractor source from trusted specs.
        try:
            exec(self.code, namespace)
        except Exception as e:
            raise ExtractorValidationError(f"Failed to compile: {e}") from e

        # Extract the function
        if self.function_name not in namespace:
            raise ExtractorValidationError(f"Function '{self.function_name}' not defined")

        self._compiled_func = namespace[self.function_name]
        logger.info(f"Compiled custom extractor: {self.name} ({self._code_hash})")

    def _call_kwargs(self, context: "CustomExtractorContext") -> Dict[str, Any]:
        """Build the keyword arguments the compiled function can actually accept."""
        import inspect  # local: only needed for signature inspection

        standard = {
            'op2_path': str(context.op2_path) if context.op2_path else None,
            'bdf_path': str(context.bdf_path) if context.bdf_path else None,
            'params': context.params,
            'working_dir': str(context.working_dir) if context.working_dir else None,
        }

        try:
            parameters = list(inspect.signature(self._compiled_func).parameters.values())
        except (TypeError, ValueError):
            # Signature not introspectable: pass all four standard arguments.
            return standard

        by_keyword = (inspect.Parameter.POSITIONAL_OR_KEYWORD,
                      inspect.Parameter.KEYWORD_ONLY)
        accepts_any = any(p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters)
        named = {p.name for p in parameters if p.kind in by_keyword}

        # Functions declaring all four names (or **kwargs) receive exactly
        # what they always did; functions declaring fewer no longer fail with
        # "unexpected keyword argument".
        kwargs = {key: value for key, value in standard.items()
                  if accepts_any or key in named}

        # The validator accepts op2_result/results/data as the first argument,
        # but none of the standard keywords can satisfy such a parameter.
        # NOTE(review): supplying the context's parsed OP2 object here is an
        # assumption about what those names mean - confirm against the spec.
        first = parameters[0] if parameters else None
        if (first is not None
                and first.kind in by_keyword
                and first.name in self._RESULT_FIRST_ARGS
                and first.default is inspect.Parameter.empty):
            kwargs[first.name] = context.op2_result

        return kwargs

    def _normalize_result(self, result: Any) -> Dict[str, float]:
        """Convert a raw return value (dict, scalar, sequence, array) to a dict."""
        if isinstance(result, dict):
            return result

        # NumPy arrays: treat 0-d as a scalar and 1-d as a sequence. Arrays of
        # higher rank fall through to the "unexpected type" error below.
        if isinstance(result, np.ndarray):
            if result.ndim == 0:
                result = result.item()
            elif result.ndim == 1:
                result = result.tolist()

        # NumPy scalar types are listed explicitly: of these only np.float64
        # is a subclass of a Python numeric type.
        if isinstance(result, (int, float, np.integer, np.floating, np.bool_)):
            # Single value - use first output name
            if self.outputs:
                return {self.outputs[0]['name']: float(result)}
            return {'value': float(result)}

        if isinstance(result, (list, tuple)):
            # Multiple values - map to output names; extras get positional keys
            output_dict = {}
            for i, val in enumerate(result):
                if i < len(self.outputs):
                    output_dict[self.outputs[i]['name']] = float(val)
                else:
                    output_dict[f'output_{i}'] = float(val)
            return output_dict

        raise RuntimeError(f"Unexpected result type: {type(result)}")

    def execute(self, context: "CustomExtractorContext") -> Dict[str, float]:
        """
        Execute the extractor and return results.

        The function is compiled on first use. It is called with keyword
        arguments only, limited to the names it declares among ``op2_path``,
        ``bdf_path``, ``params`` and ``working_dir``.

        Args:
            context: Execution context with FEA results

        Returns:
            Dictionary of output_name -> value. A dict returned by the
            function is passed through unchanged; scalars and sequences are
            mapped onto the configured output names.

        Raises:
            RuntimeError: If execution fails
        """
        if self._compiled_func is None:
            self.compile()

        try:
            # Call the function with appropriate arguments
            result = self._compiled_func(**self._call_kwargs(context))
            # Normalize result to dict
            return self._normalize_result(result)
        except Exception as e:
            logger.error(f"Custom extractor {self.name} failed: {e}")
            logger.debug(traceback.format_exc())
            raise RuntimeError(f"Extractor {self.name} failed: {e}") from e
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Extractor Loader
|
||||
# ============================================================================
|
||||
|
||||
class CustomExtractorLoader:
    """
    Loads and manages custom extractors from AtomizerSpec.

    Every successfully compiled extractor is also remembered by ID so it can
    be fetched later with ``get()``.
    """

    def __init__(self) -> None:
        """Initialize loader with empty cache."""
        self._cache: Dict[str, "CustomExtractor"] = {}

    def load_from_spec(self, spec: Dict[str, Any]) -> Dict[str, "CustomExtractor"]:
        """
        Load all custom extractors from an AtomizerSpec.

        Entries are skipped when ``builtin`` is true or missing, or when they
        carry no function source. Entries that fail validation are logged and
        left out of the result.

        Args:
            spec: AtomizerSpec dictionary

        Returns:
            Dictionary of extractor_id -> CustomExtractor
        """
        loaded: Dict[str, "CustomExtractor"] = {}

        # ``or []`` / ``or {}`` tolerate keys that are present but null.
        for ext_def in spec.get('extractors') or []:
            # Skip builtin extractors (an absent flag counts as builtin)
            if ext_def.get('builtin', True):
                continue

            # Custom extractor must have function definition
            func_def = ext_def.get('function') or {}
            source = func_def.get('source')
            if not source:
                logger.warning(f"Custom extractor {ext_def.get('id')} has no source code")
                continue

            extractor = CustomExtractor(
                extractor_id=ext_def.get('id', 'custom'),
                name=ext_def.get('name', 'Custom Extractor'),
                function_name=func_def.get('name', 'extract'),
                code=source,
                outputs=ext_def.get('outputs', []),
                dependencies=func_def.get('dependencies', []),
            )

            try:
                extractor.compile()
            except (ExtractorValidationError, ExtractorSecurityError) as e:
                logger.error(f"Failed to load extractor {extractor.name}: {e}")
                # Drop any earlier extractor cached under this ID so get()
                # cannot hand back code the current spec no longer defines.
                self._cache.pop(extractor.extractor_id, None)
                continue

            loaded[extractor.extractor_id] = extractor
            self._cache[extractor.extractor_id] = extractor

        return loaded

    def get(self, extractor_id: str) -> Optional["CustomExtractor"]:
        """Get a cached extractor by ID, or None if it is not cached."""
        return self._cache.get(extractor_id)

    def execute_all(self, extractors: Dict[str, "CustomExtractor"],
                    context: "CustomExtractorContext") -> Dict[str, Dict[str, float]]:
        """
        Execute all custom extractors and collect results.

        A failing extractor does not abort the run: the failure is logged and
        each of its declared outputs is reported as NaN.

        Args:
            extractors: Dictionary of extractor_id -> CustomExtractor
            context: Execution context

        Returns:
            Dictionary of extractor_id -> {output_name: value}
        """
        results = {}

        for ext_id, extractor in extractors.items():
            try:
                results[ext_id] = extractor.execute(context)
            except Exception as e:
                logger.error(f"Extractor {ext_id} failed: {e}")
                # Return NaN for failed extractors
                results[ext_id] = {
                    out['name']: float('nan')
                    for out in extractor.outputs
                }

        return results

    def clear_cache(self) -> None:
        """Clear the extractor cache."""
        self._cache.clear()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Convenience Functions
|
||||
# ============================================================================
|
||||
|
||||
# Global loader instance
|
||||
# Shared by load_custom_extractors() and execute_custom_extractor(), so an
# extractor loaded through the former can be looked up by ID by the latter.
_loader = CustomExtractorLoader()
|
||||
|
||||
|
||||
def load_custom_extractors(spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
    """
    Compile the custom extractors of a spec using the module-level loader.

    Args:
        spec: AtomizerSpec dictionary

    Returns:
        Mapping of extractor ID to its compiled CustomExtractor
    """
    compiled_by_id = _loader.load_from_spec(spec)
    return compiled_by_id
|
||||
|
||||
|
||||
def execute_custom_extractor(extractor_id: str,
                             op2_path: Union[str, Path],
                             bdf_path: Optional[Union[str, Path]] = None,
                             working_dir: Optional[Union[str, Path]] = None,
                             params: Optional[Dict[str, float]] = None) -> Dict[str, float]:
    """
    Run one extractor previously cached by the module-level loader.

    Args:
        extractor_id: ID of the extractor to run
        op2_path: Location of the OP2 results file
        bdf_path: Location of the BDF file, if any
        working_dir: Working directory, if any
        params: Design parameters, if any

    Returns:
        Mapping of output name to extracted value

    Raises:
        KeyError: If no extractor with this ID is in the cache
    """
    cached = _loader.get(extractor_id)

    if cached is not None:
        # Bundle the inputs into a context and hand it to the extractor.
        run_context = CustomExtractorContext(
            op2_path=op2_path,
            bdf_path=bdf_path,
            working_dir=working_dir,
            params=params,
        )
        return cached.execute(run_context)

    raise KeyError(f"Extractor '{extractor_id}' not found in cache")
|
||||
|
||||
|
||||
def validate_custom_extractor(code: str, function_name: str = "extract") -> Tuple[bool, List[str]]:
    """
    Check custom extractor source without running it.

    Thin public alias for validate_extractor_code(); an
    ExtractorSecurityError raised there propagates to the caller.

    Args:
        code: Python source code
        function_name: Name of the function the code must define

    Returns:
        Tuple of (is_valid, list of error/warning messages)
    """
    verdict = validate_extractor_code(code, function_name)
    return verdict
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = [
    # Classes
    "CustomExtractor",
    "CustomExtractorLoader",
    "CustomExtractorContext",
    # Exceptions
    "ExtractorSecurityError",
    "ExtractorValidationError",
    # Convenience functions
    "load_custom_extractors",
    "execute_custom_extractor",
    "validate_custom_extractor",
]
|
||||
Reference in New Issue
Block a user