feat(config): AtomizerSpec v2.0 Pydantic models, validators, and tests

Config Layer:
- spec_models.py: Pydantic models for AtomizerSpec v2.0
- spec_validator.py: Semantic validation with detailed error reporting

Extractors:
- custom_extractor_loader.py: Runtime custom extractor loading
- spec_extractor_builder.py: Build extractors from spec definitions

Tools:
- migrate_to_spec_v2.py: CLI tool for batch migration

Tests:
- test_migrator.py: Migration tests
- test_spec_manager.py: SpecManager service tests
- test_spec_api.py: REST API tests
- test_mcp_tools.py: MCP tool tests
- test_e2e_unified_config.py: End-to-end config tests
This commit is contained in:
2026-01-20 13:12:03 -05:00
parent 27e78d3d56
commit 6c30224341
10 changed files with 4705 additions and 0 deletions

View File

@@ -0,0 +1,541 @@
"""
Custom Extractor Loader
Dynamically loads and executes custom Python extractors defined in AtomizerSpec v2.0.
Provides sandboxed execution with access to FEA results and common analysis libraries.
P3.9: Custom extractor runtime loader
"""
import ast
import hashlib
import importlib
import logging
import re
import sys
import traceback
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import numpy as np
# Lazy imports for optional dependencies
_PYOP2 = None
_SCIPY = None
logger = logging.getLogger(__name__)
# ============================================================================
# Allowed modules for custom extractors (sandboxed environment)
# ============================================================================
# Modules custom extractors are expected to use.
# NOTE(review): this set appears to be informational only — the import check
# in validate_extractor_code() below tests against BLOCKED_MODULES (a
# denylist), not this allowlist. Confirm whether an allowlist check was
# intended before relying on this set for enforcement.
ALLOWED_MODULES = {
    # Core Python
    "math",
    "statistics",
    "collections",
    "itertools",
    "functools",
    # Scientific computing
    "numpy",
    "scipy",
    "scipy.interpolate",
    "scipy.optimize",
    "scipy.integrate",
    "scipy.linalg",
    # FEA result parsing
    "pyNastran",
    "pyNastran.op2",
    "pyNastran.op2.op2",
    "pyNastran.bdf",
    "pyNastran.bdf.bdf",
    # Atomizer extractors
    "optimization_engine.extractors",
}

# Top-level module names whose import is rejected by validate_extractor_code().
# (Some entries — e.g. "eval", "exec", "open" — are builtins, not modules;
# they are kept here for completeness but are caught by the name scan, not
# the import scan.)
BLOCKED_MODULES = {
    "os",
    "subprocess",
    "shutil",
    "sys",
    "builtins",
    "__builtins__",
    "importlib",
    "eval",
    "exec",
    "compile",
    "open",
    "file",
    "socket",
    "requests",
    "urllib",
    "http",
}
# ============================================================================
# Code Validation
# ============================================================================
class ExtractorSecurityError(Exception):
    """Signals that extractor source references a forbidden name or module."""
class ExtractorValidationError(Exception):
    """Signals that extractor source failed syntactic or structural validation."""
def validate_extractor_code(code: str, function_name: str) -> Tuple[bool, List[str]]:
    """
    Validate custom extractor code for security and correctness.

    Security screening is performed on the parsed AST rather than with
    regular expressions, so banned names appearing only inside comments or
    string literals, or as attribute names on other objects (e.g.
    ``model.compile(...)``), no longer trigger false positives — while any
    actual reference to a banned builtin or module name is still rejected.

    Args:
        code: Python source code string
        function_name: Expected function name to find in code

    Returns:
        Tuple of (is_valid, list of error messages)

    Raises:
        ExtractorSecurityError: If a banned name or module is referenced
    """
    errors: List[str] = []

    # Syntax must be valid before any further analysis.
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        return False, [f"Syntax error: {e}"]

    # Identifiers that must never be referenced or imported in extractor code.
    banned_names = {
        'exec': 'exec() is not allowed',
        'eval': 'eval() is not allowed',
        'compile': 'compile() is not allowed',
        '__import__': '__import__() is not allowed',
        'open': 'open() is not allowed - use op2_path parameter',
        'os': 'os module is not allowed',
        'subprocess': 'subprocess module is not allowed',
        'shutil': 'shutil module is not allowed',
        'sys': 'sys module is not allowed',
    }
    # First positional argument the extraction function must accept.
    valid_first_args = {'op2_path', 'op2_result', 'results', 'data'}

    function_found = False
    for node in ast.walk(tree):
        # Any live reference to a hard-banned name is an immediate failure.
        if isinstance(node, ast.Name) and node.id in banned_names:
            raise ExtractorSecurityError(banned_names[node.id])
        if isinstance(node, ast.Import):
            for alias in node.names:
                root = alias.name.split('.')[0]
                if root in banned_names:
                    raise ExtractorSecurityError(banned_names[root])
                if root in BLOCKED_MODULES:
                    errors.append(f"Import of '{alias.name}' is not allowed")
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                root = node.module.split('.')[0]
                if root in banned_names:
                    raise ExtractorSecurityError(banned_names[root])
                if root in BLOCKED_MODULES:
                    errors.append(f"Import from '{node.module}' is not allowed")
        elif (isinstance(node, ast.FunctionDef) and node.name == function_name
              and not function_found):
            function_found = True
            arg_names = [arg.arg for arg in node.args.args]
            if not arg_names or arg_names[0] not in valid_first_args:
                errors.append(
                    f"Function {function_name} must have first argument from: "
                    f"{valid_first_args}, got: {arg_names[0] if arg_names else 'none'}"
                )

    if not function_found:
        errors.append(f"Function '{function_name}' not found in code")

    return len(errors) == 0, errors
# ============================================================================
# Extractor Compilation and Execution
# ============================================================================
class CustomExtractorContext:
    """
    Execution context handed to custom extractors.

    Bundles the trial's file paths and design parameters, and lazily parses
    the OP2 results / BDF model the first time they are accessed.
    """

    def __init__(self, op2_path: Optional[Path] = None,
                 bdf_path: Optional[Path] = None,
                 working_dir: Optional[Path] = None,
                 params: Optional[Dict[str, float]] = None):
        """
        Initialize extractor context.

        Args:
            op2_path: Path to OP2 results file
            bdf_path: Path to BDF model file
            working_dir: Working directory for the trial
            params: Current design parameters
        """
        self.op2_path = None if not op2_path else Path(op2_path)
        self.bdf_path = None if not bdf_path else Path(bdf_path)
        self.working_dir = None if not working_dir else Path(working_dir)
        self.params = params if params else {}
        # Parsed results are cached here after the first property access.
        self._op2_result = None
        self._bdf_model = None

    @property
    def op2_result(self):
        """OP2 results, parsed on first access (None if no/missing file)."""
        if self._op2_result is not None:
            return self._op2_result
        path = self.op2_path
        if path is None or not path.exists():
            return None
        global _PYOP2
        if _PYOP2 is None:
            from pyNastran.op2.op2 import OP2
            _PYOP2 = OP2
        self._op2_result = _PYOP2(str(path), debug=False)
        return self._op2_result

    @property
    def bdf_model(self):
        """BDF model, read on first access (None if no/missing file)."""
        if self._bdf_model is not None:
            return self._bdf_model
        path = self.bdf_path
        if path is None or not path.exists():
            return None
        from pyNastran.bdf.bdf import BDF
        model = BDF(debug=False)
        model.read_bdf(str(path))
        self._bdf_model = model
        return self._bdf_model
class CustomExtractor:
    """
    Compiled custom extractor ready for execution.

    Holds the validated source of a user-defined extraction function,
    compiles it into a pre-populated namespace, and normalizes whatever the
    function returns into a ``{output_name: float}`` mapping.
    """

    def __init__(self, extractor_id: str, name: str, function_name: str,
                 code: str, outputs: List[Dict[str, Any]],
                 dependencies: Optional[List[str]] = None):
        """
        Initialize custom extractor.

        Args:
            extractor_id: Unique extractor ID
            name: Human-readable name
            function_name: Name of the extraction function
            code: Python source code
            outputs: List of output definitions
            dependencies: Optional list of required pip packages
        """
        self.extractor_id = extractor_id
        self.name = name
        self.function_name = function_name
        self.code = code
        self.outputs = outputs
        self.dependencies = dependencies or []
        # Set by compile(): the extracted callable and a short source hash.
        self._compiled_func: Optional[Callable] = None
        self._code_hash: Optional[str] = None

    def compile(self) -> None:
        """
        Compile the extractor code and extract the function.

        Raises:
            ExtractorValidationError: If code is invalid
            ExtractorSecurityError: If code contains dangerous patterns
        """
        # Validate before executing anything.
        is_valid, errors = validate_extractor_code(self.code, self.function_name)
        if not is_valid:
            raise ExtractorValidationError(f"Validation failed: {'; '.join(errors)}")

        # Short hash identifies this source version in logs.
        self._code_hash = hashlib.sha256(self.code.encode()).hexdigest()[:12]

        # Execution namespace pre-populated with allowed libraries.
        # NOTE(review): '__builtins__' is not restricted here, so exec()
        # below still exposes the full set of builtins — the sandbox relies
        # entirely on the static validation above. Confirm whether specs can
        # come from untrusted users before relaxing that validation.
        namespace = {
            'np': np,
            'numpy': np,
            'math': __import__('math'),
            'statistics': __import__('statistics'),
            'collections': __import__('collections'),
            'itertools': __import__('itertools'),
            'functools': __import__('functools'),
        }
        # Optional scientific stack.
        try:
            import scipy
            namespace['scipy'] = scipy
            from scipy import interpolate, optimize, integrate, linalg
            namespace['interpolate'] = interpolate
            namespace['optimize'] = optimize
            namespace['integrate'] = integrate
            namespace['linalg'] = linalg
        except ImportError:
            pass
        # Optional FEA result parsers.
        try:
            from pyNastran.op2.op2 import OP2
            from pyNastran.bdf.bdf import BDF
            namespace['OP2'] = OP2
            namespace['BDF'] = BDF
        except ImportError:
            pass
        # Optional Atomizer builtin extractors.
        try:
            from optimization_engine import extractors
            namespace['extractors'] = extractors
        except ImportError:
            pass

        # Execute the source so the extraction function is defined.
        try:
            exec(self.code, namespace)
        except Exception as e:
            # Chain the original error so the root cause stays visible.
            raise ExtractorValidationError(f"Failed to compile: {e}") from e

        if self.function_name not in namespace:
            raise ExtractorValidationError(f"Function '{self.function_name}' not defined")
        self._compiled_func = namespace[self.function_name]
        logger.info(f"Compiled custom extractor: {self.name} ({self._code_hash})")

    def execute(self, context: CustomExtractorContext) -> Dict[str, float]:
        """
        Execute the extractor and return results.

        Args:
            context: Execution context with FEA results

        Returns:
            Dictionary of output_name -> value

        Raises:
            RuntimeError: If execution fails
        """
        if self._compiled_func is None:
            self.compile()
        try:
            # The extractor function receives paths as strings (or None).
            result = self._compiled_func(
                op2_path=str(context.op2_path) if context.op2_path else None,
                bdf_path=str(context.bdf_path) if context.bdf_path else None,
                params=context.params,
                working_dir=str(context.working_dir) if context.working_dir else None,
            )
            # Normalize the return value to {name: float}.
            if isinstance(result, dict):
                return result
            elif isinstance(result, (int, float)):
                # Single scalar — attach it to the first declared output name.
                if self.outputs:
                    return {self.outputs[0]['name']: float(result)}
                return {'value': float(result)}
            elif isinstance(result, (list, tuple)):
                # Sequence — map positionally onto declared output names;
                # extras get synthetic 'output_i' keys.
                output_dict = {}
                for i, val in enumerate(result):
                    if i < len(self.outputs):
                        output_dict[self.outputs[i]['name']] = float(val)
                    else:
                        output_dict[f'output_{i}'] = float(val)
                return output_dict
            else:
                raise RuntimeError(f"Unexpected result type: {type(result)}")
        except Exception as e:
            logger.error(f"Custom extractor {self.name} failed: {e}")
            logger.debug(traceback.format_exc())
            # Chain so the original traceback is preserved for callers.
            raise RuntimeError(f"Extractor {self.name} failed: {e}") from e
# ============================================================================
# Extractor Loader
# ============================================================================
class CustomExtractorLoader:
    """Loads, compiles, and caches custom extractors from an AtomizerSpec."""

    def __init__(self):
        """Start with an empty extractor cache."""
        self._cache: Dict[str, CustomExtractor] = {}

    def load_from_spec(self, spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
        """
        Load all custom extractors from an AtomizerSpec.

        Args:
            spec: AtomizerSpec dictionary

        Returns:
            Dictionary of extractor_id -> CustomExtractor
        """
        loaded: Dict[str, CustomExtractor] = {}
        for ext_def in spec.get('extractors', []):
            # Builtin extractors are handled elsewhere.
            if ext_def.get('builtin', True):
                continue
            func_def = ext_def.get('function', {})
            # A custom extractor without source code cannot be compiled.
            if not func_def.get('source'):
                logger.warning(f"Custom extractor {ext_def.get('id')} has no source code")
                continue
            candidate = CustomExtractor(
                extractor_id=ext_def.get('id', 'custom'),
                name=ext_def.get('name', 'Custom Extractor'),
                function_name=func_def.get('name', 'extract'),
                code=func_def.get('source', ''),
                outputs=ext_def.get('outputs', []),
                dependencies=func_def.get('dependencies', []),
            )
            try:
                candidate.compile()
            except (ExtractorValidationError, ExtractorSecurityError) as e:
                logger.error(f"Failed to load extractor {candidate.name}: {e}")
                continue
            loaded[candidate.extractor_id] = candidate
            self._cache[candidate.extractor_id] = candidate
        return loaded

    def get(self, extractor_id: str) -> Optional[CustomExtractor]:
        """Get a cached extractor by ID."""
        return self._cache.get(extractor_id)

    def execute_all(self, extractors: Dict[str, CustomExtractor],
                    context: CustomExtractorContext) -> Dict[str, Dict[str, float]]:
        """
        Execute all custom extractors and collect results.

        Args:
            extractors: Dictionary of extractor_id -> CustomExtractor
            context: Execution context

        Returns:
            Dictionary of extractor_id -> {output_name: value}
        """
        results: Dict[str, Dict[str, float]] = {}
        for ext_id, extractor in extractors.items():
            try:
                values = extractor.execute(context)
            except Exception as e:
                logger.error(f"Extractor {ext_id} failed: {e}")
                # Failed extractors report NaN for every declared output.
                values = {out['name']: float('nan') for out in extractor.outputs}
            results[ext_id] = values
        return results

    def clear_cache(self) -> None:
        """Clear the extractor cache."""
        self._cache.clear()
# ============================================================================
# Convenience Functions
# ============================================================================
# Module-level singleton loader backing the convenience functions below;
# its cache persists for the lifetime of the process.
_loader = CustomExtractorLoader()
def load_custom_extractors(spec: Dict[str, Any]) -> Dict[str, CustomExtractor]:
    """
    Load every custom extractor defined in *spec* via the module-level loader.

    Args:
        spec: AtomizerSpec dictionary

    Returns:
        Dictionary of extractor_id -> CustomExtractor
    """
    loaded = _loader.load_from_spec(spec)
    return loaded
def execute_custom_extractor(extractor_id: str,
                             op2_path: Union[str, Path],
                             bdf_path: Optional[Union[str, Path]] = None,
                             working_dir: Optional[Union[str, Path]] = None,
                             params: Optional[Dict[str, float]] = None) -> Dict[str, float]:
    """
    Run one extractor previously loaded into the module-level cache.

    Args:
        extractor_id: ID of the extractor to run
        op2_path: Path to OP2 results file
        bdf_path: Optional path to BDF file
        working_dir: Optional working directory
        params: Optional design parameters

    Returns:
        Dictionary of output_name -> value

    Raises:
        KeyError: If extractor not found in cache
    """
    extractor = _loader.get(extractor_id)
    if extractor is None:
        raise KeyError(f"Extractor '{extractor_id}' not found in cache")
    return extractor.execute(
        CustomExtractorContext(
            op2_path=op2_path,
            bdf_path=bdf_path,
            working_dir=working_dir,
            params=params,
        )
    )
def validate_custom_extractor(code: str, function_name: str = "extract") -> Tuple[bool, List[str]]:
    """
    Statically check extractor source without executing it.

    Thin public wrapper around :func:`validate_extractor_code`.

    Args:
        code: Python source code
        function_name: Expected function name

    Returns:
        Tuple of (is_valid, list of error/warning messages)
    """
    return validate_extractor_code(code, function_name)
# Public API of this module.
__all__ = [
    'CustomExtractor',
    'CustomExtractorLoader',
    'CustomExtractorContext',
    'ExtractorSecurityError',
    'ExtractorValidationError',
    'load_custom_extractors',
    'execute_custom_extractor',
    'validate_custom_extractor',
]

View File

@@ -0,0 +1,328 @@
"""
Spec Extractor Builder
Builds result extractors from AtomizerSpec v2.0 configuration.
Combines builtin extractors with custom Python extractors.
P3.10: Integration with optimization runner
"""
import json
import logging
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from optimization_engine.extractors.custom_extractor_loader import (
CustomExtractor,
CustomExtractorContext,
CustomExtractorLoader,
load_custom_extractors,
)
logger = logging.getLogger(__name__)
# ============================================================================
# Builtin Extractor Registry
# ============================================================================
# Map of builtin extractor types to their extraction functions/classes.
# Stays empty until _register_builtin_extractors() populates it lazily.
BUILTIN_EXTRACTORS = {}

def _register_builtin_extractors():
    """Lazily register builtin extractors to avoid circular imports.

    Populates BUILTIN_EXTRACTORS in place; safe to call repeatedly (returns
    immediately once non-empty). NOTE(review): a single failed import aborts
    the remaining registrations, since one try block wraps them all.
    """
    global BUILTIN_EXTRACTORS
    if BUILTIN_EXTRACTORS:
        return
    try:
        # Zernike OPD (recommended for mirrors) — class-based extractor
        from optimization_engine.extractors.extract_zernike_figure import (
            ZernikeOPDExtractor,
        )
        BUILTIN_EXTRACTORS['zernike_opd'] = ZernikeOPDExtractor
        # Mass extractors
        from optimization_engine.extractors.bdf_mass_extractor import extract_mass_from_bdf
        BUILTIN_EXTRACTORS['mass'] = extract_mass_from_bdf
        from optimization_engine.extractors.extract_mass_from_expression import (
            extract_mass_from_expression,
        )
        BUILTIN_EXTRACTORS['mass_expression'] = extract_mass_from_expression
        # Displacement
        from optimization_engine.extractors.extract_displacement import extract_displacement
        BUILTIN_EXTRACTORS['displacement'] = extract_displacement
        # Stress
        from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress
        BUILTIN_EXTRACTORS['stress'] = extract_solid_stress
        from optimization_engine.extractors.extract_principal_stress import (
            extract_principal_stress,
        )
        BUILTIN_EXTRACTORS['principal_stress'] = extract_principal_stress
        # Frequency
        from optimization_engine.extractors.extract_frequency import extract_frequency
        BUILTIN_EXTRACTORS['frequency'] = extract_frequency
        # Temperature
        from optimization_engine.extractors.extract_temperature import extract_temperature
        BUILTIN_EXTRACTORS['temperature'] = extract_temperature
        # Strain energy
        from optimization_engine.extractors.extract_strain_energy import (
            extract_strain_energy,
            extract_total_strain_energy,
        )
        BUILTIN_EXTRACTORS['strain_energy'] = extract_strain_energy
        BUILTIN_EXTRACTORS['total_strain_energy'] = extract_total_strain_energy
        # SPC forces
        from optimization_engine.extractors.extract_spc_forces import (
            extract_spc_forces,
            extract_total_reaction_force,
        )
        BUILTIN_EXTRACTORS['spc_forces'] = extract_spc_forces
        BUILTIN_EXTRACTORS['reaction_force'] = extract_total_reaction_force
        logger.debug(f"Registered {len(BUILTIN_EXTRACTORS)} builtin extractors")
    except ImportError as e:
        # Best-effort: missing optional extractor modules are logged, not fatal.
        logger.warning(f"Some builtin extractors unavailable: {e}")
# ============================================================================
# Spec Extractor Builder
# ============================================================================
class SpecExtractorBuilder:
    """
    Builds extraction functions from AtomizerSpec extractor definitions.

    Each definition becomes a callable
    ``f(op2_path, **kwargs) -> {output_name: float}``: builtin definitions
    wrap entries from BUILTIN_EXTRACTORS, custom definitions compile
    user-supplied source via CustomExtractor.
    """
    def __init__(self, spec: Dict[str, Any]):
        """
        Initialize builder with an AtomizerSpec.

        Args:
            spec: AtomizerSpec dictionary
        """
        self.spec = spec
        self.custom_loader = CustomExtractorLoader()
        # extractor_id -> ready-to-call extraction function
        self._extractors: Dict[str, Callable] = {}
        # extractor_id -> compiled CustomExtractor (custom definitions only)
        self._custom_extractors: Dict[str, CustomExtractor] = {}
        # Register builtin extractors (no-op if already registered).
        _register_builtin_extractors()
    def build(self) -> Dict[str, Callable]:
        """
        Build all extractors from the spec.

        Definitions that fail to build are logged and omitted from the result.

        Returns:
            Dictionary of extractor_id -> extraction_function
        """
        for ext_def in self.spec.get('extractors', []):
            ext_id = ext_def.get('id', 'unknown')
            if ext_def.get('builtin', True):
                # Builtin extractor
                extractor_func = self._build_builtin_extractor(ext_def)
            else:
                # Custom extractor
                extractor_func = self._build_custom_extractor(ext_def)
            if extractor_func:
                self._extractors[ext_id] = extractor_func
            else:
                logger.warning(f"Failed to build extractor: {ext_id}")
        return self._extractors
    def _build_builtin_extractor(self, ext_def: Dict[str, Any]) -> Optional[Callable]:
        """
        Build a builtin extractor function.

        Args:
            ext_def: Extractor definition from spec

        Returns:
            Callable extraction function or None
        """
        ext_type = ext_def.get('type', '')
        ext_id = ext_def.get('id', '')  # NOTE(review): currently unused here
        config = ext_def.get('config', {})
        outputs = ext_def.get('outputs', [])
        # Look up the registered implementation for this type.
        base_extractor = BUILTIN_EXTRACTORS.get(ext_type)
        if base_extractor is None:
            logger.warning(f"Unknown builtin extractor type: {ext_type}")
            return None
        # Closure factory: binds base/cfg/outs so the wrapper is self-contained.
        def create_extractor_wrapper(base, cfg, outs):
            """Create a wrapper that applies config and extracts specified outputs."""
            def wrapper(op2_path: str, **kwargs) -> Dict[str, float]:
                """Execute extractor and return outputs dict; NaN outputs on failure."""
                try:
                    # Handle class-based extractors (like ZernikeOPDExtractor)
                    if isinstance(base, type):
                        # Instantiate with config.
                        # NOTE(review): assumes every class-based extractor
                        # accepts inner_radius/n_modes kwargs — confirm if
                        # more class-based types are registered.
                        instance = base(
                            inner_radius=cfg.get('inner_radius_mm', 0),
                            n_modes=cfg.get('n_modes', 21),
                            **{k: v for k, v in cfg.items()
                               if k not in ['inner_radius_mm', 'n_modes']}
                        )
                        raw_result = instance.extract(op2_path, **kwargs)
                    else:
                        # Function-based extractor: config passed as kwargs.
                        raw_result = base(op2_path, **cfg, **kwargs)
                    # Map the raw result onto the declared output names.
                    result = {}
                    if isinstance(raw_result, dict):
                        # Each output may name a 'source' key to pull from.
                        for out_def in outs:
                            out_name = out_def.get('name', '')
                            source = out_def.get('source', out_name)
                            if source in raw_result:
                                result[out_name] = float(raw_result[source])
                            elif out_name in raw_result:
                                result[out_name] = float(raw_result[out_name])
                        # If no outputs defined, return all numeric values.
                        if not outs:
                            result = {k: float(v) for k, v in raw_result.items()
                                      if isinstance(v, (int, float))}
                    elif isinstance(raw_result, (int, float)):
                        # Single value - use first output name or 'value'
                        out_name = outs[0]['name'] if outs else 'value'
                        result[out_name] = float(raw_result)
                    return result
                except Exception as e:
                    # Best-effort: a failed extraction yields NaN per output.
                    logger.error(f"Extractor failed: {e}")
                    return {out['name']: float('nan') for out in outs}
            return wrapper
        return create_extractor_wrapper(base_extractor, config, outputs)
    def _build_custom_extractor(self, ext_def: Dict[str, Any]) -> Optional[Callable]:
        """
        Build a custom Python extractor function.

        Args:
            ext_def: Extractor definition with function source

        Returns:
            Callable extraction function or None
        """
        ext_id = ext_def.get('id', 'custom')
        func_def = ext_def.get('function', {})
        if not func_def.get('source'):
            logger.error(f"Custom extractor {ext_id} has no source code")
            return None
        try:
            custom_ext = CustomExtractor(
                extractor_id=ext_id,
                name=ext_def.get('name', 'Custom'),
                function_name=func_def.get('name', 'extract'),
                code=func_def.get('source', ''),
                outputs=ext_def.get('outputs', []),
                dependencies=func_def.get('dependencies', []),
            )
            # Compile (validates + exec's the source) before wrapping.
            custom_ext.compile()
            self._custom_extractors[ext_id] = custom_ext
            # Closure factory binding the compiled extractor.
            def create_custom_wrapper(extractor):
                def wrapper(op2_path: str, bdf_path: Optional[str] = None,
                            params: Optional[Dict[str, float]] = None,
                            working_dir: Optional[str] = None, **kwargs) -> Dict[str, float]:
                    context = CustomExtractorContext(
                        op2_path=op2_path,
                        bdf_path=bdf_path,
                        working_dir=working_dir,
                        params=params or {}
                    )
                    return extractor.execute(context)
                return wrapper
            return create_custom_wrapper(custom_ext)
        except Exception as e:
            logger.error(f"Failed to build custom extractor {ext_id}: {e}")
            return None
# ============================================================================
# Convenience Functions
# ============================================================================
def build_extractors_from_spec(spec: Union[Dict[str, Any], Path, str]) -> Dict[str, Callable]:
    """
    Build extraction functions from an AtomizerSpec.

    Args:
        spec: AtomizerSpec dict, or path to spec JSON file

    Returns:
        Dictionary of extractor_id -> extraction_function

    Example:
        extractors = build_extractors_from_spec("atomizer_spec.json")
        results = extractors['E1']("model.op2")
    """
    if isinstance(spec, (str, Path)):
        # JSON text is defined as UTF-8 (RFC 8259); read explicitly rather
        # than relying on the platform default encoding (e.g. cp1252 on
        # Windows), which can mis-decode non-ASCII spec content.
        with open(spec, encoding="utf-8") as f:
            spec = json.load(f)
    builder = SpecExtractorBuilder(spec)
    return builder.build()
def get_extractor_outputs(spec: Dict[str, Any], extractor_id: str) -> List[Dict[str, Any]]:
    """
    Get output definitions for an extractor.

    Args:
        spec: AtomizerSpec dictionary
        extractor_id: ID of the extractor

    Returns:
        List of output definitions [{name, units, description}, ...];
        empty list if the extractor is not found.
    """
    matches = (e for e in spec.get('extractors', [])
               if e.get('id') == extractor_id)
    found = next(matches, None)
    return found.get('outputs', []) if found is not None else []
def list_available_builtin_extractors() -> List[str]:
    """
    List every builtin extractor type that registered successfully.

    Returns:
        List of extractor type names
    """
    # Ensure the lazy registry has been populated first.
    _register_builtin_extractors()
    return [*BUILTIN_EXTRACTORS]
# Public API of this module.
__all__ = [
    'SpecExtractorBuilder',
    'build_extractors_from_spec',
    'get_extractor_outputs',
    'list_available_builtin_extractors',
    'BUILTIN_EXTRACTORS',
]