# optimization_engine/config/spec_validator.py

"""
AtomizerSpec v2.0 Validator

Provides comprehensive validation including:
- JSON Schema validation
- Pydantic model validation
- Semantic validation (bounds, references, dependencies)
- Extractor-specific validation
"""

import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import ValidationError as PydanticValidationError

try:
    import jsonschema
    HAS_JSONSCHEMA = True
except ImportError:
    HAS_JSONSCHEMA = False

from .spec_models import (
    AtomizerSpec,
    ValidationReport,
    ValidationError,
    ValidationWarning,
    ValidationSummary,
    ExtractorType,
    AlgorithmType,
    ConstraintType,
)


class SpecValidationError(Exception):
    """Raised when spec validation fails.

    The individual validation errors are available on the ``errors``
    attribute; it is an empty list when none were supplied.
    """

    def __init__(self, message: str, errors: Optional[List[ValidationError]] = None):
        """Create the exception.

        Args:
            message: Human-readable description, passed to ``Exception``.
            errors: Validation errors behind the failure; ``None`` (the
                default) or an empty list results in ``errors == []``.
        """
        super().__init__(message)
        # Store a copy so later mutation of the caller's list cannot change
        # what this exception reports.
        self.errors: List[ValidationError] = list(errors) if errors else []


class SpecValidator:
    """
    Validates AtomizerSpec v2.0 configurations.

    Provides three levels of validation:
    1. JSON Schema validation (structural)
    2. Pydantic model validation (type safety)
    3. Semantic validation (business logic)
    """

    # Path to JSON Schema file
    SCHEMA_PATH = Path(__file__).parent.parent / "schemas" / "atomizer_spec_v2.json"

    def __init__(self):
        """Initialize validator with schema."""
        self._schema: Optional[Dict] = None

    @property
    def schema(self) -> Dict:
        """Lazy load the JSON Schema."""
        if self._schema is None:
            if self.SCHEMA_PATH.exists():
                with open(self.SCHEMA_PATH) as f:
                    self._schema = json.load(f)
            else:
                self._schema = {}
        return self._schema

    def validate(
        self,
        spec_data: Union[Dict[str, Any], AtomizerSpec],
        strict: bool = True
    ) -> ValidationReport:
        """
        Run all validation phases over a spec and report the outcome.

        The phases run in order and each later phase is skipped once an
        earlier one has produced errors: JSON Schema, then the Pydantic
        model, then the semantic checks. The summary is always built from
        the dict form of the input.

        Args:
            spec_data: The spec as a plain dict or as an AtomizerSpec
                instance (which is dumped to a dict first)
            strict: When True, a failed validation raises instead of
                returning the report

        Returns:
            ValidationReport carrying errors, warnings and a summary

        Raises:
            SpecValidationError: If strict=True and at least one error was
                found; its message quotes the first three error messages
        """
        # Every phase works on the plain-dict form.
        data = (
            spec_data.model_dump(mode='json')
            if isinstance(spec_data, AtomizerSpec)
            else spec_data
        )

        found_errors: List[ValidationError] = []
        found_warnings: List[ValidationWarning] = []

        # Phase 1: structural check against the JSON Schema.
        structural = self._validate_json_schema(data)
        found_errors += structural

        # Phase 2: Pydantic model, only when the structure was accepted.
        if not structural:
            found_errors += self._validate_pydantic(data)

        # Phase 3: semantic checks need a parsed model, so they only run
        # when nothing has failed so far.
        if not found_errors:
            parsed = AtomizerSpec.model_validate(data)
            sem_errors, sem_warnings = self._validate_semantic(parsed)
            found_errors += sem_errors
            found_warnings += sem_warnings

        overview = self._build_summary(data)
        report = ValidationReport(
            valid=not found_errors,
            errors=found_errors,
            warnings=found_warnings,
            summary=overview
        )

        if report.valid or not strict:
            return report

        # Strict mode: surface the first few messages in the exception text.
        headline = "; ".join(err.message for err in report.errors[:3])
        raise SpecValidationError(
            f"Spec validation failed: {headline}",
            errors=report.errors
        )

    def validate_partial(
        self,
        path: str,
        value: Any,
        current_spec: AtomizerSpec
    ) -> Tuple[bool, List[str]]:
        """
        Validate a partial update before applying.

        Args:
            path: JSONPath to the field being updated
            value: New value
            current_spec: Current full spec

        Returns:
            Tuple of (is_valid, list of error messages)
        """
        errors = []

        # Parse path
        parts = self._parse_path(path)
        if not parts:
            return False, ["Invalid path format"]

        # Get target type from path
        root = parts[0]

        # Validate based on root section
        if root == "design_variables":
            errors.extend(self._validate_dv_update(parts, value, current_spec))
        elif root == "extractors":
            errors.extend(self._validate_extractor_update(parts, value, current_spec))
        elif root == "objectives":
            errors.extend(self._validate_objective_update(parts, value, current_spec))
        elif root == "constraints":
            errors.extend(self._validate_constraint_update(parts, value, current_spec))
        elif root == "optimization":
            errors.extend(self._validate_optimization_update(parts, value))
        elif root == "meta":
            errors.extend(self._validate_meta_update(parts, value))

        return len(errors) == 0, errors

    def _validate_json_schema(self, data: Dict) -> List[ValidationError]:
        """Validate ``data`` against the JSON Schema.

        The check is skipped (empty list returned) when the ``jsonschema``
        package is not importable or when no schema was loaded.

        Args:
            data: The spec in plain-dict form.

        Returns:
            An empty list on success or when skipped; otherwise a single
            ``"schema"`` error, either for the violation raised by
            ``jsonschema.validate`` or for an invalid schema document.
        """
        errors = []

        if not HAS_JSONSCHEMA or not self.schema:
            return errors  # Skip if jsonschema or the schema is not available

        try:
            jsonschema.validate(instance=data, schema=self.schema)
        except jsonschema.ValidationError as e:
            errors.append(ValidationError(
                type="schema",
                # absolute_path mixes str keys and int array indices;
                # stringify so the path has the same shape as the paths
                # produced by the Pydantic and semantic phases.
                path=[str(p) for p in e.absolute_path],
                message=e.message
            ))
        except jsonschema.SchemaError as e:
            # The schema document itself is malformed, not the spec.
            errors.append(ValidationError(
                type="schema",
                path=[],
                message=f"Invalid schema: {e.message}"
            ))

        return errors

    def _validate_pydantic(self, data: Dict) -> List[ValidationError]:
        """Run the AtomizerSpec Pydantic model over ``data``.

        Returns one ``"schema"`` error per issue Pydantic reports, with the
        issue location stringified into the path; an empty list when the
        model accepts the data.
        """
        try:
            AtomizerSpec.model_validate(data)
        except PydanticValidationError as exc:
            return [
                ValidationError(
                    type="schema",
                    path=[str(segment) for segment in issue.get("loc", [])],
                    message=issue.get("msg", "Validation error")
                )
                for issue in exc.errors()
            ]
        return []

    def _validate_semantic(
        self,
        spec: AtomizerSpec
    ) -> Tuple[List[ValidationError], List[ValidationWarning]]:
        """
        Run every semantic check against a parsed spec.

        Each check returns a list that is appended to either the errors or
        the warnings, in the order the checks are listed below.

        Returns:
            Tuple of (errors, warnings)
        """
        errors: List[ValidationError] = []
        warnings: List[ValidationWarning] = []

        # (destination, check) pairs, executed top to bottom.
        pipeline = (
            (errors, self._validate_dv_bounds),              # design variable bounds
            (errors, self._validate_extractor_configs),      # extractor configuration
            (warnings, self._warn_extractor_configs),
            (errors, self._validate_references),             # canvas edge references
            (errors, self._validate_optimization_settings),  # optimization settings
            (warnings, self._warn_optimization_settings),
            (warnings, self._validate_canvas_edges),         # canvas edges present
            (errors, self._validate_unique_ids),             # duplicate IDs
            (errors, self._validate_custom_functions),       # custom function syntax
        )
        for destination, check in pipeline:
            destination.extend(check(spec))

        return errors, warnings

    def _validate_dv_bounds(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Validate design variable bounds."""
        errors = []

        for i, dv in enumerate(spec.design_variables):
            # Check baseline within bounds
            if dv.baseline is not None:
                if dv.baseline < dv.bounds.min or dv.baseline > dv.bounds.max:
                    errors.append(ValidationError(
                        type="semantic",
                        path=["design_variables", str(i), "baseline"],
                        message=f"Baseline {dv.baseline} outside bounds [{dv.bounds.min}, {dv.bounds.max}]"
                    ))

            # Check step size for integer type
            if dv.type.value == "integer":
                range_size = dv.bounds.max - dv.bounds.min
                if range_size < 1:
                    errors.append(ValidationError(
                        type="semantic",
                        path=["design_variables", str(i), "bounds"],
                        message="Integer variable must have range >= 1"
                    ))

        return errors

    def _validate_extractor_configs(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Validate extractor-specific configurations."""
        errors = []

        for i, ext in enumerate(spec.extractors):
            # Zernike extractors need specific config
            if ext.type in [ExtractorType.ZERNIKE_OPD, ExtractorType.ZERNIKE_CSV]:
                if not ext.config:
                    errors.append(ValidationError(
                        type="semantic",
                        path=["extractors", str(i), "config"],
                        message=f"Zernike extractor requires config with radius settings"
                    ))
                elif ext.config:
                    if ext.config.inner_radius_mm is None:
                        errors.append(ValidationError(
                            type="semantic",
                            path=["extractors", str(i), "config", "inner_radius_mm"],
                            message="Zernike extractor requires inner_radius_mm"
                        ))
                    if ext.config.outer_radius_mm is None:
                        errors.append(ValidationError(
                            type="semantic",
                            path=["extractors", str(i), "config", "outer_radius_mm"],
                            message="Zernike extractor requires outer_radius_mm"
                        ))

            # Mass expression extractor needs expression_name
            if ext.type == ExtractorType.MASS_EXPRESSION:
                if not ext.config or not ext.config.expression_name:
                    errors.append(ValidationError(
                        type="semantic",
                        path=["extractors", str(i), "config", "expression_name"],
                        message="Mass expression extractor requires expression_name in config"
                    ))

        return errors

    def _warn_extractor_configs(self, spec: AtomizerSpec) -> List[ValidationWarning]:
        """Generate warnings for extractor configurations."""
        warnings = []

        for i, ext in enumerate(spec.extractors):
            # Zernike mode count warning
            if ext.type in [ExtractorType.ZERNIKE_OPD, ExtractorType.ZERNIKE_CSV]:
                if ext.config and ext.config.n_modes:
                    if ext.config.n_modes > 66:
                        warnings.append(ValidationWarning(
                            type="performance",
                            path=["extractors", str(i), "config", "n_modes"],
                            message=f"n_modes={ext.config.n_modes} is high; consider <=66 for performance"
                        ))

        return warnings

    def _validate_references(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Validate reference integrity."""
        errors = []

        # Collect all valid IDs
        dv_ids = {dv.id for dv in spec.design_variables}
        ext_ids = {ext.id for ext in spec.extractors}
        ext_outputs: Dict[str, set] = {}
        for ext in spec.extractors:
            ext_outputs[ext.id] = {o.name for o in ext.outputs}

        # Validate canvas edges
        if spec.canvas and spec.canvas.edges:
            all_ids = dv_ids | ext_ids
            all_ids.add("model")
            all_ids.add("solver")
            all_ids.add("optimization")
            all_ids.update(obj.id for obj in spec.objectives)
            if spec.constraints:
                all_ids.update(con.id for con in spec.constraints)

            for i, edge in enumerate(spec.canvas.edges):
                if edge.source not in all_ids:
                    errors.append(ValidationError(
                        type="reference",
                        path=["canvas", "edges", str(i), "source"],
                        message=f"Edge source '{edge.source}' not found"
                    ))
                if edge.target not in all_ids:
                    errors.append(ValidationError(
                        type="reference",
                        path=["canvas", "edges", str(i), "target"],
                        message=f"Edge target '{edge.target}' not found"
                    ))

        return errors

    def _validate_optimization_settings(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Check that the chosen algorithm fits the number of objectives.

        NSGA-II with fewer than two objectives is reported as an error.
        """
        uses_nsga2 = spec.optimization.algorithm.type == AlgorithmType.NSGA_II
        if not uses_nsga2 or len(spec.objectives) >= 2:
            return []

        return [ValidationError(
            type="semantic",
            path=["optimization", "algorithm", "type"],
            message="NSGA-II requires at least 2 objectives"
        )]

    def _warn_optimization_settings(self, spec: AtomizerSpec) -> List[ValidationWarning]:
        """Generate warnings for optimization settings."""
        warnings = []

        budget = spec.optimization.budget

        # Warn about small trial budgets
        if budget.max_trials and budget.max_trials < 20:
            warnings.append(ValidationWarning(
                type="recommendation",
                path=["optimization", "budget", "max_trials"],
                message=f"max_trials={budget.max_trials} is low; recommend >= 20 for convergence"
            ))

        # Warn about large design space with small budget
        num_dvs = len(spec.get_enabled_design_variables())
        if budget.max_trials and num_dvs > 5 and budget.max_trials < num_dvs * 10:
            warnings.append(ValidationWarning(
                type="recommendation",
                path=["optimization", "budget", "max_trials"],
                message=f"{num_dvs} DVs suggest at least {num_dvs * 10} trials"
            ))

        return warnings

    def _validate_canvas_edges(self, spec: AtomizerSpec) -> List[ValidationWarning]:
        """Validate canvas edge structure."""
        warnings = []

        if not spec.canvas or not spec.canvas.edges:
            warnings.append(ValidationWarning(
                type="completeness",
                path=["canvas", "edges"],
                message="No canvas edges defined; canvas may not render correctly"
            ))

        return warnings

    def _validate_unique_ids(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Validate that all IDs are unique."""
        errors = []
        seen_ids: Dict[str, str] = {}

        # Check all ID-bearing elements
        for i, dv in enumerate(spec.design_variables):
            if dv.id in seen_ids:
                errors.append(ValidationError(
                    type="semantic",
                    path=["design_variables", str(i), "id"],
                    message=f"Duplicate ID '{dv.id}' (also in {seen_ids[dv.id]})"
                ))
            seen_ids[dv.id] = f"design_variables[{i}]"

        for i, ext in enumerate(spec.extractors):
            if ext.id in seen_ids:
                errors.append(ValidationError(
                    type="semantic",
                    path=["extractors", str(i), "id"],
                    message=f"Duplicate ID '{ext.id}' (also in {seen_ids[ext.id]})"
                ))
            seen_ids[ext.id] = f"extractors[{i}]"

        for i, obj in enumerate(spec.objectives):
            if obj.id in seen_ids:
                errors.append(ValidationError(
                    type="semantic",
                    path=["objectives", str(i), "id"],
                    message=f"Duplicate ID '{obj.id}' (also in {seen_ids[obj.id]})"
                ))
            seen_ids[obj.id] = f"objectives[{i}]"

        if spec.constraints:
            for i, con in enumerate(spec.constraints):
                if con.id in seen_ids:
                    errors.append(ValidationError(
                        type="semantic",
                        path=["constraints", str(i), "id"],
                        message=f"Duplicate ID '{con.id}' (also in {seen_ids[con.id]})"
                    ))
                seen_ids[con.id] = f"constraints[{i}]"

        return errors

    def _validate_custom_functions(self, spec: AtomizerSpec) -> List[ValidationError]:
        """Validate custom function Python syntax."""
        errors = []

        for i, ext in enumerate(spec.extractors):
            if ext.type == ExtractorType.CUSTOM_FUNCTION and ext.function:
                if ext.function.source_code:
                    try:
                        compile(ext.function.source_code, f"<custom:{ext.name}>", "exec")
                    except SyntaxError as e:
                        errors.append(ValidationError(
                            type="semantic",
                            path=["extractors", str(i), "function", "source_code"],
                            message=f"Python syntax error: {e.msg} at line {e.lineno}"
                        ))

        return errors

    def _build_summary(self, data: Dict) -> ValidationSummary:
        """Count the main sections of a spec for the validation report.

        This runs even when earlier validation phases failed, so ``data``
        may be malformed. A section that is missing, ``None`` or not a
        list/tuple counts as empty, and extractor entries that are not dicts
        are ignored when counting custom functions.

        An extractor counts as custom when its ``type`` is
        ``"custom_function"`` or its ``builtin`` flag is falsy (``builtin``
        defaults to True when absent).
        """
        def section(key: str) -> list:
            # Tolerate non-dict data and null / wrongly-typed sections.
            value = data.get(key) if isinstance(data, dict) else None
            return value if isinstance(value, (list, tuple)) else []

        extractors = section("extractors")
        custom_count = sum(
            1 for e in extractors
            if isinstance(e, dict)
            and (e.get("type") == "custom_function" or not e.get("builtin", True))
        )

        return ValidationSummary(
            design_variables=len(section("design_variables")),
            extractors=len(extractors),
            objectives=len(section("objectives")),
            constraints=len(section("constraints")),
            custom_functions=custom_count
        )

    def _parse_path(self, path: str) -> List[str]:
        """Parse a JSONPath-style path into parts."""
        import re
        # Handle both dot notation and bracket notation
        # e.g., "design_variables[0].bounds.max" or "objectives.0.weight"
        parts = []
        for part in re.split(r'\.|\[|\]', path):
            if part:
                parts.append(part)
        return parts

    def _validate_dv_update(
        self,
        parts: List[str],
        value: Any,
        spec: AtomizerSpec
    ) -> List[str]:
        """Validate a design variable update."""
        errors = []

        if len(parts) >= 2:
            try:
                idx = int(parts[1])
                if idx >= len(spec.design_variables):
                    errors.append(f"Design variable index {idx} out of range")
            except ValueError:
                errors.append(f"Invalid design variable index: {parts[1]}")

        return errors

    def _validate_extractor_update(
        self,
        parts: List[str],
        value: Any,
        spec: AtomizerSpec
    ) -> List[str]:
        """Validate an extractor update."""
        errors = []

        if len(parts) >= 2:
            try:
                idx = int(parts[1])
                if idx >= len(spec.extractors):
                    errors.append(f"Extractor index {idx} out of range")
            except ValueError:
                errors.append(f"Invalid extractor index: {parts[1]}")

        return errors

    def _validate_objective_update(
        self,
        parts: List[str],
        value: Any,
        spec: AtomizerSpec
    ) -> List[str]:
        """Validate an objective update."""
        errors = []

        if len(parts) >= 2:
            try:
                idx = int(parts[1])
                if idx >= len(spec.objectives):
                    errors.append(f"Objective index {idx} out of range")
            except ValueError:
                errors.append(f"Invalid objective index: {parts[1]}")

        # Validate weight
        if len(parts) >= 3 and parts[2] == "weight":
            if not isinstance(value, (int, float)) or value < 0:
                errors.append("Weight must be a non-negative number")

        return errors

    def _validate_constraint_update(
        self,
        parts: List[str],
        value: Any,
        spec: AtomizerSpec
    ) -> List[str]:
        """Validate a constraint update."""
        errors = []

        if not spec.constraints:
            errors.append("No constraints defined")
            return errors

        if len(parts) >= 2:
            try:
                idx = int(parts[1])
                if idx >= len(spec.constraints):
                    errors.append(f"Constraint index {idx} out of range")
            except ValueError:
                errors.append(f"Invalid constraint index: {parts[1]}")

        return errors

    def _validate_optimization_update(
        self,
        parts: List[str],
        value: Any
    ) -> List[str]:
        """Validate an optimization update."""
        errors = []

        if len(parts) >= 2:
            if parts[1] == "algorithm" and len(parts) >= 3:
                if parts[2] == "type":
                    valid_types = [t.value for t in AlgorithmType]
                    if value not in valid_types:
                        errors.append(f"Invalid algorithm type. Valid: {valid_types}")

        return errors

    def _validate_meta_update(
        self,
        parts: List[str],
        value: Any
    ) -> List[str]:
        """Validate a meta update."""
        errors = []

        if len(parts) >= 2:
            if parts[1] == "study_name":
                import re
                if not re.match(r"^[a-z0-9_]+$", str(value)):
                    errors.append("study_name must be snake_case (lowercase, numbers, underscores)")

        return errors


# Module-level convenience function
def validate_spec(
    spec_data: Union[Dict[str, Any], AtomizerSpec],
    strict: bool = True
) -> ValidationReport:
    """
    Validate an AtomizerSpec with a freshly created SpecValidator.

    Args:
        spec_data: The spec as a dict or an AtomizerSpec instance
        strict: When True, raise on validation errors instead of only
            returning the report

    Returns:
        ValidationReport

    Raises:
        SpecValidationError: If strict=True and validation fails
    """
    return SpecValidator().validate(spec_data, strict=strict)
feat(config): AtomizerSpec v2.0 Pydantic models, validators, and tests Config Layer: - spec_models.py: Pydantic models for AtomizerSpec v2.0 - spec_validator.py: Semantic validation with detailed error reporting Extractors: - custom_extractor_loader.py: Runtime custom extractor loading - spec_extractor_builder.py: Build extractors from spec definitions Tools: - migrate_to_spec_v2.py: CLI tool for batch migration Tests: - test_migrator.py: Migration tests - test_spec_manager.py: SpecManager service tests - test_spec_api.py: REST API tests - test_mcp_tools.py: MCP tool tests - test_e2e_unified_config.py: End-to-end config tests 2026-01-20 13:12:03 -05:00			`"""`
			`AtomizerSpec v2.0 Validator`

			`Provides comprehensive validation including:`
			`- JSON Schema validation`
			`- Pydantic model validation`
			`- Semantic validation (bounds, references, dependencies)`
			`- Extractor-specific validation`
			`"""`

			`import json`
			`from pathlib import Path`
			`from typing import Any, Dict, List, Optional, Tuple, Union`
			`from pydantic import ValidationError as PydanticValidationError`

			`try:`
			`import jsonschema`
			`HAS_JSONSCHEMA = True`
			`except ImportError:`
			`HAS_JSONSCHEMA = False`

			`from .spec_models import (`
			`AtomizerSpec,`
			`ValidationReport,`
			`ValidationError,`
			`ValidationWarning,`
			`ValidationSummary,`
			`ExtractorType,`
			`AlgorithmType,`
			`ConstraintType,`
			`)`


			`class SpecValidationError(Exception):`
			`"""Raised when spec validation fails."""`

			`def __init__(self, message: str, errors: List[ValidationError] = None):`
			`super().__init__(message)`
			`self.errors = errors or []`


			`class SpecValidator:`
			`"""`
			`Validates AtomizerSpec v2.0 configurations.`

			`Provides three levels of validation:`
			`1. JSON Schema validation (structural)`
			`2. Pydantic model validation (type safety)`
			`3. Semantic validation (business logic)`
			`"""`

			`# Path to JSON Schema file`
			`SCHEMA_PATH = Path(__file__).parent.parent / "schemas" / "atomizer_spec_v2.json"`

			`def __init__(self):`
			`"""Initialize validator with schema."""`
			`self._schema: Optional[Dict] = None`

			`@property`
			`def schema(self) -> Dict:`
			`"""Lazy load the JSON Schema."""`
			`if self._schema is None:`
			`if self.SCHEMA_PATH.exists():`
			`with open(self.SCHEMA_PATH) as f:`
			`self._schema = json.load(f)`
			`else:`
			`self._schema = {}`
			`return self._schema`

			`def validate(`
			`self,`
			`spec_data: Union[Dict[str, Any], AtomizerSpec],`
			`strict: bool = True`
			`) -> ValidationReport:`
			`"""`
			`Validate a spec and return a detailed report.`

			`Args:`
			`spec_data: Either a dict or AtomizerSpec instance`
			`strict: If True, raise exception on errors; if False, return report only`

			`Returns:`
			`ValidationReport with errors, warnings, and summary`

			`Raises:`
			`SpecValidationError: If strict=True and validation fails`
			`"""`
			`errors: List[ValidationError] = []`
			`warnings: List[ValidationWarning] = []`

			`# Convert to dict if needed`
			`if isinstance(spec_data, AtomizerSpec):`
			`data = spec_data.model_dump(mode='json')`
			`else:`
			`data = spec_data`

			`# Phase 1: JSON Schema validation`
			`schema_errors = self._validate_json_schema(data)`
			`errors.extend(schema_errors)`

			`# Phase 2: Pydantic model validation (only if schema passes)`
			`if not schema_errors:`
			`pydantic_errors = self._validate_pydantic(data)`
			`errors.extend(pydantic_errors)`

			`# Phase 3: Semantic validation (only if pydantic passes)`
			`if not errors:`
			`spec = AtomizerSpec.model_validate(data)`
			`semantic_errors, semantic_warnings = self._validate_semantic(spec)`
			`errors.extend(semantic_errors)`
			`warnings.extend(semantic_warnings)`

			`# Build summary`
			`summary = self._build_summary(data)`

			`# Build report`
			`report = ValidationReport(`
			`valid=len(errors) == 0,`
			`errors=errors,`
			`warnings=warnings,`
			`summary=summary`
			`)`

			`# Raise if strict mode and errors found`
			`if strict and not report.valid:`
			`error_messages = "; ".join(e.message for e in report.errors[:3])`
			`raise SpecValidationError(`
			`f"Spec validation failed: {error_messages}",`
			`errors=report.errors`
			`)`

			`return report`

			`def validate_partial(`
			`self,`
			`path: str,`
			`value: Any,`
			`current_spec: AtomizerSpec`
			`) -> Tuple[bool, List[str]]:`
			`"""`
			`Validate a partial update before applying.`

			`Args:`
			`path: JSONPath to the field being updated`
			`value: New value`
			`current_spec: Current full spec`

			`Returns:`
			`Tuple of (is_valid, list of error messages)`
			`"""`
			`errors = []`

			`# Parse path`
			`parts = self._parse_path(path)`
			`if not parts:`
			`return False, ["Invalid path format"]`

			`# Get target type from path`
			`root = parts[0]`

			`# Validate based on root section`
			`if root == "design_variables":`
			`errors.extend(self._validate_dv_update(parts, value, current_spec))`
			`elif root == "extractors":`
			`errors.extend(self._validate_extractor_update(parts, value, current_spec))`
			`elif root == "objectives":`
			`errors.extend(self._validate_objective_update(parts, value, current_spec))`
			`elif root == "constraints":`
			`errors.extend(self._validate_constraint_update(parts, value, current_spec))`
			`elif root == "optimization":`
			`errors.extend(self._validate_optimization_update(parts, value))`
			`elif root == "meta":`
			`errors.extend(self._validate_meta_update(parts, value))`

			`return len(errors) == 0, errors`

			`def _validate_json_schema(self, data: Dict) -> List[ValidationError]:`
			`"""Validate against JSON Schema."""`
			`errors = []`

			`if not HAS_JSONSCHEMA or not self.schema:`
			`return errors # Skip if jsonschema not available`

			`try:`
			`jsonschema.validate(instance=data, schema=self.schema)`
			`except jsonschema.ValidationError as e:`
			`errors.append(ValidationError(`
			`type="schema",`
			`path=list(e.absolute_path),`
			`message=e.message`
			`))`
			`except jsonschema.SchemaError as e:`
			`errors.append(ValidationError(`
			`type="schema",`
			`path=[],`
			`message=f"Invalid schema: {e.message}"`
			`))`

			`return errors`

			`def _validate_pydantic(self, data: Dict) -> List[ValidationError]:`
			`"""Validate using Pydantic models."""`
			`errors = []`

			`try:`
			`AtomizerSpec.model_validate(data)`
			`except PydanticValidationError as e:`
			`for err in e.errors():`
			`errors.append(ValidationError(`
			`type="schema",`
			`path=[str(p) for p in err.get("loc", [])],`
			`message=err.get("msg", "Validation error")`
			`))`

			`return errors`

			`def _validate_semantic(`
			`self,`
			`spec: AtomizerSpec`
			`) -> Tuple[List[ValidationError], List[ValidationWarning]]:`
			`"""`
			`Perform semantic validation.`

			`Checks business logic and constraints that can't be expressed in schema.`
			`"""`
			`errors: List[ValidationError] = []`
			`warnings: List[ValidationWarning] = []`

			`# Validate design variable bounds`
			`errors.extend(self._validate_dv_bounds(spec))`

			`# Validate extractor configurations`
			`errors.extend(self._validate_extractor_configs(spec))`
			`warnings.extend(self._warn_extractor_configs(spec))`

			`# Validate reference integrity (done in Pydantic, but double-check)`
			`errors.extend(self._validate_references(spec))`

			`# Validate optimization settings`
			`errors.extend(self._validate_optimization_settings(spec))`
			`warnings.extend(self._warn_optimization_settings(spec))`

			`# Validate canvas edges`
			`warnings.extend(self._validate_canvas_edges(spec))`

			`# Check for duplicate IDs`
			`errors.extend(self._validate_unique_ids(spec))`

			`# Validate custom function syntax`
			`errors.extend(self._validate_custom_functions(spec))`

			`return errors, warnings`

			`def _validate_dv_bounds(self, spec: AtomizerSpec) -> List[ValidationError]:`
			`"""Validate design variable bounds."""`
			`errors = []`

			`for i, dv in enumerate(spec.design_variables):`
			`# Check baseline within bounds`
			`if dv.baseline is not None:`
			`if dv.baseline < dv.bounds.min or dv.baseline > dv.bounds.max:`
			`errors.append(ValidationError(`
			`type="semantic",`
			`path=["design_variables", str(i), "baseline"],`
			`message=f"Baseline {dv.baseline} outside bounds [{dv.bounds.min}, {dv.bounds.max}]"`
			`))`

			`# Check step size for integer type`
			`if dv.type.value == "integer":`
			`range_size = dv.bounds.max - dv.bounds.min`
			`if range_size < 1:`
			`errors.append(ValidationError(`
			`type="semantic",`
			`path=["design_variables", str(i), "bounds"],`
			`message="Integer variable must have range >= 1"`
			`))`

			`return errors`

			def _validate_extractor_configs(self, spec: AtomizerSpec) -> List[ValidationError]:
			    """
			    Check that extractors carry the configuration their type requires.

			    Zernike extractors (OPD and CSV) need a config with both
			    ``inner_radius_mm`` and ``outer_radius_mm`` set; mass expression
			    extractors need a config with a non-empty ``expression_name``.

			    Args:
			        spec: Spec whose ``extractors`` are inspected.

			    Returns:
			        Semantic errors, one per missing config or missing field.
			    """
			    zernike_types = (ExtractorType.ZERNIKE_OPD, ExtractorType.ZERNIKE_CSV)
			    radius_fields = ("inner_radius_mm", "outer_radius_mm")
			    problems: List[ValidationError] = []

			    for index, extractor in enumerate(spec.extractors):
			        config_path = ["extractors", str(index), "config"]

			        if extractor.type in zernike_types:
			            config = extractor.config
			            if not config:
			                # Without a config there is nothing further to inspect.
			                problems.append(ValidationError(
			                    type="semantic",
			                    path=config_path,
			                    message="Zernike extractor requires config with radius settings"
			                ))
			            else:
			                for field_name in radius_fields:
			                    if getattr(config, field_name) is None:
			                        problems.append(ValidationError(
			                            type="semantic",
			                            path=config_path + [field_name],
			                            message=f"Zernike extractor requires {field_name}"
			                        ))

			        if extractor.type == ExtractorType.MASS_EXPRESSION:
			            has_expression = bool(extractor.config) and bool(extractor.config.expression_name)
			            if not has_expression:
			                problems.append(ValidationError(
			                    type="semantic",
			                    path=config_path + ["expression_name"],
			                    message="Mass expression extractor requires expression_name in config"
			                ))

			    return problems

			def _warn_extractor_configs(self, spec: AtomizerSpec) -> List[ValidationWarning]:
			    """
			    Produce non-fatal advisories about extractor configuration.

			    Currently flags Zernike extractors (OPD and CSV) whose ``n_modes``
			    exceeds 66.

			    Args:
			        spec: Spec whose ``extractors`` are inspected.

			    Returns:
			        Performance warnings, one per offending extractor.
			    """
			    zernike_types = (ExtractorType.ZERNIKE_OPD, ExtractorType.ZERNIKE_CSV)
			    advisories: List[ValidationWarning] = []

			    for index, extractor in enumerate(spec.extractors):
			        if extractor.type not in zernike_types:
			            continue

			        config = extractor.config
			        if not config:
			            continue

			        # An unset (or zero) mode count is never flagged.
			        mode_count = config.n_modes
			        if mode_count and mode_count > 66:
			            advisories.append(ValidationWarning(
			                type="performance",
			                path=["extractors", str(index), "config", "n_modes"],
			                message=f"n_modes={mode_count} is high; consider <=66 for performance"
			            ))

			    return advisories

			`def _validate_references(self, spec: AtomizerSpec) -> List[ValidationError]:`
			`"""Validate reference integrity."""`
			`errors = []`

			`# Collect all valid IDs`
			`dv_ids = {dv.id for dv in spec.design_variables}`
			`ext_ids = {ext.id for ext in spec.extractors}`
			`ext_outputs: Dict[str, set] = {}`
			`for ext in spec.extractors:`
			`ext_outputs[ext.id] = {o.name for o in ext.outputs}`

			`# Validate canvas edges`
			`if spec.canvas and spec.canvas.edges:`
			`all_ids = dv_ids \| ext_ids`
			`all_ids.add("model")`
			`all_ids.add("solver")`
			`all_ids.add("optimization")`
			`all_ids.update(obj.id for obj in spec.objectives)`
			`if spec.constraints:`
			`all_ids.update(con.id for con in spec.constraints)`

			`for i, edge in enumerate(spec.canvas.edges):`
			`if edge.source not in all_ids:`
			`errors.append(ValidationError(`
			`type="reference",`
			`path=["canvas", "edges", str(i), "source"],`
			`message=f"Edge source '{edge.source}' not found"`
			`))`
			`if edge.target not in all_ids:`
			`errors.append(ValidationError(`
			`type="reference",`
			`path=["canvas", "edges", str(i), "target"],`
			`message=f"Edge target '{edge.target}' not found"`
			`))`

			`return errors`

			def _validate_optimization_settings(self, spec: AtomizerSpec) -> List[ValidationError]:
			    """
			    Check that the chosen algorithm is compatible with the objectives.

			    Args:
			        spec: Spec providing the algorithm type and the objectives.

			    Returns:
			        A single semantic error when NSGA-II is selected with fewer than
			        two objectives; otherwise an empty list.
			    """
			    algorithm = spec.optimization.algorithm.type

			    # Every other combination is accepted as-is.
			    if algorithm != AlgorithmType.NSGA_II or len(spec.objectives) >= 2:
			        return []

			    return [ValidationError(
			        type="semantic",
			        path=["optimization", "algorithm", "type"],
			        message="NSGA-II requires at least 2 objectives"
			    )]

			def _warn_optimization_settings(self, spec: AtomizerSpec) -> List[ValidationWarning]:
			    """
			    Produce advisories about the trial budget.

			    Two independent recommendations may be issued: one when
			    ``max_trials`` is below 20, and one when more than 5 enabled design
			    variables are paired with fewer than 10 trials per variable.

			    Args:
			        spec: Spec providing the budget and the enabled design variables.

			    Returns:
			        Zero, one or two recommendation warnings. An unset (or zero)
			        ``max_trials`` never triggers a warning.
			    """
			    advisories: List[ValidationWarning] = []
			    trial_limit = spec.optimization.budget.max_trials

			    if trial_limit and trial_limit < 20:
			        advisories.append(ValidationWarning(
			            type="recommendation",
			            path=["optimization", "budget", "max_trials"],
			            message=f"max_trials={trial_limit} is low; recommend >= 20 for convergence"
			        ))

			    # Rule of thumb applied here: ten trials per enabled design variable.
			    enabled_count = len(spec.get_enabled_design_variables())
			    suggested_trials = enabled_count * 10
			    if trial_limit and enabled_count > 5 and trial_limit < suggested_trials:
			        advisories.append(ValidationWarning(
			            type="recommendation",
			            path=["optimization", "budget", "max_trials"],
			            message=f"{enabled_count} DVs suggest at least {suggested_trials} trials"
			        ))

			    return advisories

			def _validate_canvas_edges(self, spec: AtomizerSpec) -> List[ValidationWarning]:
			    """
			    Warn when the spec has no canvas edges.

			    Args:
			        spec: Spec whose ``canvas`` is inspected.

			    Returns:
			        A single completeness warning when the canvas is missing or has
			        no edges; otherwise an empty list.
			    """
			    canvas = spec.canvas
			    if canvas and canvas.edges:
			        return []

			    return [ValidationWarning(
			        type="completeness",
			        path=["canvas", "edges"],
			        message="No canvas edges defined; canvas may not render correctly"
			    )]

			def _validate_unique_ids(self, spec: AtomizerSpec) -> List[ValidationError]:
			    """
			    Check that no ID is shared between ID-bearing elements.

			    Design variables, extractors, objectives and constraints share one
			    ID namespace and are scanned in that order.

			    Args:
			        spec: Spec whose elements are scanned.

			    Returns:
			        One semantic error per repeated ID, reported at the later
			        occurrence and naming the most recent earlier location.
			    """
			    sections = (
			        ("design_variables", spec.design_variables),
			        ("extractors", spec.extractors),
			        ("objectives", spec.objectives),
			        # Constraints are optional; a missing list contributes nothing.
			        ("constraints", spec.constraints or ()),
			    )

			    duplicates: List[ValidationError] = []
			    last_location: Dict[str, str] = {}

			    for section, items in sections:
			        for index, item in enumerate(items):
			            item_id = item.id
			            if item_id in last_location:
			                duplicates.append(ValidationError(
			                    type="semantic",
			                    path=[section, str(index), "id"],
			                    message=f"Duplicate ID '{item_id}' (also in {last_location[item_id]})"
			                ))
			            # Always overwrite, so a third occurrence points at the second.
			            last_location[item_id] = f"{section}[{index}]"

			    return duplicates

			def _validate_custom_functions(self, spec: AtomizerSpec) -> List[ValidationError]:
			    """
			    Validate that custom function extractors contain compilable Python.

			    The source is only compiled, never executed.

			    Args:
			        spec: Spec whose custom-function extractors are checked.

			    Returns:
			        One semantic error per extractor whose ``source_code`` cannot be
			        compiled. Extractors without a function or without source code
			        are skipped.
			    """
			    errors: List[ValidationError] = []

			    for i, ext in enumerate(spec.extractors):
			        if ext.type != ExtractorType.CUSTOM_FUNCTION or not ext.function:
			            continue

			        source_code = ext.function.source_code
			        if not source_code:
			            continue

			        error_path = ["extractors", str(i), "function", "source_code"]
			        try:
			            compile(source_code, f"<custom:{ext.name}>", "exec")
			        except SyntaxError as e:
			            errors.append(ValidationError(
			                type="semantic",
			                path=error_path,
			                message=f"Python syntax error: {e.msg} at line {e.lineno}"
			            ))
			        except ValueError as e:
			            # compile() signals some malformed input (e.g. null bytes on
			            # older Python versions) with ValueError rather than
			            # SyntaxError; report it instead of crashing validation.
			            errors.append(ValidationError(
			                type="semantic",
			                path=error_path,
			                message=f"Invalid Python source: {e}"
			            ))

			    return errors

			def _build_summary(self, data: Dict) -> ValidationSummary:
			    """
			    Count the main elements of a raw spec dictionary.

			    Args:
			        data: Spec as a plain dictionary.

			    Returns:
			        ValidationSummary with the number of design variables,
			        extractors, objectives, constraints and custom functions. An
			        extractor counts as custom when its ``type`` is
			        ``"custom_function"`` or its ``builtin`` flag is falsy.
			    """
			    def section(key: str) -> Any:
			        # A key that is present but null counts as empty, exactly like a
			        # missing key, so a half-filled spec can still be summarised.
			        return data.get(key) or []

			    extractors = section("extractors")
			    custom_count = sum(
			        1 for e in extractors
			        # Entries that are not dicts cannot be classified; skip them
			        # rather than fail on ``.get``.
			        if isinstance(e, dict)
			        and (e.get("type") == "custom_function" or not e.get("builtin", True))
			    )

			    return ValidationSummary(
			        design_variables=len(section("design_variables")),
			        extractors=len(extractors),
			        objectives=len(section("objectives")),
			        constraints=len(section("constraints")),
			        custom_functions=custom_count
			    )

			`def _parse_path(self, path: str) -> List[str]:`
			`"""Parse a JSONPath-style path into parts."""`
			`import re`
			`# Handle both dot notation and bracket notation`
			`# e.g., "design_variables[0].bounds.max" or "objectives.0.weight"`
			`parts = []`
			`for part in re.split(r'\.\|\[\|\]', path):`
			`if part:`
			`parts.append(part)`
			`return parts`

			`def _validate_dv_update(`
			`self,`
			`parts: List[str],`
			`value: Any,`
			`spec: AtomizerSpec`
			`) -> List[str]:`
			`"""Validate a design variable update."""`
			`errors = []`

			`if len(parts) >= 2:`
			`try:`
			`idx = int(parts[1])`
			`if idx >= len(spec.design_variables):`
			`errors.append(f"Design variable index {idx} out of range")`
			`except ValueError:`
			`errors.append(f"Invalid design variable index: {parts[1]}")`

			`return errors`

			`def _validate_extractor_update(`
			`self,`
			`parts: List[str],`
			`value: Any,`
			`spec: AtomizerSpec`
			`) -> List[str]:`
			`"""Validate an extractor update."""`
			`errors = []`

			`if len(parts) >= 2:`
			`try:`
			`idx = int(parts[1])`
			`if idx >= len(spec.extractors):`
			`errors.append(f"Extractor index {idx} out of range")`
			`except ValueError:`
			`errors.append(f"Invalid extractor index: {parts[1]}")`

			`return errors`

			`def _validate_objective_update(`
			`self,`
			`parts: List[str],`
			`value: Any,`
			`spec: AtomizerSpec`
			`) -> List[str]:`
			`"""Validate an objective update."""`
			`errors = []`

			`if len(parts) >= 2:`
			`try:`
			`idx = int(parts[1])`
			`if idx >= len(spec.objectives):`
			`errors.append(f"Objective index {idx} out of range")`
			`except ValueError:`
			`errors.append(f"Invalid objective index: {parts[1]}")`

			`# Validate weight`
			`if len(parts) >= 3 and parts[2] == "weight":`
			`if not isinstance(value, (int, float)) or value < 0:`
			`errors.append("Weight must be a non-negative number")`

			`return errors`

			`def _validate_constraint_update(`
			`self,`
			`parts: List[str],`
			`value: Any,`
			`spec: AtomizerSpec`
			`) -> List[str]:`
			`"""Validate a constraint update."""`
			`errors = []`

			`if not spec.constraints:`
			`errors.append("No constraints defined")`
			`return errors`

			`if len(parts) >= 2:`
			`try:`
			`idx = int(parts[1])`
			`if idx >= len(spec.constraints):`
			`errors.append(f"Constraint index {idx} out of range")`
			`except ValueError:`
			`errors.append(f"Invalid constraint index: {parts[1]}")`

			`return errors`

			`def _validate_optimization_update(`
			`self,`
			`parts: List[str],`
			`value: Any`
			`) -> List[str]:`
			`"""Validate an optimization update."""`
			`errors = []`

			`if len(parts) >= 2:`
			`if parts[1] == "algorithm" and len(parts) >= 3:`
			`if parts[2] == "type":`
			`valid_types = [t.value for t in AlgorithmType]`
			`if value not in valid_types:`
			`errors.append(f"Invalid algorithm type. Valid: {valid_types}")`

			`return errors`

			`def _validate_meta_update(`
			`self,`
			`parts: List[str],`
			`value: Any`
			`) -> List[str]:`
			`"""Validate a meta update."""`
			`errors = []`

			`if len(parts) >= 2:`
			`if parts[1] == "study_name":`
			`import re`
			`if not re.match(r"^[a-z0-9_]+$", str(value)):`
			`errors.append("study_name must be snake_case (lowercase, numbers, underscores)")`

			`return errors`


			`# Module-level convenience function`
			def validate_spec(
			    spec_data: Union[Dict[str, Any], AtomizerSpec],
			    strict: bool = True
			) -> ValidationReport:
			    """
			    Validate an AtomizerSpec with a freshly created SpecValidator.

			    Args:
			        spec_data: Spec data (dict or AtomizerSpec)
			        strict: Raise exception on errors

			    Returns:
			        ValidationReport

			    Raises:
			        SpecValidationError: If strict=True and validation fails
			    """
			    # Thin wrapper: a new validator is built for every call.
			    return SpecValidator().validate(spec_data, strict=strict)