# Atomizer/optimization_engine/future/step_classifier.py

"""
Step Classifier - Phase 2.6

Classifies workflow steps into:
1. Engineering Features - Complex FEA/CAE operations needing research/documentation
2. Inline Calculations - Simple math operations to generate on-the-fly
3. Post-Processing Hooks - Middleware scripts between engineering steps

Author: Atomizer Development Team
Version: 0.1.0 (Phase 2.6)
Last Updated: 2025-01-16
"""

from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from pathlib import Path
import re


@dataclass
class StepClassification:
    """
    Outcome of classifying a single workflow step.

    Attributes:
        step_type: One of 'engineering_feature', 'inline_calculation' or
            'post_processing_hook'.
        complexity: One of 'simple', 'moderate' or 'complex'.
        requires_research: Whether the step needs research before it can be
            implemented.
        requires_documentation: Whether the step needs documentation.
        auto_generate: Whether code for the step can be generated
            automatically.
        reasoning: Human-readable explanation of the decision.
    """

    step_type: str
    complexity: str
    requires_research: bool
    requires_documentation: bool
    auto_generate: bool
    reasoning: str


class StepClassifier:
    """
    Classify workflow steps by the kind of implementation effort they need.

    Every step ends up in one of three categories:
    - Full feature engineering (FEA/CAE operations)
    - Inline code generation (simple math)
    - Post-processing hooks (middleware)

    Matching is keyword based and case-insensitive, both for the action name
    and for the free-text request context.
    """

    # Parameter names whose mere presence marks a step as FEA/CAE work.
    _FEA_PARAM_INDICATORS = (
        'result_type', 'solver', 'element_type', 'material_type',
        'mesh_type', 'load_type', 'subcase', 'solution',
    )

    # Actions that belong to the standard optimization loop.
    _STANDARD_WORKFLOW_ACTIONS = frozenset(
        {'identify_parameters', 'update_parameters', 'optimize'}
    )

    # Request phrasings that signal a simple calculation. Compiled once at
    # class creation instead of being rebuilt on every call. The patterns are
    # lower-case only, so callers must search lower-cased text.
    _CALC_PATTERNS = tuple(
        re.compile(pattern) for pattern in (
            r'\b(calculate|compute|find)\s+(average|mean|max|min|sum)\b',
            r'\b(average|mean)\s+of\b',
            r'\bfind\s+the\s+(maximum|minimum)\b',
            r'\bcompare\s+.+\s+to\s+',
        )
    )

    # Maps a classification's step_type to its bucket in classify_workflow().
    _BUCKET_FOR_STEP_TYPE = {
        'engineering_feature': 'engineering_features',
        'inline_calculation': 'inline_calculations',
        'post_processing_hook': 'post_processing_hooks',
    }

    def __init__(self):
        """Build the per-instance keyword tables used by the classifier."""
        # Engineering operations that require research/documentation.
        # Only the keys are consulted by _is_engineering_operation(); the
        # value lists are not read anywhere in this class.
        self.engineering_operations = {
            # FEA Result Extraction
            'extract_result': ['displacement', 'stress', 'strain', 'reaction_force',
                               'element_force', 'temperature', 'modal', 'buckling'],

            # FEA Property Modifications
            'update_fea_property': ['cbush_stiffness', 'pcomp_layup', 'mat1_properties',
                                    'pshell_thickness', 'pbeam_properties',
                                    'contact_stiffness'],

            # Geometry/CAD Operations
            'modify_geometry': ['extrude', 'revolve', 'boolean', 'fillet', 'chamfer'],
            'read_expression': ['part_expression', 'assembly_expression'],

            # Simulation Setup
            'run_analysis': ['sol101', 'sol103', 'sol106', 'sol111', 'sol400'],
            'create_material': ['mat1', 'mat8', 'mat9', 'physical_material'],
            'apply_loads': ['force', 'moment', 'pressure', 'thermal_load'],
            'create_mesh': ['tetra', 'hex', 'shell', 'beam'],
        }

        # Simple mathematical operations (no feature needed)
        self.simple_math_operations = {
            'average', 'mean', 'max', 'maximum', 'min', 'minimum',
            'sum', 'total', 'count', 'ratio', 'percentage',
            'compare', 'difference', 'delta', 'absolute',
            'normalize', 'scale', 'round', 'floor', 'ceil'
        }

        # Statistical operations (still simple, but slightly more complex)
        self.statistical_operations = {
            'std', 'stddev', 'variance', 'median', 'mode',
            'percentile', 'quartile', 'range', 'iqr'
        }

        # Post-processing indicators (substrings searched in the request text)
        self.post_processing_indicators = {
            'custom objective', 'metric', 'criteria', 'evaluation',
            'transform', 'filter', 'aggregate', 'combine'
        }

    def classify_step(self, action: str, domain: str, params: Dict[str, Any],
                      request_context: str = "") -> "StepClassification":
        """
        Classify a workflow step into engineering feature, inline calc, or hook.

        The checks run in a fixed order and the first match wins:
        engineering operation, simple calculation, post-processing hook,
        standard optimization action, and finally a conservative default.

        Args:
            action: The action type (e.g., 'extract_result', 'update_parameters').
                Compared case-insensitively.
            domain: The domain (e.g., 'result_extraction', 'optimization').
                Accepted for interface compatibility; not consulted by the
                current rules.
            params: Step parameters. ``None`` is treated as no parameters.
            request_context: Original user request for context. ``None`` is
                treated as an empty string.

        Returns:
            StepClassification with type and reasoning
        """
        # Normalise once so every rule below sees the same lower-cased input.
        action_lower = action.lower()
        request_lower = (request_context or "").lower()
        params = params or {}

        # Check for engineering operations
        if self._is_engineering_operation(action_lower, params):
            return StepClassification(
                step_type='engineering_feature',
                complexity='complex',
                requires_research=True,
                requires_documentation=True,
                auto_generate=False,
                reasoning=f"FEA/CAE operation '{action}' requires specialized knowledge and documentation"
            )

        # Check for simple mathematical calculations
        if self._is_simple_calculation(action_lower, params, request_lower):
            return StepClassification(
                step_type='inline_calculation',
                complexity='simple',
                requires_research=False,
                requires_documentation=False,
                auto_generate=True,
                reasoning="Simple mathematical operation that can be generated inline"
            )

        # Check for post-processing hooks
        if self._is_post_processing_hook(action_lower, params, request_lower):
            return StepClassification(
                step_type='post_processing_hook',
                complexity='moderate',
                requires_research=False,
                requires_documentation=False,
                auto_generate=True,
                reasoning="Post-processing calculation between FEA steps"
            )

        # Check if it's a known standard optimization action
        if action_lower in self._STANDARD_WORKFLOW_ACTIONS:
            return StepClassification(
                step_type='engineering_feature',
                complexity='moderate',
                requires_research=False,  # May already exist
                requires_documentation=True,
                auto_generate=False,
                reasoning="Standard optimization workflow step"
            )

        # Default: treat as engineering feature to be safe
        return StepClassification(
            step_type='engineering_feature',
            complexity='moderate',
            requires_research=True,
            requires_documentation=True,
            auto_generate=False,
            reasoning="Unknown action type, treating as engineering feature"
        )

    def _is_engineering_operation(self, action: str, params: Dict[str, Any]) -> bool:
        """
        Check if this is a complex engineering operation.

        A step qualifies when its action (case-insensitive) is a key of
        ``self.engineering_operations`` or when ``params`` contains any
        FEA-specific key. The presence of the key is sufficient; the value
        stored under it is not inspected.

        Args:
            action: The action type.
            params: Step parameters; ``None`` or empty means no indicators.

        Returns:
            True if the step should be treated as FEA/CAE work.
        """
        if action.lower() in self.engineering_operations:
            return True

        if not params:
            return False

        return any(indicator in params for indicator in self._FEA_PARAM_INDICATORS)

    def _is_simple_calculation(self, action: str, params: Dict[str, Any],
                               request_context: str) -> bool:
        """
        Check if this is a simple mathematical calculation.

        The action is split on underscores and each word is compared with
        the simple-math and statistical keyword sets. If neither matches,
        the request text is searched for calculation phrasings.

        Args:
            action: The action type.
            params: Step parameters (not consulted by the current rules).
            request_context: Request text; matched case-insensitively,
                ``None`` is treated as empty.

        Returns:
            True if the step can be generated as an inline calculation.
        """
        action_words = set(action.lower().split('_'))

        # Math keywords in the action name
        if not action_words.isdisjoint(self.simple_math_operations):
            return True

        # Statistical keywords in the action name
        if not action_words.isdisjoint(self.statistical_operations):
            return True

        # Calculation phrasings in the request (patterns are lower-case only)
        context = (request_context or "").lower()
        return any(pattern.search(context) for pattern in self._CALC_PATTERNS)

    def _is_post_processing_hook(self, action: str, params: Dict[str, Any],
                                 request_context: str) -> bool:
        """
        Check if this is a post-processing hook between steps.

        The request must mention at least one post-processing indicator and
        additionally show one of these signs of combining several inputs:
        both 'average' and 'maximum', the word 'compare', or both 'assign'
        and 'metric'.

        Args:
            action: The action type (not consulted by the current rules).
            params: Step parameters (not consulted by the current rules).
            request_context: Request text; matched case-insensitively,
                ``None`` is treated as empty.

        Returns:
            True if the step looks like middleware between engineering steps.
        """
        context = (request_context or "").lower()

        # The follow-up conditions do not depend on which indicator matched,
        # so a single any() replaces re-checking them per indicator.
        if not any(indicator in context for indicator in self.post_processing_indicators):
            return False

        if 'average' in context and 'maximum' in context:
            return True
        if 'compare' in context:
            return True
        return 'assign' in context and 'metric' in context

    def classify_workflow(self, workflow_steps: List[Any],
                          request_context: str = "") -> Dict[str, List[Any]]:
        """
        Classify all steps in a workflow.

        Args:
            workflow_steps: Objects exposing ``action``, ``domain`` and
                ``params`` attributes.
            request_context: Original user request, passed to every step's
                classification.

        Returns:
            {
                'engineering_features': [...],
                'inline_calculations': [...],
                'post_processing_hooks': [...]
            }
            Each list entry is ``{'step': step, 'classification': result}``,
            in the order the steps were given.
        """
        classified = {bucket: [] for bucket in self._BUCKET_FOR_STEP_TYPE.values()}

        for step in workflow_steps:
            classification = self.classify_step(
                step.action,
                step.domain,
                step.params,
                request_context
            )

            bucket = self._BUCKET_FOR_STEP_TYPE.get(classification.step_type)
            if bucket is not None:
                classified[bucket].append({
                    'step': step,
                    'classification': classification
                })

        return classified

    def get_summary(self, classified_workflow: Dict[str, List[Any]]) -> str:
        """
        Get human-readable summary of classification.

        Args:
            classified_workflow: Mapping as returned by ``classify_workflow``.
                A missing category is reported as empty.

        Returns:
            Multi-line text with one section per category.
        """
        lines = ["Workflow Classification Summary", "=" * 80, ""]

        # Engineering features: show the reasoning behind each decision
        eng_features = classified_workflow.get('engineering_features', [])
        lines.append(f"Engineering Features (Need Research): {len(eng_features)}")
        for item in eng_features:
            step = item['step']
            lines.append(f"  - {step.action} ({step.domain})")
            lines.append(f"    Reason: {item['classification'].reasoning}")

        # Auto-generated categories share one layout: action plus parameters
        auto_generated_sections = (
            ("Inline Calculations (Auto-Generate)", 'inline_calculations'),
            ("Post-Processing Hooks (Auto-Generate)", 'post_processing_hooks'),
        )
        for title, key in auto_generated_sections:
            entries = classified_workflow.get(key, [])
            lines.append("")
            lines.append(f"{title}: {len(entries)}")
            for item in entries:
                step = item['step']
                lines.append(f"  - {step.action}: {step.params}")

        return "\n".join(lines)


def main():
    """
    Run a manual demo of the step classifier.

    Decomposes a hard-coded natural-language request with
    ``WorkflowDecomposer``, prints the resulting steps, then prints the
    classification summary produced by ``StepClassifier``.
    """
    # Imported inside the function, so the decomposer is only required when
    # this demo actually runs.
    from optimization_engine.future.workflow_decomposer import WorkflowDecomposer

    separator = "=" * 80

    print("Step Classifier Test")
    print(separator)
    print()

    # Sample request used as the demo input
    request = """I want to extract forces in direction Z of all the 1D elements and find the average of it,
    then find the maximum value and compare it to the average, then assign it to a objective metric that needs to be minimized."""

    workflow_decomposer = WorkflowDecomposer()
    step_classifier = StepClassifier()

    print("Request:")
    print(request)
    print()

    # Break the request into individual workflow steps
    steps = workflow_decomposer.decompose(request)

    print("Workflow Steps:")
    for index, step in enumerate(steps, start=1):
        print(f"{index}. {step.action} ({step.domain})")
    print()

    # Classify the steps and show the grouped result
    classified = step_classifier.classify_workflow(steps, request)
    summary = step_classifier.get_summary(classified)
    print(summary)


# Run the demo only when the file is executed as a script.
if __name__ == "__main__":
    main()