# optimization_engine/future/hook_generator.py

"""
Post-Processing Hook Generator - Phase 2.9

Auto-generates middleware Python scripts for post-processing operations in optimization workflows.

This handles the "post_processing_hooks" from Phase 2.7 LLM analysis.

Hook scripts sit between optimization steps to:
- Calculate custom objective functions
- Combine multiple metrics with weights
- Apply complex formulas
- Transform results for next step

Examples:
- Weighted objective: 0.7 * norm_stress + 0.3 * norm_disp
- Custom constraint: max_stress / yield_strength < 1.0
- Multi-criteria metric: sqrt(stress^2 + disp^2)

Author: Atomizer Development Team
Version: 0.1.0 (Phase 2.9)
Last Updated: 2025-01-16
"""

from typing import Dict, Any, List, Optional
from dataclasses import dataclass
from pathlib import Path
import textwrap


@dataclass
class GeneratedHook:
    """
    Result of hook generation.

    Bundles the source text of one generated hook script with the metadata
    the generator records about it (see the per-field comments below).
    """
    # File name the script is saved under; HookGenerator.save_hook_to_file
    # joins it onto the output directory.
    script_name: str
    # Complete Python source text of the generated script.
    script_content: str
    # Names of the input values the hook needs (taken from the spec's params).
    inputs_required: List[str]
    # Names of the values the hook is recorded as producing.
    outputs_created: List[str]
    # Human-readable summary; the generators substitute an auto-built one
    # when the spec carries no description.
    description: str
    # One of 'weighted_objective', 'custom_formula', 'constraint_check',
    # 'comparison' or 'generic' (the values set by the generator methods).
    hook_type: str


class HookGenerator:
    """
    Generates post-processing hook scripts for optimization workflows.

    Hook scripts are standalone Python modules that execute between optimization
    steps to perform custom calculations, combine metrics, or transform results.
    """

    def __init__(self):
        """Initialize the hook generator."""
        self.supported_hook_types = {
            'weighted_objective',
            'weighted_combination',
            'custom_formula',
            'constraint_check',
            'multi_objective',
            'custom_metric',
            'comparison',
            'threshold_check'
        }

    def generate_from_llm_output(self, hook_spec: Dict[str, Any]) -> GeneratedHook:
        """
        Generate hook script from LLM-analyzed post-processing requirement.

        Args:
            hook_spec: Dictionary from LLM with keys:
                - action: str (e.g., "weighted_objective")
                - description: str
                - params: dict with inputs/weights/formula/etc.

        Returns:
            GeneratedHook with complete Python script
        """
        action = hook_spec.get('action', '').lower()
        params = hook_spec.get('params', {})
        description = hook_spec.get('description', '')

        # Determine hook type and generate appropriate script
        if 'weighted' in action or 'combination' in action:
            return self._generate_weighted_objective(params, description)

        elif 'formula' in action or 'custom' in action:
            return self._generate_custom_formula(params, description)

        elif 'constraint' in action or 'check' in action:
            return self._generate_constraint_check(params, description)

        elif 'comparison' in action or 'compare' in action:
            return self._generate_comparison(params, description)

        else:
            # Generic hook
            return self._generate_generic_hook(action, params, description)

    def _generate_weighted_objective(self, params: Dict[str, Any],
                                     description: str) -> GeneratedHook:
        """
        Generate weighted objective function hook.

        Example params:
        {
            "inputs": ["norm_stress", "norm_disp"],
            "weights": [0.7, 0.3],
            "formula": "0.7 * norm_stress + 0.3 * norm_disp",  # optional
            "objective": "minimize"
        }
        """
        inputs = params.get('inputs', [])
        weights = params.get('weights', [])
        formula = params.get('formula', '')
        objective = params.get('objective', 'minimize')

        # Validate inputs and weights match
        if len(inputs) != len(weights):
            weights = [1.0 / len(inputs)] * len(inputs)  # Equal weights if mismatch

        # Generate script name
        script_name = f"hook_weighted_objective_{'_'.join(inputs)}.py"

        # Build formula if not provided
        if not formula:
            terms = [f"{w} * {inp}" for w, inp in zip(weights, inputs)]
            formula = " + ".join(terms)

        # Generate script content
        script_content = f'''"""
Weighted Objective Function Hook
Auto-generated by Atomizer Phase 2.9

{description}

Inputs: {', '.join(inputs)}
Weights: {', '.join(map(str, weights))}
Formula: {formula}
Objective: {objective}
"""

import sys
import json
from pathlib import Path


def weighted_objective({', '.join(inputs)}):
    """
    Calculate weighted objective from multiple inputs.

    Args:
{self._format_args_doc(inputs)}

    Returns:
        float: Weighted objective value
    """
    result = {formula}
    return result


def main():
    """
    Main entry point for hook execution.
    Reads inputs from JSON file, calculates objective, writes output.
    """
    # Parse command line arguments
    if len(sys.argv) < 2:
        print("Usage: python {{}} <input_file.json>".format(sys.argv[0]))
        sys.exit(1)

    input_file = Path(sys.argv[1])

    # Read inputs
    if not input_file.exists():
        print(f"Error: Input file {{input_file}} not found")
        sys.exit(1)

    with open(input_file, 'r') as f:
        inputs = json.load(f)

    # Extract required inputs
{self._format_input_extraction(inputs)}

    # Calculate weighted objective
    result = weighted_objective({', '.join(inputs)})

    # Write output
    output_file = input_file.parent / "weighted_objective_result.json"
    output = {{
        "weighted_objective": result,
        "objective_type": "{objective}",
        "inputs_used": {{{', '.join([f'"{inp}": {inp}' for inp in inputs])}}},
        "formula": "{formula}"
    }}

    with open(output_file, 'w') as f:
        json.dump(output, f, indent=2)

    print(f"Weighted objective calculated: {{result:.6f}}")
    print(f"Result saved to: {{output_file}}")

    return result


if __name__ == '__main__':
    main()
'''

        return GeneratedHook(
            script_name=script_name,
            script_content=script_content,
            inputs_required=inputs,
            outputs_created=['weighted_objective'],
            description=description or f"Weighted combination of {', '.join(inputs)}",
            hook_type='weighted_objective'
        )

    def _generate_custom_formula(self, params: Dict[str, Any],
                                 description: str) -> GeneratedHook:
        """
        Generate custom formula hook.

        Example params:
        {
            "inputs": ["max_stress", "yield_strength"],
            "formula": "max_stress / yield_strength",
            "output_name": "safety_factor"
        }
        """
        inputs = params.get('inputs', [])
        formula = params.get('formula', '')
        output_name = params.get('output_name', 'custom_result')

        if not formula:
            raise ValueError("Custom formula hook requires 'formula' parameter")

        script_name = f"hook_custom_{output_name}.py"

        script_content = f'''"""
Custom Formula Hook
Auto-generated by Atomizer Phase 2.9

{description}

Formula: {output_name} = {formula}
Inputs: {', '.join(inputs)}
"""

import sys
import json
from pathlib import Path


def calculate_{output_name}({', '.join(inputs)}):
    """
    Calculate custom metric using formula.

    Args:
{self._format_args_doc(inputs)}

    Returns:
        float: {output_name}
    """
    {output_name} = {formula}
    return {output_name}


def main():
    """Main entry point for hook execution."""
    if len(sys.argv) < 2:
        print("Usage: python {{}} <input_file.json>".format(sys.argv[0]))
        sys.exit(1)

    input_file = Path(sys.argv[1])

    # Read inputs
    with open(input_file, 'r') as f:
        inputs = json.load(f)

    # Extract required inputs
{self._format_input_extraction(inputs)}

    # Calculate result
    result = calculate_{output_name}({', '.join(inputs)})

    # Write output
    output_file = input_file.parent / "{output_name}_result.json"
    output = {{
        "{output_name}": result,
        "formula": "{formula}",
        "inputs_used": {{{', '.join([f'"{inp}": {inp}' for inp in inputs])}}}
    }}

    with open(output_file, 'w') as f:
        json.dump(output, f, indent=2)

    print(f"{output_name} = {{result:.6f}}")
    print(f"Result saved to: {{output_file}}")

    return result


if __name__ == '__main__':
    main()
'''

        return GeneratedHook(
            script_name=script_name,
            script_content=script_content,
            inputs_required=inputs,
            outputs_created=[output_name],
            description=description or f"Custom formula: {formula}",
            hook_type='custom_formula'
        )

    def _generate_constraint_check(self, params: Dict[str, Any],
                                   description: str) -> GeneratedHook:
        """
        Generate constraint checking hook.

        Example params:
        {
            "inputs": ["max_stress", "yield_strength"],
            "condition": "max_stress < yield_strength",
            "threshold": 1.0,
            "constraint_name": "stress_limit"
        }
        """
        inputs = params.get('inputs', [])
        condition = params.get('condition', '')
        threshold = params.get('threshold', 1.0)
        constraint_name = params.get('constraint_name', 'constraint')

        script_name = f"hook_constraint_{constraint_name}.py"

        script_content = f'''"""
Constraint Check Hook
Auto-generated by Atomizer Phase 2.9

{description}

Constraint: {condition}
Threshold: {threshold}
"""

import sys
import json
from pathlib import Path


def check_{constraint_name}({', '.join(inputs)}):
    """
    Check constraint condition.

    Args:
{self._format_args_doc(inputs)}

    Returns:
        tuple: (satisfied: bool, value: float, violation: float)
    """
    value = {condition if condition else f"{inputs[0]} / {threshold}"}
    satisfied = value <= {threshold}
    violation = max(0.0, value - {threshold})

    return satisfied, value, violation


def main():
    """Main entry point for hook execution."""
    if len(sys.argv) < 2:
        print("Usage: python {{}} <input_file.json>".format(sys.argv[0]))
        sys.exit(1)

    input_file = Path(sys.argv[1])

    # Read inputs
    with open(input_file, 'r') as f:
        inputs = json.load(f)

    # Extract required inputs
{self._format_input_extraction(inputs)}

    # Check constraint
    satisfied, value, violation = check_{constraint_name}({', '.join(inputs)})

    # Write output
    output_file = input_file.parent / "{constraint_name}_check.json"
    output = {{
        "constraint_name": "{constraint_name}",
        "satisfied": satisfied,
        "value": value,
        "threshold": {threshold},
        "violation": violation,
        "inputs_used": {{{', '.join([f'"{inp}": {inp}' for inp in inputs])}}}
    }}

    with open(output_file, 'w') as f:
        json.dump(output, f, indent=2)

    status = "SATISFIED" if satisfied else "VIOLATED"
    print(f"Constraint {{status}}: {{value:.6f}} (threshold: {threshold})")
    if not satisfied:
        print(f"Violation: {{violation:.6f}}")
    print(f"Result saved to: {{output_file}}")

    return value


if __name__ == '__main__':
    main()
'''

        return GeneratedHook(
            script_name=script_name,
            script_content=script_content,
            inputs_required=inputs,
            outputs_created=[constraint_name, f'{constraint_name}_satisfied', f'{constraint_name}_violation'],
            description=description or f"Constraint check: {condition}",
            hook_type='constraint_check'
        )

    def _generate_comparison(self, params: Dict[str, Any],
                            description: str) -> GeneratedHook:
        """
        Generate comparison hook (min/max ratio, difference, etc.).

        Example params:
        {
            "inputs": ["min_force", "avg_force"],
            "operation": "ratio",
            "output_name": "min_to_avg_ratio"
        }
        """
        inputs = params.get('inputs', [])
        operation = params.get('operation', 'ratio').lower()
        output_name = params.get('output_name', f"{operation}_result")

        if len(inputs) < 2:
            raise ValueError("Comparison hook requires at least 2 inputs")

        # Determine formula based on operation
        if operation == 'ratio':
            formula = f"{inputs[0]} / {inputs[1]}"
        elif operation == 'difference':
            formula = f"{inputs[0]} - {inputs[1]}"
        elif operation == 'percent_difference':
            formula = f"(({inputs[0]} - {inputs[1]}) / {inputs[1]}) * 100.0"
        else:
            formula = f"{inputs[0]} / {inputs[1]}"  # Default to ratio

        script_name = f"hook_compare_{output_name}.py"

        script_content = f'''"""
Comparison Hook
Auto-generated by Atomizer Phase 2.9

{description}

Operation: {operation}
Formula: {output_name} = {formula}
"""

import sys
import json
from pathlib import Path


def compare_{operation}({', '.join(inputs)}):
    """
    Compare values using {operation}.

    Args:
{self._format_args_doc(inputs)}

    Returns:
        float: Comparison result
    """
    result = {formula}
    return result


def main():
    """Main entry point for hook execution."""
    if len(sys.argv) < 2:
        print("Usage: python {{}} <input_file.json>".format(sys.argv[0]))
        sys.exit(1)

    input_file = Path(sys.argv[1])

    # Read inputs
    with open(input_file, 'r') as f:
        inputs = json.load(f)

    # Extract required inputs
{self._format_input_extraction(inputs)}

    # Calculate comparison
    result = compare_{operation}({', '.join(inputs)})

    # Write output
    output_file = input_file.parent / "{output_name}.json"
    output = {{
        "{output_name}": result,
        "operation": "{operation}",
        "formula": "{formula}",
        "inputs_used": {{{', '.join([f'"{inp}": {inp}' for inp in inputs])}}}
    }}

    with open(output_file, 'w') as f:
        json.dump(output, f, indent=2)

    print(f"{output_name} = {{result:.6f}}")
    print(f"Result saved to: {{output_file}}")

    return result


if __name__ == '__main__':
    main()
'''

        return GeneratedHook(
            script_name=script_name,
            script_content=script_content,
            inputs_required=inputs,
            outputs_created=[output_name],
            description=description or f"{operation.capitalize()} of {', '.join(inputs)}",
            hook_type='comparison'
        )

    def _generate_generic_hook(self, action: str, params: Dict[str, Any],
                               description: str) -> GeneratedHook:
        """
        Build a placeholder hook script for an action no other generator handles.

        The generated ``process()`` evaluates ``params['formula']`` (default
        ``input_value``) over ``params['inputs']`` (default
        ``['input_value']``) and ``main()`` writes it under the key
        ``"result"`` to ``<output_name>.json``.

        Args:
            action: Lower-cased action name; spaces become underscores in
                the script file name.
            params: Hook parameters ('inputs', 'formula', 'output_name').
            description: Free text copied into the script's docstring.

        Returns:
            GeneratedHook of type 'generic'.
        """
        input_names = params.get('inputs', ['input_value'])
        expression = params.get('formula', 'input_value')
        result_name = params.get('output_name', 'result')

        file_safe_action = action.replace(' ', '_')

        # Pre-render the fragments that are spliced into the template.
        arg_list = ', '.join(input_names)
        extraction = self._format_input_extraction(input_names)

        script_content = f'''"""
Generic Hook
Auto-generated by Atomizer Phase 2.9

{description}

Action: {action}
"""

import sys
import json
from pathlib import Path


def process({arg_list}):
    """Process inputs according to action."""
    # TODO: Implement {action}
    result = {expression}
    return result


def main():
    """Main entry point for hook execution."""
    if len(sys.argv) < 2:
        print("Usage: python {{}} <input_file.json>".format(sys.argv[0]))
        sys.exit(1)

    input_file = Path(sys.argv[1])

    with open(input_file, 'r') as f:
        inputs = json.load(f)

{extraction}

    result = process({arg_list})

    output_file = input_file.parent / "{result_name}.json"
    with open(output_file, 'w') as f:
        json.dump({{"result": result}}, f, indent=2)

    print(f"Result: {{result}}")
    return result


if __name__ == '__main__':
    main()
'''

        summary = description or f"Generic hook: {action}"
        return GeneratedHook(
            script_name=f"hook_generic_{file_safe_action}.py",
            script_content=script_content,
            inputs_required=input_names,
            outputs_created=[result_name],
            description=summary,
            hook_type='generic'
        )

    def _format_args_doc(self, args: List[str]) -> str:
        """Format argument documentation for docstrings."""
        lines = []
        for arg in args:
            lines.append(f"        {arg}: float")
        return '\n'.join(lines)

    def _format_input_extraction(self, inputs: List[str]) -> str:
        """Format input extraction code."""
        lines = []
        for inp in inputs:
            lines.append(f'    {inp} = inputs.get("{inp}")')
            lines.append(f'    if {inp} is None:')
            lines.append(f'        print(f"Error: Required input \'{inp}\' not found")')
            lines.append(f'        sys.exit(1)')
        return '\n'.join(lines)

    def generate_batch(self, hook_specs: List[Dict[str, Any]]) -> List[GeneratedHook]:
        """
        Generate multiple hook scripts.

        Args:
            hook_specs: List of hook specifications from LLM

        Returns:
            List of GeneratedHook objects
        """
        return [self.generate_from_llm_output(spec) for spec in hook_specs]

    def save_hook_to_file(self, hook: GeneratedHook, output_dir: Path) -> Path:
        """
        Save generated hook script to file.

        Args:
            hook: GeneratedHook object
            output_dir: Directory to save script

        Returns:
            Path to saved script file
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        script_path = output_dir / hook.script_name
        with open(script_path, 'w') as f:
            f.write(hook.script_content)

        return script_path

    def generate_hook_registry(self, hooks: List[GeneratedHook], output_file: Path):
        """
        Generate a registry file documenting all hooks.

        Args:
            hooks: List of generated hooks
            output_file: Path to registry JSON file
        """
        registry = {
            "hooks": [
                {
                    "name": hook.script_name,
                    "type": hook.hook_type,
                    "description": hook.description,
                    "inputs": hook.inputs_required,
                    "outputs": hook.outputs_created
                }
                for hook in hooks
            ]
        }

        import json
        with open(output_file, 'w') as f:
            json.dump(registry, f, indent=2)

    def generate_lifecycle_hook(self, hook_spec: Dict[str, Any],
                                hook_point: str = "post_calculation") -> str:
        """
        Generate a hook compatible with Atomizer's lifecycle hook system (Phase 1).

        This creates a hook that integrates with HookManager and can be loaded
        from the plugins directory structure. The generated module contains
        one hook function that reads its inputs from the context's
        'calculations' (preferred) or 'results' dictionaries, plus a
        ``register_hooks(hook_manager)`` function.

        The kind of calculation emitted follows the hook type chosen by
        generate_from_llm_output, so both entry points always agree on how
        an action is interpreted. Generic hooks only validate their inputs
        and return None.

        Args:
            hook_spec: Hook specification from LLM (same as generate_from_llm_output)
            hook_point: Which lifecycle point to hook into (default: post_calculation)

        Returns:
            Complete Python module content with register_hooks() function

        Raises:
            ValueError: Propagated from generate_from_llm_output, or raised
                when a weighted hook has neither a formula nor inputs.

        Example output file: optimization_engine/plugins/post_calculation/weighted_objective.py
        """
        # Generate the core hook first; its hook_type is the single source of
        # truth for which calculation body is emitted below.
        generated_hook = self.generate_from_llm_output(hook_spec)
        hook_type = generated_hook.hook_type
        input_names = generated_hook.inputs_required

        # `or` folds explicit None values into the empty defaults.
        params = hook_spec.get('params') or {}
        description = hook_spec.get('description') or ''

        # Derive the hook function name from the hook type
        if hook_type == 'weighted_objective':
            func_name = "weighted_objective_hook"
        elif hook_type == 'custom_formula':
            output_name = params.get('output_name', 'custom_result')
            func_name = f"{output_name}_hook"
        elif hook_type == 'constraint_check':
            constraint_name = params.get('constraint_name', 'constraint')
            func_name = f"{constraint_name}_hook"
        elif hook_type == 'comparison':
            operation = params.get('operation', 'comparison')
            func_name = f"{operation}_hook"
        else:
            func_name = "custom_hook"

        # Build the lifecycle-compatible hook module
        module_content = f'''"""
{description}
Auto-generated lifecycle hook by Atomizer Phase 2.9

Hook Point: {hook_point}
Inputs: {', '.join(generated_hook.inputs_required)}
Outputs: {', '.join(generated_hook.outputs_created)}
"""

import logging
from typing import Dict, Any, Optional

logger = logging.getLogger(__name__)


def {func_name}(context: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    {description}

    Args:
        context: Hook context containing:
            - trial_number: Current optimization trial
            - results: Dictionary with extracted FEA results
            - calculations: Dictionary with inline calculation results

    Returns:
        Dictionary with calculated values to add to context
    """
    logger.info(f"Executing {func_name} for trial {{context.get('trial_number', 'unknown')}}")

    # Extract inputs from context
    results = context.get('results', {{}})
    calculations = context.get('calculations', {{}})

'''

        # Add input extraction. The fallback to `results` is keyed on None
        # rather than truthiness so a legitimate 0 / 0.0 from `calculations`
        # is not mistaken for a missing value.
        for input_var in input_names:
            module_content += f'''    {input_var} = calculations.get('{input_var}')
    if {input_var} is None:
        {input_var} = results.get('{input_var}')
    if {input_var} is None:
        logger.error(f"Required input '{input_var}' not found in context")
        raise ValueError(f"Missing required input: {input_var}")

'''

        # Add the core calculation logic
        if hook_type == 'weighted_objective':
            formula = params.get('formula') or ''
            if not formula:
                weights = params.get('weights') or []
                # Same equal-weight fallback as the standalone script generator
                if len(weights) != len(input_names):
                    weights = ([1.0 / len(input_names)] * len(input_names)
                               if input_names else [])
                formula = " + ".join(
                    f"{w} * {inp}" for w, inp in zip(weights, input_names)
                )
            if not formula:
                raise ValueError(
                    "Weighted objective hook requires at least one input or a 'formula'"
                )

            module_content += f'''    # Calculate weighted objective
    result = {formula}

    logger.info(f"Weighted objective calculated: {{result:.6f}}")

    return {{
        'weighted_objective': result
    }}
'''

        elif hook_type == 'custom_formula':
            formula = params.get('formula', '')
            output_name = params.get('output_name', 'custom_result')

            module_content += f'''    # Calculate using custom formula
    {output_name} = {formula}

    logger.info(f"{output_name} = {{{output_name}:.6f}}")

    return {{
        '{output_name}': {output_name}
    }}
'''

        elif hook_type == 'constraint_check':
            condition = params.get('condition', '')
            threshold = params.get('threshold', 1.0)
            constraint_name = params.get('constraint_name', 'constraint')
            value_expr = condition if condition else f"{input_names[0]} / {threshold}"

            module_content += f'''    # Check constraint
    value = {value_expr}
    satisfied = value <= {threshold}
    violation = max(0.0, value - {threshold})

    status = "SATISFIED" if satisfied else "VIOLATED"
    logger.info(f"Constraint {{status}}: {{value:.6f}} (threshold: {threshold})")

    return {{
        '{constraint_name}': value,
        '{constraint_name}_satisfied': satisfied,
        '{constraint_name}_violation': violation
    }}
'''

        elif hook_type == 'comparison':
            operation = params.get('operation', 'ratio').lower()
            output_name = params.get('output_name', f"{operation}_result")

            # The comparison generator has already rejected specs with
            # fewer than two inputs, so both indexes are safe here.
            first, second = input_names[0], input_names[1]
            if operation == 'difference':
                formula = f"{first} - {second}"
            elif operation == 'percent_difference':
                formula = f"(({first} - {second}) / {second}) * 100.0"
            else:
                # 'ratio' and any unknown operation
                formula = f"{first} / {second}"

            module_content += f'''    # Calculate comparison
    result = {formula}

    logger.info(f"{output_name} = {{result:.6f}}")

    return {{
        '{output_name}': result
    }}
'''

        else:
            # Generic hooks have no calculation template; make the "nothing
            # to add" outcome explicit instead of falling off the function.
            module_content += '''    # No calculation is defined for this action; inputs are validated only
    return None
'''

        # Add registration function for HookManager. The description is
        # emitted with repr() so quotes in it cannot break the module.
        module_content += f'''

def register_hooks(hook_manager):
    """
    Register this hook with the HookManager.

    This function is called automatically when the plugin is loaded.

    Args:
        hook_manager: The HookManager instance
    """
    hook_manager.register_hook(
        hook_point='{hook_point}',
        function={func_name},
        description={description!r},
        name="{func_name}",
        priority=100,
        enabled=True
    )
    logger.info(f"Registered {func_name} at {hook_point}")
'''

        return module_content


def main():
    """
    Demo run of the hook generator.

    Prints a summary for four sample hook specifications, dumps the
    weighted-objective script to stdout, then saves every generated script
    and a registry file under ``generated_hooks/``.
    """
    rule = "=" * 80

    def banner(title):
        # Section header: title framed by two rules, followed by a blank line.
        print(rule)
        print(title)
        print(rule)
        print()

    banner("Phase 2.9: Post-Processing Hook Generator Test")

    generator = HookGenerator()

    # Test cases from Phase 2.7 LLM output
    sample_specs = [
        {
            "action": "weighted_objective",
            "description": "Combine normalized stress (70%) and displacement (30%)",
            "params": {
                "inputs": ["norm_stress", "norm_disp"],
                "weights": [0.7, 0.3],
                "objective": "minimize"
            }
        },
        {
            "action": "custom_formula",
            "description": "Calculate safety factor",
            "params": {
                "inputs": ["max_stress", "yield_strength"],
                "formula": "yield_strength / max_stress",
                "output_name": "safety_factor"
            }
        },
        {
            "action": "comparison",
            "description": "Compare min force to average",
            "params": {
                "inputs": ["min_force", "avg_force"],
                "operation": "ratio",
                "output_name": "min_to_avg_ratio"
            }
        },
        {
            "action": "constraint_check",
            "description": "Check if stress is below yield",
            "params": {
                "inputs": ["max_stress", "yield_strength"],
                "condition": "max_stress / yield_strength",
                "threshold": 1.0,
                "constraint_name": "yield_constraint"
            }
        }
    ]

    print("Test Hook Generation:")
    print()

    # Per-spec summary of what would be generated
    for number, spec in enumerate(sample_specs, 1):
        print(f"{number}. {spec['description']}")
        summary = generator.generate_from_llm_output(spec)
        print(f"   Script: {summary.script_name}")
        print(f"   Type: {summary.hook_type}")
        print(f"   Inputs: {', '.join(summary.inputs_required)}")
        print(f"   Outputs: {', '.join(summary.outputs_created)}")
        print()

    # Show one complete generated script
    banner("Example: Weighted Objective Hook Script")
    print(generator.generate_from_llm_output(sample_specs[0]).script_content)

    # Save hooks to files
    output_dir = Path("generated_hooks")
    banner(f"Saving generated hooks to: {output_dir}")

    all_hooks = generator.generate_batch(sample_specs)
    for generated in all_hooks:
        saved_path = generator.save_hook_to_file(generated, output_dir)
        print(f"[OK] Saved: {saved_path}")

    # Generate registry
    registry_path = output_dir / "hook_registry.json"
    generator.generate_hook_registry(all_hooks, registry_path)
    print(f"[OK] Registry: {registry_path}")


if __name__ == '__main__':
    main()