# Atomizer/optimization_engine/llm_workflow_analyzer.py

"""
LLM-Powered Workflow Analyzer - Phase 2.7

Uses Claude (LLM) to intelligently analyze user requests instead of dumb regex patterns.
This is what we should have built from the start!

Integration modes:
1. Claude Code Skill (preferred for development) - uses Claude Code's built-in AI
2. Anthropic API (fallback for standalone) - requires API key

Author: Atomizer Development Team
Version: 0.2.0 (Phase 2.7)
Last Updated: 2025-01-16
"""

import json
import os
import subprocess
import tempfile
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from pathlib import Path

try:
    from anthropic import Anthropic
    HAS_ANTHROPIC = True
except ImportError:
    HAS_ANTHROPIC = False


@dataclass
class WorkflowStep:
    """A single step in an optimization workflow.

    Instances are built by ``LLMWorkflowAnalyzer.to_workflow_steps`` from the
    entries of an analysis dictionary.
    """
    # Identifier of the operation, e.g. 'extract_1d_element_forces' or 'optimize'.
    action: str
    # Category of the step, e.g. 'result_extraction', 'calculation',
    # 'post_processing' or 'optimization'.
    domain: str
    # Free-form parameters for the action, passed through from the analysis.
    params: Dict[str, Any]
    step_type: str  # 'engineering_feature', 'inline_calculation', 'post_processing_hook'
    # Position of the step in the workflow; to_workflow_steps numbers steps
    # 0, 1, 2, ... in the order it emits them.
    priority: int = 0


class LLMWorkflowAnalyzer:
    """
    Analyze optimization requests with the Claude LLM instead of regex patterns.

    Behavior by configuration:
    1. Direct API (``api_key`` given and the ``anthropic`` package importable):
       ``analyze_request`` sends a classification prompt to Claude and parses
       the JSON object found in the reply.
    2. No API client: ``analyze_request`` returns an empty analysis template
       that carries an ``'error'`` entry.

    The ``use_claude_code`` flag is stored on the instance only; nothing in
    this class acts on it.

    Class attributes:
        MODEL: Model identifier sent with every API request.
        MAX_TOKENS: Token limit requested for the reply.
    """

    MODEL = "claude-sonnet-4-20250514"
    MAX_TOKENS = 4000

    # (analysis key, step_type, fixed domain). A fixed domain of None means the
    # domain is read from each entry's own 'domain' field.
    _STEP_GROUPS = (
        ('engineering_features', 'engineering_feature', None),
        ('inline_calculations', 'inline_calculation', 'calculation'),
        ('post_processing_hooks', 'post_processing_hook', 'post_processing'),
    )

    def __init__(self, api_key: Optional[str] = None, use_claude_code: bool = True):
        """
        Initialize LLM analyzer.

        Args:
            api_key: Anthropic API key (optional, for standalone mode). It is
                only used when the ``anthropic`` package could be imported.
            use_claude_code: Use Claude Code skill for analysis (default: True).
                Forced to False when an API client is created.
        """
        self.use_claude_code = use_claude_code
        self.client = None

        if api_key and HAS_ANTHROPIC:
            self.client = Anthropic(api_key=api_key)
            self.use_claude_code = False  # Prefer direct API if key provided

    @staticmethod
    def _build_prompt(user_request: str) -> str:
        """Return the classification prompt with the user's request embedded."""
        return f"""You are analyzing a structural optimization request for the Atomizer system.

USER REQUEST:
{user_request}

Your task: Break this down into atomic workflow steps and classify each step.

STEP TYPES:
1. ENGINEERING FEATURES - Complex FEA/CAE operations needing specialized knowledge:
   - Extract results from OP2 files (displacement, stress, strain, element forces, etc.)
   - Modify FEA properties (CBUSH/CBAR stiffness, PCOMP layup, material properties)
   - Run simulations (SOL101, SOL103, etc.)
   - Create/modify geometry in NX

2. INLINE CALCULATIONS - Simple math operations (auto-generate Python):
   - Calculate average, min, max, sum
   - Compare values, compute ratios
   - Statistical operations

3. POST-PROCESSING HOOKS - Custom calculations between FEA steps:
   - Custom objective functions combining multiple results
   - Data transformations
   - Filtering/aggregation logic

4. OPTIMIZATION - Algorithm and configuration:
   - Optuna, genetic algorithm, etc.
   - Design variables and their ranges
   - Multi-objective vs single objective

IMPORTANT DISTINCTIONS:
- "extract forces from 1D elements" → ENGINEERING FEATURE (needs pyNastran/OP2 knowledge)
- "find average of forces" → INLINE CALCULATION (simple Python: sum/len)
- "compare max to average and create metric" → POST-PROCESSING HOOK (custom logic)
- Element forces vs Reaction forces are DIFFERENT (element internal forces vs nodal reactions)
- CBUSH vs CBAR are different element types with different properties

Return a JSON object with this EXACT structure:
{{
    "engineering_features": [
        {{
            "action": "extract_1d_element_forces",
            "domain": "result_extraction",
            "description": "Extract element forces from 1D elements (CBAR/CBUSH) in Z direction",
            "params": {{
                "element_types": ["CBAR", "CBUSH"],
                "result_type": "element_force",
                "direction": "Z"
            }}
        }}
    ],
    "inline_calculations": [
        {{
            "action": "calculate_average",
            "description": "Calculate average of extracted forces",
            "params": {{
                "input": "forces_z",
                "operation": "mean"
            }}
        }},
        {{
            "action": "find_minimum",
            "description": "Find minimum force value",
            "params": {{
                "input": "forces_z",
                "operation": "min"
            }}
        }}
    ],
    "post_processing_hooks": [
        {{
            "action": "custom_objective_metric",
            "description": "Compare minimum to average and create objective metric",
            "params": {{
                "inputs": ["min_force", "avg_force"],
                "formula": "min_force / avg_force",
                "objective": "minimize"
            }}
        }}
    ],
    "optimization": {{
        "algorithm": "genetic_algorithm",
        "design_variables": [
            {{
                "parameter": "cbar_stiffness_x",
                "type": "FEA_property",
                "element_type": "CBAR"
            }}
        ],
        "objectives": [
            {{
                "type": "minimize",
                "target": "custom_objective_metric"
            }}
        ]
    }}
}}

Analyze the request and return ONLY the JSON, no other text."""

    @staticmethod
    def _extract_json(content: str) -> Dict[str, Any]:
        """
        Parse the JSON object embedded in an LLM reply.

        The span from the first '{' to the last '}' is decoded, so surrounding
        prose or markdown code fences are ignored.

        Raises:
            json.JSONDecodeError: If the reply holds no '{...}' span or the
                span is not valid JSON.
        """
        start = content.find('{')
        end = content.rfind('}')
        if start == -1 or end < start:
            # Fail with a message that names the real problem instead of the
            # generic "Expecting value" produced by decoding an empty slice.
            raise json.JSONDecodeError(
                "No JSON object found in LLM response", content, 0
            )
        return json.loads(content[start:end + 1])

    def analyze_request(self, user_request: str) -> Dict[str, Any]:
        """
        Use Claude to analyze the request and extract workflow steps intelligently.

        Args:
            user_request: Free-text description of the optimization task.

        Returns:
            {
                'engineering_features': [...],
                'inline_calculations': [...],
                'post_processing_hooks': [...],
                'optimization': {...}
            }
            When no API client is configured, the same keys are returned empty
            together with an 'error' message.

        Raises:
            json.JSONDecodeError: If the model's reply contains no parseable
                JSON object.
        """
        if not self.client:
            # Fallback: return a template showing expected format
            return {
                "engineering_features": [],
                "inline_calculations": [],
                "post_processing_hooks": [],
                "optimization": {},
                "error": "No API key provided - cannot analyze request"
            }

        response = self.client.messages.create(
            model=self.MODEL,
            max_tokens=self.MAX_TOKENS,
            messages=[{
                "role": "user",
                "content": self._build_prompt(user_request)
            }]
        )

        # Concatenate the text of every content block; an empty reply yields
        # "" and is reported by _extract_json rather than as an IndexError.
        blocks = response.content or []
        content = "".join(getattr(block, "text", None) or "" for block in blocks)

        return self._extract_json(content)

    def to_workflow_steps(self, analysis: Dict[str, Any]) -> List["WorkflowStep"]:
        """
        Convert LLM analysis to WorkflowStep objects.

        Steps are emitted in the order engineering features, inline
        calculations, post-processing hooks, then a single 'optimize' step
        when the analysis has a non-empty 'optimization' entry. ``priority``
        is the step's index in the returned list.

        Args:
            analysis: Dictionary in the shape returned by ``analyze_request``.
                Missing or ``None`` sections are treated as empty.

        Raises:
            KeyError: If an entry lacks 'action', or an engineering feature
                lacks 'domain'.
        """
        steps = []

        for key, step_type, fixed_domain in self._STEP_GROUPS:
            for entry in analysis.get(key) or []:
                steps.append(WorkflowStep(
                    action=entry['action'],
                    domain=entry['domain'] if fixed_domain is None else fixed_domain,
                    params=entry.get('params') or {},
                    step_type=step_type,
                    priority=len(steps)
                ))

        opt = analysis.get('optimization') or {}
        if opt:
            steps.append(WorkflowStep(
                action='optimize',
                domain='optimization',
                params=opt,
                step_type='engineering_feature',
                priority=len(steps)
            ))

        return steps

    def get_summary(self, analysis: Dict[str, Any]) -> str:
        """
        Generate human-readable summary of the analysis.

        Missing fields are shown as 'N/A' and missing or ``None`` sections
        count as empty, so a partially filled analysis can still be displayed.

        Args:
            analysis: Dictionary in the shape returned by ``analyze_request``.

        Returns:
            The summary as a single newline-joined string.
        """
        eng_features = analysis.get('engineering_features') or []
        inline_calcs = analysis.get('inline_calculations') or []
        hooks = analysis.get('post_processing_hooks') or []
        opt = analysis.get('optimization') or {}

        lines = ["LLM Workflow Analysis", "=" * 80, ""]

        # Engineering features
        lines.append(f"Engineering Features (Need Research): {len(eng_features)}")
        for feature in eng_features:
            lines.append(f"  - {feature.get('action', 'N/A')}")
            lines.append(f"    Description: {feature.get('description', 'N/A')}")
            lines.append(f"    Domain: {feature.get('domain', 'N/A')}")
        lines.append("")

        # Inline calculations
        lines.append(f"Inline Calculations (Auto-Generate): {len(inline_calcs)}")
        for calc in inline_calcs:
            lines.append(f"  - {calc.get('action', 'N/A')}")
            lines.append(f"    Description: {calc.get('description', 'N/A')}")
        lines.append("")

        # Post-processing hooks
        lines.append(f"Post-Processing Hooks (Generate Middleware): {len(hooks)}")
        for hook in hooks:
            lines.append(f"  - {hook.get('action', 'N/A')}")
            lines.append(f"    Description: {hook.get('description', 'N/A')}")
            params = hook.get('params') or {}
            if 'formula' in params:
                lines.append(f"    Formula: {params['formula']}")
        lines.append("")

        # Optimization
        if opt:
            lines.append("Optimization Configuration:")
            lines.append(f"  Algorithm: {opt.get('algorithm', 'N/A')}")
            if 'design_variables' in opt:
                design_vars = opt['design_variables'] or []
                lines.append(f"  Design Variables: {len(design_vars)}")
                for var in design_vars:
                    lines.append(f"    - {var.get('parameter', 'N/A')} ({var.get('type', 'N/A')})")
            if 'objectives' in opt:
                lines.append("  Objectives:")
                for obj in opt['objectives'] or []:
                    lines.append(f"    - {obj.get('type', 'N/A')} {obj.get('target', 'N/A')}")
        lines.append("")

        # Summary
        opt_count = 1 if opt else 0
        total_steps = len(eng_features) + len(inline_calcs) + len(hooks) + opt_count
        lines.append(f"Total Steps: {total_steps}")
        lines.append(f"  Engineering: {len(eng_features)} (need research/documentation)")
        lines.append(f"  Simple Math: {len(inline_calcs)} (auto-generate Python)")
        lines.append(f"  Hooks: {len(hooks)} (generate middleware)")
        lines.append(f"  Optimization: {opt_count}")

        return "\n".join(lines)


def main():
    """Demo driver for the LLM workflow analyzer.

    Prints a sample optimization request, then either:
    - calls Claude through ``LLMWorkflowAnalyzer`` when the
      ``ANTHROPIC_API_KEY`` environment variable is set, printing the summary
      and the raw JSON analysis; or
    - prints a warning and the summary of a hand-written sample analysis that
      illustrates the expected output format.
    """
    import os

    rule = "=" * 80

    print(rule, "LLM-Powered Workflow Analyzer Test", rule, "", sep="\n")

    # Sample request used for the demo
    user_request = """I want to extract forces in direction Z of all the 1D elements and find the average of it,
then find the minimum value and compare it to the average, then assign it to a objective metric that needs to be minimized.

I want to iterate on the FEA properties of the Cbar element stiffness in X to make the objective function minimized.

I want to use genetic algorithm to iterate and optimize this"""

    print("User Request:", user_request, "", rule, "", sep="\n")

    key_from_env = os.environ.get('ANTHROPIC_API_KEY')

    if key_from_env:
        # Live path: let the LLM analyze the request
        analyzer = LLMWorkflowAnalyzer(api_key=key_from_env)

        print("Calling Claude to analyze request...", "", sep="\n")

        analysis = analyzer.analyze_request(user_request)

        print("LLM Analysis Complete!", "", sep="\n")
        print(analyzer.get_summary(analysis))

        print("", rule, "Raw JSON Analysis:", rule, sep="\n")
        print(json.dumps(analysis, indent=2))
        return

    # Offline path: no key available, show what the output should look like
    print(
        "WARNING: No ANTHROPIC_API_KEY found in environment",
        "Set it with: export ANTHROPIC_API_KEY=your_key_here",
        "",
        "Showing expected output format instead...",
        "",
        sep="\n",
    )

    force_extraction = {
        "action": "extract_1d_element_forces",
        "domain": "result_extraction",
        "description": "Extract element forces from 1D elements in Z direction",
        "params": {
            "element_types": ["CBAR"],
            "result_type": "element_force",
            "direction": "Z",
        },
    }
    average_calc = {
        "action": "calculate_average",
        "description": "Calculate average of extracted forces",
        "params": {"input": "forces_z", "operation": "mean"},
    }
    minimum_calc = {
        "action": "find_minimum",
        "description": "Find minimum force value",
        "params": {"input": "forces_z", "operation": "min"},
    }
    objective_hook = {
        "action": "custom_objective_metric",
        "description": "Compare minimum to average",
        "params": {
            "inputs": ["min_force", "avg_force"],
            "formula": "min_force / avg_force",
            "objective": "minimize",
        },
    }
    optimization_config = {
        "algorithm": "genetic_algorithm",
        "design_variables": [
            {"parameter": "cbar_stiffness_x", "type": "FEA_property"},
        ],
        "objectives": [{"type": "minimize", "target": "custom_objective_metric"}],
    }
    sample_analysis = {
        "engineering_features": [force_extraction],
        "inline_calculations": [average_calc, minimum_calc],
        "post_processing_hooks": [objective_hook],
        "optimization": optimization_config,
    }

    analyzer = LLMWorkflowAnalyzer()
    print(analyzer.get_summary(sample_analysis))


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()