**CRITICAL FIX**: FEM results were identical across trials **Root Cause**: The LLM runner was passing design_vars to simulation_runner(), which then passed them to NX Solver's expression_updates parameter. The solve journal tried to update hardcoded expression names (tip_thickness, support_angle) that don't exist in the beam model, causing the solver to ignore updates and use cached geometry. **Solution**: Match the working 50-trial optimization workflow: 1. model_updater() updates PRT file via NX import journal 2. Part file is closed/flushed to disk 3. simulation_runner() runs WITHOUT passing design_vars 4. NX solver loads SIM file, which references the updated PRT from disk 5. FEM regenerates with updated geometry automatically **Changes**: - llm_optimization_runner.py: Call simulation_runner() without arguments - run_optimization.py: Remove design_vars parameter from simulation_runner closure - import_expressions.py: Added theSession.Parts.CloseAll() to flush changes - test_phase_3_2_e2e.py: Fixed remaining variable name bugs **Test Results**: ✅ Trial 0: objective 7,315,679 ✅ Trial 1: objective 9,158.67 ✅ Trial 2: objective 7,655.28 FEM results are now DIFFERENT for each trial - optimization working correctly! **Remaining Issue**: LLM parsing "20 to 30 mm" as 0-1 range (separate fix needed)
494 lines
16 KiB
Python
494 lines
16 KiB
Python
"""
|
|
End-to-End Integration Test for Phase 3.2: LLM Mode
|
|
|
|
This test verifies the COMPLETE LLM mode workflow from natural language
|
|
to optimization results, using the REAL FEM solver.
|
|
|
|
Test Coverage:
|
|
1. Natural language request parsing
|
|
2. LLM workflow generation (requires API key or Claude Code)
|
|
3. Extractor auto-generation
|
|
4. Hook auto-generation
|
|
5. Model update (NX expressions)
|
|
6. Simulation run (actual FEM solve)
|
|
7. Result extraction from OP2 files
|
|
8. Optimization loop (3 trials)
|
|
9. Results saved to output directory
|
|
10. Plots generated (if enabled)
|
|
|
|
This is the validation test for Task 1.4 of Phase 3.2 Integration.
|
|
|
|
Author: Antoine Letarte
|
|
Date: 2025-11-17
|
|
Phase: 3.2 Week 1 - Task 1.4
|
|
"""
|
|
|
|
import sys
|
|
import json
|
|
import subprocess
|
|
import shutil
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
|
|
def test_e2e_llm_mode_with_api_key():
    """
    End-to-end test of LLM mode with the real FEM solver.

    Launches run_optimization.py as a subprocess with a natural-language
    request, then verifies the artifacts it should produce (output
    directory, incremental history, results file, generated extractors).

    Requires the ANTHROPIC_API_KEY environment variable.

    Returns:
        None  -- test skipped (no API key in the environment)
        True  -- every verification check passed
        False -- setup files missing, the run failed, or a check failed
    """
    print("=" * 80)
    print("END-TO-END INTEGRATION TEST: LLM Mode with Real FEM Solver")
    print("=" * 80)
    print()

    # Check for API key -- without it the LLM analysis step cannot run.
    import os
    api_key = os.environ.get("ANTHROPIC_API_KEY")

    if not api_key:
        print("[SKIP] No ANTHROPIC_API_KEY found in environment")
        print()
        print("This test requires a valid Anthropic API key to run.")
        print("To run this test, set your API key:")
        print(" Windows: set ANTHROPIC_API_KEY=your-key-here")
        print(" Linux/Mac: export ANTHROPIC_API_KEY=your-key-here")
        print()
        print("Alternatively, you can run the manual test:")
        print(" python examples/llm_mode_simple_example.py")
        print()
        return None  # Skip test

    print("[OK] API key found")
    print()

    # Natural language optimization request (parsed by the LLM analyzer).
    request = """
Minimize mass while keeping maximum displacement below 5mm and
von Mises stress below 200 MPa.

Design variables:
- beam_half_core_thickness: 20 to 30 mm
- beam_face_thickness: 18 to 25 mm

Run 3 trials using TPE sampler.
"""

    print("Natural Language Request:")
    print(request)
    print()

    # Setup test environment: the study layout is fixed by the repo;
    # the output directory gets a timestamp so reruns never collide.
    study_dir = Path(__file__).parent.parent / "studies" / "simple_beam_optimization"
    prt_file = study_dir / "1_setup" / "model" / "Beam.prt"
    sim_file = study_dir / "1_setup" / "model" / "Beam_sim1.sim"
    output_dir = study_dir / "2_substudies" / f"test_e2e_3trials_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Verify model files exist before spending minutes on a doomed run.
    if not prt_file.exists():
        print(f"[FAIL] Part file not found: {prt_file}")
        return False

    if not sim_file.exists():
        print(f"[FAIL] Simulation file not found: {sim_file}")
        return False

    print("Test Configuration:")
    print(f" Part file: {prt_file}")
    print(f" Simulation file: {sim_file}")
    print(f" Output directory: {output_dir}")
    print()

    # Build command.
    # BUG FIX: use the current interpreter instead of the hardcoded,
    # machine-specific path (c:/Users/antoi/anaconda3/...), which only
    # existed on the original author's machine.
    python_exe = sys.executable

    cmd = [
        python_exe,
        str(Path(__file__).parent.parent / "optimization_engine" / "run_optimization.py"),
        "--llm", request,
        "--prt", str(prt_file),
        "--sim", str(sim_file),
        "--output", str(output_dir.parent),
        "--study-name", output_dir.name,
        "--trials", "3",
        "--api-key", api_key,
    ]

    print("Running LLM Mode Optimization...")
    print("Command:")
    print(" ".join(cmd[:7]) + " ...")  # Truncated so the API key is never printed
    print()
    print("=" * 80)
    print("OPTIMIZATION RUNNING - This will take several minutes...")
    print("=" * 80)
    print()

    # Run the command and time it (FEM solves dominate the duration).
    start_time = datetime.now()
    result = subprocess.run(cmd, capture_output=True, text=True)
    end_time = datetime.now()

    duration = (end_time - start_time).total_seconds()

    print()
    print("=" * 80)
    print(f"OPTIMIZATION COMPLETED in {duration:.1f} seconds ({duration/60:.1f} minutes)")
    print("=" * 80)
    print()

    # Check if optimization succeeded; dump both streams on failure so the
    # cause is visible without rerunning.
    if result.returncode != 0:
        print("[FAIL] Optimization failed!")
        print()
        print("STDOUT:")
        print(result.stdout)
        print()
        print("STDERR:")
        print(result.stderr)
        print()
        return False

    print("[OK] Optimization command completed successfully")
    print()

    # Verify outputs exist. Each check appends True/False to `checks`;
    # the test passes only if all checks are True.
    print("Verifying outputs...")

    checks = []

    # 1. Output directory created
    if output_dir.exists():
        print(f" [OK] Output directory created: {output_dir}")
        checks.append(True)
    else:
        print(f" [FAIL] Output directory not found: {output_dir}")
        checks.append(False)

    # 2. History file
    history_file = output_dir / "optimization_history_incremental.json"
    if history_file.exists():
        print(f" [OK] History file created: {history_file.name}")
        checks.append(True)
    else:
        print(f" [FAIL] History file not found: {history_file}")
        checks.append(False)

    # 3. Results file
    results_file = output_dir / "optimization_results.json"
    if results_file.exists():
        print(f" [OK] Results file created: {results_file.name}")
        checks.append(True)
    else:
        print(f" [FAIL] Results file not found: {results_file}")
        checks.append(False)

    # 4. Generated extractors directory
    extractors_dir = output_dir / "generated_extractors"
    if extractors_dir.exists():
        print(f" [OK] Generated extractors directory: {extractors_dir.name}")
        checks.append(True)
    else:
        print(f" [FAIL] Generated extractors not found: {extractors_dir}")
        checks.append(False)

    # 5. Audit trail (if implemented) -- informational only, not counted.
    audit_dir = output_dir / "audit_trail"
    if audit_dir.exists():
        print(f" [OK] Audit trail directory: {audit_dir.name}")
    else:
        print(f" [INFO] Audit trail not found (may not be implemented yet)")

    print()

    # Verify history contents
    if history_file.exists():
        print("Verifying optimization history...")

        try:
            with open(history_file) as f:
                history = json.load(f)

            # Check number of trials (the request asked for exactly 3)
            if len(history) == 3:
                print(f" [OK] Correct number of trials: {len(history)}")
                checks.append(True)
            else:
                print(f" [FAIL] Expected 3 trials, got {len(history)}")
                checks.append(False)

            # Check trial structure
            required_fields = ["trial_number", "design_variables", "results", "objective"]

            for i, trial in enumerate(history):
                missing = [fld for fld in required_fields if fld not in trial]
                if not missing:
                    print(f" [OK] Trial {i+1} has all required fields")
                    checks.append(True)
                else:
                    print(f" [FAIL] Trial {i+1} missing fields: {missing}")
                    checks.append(False)

            # Check design variables match the ones named in the request
            for i, trial in enumerate(history):
                dvs = trial.get("design_variables", {})
                if "beam_half_core_thickness" in dvs and "beam_face_thickness" in dvs:
                    print(f" [OK] Trial {i+1} has correct design variables")
                    checks.append(True)
                else:
                    print(f" [FAIL] Trial {i+1} missing design variables")
                    checks.append(False)

            # Check results (extracted FEM quantities) are non-empty
            for i, trial in enumerate(history):
                results = trial.get("results", {})
                if results:
                    print(f" [OK] Trial {i+1} has results: {list(results.keys())}")
                    checks.append(True)
                else:
                    print(f" [FAIL] Trial {i+1} has no results")
                    checks.append(False)

            # Check objective values are numeric
            for i, trial in enumerate(history):
                obj = trial.get("objective")
                if obj is not None and isinstance(obj, (int, float)):
                    print(f" [OK] Trial {i+1} objective: {obj:.6f}")
                    checks.append(True)
                else:
                    print(f" [FAIL] Trial {i+1} invalid objective: {obj}")
                    checks.append(False)

            print()

            # Find best trial (minimization: smallest objective wins)
            best_trial = min(history, key=lambda x: x.get("objective", float('inf')))
            print("Best Trial Found:")
            print(f" Trial number: {best_trial['trial_number']}")
            print(f" Design variables:")
            for param, value in best_trial.get("design_variables", {}).items():
                print(f" - {param}: {value:.4f} mm")
            # BUG FIX: the old f-string applied :.6f to the 'N/A' string
            # fallback, which raised ValueError whenever objective was absent.
            best_obj = best_trial.get("objective")
            if isinstance(best_obj, (int, float)):
                print(f" Objective value: {best_obj:.6f}")
            else:
                print(" Objective value: N/A")
            print()

        except Exception as e:
            print(f" [FAIL] Error reading history file: {e}")
            import traceback
            traceback.print_exc()
            checks.append(False)

    # Verify results file
    if results_file.exists():
        print("Verifying results file...")

        try:
            with open(results_file) as f:
                results = json.load(f)

            if "best_params" in results and "best_value" in results:
                print(f" [OK] Results file has correct structure")
                print(f" Best value: {results['best_value']:.6f}")
                checks.append(True)
            else:
                print(f" [FAIL] Results file missing fields")
                checks.append(False)

        except Exception as e:
            print(f" [FAIL] Error reading results file: {e}")
            checks.append(False)

    print()

    # Summary
    print("=" * 80)
    print("TEST SUMMARY")
    print("=" * 80)

    passed_count = sum(checks)
    total_count = len(checks)

    print(f"Checks passed: {passed_count}/{total_count}")
    print()

    all_passed = all(checks)

    if all_passed:
        print("[SUCCESS] END-TO-END TEST PASSED!")
        print()
        print("Verified:")
        print(" [OK] Natural language parsed by LLM")
        print(" [OK] Extractors auto-generated")
        print(" [OK] Hooks auto-generated")
        print(" [OK] Model updated with design variables")
        print(" [OK] FEM simulations executed")
        print(" [OK] Results extracted from OP2 files")
        print(" [OK] 3 trials completed successfully")
        print(" [OK] Optimization history saved")
        print(" [OK] Best design identified and saved")
        print()
        print(f"Results saved to: {output_dir}")
        print()
        print("Task 1.4 Status: [OK] COMPLETE")
        print()
    else:
        print("[FAIL] END-TO-END TEST FAILED")
        print()
        print("Some checks did not pass. See details above.")
        print()

    return all_passed
|
|
|
|
|
|
def test_e2e_llm_mode_without_api_key():
    """
    Test that the CLI fails gracefully when no API key is supplied.

    Runs run_optimization.py without --api-key and expects a non-zero
    exit code, ideally with a helpful error message about the LLM step.

    Returns:
        True  -- command failed (gracefully or not, as long as it failed)
        False -- command unexpectedly succeeded without a key
    """
    print("=" * 80)
    print("TEST: LLM Mode without API Key (Expected Failure)")
    print("=" * 80)
    print()

    request = "Minimize mass. Design variable: beam_half_core_thickness (20-30mm). Run 2 trials."

    study_dir = Path(__file__).parent.parent / "studies" / "simple_beam_optimization"
    prt_file = study_dir / "1_setup" / "model" / "Beam.prt"
    sim_file = study_dir / "1_setup" / "model" / "Beam_sim1.sim"
    output_dir = study_dir / "2_substudies" / "test_no_api_key"

    # BUG FIX: use the current interpreter instead of the hardcoded,
    # machine-specific path (c:/Users/antoi/anaconda3/...), which only
    # existed on the original author's machine.
    python_exe = sys.executable

    cmd = [
        python_exe,
        str(Path(__file__).parent.parent / "optimization_engine" / "run_optimization.py"),
        "--llm", request,
        "--prt", str(prt_file),
        "--sim", str(sim_file),
        "--output", str(output_dir.parent),
        "--study-name", output_dir.name,
        "--trials", "2",
        # NOTE: No API key provided -- that is the point of this test
    ]

    print("Running without API key...")
    print()

    result = subprocess.run(cmd, capture_output=True, text=True)

    # Should fail with helpful error message
    if result.returncode != 0:
        print("[OK] Command failed as expected (no API key)")

        # Check for a helpful error message in either output stream
        if "LLM analysis failed" in result.stdout or "LLM analysis failed" in result.stderr:
            print("[OK] Error message mentions LLM analysis failure")
            return True
        elif "empty workflow" in result.stdout.lower() or "empty workflow" in result.stderr.lower():
            print("[OK] Error message indicates empty workflow from LLM")
            return True
        else:
            print("[WARN] Error message could be more helpful")
            print()
            print("STDOUT:")
            print(result.stdout[-500:])  # Last 500 chars
            print()
            return True  # Still pass - it failed gracefully
    else:
        print("[FAIL] Command succeeded without API key (unexpected)")
        print()
        print("This suggests the LLMWorkflowAnalyzer fallback may be too permissive.")
        print()
        return False
|
|
|
|
|
|
def main():
    """Run both end-to-end tests and print a pass/fail/skip summary.

    Returns None when every test was skipped, otherwise True if all
    tests that actually ran passed, False if any failed.
    """
    print()
    print("=" * 80)
    print("PHASE 3.2 - TASK 1.4: END-TO-END INTEGRATION TESTS")
    print("=" * 80)
    print()

    print("This test suite validates the complete LLM mode workflow.")
    print()

    suite = (
        ("E2E with API Key", test_e2e_llm_mode_with_api_key),
        ("E2E without API Key (graceful failure)", test_e2e_llm_mode_without_api_key),
    )

    outcomes = []
    for label, runner in suite:
        print()
        print("=" * 80)
        outcomes.append((label, runner()))
        print()

    # Summary
    print()
    print("=" * 80)
    print("END-TO-END TEST SUMMARY")
    print("=" * 80)

    for label, outcome in outcomes:
        # None = skipped, True = passed, False = failed
        tag = "[SKIP]" if outcome is None else ("[PASS]" if outcome else "[FAIL]")
        print(f"{tag}: {label}")

    print()

    # Skipped tests carry no signal; only keep the ones that ran.
    ran = [(label, outcome) for label, outcome in outcomes if outcome is not None]

    if not ran:
        print("[INFO] All tests were skipped")
        print()
        print("To run the E2E test with API key:")
        print(" 1. Set ANTHROPIC_API_KEY environment variable")
        print(" 2. Run: python tests/test_phase_3_2_e2e.py")
        print()
        print("Alternatively, run the manual example:")
        print(" python examples/llm_mode_simple_example.py")
        print()
        return None

    overall = all(outcome for _, outcome in ran)

    if overall:
        print("[SUCCESS] ALL END-TO-END TESTS PASSED!")
        print()
        print("Task 1.4: End-to-End Integration Test - COMPLETE")
        print()
        print("Phase 3.2 Week 1 Status: 100% COMPLETE")
        print(" [OK] Task 1.2: Wire LLMOptimizationRunner to production")
        print(" [OK] Task 1.3: Create minimal working example")
        print(" [OK] Task 1.4: End-to-end integration test")
        print()
        print("Next: Week 2 - Robustness & Safety")
        print()
    else:
        failed = sum(1 for _, outcome in ran if not outcome)
        print(f"[WARN] {failed} TEST(S) FAILED")
        print()

    return overall
|
|
|
|
|
|
if __name__ == '__main__':
    success = main()
    # Exit 0 on pass, 0 when everything was skipped (success is None),
    # and 1 only when at least one test actually failed.
    sys.exit(1 if success is False else 0)
|