"""
Parallel FEA Training Data Generator
=====================================

Runs FEA simulations on space-filling training points in parallel
using multiple NX sessions. Results are stored in a shared SQLite database
for thread-safe access.

Hardware Recommendation (based on your i7-14700HX, 64GB RAM):
- 2-3 parallel sessions recommended (each uses ~4-6 cores for Nastran)
- ~30-50 min for 100 points with 3 sessions

Usage:
    python run_training_fea.py --study uav_arm_optimization --workers 3
    python run_training_fea.py --study uav_arm_optimization --workers 2 --start 50
"""

import sys
import json
import argparse
import shutil
import sqlite3
import time
from contextlib import closing
from pathlib import Path
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor, as_completed
from multiprocessing import Manager
import threading

# Add project root to path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))

# Worker ID leased by the current pool process (set by _init_worker_process).
# Stays None when run_single_fea is called outside the pool created in main().
_PROCESS_WORKER_ID = None


def _init_worker_process(worker_id_queue):
    """Pool initializer: lease one unique worker ID for this process.

    The ID selects the process's private ``worker_<id>`` directory, so two
    processes never wipe or solve in the same directory at the same time.

    Args:
        worker_id_queue: Queue pre-filled by main() with one ID per process.
    """
    global _PROCESS_WORKER_ID
    # The queue holds exactly as many IDs as the pool has processes, so this
    # should return immediately; the timeout only guards against a hang.
    _PROCESS_WORKER_ID = worker_id_queue.get(timeout=60)


def setup_worker_directory(study_dir: Path, worker_id: int) -> Path:
    """Create an isolated working directory for a worker.

    Any existing ``1_setup/worker_<id>`` directory is deleted and replaced
    with a fresh copy of ``1_setup/model``.

    Args:
        study_dir: Root directory of the study.
        worker_id: Numeric ID used to name the worker directory.

    Returns:
        Path of the freshly populated worker directory.
    """
    worker_dir = study_dir / "1_setup" / f"worker_{worker_id}"
    model_src = study_dir / "1_setup" / "model"

    # Clean and recreate worker directory
    if worker_dir.exists():
        shutil.rmtree(worker_dir)

    # Copy model files to worker directory
    shutil.copytree(model_src, worker_dir)

    return worker_dir


def run_single_fea(args_tuple):
    """Run a single FEA simulation. This function runs in a separate process.

    Only successful results are written to the database; failures are
    reported through the returned dict.

    Args:
        args_tuple: (sample_idx, sample, worker_id, study_dir, db_path).
            When the process was started by the pool in main(), the
            process-local worker ID takes precedence over the ``worker_id``
            in the tuple, because the executor does not guarantee which
            process picks up which task.

    Returns:
        dict with the extracted results; ``success`` is False and ``error``
        is set when the solve, extraction or database write failed.
    """
    sample_idx, sample, worker_id, study_dir_str, db_path_str = args_tuple
    if _PROCESS_WORKER_ID is not None:
        worker_id = _PROCESS_WORKER_ID
    study_dir = Path(study_dir_str)
    db_path = Path(db_path_str)

    # Import inside worker to avoid multiprocessing issues
    sys.path.insert(0, str(Path(__file__).parent))

    try:
        import config as atomizer_config
    except ImportError:
        atomizer_config = None

    from optimization_engine.nx.solver import NXSolver
    from optimization_engine.extractors.extract_displacement import extract_displacement
    from optimization_engine.extractors.extract_von_mises_stress import extract_solid_stress
    from optimization_engine.extractors.extract_frequency import extract_frequency
    from optimization_engine.extractors.extract_mass_from_expression import extract_mass_from_expression

    result = {
        'sample_idx': sample_idx,
        'worker_id': worker_id,
        'params': sample,
        'success': False,
        'error': None,
        'mass': None,
        'frequency': None,
        'max_displacement': None,
        'max_stress': None
    }

    try:
        # Setup worker directory
        worker_dir = setup_worker_directory(study_dir, worker_id)

        # Initialize NX solver for this worker
        nx_solver = NXSolver(
            nastran_version=atomizer_config.NX_VERSION if atomizer_config else "2412",
            timeout=atomizer_config.NASTRAN_TIMEOUT if atomizer_config else 600,
            use_journal=True,
            enable_session_management=True,
            study_name=f"training_worker_{worker_id}"
        )

        # Setup paths
        model_file = worker_dir / "Beam.prt"
        sim_file = worker_dir / "Beam_sim1.sim"

        print(f"[Worker {worker_id}] Sample {sample_idx}: {sample}")

        # Run simulation
        sim_result = nx_solver.run_simulation(
            sim_file=sim_file,
            working_dir=worker_dir,
            expression_updates=sample,
            solution_name=None  # Solve all solutions
        )

        if not sim_result['success']:
            result['error'] = sim_result.get('error', 'Unknown error')
            print(f"[Worker {worker_id}] Sample {sample_idx} FAILED: {result['error']}")
            return result

        op2_file = sim_result['op2_file']

        # Extract results
        # Mass from CAD expression
        mass_kg = extract_mass_from_expression(model_file, expression_name="p173")
        result['mass'] = mass_kg * 1000.0  # Convert to grams

        # Frequency from modal analysis
        op2_modal = str(op2_file).replace("solution_1", "solution_2")
        freq_result = extract_frequency(op2_modal, subcase=1, mode_number=1)
        result['frequency'] = freq_result['frequency']

        # Displacement from static analysis
        disp_result = extract_displacement(op2_file, subcase=1)
        result['max_displacement'] = disp_result['max_displacement']

        # Stress from static analysis
        stress_result = extract_solid_stress(op2_file, subcase=1, element_type='cquad4')
        result['max_stress'] = stress_result['max_von_mises']

        result['success'] = True

        print(f"[Worker {worker_id}] Sample {sample_idx} SUCCESS: mass={result['mass']:.1f}g, freq={result['frequency']:.1f}Hz")

        # Save to database immediately (thread-safe with retries)
        save_result_to_db(db_path, result)

    except Exception as e:
        # Also reached when the database write fails after 'success' was set,
        # so clear the flag: an unsaved sample must not be counted as done.
        result['success'] = False
        result['error'] = str(e)
        print(f"[Worker {worker_id}] Sample {sample_idx} ERROR: {e}")

    return result


def save_result_to_db(db_path: Path, result: dict, max_retries: int = 5):
    """Save result to SQLite database with retry logic for concurrency.

    An existing row with the same ``sample_idx`` is replaced.

    Args:
        db_path: Path of the SQLite database file.
        result: Result dict as built by run_single_fea.
        max_retries: Maximum number of attempts when the database is locked.

    Returns:
        True once the row is committed; False only if ``max_retries`` is
        less than 1, in which case no attempt is made.

    Raises:
        sqlite3.OperationalError: If the database is still locked on the
            last attempt, or on any other operational error.
    """
    row = (
        result['sample_idx'],
        json.dumps(result['params']),
        result['success'],
        result['error'],
        result['mass'],
        result['frequency'],
        result['max_displacement'],
        result['max_stress'],
        datetime.now().isoformat()
    )

    for attempt in range(max_retries):
        try:
            # closing() guarantees the connection is released even when the
            # insert or commit raises, so failed attempts do not leak handles.
            with closing(sqlite3.connect(str(db_path), timeout=30)) as conn:
                conn.execute("""
                    INSERT OR REPLACE INTO training_results
                    (sample_idx, params_json, success, error, mass, frequency,
                     max_displacement, max_stress, timestamp)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, row)
                conn.commit()
            return True
        except sqlite3.OperationalError as e:
            if "locked" in str(e) and attempt < max_retries - 1:
                time.sleep(0.5 * (attempt + 1))  # Linear backoff: 0.5s, 1.0s, ...
            else:
                raise
    return False


def init_database(db_path: Path):
    """Initialize SQLite database for training results.

    Creates the ``training_results`` table if it does not exist yet.
    """
    with closing(sqlite3.connect(str(db_path))) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS training_results (
                sample_idx INTEGER PRIMARY KEY,
                params_json TEXT,
                success INTEGER,
                error TEXT,
                mass REAL,
                frequency REAL,
                max_displacement REAL,
                max_stress REAL,
                timestamp TEXT
            )
        """)
        conn.commit()


def get_completed_samples(db_path: Path) -> set:
    """Get set of already completed sample indices.

    Returns an empty set when the database file does not exist or the query
    fails with an operational error (for example, the table is missing).
    """
    if not db_path.exists():
        return set()

    with closing(sqlite3.connect(str(db_path))) as conn:
        try:
            rows = conn.execute(
                "SELECT sample_idx FROM training_results WHERE success = 1"
            ).fetchall()
        except sqlite3.OperationalError:
            return set()
    return {row[0] for row in rows}


def select_samples(samples, start=0, end=None):
    """Return ``(index, sample)`` pairs for the half-open range [start, end).

    Args:
        samples: Sequence of training samples.
        start: First index to include; negative values are clamped to 0.
        end: Exclusive end index; None means "to the end". Values beyond
            the number of samples are clamped.
    """
    total = len(samples)
    first = max(start, 0)
    stop = total if end is None else min(end, total)
    return [(i, samples[i]) for i in range(first, stop)]


def progress_stats(done, total, elapsed_seconds):
    """Return ``(rate in samples/min, ETA in minutes)`` for the progress line.

    Both values are 0.0 until at least one sample is done and time has passed.
    """
    if done <= 0 or elapsed_seconds <= 0:
        return 0.0, 0.0
    rate_per_min = done / (elapsed_seconds / 60.0)
    eta_min = (total - done) / rate_per_min
    return rate_per_min, eta_min


def main():
    """Parse CLI arguments and run the selected training samples in parallel."""
    parser = argparse.ArgumentParser(description='Run parallel FEA training data generation')
    parser.add_argument('--study', required=True, help='Study name (e.g., uav_arm_optimization)')
    parser.add_argument('--workers', type=int, default=2, help='Number of parallel workers (default: 2)')
    parser.add_argument('--start', type=int, default=0, help='Starting sample index (for resuming)')
    parser.add_argument('--end', type=int, default=None, help='Ending sample index (exclusive)')
    parser.add_argument('--resume', action='store_true', help='Skip already completed samples')

    args = parser.parse_args()

    if args.workers < 1:
        parser.error("--workers must be at least 1")

    # Setup paths
    study_dir = project_root / "studies" / args.study
    training_points_path = study_dir / "1_setup" / "training_points.json"
    db_path = study_dir / "2_results" / "training_data.db"

    if not study_dir.exists():
        print(f"ERROR: Study not found: {study_dir}")
        return

    if not training_points_path.exists():
        print(f"ERROR: Training points not found: {training_points_path}")
        print(f"Generate them first: python generate_training_data.py --study {args.study}")
        return

    # Load training points
    with open(training_points_path) as f:
        data = json.load(f)

    samples = data['samples']
    total_samples = len(samples)

    # Apply start/end filtering ("is not None" so that --end 0 is honoured)
    start_idx = max(args.start, 0)
    end_idx = total_samples if args.end is None else min(args.end, total_samples)
    samples_to_run = select_samples(samples, args.start, args.end)

    print("=" * 70)
    print("PARALLEL FEA TRAINING DATA GENERATOR")
    print("=" * 70)
    print(f"Study: {args.study}")
    print(f"Total training points: {total_samples}")
    print(f"Processing range: {start_idx} to {end_idx}")
    print(f"Parallel workers: {args.workers}")
    print(f"Database: {db_path}")
    print()

    # Initialize database
    db_path.parent.mkdir(parents=True, exist_ok=True)
    init_database(db_path)

    # Check for already completed samples
    if args.resume:
        completed = get_completed_samples(db_path)
        samples_to_run = [(i, s) for i, s in samples_to_run if i not in completed]
        print(f"Already completed: {len(completed)} samples")
        print(f"Remaining to process: {len(samples_to_run)} samples")

    if not samples_to_run:
        print("All samples already completed!")
        return

    print()
    print(f"Starting {args.workers} parallel workers...")
    print("=" * 70)

    # Prepare worker arguments. The worker_id in the tuple is only a fallback:
    # inside the pool each process uses the ID it leased in its initializer.
    worker_args = []
    for idx, (sample_idx, sample) in enumerate(samples_to_run):
        worker_id = idx % args.workers
        worker_args.append((sample_idx, sample, worker_id, str(study_dir), str(db_path)))

    # Track progress
    start_time = time.time()
    completed_count = 0
    failed_count = 0

    # Run with ProcessPoolExecutor. Each pool process leases one ID from the
    # queue, so every worker directory has exactly one owning process.
    with Manager() as manager:
        worker_id_queue = manager.Queue()
        for wid in range(args.workers):
            worker_id_queue.put(wid)

        with ProcessPoolExecutor(
            max_workers=args.workers,
            initializer=_init_worker_process,
            initargs=(worker_id_queue,),
        ) as executor:
            futures = {executor.submit(run_single_fea, arg): arg[0] for arg in worker_args}

            for future in as_completed(futures):
                sample_idx = futures[future]
                try:
                    result = future.result()
                    if result['success']:
                        completed_count += 1
                    else:
                        failed_count += 1
                except Exception as e:
                    print(f"Sample {sample_idx} raised exception: {e}")
                    failed_count += 1

                # Progress update
                total_done = completed_count + failed_count
                elapsed = time.time() - start_time
                rate, eta = progress_stats(total_done, len(samples_to_run), elapsed)

                print(f"\nProgress: {total_done}/{len(samples_to_run)} ({completed_count} OK, {failed_count} failed)")
                print(f"Rate: {rate:.2f} samples/min | ETA: {eta:.1f} min")

    # Summary
    elapsed = time.time() - start_time
    print()
    print("=" * 70)
    print("TRAINING DATA GENERATION COMPLETE")
    print("=" * 70)
    print(f"Total time: {elapsed/60:.1f} minutes")
    print(f"Completed: {completed_count}/{len(samples_to_run)}")
    print(f"Failed: {failed_count}")
    print(f"Results saved to: {db_path}")
    print()
    print("Next steps:")
    print(" 1. Merge with existing optimization data:")
    print(f" python merge_training_data.py --study {args.study}")
    print(" 2. Retrain neural network:")
    print(f" python train_nn_surrogate.py --study {args.study}")


if __name__ == "__main__":
    main()