"""
NX Session Manager - Prevents conflicts when multiple optimizations run concurrently.

This module ensures that NX sessions don't interfere with each other when:
1. Multiple optimizations are running simultaneously
2. User has NX open for manual work
3. Multiple Atomizer instances are running

Key Features:
- Session detection (running NX processes)
- File locking (prevents concurrent access to same model)
- Process queuing (waits if NX is busy with another optimization)
- Batch mode isolation (uses dedicated NX instances)
"""

import hashlib
import json
import os
import time
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional

import psutil

# Platform-specific imports
if os.name != 'nt':  # Unix/Linux/Mac
    import fcntl
else:  # Windows
    import msvcrt


# Lower-cased executable names treated as NX processes. Names are compared
# exactly, so unrelated executables that merely contain one of these strings
# (e.g. "nginx.exe" contains "nx.exe") are not reported.
_NX_PROCESS_NAMES = frozenset(
    {'ugraf.exe', 'nx.exe', 'run_journal.exe', 'nxmgr_inter.exe'}
)

# Subset of the names above that may be an interactive (GUI) session.
_NX_INTERACTIVE_NAMES = frozenset({'ugraf.exe', 'nx.exe'})

# Seconds between "still waiting" progress messages in the wait loops.
_WAIT_REPORT_INTERVAL = 10.0


@dataclass
class NXSessionInfo:
    """Information about a running NX session."""

    pid: int  # process id reported by psutil
    name: str  # executable name reported by psutil
    cmdline: List[str]  # command-line arguments ([] when unavailable)
    working_dir: Optional[str]  # process cwd, None when unavailable
    create_time: float  # process creation time reported by psutil


class NXSessionManager:
    """
    Manages NX sessions to prevent conflicts between concurrent optimizations.

    Strategy:
    1. Detect running NX processes
    2. Use file locks to ensure exclusive model access
    3. Queue optimization trials if NX is busy
    4. Isolate batch mode sessions from interactive sessions
    """

    def __init__(
        self,
        lock_dir: Optional[Path] = None,
        max_concurrent_sessions: int = 1,
        wait_timeout: int = 300,
        verbose: bool = True
    ):
        """
        Initialize session manager.

        Creates the lock directory if it does not exist.

        Args:
            lock_dir: Directory for lock files (default: ~/.atomizer/locks)
            max_concurrent_sessions: Maximum concurrent NX optimization sessions
            wait_timeout: Maximum wait time for NX to become available (seconds)
            verbose: Print session management info
        """
        self.lock_dir = Path(lock_dir) if lock_dir else Path.home() / ".atomizer" / "locks"
        self.lock_dir.mkdir(parents=True, exist_ok=True)

        self.max_concurrent = max_concurrent_sessions
        self.wait_timeout = wait_timeout
        self.verbose = verbose

        # Track active sessions
        self.session_lock_file = self.lock_dir / "nx_sessions.json"
        self.global_lock_file = self.lock_dir / "nx_global.lock"

    # ------------------------------------------------------------------
    # NX process detection
    # ------------------------------------------------------------------

    def get_running_nx_sessions(self) -> List[NXSessionInfo]:
        """
        Detect all running NX processes.

        A process counts as NX when its lower-cased executable name is exactly
        one of the known NX executables.

        Returns:
            List of NX session info objects
        """
        nx_sessions = []

        for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd', 'create_time']):
            try:
                name = proc.info['name']
                if not name or name.lower() not in _NX_PROCESS_NAMES:
                    continue

                nx_sessions.append(NXSessionInfo(
                    pid=proc.info['pid'],
                    name=name,
                    cmdline=proc.info['cmdline'] or [],
                    working_dir=proc.info['cwd'],
                    create_time=proc.info['create_time']
                ))
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue

        return nx_sessions

    def is_nx_interactive_session_running(self) -> bool:
        """
        Check if user has NX open interactively (not batch mode).

        A session is considered interactive when its executable is ugraf.exe
        or nx.exe and its command line does not contain "-batch".

        Returns:
            True if interactive NX session detected
        """
        for session in self.get_running_nx_sessions():
            if session.name.lower() not in _NX_INTERACTIVE_NAMES:
                continue
            if '-batch' not in ' '.join(session.cmdline).lower():
                return True
        return False

    # ------------------------------------------------------------------
    # Low-level lock-file helpers
    # ------------------------------------------------------------------

    def _model_lock_path(self, model_file: Path) -> Path:
        """Return the lock-file path for a model, identical in every process."""
        # The built-in hash() of a str is salted per interpreter process, so it
        # cannot be used to agree on a file name across processes. Use a
        # content digest of the normalized absolute path instead.
        key = os.path.normcase(os.path.abspath(str(model_file)))
        digest = hashlib.sha256(os.fsencode(key)).hexdigest()[:16]
        return self.lock_dir / f"model_{digest}.lock"

    @staticmethod
    def _try_lock_file(lock_file: Path):
        """
        Try once, without blocking, to take the exclusive lock on a lock file.

        Returns:
            The open file object holding the lock, or None if the file could
            not be opened or the lock is currently held elsewhere. No file
            handle is left open when None is returned.
        """
        try:
            # 'a+' creates the file when missing but, unlike 'w', does not
            # truncate it: a waiter must not erase the holder's lock info.
            handle = open(lock_file, 'a+')
        except OSError:
            return None

        try:
            if os.name == 'nt':
                # msvcrt.locking locks bytes starting at the current file
                # position, so always lock byte 0.
                handle.seek(0)
                msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
            else:
                fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            handle.close()
            return None

        return handle

    @staticmethod
    def _unlock_and_close(lock_fd) -> None:
        """Release the lock taken by _try_lock_file and close the file."""
        try:
            if os.name == 'nt':
                # Unlock the same byte that was locked (position 0), not the
                # position left behind by writing the lock info.
                lock_fd.seek(0)
                msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
            else:
                fcntl.flock(lock_fd, fcntl.LOCK_UN)
        finally:
            lock_fd.close()

    # ------------------------------------------------------------------
    # Model locking
    # ------------------------------------------------------------------

    @contextmanager
    def acquire_model_lock(self, model_file: Path, study_name: str):
        """
        Acquire exclusive lock for a specific model file.

        This prevents two optimizations from modifying the same model
        simultaneously. The lock is retried once per second until it is
        obtained or wait_timeout seconds have elapsed. On exit the lock is
        released and the lock file removed; nothing is released or removed
        if the lock was never obtained.

        Args:
            model_file: Path to the model file (.prt)
            study_name: Name of the study (for logging)

        Yields:
            Lock context

        Raises:
            TimeoutError: If lock cannot be acquired within timeout
        """
        model_file = Path(model_file)
        lock_file = self._model_lock_path(model_file)

        if self.verbose:
            print(f"\n[SESSION MGR] Acquiring lock for model: {model_file.name}")

        lock_fd = None
        start_time = time.time()
        next_report = 0.0  # elapsed seconds at which to print the next wait message

        try:
            # Try to acquire lock with timeout
            while True:
                lock_fd = self._try_lock_file(lock_file)
                if lock_fd is not None:
                    break  # Lock acquired!

                # Lock is held by another process
                elapsed = time.time() - start_time
                if elapsed > self.wait_timeout:
                    raise TimeoutError(
                        f"Could not acquire model lock for {model_file.name} "
                        f"after {self.wait_timeout}s. Another optimization may be using this model."
                    )

                if self.verbose and elapsed >= next_report:
                    print(f"[SESSION MGR] Waiting for model lock... ({elapsed:.0f}s)")
                    next_report = elapsed + _WAIT_REPORT_INTERVAL

                time.sleep(1)

            # Write lock info, replacing anything left by a previous holder.
            lock_info = {
                'study_name': study_name,
                'model_file': str(model_file),
                'pid': os.getpid(),
                'timestamp': time.time()
            }
            lock_fd.seek(0)
            lock_fd.truncate(0)
            lock_fd.write(json.dumps(lock_info, indent=2))
            lock_fd.flush()

            if self.verbose:
                print(f"[SESSION MGR] Lock acquired successfully")

            yield  # Lock acquired, user code runs here

        finally:
            # Only release/remove a lock this call actually obtained.
            if lock_fd is not None:
                try:
                    self._unlock_and_close(lock_fd)
                    if self.verbose:
                        print(f"[SESSION MGR] Lock released for model: {model_file.name}")
                except OSError as e:
                    if self.verbose:
                        print(f"[SESSION MGR] Warning: Failed to release lock: {e}")

                # Clean up lock file (best effort)
                try:
                    lock_file.unlink()
                except OSError:
                    pass

    # ------------------------------------------------------------------
    # Session slots
    # ------------------------------------------------------------------

    @contextmanager
    def acquire_nx_session(self, study_name: str):
        """
        Acquire permission to run an NX batch session.

        Waits, polling every two seconds, until fewer than
        max_concurrent_sessions sessions are registered, then registers this
        one. An interactive NX session only produces a warning; it does not
        block acquisition. The session is unregistered on exit.

        Args:
            study_name: Name of the study (for logging)

        Yields:
            Session context

        Raises:
            TimeoutError: If session cannot be acquired within timeout
        """
        if self.verbose:
            print(f"\n[SESSION MGR] Requesting NX batch session for study: {study_name}")

        # Check for interactive NX sessions
        if self.is_nx_interactive_session_running():
            if self.verbose:
                print(f"[SESSION MGR] WARNING: Interactive NX session detected!")
                print(f"[SESSION MGR] Batch operations may conflict with user's work.")
                print(f"[SESSION MGR] Recommend closing interactive NX before running optimization.")

        start_time = time.time()
        session_acquired = False
        next_report = 0.0  # elapsed seconds at which to print the next wait message

        try:
            # Wait for available session slot
            while True:
                active_sessions = self._count_active_sessions()

                if active_sessions < self.max_concurrent:
                    # Register this session
                    self._register_session(study_name)
                    session_acquired = True

                    if self.verbose:
                        print(f"[SESSION MGR] NX session acquired (active: {active_sessions + 1}/{self.max_concurrent})")
                    break

                # Check timeout
                elapsed = time.time() - start_time
                if elapsed > self.wait_timeout:
                    raise TimeoutError(
                        f"Could not acquire NX session after {self.wait_timeout}s. "
                        f"Max concurrent sessions ({self.max_concurrent}) reached."
                    )

                if self.verbose and elapsed >= next_report:
                    print(f"[SESSION MGR] Waiting for NX session... ({elapsed:.0f}s, active: {active_sessions})")
                    next_report = elapsed + _WAIT_REPORT_INTERVAL

                time.sleep(2)

            yield  # Session acquired, user code runs here

        finally:
            # Unregister session
            if session_acquired:
                self._unregister_session(study_name)
                if self.verbose:
                    print(f"[SESSION MGR] NX session released for study: {study_name}")

    def _count_active_sessions(self) -> int:
        """
        Count active optimization sessions.

        Entries whose PID no longer exists are dropped, and the registry file
        is rewritten only when something was dropped. Returns 0 when the
        registry is missing or cannot be read.
        """
        if not self.session_lock_file.exists():
            return 0

        try:
            with open(self.session_lock_file, 'r') as f:
                sessions = json.load(f)

            # Clean up stale sessions (processes that no longer exist)
            active_sessions = []
            for session in sessions:
                pid = session.get('pid')
                if pid and psutil.pid_exists(pid):
                    active_sessions.append(session)

            # Rewrite only when stale entries were removed, so plain polling
            # does not keep rewriting a file other processes are reading.
            if len(active_sessions) != len(sessions):
                with open(self.session_lock_file, 'w') as f:
                    json.dump(active_sessions, f, indent=2)

            return len(active_sessions)

        except Exception as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to count sessions: {e}")
            return 0

    def _register_session(self, study_name: str):
        """Register a new optimization session for the current process."""
        sessions = []

        if self.session_lock_file.exists():
            try:
                with open(self.session_lock_file, 'r') as f:
                    sessions = json.load(f)
            except (OSError, ValueError):
                # Unreadable or corrupt registry: start from an empty one.
                sessions = []

        if not isinstance(sessions, list):
            sessions = []

        # Add new session
        now = time.time()
        sessions.append({
            'study_name': study_name,
            'pid': os.getpid(),
            'start_time': now,
            'timestamp': now
        })

        # Save
        with open(self.session_lock_file, 'w') as f:
            json.dump(sessions, f, indent=2)

    def _unregister_session(self, study_name: str):
        """
        Unregister one optimization session of the current process.

        Removes a single registry entry matching both this PID and
        study_name, so other sessions held by the same process stay
        registered.
        """
        if not self.session_lock_file.exists():
            return

        try:
            with open(self.session_lock_file, 'r') as f:
                sessions = json.load(f)

            # Remove this session (first match only)
            pid = os.getpid()
            for index, session in enumerate(sessions):
                if session.get('pid') == pid and session.get('study_name') == study_name:
                    del sessions[index]
                    break

            # Save
            with open(self.session_lock_file, 'w') as f:
                json.dump(sessions, f, indent=2)

        except Exception as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to unregister session: {e}")

    # ------------------------------------------------------------------
    # Maintenance and reporting
    # ------------------------------------------------------------------

    def cleanup_stale_locks(self):
        """
        Remove lock files from crashed processes.

        A *.lock file is a removal candidate when it cannot be read as lock
        info or when the PID it records no longer exists. A candidate is
        removed only if its lock can be taken right now, so a lock currently
        held by a live process is never deleted.
        """
        if not self.lock_dir.exists():
            return

        cleaned = 0

        for lock_file in self.lock_dir.glob("*.lock"):
            try:
                # Try to read lock info
                with open(lock_file, 'r') as f:
                    lock_info = json.load(f)
                pid = lock_info.get('pid')

                # Readable lock with no PID, or with a live owner: keep it.
                if not pid or psutil.pid_exists(pid):
                    continue
            except (OSError, ValueError, AttributeError, TypeError):
                # Unreadable or corrupt: treat as a removal candidate.
                pass

            handle = self._try_lock_file(lock_file)
            if handle is None:
                continue  # still held by someone, not stale

            try:
                # Close before unlinking: an open file cannot be removed on Windows.
                self._unlock_and_close(handle)
                lock_file.unlink()
            except OSError:
                continue

            cleaned += 1
            if self.verbose:
                print(f"[SESSION MGR] Cleaned stale lock: {lock_file.name}")

        if self.verbose and cleaned > 0:
            print(f"[SESSION MGR] Cleaned {cleaned} stale lock file(s)")

    def get_status_report(self) -> str:
        """
        Generate status report of NX sessions and locks.

        Returns:
            Multi-line text listing running NX processes, an interactive
            session warning when applicable, the registered optimization
            sessions, and the number of *.lock files in the lock directory.
        """
        rule = "="*70 + "\n"
        parts = ["\n" + rule, " NX SESSION MANAGER STATUS\n", rule]

        # Running NX sessions
        nx_sessions = self.get_running_nx_sessions()
        parts.append(f"\n Running NX Processes: {len(nx_sessions)}\n")
        for session in nx_sessions:
            parts.append(f" PID {session.pid}: {session.name}\n")
            if session.working_dir:
                parts.append(f" Working dir: {session.working_dir}\n")

        # Interactive session warning
        if self.is_nx_interactive_session_running():
            parts.append(f"\n WARNING: Interactive NX session detected!\n")
            parts.append(f" Batch operations may conflict with user's work.\n")

        # Active optimization sessions
        active_count = self._count_active_sessions()
        parts.append(f"\n Active Optimization Sessions: {active_count}/{self.max_concurrent}\n")

        if self.session_lock_file.exists():
            try:
                with open(self.session_lock_file, 'r') as f:
                    sessions = json.load(f)
                for session in sessions:
                    study = session.get('study_name', 'Unknown')
                    pid = session.get('pid', 'Unknown')
                    parts.append(f" {study} (PID {pid})\n")
            except (OSError, ValueError, AttributeError, TypeError):
                # Best effort: an unreadable registry just omits the listing.
                pass

        # Lock files
        lock_files = list(self.lock_dir.glob("*.lock"))
        parts.append(f"\n Active Lock Files: {len(lock_files)}\n")

        parts.append(rule)

        return "".join(parts)