Files
Atomizer/optimization_engine/nx/session_manager.py
Anto01 eabcc4c3ca refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 12:30:59 -05:00

443 lines
15 KiB
Python

"""
NX Session Manager - Prevents conflicts when multiple optimizations run concurrently.
This module ensures that NX sessions don't interfere with each other when:
1. Multiple optimizations are running simultaneously
2. User has NX open for manual work
3. Multiple Atomizer instances are running
Key Features:
- Session detection (running NX processes)
- File locking (prevents concurrent access to same model)
- Process queuing (waits if NX is busy with another optimization)
- Batch mode isolation (uses dedicated NX instances)
"""
import hashlib
import json
import os
import time
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List

import psutil

# Platform-specific imports
if os.name != 'nt':  # Unix/Linux/Mac
    import fcntl
else:  # Windows
    import msvcrt
@dataclass
class NXSessionInfo:
    """Snapshot of a single running NX process.

    Plain record; the fields carry whatever the process scanner reported
    for the process at the time it was inspected.
    """

    # Operating-system process id.
    pid: int
    # Executable name of the process (e.g. "ugraf.exe").
    name: str
    # Command-line arguments of the process.
    cmdline: List[str]
    # Working directory of the process, or None when it is not available.
    working_dir: Optional[str]
    # Creation time of the process as reported by the scanner.
    create_time: float
class NXSessionManager:
    """
    Manages NX sessions to prevent conflicts between concurrent optimizations.

    Strategy:
    1. Detect running NX processes
    2. Use OS-level file locks to ensure exclusive model access
    3. Queue optimization trials while all session slots are taken
    4. Warn when an interactive NX session is running next to batch work

    Files kept in ``lock_dir``:
    - ``model_<digest>.lock``: one per locked model; held with an OS file lock
      and containing a small JSON description of the holder.
    - ``nx_sessions.json``: JSON list of registered optimization sessions.

    NOTE: the session registry is read and rewritten without any lock, so
    slot accounting between processes is best-effort.
    """

    # Lower-case executable names that identify an NX process.
    _NX_EXECUTABLES = ('ugraf.exe', 'nx.exe', 'run_journal.exe', 'nxmgr_inter.exe')
    # Subset of executables that may be an interactive session.
    _NX_INTERACTIVE_EXECUTABLES = ('ugraf.exe', 'nx.exe')
    # Seconds between two "still waiting" progress messages.
    _PROGRESS_INTERVAL = 10.0

    def __init__(
        self,
        lock_dir: Optional[Path] = None,
        max_concurrent_sessions: int = 1,
        wait_timeout: int = 300,
        verbose: bool = True
    ):
        """
        Initialize session manager.

        Args:
            lock_dir: Directory for lock files (default: ~/.atomizer/locks).
                Created if it does not exist.
            max_concurrent_sessions: Maximum concurrent NX optimization sessions
            wait_timeout: Maximum wait time for NX to become available (seconds)
            verbose: Print session management info
        """
        self.lock_dir = Path(lock_dir) if lock_dir else Path.home() / ".atomizer" / "locks"
        self.lock_dir.mkdir(parents=True, exist_ok=True)
        self.max_concurrent = max_concurrent_sessions
        self.wait_timeout = wait_timeout
        self.verbose = verbose
        # Track active sessions
        self.session_lock_file = self.lock_dir / "nx_sessions.json"
        self.global_lock_file = self.lock_dir / "nx_global.lock"

    # ------------------------------------------------------------------
    # NX process detection
    # ------------------------------------------------------------------
    def get_running_nx_sessions(self) -> List["NXSessionInfo"]:
        """
        Detect all running NX processes.

        A process counts as NX when its lower-cased name contains one of the
        known NX executable names.

        Returns:
            List of NX session info objects
        """
        nx_sessions = []
        for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd', 'create_time']):
            try:
                info = proc.info
                name = info['name']
                if not name:
                    continue
                lowered = name.lower()
                if not any(exe in lowered for exe in self._NX_EXECUTABLES):
                    continue
                nx_sessions.append(NXSessionInfo(
                    pid=info['pid'],
                    name=name,
                    cmdline=info['cmdline'] or [],
                    working_dir=info['cwd'],
                    create_time=info['create_time']
                ))
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue
        return nx_sessions

    @classmethod
    def _is_interactive(cls, session) -> bool:
        """Return True if *session* is ugraf.exe/nx.exe started without -batch."""
        name = session.name.lower()
        if not any(exe in name for exe in cls._NX_INTERACTIVE_EXECUTABLES):
            return False
        return '-batch' not in ' '.join(session.cmdline).lower()

    def is_nx_interactive_session_running(self) -> bool:
        """
        Check if user has NX open interactively (not batch mode).

        Returns:
            True if interactive NX session detected
        """
        return any(self._is_interactive(s) for s in self.get_running_nx_sessions())

    # ------------------------------------------------------------------
    # Low-level file-lock helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _lock_handle(handle):
        """Take a non-blocking exclusive lock on *handle*; raise OSError if held."""
        if os.name == 'nt':  # Windows
            import msvcrt
            # msvcrt.locking works from the current file position, so always
            # lock the same byte (offset 0).
            os.lseek(handle.fileno(), 0, os.SEEK_SET)
            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
        else:  # Unix
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)

    @staticmethod
    def _unlock_handle(handle):
        """Release the lock taken by ``_lock_handle`` on *handle*."""
        if os.name == 'nt':
            import msvcrt
            handle.flush()
            # Writing the lock info moved the file position; go back to the
            # byte that was actually locked before unlocking it.
            os.lseek(handle.fileno(), 0, os.SEEK_SET)
            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            fcntl.flock(handle, fcntl.LOCK_UN)

    @staticmethod
    def _remove_quietly(path: Path) -> bool:
        """Delete *path*; return False (instead of raising) if that fails."""
        try:
            path.unlink()
        except OSError:
            return False
        return True

    def _try_lock_file(self, lock_file: Path, create: bool = True):
        """
        Make one non-blocking attempt to open and lock *lock_file*.

        The file is opened without truncation so a failed attempt never wipes
        the lock info written by the current holder, and the handle is closed
        again when the lock cannot be taken.

        Args:
            lock_file: Path of the lock file
            create: Create the file when it does not exist yet

        Returns:
            The locked, open file object, or None if the lock is unavailable
        """
        flags = os.O_RDWR | (os.O_CREAT if create else 0)
        try:
            handle = os.fdopen(os.open(str(lock_file), flags, 0o666), 'r+')
        except OSError:
            return None
        try:
            self._lock_handle(handle)
            if os.name != 'nt':
                # The previous holder may have unlinked the file between our
                # open() and flock(); a lock on that orphaned inode would not
                # exclude anybody, so it must be discarded.
                on_disk = os.stat(str(lock_file))
                if not os.path.samestat(os.fstat(handle.fileno()), on_disk):
                    raise OSError("lock file was replaced while waiting")
        except OSError:
            handle.close()
            return None
        return handle

    def _remove_if_unlocked(self, lock_file: Path) -> bool:
        """Delete *lock_file* only if no process currently holds its lock."""
        handle = self._try_lock_file(lock_file, create=False)
        if handle is None:
            return False
        try:
            if os.name != 'nt':
                # Unlink while holding the lock (see _try_lock_file).
                return self._remove_quietly(lock_file)
        finally:
            handle.close()
        # Windows cannot delete a file that is still open.
        return self._remove_quietly(lock_file)

    # ------------------------------------------------------------------
    # Model locks
    # ------------------------------------------------------------------
    def _model_lock_path(self, model_file: Path) -> Path:
        """
        Return the lock file path for *model_file*.

        The name is derived from a SHA-256 digest of the normalized absolute
        path. The built-in ``hash()`` must not be used here: string hashes are
        randomized per interpreter process, so two processes would never agree
        on the lock file name.
        """
        key = os.path.normcase(os.path.abspath(str(model_file)))
        digest = hashlib.sha256(os.fsencode(key)).hexdigest()[:16]
        return self.lock_dir / f"model_{digest}.lock"

    def _wait_for_model_lock(self, lock_file: Path, model_file: Path):
        """Poll once per second until *lock_file* is locked or the timeout expires."""
        started = time.monotonic()
        next_report = 0.0
        while True:
            handle = self._try_lock_file(lock_file)
            if handle is not None:
                return handle
            # Lock is held by another process
            elapsed = time.monotonic() - started
            if elapsed > self.wait_timeout:
                raise TimeoutError(
                    f"Could not acquire model lock for {model_file.name} "
                    f"after {self.wait_timeout}s. Another optimization may be using this model."
                )
            if self.verbose and elapsed >= next_report:  # Print every 10 seconds
                print(f"[SESSION MGR] Waiting for model lock... ({elapsed:.0f}s)")
                next_report = elapsed + self._PROGRESS_INTERVAL
            time.sleep(1)

    def _release_model_lock(self, handle, lock_file: Path, model_file: Path):
        """Unlock, close and delete a lock file obtained from _wait_for_model_lock."""
        try:
            try:
                if os.name != 'nt':
                    # Remove the file before unlocking so a waiter that then
                    # locks the old inode can detect that it is orphaned.
                    self._remove_quietly(lock_file)
                self._unlock_handle(handle)
            finally:
                handle.close()  # closing also drops the OS-level lock
        except (OSError, ValueError) as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to release lock: {e}")
        else:
            if self.verbose:
                print(f"[SESSION MGR] Lock released for model: {model_file.name}")
        if os.name == 'nt':
            # Best effort: fails harmlessly if another process already
            # reopened the file.
            self._remove_quietly(lock_file)

    @contextmanager
    def acquire_model_lock(self, model_file: Path, study_name: str):
        """
        Acquire exclusive lock for a specific model file.

        This prevents two optimizations from modifying the same model simultaneously.
        The lock file is only touched and removed by the process that actually
        holds the lock; a caller that times out leaves it untouched.

        Args:
            model_file: Path to the model file (.prt)
            study_name: Name of the study (recorded in the lock file)

        Yields:
            Lock context

        Raises:
            TimeoutError: If lock cannot be acquired within timeout
        """
        model_file = Path(model_file)
        lock_file = self._model_lock_path(model_file)
        if self.verbose:
            print(f"\n[SESSION MGR] Acquiring lock for model: {model_file.name}")

        handle = self._wait_for_model_lock(lock_file, model_file)
        try:
            # Write lock info (replacing whatever a previous holder left behind)
            lock_info = {
                'study_name': study_name,
                'model_file': str(model_file),
                'pid': os.getpid(),
                'timestamp': time.time()
            }
            handle.seek(0)
            handle.truncate()
            handle.write(json.dumps(lock_info, indent=2))
            handle.flush()
            if self.verbose:
                print("[SESSION MGR] Lock acquired successfully")
            yield  # Lock acquired, user code runs here
        finally:
            self._release_model_lock(handle, lock_file, model_file)

    # ------------------------------------------------------------------
    # Session slots
    # ------------------------------------------------------------------
    @contextmanager
    def acquire_nx_session(self, study_name: str):
        """
        Acquire permission to run an NX batch session.

        Waits (polling every 2 seconds) until fewer than
        ``max_concurrent_sessions`` sessions are registered, registers this
        one, and unregisters it again on exit. When verbose, a warning is
        printed if an interactive NX session is detected.

        Args:
            study_name: Name of the study (for logging)

        Yields:
            Session context

        Raises:
            TimeoutError: If session cannot be acquired within timeout
        """
        if self.verbose:
            print(f"\n[SESSION MGR] Requesting NX batch session for study: {study_name}")
        # The interactive check only produces output, so skip the process
        # scan entirely when not verbose.
        if self.verbose and self.is_nx_interactive_session_running():
            print("[SESSION MGR] WARNING: Interactive NX session detected!")
            print("[SESSION MGR] Batch operations may conflict with user's work.")
            print("[SESSION MGR] Recommend closing interactive NX before running optimization.")

        started = time.monotonic()
        next_report = 0.0
        # Wait for available session slot
        while True:
            active_sessions = self._count_active_sessions()
            if active_sessions < self.max_concurrent:
                self._register_session(study_name)
                if self.verbose:
                    print(f"[SESSION MGR] NX session acquired (active: {active_sessions + 1}/{self.max_concurrent})")
                break
            elapsed = time.monotonic() - started
            if elapsed > self.wait_timeout:
                raise TimeoutError(
                    f"Could not acquire NX session after {self.wait_timeout}s. "
                    f"Max concurrent sessions ({self.max_concurrent}) reached."
                )
            if self.verbose and elapsed >= next_report:
                print(f"[SESSION MGR] Waiting for NX session... ({elapsed:.0f}s, active: {active_sessions})")
                next_report = elapsed + self._PROGRESS_INTERVAL
            time.sleep(2)

        try:
            yield  # Session acquired, user code runs here
        finally:
            self._unregister_session(study_name)
            if self.verbose:
                print(f"[SESSION MGR] NX session released for study: {study_name}")

    def _load_sessions(self) -> list:
        """
        Read the session registry.

        Returns:
            List of session dicts (entries that are not dicts are dropped)

        Raises:
            OSError: If the file cannot be read
            ValueError: If the file is not a JSON list
        """
        with open(self.session_lock_file, 'r') as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError("session registry is not a JSON list")
        return [entry for entry in data if isinstance(entry, dict)]

    def _save_sessions(self, sessions: list):
        """Overwrite the session registry with *sessions*."""
        with open(self.session_lock_file, 'w') as f:
            json.dump(sessions, f, indent=2)

    def _count_active_sessions(self) -> int:
        """
        Count active optimization sessions.

        Entries whose process no longer exists are pruned from the registry.
        Returns 0 when the registry is missing or cannot be processed.
        """
        if not self.session_lock_file.exists():
            return 0
        try:
            sessions = self._load_sessions()
            # Clean up stale sessions (processes that no longer exist)
            active_sessions = [
                s for s in sessions
                if s.get('pid') and psutil.pid_exists(s['pid'])
            ]
            # Only rewrite the file when something was actually pruned, to
            # keep the window for clobbering a concurrent writer small.
            if len(active_sessions) != len(sessions):
                self._save_sessions(active_sessions)
            return len(active_sessions)
        except Exception as e:  # best effort: never block a run on a bad registry
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to count sessions: {e}")
            return 0

    def _register_session(self, study_name: str):
        """Register a new optimization session for this process."""
        sessions = []
        if self.session_lock_file.exists():
            try:
                sessions = self._load_sessions()
            except (OSError, ValueError):
                # Unreadable registry: start over with an empty list
                sessions = []
        now = time.time()
        sessions.append({
            'study_name': study_name,
            'pid': os.getpid(),
            'start_time': now,
            'timestamp': now
        })
        self._save_sessions(sessions)

    def _unregister_session(self, study_name: str):
        """
        Unregister an optimization session.

        Removes exactly one entry registered by this process for *study_name*,
        so other sessions owned by the same process stay registered.
        """
        if not self.session_lock_file.exists():
            return
        try:
            sessions = self._load_sessions()
            pid = os.getpid()
            for index, session in enumerate(sessions):
                if session.get('pid') == pid and session.get('study_name') == study_name:
                    del sessions[index]
                    break
            self._save_sessions(sessions)
        except Exception as e:  # best effort, mirrors _count_active_sessions
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to unregister session: {e}")

    # ------------------------------------------------------------------
    # Maintenance and reporting
    # ------------------------------------------------------------------
    def cleanup_stale_locks(self):
        """
        Remove lock files from crashed processes.

        A ``*.lock`` file is a removal candidate when the PID recorded in it
        no longer exists, or when it cannot be read as JSON. A candidate is
        only deleted if its file lock can be taken, i.e. no live process is
        holding it.
        """
        if not self.lock_dir.exists():
            return
        cleaned = 0
        for lock_file in self.lock_dir.glob("*.lock"):
            try:
                # Try to read lock info
                with open(lock_file, 'r') as f:
                    lock_info = json.load(f)
                pid = lock_info.get('pid')
                # Check if process still exists
                stale = bool(pid) and not psutil.pid_exists(pid)
            except Exception:
                # If we can't read lock file, it might be corrupted
                stale = True
            if not stale or not self._remove_if_unlocked(lock_file):
                continue
            cleaned += 1
            if self.verbose:
                print(f"[SESSION MGR] Cleaned stale lock: {lock_file.name}")
        if self.verbose and cleaned > 0:
            print(f"[SESSION MGR] Cleaned {cleaned} stale lock file(s)")

    def get_status_report(self) -> str:
        """
        Generate status report of NX sessions and locks.

        Returns:
            Multi-line text listing running NX processes, registered
            optimization sessions and the number of ``*.lock`` files.
        """
        rule = "=" * 70
        parts = ["\n", rule, "\n", " NX SESSION MANAGER STATUS\n", rule, "\n"]

        # Running NX sessions (one process scan, reused for the warning below)
        nx_sessions = self.get_running_nx_sessions()
        parts.append(f"\n Running NX Processes: {len(nx_sessions)}\n")
        for session in nx_sessions:
            parts.append(f" PID {session.pid}: {session.name}\n")
            if session.working_dir:
                parts.append(f" Working dir: {session.working_dir}\n")

        # Interactive session warning
        if any(self._is_interactive(s) for s in nx_sessions):
            parts.append("\n WARNING: Interactive NX session detected!\n")
            parts.append(" Batch operations may conflict with user's work.\n")

        # Active optimization sessions
        active_count = self._count_active_sessions()
        parts.append(f"\n Active Optimization Sessions: {active_count}/{self.max_concurrent}\n")
        if self.session_lock_file.exists():
            try:
                for session in self._load_sessions():
                    study = session.get('study_name', 'Unknown')
                    pid = session.get('pid', 'Unknown')
                    parts.append(f" {study} (PID {pid})\n")
            except (OSError, ValueError):
                pass  # registry unreadable: the report simply omits the list

        # Lock files
        lock_files = list(self.lock_dir.glob("*.lock"))
        parts.append(f"\n Active Lock Files: {len(lock_files)}\n")
        parts.append(rule + "\n")
        return "".join(parts)