refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability. Backwards compatibility aliases with deprecation warnings are provided. New Structure: - core/ - Optimization runners (runner, intelligent_optimizer, etc.) - processors/ - Data processing - surrogates/ - Neural network surrogates - nx/ - NX/Nastran integration (solver, updater, session_manager) - study/ - Study management (creator, wizard, state, reset) - reporting/ - Reports and analysis (visualizer, report_generator) - config/ - Configuration management (manager, builder) - utils/ - Utilities (logger, auto_doc, etc.) - future/ - Research/experimental code Migration: - ~200 import changes across 125 files - All __init__.py files use lazy loading to avoid circular imports - Backwards compatibility layer supports old import paths with warnings - All existing functionality preserved To migrate existing code: OLD: from optimization_engine.nx_solver import NXSolver NEW: from optimization_engine.nx.solver import NXSolver OLD: from optimization_engine.runner import OptimizationRunner NEW: from optimization_engine.core.runner import OptimizationRunner 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
442
optimization_engine/nx/session_manager.py
Normal file
442
optimization_engine/nx/session_manager.py
Normal file
@@ -0,0 +1,442 @@
|
||||
"""
|
||||
NX Session Manager - Prevents conflicts when multiple optimizations run concurrently.
|
||||
|
||||
This module ensures that NX sessions don't interfere with each other when:
|
||||
1. Multiple optimizations are running simultaneously
|
||||
2. User has NX open for manual work
|
||||
3. Multiple Atomizer instances are running
|
||||
|
||||
Key Features:
|
||||
- Session detection (running NX processes)
|
||||
- File locking (prevents concurrent access to same model)
|
||||
- Process queuing (waits if NX is busy with another optimization)
|
||||
- Batch mode isolation (uses dedicated NX instances)
|
||||
"""
|
||||
|
||||
import psutil
|
||||
import time
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
|
||||
# Platform-specific imports
|
||||
if os.name != 'nt': # Unix/Linux/Mac
|
||||
import fcntl
|
||||
else: # Windows
|
||||
import msvcrt
|
||||
|
||||
|
||||
@dataclass
|
||||
class NXSessionInfo:
|
||||
"""Information about a running NX session."""
|
||||
pid: int
|
||||
name: str
|
||||
cmdline: List[str]
|
||||
working_dir: Optional[str]
|
||||
create_time: float
|
||||
|
||||
|
||||
class NXSessionManager:
|
||||
"""
|
||||
Manages NX sessions to prevent conflicts between concurrent optimizations.
|
||||
|
||||
Strategy:
|
||||
1. Detect running NX processes
|
||||
2. Use file locks to ensure exclusive model access
|
||||
3. Queue optimization trials if NX is busy
|
||||
4. Isolate batch mode sessions from interactive sessions
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
lock_dir: Optional[Path] = None,
|
||||
max_concurrent_sessions: int = 1,
|
||||
wait_timeout: int = 300,
|
||||
verbose: bool = True
|
||||
):
|
||||
"""
|
||||
Initialize session manager.
|
||||
|
||||
Args:
|
||||
lock_dir: Directory for lock files (default: temp)
|
||||
max_concurrent_sessions: Maximum concurrent NX optimization sessions
|
||||
wait_timeout: Maximum wait time for NX to become available (seconds)
|
||||
verbose: Print session management info
|
||||
"""
|
||||
self.lock_dir = Path(lock_dir) if lock_dir else Path.home() / ".atomizer" / "locks"
|
||||
self.lock_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.max_concurrent = max_concurrent_sessions
|
||||
self.wait_timeout = wait_timeout
|
||||
self.verbose = verbose
|
||||
|
||||
# Track active sessions
|
||||
self.session_lock_file = self.lock_dir / "nx_sessions.json"
|
||||
self.global_lock_file = self.lock_dir / "nx_global.lock"
|
||||
|
||||
def get_running_nx_sessions(self) -> List[NXSessionInfo]:
|
||||
"""
|
||||
Detect all running NX processes.
|
||||
|
||||
Returns:
|
||||
List of NX session info objects
|
||||
"""
|
||||
nx_sessions = []
|
||||
|
||||
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd', 'create_time']):
|
||||
try:
|
||||
name = proc.info['name']
|
||||
|
||||
# Check if this is an NX process
|
||||
if name and any(nx_exe in name.lower() for nx_exe in ['ugraf.exe', 'nx.exe', 'run_journal.exe', 'nxmgr_inter.exe']):
|
||||
session = NXSessionInfo(
|
||||
pid=proc.info['pid'],
|
||||
name=name,
|
||||
cmdline=proc.info['cmdline'] or [],
|
||||
working_dir=proc.info['cwd'],
|
||||
create_time=proc.info['create_time']
|
||||
)
|
||||
nx_sessions.append(session)
|
||||
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||
continue
|
||||
|
||||
return nx_sessions
|
||||
|
||||
def is_nx_interactive_session_running(self) -> bool:
|
||||
"""
|
||||
Check if user has NX open interactively (not batch mode).
|
||||
|
||||
Returns:
|
||||
True if interactive NX session detected
|
||||
"""
|
||||
sessions = self.get_running_nx_sessions()
|
||||
|
||||
for session in sessions:
|
||||
# Interactive sessions are typically ugraf.exe or nx.exe without -batch
|
||||
if 'ugraf.exe' in session.name.lower() or 'nx.exe' in session.name.lower():
|
||||
# Check if it's not a batch session
|
||||
if '-batch' not in ' '.join(session.cmdline).lower():
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
@contextmanager
|
||||
def acquire_model_lock(self, model_file: Path, study_name: str):
|
||||
"""
|
||||
Acquire exclusive lock for a specific model file.
|
||||
|
||||
This prevents two optimizations from modifying the same model simultaneously.
|
||||
|
||||
Args:
|
||||
model_file: Path to the model file (.prt)
|
||||
study_name: Name of the study (for logging)
|
||||
|
||||
Yields:
|
||||
Lock context
|
||||
|
||||
Raises:
|
||||
TimeoutError: If lock cannot be acquired within timeout
|
||||
"""
|
||||
# Create lock file for this specific model
|
||||
model_hash = str(abs(hash(str(model_file))))
|
||||
lock_file = self.lock_dir / f"model_{model_hash}.lock"
|
||||
|
||||
if self.verbose:
|
||||
print(f"\n[SESSION MGR] Acquiring lock for model: {model_file.name}")
|
||||
|
||||
lock_fd = None
|
||||
start_time = time.time()
|
||||
|
||||
try:
|
||||
# Try to acquire lock with timeout
|
||||
while True:
|
||||
try:
|
||||
lock_fd = open(lock_file, 'w')
|
||||
|
||||
# Try to acquire exclusive lock (non-blocking)
|
||||
if os.name == 'nt': # Windows
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_NBLCK, 1)
|
||||
else: # Unix
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
|
||||
# Write lock info
|
||||
lock_info = {
|
||||
'study_name': study_name,
|
||||
'model_file': str(model_file),
|
||||
'pid': os.getpid(),
|
||||
'timestamp': time.time()
|
||||
}
|
||||
lock_fd.write(json.dumps(lock_info, indent=2))
|
||||
lock_fd.flush()
|
||||
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Lock acquired successfully")
|
||||
|
||||
break # Lock acquired!
|
||||
|
||||
except (IOError, OSError):
|
||||
# Lock is held by another process
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if elapsed > self.wait_timeout:
|
||||
raise TimeoutError(
|
||||
f"Could not acquire model lock for {model_file.name} "
|
||||
f"after {self.wait_timeout}s. Another optimization may be using this model."
|
||||
)
|
||||
|
||||
if self.verbose and elapsed % 10 == 0: # Print every 10 seconds
|
||||
print(f"[SESSION MGR] Waiting for model lock... ({elapsed:.0f}s)")
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
yield # Lock acquired, user code runs here
|
||||
|
||||
finally:
|
||||
# Release lock
|
||||
if lock_fd:
|
||||
try:
|
||||
if os.name == 'nt':
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_fd.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
|
||||
lock_fd.close()
|
||||
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Lock released for model: {model_file.name}")
|
||||
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Warning: Failed to release lock: {e}")
|
||||
|
||||
# Clean up lock file
|
||||
try:
|
||||
if lock_file.exists():
|
||||
lock_file.unlink()
|
||||
except:
|
||||
pass
|
||||
|
||||
@contextmanager
|
||||
def acquire_nx_session(self, study_name: str):
|
||||
"""
|
||||
Acquire permission to run an NX batch session.
|
||||
|
||||
This ensures we don't exceed max_concurrent_sessions and
|
||||
don't interfere with interactive NX sessions.
|
||||
|
||||
Args:
|
||||
study_name: Name of the study (for logging)
|
||||
|
||||
Yields:
|
||||
Session context
|
||||
|
||||
Raises:
|
||||
TimeoutError: If session cannot be acquired within timeout
|
||||
"""
|
||||
if self.verbose:
|
||||
print(f"\n[SESSION MGR] Requesting NX batch session for study: {study_name}")
|
||||
|
||||
# Check for interactive NX sessions
|
||||
if self.is_nx_interactive_session_running():
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] WARNING: Interactive NX session detected!")
|
||||
print(f"[SESSION MGR] Batch operations may conflict with user's work.")
|
||||
print(f"[SESSION MGR] Recommend closing interactive NX before running optimization.")
|
||||
|
||||
start_time = time.time()
|
||||
session_acquired = False
|
||||
|
||||
try:
|
||||
# Wait for available session slot
|
||||
while True:
|
||||
active_sessions = self._count_active_sessions()
|
||||
|
||||
if active_sessions < self.max_concurrent:
|
||||
# Register this session
|
||||
self._register_session(study_name)
|
||||
session_acquired = True
|
||||
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] NX session acquired (active: {active_sessions + 1}/{self.max_concurrent})")
|
||||
|
||||
break
|
||||
|
||||
# Check timeout
|
||||
elapsed = time.time() - start_time
|
||||
if elapsed > self.wait_timeout:
|
||||
raise TimeoutError(
|
||||
f"Could not acquire NX session after {self.wait_timeout}s. "
|
||||
f"Max concurrent sessions ({self.max_concurrent}) reached."
|
||||
)
|
||||
|
||||
if self.verbose and elapsed % 10 == 0:
|
||||
print(f"[SESSION MGR] Waiting for NX session... ({elapsed:.0f}s, active: {active_sessions})")
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
yield # Session acquired, user code runs here
|
||||
|
||||
finally:
|
||||
# Unregister session
|
||||
if session_acquired:
|
||||
self._unregister_session(study_name)
|
||||
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] NX session released for study: {study_name}")
|
||||
|
||||
def _count_active_sessions(self) -> int:
|
||||
"""Count active optimization sessions."""
|
||||
if not self.session_lock_file.exists():
|
||||
return 0
|
||||
|
||||
try:
|
||||
with open(self.session_lock_file, 'r') as f:
|
||||
sessions = json.load(f)
|
||||
|
||||
# Clean up stale sessions (processes that no longer exist)
|
||||
active_sessions = []
|
||||
for session in sessions:
|
||||
pid = session.get('pid')
|
||||
if pid and psutil.pid_exists(pid):
|
||||
active_sessions.append(session)
|
||||
|
||||
# Update file with only active sessions
|
||||
with open(self.session_lock_file, 'w') as f:
|
||||
json.dump(active_sessions, f, indent=2)
|
||||
|
||||
return len(active_sessions)
|
||||
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Warning: Failed to count sessions: {e}")
|
||||
return 0
|
||||
|
||||
def _register_session(self, study_name: str):
|
||||
"""Register a new optimization session."""
|
||||
sessions = []
|
||||
|
||||
if self.session_lock_file.exists():
|
||||
try:
|
||||
with open(self.session_lock_file, 'r') as f:
|
||||
sessions = json.load(f)
|
||||
except:
|
||||
sessions = []
|
||||
|
||||
# Add new session
|
||||
sessions.append({
|
||||
'study_name': study_name,
|
||||
'pid': os.getpid(),
|
||||
'start_time': time.time(),
|
||||
'timestamp': time.time()
|
||||
})
|
||||
|
||||
# Save
|
||||
with open(self.session_lock_file, 'w') as f:
|
||||
json.dump(sessions, f, indent=2)
|
||||
|
||||
def _unregister_session(self, study_name: str):
|
||||
"""Unregister an optimization session."""
|
||||
if not self.session_lock_file.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
with open(self.session_lock_file, 'r') as f:
|
||||
sessions = json.load(f)
|
||||
|
||||
# Remove this session
|
||||
pid = os.getpid()
|
||||
sessions = [s for s in sessions if s.get('pid') != pid]
|
||||
|
||||
# Save
|
||||
with open(self.session_lock_file, 'w') as f:
|
||||
json.dump(sessions, f, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Warning: Failed to unregister session: {e}")
|
||||
|
||||
def cleanup_stale_locks(self):
|
||||
"""Remove lock files from crashed processes."""
|
||||
if not self.lock_dir.exists():
|
||||
return
|
||||
|
||||
cleaned = 0
|
||||
|
||||
for lock_file in self.lock_dir.glob("*.lock"):
|
||||
try:
|
||||
# Try to read lock info
|
||||
with open(lock_file, 'r') as f:
|
||||
lock_info = json.load(f)
|
||||
|
||||
pid = lock_info.get('pid')
|
||||
|
||||
# Check if process still exists
|
||||
if pid and not psutil.pid_exists(pid):
|
||||
lock_file.unlink()
|
||||
cleaned += 1
|
||||
|
||||
if self.verbose:
|
||||
print(f"[SESSION MGR] Cleaned stale lock: {lock_file.name}")
|
||||
|
||||
except Exception:
|
||||
# If we can't read lock file, it might be corrupted - remove it
|
||||
try:
|
||||
lock_file.unlink()
|
||||
cleaned += 1
|
||||
except:
|
||||
pass
|
||||
|
||||
if self.verbose and cleaned > 0:
|
||||
print(f"[SESSION MGR] Cleaned {cleaned} stale lock file(s)")
|
||||
|
||||
def get_status_report(self) -> str:
|
||||
"""Generate status report of NX sessions and locks."""
|
||||
report = "\n" + "="*70 + "\n"
|
||||
report += " NX SESSION MANAGER STATUS\n"
|
||||
report += "="*70 + "\n"
|
||||
|
||||
# Running NX sessions
|
||||
nx_sessions = self.get_running_nx_sessions()
|
||||
report += f"\n Running NX Processes: {len(nx_sessions)}\n"
|
||||
|
||||
for session in nx_sessions:
|
||||
report += f" PID {session.pid}: {session.name}\n"
|
||||
|
||||
if session.working_dir:
|
||||
report += f" Working dir: {session.working_dir}\n"
|
||||
|
||||
# Interactive session warning
|
||||
if self.is_nx_interactive_session_running():
|
||||
report += f"\n WARNING: Interactive NX session detected!\n"
|
||||
report += f" Batch operations may conflict with user's work.\n"
|
||||
|
||||
# Active optimization sessions
|
||||
active_count = self._count_active_sessions()
|
||||
report += f"\n Active Optimization Sessions: {active_count}/{self.max_concurrent}\n"
|
||||
|
||||
if self.session_lock_file.exists():
|
||||
try:
|
||||
with open(self.session_lock_file, 'r') as f:
|
||||
sessions = json.load(f)
|
||||
|
||||
for session in sessions:
|
||||
study = session.get('study_name', 'Unknown')
|
||||
pid = session.get('pid', 'Unknown')
|
||||
report += f" {study} (PID {pid})\n"
|
||||
|
||||
except:
|
||||
pass
|
||||
|
||||
# Lock files
|
||||
lock_files = list(self.lock_dir.glob("*.lock"))
|
||||
report += f"\n Active Lock Files: {len(lock_files)}\n"
|
||||
|
||||
report += "="*70 + "\n"
|
||||
|
||||
return report
|
||||
Reference in New Issue
Block a user