BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/ - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/ - Data processing
- surrogates/ - Neural network surrogates
- nx/ - NX/Nastran integration (solver, updater, session_manager)
- study/ - Study management (creator, wizard, state, reset)
- reporting/ - Reports and analysis (visualizer, report_generator)
- config/ - Configuration management (manager, builder)
- utils/ - Utilities (logger, auto_doc, etc.)
- future/ - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
443 lines
15 KiB
Python
443 lines
15 KiB
Python
"""
NX Session Manager - Prevents conflicts when multiple optimizations run concurrently.

This module ensures that NX sessions don't interfere with each other when:
1. Multiple optimizations are running simultaneously
2. User has NX open for manual work
3. Multiple Atomizer instances are running

Key Features:
- Session detection (running NX processes)
- File locking (prevents concurrent access to same model)
- Process queuing (waits if NX is busy with another optimization)
- Batch mode isolation (uses dedicated NX instances)
"""
|
|
|
|
import psutil
|
|
import time
|
|
import os
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
from contextlib import contextmanager
|
|
from dataclasses import dataclass
|
|
import json
|
|
|
|
# Platform-specific imports
|
|
if os.name != 'nt': # Unix/Linux/Mac
|
|
import fcntl
|
|
else: # Windows
|
|
import msvcrt
|
|
|
|
|
|
@dataclass
class NXSessionInfo:
    """Information about a running NX session.

    One instance describes a single operating-system process that was
    identified as belonging to NX.
    """

    # Operating-system process id.
    pid: int
    # Executable name as reported for the process (e.g. "ugraf.exe").
    name: str
    # Command-line arguments of the process; empty when unavailable.
    cmdline: List[str]
    # Current working directory of the process, or None when unknown.
    working_dir: Optional[str]
    # Process creation time as reported by the process scanner
    # (presumably seconds since the epoch - TODO confirm against psutil).
    create_time: float
|
|
|
|
|
|
class NXSessionManager:
    """
    Manages NX sessions to prevent conflicts between concurrent optimizations.

    Strategy:
    1. Detect running NX processes
    2. Use file locks to ensure exclusive model access
    3. Queue optimization trials if NX is busy
    4. Isolate batch mode sessions from interactive sessions

    Coordination state is kept in ``lock_dir``: one ``model_<digest>.lock``
    file per locked model and a JSON registry (``nx_sessions.json``) listing
    the optimization sessions that are currently registered.
    """

    # Lower-cased executable names that are treated as NX processes.
    _NX_PROCESS_NAMES = frozenset(('ugraf.exe', 'nx.exe', 'run_journal.exe', 'nxmgr_inter.exe'))
    # Subset that counts as an interactive session when started without "-batch".
    _NX_INTERACTIVE_NAMES = frozenset(('ugraf.exe', 'nx.exe'))
    # Seconds between "still waiting" messages in verbose mode.
    _PROGRESS_INTERVAL = 10.0

    def __init__(
        self,
        lock_dir: Optional[Path] = None,
        max_concurrent_sessions: int = 1,
        wait_timeout: int = 300,
        verbose: bool = True
    ):
        """
        Initialize session manager.

        Args:
            lock_dir: Directory for lock files (default: ~/.atomizer/locks).
                Created if it does not exist.
            max_concurrent_sessions: Maximum concurrent NX optimization sessions
            wait_timeout: Maximum wait time for NX to become available (seconds)
            verbose: Print session management info
        """
        self.lock_dir = Path(lock_dir) if lock_dir else Path.home() / ".atomizer" / "locks"
        self.lock_dir.mkdir(parents=True, exist_ok=True)

        self.max_concurrent = max_concurrent_sessions
        self.wait_timeout = wait_timeout
        self.verbose = verbose

        # Track active sessions
        self.session_lock_file = self.lock_dir / "nx_sessions.json"
        self.global_lock_file = self.lock_dir / "nx_global.lock"

    # ------------------------------------------------------------------
    # NX process detection
    # ------------------------------------------------------------------

    @classmethod
    def _is_nx_process_name(cls, name) -> bool:
        """Return True when *name* is exactly one of the known NX executables.

        The comparison is case-insensitive but exact, so unrelated programs
        whose names merely contain an NX name (e.g. "nginx.exe") do not match.
        """
        return bool(name) and name.lower() in cls._NX_PROCESS_NAMES

    @classmethod
    def _is_interactive_session(cls, session) -> bool:
        """Return True when *session* is ugraf.exe/nx.exe started without -batch."""
        if (session.name or '').lower() not in cls._NX_INTERACTIVE_NAMES:
            return False
        return not any('-batch' in arg.lower() for arg in session.cmdline)

    def get_running_nx_sessions(self) -> "List[NXSessionInfo]":
        """
        Detect all running NX processes.

        Returns:
            List of NX session info objects
        """
        nx_sessions = []

        for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cwd', 'create_time']):
            try:
                info = proc.info
                name = info['name']
                if not self._is_nx_process_name(name):
                    continue

                nx_sessions.append(NXSessionInfo(
                    pid=info['pid'],
                    name=name,
                    cmdline=info['cmdline'] or [],
                    working_dir=info['cwd'],
                    create_time=info['create_time']
                ))
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process vanished or is not inspectable; skip it.
                continue

        return nx_sessions

    def is_nx_interactive_session_running(self) -> bool:
        """
        Check if user has NX open interactively (not batch mode).

        Returns:
            True if interactive NX session detected
        """
        return any(
            self._is_interactive_session(session)
            for session in self.get_running_nx_sessions()
        )

    # ------------------------------------------------------------------
    # Low-level file-lock helpers
    # ------------------------------------------------------------------

    def _model_lock_path(self, model_file) -> Path:
        """Return the lock-file path used for *model_file*.

        The name is derived from a SHA-256 digest of the normalised absolute
        path. The built-in ``hash()`` must not be used here: string hashes are
        salted per interpreter process, so two processes would compute
        different lock files for the same model and never exclude each other.
        """
        import hashlib

        canonical = os.path.normcase(os.path.abspath(os.fspath(model_file)))
        digest = hashlib.sha256(canonical.encode('utf-8')).hexdigest()[:16]
        return self.lock_dir / f"model_{digest}.lock"

    @staticmethod
    def _try_lock(handle) -> bool:
        """Try to take a non-blocking exclusive OS lock on *handle*.

        Returns:
            True when the lock was obtained, False when it is held elsewhere.
        """
        try:
            if os.name == 'nt':  # Windows
                import msvcrt
                # msvcrt.locking works from the current file position, so
                # always lock the same byte (offset 0).
                os.lseek(handle.fileno(), 0, os.SEEK_SET)
                msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
            else:  # Unix
                import fcntl
                fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            return False
        return True

    @staticmethod
    def _unlock(handle) -> None:
        """Release the OS lock taken by :meth:`_try_lock` on *handle*."""
        if os.name == 'nt':
            import msvcrt
            handle.flush()
            # Rewind to the byte that was locked; writes moved the position.
            os.lseek(handle.fileno(), 0, os.SEEK_SET)
            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            import fcntl
            fcntl.flock(handle, fcntl.LOCK_UN)

    @staticmethod
    def _is_current_file(handle, path) -> bool:
        """Return True when *path* still refers to the file open in *handle*.

        On POSIX a lock file can be unlinked by its previous owner between our
        ``open`` and our lock; locking such an orphaned file would exclude
        nobody. The check is skipped on Windows, where this class deletes lock
        files only after closing them.
        """
        if os.name == 'nt':
            return True
        try:
            return os.path.samestat(os.fstat(handle.fileno()), os.stat(path))
        except OSError:
            return False

    @staticmethod
    def _remove_lock_file(path) -> bool:
        """Best-effort removal of *path*; returns True when it was deleted."""
        try:
            os.unlink(path)
        except OSError:
            return False
        return True

    def _wait_for_model_lock(self, lock_file, model_file):
        """Poll until the OS lock on *lock_file* is obtained.

        Returns:
            The open file handle that owns the lock.

        Raises:
            TimeoutError: If the lock is not obtained within ``wait_timeout``.
        """
        started = time.monotonic()
        next_report = self._PROGRESS_INTERVAL

        while True:
            try:
                # 'a+' creates the file when missing but never truncates it,
                # so a waiter cannot wipe the current owner's lock info.
                handle = open(lock_file, 'a+')
            except OSError:
                handle = None

            if handle is not None:
                if self._try_lock(handle):
                    if self._is_current_file(handle, lock_file):
                        return handle
                    # Locked a file that was already unlinked: give it up.
                    try:
                        self._unlock(handle)
                    except OSError:
                        pass
                handle.close()

            elapsed = time.monotonic() - started
            if elapsed > self.wait_timeout:
                raise TimeoutError(
                    f"Could not acquire model lock for {model_file.name} "
                    f"after {self.wait_timeout}s. Another optimization may be using this model."
                )

            if self.verbose and elapsed >= next_report:  # Print every 10 seconds
                print(f"[SESSION MGR] Waiting for model lock... ({elapsed:.0f}s)")
                next_report += self._PROGRESS_INTERVAL

            time.sleep(1)

    def _write_lock_info(self, handle, model_file, study_name) -> None:
        """Record who owns the lock inside the lock file (diagnostic only)."""
        lock_info = {
            'study_name': study_name,
            'model_file': str(model_file),
            'pid': os.getpid(),
            'timestamp': time.time()
        }
        try:
            handle.seek(0)
            handle.truncate(0)  # drop leftovers from a previous owner
            handle.write(json.dumps(lock_info, indent=2))
            handle.flush()
        except OSError as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to write lock info: {e}")

    def _release_model_lock(self, handle, lock_file, model_file) -> None:
        """Unlock, close and remove a lock file owned by this process."""
        try:
            if os.name != 'nt':
                # Remove while still holding the lock so that no waiter can
                # lock a file that is about to disappear.
                self._remove_lock_file(lock_file)
            try:
                self._unlock(handle)
            finally:
                handle.close()
            if os.name == 'nt':
                # An open file cannot be deleted on Windows; do it after close.
                self._remove_lock_file(lock_file)

            if self.verbose:
                print(f"[SESSION MGR] Lock released for model: {model_file.name}")
        except OSError as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to release lock: {e}")

    @contextmanager
    def acquire_model_lock(self, model_file: Path, study_name: str):
        """
        Acquire exclusive lock for a specific model file.

        This prevents two optimizations from modifying the same model simultaneously.

        Args:
            model_file: Path to the model file (.prt)
            study_name: Name of the study (for logging)

        Yields:
            Lock context

        Raises:
            TimeoutError: If lock cannot be acquired within timeout
        """
        model_file = Path(model_file)
        lock_file = self._model_lock_path(model_file)

        if self.verbose:
            print(f"\n[SESSION MGR] Acquiring lock for model: {model_file.name}")

        # Raises TimeoutError before anything needs to be released.
        handle = self._wait_for_model_lock(lock_file, model_file)
        try:
            self._write_lock_info(handle, model_file, study_name)

            if self.verbose:
                print("[SESSION MGR] Lock acquired successfully")

            yield  # Lock acquired, user code runs here
        finally:
            self._release_model_lock(handle, lock_file, model_file)

    # ------------------------------------------------------------------
    # Session slots
    # ------------------------------------------------------------------

    @contextmanager
    def acquire_nx_session(self, study_name: str):
        """
        Acquire permission to run an NX batch session.

        This ensures we don't exceed max_concurrent_sessions and
        warns about interactive NX sessions (it does not wait for them).

        Args:
            study_name: Name of the study (for logging)

        Yields:
            Session context

        Raises:
            TimeoutError: If session cannot be acquired within timeout
        """
        if self.verbose:
            print(f"\n[SESSION MGR] Requesting NX batch session for study: {study_name}")

        # The interactive check only produces warnings, so the process scan is
        # skipped entirely when nothing would be printed.
        if self.verbose and self.is_nx_interactive_session_running():
            print("[SESSION MGR] WARNING: Interactive NX session detected!")
            print("[SESSION MGR] Batch operations may conflict with user's work.")
            print("[SESSION MGR] Recommend closing interactive NX before running optimization.")

        started = time.monotonic()
        next_report = self._PROGRESS_INTERVAL
        session_acquired = False

        try:
            # Wait for available session slot.
            # NOTE(review): counting and registering are two separate file
            # operations and are not atomic across processes;
            # ``global_lock_file`` is not used by this class to guard them.
            while True:
                active_sessions = self._count_active_sessions()

                if active_sessions < self.max_concurrent:
                    self._register_session(study_name)
                    session_acquired = True

                    if self.verbose:
                        print(f"[SESSION MGR] NX session acquired (active: {active_sessions + 1}/{self.max_concurrent})")

                    break

                elapsed = time.monotonic() - started
                if elapsed > self.wait_timeout:
                    raise TimeoutError(
                        f"Could not acquire NX session after {self.wait_timeout}s. "
                        f"Max concurrent sessions ({self.max_concurrent}) reached."
                    )

                if self.verbose and elapsed >= next_report:
                    print(f"[SESSION MGR] Waiting for NX session... ({elapsed:.0f}s, active: {active_sessions})")
                    next_report += self._PROGRESS_INTERVAL

                time.sleep(2)

            yield  # Session acquired, user code runs here

        finally:
            if session_acquired:
                self._unregister_session(study_name)

                if self.verbose:
                    print(f"[SESSION MGR] NX session released for study: {study_name}")

    def _read_sessions(self) -> list:
        """Load the session registry.

        Returns:
            The list stored in the registry file, or ``[]`` when the file
            does not exist.

        Raises:
            OSError: If the file exists but cannot be read.
            ValueError: If the content is not a JSON list.
        """
        try:
            with open(self.session_lock_file, 'r') as f:
                sessions = json.load(f)
        except FileNotFoundError:
            return []
        if not isinstance(sessions, list):
            raise ValueError("session registry is not a JSON list")
        return sessions

    def _write_sessions(self, sessions) -> None:
        """Overwrite the session registry with *sessions*."""
        with open(self.session_lock_file, 'w') as f:
            json.dump(sessions, f, indent=2)

    def _count_active_sessions(self) -> int:
        """Count active optimization sessions.

        Entries whose process no longer exists are pruned from the registry.
        Returns 0 (after an optional warning) when the registry is unreadable.
        """
        try:
            sessions = self._read_sessions()

            # Keep only entries whose owning process is still alive.
            active_sessions = [
                session for session in sessions
                if isinstance(session, dict)
                and isinstance(session.get('pid'), int)
                and session['pid']
                and psutil.pid_exists(session['pid'])
            ]

            # Rewrite the registry only when something was actually pruned.
            if len(active_sessions) != len(sessions):
                self._write_sessions(active_sessions)

            return len(active_sessions)

        except (OSError, ValueError) as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to count sessions: {e}")
            return 0

    def _register_session(self, study_name: str):
        """Register a new optimization session for the current process."""
        try:
            sessions = self._read_sessions()
        except (OSError, ValueError):
            # Unreadable registry: start over rather than block the study.
            sessions = []

        now = time.time()
        sessions.append({
            'study_name': study_name,
            'pid': os.getpid(),
            'start_time': now,
            'timestamp': now
        })

        self._write_sessions(sessions)

    def _unregister_session(self, study_name: str):
        """Unregister one optimization session of the current process.

        Only a single entry matching both this PID and *study_name* is
        removed, so other studies registered by the same process stay listed.
        """
        try:
            sessions = self._read_sessions()
            pid = os.getpid()

            for index, session in enumerate(sessions):
                if (
                    isinstance(session, dict)
                    and session.get('pid') == pid
                    and session.get('study_name') == study_name
                ):
                    del sessions[index]
                    break
            else:
                return  # nothing registered under this name; leave file alone

            self._write_sessions(sessions)

        except (OSError, ValueError) as e:
            if self.verbose:
                print(f"[SESSION MGR] Warning: Failed to unregister session: {e}")

    # ------------------------------------------------------------------
    # Maintenance and reporting
    # ------------------------------------------------------------------

    def _remove_if_unheld(self, lock_file) -> bool:
        """Delete *lock_file* when no process holds its OS lock.

        The operating system drops a file lock when its owner exits, so being
        able to take the lock means the file is a leftover. A file that is
        still locked is never touched.

        Returns:
            True when the file was removed.
        """
        try:
            handle = open(lock_file, 'r+')
        except OSError:
            return False  # vanished or not accessible; nothing safe to do

        removed = False
        try:
            if not self._try_lock(handle):
                return False  # a live process owns this lock
            try:
                if os.name != 'nt' and self._is_current_file(handle, lock_file):
                    removed = self._remove_lock_file(lock_file)
            finally:
                self._unlock(handle)
        except OSError:
            pass
        finally:
            handle.close()

        if os.name == 'nt':
            # Deletion is only possible once our own handle is closed.
            removed = self._remove_lock_file(lock_file)
        return removed

    def cleanup_stale_locks(self):
        """Remove lock files from crashed processes."""
        if not self.lock_dir.exists():
            return

        cleaned = 0

        for lock_file in self.lock_dir.glob("*.lock"):
            if self._remove_if_unheld(lock_file):
                cleaned += 1

                if self.verbose:
                    print(f"[SESSION MGR] Cleaned stale lock: {lock_file.name}")

        if self.verbose and cleaned > 0:
            print(f"[SESSION MGR] Cleaned {cleaned} stale lock file(s)")

    def get_status_report(self) -> str:
        """Generate status report of NX sessions and locks.

        Returns:
            A multi-line, human-readable summary of running NX processes,
            registered optimization sessions and lock files.
        """
        rule = "=" * 70
        parts = ["\n" + rule + "\n", " NX SESSION MANAGER STATUS\n", rule + "\n"]

        # Running NX sessions (scanned once and reused below)
        nx_sessions = self.get_running_nx_sessions()
        parts.append(f"\n Running NX Processes: {len(nx_sessions)}\n")

        for session in nx_sessions:
            parts.append(f" PID {session.pid}: {session.name}\n")

            if session.working_dir:
                parts.append(f" Working dir: {session.working_dir}\n")

        # Interactive session warning
        if any(self._is_interactive_session(session) for session in nx_sessions):
            parts.append("\n WARNING: Interactive NX session detected!\n")
            parts.append(" Batch operations may conflict with user's work.\n")

        # Active optimization sessions
        active_count = self._count_active_sessions()
        parts.append(f"\n Active Optimization Sessions: {active_count}/{self.max_concurrent}\n")

        try:
            registered = self._read_sessions()
        except (OSError, ValueError):
            registered = []  # the report is best-effort

        for session in registered:
            if not isinstance(session, dict):
                continue
            study = session.get('study_name', 'Unknown')
            pid = session.get('pid', 'Unknown')
            parts.append(f" {study} (PID {pid})\n")

        # Lock files
        lock_files = list(self.lock_dir.glob("*.lock"))
        parts.append(f"\n Active Lock Files: {len(lock_files)}\n")

        parts.append(rule + "\n")

        return "".join(parts)
|