Files
Atomizer/projects/hydrotech-beam/studies/01_doe_landscape/iteration_manager.py

250 lines
8.2 KiB
Python
Raw Normal View History

"""Smart iteration folder management for Hydrotech Beam optimization.

Manages iteration folders with intelligent retention:

- Each iteration gets a full copy of model files (openable in NX for debug)
- Last N iterations: keep full model files (rolling window)
- Best K iterations: keep full model files (by objective value)
- All others: strip model files, keep only solver outputs + params

This gives debuggability (open any recent/best iteration in NX) while
keeping disk usage bounded.

References:
    CEO design brief (2026-02-11): "all models properly saved in their
    iteration folder, keep last 10, keep best 3, delete stacking models"
"""
from __future__ import annotations

import json
import logging
import math
import shutil
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
# Suffixes of the NX model files. Files with these suffixes are copied from
# the master model directory into every iteration folder, and they are the
# only files removed when an iteration is stripped.
MODEL_EXTENSIONS = {
    ".prt",
    ".fem",
    ".sim",
}

# Suffixes of solver outputs and bookkeeping files that are meant to survive
# stripping. NOTE(review): nothing in the visible code reads this set;
# stripping works by deleting MODEL_EXTENSIONS files only.
KEEP_EXTENSIONS = {
    ".op2",
    ".f06",
    ".dat",
    ".log",
    ".json",
    ".txt",
    ".csv",
}

# Default retention policy: how many iterations keep their full model files.
DEFAULT_KEEP_RECENT = 10  # size of the rolling "most recent" window
DEFAULT_KEEP_BEST = 3  # number of best-ranked iterations
@dataclass
class IterationInfo:
    """Bookkeeping record describing one iteration folder.

    The three result fields default to infinity and ``feasible`` defaults to
    ``False``, so a record created without results compares as worse than any
    record holding finite values.
    """

    # Iteration (trial) number.
    number: int
    # Folder holding this iteration's files.
    path: Path
    # Result values; infinity until set.
    mass: float = float("inf")
    displacement: float = float("inf")
    stress: float = float("inf")
    # Whether the iteration has been judged feasible.
    feasible: bool = False
    # Whether model files are still present; False after stripping.
    has_models: bool = True
@dataclass
class IterationManager:
    """Manages iteration folders with smart retention.

    Full model files are kept for the most recent ``keep_recent`` iterations
    and for the ``keep_best`` best-ranked iterations; every other iteration
    has its model files deleted while its remaining files are left in place.

    Usage:
        mgr = IterationManager(study_dir, master_model_dir)

        # Before each trial:
        iter_dir = mgr.prepare_iteration(iteration_number)

        # After trial completes:
        mgr.record_result(iteration_number, mass=..., displacement=..., stress=...)

        # Periodically or at study end:
        mgr.apply_retention()

    Attributes:
        study_dir: Study root; iteration folders live in ``study_dir/iterations``.
        master_model_dir: Folder the model files are copied from.
        keep_recent: Size of the rolling window of newest iterations that keep
            their models. Zero or negative disables the window.
        keep_best: Number of best-ranked iterations that keep their models.
            Zero or negative disables it.
        max_displacement_mm: Largest displacement still counted as feasible.
        max_stress_mpa: Largest stress still counted as feasible.
    """

    study_dir: Path
    master_model_dir: Path
    keep_recent: int = DEFAULT_KEEP_RECENT
    keep_best: int = DEFAULT_KEEP_BEST
    _iterations: dict[int, IterationInfo] = field(default_factory=dict, repr=False)
    # Feasibility limits. Declared after the pre-existing fields so that
    # positional construction by existing callers is unaffected.
    max_displacement_mm: float = 10.0
    max_stress_mpa: float = 130.0

    def __post_init__(self) -> None:
        """Create the iterations folder and index folders already on disk.

        Every ``iter<N>`` sub-folder is registered (resume support). Results
        are loaded from its ``results.json`` when that file is readable; an
        unreadable or malformed file leaves the default (infinite, infeasible)
        values in place instead of dropping the iteration from tracking.
        """
        # Accept plain strings as well as Path objects.
        self.study_dir = Path(self.study_dir)
        self.master_model_dir = Path(self.master_model_dir)

        self.iterations_dir = self.study_dir / "iterations"
        self.iterations_dir.mkdir(parents=True, exist_ok=True)

        for folder in sorted(self.iterations_dir.iterdir()):
            if not (folder.is_dir() and folder.name.startswith("iter")):
                continue
            try:
                num = int(folder.name[len("iter"):])
            except ValueError:
                # Not an "iter<N>" folder (e.g. "iter_backup"); ignore it.
                continue

            info = IterationInfo(number=num, path=folder)
            self._load_results(info)
            info.has_models = any(
                f.suffix in MODEL_EXTENSIONS for f in folder.iterdir()
            )
            self._iterations[num] = info

        if self._iterations:
            logger.info(
                "Loaded %d existing iterations (resume support)",
                len(self._iterations),
            )

    @staticmethod
    def _as_metric(value: object) -> float:
        """Convert a loaded JSON value to a float, mapping junk/NaN to infinity."""
        try:
            number = float(value)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            return math.inf
        return math.inf if math.isnan(number) else number

    @staticmethod
    def _quality_key(info: IterationInfo) -> tuple[int, float]:
        """Sort key: feasible iterations first, then ascending mass.

        A NaN mass is ranked as infinity because NaN never compares as
        smaller or larger and would make the sort order undefined.
        """
        mass = math.inf if math.isnan(info.mass) else info.mass
        return (0 if info.feasible else 1, mass)

    def _is_feasible(self, displacement: float, stress: float) -> bool:
        """Return True when both values are within the configured limits."""
        return (
            displacement <= self.max_displacement_mm
            and stress <= self.max_stress_mpa
        )

    def _load_results(self, info: IterationInfo) -> None:
        """Fill ``info`` from ``results.json`` in its folder, if usable."""
        results_file = info.path / "results.json"
        if not results_file.exists():
            return
        try:
            data = json.loads(results_file.read_text())
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Ignoring unreadable %s: %s", results_file, exc)
            return
        if not isinstance(data, dict):
            logger.warning("Ignoring %s: expected a JSON object", results_file)
            return

        info.mass = self._as_metric(data.get("mass_kg"))
        info.displacement = self._as_metric(data.get("tip_displacement_mm"))
        info.stress = self._as_metric(data.get("max_von_mises_mpa"))
        info.feasible = self._is_feasible(info.displacement, info.stress)

    def prepare_iteration(self, iteration_number: int) -> Path:
        """Set up an iteration folder with fresh model copies.

        Any existing folder for this iteration is deleted first. Every file in
        ``master_model_dir`` whose suffix is in ``MODEL_EXTENSIONS`` is then
        copied in, and the iteration is (re-)registered with default results.
        Paths are resolved to absolute before use.

        Args:
            iteration_number: Trial number; formatted as ``iter%03d``.

        Returns:
            Absolute path to the iteration folder.
        """
        iter_dir = (self.iterations_dir / f"iter{iteration_number:03d}").resolve()

        # Leftovers from a failed previous run must not leak into this one.
        if iter_dir.exists():
            shutil.rmtree(iter_dir)
        iter_dir.mkdir(parents=True)

        master = self.master_model_dir.resolve()
        copied = 0
        for ext in MODEL_EXTENSIONS:
            for src in master.glob(f"*{ext}"):
                shutil.copy2(src, iter_dir / src.name)
                copied += 1

        logger.info(
            "Prepared iter%03d: copied %d model files to %s",
            iteration_number, copied, iter_dir,
        )
        if copied == 0:
            logger.warning("No model files found in %s", master)

        self._iterations[iteration_number] = IterationInfo(
            number=iteration_number,
            path=iter_dir,
            has_models=True,
        )
        return iter_dir

    def record_result(
        self,
        iteration_number: int,
        mass: float,
        displacement: float,
        stress: float,
    ) -> None:
        """Record results for an iteration and run the periodic retention check.

        Results for an iteration number that is not tracked are not stored
        (a warning is logged). Retention runs whenever ``iteration_number``
        is a multiple of 5, whether or not the iteration was tracked.

        Args:
            iteration_number: Trial number.
            mass: Extracted mass in kg.
            displacement: Tip displacement in mm.
            stress: Max von Mises stress in MPa.
        """
        info = self._iterations.get(iteration_number)
        if info is None:
            logger.warning(
                "record_result: iteration %d is not tracked; results ignored",
                iteration_number,
            )
        else:
            info.mass = mass
            info.displacement = displacement
            info.stress = stress
            info.feasible = self._is_feasible(displacement, stress)

        # Apply retention every 5 iterations to keep disk usage in check.
        if iteration_number % 5 == 0:
            self.apply_retention()

    def apply_retention(self) -> None:
        """Apply the smart retention policy.

        Keep full model files for:
          1. the last ``keep_recent`` iterations (rolling window), and
          2. the best ``keep_best`` iterations (feasible first, then mass).

        Model files are stripped from every other iteration that still has
        them.
        """
        if not self._iterations:
            return

        all_nums = sorted(self._iterations)

        # Set 1: last N iterations. The guard matters: all_nums[-0:] is the
        # WHOLE list, so keep_recent == 0 would otherwise keep everything.
        if self.keep_recent > 0:
            recent_set = set(all_nums[-self.keep_recent:])
        else:
            recent_set = set()

        # Set 2: best K by objective (same ranking as get_best_iterations).
        best_set = {
            info.number for info in self.get_best_iterations(self.keep_best)
        }

        # Keep set = recent UNION best.
        keep_set = recent_set | best_set

        stripped = 0
        for num, info in self._iterations.items():
            if num not in keep_set and info.has_models:
                self._strip_models(info)
                stripped += 1

        if stripped > 0:
            logger.info(
                "Retention: kept %d recent + %d best, stripped %d iterations",
                len(recent_set), len(best_set), stripped,
            )

    def _strip_models(self, info: IterationInfo) -> None:
        """Remove model files from an iteration folder, keep everything else.

        Only regular files whose suffix is in ``MODEL_EXTENSIONS`` are
        deleted. ``info.has_models`` is cleared afterwards, including when the
        folder no longer exists, so the iteration is not revisited.
        """
        if not info.path.exists():
            info.has_models = False
            return

        removed = 0
        for f in info.path.iterdir():
            if f.is_file() and f.suffix in MODEL_EXTENSIONS:
                f.unlink()
                removed += 1
        info.has_models = False

        if removed > 0:
            logger.debug(
                "Stripped %d model files from iter%03d",
                removed, info.number,
            )

    def get_best_iterations(self, n: int = 3) -> list[IterationInfo]:
        """Return the N best iterations (feasible first, then lowest mass).

        Args:
            n: Maximum number of iterations to return; zero or negative
                yields an empty list.

        Returns:
            Up to ``n`` tracked iterations, best first.
        """
        if n <= 0:
            # A negative slice bound would drop items from the wrong end.
            return []
        return sorted(self._iterations.values(), key=self._quality_key)[:n]