275 lines
8.8 KiB
Python
275 lines
8.8 KiB
Python
"""Latin Hypercube Sampling for Hydrotech Beam DoE.
|
|
|
|
Generates LHS sample points for the 4 design variables with:
|
|
- Maximin LHS for space-filling coverage
|
|
- Integer rounding for hole_count (DV4)
|
|
- Stratified integer sampling to ensure all 11 hole_count levels are covered
|
|
- Baseline (Trial 0) always included
|
|
|
|
References:
|
|
OPTIMIZATION_STRATEGY.md §2.3 — Phase 1 configuration
|
|
OPTIMIZATION_STRATEGY.md §1.4 — Integer handling
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
import numpy as np
|
|
from scipy.stats.qmc import LatinHypercube
|
|
|
|
from geometric_checks import DesignPoint
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Design variable bounds
|
|
# ---------------------------------------------------------------------------
|
|
@dataclass(frozen=True)
|
|
class DVBounds:
|
|
"""Design variable bounds and metadata."""
|
|
|
|
name: str
|
|
nx_expression: str
|
|
lower: float
|
|
upper: float
|
|
baseline: float
|
|
is_integer: bool = False
|
|
|
|
|
|
DV_DEFINITIONS: list[DVBounds] = [
|
|
DVBounds("beam_half_core_thickness", "beam_half_core_thickness", 10.0, 40.0, 25.162),
|
|
DVBounds("beam_face_thickness", "beam_face_thickness", 10.0, 40.0, 21.504),
|
|
DVBounds("holes_diameter", "holes_diameter", 150.0, 450.0, 300.0),
|
|
DVBounds("hole_count", "hole_count", 5.0, 15.0, 10.0, is_integer=True),
|
|
]
|
|
|
|
N_DVS = len(DV_DEFINITIONS)
|
|
BASELINE_VALUES = [dv.baseline for dv in DV_DEFINITIONS]
|
|
|
|
|
|
def get_baseline_point() -> DesignPoint:
|
|
"""Return the baseline design point (Trial 0).
|
|
|
|
Returns:
|
|
DesignPoint with confirmed baseline values from NX introspection.
|
|
"""
|
|
return DesignPoint(
|
|
beam_half_core_thickness=25.162,
|
|
beam_face_thickness=21.504,
|
|
holes_diameter=300.0,
|
|
hole_count=10,
|
|
)
|
|
|
|
|
|
def generate_lhs_samples(
|
|
n_samples: int = 50,
|
|
seed: int = 42,
|
|
include_baseline: bool = True,
|
|
) -> list[DesignPoint]:
|
|
"""Generate LHS sample points for the DoE study.
|
|
|
|
Strategy for integer coverage (hole_count = 5..15, 11 levels):
|
|
1. Generate `n_samples` LHS points with continuous DV4
|
|
2. Round DV4 to nearest integer
|
|
3. Check coverage — if any of the 11 levels are missing, replace
|
|
the closest duplicate trial with the missing level
|
|
This ensures all integer levels are represented while maintaining
|
|
the space-filling property of LHS for the continuous variables.
|
|
|
|
Args:
|
|
n_samples: Number of LHS sample points (default: 50).
|
|
seed: Random seed for reproducibility.
|
|
include_baseline: If True, prepend baseline as Trial 0.
|
|
|
|
Returns:
|
|
List of DesignPoint instances. If include_baseline is True,
|
|
the first element is the baseline (Trial 0).
|
|
"""
|
|
logger.info(
|
|
"Generating %d LHS samples (seed=%d, baseline=%s)",
|
|
n_samples, seed, include_baseline,
|
|
)
|
|
|
|
# Generate unit hypercube LHS samples
|
|
sampler = LatinHypercube(d=N_DVS, seed=seed, optimization="random-cd")
|
|
unit_samples = sampler.random(n=n_samples) # shape: (n_samples, 4)
|
|
|
|
# Scale to design variable bounds
|
|
samples = _scale_samples(unit_samples)
|
|
|
|
# Round hole_count to nearest integer
|
|
samples[:, 3] = np.round(samples[:, 3]).astype(int)
|
|
samples[:, 3] = np.clip(samples[:, 3], 5, 15)
|
|
|
|
# Ensure full integer coverage for hole_count
|
|
samples = _ensure_integer_coverage(samples, rng=np.random.default_rng(seed))
|
|
|
|
# Convert to DesignPoint list
|
|
points: list[DesignPoint] = []
|
|
if include_baseline:
|
|
points.append(get_baseline_point())
|
|
|
|
for row in samples:
|
|
points.append(
|
|
DesignPoint(
|
|
beam_half_core_thickness=float(row[0]),
|
|
beam_face_thickness=float(row[1]),
|
|
holes_diameter=float(row[2]),
|
|
hole_count=int(row[3]),
|
|
)
|
|
)
|
|
|
|
logger.info(
|
|
"Generated %d total points (%d LHS + %s baseline)",
|
|
len(points),
|
|
n_samples,
|
|
"1" if include_baseline else "0",
|
|
)
|
|
_log_coverage(points)
|
|
|
|
return points
|
|
|
|
|
|
def _scale_samples(unit_samples: np.ndarray) -> np.ndarray:
|
|
"""Scale unit hypercube [0,1]^d samples to design variable bounds.
|
|
|
|
Args:
|
|
unit_samples: Array of shape (n, 4) with values in [0, 1].
|
|
|
|
Returns:
|
|
Scaled array with values in [lower, upper] for each DV.
|
|
"""
|
|
lower = np.array([dv.lower for dv in DV_DEFINITIONS])
|
|
upper = np.array([dv.upper for dv in DV_DEFINITIONS])
|
|
return lower + unit_samples * (upper - lower)
|
|
|
|
|
|
def _ensure_integer_coverage(
|
|
samples: np.ndarray,
|
|
rng: np.random.Generator,
|
|
) -> np.ndarray:
|
|
"""Ensure all 11 hole_count levels (5-15) are represented.
|
|
|
|
If any integer level is missing, replace a duplicate from the most
|
|
over-represented level with a sample at the missing level.
|
|
Continuous DVs for replacement samples are drawn randomly within bounds.
|
|
|
|
Args:
|
|
samples: Array of shape (n, 4) with rounded hole_count in col 3.
|
|
rng: NumPy random generator for reproducibility.
|
|
|
|
Returns:
|
|
Modified samples array with full integer coverage.
|
|
"""
|
|
all_levels = set(range(5, 16)) # {5, 6, 7, ..., 15}
|
|
present_levels = set(int(x) for x in samples[:, 3])
|
|
missing_levels = all_levels - present_levels
|
|
|
|
if not missing_levels:
|
|
logger.info("All 11 hole_count levels represented ✓")
|
|
return samples
|
|
|
|
# Skip patching when sample size is too small to cover all levels
|
|
n_samples = len(samples)
|
|
if n_samples < len(all_levels):
|
|
logger.info(
|
|
"Only %d samples — too few to cover all 11 hole_count levels "
|
|
"(need ≥11). Skipping stratified patching.",
|
|
n_samples,
|
|
)
|
|
return samples
|
|
|
|
logger.warning(
|
|
"Missing hole_count levels: %s — patching with replacements",
|
|
sorted(missing_levels),
|
|
)
|
|
|
|
for missing_level in sorted(missing_levels):
|
|
# Find the most over-represented level
|
|
unique, counts = np.unique(samples[:, 3].astype(int), return_counts=True)
|
|
most_common_idx = np.argmax(counts)
|
|
most_common_level = unique[most_common_idx]
|
|
|
|
# Find indices with the most common level
|
|
candidates = np.where(samples[:, 3].astype(int) == most_common_level)[0]
|
|
replace_idx = rng.choice(candidates)
|
|
|
|
# Replace: keep continuous DVs random within bounds, set hole_count
|
|
for j, dv in enumerate(DV_DEFINITIONS):
|
|
if dv.is_integer:
|
|
samples[replace_idx, j] = missing_level
|
|
else:
|
|
samples[replace_idx, j] = rng.uniform(dv.lower, dv.upper)
|
|
|
|
logger.info(
|
|
" Replaced trial at idx %d: hole_count %d → %d",
|
|
replace_idx, most_common_level, missing_level,
|
|
)
|
|
|
|
return samples
|
|
|
|
|
|
def _log_coverage(points: list[DesignPoint]) -> None:
|
|
"""Log hole_count coverage statistics."""
|
|
counts: dict[int, int] = {}
|
|
for p in points:
|
|
counts[p.hole_count] = counts.get(p.hole_count, 0) + 1
|
|
|
|
logger.info("Hole count coverage:")
|
|
for level in range(5, 16):
|
|
n = counts.get(level, 0)
|
|
logger.info(" hole_count=%2d: %d trials", level, n)
|
|
|
|
|
|
def points_to_dicts(points: list[DesignPoint]) -> list[dict[str, Any]]:
|
|
"""Convert DesignPoint list to list of dicts (for Optuna enqueue).
|
|
|
|
Args:
|
|
points: List of DesignPoint instances.
|
|
|
|
Returns:
|
|
List of dicts with DV names as keys.
|
|
"""
|
|
return [
|
|
{
|
|
"beam_half_core_thickness": p.beam_half_core_thickness,
|
|
"beam_face_thickness": p.beam_face_thickness,
|
|
"holes_diameter": p.holes_diameter,
|
|
"hole_count": p.hole_count,
|
|
}
|
|
for p in points
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Quick self-test
|
|
# ---------------------------------------------------------------------------
|
|
if __name__ == "__main__":
|
|
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
|
|
|
points = generate_lhs_samples(n_samples=50, seed=42)
|
|
print(f"\nGenerated {len(points)} total points")
|
|
print(f" Trial 0 (baseline): {points[0]}")
|
|
print(f" Trial 1 (first LHS): {points[1]}")
|
|
print(f" Trial {len(points)-1} (last LHS): {points[-1]}")
|
|
|
|
# Verify coverage
|
|
hole_counts = {p.hole_count for p in points}
|
|
expected = set(range(5, 16))
|
|
assert hole_counts == expected, (
|
|
f"Missing hole_count levels: {expected - hole_counts}"
|
|
)
|
|
print("\nAll 11 hole_count levels covered ✓")
|
|
|
|
# Verify bounds
|
|
for p in points[1:]: # skip baseline
|
|
assert 10.0 <= p.beam_half_core_thickness <= 40.0
|
|
assert 10.0 <= p.beam_face_thickness <= 40.0
|
|
assert 150.0 <= p.holes_diameter <= 450.0
|
|
assert 5 <= p.hole_count <= 15
|
|
print("All samples within bounds ✓")
|