# Commit note (residue from a pasted commit message, kept for history):
# - Add validation framework (config, model, results, study validators)
# - Add Claude Code skills (create-study, run-optimization, generate-report,
#   troubleshoot, analyze-model)
# - Add Atomizer Dashboard (React frontend + FastAPI backend)
# - Reorganize docs into structured directories (00-09)
# - Add neural surrogate modules and training infrastructure
# - Add multi-objective optimization support
"""
|
|
Space-Filling Training Data Generator
|
|
|
|
This script generates FEA training points that cover the ENTIRE design space
|
|
uniformly, unlike optimization which focuses only on promising regions.
|
|
|
|
Sampling Methods:
|
|
1. Latin Hypercube Sampling (LHS) - Good coverage, no clustering
|
|
2. Sobol Sequence - Quasi-random, very uniform
|
|
3. Grid Sampling - Regular grid, exhaustive but slow
|
|
|
|
Usage:
|
|
python generate_training_data.py --study uav_arm_optimization --method lhs --points 100
|
|
"""
|
|
import sys
|
|
from pathlib import Path
|
|
import json
|
|
import argparse
|
|
import numpy as np
|
|
from scipy.stats import qmc # For Latin Hypercube and Sobol
|
|
|
|
project_root = Path(__file__).parent
|
|
sys.path.insert(0, str(project_root))
|
|
|
|
|
|
def load_config_bounds(study_path: Path) -> tuple:
    """Load design variable bounds from optimization_config.json.

    Supports two config formats:
    1. {"parameter": "name", "bounds": [min, max]} - Current format
    2. {"name": "name", "min_value": min, "max_value": max} - Legacy format

    Args:
        study_path: Root directory of the study (contains "1_setup/").

    Returns:
        (bounds, config) where bounds maps variable name ->
        {'min', 'max', 'type'} ('int' or 'float'), and config is the
        full parsed JSON document.

    Raises:
        FileNotFoundError: if the config file does not exist.
        ValueError: if a design variable entry has no usable name.
    """
    config_path = study_path / "1_setup" / "optimization_config.json"

    if not config_path.exists():
        raise FileNotFoundError(f"Config not found: {config_path}")

    with open(config_path) as f:
        config = json.load(f)

    bounds = {}
    for var in config.get('design_variables', []):
        # Support both 'parameter' and 'name' keys
        name = var.get('parameter') or var.get('name')
        if not name:
            # Fail loudly instead of silently creating a None-keyed entry.
            raise ValueError(f"Design variable entry has no 'parameter' or 'name': {var}")

        # Support both "bounds": [min, max] and "min_value"/"max_value" formats
        if 'bounds' in var:
            min_val, max_val = var['bounds']
        else:
            min_val = var.get('min_value', var.get('min', 0))
            max_val = var.get('max_value', var.get('max', 1))

        # Detect integer type based on explicit type, a "count"-style name,
        # or a small all-integer range (heuristic: span < 20).
        is_int = (var.get('type') == 'integer' or
                  'count' in name.lower() or
                  (isinstance(min_val, int) and isinstance(max_val, int) and max_val - min_val < 20))

        bounds[name] = {
            'min': min_val,
            'max': max_val,
            'type': 'int' if is_int else 'float'
        }

    return bounds, config
|
|
|
|
|
def generate_lhs_samples(bounds: dict, n_samples: int, seed: int = 42) -> list:
    """
    Generate Latin Hypercube Samples across the full design space.

    LHS ensures:
    - Each dimension is divided into n equal intervals
    - Exactly one sample in each interval per dimension
    - Much better coverage than random sampling
    """
    names = list(bounds)

    # Draw n_samples points in the unit hypercube, then rescale per dimension.
    sampler = qmc.LatinHypercube(d=len(names), seed=seed)
    unit_points = sampler.random(n=n_samples)

    samples = []
    for row in unit_points:
        point = {}
        for name, u in zip(names, row):
            b = bounds[name]
            value = b['min'] + u * (b['max'] - b['min'])
            # Integer variables are rounded to the nearest whole value.
            point[name] = int(round(value)) if b['type'] == 'int' else value
        samples.append(point)

    return samples
|
|
|
|
|
|
def generate_sobol_samples(bounds: dict, n_samples: int, seed: int = 42) -> list:
    """
    Generate Sobol sequence samples (quasi-random, very uniform).

    Sobol sequences are deterministic and provide excellent uniformity.
    """
    names = list(bounds)
    unit_points = qmc.Sobol(d=len(names), scramble=True, seed=seed).random(n=n_samples)

    def _rescale(name, u):
        # Map a unit-interval coordinate onto the variable's [min, max] range,
        # rounding integer variables to the nearest whole value.
        b = bounds[name]
        value = b['min'] + u * (b['max'] - b['min'])
        return int(round(value)) if b['type'] == 'int' else value

    return [
        {name: _rescale(name, row[j]) for j, name in enumerate(names)}
        for row in unit_points
    ]
|
|
|
|
|
|
def generate_grid_samples(bounds: dict, points_per_dim: int = 5) -> list:
    """
    Generate regular grid samples.

    Warning: Scales exponentially with dimensions!
    4 dims x 5 points = 625 samples
    4 dims x 10 points = 10,000 samples

    Args:
        bounds: Mapping of variable name -> {'min', 'max', 'type'}.
        points_per_dim: Number of grid points along each dimension.

    Returns:
        List of sample dicts (variable name -> value). Integer dimensions
        may contribute fewer than points_per_dim distinct values after
        rounding, so the total count can be below points_per_dim**n_dims.
    """
    var_names = list(bounds.keys())
    if not var_names:
        # Guard: an empty design space yields no samples (the meshgrid
        # path below would raise IndexError on flat[0]).
        return []

    # Create linspace for each dimension
    grids = []
    for name in var_names:
        b = bounds[name]
        values = np.linspace(b['min'], b['max'], points_per_dim)
        if b['type'] == 'int':
            # Round to actual integers and drop duplicates introduced
            # by rounding (e.g. a [1, 3] range with 5 points).
            values = np.unique(np.round(values).astype(int))
        grids.append(values)

    # Create meshgrid and flatten into per-dimension coordinate arrays
    mesh = np.meshgrid(*grids, indexing='ij')
    flat = [m.flatten() for m in mesh]

    samples = []
    for i in range(len(flat[0])):
        point = {}
        for j, name in enumerate(var_names):
            value = flat[j][i]
            # Convert numpy scalars to native Python types for JSON output.
            if bounds[name]['type'] == 'int':
                value = int(value)
            else:
                value = float(value)
            point[name] = value
        samples.append(point)

    return samples
|
|
|
|
|
|
def generate_corner_samples(bounds: dict) -> list:
    """
    Generate samples at all corners of the design space.

    This ensures the NN sees the extreme combinations.
    For 4 dimensions: 2^4 = 16 corner points
    """
    names = list(bounds)

    samples = []
    for mask in range(2 ** len(names)):
        corner = {}
        for bit, name in enumerate(names):
            b = bounds[name]
            # Bit `bit` of the mask selects the max bound; otherwise the min.
            value = b['max'] if (mask >> bit) & 1 else b['min']
            if b['type'] == 'int':
                value = int(value)
            corner[name] = value
        samples.append(corner)

    return samples
|
|
|
|
|
|
def save_training_points(samples: list, output_path: Path):
    """Write the training points to *output_path* as pretty-printed JSON."""
    payload = {
        'n_samples': len(samples),
        'samples': samples,
    }
    with open(output_path, 'w') as f:
        json.dump(payload, f, indent=2)
    print(f"Saved {len(samples)} training points to: {output_path}")
|
|
|
|
|
|
def visualize_coverage(samples: list, bounds: dict, save_path: Path):
    """Visualize how well samples cover the design space.

    Renders a lower-triangular matrix of pairwise scatter plots (one per
    variable pair), with axis limits pinned to each variable's bounds so
    gaps in coverage are visible, and saves the figure to *save_path*.

    NOTE(review): assumes at least 2 design variables; with fewer,
    plt.subplots(n_vars-1, n_vars-1) would fail — confirm callers.
    """
    # Imported lazily so the rest of the script works without matplotlib;
    # 'Agg' backend allows saving figures on headless machines.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    var_names = list(bounds.keys())
    n_vars = len(var_names)

    # Create pairwise scatter plots
    fig, axes = plt.subplots(n_vars-1, n_vars-1, figsize=(12, 12))

    for i in range(n_vars - 1):
        for j in range(i + 1, n_vars):
            # With exactly 2 variables, subplots() returns a single Axes
            # rather than a 2-D array, hence the special case.
            ax = axes[j-1, i] if n_vars > 2 else axes

            x = [s[var_names[i]] for s in samples]
            y = [s[var_names[j]] for s in samples]

            ax.scatter(x, y, alpha=0.5, s=20)
            # Break long names across lines to keep labels readable.
            ax.set_xlabel(var_names[i].replace('_', '\n'), fontsize=8)
            ax.set_ylabel(var_names[j].replace('_', '\n'), fontsize=8)

            # Show bounds: pin axis limits to the full design range so
            # uncovered regions are visually apparent.
            b_i = bounds[var_names[i]]
            b_j = bounds[var_names[j]]
            ax.set_xlim(b_i['min'], b_i['max'])
            ax.set_ylim(b_j['min'], b_j['max'])
            ax.grid(True, alpha=0.3)

    # Hide unused subplots (the upper triangle of the grid is never drawn).
    for i in range(n_vars - 1):
        for j in range(i):
            if n_vars > 2:
                axes[i, j].set_visible(False)

    plt.suptitle(f'Design Space Coverage ({len(samples)} samples)', fontsize=14)
    plt.tight_layout()
    plt.savefig(save_path, dpi=150)
    plt.close()
    print(f"Saved coverage plot: {save_path}")
|
|
|
|
|
|
def main():
    """CLI entry point: load a study's bounds, generate space-filling
    samples with the chosen method, save them, and optionally plot coverage.

    Exits with status 1 if the requested study directory does not exist.
    """
    parser = argparse.ArgumentParser(description='Generate space-filling training data')
    parser.add_argument('--study', required=True, help='Study name (e.g., uav_arm_optimization)')
    parser.add_argument('--method', default='lhs', choices=['lhs', 'sobol', 'grid', 'corners', 'combined'],
                        help='Sampling method')
    parser.add_argument('--points', type=int, default=100, help='Number of samples (for lhs/sobol)')
    parser.add_argument('--grid-points', type=int, default=5, help='Points per dimension (for grid)')
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--visualize', action='store_true', help='Generate coverage plot')
    args = parser.parse_args()

    study_path = project_root / "studies" / args.study
    if not study_path.exists():
        print(f"ERROR: Study not found: {study_path}")
        # Fix: exit non-zero so shell scripts / CI notice the failure
        # (previously returned normally, exiting with status 0).
        sys.exit(1)

    print("="*70)
    print("Space-Filling Training Data Generator")
    print("="*70)

    # Load bounds from config
    print(f"\nLoading config from: {study_path}")
    bounds, config = load_config_bounds(study_path)

    print(f"\nDesign Variable Bounds:")
    for name, b in bounds.items():
        print(f"  {name}: [{b['min']}, {b['max']}] ({b['type']})")

    # Generate samples
    print(f"\nGenerating samples using method: {args.method}")

    if args.method == 'lhs':
        samples = generate_lhs_samples(bounds, args.points, args.seed)
    elif args.method == 'sobol':
        samples = generate_sobol_samples(bounds, args.points, args.seed)
    elif args.method == 'grid':
        samples = generate_grid_samples(bounds, args.grid_points)
    elif args.method == 'corners':
        samples = generate_corner_samples(bounds)
    elif args.method == 'combined':
        # Combine corners + LHS for best coverage
        corner_samples = generate_corner_samples(bounds)
        # Fix: clamp to zero so a --points value smaller than the corner
        # count does not request a negative number of LHS samples.
        n_lhs = max(0, args.points - len(corner_samples))
        lhs_samples = generate_lhs_samples(bounds, n_lhs, args.seed)
        samples = corner_samples + lhs_samples
        print(f"  Combined: {len(corner_samples)} corners + {len(lhs_samples)} LHS")

    print(f"  Generated {len(samples)} samples")

    # Show sample range coverage
    print(f"\nSample Coverage:")
    for name in bounds.keys():
        values = [s[name] for s in samples]
        print(f"  {name}: [{min(values):.2f}, {max(values):.2f}]")

    # Save samples
    output_path = study_path / "1_setup" / "training_points.json"
    save_training_points(samples, output_path)

    # Visualize if requested
    if args.visualize:
        plot_path = study_path / "1_setup" / "training_coverage.png"
        visualize_coverage(samples, bounds, plot_path)

    print("\n" + "="*70)
    print("NEXT STEPS")
    print("="*70)
    print(f"1. Run FEA on all {len(samples)} training points:")
    print(f"   python run_training_fea.py --study {args.study}")
    print(f"2. This will create comprehensive training data")
    print(f"3. Then retrain NN on this uniform data")
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
|