feat: Add configuration validation system for MVP stability (Phase 1.2)

Implements JSON Schema validation for optimization configurations to ensure
consistency across all studies and prevent configuration errors.

Added:
- optimization_engine/schemas/optimization_config_schema.json
  - Comprehensive schema for Protocol 10 & 11 configurations
  - Validates objectives, constraints, design variables, simulation settings
  - Enforces standard field names (goal, bounds, parameter, threshold)

- optimization_engine/config_manager.py
  - ConfigManager class with schema validation
  - CLI tool: python config_manager.py <config.json>
  - Type-safe accessor methods for config elements
  - Custom validations: bounds check, multi-objective consistency, location check

- optimization_engine/schemas/README.md
  - Complete documentation of standard configuration format
  - Validation examples and common error fixes
  - Migration guidance for legacy configs

- docs/07_DEVELOPMENT/Phase_1_2_Implementation_Plan.md
  - Detailed implementation plan for remaining Phase 1.2 tasks
  - Migration tool design, integration guide, testing plan

Testing:
- Validated drone_gimbal_arm_optimization config successfully
- ConfigManager works with drone_gimbal format (new standard)
- Identifies legacy format issues in bracket studies

Standards Established:
- Configuration location: studies/{name}/1_setup/
- Objective direction: "goal" not "type"
- Design var bounds: "bounds": [min, max] not "min"/"max"
- Design var name: "parameter" not "name"
- Constraint threshold: "threshold" not "value"

Next Steps (Phase 1.2.1+):
- Config migration tool for legacy studies
- Integration with run_optimization.py
- Update create-study Claude skill with schema reference
- Migrate bracket studies to new format

Relates to: Phase 1.2 MVP Development Plan

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-24 09:21:55 -05:00
parent d228ccec66
commit 155f5a8522
4 changed files with 1070 additions and 0 deletions

View File

@@ -0,0 +1,244 @@
"""Configuration validation and management for Atomizer studies.
This module provides schema-based validation for optimization configuration files,
ensuring consistency across all studies.
Usage:
# In run_optimization.py
from optimization_engine.config_manager import ConfigManager
config_manager = ConfigManager(Path(__file__).parent / "1_setup" / "optimization_config.json")
config_manager.load_config()
if not config_manager.validate():
print(config_manager.get_validation_report())
sys.exit(1)
# Access validated configuration
design_vars = config_manager.get_design_variables()
objectives = config_manager.get_objectives()
"""
import json
from pathlib import Path
from typing import Dict, List, Any, Optional
try:
import jsonschema
JSONSCHEMA_AVAILABLE = True
except ImportError:
JSONSCHEMA_AVAILABLE = False
print("Warning: jsonschema not installed. Install with: pip install jsonschema>=4.17.0")
class ConfigValidationError(Exception):
    """Error type reserved for configuration validation failures.

    NOTE(review): nothing in the visible module raises this exception;
    ConfigManager.validate() reports problems through its return value and
    ``validation_errors`` instead.
    """
class ConfigManager:
    """Load an optimization config file and validate it.

    Validation has two layers: JSON Schema validation (when the optional
    ``jsonschema`` package is installed) followed by custom consistency checks
    implemented in this class. Problems are collected as human-readable strings
    in ``validation_errors`` rather than raised.
    """

    def __init__(self, config_path: Path):
        """
        Initialize ConfigManager with path to optimization_config.json.

        Args:
            config_path: Path to optimization_config.json file
        """
        self.config_path = Path(config_path)
        # The schema ships alongside this module, in the "schemas" sub-directory.
        self.schema_path = Path(__file__).parent / "schemas" / "optimization_config_schema.json"
        self.config: Optional[Dict[str, Any]] = None
        self.validation_errors: List[str] = []

    def load_schema(self) -> Dict[str, Any]:
        """Load JSON schema for validation.

        Returns:
            The parsed schema document.

        Raises:
            FileNotFoundError: If the schema file does not exist.
        """
        if not self.schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {self.schema_path}")
        # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
        # would otherwise decide how non-ASCII characters are decoded.
        with open(self.schema_path, 'r', encoding="utf-8") as f:
            return json.load(f)

    def load_config(self) -> Dict[str, Any]:
        """Load configuration file and cache it on ``self.config``.

        Returns:
            The parsed configuration.

        Raises:
            FileNotFoundError: If the config file does not exist.
        """
        if not self.config_path.exists():
            raise FileNotFoundError(f"Config file not found: {self.config_path}")
        # Explicit UTF-8 for the same reason as in load_schema().
        with open(self.config_path, 'r', encoding="utf-8") as f:
            self.config = json.load(f)
        return self.config

    def validate(self, strict: bool = True) -> bool:
        """
        Validate configuration against the schema and the custom checks.

        The config is loaded first if that has not happened yet, and
        ``validation_errors`` is reset on every call.

        Args:
            strict: If True, stop at the first schema violation and skip the
                custom checks. If False, record the schema violation and still
                run the custom checks so that all findings are reported
                together. The result is False in both cases.

        Returns:
            True if no errors were recorded, False otherwise. Note that a
            missing ``jsonschema`` package is itself recorded as an error.
        """
        if self.config is None:
            self.load_config()
        self.validation_errors = []

        # JSON Schema validation
        if JSONSCHEMA_AVAILABLE:
            schema = self.load_schema()
            try:
                jsonschema.validate(instance=self.config, schema=schema)
            except jsonschema.ValidationError as e:
                # Point at the offending element; the bare message is ambiguous
                # when several objectives/variables share the same structure.
                location = "/".join(str(part) for part in e.absolute_path)
                suffix = f" (at {location})" if location else ""
                self.validation_errors.append(f"Schema validation failed: {e.message}{suffix}")
                if strict:
                    return False
        else:
            self.validation_errors.append("jsonschema not installed - schema validation skipped")

        # The custom checks index into a JSON object; anything else (list,
        # string, ...) cannot be inspected further.
        if not isinstance(self.config, dict):
            self.validation_errors.append(
                f"Configuration root must be a JSON object (got {type(self.config).__name__})"
            )
            return False

        # Custom validations
        self._validate_design_variable_bounds()
        self._validate_multi_objective_consistency()
        self._validate_file_locations()
        self._validate_extraction_consistency()
        return not self.validation_errors

    @staticmethod
    def _is_number(value: Any) -> bool:
        """Return True for int/float values, excluding bool."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def _list_of_dicts(self, key: str) -> List[Dict[str, Any]]:
        """Return the dict entries of the list stored under *key*.

        Malformed data (value is not a list, entry is not a dict) is skipped
        instead of raising: the custom checks may run on a config that has not
        passed schema validation (non-strict mode, or jsonschema missing).
        """
        entries = self.config.get(key, [])
        if not isinstance(entries, list):
            return []
        return [entry for entry in entries if isinstance(entry, dict)]

    def _validate_design_variable_bounds(self):
        """Ensure bounds are valid (min < max)."""
        for dv in self._list_of_dicts("design_variables"):
            bounds = dv.get("bounds", [])
            # Only the ordering is checked here; wrong shape or non-numeric
            # bounds are left to the schema and must not crash the comparison.
            if not isinstance(bounds, (list, tuple)) or len(bounds) != 2:
                continue
            low, high = bounds
            if not (self._is_number(low) and self._is_number(high)):
                continue
            if low >= high:
                self.validation_errors.append(
                    f"Design variable '{dv.get('parameter', 'unknown')}': "
                    f"min ({low}) must be < max ({high})"
                )

    def _validate_multi_objective_consistency(self):
        """Validate that protocol and sampler match the number of objectives."""
        objectives = self.config.get("objectives", [])
        n_objectives = len(objectives) if isinstance(objectives, list) else 0
        settings = self.config.get("optimization_settings", {})
        if not isinstance(settings, dict):
            settings = {}
        protocol = settings.get("protocol")
        sampler = settings.get("sampler")

        if n_objectives > 1:
            # Multi-objective should use protocol_11 and NSGA-II
            if protocol and protocol != "protocol_11_multi_objective":
                self.validation_errors.append(
                    f"Multi-objective optimization ({n_objectives} objectives) "
                    f"should use protocol_11_multi_objective (got {protocol})"
                )
            if sampler and sampler != "NSGAIISampler":
                self.validation_errors.append(
                    f"Multi-objective optimization should use NSGAIISampler (got {sampler})"
                )
        elif n_objectives == 1:
            # Single-objective should not use NSGA-II
            if sampler == "NSGAIISampler":
                self.validation_errors.append(
                    "Single-objective optimization should not use NSGAIISampler "
                    "(use TPESampler or CmaEsSampler)"
                )

    def _validate_file_locations(self):
        """Check that the config file sits directly inside a '1_setup' directory."""
        # Compare the directory *name*: a substring test on the whole path would
        # accept e.g. '1_setup_old/'. absolute() makes a bare relative filename
        # work when the tool is run from inside 1_setup/.
        if self.config_path.absolute().parent.name != "1_setup":
            self.validation_errors.append(
                f"Warning: Config should be in '1_setup/' directory, "
                f"found in {self.config_path.parent}"
            )

    def _validate_extraction_consistency(self):
        """Check that every objective and constraint has an 'extraction' block."""
        for label, key in (("Objective", "objectives"), ("Constraint", "constraints")):
            for entry in self._list_of_dicts(key):
                if "extraction" not in entry:
                    self.validation_errors.append(
                        f"{label} '{entry.get('name', 'unknown')}' missing extraction specification"
                    )

    def get_validation_report(self) -> str:
        """Get human-readable validation report.

        Returns:
            A one-line OK message when no errors are recorded, otherwise a
            header followed by one numbered line per error.
        """
        if not self.validation_errors:
            return "[OK] Configuration is valid"
        numbered = [f" {i}. {error}\n" for i, error in enumerate(self.validation_errors, 1)]
        return "[FAIL] Configuration validation failed:\n" + "".join(numbered)

    # Accessor methods
    def _get_section(self, key: str, default: Any) -> Any:
        """Return config[key] (or *default*), loading the config on first use."""
        if self.config is None:
            self.load_config()
        return self.config.get(key, default)

    def get_design_variables(self) -> List[Dict[str, Any]]:
        """Get the 'design_variables' list (empty list if absent)."""
        return self._get_section("design_variables", [])

    def get_objectives(self) -> List[Dict[str, Any]]:
        """Get the 'objectives' list (empty list if absent)."""
        return self._get_section("objectives", [])

    def get_constraints(self) -> List[Dict[str, Any]]:
        """Get the 'constraints' list (empty list if absent)."""
        return self._get_section("constraints", [])

    def get_simulation_settings(self) -> Dict[str, Any]:
        """Get the 'simulation' settings (empty dict if absent)."""
        return self._get_section("simulation", {})

    def get_optimization_settings(self) -> Dict[str, Any]:
        """Get the 'optimization_settings' mapping (empty dict if absent)."""
        return self._get_section("optimization_settings", {})
# CLI tool for validation
if __name__ == "__main__":
    import sys

    # A config path is mandatory; show usage and fail when it is missing.
    if len(sys.argv) < 2:
        print("Usage: python config_manager.py <path_to_optimization_config.json>")
        print("\nExample:")
        print(" python config_manager.py studies/drone_gimbal_arm_optimization/1_setup/optimization_config.json")
        sys.exit(1)

    divider = "=" * 60
    config_path = Path(sys.argv[1])
    print(f"Validating configuration: {config_path}")
    print(divider)

    manager = ConfigManager(config_path)
    try:
        manager.load_config()
        print("[OK] Config loaded successfully")

        is_valid = manager.validate()
        print(manager.get_validation_report())

        # The summary is only printed for a config that passed validation.
        if is_valid:
            print("\n" + divider)
            print("Configuration Summary:")
            print(f" Study: {manager.config.get('study_name')}")
            print(f" Protocol: {manager.get_optimization_settings().get('protocol')}")
            print(f" Design Variables: {len(manager.get_design_variables())}")
            print(f" Objectives: {len(manager.get_objectives())}")
            print(f" Constraints: {len(manager.get_constraints())}")

        # Exit status mirrors the validation result: 0 = valid, 1 = invalid.
        # SystemExit is not an Exception subclass, so the handler below does
        # not intercept it.
        exit_code = 0 if is_valid else 1
        sys.exit(exit_code)
    except Exception as e:
        # Top-level boundary of the CLI: report any failure and exit non-zero.
        print(f"[ERROR] {e}")
        sys.exit(1)

View File

@@ -0,0 +1,213 @@
# Atomizer Configuration Schemas
This directory contains JSON Schema files for validating Atomizer optimization study configurations.
## Files
- **optimization_config_schema.json** - Schema for `optimization_config.json` files
- Validates study configuration including objectives, constraints, design variables
- Ensures consistency across all studies
- Based on Protocol 10 (single-objective) and Protocol 11 (multi-objective) standards
## Usage
### Validate a Configuration
Use the ConfigManager CLI tool:
```bash
python optimization_engine/config_manager.py studies/your_study/1_setup/optimization_config.json
```
### In Python Code
```python
from pathlib import Path
from optimization_engine.config_manager import ConfigManager
config_path = Path("studies/my_study/1_setup/optimization_config.json")
manager = ConfigManager(config_path)
manager.load_config()
if manager.validate():
print("Configuration is valid")
design_vars = manager.get_design_variables()
objectives = manager.get_objectives()
else:
print(manager.get_validation_report())
```
## Schema Standards
### Standard Configuration Format
Based on the drone_gimbal_arm_optimization study, the standard format is:
```json
{
"study_name": "study_name_lowercase_with_underscores",
"description": "Brief description of optimization problem",
"engineering_context": "Real-world scenario and requirements",
"optimization_settings": {
"protocol": "protocol_11_multi_objective", // or protocol_10_single_objective
"n_trials": 30,
"sampler": "NSGAIISampler", // or TPESampler, CmaEsSampler
"pruner": null,
"timeout_per_trial": 600
},
"design_variables": [
{
"parameter": "nx_expression_name",
"bounds": [min, max],
"description": "What this parameter controls"
}
],
"objectives": [
{
"name": "objective_name",
"goal": "minimize", // or "maximize"
"weight": 1.0,
"description": "What this measures",
"target": 100.0, // optional
"extraction": {
"action": "extract_mass", // or extract_stress, extract_displacement, etc.
"domain": "result_extraction",
"params": {
"result_type": "mass",
"metric": "total"
}
}
}
],
"constraints": [
{
"name": "constraint_name",
"type": "less_than", // or "greater_than", "equal_to"
"threshold": 100.0,
"description": "Engineering justification",
"extraction": {
"action": "extract_displacement",
"domain": "result_extraction",
"params": {
"result_type": "displacement",
"metric": "max"
}
}
}
],
"simulation": {
"model_file": "Model.prt",
"sim_file": "Model_sim1.sim",
"fem_file": "Model_fem1.fem",
"solver": "nastran",
"analysis_types": ["static", "modal"]
},
"reporting": {
"generate_plots": true,
"save_incremental": true,
"llm_summary": false
}
}
```
## Field Name Standards
**IMPORTANT**: Use these field names (not legacy alternatives):
| Field | Standard Name | Legacy (DON'T USE) |
|-------|--------------|-------------------|
| Objective direction | `"goal"` | `"type"` |
| Design var bounds | `"bounds": [min, max]` | `"min": X, "max": Y` |
| Design var name | `"parameter"` | `"name"` |
| Constraint limit | `"threshold"` | `"value"` |
## File Location Standard
Configuration files **MUST** be in the `1_setup/` directory:
```
studies/your_study/
├── 1_setup/
│ ├── optimization_config.json ← HERE
│ └── workflow_config.json
├── 2_results/
└── run_optimization.py
```
## Validation Rules
The schema, together with ConfigManager's custom checks, enforces:
1. **Study Name**: Lowercase letters, digits, and underscores; 3-100 characters
2. **Design Variable Bounds**: min < max
3. **Multi-Objective Consistency**:
- 2-3 objectives → protocol_11_multi_objective + NSGAIISampler
- 1 objective → protocol_10_single_objective + TPESampler/CmaEsSampler
4. **Extraction Specs**: All objectives and constraints must have extraction blocks
5. **File Extensions**: .prt, .sim, .fem
6. **Analysis Types**: static, modal, thermal, or buckling
## Common Validation Errors
### Error: "min must be < max"
```json
// BAD
{"parameter": "thickness", "bounds": [10, 5]}
// GOOD
{"parameter": "thickness", "bounds": [5, 10]}
```
### Error: "Multi-objective should use NSGAIISampler"
```json
// BAD
{
"objectives": [{...}, {...}], // 2 objectives
"optimization_settings": {"sampler": "TPESampler"} // Wrong sampler!
}
// GOOD
{
"objectives": [{...}, {...}],
"optimization_settings": {"sampler": "NSGAIISampler"}
}
```
### Error: "Config should be in 1_setup/"
Move your configuration file from study root to `1_setup/` directory.
## Dependencies
The ConfigManager requires:
```bash
pip install "jsonschema>=4.17.0"
```
## Migration from Legacy Format
If you have old configuration files with:
- `"type"` instead of `"goal"` in objectives
- `"min"/"max"` instead of `"bounds"` in design variables
- `"name"` instead of `"parameter"` in design variables
- `"value"` instead of `"threshold"` in constraints
then use the migration tool (coming in Phase 1.2.1) to convert them:
```bash
python optimization_engine/config_migrator.py studies/old_study/optimization_config.json
```
## References
- [Phase 1.2 Implementation Plan](../../docs/07_DEVELOPMENT/Phase_1_2_Implementation_Plan.md)
- [create-study Claude Skill](../../.claude/skills/create-study.md)
- [drone_gimbal_arm Example](../../studies/drone_gimbal_arm_optimization/1_setup/optimization_config.json)
## Questions?
For MVP development questions, refer to [DEVELOPMENT.md](../../DEVELOPMENT.md) or the MVP plan in `docs/07_DEVELOPMENT/Today_Todo.md`.

View File

@@ -0,0 +1,279 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://atomizer.io/schemas/optimization_config.json",
"title": "Atomizer Optimization Configuration",
"description": "Schema for Atomizer optimization study configuration files",
"type": "object",
"required": ["study_name", "description", "optimization_settings", "design_variables", "objectives", "simulation"],
"properties": {
"study_name": {
"type": "string",
"description": "Unique identifier for this optimization study",
"pattern": "^[a-z0-9_]+$",
"minLength": 3,
"maxLength": 100
},
"description": {
"type": "string",
"description": "Brief description of the optimization problem",
"minLength": 10,
"maxLength": 500
},
"engineering_context": {
"type": "string",
"description": "Real-world engineering scenario and requirements (optional but recommended)",
"minLength": 20
},
"optimization_settings": {
"type": "object",
"required": ["protocol", "n_trials"],
"properties": {
"protocol": {
"type": "string",
"description": "Optimization protocol to use",
"enum": [
"protocol_10_single_objective",
"protocol_11_multi_objective",
"legacy"
]
},
"n_trials": {
"type": "integer",
"description": "Number of optimization trials to run",
"minimum": 1,
"maximum": 10000
},
"sampler": {
"type": "string",
"description": "Optuna sampler algorithm",
"enum": ["TPESampler", "NSGAIISampler", "CmaEsSampler", "RandomSampler"]
},
"pruner": {
"type": ["string", "null"],
"description": "Optuna pruner (null for no pruning)",
"enum": ["MedianPruner", "HyperbandPruner", "SuccessiveHalvingPruner", null]
},
"timeout_per_trial": {
"type": "integer",
"description": "Maximum time per trial in seconds",
"minimum": 60,
"maximum": 7200
}
}
},
"design_variables": {
"type": "array",
"description": "List of design variables (NX expressions) to optimize",
"minItems": 1,
"maxItems": 50,
"items": {
"type": "object",
"required": ["parameter", "bounds", "description"],
"properties": {
"parameter": {
"type": "string",
"description": "NX expression name",
"pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$"
},
"bounds": {
"type": "array",
"description": "[min, max] bounds for this parameter",
"minItems": 2,
"maxItems": 2,
"items": {
"type": "number"
}
},
"description": {
"type": "string",
"description": "What this parameter controls"
}
}
}
},
"objectives": {
"type": "array",
"description": "Optimization objectives (1 for single-objective, 2-3 for multi-objective)",
"minItems": 1,
"maxItems": 3,
"items": {
"type": "object",
"required": ["name", "goal", "weight", "description", "extraction"],
"properties": {
"name": {
"type": "string",
"description": "Objective name (e.g., 'mass', 'stiffness', 'frequency')"
},
"goal": {
"type": "string",
"description": "Optimization direction",
"enum": ["minimize", "maximize"]
},
"weight": {
"type": "number",
"description": "Relative importance weight",
"minimum": 0,
"maximum": 100
},
"description": {
"type": "string",
"description": "What this objective measures"
},
"target": {
"type": ["number", "null"],
"description": "Target value (optional)"
},
"extraction": {
"$ref": "#/definitions/extraction_spec"
}
}
}
},
"constraints": {
"type": "array",
"description": "Constraints that must be satisfied",
"items": {
"type": "object",
"required": ["name", "type", "threshold", "description", "extraction"],
"properties": {
"name": {
"type": "string",
"description": "Constraint name"
},
"type": {
"type": "string",
"description": "Constraint type",
"enum": ["less_than", "greater_than", "equal_to"]
},
"threshold": {
"type": "number",
"description": "Threshold value for constraint"
},
"description": {
"type": "string",
"description": "Engineering justification for this constraint"
},
"extraction": {
"$ref": "#/definitions/extraction_spec"
}
}
}
},
"simulation": {
"type": "object",
"required": ["model_file", "sim_file", "fem_file", "solver"],
"properties": {
"model_file": {
"type": "string",
"description": "NX Part file name (e.g., 'Beam.prt')",
"pattern": "\\.(prt|PRT)$"
},
"sim_file": {
"type": "string",
"description": "NX Simulation file name (e.g., 'Beam_sim1.sim')",
"pattern": "\\.(sim|SIM)$"
},
"fem_file": {
"type": "string",
"description": "FEM mesh file name (e.g., 'Beam_fem1.fem')",
"pattern": "\\.(fem|FEM)$"
},
"solver": {
"type": "string",
"description": "FEA solver type",
"enum": ["nastran", "NX_Nastran"]
},
"analysis_types": {
"type": "array",
"description": "Types of analysis required",
"items": {
"type": "string",
"enum": ["static", "modal", "thermal", "buckling"]
}
}
}
},
"reporting": {
"type": "object",
"description": "Result reporting settings (optional)",
"properties": {
"generate_plots": {
"type": "boolean",
"default": true
},
"save_incremental": {
"type": "boolean",
"default": true
},
"llm_summary": {
"type": "boolean",
"default": false,
"description": "Generate LLM summaries (experimental, requires API setup)"
}
}
}
},
"definitions": {
"extraction_spec": {
"type": "object",
"required": ["action", "domain", "params"],
"properties": {
"action": {
"type": "string",
"description": "Extractor function to call",
"enum": [
"extract_mass",
"extract_stress",
"extract_displacement",
"extract_frequency",
"extract_stiffness"
]
},
"domain": {
"type": "string",
"description": "Extraction domain (always 'result_extraction' for MVP)",
"enum": ["result_extraction"]
},
"params": {
"type": "object",
"description": "Parameters for the extractor function",
"required": ["result_type"],
"properties": {
"result_type": {
"type": "string",
"description": "Type of result to extract",
"enum": ["mass", "stress", "displacement", "frequency", "stiffness"]
},
"metric": {
"type": "string",
"description": "Specific metric to extract (e.g., 'max', 'total', 'max_von_mises')"
},
"mode_number": {
"type": "integer",
"description": "Mode number for frequency extraction",
"minimum": 1
},
"subcase": {
"type": "integer",
"description": "Subcase number for result extraction"
},
"element_type": {
"type": "string",
"description": "Element type for stress extraction (e.g., 'CTETRA', 'CHEXA')"
}
}
}
}
}
}
}