Files
Atomizer/optimization_engine/validators/results_validator.py
Anto01 e3bdb08a22 feat: Major update with validators, skills, dashboard, and docs reorganization
- Add validation framework (config, model, results, study validators)
- Add Claude Code skills (create-study, run-optimization, generate-report,
  troubleshoot, analyze-model)
- Add Atomizer Dashboard (React frontend + FastAPI backend)
- Reorganize docs into structured directories (00-09)
- Add neural surrogate modules and training infrastructure
- Add multi-objective optimization support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 19:23:58 -05:00

566 lines
19 KiB
Python

"""
Results Validator for Atomizer Optimization Studies
Validates optimization results stored in study.db and provides
analysis of trial quality, constraint satisfaction, and data integrity.
Usage:
from optimization_engine.validators.results_validator import validate_results
result = validate_results("studies/my_study/2_results/study.db")
if result.is_valid:
print("Results are valid")
else:
for error in result.errors:
print(f"ERROR: {error}")
"""
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
import json
@dataclass
class ResultsError:
    """A fatal problem discovered while validating optimization results."""

    code: str  # machine-readable code, e.g. "DB_NOT_FOUND"
    message: str  # human-readable description
    trial_number: Optional[int] = None  # set when the error concerns a single trial

    def __str__(self) -> str:
        prefix = f"[{self.code}]"
        if self.trial_number is None:
            return f"{prefix} {self.message}"
        return f"{prefix} Trial #{self.trial_number}: {self.message}"
@dataclass
class ResultsWarning:
    """A non-fatal issue discovered during results validation."""

    code: str  # machine-readable code, e.g. "LOW_FEASIBILITY"
    message: str  # human-readable description
    trial_number: Optional[int] = None  # set when the warning concerns a single trial

    def __str__(self) -> str:
        scope = "" if self.trial_number is None else f"Trial #{self.trial_number}: "
        return f"[{self.code}] {scope}{self.message}"
@dataclass
class ResultsInfo:
    """Information about the optimization results."""
    # Name of the Optuna study the results were loaded from.
    study_name: str = ""
    # Trial counts by Optuna trial state.
    n_trials: int = 0
    n_completed: int = 0
    n_failed: int = 0
    n_pruned: int = 0
    # Number of Pareto-optimal trials (populated for multi-objective studies).
    n_pareto: int = 0
    # Percentage (0-100) of completed trials whose user_attrs mark them feasible.
    feasibility_rate: float = 0.0
    is_multi_objective: bool = False
    # Objective names as declared in the optimization config (when provided).
    objective_names: List[str] = field(default_factory=list)
    # Best objective value(s); single-objective studies store key "objective".
    best_values: Dict[str, float] = field(default_factory=dict)
    # Parameter names observed in the first completed trial.
    parameter_names: List[str] = field(default_factory=list)
@dataclass
class ResultsValidationResult:
    """Complete validation result for optimization results."""

    is_valid: bool
    errors: List[ResultsError]
    warnings: List[ResultsWarning]
    info: ResultsInfo

    def __str__(self) -> str:
        # Build the report line by line; sections are separated by blank lines.
        out: List[str] = []
        divider = "-" * 40

        # Overall status header.
        if self.is_valid:
            out.append("[OK] Results validation passed!")
        else:
            out.append("[X] Results validation failed!")
        out.append("")

        # Summary of the study's trial statistics.
        out.append("RESULTS SUMMARY")
        out.append(divider)
        out.append(f" Study: {self.info.study_name}")
        out.append(f" Total trials: {self.info.n_trials}")
        out.append(f" Completed: {self.info.n_completed}")
        out.append(f" Failed: {self.info.n_failed}")
        if self.info.n_pruned > 0:
            out.append(f" Pruned: {self.info.n_pruned}")
        out.append(f" Multi-objective: {'Yes' if self.info.is_multi_objective else 'No'}")
        if self.info.is_multi_objective and self.info.n_pareto > 0:
            out.append(f" Pareto-optimal: {self.info.n_pareto}")
        if self.info.feasibility_rate > 0:
            out.append(f" Feasibility rate: {self.info.feasibility_rate:.1f}%")
        out.append("")

        # Best objective value(s), when known.
        if self.info.best_values:
            out.append("BEST VALUES")
            out.append(divider)
            for name, value in self.info.best_values.items():
                out.append(f" {name}: {value:.4f}")
            out.append("")

        # Error and warning listings.
        if self.errors:
            out.append("ERRORS")
            out.append(divider)
            out.extend(f" {err}" for err in self.errors)
            out.append("")
        if self.warnings:
            out.append("WARNINGS")
            out.append(divider)
            out.extend(f" {warn}" for warn in self.warnings)
            out.append("")

        return "\n".join(out)
def validate_results(
    db_path: str,
    config_path: Optional[str] = None,
    min_trials: int = 1
) -> ResultsValidationResult:
    """
    Validate optimization results stored in study.db.

    Args:
        db_path: Path to study.db file
        config_path: Optional path to optimization_config.json for cross-validation
        min_trials: Minimum number of completed trials required

    Returns:
        ResultsValidationResult with errors, warnings, and info
    """
    errors: List[ResultsError] = []
    warnings: List[ResultsWarning] = []
    info = ResultsInfo()

    db_file = Path(db_path)

    # Fail fast: without the database there is nothing else to check.
    if not db_file.exists():
        errors.append(ResultsError(
            code="DB_NOT_FOUND",
            message=f"Database not found: {db_file}"
        ))
        return ResultsValidationResult(
            is_valid=False,
            errors=errors,
            warnings=warnings,
            info=info
        )

    try:
        import optuna

        storage_url = f"sqlite:///{db_file}"
        storage = optuna.storages.RDBStorage(url=storage_url)

        study_summaries = storage.get_all_studies()
        if not study_summaries:
            errors.append(ResultsError(
                code="NO_STUDIES",
                message="Database contains no optimization studies"
            ))
            return ResultsValidationResult(
                is_valid=False,
                errors=errors,
                warnings=warnings,
                info=info
            )

        # Use the first study, but warn instead of silently ignoring the rest.
        if len(study_summaries) > 1:
            warnings.append(ResultsWarning(
                code="MULTIPLE_STUDIES",
                message=f"Database contains {len(study_summaries)} studies; validating only the first"
            ))
        info.study_name = study_summaries[0].study_name

        study = optuna.load_study(
            study_name=info.study_name,
            storage=storage_url
        )

        # Count trials by state in a single pass over the trial list.
        states = [t.state for t in study.trials]
        info.n_trials = len(states)
        info.n_completed = states.count(optuna.trial.TrialState.COMPLETE)
        info.n_failed = states.count(optuna.trial.TrialState.FAIL)
        info.n_pruned = states.count(optuna.trial.TrialState.PRUNED)

        # Enforce the caller's minimum completed-trial requirement.
        if info.n_completed < min_trials:
            errors.append(ResultsError(
                code="INSUFFICIENT_TRIALS",
                message=f"Only {info.n_completed} completed trials (minimum: {min_trials})"
            ))

        info.is_multi_objective = len(study.directions) > 1

        # Parameter names come from the first completed trial.
        for trial in study.trials:
            if trial.state == optuna.trial.TrialState.COMPLETE:
                info.parameter_names = list(trial.params.keys())
                break

        if info.is_multi_objective:
            # Pareto-front analysis for multi-objective studies.
            try:
                info.n_pareto = len(study.best_trials)
                if info.n_pareto == 0 and info.n_completed > 0:
                    warnings.append(ResultsWarning(
                        code="NO_PARETO",
                        message="No Pareto-optimal solutions found despite completed trials"
                    ))
            except Exception as e:
                warnings.append(ResultsWarning(
                    code="PARETO_ERROR",
                    message=f"Could not compute Pareto front: {e}"
                ))
        elif info.n_completed > 0:
            # Single objective: record the best value. study.best_trial can
            # raise (e.g. all values pruned/failed), so treat it as optional.
            try:
                info.best_values["objective"] = study.best_trial.value
            except Exception:
                pass

        # Feasibility: trials may set user_attrs['feasible']; absence of the
        # flag is treated as feasible (constraint tracking is optional).
        feasible_count = sum(
            1 for t in study.trials
            if t.state == optuna.trial.TrialState.COMPLETE
            and t.user_attrs.get('feasible', True)
        )
        if info.n_completed > 0:
            info.feasibility_rate = (feasible_count / info.n_completed) * 100
            if info.feasibility_rate < 50:
                warnings.append(ResultsWarning(
                    code="LOW_FEASIBILITY",
                    message=f"Low feasibility rate ({info.feasibility_rate:.1f}%) - consider relaxing constraints or adjusting bounds"
                ))
            elif info.feasibility_rate < 80:
                warnings.append(ResultsWarning(
                    code="MODERATE_FEASIBILITY",
                    message=f"Moderate feasibility rate ({info.feasibility_rate:.1f}%)"
                ))

        # Per-trial data-quality checks (NaN/inf objectives, missing params).
        _validate_trial_data(study, errors, warnings)

        # Optional cross-validation against the optimization config.
        if config_path:
            _cross_validate_with_config(study, config_path, info, errors, warnings)

    except ImportError:
        errors.append(ResultsError(
            code="OPTUNA_NOT_INSTALLED",
            message="Optuna is not installed. Cannot validate results."
        ))
    except Exception as e:
        errors.append(ResultsError(
            code="LOAD_ERROR",
            message=f"Failed to load study: {e}"
        ))

    return ResultsValidationResult(
        is_valid=len(errors) == 0,
        errors=errors,
        warnings=warnings,
        info=info
    )
def _validate_trial_data(study, errors: List[ResultsError], warnings: List[ResultsWarning]):
    """Validate individual trial data quality.

    Checks every COMPLETE trial for null/NaN/inf objective values, missing
    parameter records, and non-positive geometry parameters. Findings are
    appended to *errors* / *warnings* in place.
    """
    import optuna
    import math  # hoisted: previously imported inside the per-value loop

    for trial in study.trials:
        if trial.state != optuna.trial.TrialState.COMPLETE:
            continue

        # Objective values must be finite numbers.
        if trial.values:
            for i, val in enumerate(trial.values):
                if val is None:
                    errors.append(ResultsError(
                        code="NULL_OBJECTIVE",
                        message=f"Objective {i} has null value",
                        trial_number=trial.number
                    ))
                elif not isinstance(val, (int, float)):
                    errors.append(ResultsError(
                        code="INVALID_OBJECTIVE_TYPE",
                        message=f"Objective {i} has invalid type: {type(val)}",
                        trial_number=trial.number
                    ))
                elif isinstance(val, float):
                    if math.isnan(val):
                        errors.append(ResultsError(
                            code="NAN_OBJECTIVE",
                            message=f"Objective {i} is NaN",
                            trial_number=trial.number
                        ))
                    elif math.isinf(val):
                        warnings.append(ResultsWarning(
                            code="INF_OBJECTIVE",
                            message=f"Objective {i} is infinite",
                            trial_number=trial.number
                        ))

        # A completed trial with no recorded parameters indicates data loss.
        if not trial.params:
            errors.append(ResultsError(
                code="MISSING_PARAMS",
                message="Trial has no parameters recorded",
                trial_number=trial.number
            ))

        # Geometry parameters must be strictly positive. Guard the numeric
        # comparison so categorical (string) params never raise TypeError.
        for param_name, param_value in trial.params.items():
            if not isinstance(param_value, (int, float)):
                continue
            lowered = param_name.lower()
            if 'thickness' in lowered and param_value <= 0:
                warnings.append(ResultsWarning(
                    code="INVALID_THICKNESS",
                    message=f"{param_name} = {param_value} (non-positive thickness)",
                    trial_number=trial.number
                ))
            elif 'diameter' in lowered and param_value <= 0:
                warnings.append(ResultsWarning(
                    code="INVALID_DIAMETER",
                    message=f"{param_name} = {param_value} (non-positive diameter)",
                    trial_number=trial.number
                ))
def _cross_validate_with_config(
    study,
    config_path: str,
    info: ResultsInfo,
    errors: List[ResultsError],
    warnings: List[ResultsWarning]
):
    """Cross-validate results with optimization config."""
    import optuna

    cfg_file = Path(config_path)
    if not cfg_file.exists():
        warnings.append(ResultsWarning(
            code="CONFIG_NOT_FOUND",
            message=f"Config file not found for cross-validation: {cfg_file}"
        ))
        return

    try:
        config = json.loads(cfg_file.read_text())
        variables = config.get('design_variables', [])

        # Compare the parameter sets declared in config vs seen in results.
        declared = {
            name
            for name in (v.get('parameter', v.get('name', '')) for v in variables)
            if name
        }
        observed = set(info.parameter_names)
        missing = declared - observed
        extra = observed - declared
        if missing:
            warnings.append(ResultsWarning(
                code="MISSING_PARAMS_IN_RESULTS",
                message=f"Config params not in results: {missing}"
            ))
        if extra:
            warnings.append(ResultsWarning(
                code="EXTRA_PARAMS_IN_RESULTS",
                message=f"Results have extra params not in config: {extra}"
            ))

        # Objective count should agree between config and study directions.
        objectives = config.get('objectives', [])
        n_expected = len(objectives)
        n_actual = len(study.directions)
        if n_expected != n_actual:
            warnings.append(ResultsWarning(
                code="OBJECTIVE_COUNT_MISMATCH",
                message=f"Config has {n_expected} objectives, results have {n_actual}"
            ))

        # Record objective names declared in the config.
        info.objective_names.extend(
            obj.get('name', '') for obj in objectives if obj.get('name', '')
        )

        # Flag any completed-trial parameter outside its configured bounds,
        # allowing a small tolerance for floating-point round-off.
        complete = optuna.trial.TrialState.COMPLETE
        for trial in study.trials:
            if trial.state != complete:
                continue
            for var in variables:
                pname = var.get('parameter', var.get('name', ''))
                bounds = var.get('bounds', [])
                if pname not in trial.params or len(bounds) != 2:
                    continue
                lo, hi = bounds
                slack = (hi - lo) * 0.001
                val = trial.params[pname]
                if val < lo - slack:
                    warnings.append(ResultsWarning(
                        code="BELOW_MIN_BOUND",
                        message=f"{pname} = {val} < min ({lo})",
                        trial_number=trial.number
                    ))
                elif val > hi + slack:
                    warnings.append(ResultsWarning(
                        code="ABOVE_MAX_BOUND",
                        message=f"{pname} = {val} > max ({hi})",
                        trial_number=trial.number
                    ))
    except json.JSONDecodeError as e:
        warnings.append(ResultsWarning(
            code="CONFIG_PARSE_ERROR",
            message=f"Could not parse config JSON: {e}"
        ))
    except Exception as e:
        warnings.append(ResultsWarning(
            code="CONFIG_ERROR",
            message=f"Error reading config: {e}"
        ))
def validate_study_results(study_name: str) -> ResultsValidationResult:
    """
    Convenience function to validate results for a named study.

    Looks up the study's database and config under the conventional
    studies/<name>/ layout and delegates to validate_results().

    Args:
        study_name: Name of the study (folder in studies/)

    Returns:
        ResultsValidationResult
    """
    # Path is imported at module level; the old function-local re-import
    # was redundant and has been removed.
    study_dir = Path("studies") / study_name
    db_path = study_dir / "2_results" / "study.db"
    config_path = study_dir / "1_setup" / "optimization_config.json"
    return validate_results(
        db_path=str(db_path),
        config_path=str(config_path) if config_path.exists() else None
    )
def get_pareto_summary(db_path: str) -> Dict[str, Any]:
    """
    Get a summary of Pareto-optimal designs from results.

    Args:
        db_path: Path to study.db

    Returns:
        Dictionary with Pareto front information, or {"error": ...} when the
        study cannot be loaded or contains no usable trials.
    """
    try:
        import optuna

        storage_url = f"sqlite:///{db_path}"
        storage = optuna.storages.RDBStorage(url=storage_url)
        study_summaries = storage.get_all_studies()
        if not study_summaries:
            return {"error": "No studies found"}

        study = optuna.load_study(
            study_name=study_summaries[0].study_name,
            storage=storage_url
        )

        if len(study.directions) < 2:
            # Single objective. NOTE: study.best_trial RAISES when no trial
            # has completed (it never returns None), so the original
            # truthiness check made the "No completed trials" branch
            # unreachable; catch the exception instead.
            try:
                best = study.best_trial
            except ValueError:
                return {"error": "No completed trials"}
            return {
                "type": "single_objective",
                "best_trial": best.number,
                "best_value": study.best_value,
                "best_params": study.best_params
            }

        # Multi-objective: collect every Pareto-optimal design.
        pareto_trials = study.best_trials
        designs = [
            {
                "trial_number": t.number,
                "objectives": t.values,
                "parameters": t.params,
                "user_attrs": dict(t.user_attrs)
            }
            for t in pareto_trials
        ]

        # Per-objective min/max/spread across the front.
        ranges: Dict[str, Any] = {}
        if designs:
            for i in range(len(designs[0]["objectives"])):
                values = [d["objectives"][i] for d in designs]
                ranges[f"objective_{i}"] = {
                    "min": min(values),
                    "max": max(values),
                    "spread": max(values) - min(values)
                }

        return {
            "type": "multi_objective",
            "n_pareto": len(pareto_trials),
            "designs": designs,
            "ranges": ranges
        }
    except Exception as e:
        return {"error": str(e)}
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print("Usage: python results_validator.py <study_name_or_db_path>")
print("Example: python results_validator.py uav_arm_optimization")
print("Example: python results_validator.py studies/my_study/2_results/study.db")
sys.exit(1)
arg = sys.argv[1]
# Check if it's a study name or db path
if arg.endswith('.db'):
result = validate_results(arg)
else:
result = validate_study_results(arg)
print(result)