Files
Atomizer/projects/hydrotech-beam/studies/01_doe_landscape/history.py
Anto01 40213578ad merge: recover Gitea state - HQ docs, cluster setup, isogrid work
Merge recovery/gitea-before-force-push to restore:
- hq/ directory (cluster setup, docker-compose, configs)
- docs/hq/ (12+ HQ planning docs)
- docs/guides/ (documentation boundaries, PKM standard)
- docs/plans/ (model introspection master plan)
- Isogrid extraction work
- Hydrotech-beam: keep local DOE results, remove Syncthing conflicts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 12:22:33 -05:00

236 lines
7.9 KiB
Python
Executable File

"""Persistent trial history — append-only, survives Optuna resets.
Every trial is logged to `history.db` (SQLite) and exported to `history.csv`.
Never deleted by --clean. Full lineage across all studies and phases.
Usage:
history = TrialHistory(results_dir)
history.log_trial(study_name, trial_id, params, results, ...)
history.export_csv()
df = history.query("SELECT * FROM trials WHERE mass_kg < 100")
"""
from __future__ import annotations

import csv
import json
import logging
import math
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Optional
logger = logging.getLogger(__name__)

# Schema version — bump if columns change.
SCHEMA_VERSION = 1

CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS trials (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    study_name TEXT NOT NULL,
    trial_id INTEGER NOT NULL,
    iteration TEXT,
    timestamp TEXT NOT NULL,
    -- Design variables
    beam_half_core_thickness REAL,
    beam_face_thickness REAL,
    holes_diameter REAL,
    hole_count INTEGER,
    -- Results
    mass_kg REAL,
    tip_displacement_mm REAL,
    max_von_mises_mpa REAL,
    -- Constraint checks
    disp_feasible INTEGER,    -- 0/1
    stress_feasible INTEGER,  -- 0/1
    geo_feasible INTEGER,     -- 0/1
    fully_feasible INTEGER,   -- 0/1
    -- Meta
    status TEXT DEFAULT 'COMPLETE',  -- COMPLETE, FAILED, PRUNED
    error_message TEXT,
    solve_time_s REAL,
    iter_path TEXT,
    notes TEXT,
    -- Unique constraint: no duplicate (study, trial) pairs
    UNIQUE(study_name, trial_id)
);

CREATE TABLE IF NOT EXISTS schema_version (
    version INTEGER PRIMARY KEY
);
"""

# Constraint thresholds (from OPTIMIZATION_STRATEGY.md)
DISP_LIMIT_MM = 10.0
STRESS_LIMIT_MPA = 130.0

# CSV column order. NOTE: `notes` is intentionally DB-only (not exported).
CSV_COLUMNS = [
    "study_name", "trial_id", "iteration", "timestamp",
    "beam_half_core_thickness", "beam_face_thickness",
    "holes_diameter", "hole_count",
    "mass_kg", "tip_displacement_mm", "max_von_mises_mpa",
    "disp_feasible", "stress_feasible", "geo_feasible", "fully_feasible",
    "status", "error_message", "solve_time_s", "iter_path",
]


class TrialHistory:
    """Append-only trial history database.

    Every trial is logged to ``history.db`` (SQLite) and exported to
    ``history.csv``. Never deleted by --clean; full lineage across all
    studies and phases. Usable as a context manager::

        with TrialHistory(results_dir) as history:
            history.log_trial(...)
        # CSV exported and connection closed on exit
    """

    def __init__(self, results_dir: Path | str):
        """Open (creating if needed) ``history.db`` under *results_dir*."""
        self.results_dir = Path(results_dir)
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self.db_path = self.results_dir / "history.db"
        self.csv_path = self.results_dir / "history.csv"
        self._conn = sqlite3.connect(str(self.db_path))
        self._conn.row_factory = sqlite3.Row
        self._conn.execute("PRAGMA journal_mode=WAL")  # safe concurrent reads
        self._init_schema()
        count = self._conn.execute("SELECT COUNT(*) FROM trials").fetchone()[0]
        logger.info("Trial history: %s (%d records)", self.db_path.name, count)

    def __enter__(self) -> TrialHistory:
        """Context-manager entry: returns self."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Context-manager exit: always export CSV and close, even on error."""
        self.close()

    def _init_schema(self) -> None:
        """Create tables if they don't exist and stamp the schema version."""
        self._conn.executescript(CREATE_TABLE)
        # Record SCHEMA_VERSION the first time the DB is created; existing
        # databases keep whatever version they were stamped with.
        row = self._conn.execute(
            "SELECT version FROM schema_version ORDER BY version DESC LIMIT 1"
        ).fetchone()
        if row is None:
            self._conn.execute(
                "INSERT INTO schema_version (version) VALUES (?)",
                (SCHEMA_VERSION,),
            )
        self._conn.commit()

    def log_trial(
        self,
        study_name: str,
        trial_id: int,
        params: dict[str, float],
        mass_kg: float = float("nan"),
        tip_displacement_mm: float = float("nan"),
        max_von_mises_mpa: float = float("nan"),
        geo_feasible: bool = True,
        status: str = "COMPLETE",
        error_message: str | None = None,
        solve_time_s: float = 0.0,
        iter_path: str | None = None,
        notes: str | None = None,
        iteration_number: int | None = None,
    ) -> None:
        """Log a single trial result.

        Uses INSERT OR REPLACE so re-runs of the same (study, trial) pair
        update cleanly. Feasibility flags are derived here from the
        DISP_LIMIT_MM / STRESS_LIMIT_MPA thresholds; NaN results count as
        infeasible. Logging failures are swallowed (logged at ERROR) so
        history bookkeeping never crashes an optimization run.
        """
        disp_ok = (
            not math.isnan(tip_displacement_mm)
            and tip_displacement_mm <= DISP_LIMIT_MM
        )
        stress_ok = (
            not math.isnan(max_von_mises_mpa)
            and max_von_mises_mpa <= STRESS_LIMIT_MPA
        )
        # Explicit `is not None`: iteration 0 is a valid iteration number.
        # (A plain truthiness test used to silently drop it.)
        iteration = (
            f"iter{iteration_number:03d}" if iteration_number is not None else None
        )
        try:
            self._conn.execute(
                """
                INSERT OR REPLACE INTO trials (
                    study_name, trial_id, iteration, timestamp,
                    beam_half_core_thickness, beam_face_thickness,
                    holes_diameter, hole_count,
                    mass_kg, tip_displacement_mm, max_von_mises_mpa,
                    disp_feasible, stress_feasible, geo_feasible, fully_feasible,
                    status, error_message, solve_time_s, iter_path, notes
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    study_name,
                    trial_id,
                    iteration,
                    datetime.now(timezone.utc).isoformat(),
                    params.get("beam_half_core_thickness"),
                    params.get("beam_face_thickness"),
                    params.get("holes_diameter"),
                    params.get("hole_count"),
                    mass_kg,
                    tip_displacement_mm,
                    max_von_mises_mpa,
                    int(disp_ok),
                    int(stress_ok),
                    int(geo_feasible),
                    int(disp_ok and stress_ok and geo_feasible),
                    status,
                    error_message,
                    solve_time_s,
                    iter_path,
                    notes,
                ),
            )
            self._conn.commit()
        except sqlite3.Error as e:
            # Deliberate best-effort: log and continue (see docstring).
            logger.error("Failed to log trial %d: %s", trial_id, e)

    def export_csv(self) -> Path:
        """Export all trials to CSV (overwrite). Returns the CSV path."""
        rows = self._conn.execute(
            f"SELECT {', '.join(CSV_COLUMNS)} FROM trials "
            "ORDER BY study_name, trial_id"
        ).fetchall()
        with open(self.csv_path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(CSV_COLUMNS)
            for row in rows:
                writer.writerow([row[col] for col in CSV_COLUMNS])
        logger.info("Exported %d trials to %s", len(rows), self.csv_path.name)
        return self.csv_path

    def query(self, sql: str, params: tuple = ()) -> list[dict]:
        """Run an arbitrary SELECT query. Returns a list of dicts.

        The statement is executed verbatim — intended for trusted,
        developer-written SQL only, never user input.
        """
        rows = self._conn.execute(sql, params).fetchall()
        return [dict(row) for row in rows]

    def get_study_summary(self, study_name: str) -> dict[str, Any]:
        """Get summary stats (counts, rates, best mass) for a study."""
        rows = self.query(
            "SELECT * FROM trials WHERE study_name = ?", (study_name,)
        )
        if not rows:
            return {"study_name": study_name, "total": 0}
        complete = [r for r in rows if r["status"] == "COMPLETE"]
        feasible = [r for r in complete if r["fully_feasible"]]
        # NaN results are stored as NULL by sqlite3, hence the None filter.
        masses = [r["mass_kg"] for r in feasible if r["mass_kg"] is not None]
        return {
            "study_name": study_name,
            "total": len(rows),
            "complete": len(complete),
            "failed": len(rows) - len(complete),
            "feasible": len(feasible),
            "best_mass_kg": min(masses) if masses else None,
            # `rows` is guaranteed non-empty here (early return above).
            "solve_rate": len(complete) / len(rows) * 100,
            "feasibility_rate": len(feasible) / len(complete) * 100 if complete else 0,
        }

    def close(self) -> None:
        """Export CSV and close the connection."""
        self.export_csv()
        self._conn.close()