# Atomizer/optimization_engine/generate_history_from_trials.py

"""
Generate history.json from trial directories.

For older substudies that don't have history.json,
reconstruct it from individual trial results.json files.
"""

from pathlib import Path
import json
import sys


def generate_history(substudy_dir: Path) -> list:
    """Rebuild an optimization history from per-trial ``results.json`` files.

    Every entry of ``substudy_dir`` whose name matches ``trial_*`` is
    inspected. Entries that cannot be used are reported on stdout with a
    ``Warning:`` line and skipped, so one bad trial does not abort the
    whole reconstruction. An entry is skipped when:

    * it has no ``results.json`` file,
    * the part of its name after the last underscore is not an integer,
    * its ``results.json`` is not valid UTF-8 JSON, or
    * its ``results.json`` does not contain a JSON object.

    Args:
        substudy_dir: Directory containing the ``trial_*`` sub-directories.
            A ``str`` is accepted as well; it is converted to ``Path``.

    Returns:
        A list of dicts sorted by ascending ``trial_number``. Each dict has
        the keys ``trial_number``, ``timestamp``, ``design_variables``,
        ``objectives``, ``constraints`` and ``total_objective``. Keys absent
        from a trial's ``results.json`` fall back to ``''``, ``{}`` or
        ``0.0`` respectively.
    """
    substudy_dir = Path(substudy_dir)
    history = []

    # Lexicographic order only determines the order of warning messages;
    # the returned list is sorted numerically below.
    for trial_dir in sorted(substudy_dir.glob('trial_*')):
        results_file = trial_dir / 'results.json'

        if not results_file.is_file():
            print(f"Warning: No results.json in {trial_dir.name}")
            continue

        # The trial number is the text after the last underscore
        # (e.g. "trial_007" -> 7). Names such as "trial_backup" also match
        # the glob, so a non-numeric suffix must not crash the run.
        suffix = trial_dir.name.split('_')[-1]
        try:
            trial_num = int(suffix)
        except ValueError:
            print(f"Warning: Cannot parse trial number from {trial_dir.name}, skipping")
            continue

        try:
            with open(results_file, 'r', encoding='utf-8') as f:
                trial_data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            print(f"Warning: Invalid results.json in {trial_dir.name} ({exc}), skipping")
            continue

        # The .get() lookups below require a JSON object at the top level.
        if not isinstance(trial_data, dict):
            print(f"Warning: results.json in {trial_dir.name} is not a JSON object, skipping")
            continue

        history.append({
            'trial_number': trial_num,
            'timestamp': trial_data.get('timestamp', ''),
            'design_variables': trial_data.get('design_variables', {}),
            'objectives': trial_data.get('objectives', {}),
            'constraints': trial_data.get('constraints', {}),
            'total_objective': trial_data.get('total_objective', 0.0),
        })

    # Numeric sort: glob order would put "trial_10" before "trial_2".
    history.sort(key=lambda entry: entry['trial_number'])

    return history


if __name__ == '__main__':
    # Command-line entry point: rebuild history.json for the substudy
    # directory given as the first argument and write it into that directory.
    if len(sys.argv) < 2:
        print("Usage: python generate_history_from_trials.py <substudy_directory>")
        sys.exit(1)

    substudy_path = Path(sys.argv[1])

    # Fail early with a clear message. Without this check a mistyped path
    # yields zero entries and then a traceback when history.json is opened.
    # sys.exit() with a string prints it to stderr and exits with status 1.
    if not substudy_path.is_dir():
        sys.exit(f"Error: substudy directory not found: {substudy_path}")

    print(f"Generating history.json from trials in: {substudy_path}")

    history = generate_history(substudy_path)

    print(f"Generated {len(history)} history entries")

    # Save history.json next to the trial directories it was built from.
    history_file = substudy_path / 'history.json'
    with open(history_file, 'w', encoding='utf-8') as f:
        json.dump(history, f, indent=2)

    print(f"Saved: {history_file}")