Atomizer/optimization_engine/study_continuation.py

"""
Study Continuation - Standard utility for continuing existing optimization studies.

This module provides a standardized way to continue optimization studies with
additional trials, preserving all existing trial data and learned knowledge.

Usage:
    from optimization_engine.study_continuation import continue_study

    continue_study(
        study_dir=Path("studies/my_study"),
        additional_trials=50,
        objective_function=my_objective,
        design_variables={'param1': (0, 10), 'param2': (0, 100)}
    )

This is an Atomizer standard feature that should be exposed in the dashboard
alongside "Start New Optimization".
"""

import optuna
import json
from pathlib import Path
from typing import Dict, Tuple, Callable, Optional


def continue_study(
    study_dir: Path,
    additional_trials: int,
    objective_function: Callable,
    design_variables: Optional[Dict[str, Tuple[float, float]]] = None,
    target_value: Optional[float] = None,
    tolerance: Optional[float] = None,
    verbose: bool = True
) -> Dict:
    """
    Continue an existing optimization study with additional trials.

    The study name is read from ``1_setup/workflow_config.json`` and the
    Optuna study is loaded from the SQLite database ``2_results/study.db``.
    Exceptions raised by the objective function are caught by Optuna
    (``catch=(Exception,)``) so one failing trial does not abort the run.

    Args:
        study_dir: Path to study directory containing 1_setup and 2_results
        additional_trials: Number of additional trials to run
        objective_function: Objective function to optimize (same as original)
        design_variables: Optional dict of design variable bounds. Accepted
            for reference only; it is not used by this function.
        target_value: Optional target value. Only used to report whether the
            target was reached; it does not stop the optimization early.
        tolerance: Optional absolute tolerance for target achievement
        verbose: Print progress information

    Returns:
        Dict containing:
            - 'study': The Optuna study object
            - 'total_trials': Total number of trials after continuation
            - 'successful_trials': Number of successful trials
            - 'pruned_trials': Number of pruned trials
            - 'best_value': Best objective value achieved, or None if no
              trial has completed
            - 'best_params': Best parameters found, or None if no trial has
              completed
            - 'target_achieved': Whether target was achieved (only present
              when both target_value and tolerance are specified)

    Raises:
        FileNotFoundError: If the workflow config, the study database, or the
            study inside the database doesn't exist
        ValueError: If study name cannot be determined
    """

    # Setup paths
    setup_dir = study_dir / "1_setup"
    results_dir = study_dir / "2_results"

    # Load workflow config to get study name
    workflow_file = setup_dir / "workflow_config.json"
    if not workflow_file.exists():
        raise FileNotFoundError(
            f"Workflow config not found: {workflow_file}. "
            f"Make sure this is a valid study directory."
        )

    with open(workflow_file) as f:
        workflow = json.load(f)

    # A config whose top level is not a JSON object cannot carry a study name.
    study_name = workflow.get('study_name') if isinstance(workflow, dict) else None
    if not study_name:
        raise ValueError("Study name not found in workflow_config.json")

    # Check for the database up front: pointing SQLite at a missing file
    # would otherwise silently create a new, empty database.
    db_file = results_dir / "study.db"
    if not db_file.exists():
        raise FileNotFoundError(
            f"Study database not found: {db_file}. "
            f"Run the initial optimization first using run_optimization.py"
        )

    # Load existing study
    storage = f"sqlite:///{db_file}"

    try:
        study = optuna.load_study(study_name=study_name, storage=storage)
    except KeyError as exc:
        raise FileNotFoundError(
            f"Study '{study_name}' not found in database. "
            f"Run the initial optimization first using run_optimization.py"
        ) from exc

    # Get current state
    current_trials = len(study.trials)
    current_best_trial = _best_trial_or_none(study)

    if verbose:
        print("\n" + "="*70)
        print("  CONTINUING OPTIMIZATION STUDY")
        print("="*70)
        print(f"\n  Study: {study_name}")
        print(f"  Current trials: {current_trials}")
        if current_best_trial is not None:
            print(f"  Current best: {current_best_trial.value:.6f}")
            print("  Best params:")
            for param, value in current_best_trial.params.items():
                print(f"    {param}: {_format_param_value(value)}")
        print(f"\n  Adding {additional_trials} trials...\n")

    # Continue optimization
    study.optimize(
        objective_function,
        n_trials=additional_trials,
        timeout=None,
        catch=(Exception,)  # Catch exceptions to allow graceful continuation
    )

    # Analyze results (read the trial list once; each access hits storage)
    trials = study.trials
    total_trials = len(trials)
    successful_trials = sum(
        1 for t in trials if t.state == optuna.trial.TrialState.COMPLETE
    )
    pruned_trials = sum(
        1 for t in trials if t.state == optuna.trial.TrialState.PRUNED
    )

    # May still be None if every trial failed or was pruned.
    best_trial = _best_trial_or_none(study)
    best_value = best_trial.value if best_trial is not None else None
    best_params = best_trial.params if best_trial is not None else None

    results = {
        'study': study,
        'total_trials': total_trials,
        'successful_trials': successful_trials,
        'pruned_trials': pruned_trials,
        'best_value': best_value,
        'best_params': best_params,
    }

    # Check target achievement if specified
    check_target = target_value is not None and tolerance is not None
    if check_target:
        results['target_achieved'] = (
            best_value is not None
            and abs(best_value - target_value) <= tolerance
        )

    if verbose:
        pruning_rate = pruned_trials / total_trials * 100 if total_trials else 0.0
        print("\n" + "="*70)
        print("  CONTINUATION COMPLETE")
        print("="*70)
        print(f"  Total trials: {total_trials} (added {additional_trials})")
        print(f"  Successful: {successful_trials}")
        print(f"  Pruned: {pruned_trials}")
        print(f"  Pruning rate: {pruning_rate:.1f}%")
        if best_trial is not None:
            print(f"\n  Best value: {best_value:.6f}")
            print("  Best params:")
            for param, value in best_params.items():
                print(f"    {param}: {_format_param_value(value)}")
        else:
            print("\n  Best value: n/a (no completed trials)")

        if check_target:
            print(f"\n  Target: {target_value} ± {tolerance}")
            print(f"  Target achieved: {'YES' if results['target_achieved'] else 'NO'}")

        print("="*70 + "\n")

    return results


def _best_trial_or_none(study):
    """Return the study's best trial, or None when no trial has completed."""
    try:
        return study.best_trial
    except ValueError:
        # Optuna raises ValueError when there is no completed trial yet.
        return None


def _format_param_value(value) -> str:
    """Format a parameter with 4 decimals, falling back to str() for non-numeric values."""
    try:
        return f"{value:.4f}"
    except (TypeError, ValueError):
        # Categorical parameters (e.g. strings) do not support the 'f' format.
        return str(value)


def can_continue_study(study_dir: Path) -> Tuple[bool, str]:
    """
    Report whether the study under ``study_dir`` is ready to receive more trials.

    The checks run in order and the first failure is reported:
    the workflow config exists, it can be read and names a study,
    ``2_results/study.db`` exists, and the study loads with at least one trial.

    Args:
        study_dir: Path to study directory

    Returns:
        (can_continue, message): Tuple of bool and explanation message
    """

    config_dir = study_dir / "1_setup"
    config_path = config_dir / "workflow_config.json"

    # Guard 1: the workflow config must be present
    if not config_path.exists():
        return False, f"No workflow_config.json found in {config_dir}"

    # Guard 2: the config must be readable and carry a study name
    try:
        with open(config_path) as handle:
            config = json.load(handle)
        name = config.get('study_name')
    except Exception as err:
        return False, f"Error reading workflow config: {err}"
    if not name:
        return False, "No study_name in workflow_config.json"

    # Guard 3: the results database must be present
    database = study_dir / "2_results" / "study.db"
    if not database.exists():
        return False, "No study.db found. Run initial optimization first."

    # Guard 4: the study must load and already contain trials
    try:
        loaded = optuna.load_study(
            study_name=name,
            storage=f"sqlite:///{database}",
        )
        n_trials = len(loaded.trials)
    except KeyError:
        return False, f"Study '{name}' not found in database"
    except Exception as err:
        return False, f"Error loading study: {err}"

    if n_trials == 0:
        return False, "Study exists but has no trials yet"
    return True, f"Study '{name}' ready (current trials: {n_trials})"


def get_study_status(study_dir: Path) -> Optional[Dict]:
    """
    Get current status of a study.

    Args:
        study_dir: Path to study directory

    Returns:
        Dict with study status info, or None if the study cannot be continued
        (as reported by can_continue_study) or fails to load
        {
            'study_name': str,
            'total_trials': int,
            'successful_trials': int,
            'pruned_trials': int,
            'pruning_rate': float,        # pruned_trials / total_trials, 0 if no trials
            'best_value': float or None,  # None when no trial has completed
            'best_params': dict or None   # None when no trial has completed
        }
    """

    can_continue, _message = can_continue_study(study_dir)

    if not can_continue:
        return None

    setup_dir = study_dir / "1_setup"
    results_dir = study_dir / "2_results"

    # Load study
    with open(setup_dir / "workflow_config.json") as f:
        workflow = json.load(f)

    study_name = workflow['study_name']
    storage = f"sqlite:///{results_dir / 'study.db'}"

    try:
        study = optuna.load_study(study_name=study_name, storage=storage)

        # Read the trial list once; each access to study.trials hits storage.
        trials = study.trials
        total_trials = len(trials)
        successful_trials = sum(
            1 for t in trials if t.state == optuna.trial.TrialState.COMPLETE
        )
        pruned_trials = sum(
            1 for t in trials if t.state == optuna.trial.TrialState.PRUNED
        )

        # study.best_trial raises ValueError (it does not return a falsy
        # value) when no trial has completed; report that as "no best yet"
        # rather than letting the broad handler below discard the status.
        try:
            best_trial = study.best_trial
        except ValueError:
            best_trial = None

        return {
            'study_name': study_name,
            'total_trials': total_trials,
            'successful_trials': successful_trials,
            'pruned_trials': pruned_trials,
            'pruning_rate': pruned_trials / total_trials if total_trials > 0 else 0,
            'best_value': best_trial.value if best_trial is not None else None,
            'best_params': best_trial.params if best_trial is not None else None
        }

    except Exception:
        # Best-effort status query: any other load/storage failure is
        # reported as "no status available".
        return None