# Atomizer/optimization_engine/study_reset.py
"""
Study Reset and Cleanup Utility for Atomizer
Provides safe operations to reset or clean up optimization studies:
- Reset database (remove all trials, keep configuration)
- Clean up temporary files
- Archive results
- Full study deletion
Usage:
python -m optimization_engine.study_reset my_study --reset-db
python -m optimization_engine.study_reset my_study --cleanup-temp
python -m optimization_engine.study_reset my_study --full-reset
Safety features:
- Confirmation prompts for destructive operations
- Automatic backups before deletion
- Dry-run mode to preview changes
"""
import json
import logging
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Optional

import optuna
logger = logging.getLogger(__name__)
class StudyReset:
    """Handles study reset and cleanup operations for a single study.

    All paths are derived from ``studies_dir/study_name``: setup files
    live under ``1_setup`` (model under ``1_setup/model``) and results
    (Optuna database, history files, backups) under ``2_results``.
    """

    # Single source of truth for what counts as a temporary artifact.
    # Shared by get_study_stats() and cleanup_temp_files() so the stats
    # report matches exactly what cleanup would remove.
    _TEMP_PATTERNS = [
        "_temp*",       # Temporary NX files
        "*.log",        # Log files
        "*.bak",        # Backup files
        "worker_*",     # Worker directories
        "*.pyc",        # Python bytecode
        "__pycache__",  # Python cache dirs
    ]

    def __init__(self, study_name: str, studies_dir: str = "studies"):
        """
        Initialize study reset utility.

        Args:
            study_name: Name of the study to reset
            studies_dir: Base directory for studies
        """
        self.study_name = study_name
        self.studies_dir = Path(studies_dir)
        self.study_path = self.studies_dir / study_name
        self.setup_dir = self.study_path / "1_setup"
        self.model_dir = self.setup_dir / "model"
        self.results_dir = self.study_path / "2_results"

    def validate_study_exists(self) -> bool:
        """Check if the study directory exists."""
        return self.study_path.exists()

    def _find_temp_files(self) -> List[Path]:
        """Collect paths matching _TEMP_PATTERNS in the model, results,
        and study root directories (non-recursive per directory).

        Duplicates are removed and the result is sorted so that previews
        and stats are deterministic.
        """
        found = set()
        for directory in (self.model_dir, self.results_dir, self.study_path):
            if not directory.exists():
                continue
            for pattern in self._TEMP_PATTERNS:
                found.update(directory.glob(pattern))
        return sorted(found)

    def get_study_stats(self) -> Dict[str, Any]:
        """
        Get current study statistics.

        Returns:
            Dictionary with study statistics (trial counts, database size,
            temp-file counts). Trial counts stay at 0 if the Optuna
            database cannot be loaded.
        """
        stats = {
            "study_name": self.study_name,
            "exists": self.study_path.exists(),
            "has_results": self.results_dir.exists(),
            "trials": 0,
            "completed": 0,
            "failed": 0,
            "db_size_mb": 0,
            "temp_files": 0,
            "temp_size_mb": 0
        }
        if not self.study_path.exists():
            return stats

        # Check database
        db_path = self.results_dir / "study.db"
        if db_path.exists():
            stats["db_size_mb"] = db_path.stat().st_size / (1024 * 1024)
            try:
                storage = f"sqlite:///{db_path}"
                study = optuna.load_study(study_name=self.study_name, storage=storage)
                stats["trials"] = len(study.trials)
                stats["completed"] = len([t for t in study.trials
                                          if t.state == optuna.trial.TrialState.COMPLETE])
                stats["failed"] = len([t for t in study.trials
                                       if t.state == optuna.trial.TrialState.FAIL])
            except Exception as e:
                # Best effort: a corrupt or locked DB must not break stats.
                logger.warning(f"Could not load study: {e}")

        # Count temp files with the same patterns/directories that
        # cleanup_temp_files() uses, so the numbers agree.
        temp_files = self._find_temp_files()
        stats["temp_files"] = len(temp_files)
        stats["temp_size_mb"] = sum(f.stat().st_size for f in temp_files if f.is_file()) / (1024 * 1024)
        return stats

    def reset_database(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Reset the Optuna database (delete all trials).

        Also removes derived history/summary files so they cannot go
        stale against the now-empty database.

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result = {"operation": "reset_database", "dry_run": dry_run}
        db_path = self.results_dir / "study.db"

        if not db_path.exists():
            result["status"] = "skipped"
            result["message"] = "No database found"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete {db_path}"
            return result

        # Create backup
        if backup:
            backup_name = f"study_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.db"
            backup_path = self.results_dir / backup_name
            shutil.copy2(db_path, backup_path)
            result["backup"] = str(backup_path)
            logger.info(f"Created backup: {backup_path}")

        # Delete database
        db_path.unlink()
        result["status"] = "success"
        result["message"] = "Database reset complete"

        # Also clean history files derived from the deleted database
        for history_file in ["history.json", "history.csv", "optimization_summary.json"]:
            hist_path = self.results_dir / history_file
            if hist_path.exists():
                hist_path.unlink()
                logger.info(f"Deleted: {hist_path}")
        return result

    def cleanup_temp_files(self, dry_run: bool = False) -> Dict[str, Any]:
        """
        Remove temporary files from study.

        Args:
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary. ``deleted_size_mb`` reflects only
            files that were actually removed; failed deletions are logged
            and skipped.
        """
        result = {
            "operation": "cleanup_temp",
            "dry_run": dry_run,
            "deleted_files": [],
            "deleted_size_mb": 0
        }
        files_to_delete = self._find_temp_files()

        total_size = sum(p.stat().st_size for p in files_to_delete if p.is_file())
        result["files_found"] = len(files_to_delete)
        result["size_mb"] = total_size / (1024 * 1024)

        if dry_run:
            result["status"] = "preview"
            result["files_to_delete"] = [str(f) for f in files_to_delete[:20]]  # Limit preview
            return result

        # Actually delete; only count bytes that were really removed.
        deleted_size = 0
        for path in files_to_delete:
            try:
                if path.is_file():
                    size = path.stat().st_size
                    path.unlink()
                    deleted_size += size
                elif path.is_dir():
                    shutil.rmtree(path)
                result["deleted_files"].append(str(path))
            except Exception as e:
                logger.warning(f"Could not delete {path}: {e}")
        result["deleted_size_mb"] = deleted_size / (1024 * 1024)
        result["status"] = "success"
        return result

    def archive_results(self, archive_dir: Optional[Path] = None, dry_run: bool = False) -> Dict[str, Any]:
        """
        Archive study results before reset.

        Args:
            archive_dir: Directory for archives (default: studies/archives)
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result = {"operation": "archive", "dry_run": dry_run}
        if archive_dir is None:
            archive_dir = self.studies_dir / "archives"

        if not self.results_dir.exists():
            result["status"] = "skipped"
            result["message"] = "No results to archive"
            return result

        # Timestamped name so repeated archives never collide
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_name = f"{self.study_name}_{timestamp}"
        archive_path = archive_dir / archive_name

        if dry_run:
            result["status"] = "preview"
            result["archive_path"] = str(archive_path)
            return result

        archive_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(self.results_dir, archive_path)
        result["status"] = "success"
        result["archive_path"] = str(archive_path)
        logger.info(f"Archived results to: {archive_path}")
        return result

    def full_reset(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Perform full study reset (archive + database + temp files).

        Args:
            backup: Archive results and back up the DB before reset
            dry_run: Preview changes without executing

        Returns:
            Combined operation result
        """
        results = {"operation": "full_reset", "dry_run": dry_run}

        # Archive first; pass dry_run through so the preview also shows
        # where results would be archived (previously the archive step
        # was silently skipped in dry-run mode).
        if backup:
            results["archive"] = self.archive_results(dry_run=dry_run)

        results["database"] = self.reset_database(backup=backup, dry_run=dry_run)
        results["temp_cleanup"] = self.cleanup_temp_files(dry_run=dry_run)

        # Remove stale lock files left behind by interrupted runs
        lock_file = self.results_dir / ".optimization_lock"
        if lock_file.exists() and not dry_run:
            lock_file.unlink()
            results["lock_removed"] = True

        results["status"] = "success" if not dry_run else "preview"
        return results

    def delete_study(self, confirm: bool = False, dry_run: bool = False) -> Dict[str, Any]:
        """
        Completely delete study (DESTRUCTIVE).

        Args:
            confirm: Must be True to actually delete
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result = {"operation": "delete_study", "dry_run": dry_run}

        # Hard safety gate: deletion requires an explicit opt-in.
        if not confirm and not dry_run:
            result["status"] = "error"
            result["message"] = "Must set confirm=True to delete study"
            return result

        if not self.study_path.exists():
            result["status"] = "skipped"
            result["message"] = "Study does not exist"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete: {self.study_path}"
            return result

        # Create archive first so deletion is recoverable
        archive_result = self.archive_results()
        result["archive"] = archive_result

        # Delete study folder
        shutil.rmtree(self.study_path)
        result["status"] = "success"
        result["message"] = f"Deleted study: {self.study_name}"
        return result
def reset_study(
    study_name: str,
    reset_db: bool = True,
    cleanup_temp: bool = True,
    backup: bool = True,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Convenience function to reset a study.

    Args:
        study_name: Name of the study
        reset_db: Reset the Optuna database
        cleanup_temp: Clean up temporary files
        backup: Create backup before reset
        dry_run: Preview changes without executing

    Returns:
        Operation result dictionary. Always contains a "status" key
        ("error", "preview", or "success") plus per-operation results
        under "database" / "temp_cleanup".
    """
    resetter = StudyReset(study_name)
    if not resetter.validate_study_exists():
        return {"status": "error", "message": f"Study '{study_name}' not found"}

    results: Dict[str, Any] = {}
    if reset_db:
        results["database"] = resetter.reset_database(backup=backup, dry_run=dry_run)
    if cleanup_temp:
        results["temp_cleanup"] = resetter.cleanup_temp_files(dry_run=dry_run)
    # Match the error path's schema so callers can always check "status".
    results["status"] = "preview" if dry_run else "success"
    return results
if __name__ == "__main__":
    import argparse
    import sys  # sys.exit() is used below; local import keeps the script self-contained

    parser = argparse.ArgumentParser(
        description="Reset or cleanup Atomizer optimization studies",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Show study status
  python -m optimization_engine.study_reset my_study --status

  # Preview reset (dry run)
  python -m optimization_engine.study_reset my_study --full-reset --dry-run

  # Reset database only
  python -m optimization_engine.study_reset my_study --reset-db

  # Clean temp files only
  python -m optimization_engine.study_reset my_study --cleanup-temp

  # Full reset with backup
  python -m optimization_engine.study_reset my_study --full-reset
"""
    )
    parser.add_argument("study_name", help="Name of the study")
    parser.add_argument("--status", action="store_true", help="Show study status only")
    parser.add_argument("--reset-db", action="store_true", help="Reset Optuna database")
    parser.add_argument("--cleanup-temp", action="store_true", help="Clean temporary files")
    parser.add_argument("--full-reset", action="store_true", help="Full reset (db + temp)")
    parser.add_argument("--archive", action="store_true", help="Archive results before reset")
    parser.add_argument("--delete", action="store_true", help="Delete study completely")
    parser.add_argument("--no-backup", action="store_true", help="Skip backup")
    parser.add_argument("--dry-run", action="store_true", help="Preview without executing")
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts")
    args = parser.parse_args()

    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s'
    )

    resetter = StudyReset(args.study_name)
    if not resetter.validate_study_exists():
        print(f"Error: Study '{args.study_name}' not found")
        sys.exit(1)

    # Read-only status report; exits before any destructive path.
    if args.status:
        stats = resetter.get_study_stats()
        print(f"\nStudy: {args.study_name}")
        print("=" * 50)
        print(f" Trials: {stats['trials']} ({stats['completed']} completed, {stats['failed']} failed)")
        print(f" Database size: {stats['db_size_mb']:.2f} MB")
        print(f" Temp files: {stats['temp_files']} ({stats['temp_size_mb']:.2f} MB)")
        sys.exit(0)

    # No operation requested: show help and exit BEFORE prompting
    # (previously the confirmation prompt appeared even in this case).
    if not any([args.full_reset, args.delete, args.reset_db,
                args.cleanup_temp, args.archive]):
        parser.print_help()
        sys.exit(0)

    # Confirmation for the requested operation
    if not args.dry_run and not args.yes:
        if args.full_reset:
            action = "full reset"
        elif args.delete:
            action = "delete"
        elif args.reset_db:
            action = "reset"
        elif args.archive:
            # Previously --archive was mislabeled as "cleanup" in the prompt.
            action = "archive"
        else:
            action = "cleanup"
        response = input(f"\nReally {action} study '{args.study_name}'? [y/N] ")
        if response.lower() not in ['y', 'yes']:
            print("Aborted")
            sys.exit(0)

    backup = not args.no_backup
    if args.full_reset:
        result = resetter.full_reset(backup=backup, dry_run=args.dry_run)
    elif args.delete:
        result = resetter.delete_study(confirm=True, dry_run=args.dry_run)
    elif args.reset_db:
        result = resetter.reset_database(backup=backup, dry_run=args.dry_run)
    elif args.cleanup_temp:
        result = resetter.cleanup_temp_files(dry_run=args.dry_run)
    else:  # only --archive remains possible here
        result = resetter.archive_results(dry_run=args.dry_run)

    print("\nResult:")
    print(json.dumps(result, indent=2))
    if args.dry_run:
        print("\n[DRY RUN - no changes made]")