"""
Study Reset and Cleanup Utility for Atomizer

Provides safe operations to reset or clean up optimization studies:
- Reset database (remove all trials, keep configuration)
- Clean up temporary files
- Archive results
- Full study deletion

Usage:
    python -m optimization_engine.study_reset my_study --reset-db
    python -m optimization_engine.study_reset my_study --cleanup-temp
    python -m optimization_engine.study_reset my_study --full-reset

Safety features:
- Confirmation prompts for destructive operations
- Automatic backups before deletion
- Dry-run mode to preview changes
"""

import argparse
import json
import logging
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

try:
    import optuna
except ImportError:  # pragma: no cover - depends on the environment
    # Only the trial statistics need Optuna; reset, cleanup, archive and
    # delete are plain filesystem operations and must keep working without it.
    optuna = None  # type: ignore[assignment]

logger = logging.getLogger(__name__)

_BYTES_PER_MB = 1024 * 1024

# Glob patterns (non-recursive) that identify temporary artefacts of a study.
_TEMP_PATTERNS = (
    "_temp*",       # Temporary NX files
    "*.log",        # Log files
    "*.bak",        # Backup files
    "worker_*",     # Worker directories
    "*.pyc",        # Python cache
    "__pycache__",  # Python cache dirs
)

# Derived result files removed together with the database.
_HISTORY_FILES = ("history.json", "history.csv", "optimization_summary.json")


class StudyReset:
    """Handles study reset and cleanup operations."""

    def __init__(self, study_name: str, studies_dir: str = "studies"):
        """
        Initialize study reset utility.

        Args:
            study_name: Name of the study to reset
            studies_dir: Base directory for studies
        """
        self.study_name = study_name
        self.studies_dir = Path(studies_dir)
        self.study_path = self.studies_dir / study_name
        self.setup_dir = self.study_path / "1_setup"
        self.model_dir = self.setup_dir / "model"
        self.results_dir = self.study_path / "2_results"

    def validate_study_exists(self) -> bool:
        """Check if study exists."""
        return self.study_path.exists()

    def _find_temp_paths(self) -> List[Path]:
        """Return every temporary file/directory that cleanup would remove.

        Shared by get_study_stats() and cleanup_temp_files() so the reported
        numbers always match what a cleanup actually touches. Directories
        that do not exist simply contribute no matches.
        """
        found: List[Path] = []
        for pattern in _TEMP_PATTERNS:
            for directory in (self.model_dir, self.results_dir, self.study_path):
                found.extend(directory.glob(pattern))
        return found

    @staticmethod
    def _path_size(path: Path) -> int:
        """Return the size in bytes of a file, or of all files below a directory.

        Returns 0 for paths that are neither, or that cannot be read.
        """
        try:
            if path.is_file():
                return path.stat().st_size
            if path.is_dir():
                return sum(
                    child.stat().st_size
                    for child in path.rglob("*")
                    if child.is_file()
                )
        except OSError as e:
            # Size is informational only; never let it abort an operation.
            logger.warning("Could not determine size of %s: %s", path, e)
        return 0

    def get_study_stats(self) -> Dict[str, Any]:
        """
        Get current study statistics.

        Trial counts are best-effort: if the database cannot be loaded (or
        Optuna is not installed) a warning is logged and the counts stay 0.

        Returns:
            Dictionary with study statistics
        """
        stats: Dict[str, Any] = {
            "study_name": self.study_name,
            "exists": self.study_path.exists(),
            "has_results": self.results_dir.exists(),
            "trials": 0,
            "completed": 0,
            "failed": 0,
            "db_size_mb": 0,
            "temp_files": 0,
            "temp_size_mb": 0,
        }

        if not stats["exists"]:
            return stats

        # Check database
        db_path = self.results_dir / "study.db"
        if db_path.exists():
            stats["db_size_mb"] = db_path.stat().st_size / _BYTES_PER_MB
            if optuna is None:
                logger.warning("Could not load study: optuna is not installed")
            else:
                try:
                    study = optuna.load_study(
                        study_name=self.study_name,
                        storage=f"sqlite:///{db_path}",
                    )
                    # Read the trial list once and reuse it for all counts.
                    trials = study.trials
                    states = optuna.trial.TrialState
                    stats["trials"] = len(trials)
                    stats["completed"] = sum(
                        1 for t in trials if t.state == states.COMPLETE
                    )
                    stats["failed"] = sum(
                        1 for t in trials if t.state == states.FAIL
                    )
                except Exception as e:
                    # Deliberately broad: a corrupt or foreign database must
                    # not prevent the rest of the statistics being reported.
                    logger.warning("Could not load study: %s", e)

        # Count temp files (same set that cleanup_temp_files() removes)
        temp_paths = self._find_temp_paths()
        stats["temp_files"] = len(temp_paths)
        stats["temp_size_mb"] = (
            sum(self._path_size(p) for p in temp_paths) / _BYTES_PER_MB
        )

        return stats

    def reset_database(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Reset the Optuna database (delete all trials).

        Also removes the derived history/summary files next to the database.

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "reset_database", "dry_run": dry_run}
        db_path = self.results_dir / "study.db"

        if not db_path.exists():
            result["status"] = "skipped"
            result["message"] = "No database found"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete {db_path}"
            return result

        # Create backup
        if backup:
            backup_name = f"study_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.db"
            backup_path = self.results_dir / backup_name
            shutil.copy2(db_path, backup_path)
            result["backup"] = str(backup_path)
            logger.info("Created backup: %s", backup_path)

        # Delete database
        db_path.unlink()
        result["status"] = "success"
        result["message"] = "Database reset complete"

        # Also clean history files
        for history_file in _HISTORY_FILES:
            hist_path = self.results_dir / history_file
            if hist_path.exists():
                hist_path.unlink()
                logger.info("Deleted: %s", hist_path)

        return result

    def cleanup_temp_files(self, dry_run: bool = False) -> Dict[str, Any]:
        """
        Remove temporary files from study.

        Deletion is best-effort: a path that cannot be removed is logged and
        skipped, and is not counted in ``deleted_files``/``deleted_size_mb``.

        Args:
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {
            "operation": "cleanup_temp",
            "dry_run": dry_run,
            "deleted_files": [],
            "deleted_size_mb": 0,
        }

        # Measure everything up front; sizes are unavailable after deletion.
        candidates = [(path, self._path_size(path)) for path in self._find_temp_paths()]

        result["files_found"] = len(candidates)
        result["size_mb"] = sum(size for _, size in candidates) / _BYTES_PER_MB

        if dry_run:
            result["status"] = "preview"
            result["files_to_delete"] = [str(path) for path, _ in candidates[:20]]  # Limit preview
            return result

        # Actually delete
        deleted_bytes = 0
        for path, size in candidates:
            try:
                if path.is_file():
                    path.unlink()
                elif path.is_dir():
                    shutil.rmtree(path)
                else:
                    # Vanished since the scan (or not a regular file/dir):
                    # nothing was deleted, so do not report it as deleted.
                    continue
            except OSError as e:
                logger.warning("Could not delete %s: %s", path, e)
                continue
            result["deleted_files"].append(str(path))
            deleted_bytes += size

        result["deleted_size_mb"] = deleted_bytes / _BYTES_PER_MB
        result["status"] = "success"
        return result

    def archive_results(
        self,
        archive_dir: Optional[Union[str, Path]] = None,
        dry_run: bool = False,
    ) -> Dict[str, Any]:
        """
        Archive study results before reset.

        Copies the whole results directory to
        ``<archive_dir>/<study_name>_<timestamp>``.

        Args:
            archive_dir: Directory for archives (default: studies/archives)
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "archive", "dry_run": dry_run}

        # Accept plain strings as well as Path objects.
        target_dir = self.studies_dir / "archives" if archive_dir is None else Path(archive_dir)

        if not self.results_dir.exists():
            result["status"] = "skipped"
            result["message"] = "No results to archive"
            return result

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_path = target_dir / f"{self.study_name}_{timestamp}"

        if dry_run:
            result["status"] = "preview"
            result["archive_path"] = str(archive_path)
            return result

        target_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(self.results_dir, archive_path)

        result["status"] = "success"
        result["archive_path"] = str(archive_path)
        logger.info("Archived results to: %s", archive_path)

        return result

    def full_reset(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Perform full study reset (database + temp files).

        With ``backup=True`` the results directory is archived first; in
        dry-run mode the archive step is previewed as well.

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Combined operation result
        """
        results: Dict[str, Any] = {"operation": "full_reset", "dry_run": dry_run}

        if backup:
            # archive_results() honours dry_run itself, so the preview shows
            # the same steps a real run would perform.
            results["archive"] = self.archive_results(dry_run=dry_run)

        results["database"] = self.reset_database(backup=backup, dry_run=dry_run)
        results["temp_cleanup"] = self.cleanup_temp_files(dry_run=dry_run)

        # Remove lock files
        lock_file = self.results_dir / ".optimization_lock"
        if lock_file.exists() and not dry_run:
            lock_file.unlink()
            results["lock_removed"] = True

        results["status"] = "success" if not dry_run else "preview"
        return results

    def delete_study(self, confirm: bool = False, dry_run: bool = False) -> Dict[str, Any]:
        """
        Completely delete study (DESTRUCTIVE).

        The results directory is archived before the study folder is removed.

        Args:
            confirm: Must be True to actually delete
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "delete_study", "dry_run": dry_run}

        if not confirm and not dry_run:
            result["status"] = "error"
            result["message"] = "Must set confirm=True to delete study"
            return result

        if not self.study_path.exists():
            result["status"] = "skipped"
            result["message"] = "Study does not exist"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete: {self.study_path}"
            return result

        # Create archive first
        result["archive"] = self.archive_results()

        # Delete study folder
        shutil.rmtree(self.study_path)
        result["status"] = "success"
        result["message"] = f"Deleted study: {self.study_name}"

        return result


def reset_study(
    study_name: str,
    reset_db: bool = True,
    cleanup_temp: bool = True,
    backup: bool = True,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Convenience function to reset a study.

    Args:
        study_name: Name of the study
        reset_db: Reset the Optuna database
        cleanup_temp: Clean up temporary files
        backup: Create backup before reset
        dry_run: Preview changes without executing

    Returns:
        Operation result dictionary. On success it holds one entry per
        requested operation ("database", "temp_cleanup"); if the study does
        not exist it holds "status": "error" and a "message".
    """
    resetter = StudyReset(study_name)

    if not resetter.validate_study_exists():
        return {"status": "error", "message": f"Study '{study_name}' not found"}

    results: Dict[str, Any] = {}

    if reset_db:
        results["database"] = resetter.reset_database(backup=backup, dry_run=dry_run)

    if cleanup_temp:
        results["temp_cleanup"] = resetter.cleanup_temp_files(dry_run=dry_run)

    return results


def main(argv: Optional[List[str]] = None) -> int:
    """
    Command-line entry point.

    Args:
        argv: Argument list to parse (default: sys.argv[1:])

    Returns:
        Process exit code: 1 if the study does not exist, otherwise 0
    """
    parser = argparse.ArgumentParser(
        description="Reset or cleanup Atomizer optimization studies",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Show study status
  python -m optimization_engine.study_reset my_study --status

  # Preview reset (dry run)
  python -m optimization_engine.study_reset my_study --full-reset --dry-run

  # Reset database only
  python -m optimization_engine.study_reset my_study --reset-db

  # Clean temp files only
  python -m optimization_engine.study_reset my_study --cleanup-temp

  # Full reset with backup
  python -m optimization_engine.study_reset my_study --full-reset
        """
    )

    parser.add_argument("study_name", help="Name of the study")
    parser.add_argument("--status", action="store_true", help="Show study status only")
    parser.add_argument("--reset-db", action="store_true", help="Reset Optuna database")
    parser.add_argument("--cleanup-temp", action="store_true", help="Clean temporary files")
    parser.add_argument("--full-reset", action="store_true", help="Full reset (db + temp)")
    parser.add_argument("--archive", action="store_true", help="Archive results before reset")
    parser.add_argument("--delete", action="store_true", help="Delete study completely")
    parser.add_argument("--no-backup", action="store_true", help="Skip backup")
    parser.add_argument("--dry-run", action="store_true", help="Preview without executing")
    parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts")

    args = parser.parse_args(argv)

    # Set up logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s'
    )

    resetter = StudyReset(args.study_name)

    if not resetter.validate_study_exists():
        print(f"Error: Study '{args.study_name}' not found")
        return 1

    if args.status:
        stats = resetter.get_study_stats()
        print(f"\nStudy: {args.study_name}")
        print("=" * 50)
        print(f"  Trials: {stats['trials']} ({stats['completed']} completed, {stats['failed']} failed)")
        print(f"  Database size: {stats['db_size_mb']:.2f} MB")
        print(f"  Temp files: {stats['temp_files']} ({stats['temp_size_mb']:.2f} MB)")
        return 0

    backup = not args.no_backup
    dry_run = args.dry_run

    # (flag, prompt label, operation), in priority order: the first flag
    # that is set wins when several are given.
    actions = [
        (args.full_reset, "full reset",
         lambda: resetter.full_reset(backup=backup, dry_run=dry_run)),
        (args.delete, "delete",
         lambda: resetter.delete_study(confirm=True, dry_run=dry_run)),
        (args.reset_db, "reset",
         lambda: resetter.reset_database(backup=backup, dry_run=dry_run)),
        (args.cleanup_temp, "cleanup",
         lambda: resetter.cleanup_temp_files(dry_run=dry_run)),
        (args.archive, "archive",
         lambda: resetter.archive_results(dry_run=dry_run)),
    ]
    selected = next(((label, run) for chosen, label, run in actions if chosen), None)

    # Nothing to do: show help *before* asking the user to confirm anything.
    if selected is None:
        parser.print_help()
        return 0
    action, run = selected

    # Confirmation
    if not dry_run and not args.yes:
        response = input(f"\nReally {action} study '{args.study_name}'? [y/N] ")
        if response.strip().lower() not in ('y', 'yes'):
            print("Aborted")
            return 0

    result = run()

    print("\nResult:")
    print(json.dumps(result, indent=2))

    if dry_run:
        print("\n[DRY RUN - no changes made]")

    return 0


if __name__ == "__main__":
    sys.exit(main())