Closes the neural training loop with automated workflow: - atomizer.py: One-command neural workflow CLI - auto_trainer.py: Auto-training trigger system (50pt threshold) - template_loader.py: Study creation from templates - study_reset.py: Study reset/cleanup utility - 3 templates: beam stiffness, bracket stress, frequency tuning - State assessment document (Nov 25) Usage: python atomizer.py neural-optimize --study my_study --trials 500 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
448 lines
14 KiB
Python
448 lines
14 KiB
Python
"""
|
|
Study Reset and Cleanup Utility for Atomizer
|
|
|
|
Provides safe operations to reset or clean up optimization studies:
|
|
- Reset database (remove all trials, keep configuration)
|
|
- Clean up temporary files
|
|
- Archive results
|
|
- Full study deletion
|
|
|
|
Usage:
|
|
python -m optimization_engine.study_reset my_study --reset-db
|
|
python -m optimization_engine.study_reset my_study --cleanup-temp
|
|
python -m optimization_engine.study_reset my_study --full-reset
|
|
|
|
Safety features:
|
|
- Confirmation prompts for destructive operations
|
|
- Automatic backups before deletion
|
|
- Dry-run mode to preview changes
|
|
"""
|
|
|
|
import json
import logging
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import optuna
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class StudyReset:
    """Handles study reset and cleanup operations for a single study.

    Expected layout under ``studies_dir``::

        <study_name>/1_setup/model/   # simulation model files
        <study_name>/2_results/       # Optuna DB, history files, lock file

    Destructive operations accept ``dry_run`` to preview changes, and
    create backups/archives before deleting data where requested.
    """

    # Patterns used when *counting* temp files in get_study_stats().
    _STAT_PATTERNS = ("_temp*", "*.log", "*.bak", "worker_*")
    # Patterns used when *deleting* temp files in cleanup_temp_files()
    # (superset of the stat patterns: also removes Python caches).
    _CLEANUP_PATTERNS = (
        "_temp*",       # temporary NX files
        "*.log",        # log files
        "*.bak",        # backup files
        "worker_*",     # worker directories
        "*.pyc",        # Python bytecode
        "__pycache__",  # Python cache dirs
    )

    def __init__(self, study_name: str, studies_dir: str = "studies"):
        """
        Initialize study reset utility.

        Args:
            study_name: Name of the study to reset
            studies_dir: Base directory for studies
        """
        self.study_name = study_name
        self.studies_dir = Path(studies_dir)
        self.study_path = self.studies_dir / study_name
        self.setup_dir = self.study_path / "1_setup"
        self.model_dir = self.setup_dir / "model"
        self.results_dir = self.study_path / "2_results"

    def validate_study_exists(self) -> bool:
        """Check whether the study directory exists."""
        return self.study_path.exists()

    def _collect_temp_files(self, patterns, roots) -> List[Path]:
        """Glob *patterns* under each existing root directory.

        Returns a deduplicated list (a path matching several patterns is
        listed once, so later deletion is never attempted twice).
        """
        found: List[Path] = []
        seen = set()
        for root in roots:
            if not root.exists():
                continue
            for pattern in patterns:
                for path in root.glob(pattern):
                    if path not in seen:
                        seen.add(path)
                        found.append(path)
        return found

    def get_study_stats(self) -> Dict[str, Any]:
        """
        Get current study statistics.

        Returns:
            Dictionary with study statistics (trial counts, DB size,
            temp-file counts). Counts are zero when the study or its
            database does not exist.
        """
        stats: Dict[str, Any] = {
            "study_name": self.study_name,
            "exists": self.study_path.exists(),
            "has_results": self.results_dir.exists(),
            "trials": 0,
            "completed": 0,
            "failed": 0,
            "db_size_mb": 0,
            "temp_files": 0,
            "temp_size_mb": 0,
        }

        if not stats["exists"]:
            return stats

        # Inspect the Optuna database, if present.
        db_path = self.results_dir / "study.db"
        if db_path.exists():
            stats["db_size_mb"] = db_path.stat().st_size / (1024 * 1024)
            try:
                study = optuna.load_study(study_name=self.study_name,
                                          storage=f"sqlite:///{db_path}")
                states = [t.state for t in study.trials]
                stats["trials"] = len(states)
                stats["completed"] = states.count(optuna.trial.TrialState.COMPLETE)
                stats["failed"] = states.count(optuna.trial.TrialState.FAIL)
            except Exception as e:
                # Corrupt/locked DB is not fatal for a status report.
                logger.warning("Could not load study: %s", e)

        # Count temp files (model + results dirs only, matching cleanup scope).
        temp_files = self._collect_temp_files(
            self._STAT_PATTERNS, (self.model_dir, self.results_dir))
        stats["temp_files"] = len(temp_files)
        stats["temp_size_mb"] = sum(
            f.stat().st_size for f in temp_files if f.is_file()) / (1024 * 1024)

        return stats

    def reset_database(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Reset the Optuna database (delete all trials).

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "reset_database", "dry_run": dry_run}
        db_path = self.results_dir / "study.db"

        if not db_path.exists():
            result["status"] = "skipped"
            result["message"] = "No database found"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete {db_path}"
            return result

        # Create a timestamped backup copy before deleting.
        if backup:
            backup_name = f"study_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}.db"
            backup_path = self.results_dir / backup_name
            shutil.copy2(db_path, backup_path)
            result["backup"] = str(backup_path)
            logger.info("Created backup: %s", backup_path)

        db_path.unlink()
        result["status"] = "success"
        result["message"] = "Database reset complete"

        # History/summary files are derived from the DB; remove them too so
        # the study restarts from a clean slate.
        for history_file in ("history.json", "history.csv", "optimization_summary.json"):
            hist_path = self.results_dir / history_file
            if hist_path.exists():
                hist_path.unlink()
                logger.info("Deleted: %s", hist_path)

        return result

    def cleanup_temp_files(self, dry_run: bool = False) -> Dict[str, Any]:
        """
        Remove temporary files from the study.

        Args:
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary. ``size_mb`` is the size of all
            files found; ``deleted_size_mb`` is what was actually deleted.
        """
        result: Dict[str, Any] = {
            "operation": "cleanup_temp",
            "dry_run": dry_run,
            "deleted_files": [],
            "deleted_size_mb": 0,
        }

        files_to_delete = self._collect_temp_files(
            self._CLEANUP_PATTERNS,
            (self.model_dir, self.results_dir, self.study_path),
        )

        total_size = sum(p.stat().st_size for p in files_to_delete if p.is_file())
        result["files_found"] = len(files_to_delete)
        result["size_mb"] = total_size / (1024 * 1024)

        if dry_run:
            result["status"] = "preview"
            result["files_to_delete"] = [str(f) for f in files_to_delete[:20]]  # limit preview
            return result

        # Actually delete. BUG FIX: previously the reported deleted size was
        # the size of all *found* files even when some deletions failed; now
        # only successfully deleted files are counted.
        deleted_size = 0
        for path in files_to_delete:
            try:
                if path.is_file():
                    size = path.stat().st_size
                    path.unlink()
                    deleted_size += size
                elif path.is_dir():
                    shutil.rmtree(path)
                result["deleted_files"].append(str(path))
            except OSError as e:
                logger.warning(f"Could not delete {path}: {e}")

        result["deleted_size_mb"] = deleted_size / (1024 * 1024)
        result["status"] = "success"
        return result

    def archive_results(self, archive_dir: Optional[Path] = None, dry_run: bool = False) -> Dict[str, Any]:
        """
        Archive study results before reset.

        Args:
            archive_dir: Directory for archives (default: studies/archives)
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "archive", "dry_run": dry_run}

        if archive_dir is None:
            archive_dir = self.studies_dir / "archives"

        if not self.results_dir.exists():
            result["status"] = "skipped"
            result["message"] = "No results to archive"
            return result

        # Timestamped name keeps successive archives from colliding.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        archive_path = archive_dir / f"{self.study_name}_{timestamp}"

        if dry_run:
            result["status"] = "preview"
            result["archive_path"] = str(archive_path)
            return result

        archive_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(self.results_dir, archive_path)

        result["status"] = "success"
        result["archive_path"] = str(archive_path)
        logger.info("Archived results to: %s", archive_path)

        return result

    def full_reset(self, backup: bool = True, dry_run: bool = False) -> Dict[str, Any]:
        """
        Perform full study reset (database + temp files).

        Args:
            backup: Create backup before reset
            dry_run: Preview changes without executing

        Returns:
            Combined operation result
        """
        results: Dict[str, Any] = {"operation": "full_reset", "dry_run": dry_run}

        # Archive first so the snapshot captures the pre-reset state.
        if backup and not dry_run:
            results["archive"] = self.archive_results()

        results["database"] = self.reset_database(backup=backup, dry_run=dry_run)
        results["temp_cleanup"] = self.cleanup_temp_files(dry_run=dry_run)

        # Remove a stale optimization lock so the study can restart cleanly.
        lock_file = self.results_dir / ".optimization_lock"
        if lock_file.exists() and not dry_run:
            lock_file.unlink()
            results["lock_removed"] = True

        results["status"] = "preview" if dry_run else "success"
        return results

    def delete_study(self, confirm: bool = False, dry_run: bool = False) -> Dict[str, Any]:
        """
        Completely delete the study directory (DESTRUCTIVE).

        Args:
            confirm: Must be True to actually delete
            dry_run: Preview changes without executing

        Returns:
            Operation result dictionary
        """
        result: Dict[str, Any] = {"operation": "delete_study", "dry_run": dry_run}

        if not confirm and not dry_run:
            result["status"] = "error"
            result["message"] = "Must set confirm=True to delete study"
            return result

        if not self.study_path.exists():
            result["status"] = "skipped"
            result["message"] = "Study does not exist"
            return result

        if dry_run:
            result["status"] = "preview"
            result["message"] = f"Would delete: {self.study_path}"
            return result

        # Always archive before an irreversible delete.
        result["archive"] = self.archive_results()

        shutil.rmtree(self.study_path)
        result["status"] = "success"
        result["message"] = f"Deleted study: {self.study_name}"

        return result
|
|
|
|
|
|
def reset_study(
    study_name: str,
    reset_db: bool = True,
    cleanup_temp: bool = True,
    backup: bool = True,
    dry_run: bool = False
) -> Dict[str, Any]:
    """
    Reset a study in a single call.

    Args:
        study_name: Name of the study
        reset_db: Reset the Optuna database
        cleanup_temp: Clean up temporary files
        backup: Create backup before reset
        dry_run: Preview changes without executing

    Returns:
        Operation result dictionary (keyed by operation name), or an
        error dictionary when the study does not exist.
    """
    resetter = StudyReset(study_name)

    # Guard clause: nothing to do for a missing study.
    if not resetter.validate_study_exists():
        return {"status": "error", "message": f"Study '{study_name}' not found"}

    # Build the list of requested operations, then run them in order.
    operations = []
    if reset_db:
        operations.append(
            ("database",
             lambda: resetter.reset_database(backup=backup, dry_run=dry_run)))
    if cleanup_temp:
        operations.append(
            ("temp_cleanup",
             lambda: resetter.cleanup_temp_files(dry_run=dry_run)))

    return {key: run() for key, run in operations}
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="Reset or cleanup Atomizer optimization studies",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
# Show study status
|
|
python -m optimization_engine.study_reset my_study --status
|
|
|
|
# Preview reset (dry run)
|
|
python -m optimization_engine.study_reset my_study --full-reset --dry-run
|
|
|
|
# Reset database only
|
|
python -m optimization_engine.study_reset my_study --reset-db
|
|
|
|
# Clean temp files only
|
|
python -m optimization_engine.study_reset my_study --cleanup-temp
|
|
|
|
# Full reset with backup
|
|
python -m optimization_engine.study_reset my_study --full-reset
|
|
"""
|
|
)
|
|
|
|
parser.add_argument("study_name", help="Name of the study")
|
|
parser.add_argument("--status", action="store_true", help="Show study status only")
|
|
parser.add_argument("--reset-db", action="store_true", help="Reset Optuna database")
|
|
parser.add_argument("--cleanup-temp", action="store_true", help="Clean temporary files")
|
|
parser.add_argument("--full-reset", action="store_true", help="Full reset (db + temp)")
|
|
parser.add_argument("--archive", action="store_true", help="Archive results before reset")
|
|
parser.add_argument("--delete", action="store_true", help="Delete study completely")
|
|
parser.add_argument("--no-backup", action="store_true", help="Skip backup")
|
|
parser.add_argument("--dry-run", action="store_true", help="Preview without executing")
|
|
parser.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompts")
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Set up logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s [%(levelname)s] %(message)s'
|
|
)
|
|
|
|
resetter = StudyReset(args.study_name)
|
|
|
|
if not resetter.validate_study_exists():
|
|
print(f"Error: Study '{args.study_name}' not found")
|
|
sys.exit(1)
|
|
|
|
if args.status:
|
|
stats = resetter.get_study_stats()
|
|
print(f"\nStudy: {args.study_name}")
|
|
print("=" * 50)
|
|
print(f" Trials: {stats['trials']} ({stats['completed']} completed, {stats['failed']} failed)")
|
|
print(f" Database size: {stats['db_size_mb']:.2f} MB")
|
|
print(f" Temp files: {stats['temp_files']} ({stats['temp_size_mb']:.2f} MB)")
|
|
sys.exit(0)
|
|
|
|
# Confirmation
|
|
if not args.dry_run and not args.yes:
|
|
action = "full reset" if args.full_reset else \
|
|
"delete" if args.delete else \
|
|
"reset" if args.reset_db else "cleanup"
|
|
response = input(f"\nReally {action} study '{args.study_name}'? [y/N] ")
|
|
if response.lower() not in ['y', 'yes']:
|
|
print("Aborted")
|
|
sys.exit(0)
|
|
|
|
backup = not args.no_backup
|
|
|
|
if args.full_reset:
|
|
result = resetter.full_reset(backup=backup, dry_run=args.dry_run)
|
|
elif args.delete:
|
|
result = resetter.delete_study(confirm=True, dry_run=args.dry_run)
|
|
elif args.reset_db:
|
|
result = resetter.reset_database(backup=backup, dry_run=args.dry_run)
|
|
elif args.cleanup_temp:
|
|
result = resetter.cleanup_temp_files(dry_run=args.dry_run)
|
|
elif args.archive:
|
|
result = resetter.archive_results(dry_run=args.dry_run)
|
|
else:
|
|
parser.print_help()
|
|
sys.exit(0)
|
|
|
|
print("\nResult:")
|
|
print(json.dumps(result, indent=2))
|
|
|
|
if args.dry_run:
|
|
print("\n[DRY RUN - no changes made]")
|