#!/usr/bin/env python3
"""
Utility script to identify and clean up duplicate files with (2), (3) suffixes.

This helps clean up files that were created before the duplicate prevention
was implemented.
"""

import json
import re
from pathlib import Path
from typing import Dict, List, Tuple

# "<base> (N).mp4": group 1 is the base name, group 2 the numeric suffix.
# NOTE(review): this also matches titles that legitimately end in a number in
# parentheses (e.g. "Song (1999).mp4"); confirm that is acceptable.
_DUPLICATE_RE = re.compile(r'^(.+?)\s*\((\d+)\)\.mp4$')

_BYTES_PER_MB = 1024 * 1024


def _size_mb(path: Path) -> float:
    """Return the size of *path* in MiB (raises OSError if it cannot be read)."""
    return path.stat().st_size / _BYTES_PER_MB


def find_duplicate_files(downloads_dir: str = "downloads") -> Dict[str, List[Tuple[Path, int]]]:
    """
    Find duplicate files with (2), (3), etc. suffixes in the downloads directory.

    The directory is scanned recursively. Files are grouped per directory, so
    two equally named files living in different sub-directories are never
    treated as duplicates of each other.

    Args:
        downloads_dir: Path to downloads directory

    Returns:
        Dictionary mapping a group key to a list of ``(path, suffix_number)``
        tuples sorted by suffix number (ties broken by filename). The key is
        the base filename for files directly inside ``downloads_dir`` and
        ``"<relative/dir>/<base filename>"`` for files in sub-directories.
        An empty dict is returned when the directory does not exist.
    """
    downloads_path = Path(downloads_dir)

    if not downloads_path.exists():
        print(f"❌ Downloads directory not found: {downloads_dir}")
        return {}

    duplicates: Dict[str, List[Tuple[Path, int]]] = {}

    # Scan all MP4 files in the downloads directory
    for mp4_file in downloads_path.rglob("*.mp4"):
        match = _DUPLICATE_RE.match(mp4_file.name)
        if not match:
            continue

        base_name = match.group(1)
        suffix_num = int(match.group(2))

        # Qualify the key with the sub-directory so same-named files from
        # different folders do not end up in one group.
        rel_dir = mp4_file.parent.relative_to(downloads_path)
        if rel_dir == Path("."):
            key = base_name
        else:
            key = f"{rel_dir.as_posix()}/{base_name}"

        duplicates.setdefault(key, []).append((mp4_file, suffix_num))

    # Sort by suffix number; the filename tie-break keeps the order (and thus
    # the file that survives cleanup) independent of traversal order.
    for files in duplicates.values():
        files.sort(key=lambda item: (item[1], item[0].name))

    return duplicates


def analyze_duplicates(duplicates: Dict[str, List[Tuple[Path, int]]]) -> None:
    """
    Analyze and display information about found duplicates.

    Args:
        duplicates: Dictionary of duplicate files as returned by
            ``find_duplicate_files``
    """
    if not duplicates:
        print("✅ No duplicate files found!")
        return

    print(f"🔍 Found {len(duplicates)} sets of duplicate files:")
    print()

    total_duplicates = 0
    for base_name, files in duplicates.items():
        print(f"📁 {base_name}")
        for file_path, suffix in files:
            try:
                size_text = f"{_size_mb(file_path):.1f} MB"
            except OSError:
                # The file may have disappeared since the scan.
                size_text = "size unavailable"
            print(f"   ({suffix}) {file_path.name} - {size_text}")
        print()
        # The first (lowest-numbered) entry of each group is the one kept.
        total_duplicates += len(files) - 1

    print(f"📊 Summary: {len(duplicates)} base files with {total_duplicates} duplicate files")


def cleanup_duplicates(duplicates: Dict[str, List[Tuple[Path, int]]], dry_run: bool = True) -> None:
    """
    Clean up duplicate files, keeping only the first entry of every group.

    NOTE(review): the groups only contain suffixed files, so the file kept is
    the lowest-numbered copy (e.g. "foo (2).mp4"); whether an un-suffixed
    "foo.mp4" exists is not taken into account. Confirm this is intended.

    Args:
        duplicates: Dictionary of duplicate files as returned by
            ``find_duplicate_files``
        dry_run: If True, only show what would be deleted without actually deleting
    """
    if not duplicates:
        print("✅ No duplicates to clean up!")
        return

    mode = "DRY RUN" if dry_run else "ACTUAL CLEANUP"
    print(f"🧹 Starting {mode}...")
    print()

    total_files = 0
    total_size = 0.0

    for base_name, files in duplicates.items():
        print(f"📁 Processing: {base_name}")

        # Keep the first file (lowest suffix number), handle the rest.
        for file_path, _suffix in files[1:]:
            try:
                # stat() sits inside the try so a file that vanished since the
                # scan is reported instead of aborting the whole run.
                file_size = _size_mb(file_path)
                if not dry_run:
                    file_path.unlink()
            except OSError as e:
                action = "access" if dry_run else "delete"
                print(f"   ❌ Failed to {action} {file_path.name}: {e}")
                continue

            if dry_run:
                print(f"   🗑️ Would delete: {file_path.name} ({file_size:.1f} MB)")
            else:
                print(f"   ✅ Deleted: {file_path.name} ({file_size:.1f} MB)")
            total_files += 1
            total_size += file_size

        print()

    if dry_run:
        print(f"📊 DRY RUN SUMMARY: Would delete {total_files} files, freeing {total_size:.1f} MB")
    else:
        print(f"📊 CLEANUP SUMMARY: Deleted {total_files} files, freed {total_size:.1f} MB")


def main():
    """Main function to run the duplicate file cleanup interactively."""
    print("🎵 Karaoke Video Downloader - Duplicate File Cleanup")
    print("=" * 50)
    print()

    # Find duplicates
    duplicates = find_duplicate_files()

    if not duplicates:
        print("✅ No duplicate files found!")
        return

    # Analyze duplicates
    analyze_duplicates(duplicates)
    print()

    # Ask user what to do; loop until a valid choice is entered.
    while True:
        print("Options:")
        print("1. Dry run (show what would be deleted)")
        print("2. Actually delete duplicate files")
        print("3. Exit without doing anything")

        try:
            choice = input("\nEnter your choice (1-3): ").strip()
        except EOFError:
            # stdin closed (e.g. non-interactive run): never delete by default.
            print("❌ Exiting without cleanup.")
            break

        if choice == "1":
            cleanup_duplicates(duplicates, dry_run=True)
            break
        elif choice == "2":
            try:
                confirm = input(
                    "⚠️ Are you sure you want to delete duplicate files? (yes/no): "
                ).strip().lower()
            except EOFError:
                confirm = ""
            if confirm in ["yes", "y"]:
                cleanup_duplicates(duplicates, dry_run=False)
            else:
                print("❌ Cleanup cancelled.")
            break
        elif choice == "3":
            print("❌ Exiting without cleanup.")
            break
        else:
            print("❌ Invalid choice. Please enter 1, 2, or 3.")


if __name__ == "__main__":
    main()