164 lines
5.4 KiB
Python
164 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
Utility script to identify and clean up duplicate files with (2), (3) suffixes.
This helps clean up files that were created before the duplicate prevention was implemented.
"""
|
|
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Tuple
|
|
|
|
def find_duplicate_files(downloads_dir: str = "downloads") -> Dict[str, List[Tuple[Path, int]]]:
    """
    Find duplicate files with (2), (3), etc. suffixes in the downloads directory.

    Files are grouped per directory: two files only belong to the same group
    when they live in the same folder and share the same base name. When the
    unsuffixed original (e.g. ``song.mp4``) exists next to its numbered
    copies, it is included in the group with suffix number 0 so that it sorts
    first and is the file that gets kept.

    Args:
        downloads_dir: Path to downloads directory

    Returns:
        Dictionary mapping base filenames to lists of ``(path, suffix_number)``
        tuples sorted by suffix number. For files directly inside
        ``downloads_dir`` the key is the bare base name; for files in
        subdirectories the key is the POSIX-style path relative to
        ``downloads_dir`` (e.g. ``"channel/song"``). An empty dictionary is
        returned when the directory does not exist.
    """
    downloads_path = Path(downloads_dir)
    if not downloads_path.exists():
        print(f"❌ Downloads directory not found: {downloads_dir}")
        return {}

    # NOTE: any trailing "(digits)" is treated as a copy counter, so a title
    # ending in e.g. "(2019)" is also matched by this pattern.
    suffix_pattern = re.compile(r'^(.+?)\s*\((\d+)\)\.mp4$')

    duplicates: Dict[str, List[Tuple[Path, int]]] = {}

    # Scan all MP4 files in the downloads directory
    for mp4_file in downloads_path.rglob("*.mp4"):
        # rglob also yields directories whose name ends in .mp4; skip them.
        if not mp4_file.is_file():
            continue

        # Check if this is a duplicate file with (2), (3), etc.
        match = suffix_pattern.match(mp4_file.name)
        if not match:
            continue

        base_name = match.group(1)
        suffix_num = int(match.group(2))

        # Key on the containing directory as well, so that same-named files
        # in different folders are never treated as copies of each other.
        relative_parent = mp4_file.parent.relative_to(downloads_path)
        group_key = (relative_parent / base_name).as_posix()

        group = duplicates.get(group_key)
        if group is None:
            group = duplicates[group_key] = []
            # Include the unsuffixed original once per group; without it the
            # lowest-numbered copy would be kept alongside the original.
            original = mp4_file.with_name(f"{base_name}.mp4")
            if original.is_file():
                group.append((original, 0))

        group.append((mp4_file, suffix_num))

    # Sort duplicates by suffix number (the original, if present, comes first)
    for group in duplicates.values():
        group.sort(key=lambda entry: entry[1])

    return duplicates
|
|
|
|
def analyze_duplicates(duplicates: Dict[str, List[Tuple[Path, int]]]) -> None:
    """
    Print a report of the duplicate groups that were found.

    For every group the base name is printed, followed by one line per file
    showing its suffix number, file name and size in MB, and finally a
    summary line with the number of groups and the number of surplus files.

    Args:
        duplicates: Dictionary mapping a base name to a list of
            ``(path, suffix_number)`` tuples
    """
    if not duplicates:
        print("✅ No duplicate files found!")
        return

    group_count = len(duplicates)
    print(f"🔍 Found {group_count} sets of duplicate files:")
    print()

    surplus_files = 0
    for stem in duplicates:
        entries = duplicates[stem]
        print(f"📁 {stem}")

        for entry in entries:
            path, number = entry
            size_in_mb = path.stat().st_size / (1024 * 1024)
            print(f" ({number}) {path.name} - {size_in_mb:.1f} MB")

        print()
        # One file of every group is kept, so it does not count as surplus.
        surplus_files = surplus_files + (len(entries) - 1)

    print(f"📊 Summary: {group_count} base files with {surplus_files} duplicate files")
|
|
|
|
def cleanup_duplicates(duplicates: Dict[str, List[Tuple[Path, int]]], dry_run: bool = True) -> None:
    """
    Clean up duplicate files, keeping only the first occurrence.

    The first entry of every group is kept and every later entry is deleted
    (or only reported, in dry-run mode). A file whose size cannot be read or
    that cannot be deleted is reported and skipped; the remaining files are
    still processed.

    Args:
        duplicates: Dictionary mapping a base name to a list of
            ``(path, suffix_number)`` tuples; each list is expected to be
            ordered so that the file to keep comes first
        dry_run: If True, only show what would be deleted without actually deleting
    """
    if not duplicates:
        print("✅ No duplicates to clean up!")
        return

    mode = "DRY RUN" if dry_run else "ACTUAL CLEANUP"
    print(f"🧹 Starting {mode}...")
    print()

    bytes_per_mb = 1024 * 1024
    would_delete = 0
    total_deleted = 0
    total_size_freed = 0.0

    for base_name, files in duplicates.items():
        print(f"📁 Processing: {base_name}")

        # Keep the first file (lowest suffix number), delete the rest
        for file_path, _suffix in files[1:]:
            # The file may have disappeared or become unreadable since the
            # scan; skip it instead of aborting the whole cleanup.
            try:
                file_size = file_path.stat().st_size / bytes_per_mb
            except OSError as e:
                print(f" ⚠️ Skipping {file_path.name}: cannot read file size ({e})")
                continue

            if dry_run:
                print(f" 🗑️ Would delete: {file_path.name} ({file_size:.1f} MB)")
                would_delete += 1
                continue

            try:
                file_path.unlink()
            except OSError as e:
                print(f" ❌ Failed to delete {file_path.name}: {e}")
            else:
                print(f" ✅ Deleted: {file_path.name} ({file_size:.1f} MB)")
                total_deleted += 1
                total_size_freed += file_size

        print()

    if dry_run:
        print(f"📊 DRY RUN SUMMARY: Would delete {would_delete} files")
    else:
        print(f"📊 CLEANUP SUMMARY: Deleted {total_deleted} files, freed {total_size_freed:.1f} MB")
|
|
|
|
def main():
    """
    Run the interactive duplicate file cleanup.

    Scans the default downloads directory, prints a report of the duplicate
    groups, then prompts until the user picks one of three options: a dry
    run, an actual deletion (after a yes/no confirmation), or exit.
    """
    print("🎵 Karaoke Video Downloader - Duplicate File Cleanup")
    print("=" * 50)
    print()

    # Locate the duplicate groups; nothing else to do when there are none.
    found = find_duplicate_files()
    if not found:
        print("✅ No duplicate files found!")
        return

    # Show the report before asking what to do.
    analyze_duplicates(found)
    print()

    menu_lines = (
        "Options:",
        "1. Dry run (show what would be deleted)",
        "2. Actually delete duplicate files",
        "3. Exit without doing anything",
    )

    # Re-display the menu until a valid option is entered.
    while True:
        for menu_line in menu_lines:
            print(menu_line)

        selection = input("\nEnter your choice (1-3): ").strip()
        if selection in ("1", "2", "3"):
            break
        print("❌ Invalid choice. Please enter 1, 2, or 3.")

    if selection == "1":
        cleanup_duplicates(found, dry_run=True)
        return

    if selection == "3":
        print("❌ Exiting without cleanup.")
        return

    # Option 2: deleting for real requires an explicit confirmation.
    answer = input("⚠️ Are you sure you want to delete duplicate files? (yes/no): ").strip().lower()
    if answer in ["yes", "y"]:
        cleanup_duplicates(found, dry_run=False)
    else:
        print("❌ Cleanup cancelled.")
|
|
|
|
# Run the interactive cleanup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()