From 50b402ddece8c92c6b5777ea4261ad8607c2ba0c Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Tue, 29 Jul 2025 09:07:31 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- PRD.md | 30 +++++++++++++++++++ README.md | 11 +++++++ karaoke_downloader/cli.py | 15 +++++++++- karaoke_downloader/downloader.py | 49 +++++++++++++++++++++++++++----- 4 files changed, 97 insertions(+), 8 deletions(-) diff --git a/PRD.md b/PRD.md index c70a247..f2f009e 100644 --- a/PRD.md +++ b/PRD.md @@ -199,6 +199,7 @@ KaroakeVideoDownloader/ - `--manual`: **Download from manual videos collection (data/manual_videos.json)** - `--channel-focus `: **Download from a specific channel by name (e.g., 'SingKingKaraoke')** - `--all-videos`: **Download all videos from channel (not just songlist matches), skipping existing files and songs in songs.json** +- `--dry-run`: **Build download plan and show what would be downloaded without actually downloading anything** --- @@ -528,3 +529,32 @@ def download_new_mode(self, ...): - [ ] Plugin system for custom file operations - [ ] Advanced configuration UI - [ ] Real-time download progress visualization + +## 🔧 Recent Bug Fixes & Improvements (v3.4.6) +### **Dry Run Mode** +- **New `--dry-run` parameter**: Build download plan and show what would be downloaded without actually downloading anything +- **Plan preview**: Shows total videos in plan and preview of first 5 videos +- **Safe testing**: Test download configurations without consuming bandwidth or disk space +- **All mode support**: Works with all download modes (--channel-focus, --all-videos, --songlist-only, --latest-per-channel) +- **Progress simulation**: Shows what the download process would look like without executing it + +### **Benefits of Dry Run Mode** +- **Safe testing**: Test complex download configurations without downloading anything +- **Plan validation**: Verify that the download plan contains the expected videos +- **Configuration debugging**: Troubleshoot download settings before 
committing to downloads +- **Resource conservation**: Save bandwidth and disk space during testing +- **User education**: Help users understand what the tool will do before running it + +### **Example Usage** +```bash +# Test songlist download plan +python download_karaoke.py --songlist-only --limit 5 --dry-run + +# Test channel download plan +python download_karaoke.py --channel-focus SingKingKaraoke --all-videos --limit 10 --dry-run + +# Test with fuzzy matching +python download_karaoke.py --songlist-only --fuzzy-match --limit 3 --dry-run +``` + +### **Future Development Guidelines** diff --git a/README.md b/README.md index 3851580..bf903ca 100644 --- a/README.md +++ b/README.md @@ -246,6 +246,16 @@ python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --force python download_karaoke.py --songlist-only --limit 10 --fuzzy-match --fuzzy-threshold 85 ``` +### Test Download Plan (Dry Run) +```bash +python download_karaoke.py --songlist-only --limit 5 --dry-run +``` + +### Test Channel Download Plan (Dry Run) +```bash +python download_karaoke.py --channel-focus SingKingKaraoke --all-videos --limit 10 --dry-run +``` + ### Download Latest N Videos Per Channel ```bash python download_karaoke.py --latest-per-channel --limit 5 @@ -398,6 +408,7 @@ KaroakeVideoDownloader/ - `--force`: **Force download from channels, bypassing all existing file checks and re-downloading if necessary** - `--channel-focus `: **Download from a specific channel by name (e.g., 'SingKingKaraoke')** - `--all-videos`: **Download all videos from channel (not just songlist matches), skipping existing files** +- `--dry-run`: **Build download plan and show what would be downloaded without actually downloading anything** ## 📝 Example Usage diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index f06cb90..569f86c 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -311,6 +311,11 @@ Examples: action="store_true", help="Download all videos from channel 
(not just songlist matches), skipping existing files", ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Build download plan and show what would be downloaded without actually downloading anything", + ) args = parser.parse_args() # Validate workers argument @@ -366,6 +371,9 @@ Examples: if args.force: downloader.force_download = True print("šŸ’Ŗ Force mode enabled - will download regardless of existing files or server duplicates") + if args.dry_run: + downloader.dry_run = True + print("šŸ” Dry run mode enabled - will show download plan without downloading") if args.resolution != "720p": downloader.config_manager.update_resolution(args.resolution) @@ -517,6 +525,7 @@ Examples: force_refresh=args.refresh, force_download=args.force, limit=args.limit, + dry_run=args.dry_run, ) else: # Download only songlist matches from the channel @@ -527,6 +536,7 @@ Examples: fuzzy_match=args.fuzzy_match, fuzzy_threshold=args.fuzzy_threshold, force_download=args.force, + dry_run=args.dry_run, ) elif args.songlist_only or args.songlist_focus: # Use provided file or default to channels configuration @@ -545,6 +555,7 @@ Examples: show_pagination=args.show_pagination, parallel_channels=args.parallel_channels, max_channel_workers=args.channel_workers, + dry_run=args.dry_run, ) elif args.latest_per_channel: # Use provided file or default to channels configuration @@ -569,10 +580,11 @@ Examples: fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold, force_download=args.force, + dry_run=args.dry_run, ) elif args.url: success = downloader.download_channel_videos( - args.url, force_refresh=args.refresh + args.url, force_refresh=args.refresh, dry_run=args.dry_run ) else: # Default behavior: download from channels (equivalent to --latest-per-channel) @@ -599,6 +611,7 @@ Examples: fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold, force_download=args.force, + dry_run=args.dry_run, ) # Generate unmatched report if requested (additive feature) diff --git 
a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index 2b8c2f7..ce5e22d 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -118,6 +118,9 @@ class KaraokeDownloader: self.songlist_focus_titles = None self.songlist_only = False self.use_songlist_priority = True + + # Download mode attributes + self.dry_run = False self.download_limit = None self.force_download = False self.songlist_file_path = "data/songList.json" # Default songlist file path @@ -186,6 +189,7 @@ class KaraokeDownloader: fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False, + dry_run=False, ): """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.""" @@ -214,7 +218,7 @@ class KaraokeDownloader: print(f"šŸ“‹ Found {len(available_videos)} manual videos") # Process manual videos (skip yt-dlp) - return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download) + return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download, dry_run) # Regular YouTube channel processing - USE TRACKING MANAGER CACHE channel_name, channel_id = get_channel_info(url) @@ -234,7 +238,7 @@ class KaraokeDownloader: print(f"šŸ“‹ Found {len(available_videos)} videos from channel") # Process videos for download - return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download) + return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download, dry_run) def download_all_channel_videos( self, @@ -242,6 +246,7 @@ class KaraokeDownloader: force_refresh=False, force_download=False, limit=None, + dry_run=False, ): """Download ALL videos from a channel, skipping existing files and songs in songs.json.""" @@ -270,7 +275,7 @@ 
class KaraokeDownloader: print(f"šŸ“‹ Found {len(available_videos)} manual videos") # Process all manual videos (skip songlist filtering) - return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit) + return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit, dry_run) # Regular YouTube channel processing - USE TRACKING MANAGER CACHE channel_name, channel_id = get_channel_info(url) @@ -290,9 +295,9 @@ class KaraokeDownloader: print(f"šŸ“‹ Found {len(available_videos)} videos from channel") # Process all videos for download (skip songlist filtering) - return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit) + return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit, dry_run) - def _process_all_videos_for_download(self, available_videos, channel_name, force_refresh=False, force_download=False, limit=None): + def _process_all_videos_for_download(self, available_videos, channel_name, force_refresh=False, force_download=False, limit=None, dry_run=False): """Process ALL videos for download (no songlist filtering).""" print(f"šŸ” Processing {len(available_videos)} videos for download (no songlist filtering)...") @@ -450,6 +455,7 @@ class KaraokeDownloader: cache_file=cache_file, limit=limit, show_progress=True, + dry_run=dry_run, ) return success @@ -465,6 +471,7 @@ class KaraokeDownloader: show_pagination=False, parallel_channels=False, max_channel_workers=3, + dry_run=False, ): """ Download songs from the songlist across multiple channels. 
@@ -695,6 +702,7 @@ class KaraokeDownloader: download_plan=download_plan, cache_file=cache_file, limit=limit, + dry_run=dry_run, ) return success @@ -706,6 +714,7 @@ class KaraokeDownloader: fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False, + dry_run=False, ): """ Download the latest N videos from each channel. @@ -815,11 +824,12 @@ class KaraokeDownloader: cache_file=cache_file, limit=None, # Limit already applied during plan building show_progress=True, + dry_run=dry_run, ) return success - def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False): + def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False, dry_run=False): """Process videos for download (used for both manual and regular channels).""" songlist = load_songlist(self.songlist_file_path) @@ -964,6 +974,7 @@ class KaraokeDownloader: cache_file=None, # No specific cache file for this mode limit=limit, show_progress=True, + dry_run=dry_run, ) return success @@ -1012,6 +1023,7 @@ class KaraokeDownloader: cache_file=None, limit=None, show_progress=True, + dry_run=False, ): """ Unified download workflow that all download modes use. 
@@ -1021,6 +1033,7 @@ class KaraokeDownloader: cache_file: Optional cache file for progress tracking limit: Optional limit on number of downloads show_progress: Whether to show progress information + dry_run: If True, only show the plan without downloading Returns: tuple: (downloaded_count, success) @@ -1040,6 +1053,25 @@ class KaraokeDownloader: if cache_file: print(f" šŸ’¾ Progress tracking: {cache_file.name}") + # Handle dry-run mode + if dry_run: + print(f"\nšŸ” DRY RUN MODE - No downloads will be performed") + print(f"šŸ“‹ Download plan preview:") + print(f" šŸ“Š Total videos in plan: {len(download_plan)}") + print(f" šŸ“ Output directory: downloads/") + + # Show first few items as preview + preview_count = min(5, len(download_plan)) + print(f"\nšŸ“‹ Preview of first {preview_count} videos:") + for i, item in enumerate(download_plan[:preview_count], 1): + print(f" {i:2d}. {item['artist']} - {item['title']} ({item['channel_name']})") + + if len(download_plan) > preview_count: + print(f" ... and {len(download_plan) - preview_count} more videos") + + print(f"\nāœ… Dry run completed - {len(download_plan)} videos would be downloaded") + return len(download_plan), True + # Choose execution method based on parallel settings if self.enable_parallel_downloads: return self._execute_parallel_downloads(download_plan, cache_file, show_progress) @@ -1126,8 +1158,11 @@ class KaraokeDownloader: ) tasks.append(task) + # Add tasks to the downloader queue + parallel_downloader.add_download_tasks(tasks) + # Execute parallel downloads - results = parallel_downloader.execute_downloads(tasks) + results = parallel_downloader.execute_downloads(show_progress=show_progress) # Count successes success_count = sum(1 for result in results if result.success)