Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-29 09:07:31 -05:00
parent 9f0787d00a
commit 50b402ddec
4 changed files with 97 additions and 8 deletions

30
PRD.md
View File

@ -199,6 +199,7 @@ KaroakeVideoDownloader/
- `--manual`: **Download from manual videos collection (data/manual_videos.json)**
- `--channel-focus <CHANNEL_NAME>`: **Download from a specific channel by name (e.g., 'SingKingKaraoke')**
- `--all-videos`: **Download all videos from channel (not just songlist matches), skipping existing files and songs in songs.json**
- `--dry-run`: **Build download plan and show what would be downloaded without actually downloading anything**
---
@ -528,3 +529,32 @@ def download_new_mode(self, ...):
- [ ] Plugin system for custom file operations
- [ ] Advanced configuration UI
- [ ] Real-time download progress visualization
## 🔧 Recent Bug Fixes & Improvements (v3.4.6)
### **Dry Run Mode**
- **New `--dry-run` parameter**: Build download plan and show what would be downloaded without actually downloading anything
- **Plan preview**: Shows the total number of videos in the plan and a preview of the first 5 videos (or fewer, if the plan is shorter)
- **Safe testing**: Test download configurations without consuming bandwidth or disk space
- **All-mode support**: Works with all download modes (`--channel-focus`, `--all-videos`, `--songlist-only`, `--latest-per-channel`)
- **Progress simulation**: Prints the same plan summary a real run would start with, then stops before any download is executed
### **Benefits of Dry Run Mode**
- **Safe testing**: Test complex download configurations without downloading anything
- **Plan validation**: Verify that the download plan contains the expected videos
- **Configuration debugging**: Troubleshoot download settings before committing to downloads
- **Resource conservation**: Save bandwidth and disk space during testing
- **User education**: Help users understand what the tool will do before running it
### **Example Usage**
```bash
# Test songlist download plan
python download_karaoke.py --songlist-only --limit 5 --dry-run
# Test channel download plan
python download_karaoke.py --channel-focus SingKingKaraoke --all-videos --limit 10 --dry-run
# Test with fuzzy matching
python download_karaoke.py --songlist-only --fuzzy-match --limit 3 --dry-run
```
### **Future Development Guidelines**

View File

@ -246,6 +246,16 @@ python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --force
python download_karaoke.py --songlist-only --limit 10 --fuzzy-match --fuzzy-threshold 85
```
### Test Download Plan (Dry Run)
```bash
python download_karaoke.py --songlist-only --limit 5 --dry-run
```
### Test Channel Download Plan (Dry Run)
```bash
python download_karaoke.py --channel-focus SingKingKaraoke --all-videos --limit 10 --dry-run
```
### Download Latest N Videos Per Channel
```bash
python download_karaoke.py --latest-per-channel --limit 5
@ -398,6 +408,7 @@ KaroakeVideoDownloader/
- `--force`: **Force download from channels, bypassing all existing file checks and re-downloading if necessary**
- `--channel-focus <CHANNEL_NAME>`: **Download from a specific channel by name (e.g., 'SingKingKaraoke')**
- `--all-videos`: **Download all videos from channel (not just songlist matches), skipping existing files and songs in songs.json**
- `--dry-run`: **Build download plan and show what would be downloaded without actually downloading anything**
## 📝 Example Usage

View File

@ -311,6 +311,11 @@ Examples:
action="store_true",
help="Download all videos from channel (not just songlist matches), skipping existing files",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Build download plan and show what would be downloaded without actually downloading anything",
)
args = parser.parse_args()
# Validate workers argument
@ -366,6 +371,9 @@ Examples:
if args.force:
downloader.force_download = True
print("💪 Force mode enabled - will download regardless of existing files or server duplicates")
if args.dry_run:
downloader.dry_run = True
print("🔍 Dry run mode enabled - will show download plan without downloading")
if args.resolution != "720p":
downloader.config_manager.update_resolution(args.resolution)
@ -517,6 +525,7 @@ Examples:
force_refresh=args.refresh,
force_download=args.force,
limit=args.limit,
dry_run=args.dry_run,
)
else:
# Download only songlist matches from the channel
@ -527,6 +536,7 @@ Examples:
fuzzy_match=args.fuzzy_match,
fuzzy_threshold=args.fuzzy_threshold,
force_download=args.force,
dry_run=args.dry_run,
)
elif args.songlist_only or args.songlist_focus:
# Use provided file or default to channels configuration
@ -545,6 +555,7 @@ Examples:
show_pagination=args.show_pagination,
parallel_channels=args.parallel_channels,
max_channel_workers=args.channel_workers,
dry_run=args.dry_run,
)
elif args.latest_per_channel:
# Use provided file or default to channels configuration
@ -569,10 +580,11 @@ Examples:
fuzzy_match=fuzzy_match,
fuzzy_threshold=fuzzy_threshold,
force_download=args.force,
dry_run=args.dry_run,
)
elif args.url:
success = downloader.download_channel_videos(
args.url, force_refresh=args.refresh
args.url, force_refresh=args.refresh, dry_run=args.dry_run
)
else:
# Default behavior: download from channels (equivalent to --latest-per-channel)
@ -599,6 +611,7 @@ Examples:
fuzzy_match=fuzzy_match,
fuzzy_threshold=fuzzy_threshold,
force_download=args.force,
dry_run=args.dry_run,
)
# Generate unmatched report if requested (additive feature)

View File

@ -118,6 +118,9 @@ class KaraokeDownloader:
self.songlist_focus_titles = None
self.songlist_only = False
self.use_songlist_priority = True
# Download mode attributes
self.dry_run = False
self.download_limit = None
self.force_download = False
self.songlist_file_path = "data/songList.json" # Default songlist file path
@ -186,6 +189,7 @@ class KaraokeDownloader:
fuzzy_match=False,
fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
force_download=False,
dry_run=False,
):
"""Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching."""
@ -214,7 +218,7 @@ class KaraokeDownloader:
print(f"📋 Found {len(available_videos)} manual videos")
# Process manual videos (skip yt-dlp)
return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download)
return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download, dry_run)
# Regular YouTube channel processing - USE TRACKING MANAGER CACHE
channel_name, channel_id = get_channel_info(url)
@ -234,7 +238,7 @@ class KaraokeDownloader:
print(f"📋 Found {len(available_videos)} videos from channel")
# Process videos for download
return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download)
return self._process_videos_for_download(available_videos, channel_name, force_refresh, fuzzy_match, fuzzy_threshold, force_download, dry_run)
def download_all_channel_videos(
self,
@ -242,6 +246,7 @@ class KaraokeDownloader:
force_refresh=False,
force_download=False,
limit=None,
dry_run=False,
):
"""Download ALL videos from a channel, skipping existing files and songs in songs.json."""
@ -270,7 +275,7 @@ class KaraokeDownloader:
print(f"📋 Found {len(available_videos)} manual videos")
# Process all manual videos (skip songlist filtering)
return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit)
return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit, dry_run)
# Regular YouTube channel processing - USE TRACKING MANAGER CACHE
channel_name, channel_id = get_channel_info(url)
@ -290,9 +295,9 @@ class KaraokeDownloader:
print(f"📋 Found {len(available_videos)} videos from channel")
# Process all videos for download (skip songlist filtering)
return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit)
return self._process_all_videos_for_download(available_videos, channel_name, force_refresh, force_download, limit, dry_run)
def _process_all_videos_for_download(self, available_videos, channel_name, force_refresh=False, force_download=False, limit=None):
def _process_all_videos_for_download(self, available_videos, channel_name, force_refresh=False, force_download=False, limit=None, dry_run=False):
"""Process ALL videos for download (no songlist filtering)."""
print(f"🔍 Processing {len(available_videos)} videos for download (no songlist filtering)...")
@ -450,6 +455,7 @@ class KaraokeDownloader:
cache_file=cache_file,
limit=limit,
show_progress=True,
dry_run=dry_run,
)
return success
@ -465,6 +471,7 @@ class KaraokeDownloader:
show_pagination=False,
parallel_channels=False,
max_channel_workers=3,
dry_run=False,
):
"""
Download songs from the songlist across multiple channels.
@ -695,6 +702,7 @@ class KaraokeDownloader:
download_plan=download_plan,
cache_file=cache_file,
limit=limit,
dry_run=dry_run,
)
return success
@ -706,6 +714,7 @@ class KaraokeDownloader:
fuzzy_match=False,
fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
force_download=False,
dry_run=False,
):
"""
Download the latest N videos from each channel.
@ -815,11 +824,12 @@ class KaraokeDownloader:
cache_file=cache_file,
limit=None, # Limit already applied during plan building
show_progress=True,
dry_run=dry_run,
)
return success
def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False):
def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False, dry_run=False):
"""Process videos for download (used for both manual and regular channels)."""
songlist = load_songlist(self.songlist_file_path)
@ -964,6 +974,7 @@ class KaraokeDownloader:
cache_file=None, # No specific cache file for this mode
limit=limit,
show_progress=True,
dry_run=dry_run,
)
return success
@ -1012,6 +1023,7 @@ class KaraokeDownloader:
cache_file=None,
limit=None,
show_progress=True,
dry_run=False,
):
"""
Unified download workflow that all download modes use.
@ -1021,6 +1033,7 @@ class KaraokeDownloader:
cache_file: Optional cache file for progress tracking
limit: Optional limit on number of downloads
show_progress: Whether to show progress information
dry_run: If True, only show the plan without downloading
Returns:
tuple: (downloaded_count, success)
@ -1040,6 +1053,25 @@ class KaraokeDownloader:
if cache_file:
print(f" 💾 Progress tracking: {cache_file.name}")
# Handle dry-run mode
if dry_run:
print(f"\n🔍 DRY RUN MODE - No downloads will be performed")
print(f"📋 Download plan preview:")
print(f" 📊 Total videos in plan: {len(download_plan)}")
print(f" 📁 Output directory: downloads/")
# Show first few items as preview
preview_count = min(5, len(download_plan))
print(f"\n📋 Preview of first {preview_count} videos:")
for i, item in enumerate(download_plan[:preview_count], 1):
print(f" {i:2d}. {item['artist']} - {item['title']} ({item['channel_name']})")
if len(download_plan) > preview_count:
print(f" ... and {len(download_plan) - preview_count} more videos")
print(f"\n✅ Dry run completed - {len(download_plan)} videos would be downloaded")
return len(download_plan), True
# Choose execution method based on parallel settings
if self.enable_parallel_downloads:
return self._execute_parallel_downloads(download_plan, cache_file, show_progress)
@ -1126,8 +1158,11 @@ class KaraokeDownloader:
)
tasks.append(task)
# Add tasks to the downloader queue
parallel_downloader.add_download_tasks(tasks)
# Execute parallel downloads
results = parallel_downloader.execute_downloads(tasks)
results = parallel_downloader.execute_downloads(show_progress=show_progress)
# Count successes
success_count = sum(1 for result in results if result.success)