From aa28101926c876ff31dbb79b6bc456908915b5cd Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Thu, 24 Jul 2025 20:29:44 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- PRD.md | 25 +++- README.md | 7 + karaoke_downloader/cli.py | 31 ++-- karaoke_downloader/downloader.py | 244 +++++++++++++++++++++++++------ 4 files changed, 247 insertions(+), 60 deletions(-) diff --git a/PRD.md b/PRD.md index 73f0a87..3f48e56 100644 --- a/PRD.md +++ b/PRD.md @@ -1,5 +1,5 @@ -# 🎤 Karaoke Video Downloader – PRD (v2.1) +# 🎤 Karaoke Video Downloader – PRD (v2.2) ## ✅ Overview A Python-based Windows CLI tool to download karaoke videos from YouTube channels/playlists using `yt-dlp.exe`, with advanced tracking, songlist prioritization, and flexible configuration. @@ -30,14 +30,14 @@ A Python-based Windows CLI tool to download karaoke videos from YouTube channels ## 📥 Input - YouTube channel or playlist URLs (e.g. `https://www.youtube.com/@SingKingKaraoke/videos`) -- Optional: `data/channels.txt` file with multiple channel URLs (one per line) +- Optional: `data/channels.txt` file with multiple channel URLs (one per line) - **now defaults to this file if not specified** - Optional: `data/songList.json` for prioritized song downloads ### Example Usage ```bash python download_karaoke.py https://www.youtube.com/@SingKingKaraoke/videos -python download_karaoke.py --file data/channels.txt -python download_karaoke.py --songlist-only +python download_karaoke.py --songlist-only --limit 5 +python download_karaoke.py --latest-per-channel --limit 3 python download_karaoke.py --reset-channel SingKingKaraoke --reset-songlist python download_karaoke.py --clear-cache SingKingKaraoke ``` @@ -67,6 +67,10 @@ python download_karaoke.py --clear-cache SingKingKaraoke - ✅ **Clear channel cache via CLI** - ✅ **Download plan pre-scan and caching**: Before downloading, the tool pre-scans all channels for songlist matches, builds a download plan, and prints stats.
The plan is cached for 1 day in data/download_plan_cache.json for fast resuming and reliability. Use --force-download-plan to force a refresh. - ✅ **Latest-per-channel download**: Download the latest N videos from each channel in a single batch, with a per-channel download plan, robust resume, and unique plan cache. Use --latest-per-channel and --limit N. +- ✅ **Fast mode with early exit**: When a limit is set, the tool scans channels and songs in order, downloads immediately when a match is found, and stops as soon as the limit is reached with successful downloads. If a download fails, it continues scanning until the limit is satisfied or all channels are exhausted. +- ✅ **Deduplication across channels**: Ensures the same song (by artist + normalized title) is not downloaded more than once, even if it appears in multiple channels. Tracks unique keys and skips duplicates. +- ✅ **Fuzzy matching**: Optionally use fuzzy string matching for songlist-to-video matching with configurable threshold (0-100, default 85). Uses rapidfuzz if available, falls back to difflib. +- ✅ **Default channel file**: If no --file is specified for songlist-only or latest-per-channel modes, automatically uses data/channels.txt as the default channel list.
--- @@ -106,11 +110,11 @@ KaroakeVideoDownloader/ --- ## 🚦 CLI Options (Summary) -- `--file <path>`: Download from a list of channels +- `--file <path>`: Download from a list of channels (optional, defaults to data/channels.txt for songlist modes) - `--songlist-priority`: Prioritize songlist songs in download queue - `--songlist-only`: Download only songs from the songlist - `--songlist-status`: Show songlist download progress -- `--limit <N>`: Limit number of downloads +- `--limit <N>`: Limit number of downloads (enables fast mode with early exit) - `--resolution <720p|1080p|...>`: Override resolution - `--status`: Show download/tracking status - `--reset-channel <channel>`: **Reset all tracking and files for a channel** @@ -118,6 +122,8 @@ KaroakeVideoDownloader/ - `--clear-cache <channel|all>`: **Clear channel video cache for a specific channel or all** - `--force-download-plan`: **Force refresh the download plan cache (re-scan all channels for matches)** - `--latest-per-channel`: **Download the latest N videos from each channel (use with --limit)** +- `--fuzzy-match`: **Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)** +- `--fuzzy-threshold <N>`: **Fuzzy match threshold (0-100, default 85)** --- @@ -130,11 +136,16 @@ KaroakeVideoDownloader/ - **Reset/Clear:** Use `--reset-channel` to reset all tracking and files for a channel (optionally including songlist songs with `--reset-songlist`). Use `--clear-cache` to clear cached video lists for a channel or all channels. - **Download plan pre-scan:** Before downloading, the tool scans all channels for songlist matches, builds a download plan, and prints stats (matches, unmatched, per-channel breakdown). The plan is cached for 1 day and reused unless --force-download-plan is set. - **Latest-per-channel plan:** Download the latest N videos from each channel, with a per-channel plan and robust resume. Each channel is removed from the plan as it completes. Plan cache is deleted when all channels are done.
+- **Fast mode with early exit:** When a limit is set, the tool scans channels and songs in order, downloads immediately when a match is found, and stops as soon as the limit is reached with successful downloads. This provides much faster performance for small limits compared to the full pre-scan approach. +- **Deduplication across channels:** Tracks unique song keys (artist + normalized title) to ensure the same song is not downloaded from multiple channels, even if it appears in more than one channel's video list. +- **Fuzzy matching:** Uses string similarity algorithms to find approximate matches between songlist entries and video titles, tolerating minor differences, typos, or extra words like "Karaoke" or "Official Video". +- **Default channel file:** For songlist-only and latest-per-channel modes, if no --file is specified, automatically uses data/channels.txt as the default channel list, reducing the need to specify the file path repeatedly. --- ## 🚀 Future Enhancements - [ ] Web UI for easier management -- [ ] More advanced song matching (fuzzy, multi-language) +- [ ] More advanced song matching (multi-language) - [ ] Download scheduling and retry logic - [ ] More granular status reporting +- [ ] Parallel downloads for improved speed diff --git a/README.md b/README.md index 5c69411..1c8a2ef 100644 --- a/README.md +++ b/README.md @@ -13,12 +13,15 @@ A Python-based Windows CLI tool to download karaoke videos from YouTube channels - 📈 **Real-Time Progress**: Detailed console and log output - 🧹 **Reset/Clear Channel**: Reset all tracking and files for a channel, or clear channel cache via CLI - 🗂️ **Latest-per-channel download**: Download the latest N videos from each channel in a single batch, with a per-channel download plan, robust resume, and unique plan cache. Use --latest-per-channel and --limit N.
+- ๐Ÿงฉ **Fuzzy Matching**: Optionally use fuzzy string matching for songlist-to-video matching (with --fuzzy-match, requires rapidfuzz for best results) ## ๐Ÿ“‹ Requirements - **Windows 10/11** - **Python 3.7+** - **yt-dlp.exe** (in `downloader/`) - **mutagen** (for ID3 tagging, optional) +- **ffmpeg/ffprobe** (for video validation, optional but recommended) +- **rapidfuzz** (for fuzzy matching, optional, falls back to difflib) ## ๐Ÿš€ Quick Start @@ -142,6 +145,8 @@ KaroakeVideoDownloader/ - `--reset-songlist`: **When used with --reset-channel, also reset songlist songs for this channel** - `--clear-cache `: **Clear channel video cache for a specific channel or all** - `--latest-per-channel`: **Download the latest N videos from each channel (use with --limit)** +- `--fuzzy-match`: Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available) +- `--fuzzy-threshold `: Fuzzy match threshold (0-100, default 85) ## ๐Ÿ“ Example Usage ```bash @@ -167,6 +172,8 @@ python download_karaoke.py --clear-cache all - Ensure `yt-dlp.exe` is in the `downloader/` folder - Check `logs/` for error details - Use `python -m karaoke_downloader.check_resolution` to verify video quality +- If you see errors about ffmpeg/ffprobe, install [ffmpeg](https://ffmpeg.org/download.html) and ensure it is in your PATH +- For best fuzzy matching, install rapidfuzz: `pip install rapidfuzz` (otherwise falls back to slower, less accurate difflib) --- diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index 754a41e..09bdeda 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -2,6 +2,7 @@ import sys import argparse from pathlib import Path from karaoke_downloader.downloader import KaraokeDownloader +import os def main(): parser = argparse.ArgumentParser( @@ -37,6 +38,8 @@ Examples: parser.add_argument('--version', '-v', action='version', version='Karaoke Playlist Downloader v1.0') parser.add_argument('--force-download-plan', action='store_true', 
help='Force refresh the download plan cache (re-scan all channels for matches)') parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)') + parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)') + parser.add_argument('--fuzzy-threshold', type=int, default=85, help='Fuzzy match threshold (0-100, default 85)') args = parser.parse_args() yt_dlp_path = Path("downloader/yt-dlp.exe") @@ -155,22 +158,32 @@ Examples: if len(tracking) > 10: print(f" ... and {len(tracking) - 10} more") sys.exit(0) - elif args.songlist_only and args.file: - # Read all channel URLs from file - with open(args.file, "r", encoding="utf-8") as f: + elif args.songlist_only: + # Use provided file or default to data/channels.txt + channel_file = args.file if args.file else "data/channels.txt" + if not os.path.exists(channel_file): + print(f"โŒ Channel file not found: {channel_file}") + sys.exit(1) + with open(channel_file, "r", encoding="utf-8") as f: channel_urls = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")] limit = args.limit if args.limit else None force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False - success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan) - elif args.url: - success = downloader.download_channel_videos(args.url, force_refresh=args.refresh) - elif args.latest_per_channel and args.file: - # Read all channel URLs from file - with open(args.file, "r", encoding="utf-8") as f: + fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False + fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 85 + success = downloader.download_songlist_across_channels(channel_urls, limit=limit, 
force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold) + elif args.latest_per_channel: + # Use provided file or default to data/channels.txt + channel_file = args.file if args.file else "data/channels.txt" + if not os.path.exists(channel_file): + print(f"โŒ Channel file not found: {channel_file}") + sys.exit(1) + with open(channel_file, "r", encoding="utf-8") as f: channel_urls = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")] limit = args.limit if args.limit else 5 force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False success = downloader.download_latest_per_channel(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan) + elif args.url: + success = downloader.download_channel_videos(args.url, force_refresh=args.refresh) else: parser.print_help() sys.exit(1) diff --git a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index 35999ab..afcdc80 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -163,20 +163,15 @@ class KaraokeDownloader: all_success = False return all_success - def download_channel_videos(self, url, force_refresh=False): - """ - Download videos from a channel or playlist URL, respecting songlist-only and limit flags. - Only download the first N matches from the songlist (N = self.config.get('limit', 1)). - """ + def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=85): + """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.""" channel_name, channel_id = get_channel_info(url) print(f"\n๐ŸŽฌ Downloading from channel: {channel_name} ({url})") songlist = load_songlist() if not songlist: print("โš ๏ธ No songlist loaded. 
Skipping.") return False - # Get limit from config or default to 1 limit = self.config.get('limit', 1) - # --- Get channel video list with yt-dlp --flat-playlist --- cmd = [ str(self.yt_dlp_path), '--flat-playlist', @@ -189,7 +184,6 @@ class KaraokeDownloader: except subprocess.CalledProcessError as e: print(f"โŒ yt-dlp failed to fetch playlist: {e}") return False - # Build a list of available videos available_videos = [] for line in lines: parts = line.split('|') @@ -197,24 +191,42 @@ class KaraokeDownloader: title, video_id = parts[0].strip(), parts[1].strip() available_videos.append({'title': title, 'id': video_id}) # Normalize songlist for matching + try: + from rapidfuzz import fuzz + def similarity(a, b): + return fuzz.ratio(a, b) + except ImportError: + import difflib + def similarity(a, b): + return int(difflib.SequenceMatcher(None, a, b).ratio() * 100) normalized_songlist = { f"{s['artist'].lower()}_{normalize_title(s['title'])}": s for s in songlist } - # Find matches matches = [] for video in available_videos: artist, title = extract_artist_title(video['title']) key = f"{artist.lower()}_{normalize_title(title)}" - if key in normalized_songlist: - # Check if already downloaded or on server - if is_songlist_song_downloaded(self.songlist_tracking, artist, title): - continue - if is_song_on_server(self.server_songs, artist, title): - print(f"๐ŸŽต Song already on server: {artist} - {title}") - continue - matches.append((video, normalized_songlist[key])) - if len(matches) >= limit: - break + if fuzzy_match: + # Fuzzy match against all songlist keys + best_score = 0 + best_song = None + for song_key, song in normalized_songlist.items(): + score = similarity(key, song_key) + if score > best_score: + best_score = score + best_song = song + if best_score >= fuzzy_threshold and best_song: + if not is_songlist_song_downloaded(self.songlist_tracking, best_song['artist'], best_song['title']): + matches.append((video, best_song)) + print(f" โ†’ Fuzzy match: {artist} - 
{title} <-> {best_song['artist']} - {best_song['title']} (score: {best_score})") + if len(matches) >= limit: + break + else: + if key in normalized_songlist: + if not is_songlist_song_downloaded(self.songlist_tracking, artist, title): + matches.append((video, normalized_songlist[key])) + if len(matches) >= limit: + break if not matches: print("๐ŸŽต No new songlist matches found for this channel.") return True @@ -224,7 +236,6 @@ class KaraokeDownloader: output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4" output_path.parent.mkdir(parents=True, exist_ok=True) print(f"โฌ‡๏ธ Downloading: {artist} - {title} -> {output_path}") - # Download by video ID video_url = f"https://www.youtube.com/watch?v={video['id']}" cmd = [ str(self.yt_dlp_path), @@ -240,23 +251,31 @@ class KaraokeDownloader: if not output_path.exists() or output_path.stat().st_size == 0: print(f"โŒ Download failed or file is empty: {output_path}") continue - # TEMP: Skipping MP4 validation for debugging - # if not self._is_valid_mp4(output_path): - # print(f"โŒ File is not a valid MP4: {output_path}") - # continue + if not self._is_valid_mp4(output_path): + print(f"โŒ File is not a valid MP4: {output_path}") + continue add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name) mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path) print(f"โœ… Downloaded and tracked: {artist} - {title}") print(f"๐ŸŽ‰ All post-processing complete for: {output_path}") return True - def build_download_plan(self, channel_urls, undownloaded): + def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=85): """ For each song in undownloaded, scan all channels for a match. + Use fuzzy matching if enabled. 
Return (download_plan, unmatched_songs): - download_plan: list of dicts {artist, title, channel_name, channel_url, video_id, video_title} - unmatched_songs: list of songs not found in any channel """ + try: + from rapidfuzz import fuzz + def similarity(a, b): + return fuzz.ratio(a, b) + except ImportError: + import difflib + def similarity(a, b): + return int(difflib.SequenceMatcher(None, a, b).ratio() * 100) plan = [] unmatched = [] channel_match_counts = {} @@ -269,29 +288,55 @@ class KaraokeDownloader: force_refresh=False ) matches_this_channel = 0 + channel_fuzzy_matches = [] # For optional top-N reporting for song in undownloaded: artist, title = song['artist'], song['title'] found = False + song_key = f"{artist.lower()}_{normalize_title(title)}" for video in available_videos: v_artist, v_title = extract_artist_title(video['title']) - if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \ - (normalize_title(video['title']) == normalize_title(f"{artist} - {title}")): - # Only add if not already in plan (first channel wins) - if not any(p['artist'] == artist and p['title'] == title for p in plan): - plan.append({ - 'artist': artist, - 'title': title, - 'channel_name': channel_name, - 'channel_url': channel_url, - 'video_id': video['id'], - 'video_title': video['title'] - }) - matches_this_channel += 1 - found = True - break + video_key = f"{v_artist.lower()}_{normalize_title(v_title)}" + if fuzzy_match: + score = similarity(song_key, video_key) + if score >= fuzzy_threshold: + if not any(p['artist'] == artist and p['title'] == title for p in plan): + plan.append({ + 'artist': artist, + 'title': title, + 'channel_name': channel_name, + 'channel_url': channel_url, + 'video_id': video['id'], + 'video_title': video['title'], + 'match_score': score + }) + # channel_fuzzy_matches.append((artist, title, video['title'], score)) + matches_this_channel += 1 + found = True + break + else: + if 
(normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \ + (normalize_title(video['title']) == normalize_title(f"{artist} - {title}")): + if not any(p['artist'] == artist and p['title'] == title for p in plan): + plan.append({ + 'artist': artist, + 'title': title, + 'channel_name': channel_name, + 'channel_url': channel_url, + 'video_id': video['id'], + 'video_title': video['title'], + 'match_score': 100 + }) + matches_this_channel += 1 + found = True + break # Don't break here; keep looking for all matches in this channel channel_match_counts[channel_name] = matches_this_channel print(f" โ†’ Found {matches_this_channel} songlist matches in this channel.") + # Optionally, print top 3 fuzzy matches for review + # if fuzzy_match and channel_fuzzy_matches: + # top_matches = sorted(channel_fuzzy_matches, key=lambda x: -x[3])[:3] + # for a, t, vt, s in top_matches: + # print(f" Top match: {a} - {t} <-> {vt} (score: {s})") # Now find unmatched songs for song in undownloaded: if not any(p['artist'] == song['artist'] and p['title'] == song['title'] for p in plan): @@ -314,7 +359,7 @@ class KaraokeDownloader: base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest() return Path(f"data/{base}.json") - def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False): + def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=85): """ For each song in the songlist, try each channel in order and download from the first channel where it is found. Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted. 
@@ -327,18 +372,87 @@ class KaraokeDownloader: undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])] print(f"๐ŸŽฏ {len(songlist)} total unique songs in songlist.") print(f"๐ŸŽฏ {len(undownloaded)} unique songlist songs to download.") - # Further filter out songs already on server not_on_server = [s for s in undownloaded if not is_song_on_server(self.server_songs, s['artist'], s['title'])] server_available = len(undownloaded) - len(not_on_server) if server_available > 0: print(f"๐ŸŽต {server_available} songs already available on server, skipping.") - undownloaded = not_on_server print(f"๐ŸŽฏ {len(undownloaded)} songs need to be downloaded.") if not undownloaded: print("๐ŸŽต All songlist songs already downloaded.") return True + # --- FAST MODE: Early exit and deduplication if limit is set --- + if limit is not None: + print("\nโšก Fast mode enabled: will stop as soon as limit is reached with successful downloads.") + try: + from rapidfuzz import fuzz + def similarity(a, b): + return fuzz.ratio(a, b) + except ImportError: + import difflib + def similarity(a, b): + return int(difflib.SequenceMatcher(None, a, b).ratio() * 100) + downloaded_count = 0 + unique_keys = set() + total_attempted = 0 + for channel_url in channel_urls: + channel_name, channel_id = get_channel_info(channel_url) + print(f"\n๐Ÿšฆ Starting channel: {channel_name} ({channel_url})") + available_videos = self.tracker.get_channel_video_list( + channel_url, + yt_dlp_path=str(self.yt_dlp_path), + force_refresh=False + ) + for song in undownloaded: + artist, title = song['artist'], song['title'] + key = f"{artist.lower()}_{normalize_title(title)}" + if key in unique_keys: + continue # Already downloaded or queued + found = False + for video in available_videos: + v_artist, v_title = extract_artist_title(video['title']) + video_key = f"{v_artist.lower()}_{normalize_title(v_title)}" + if fuzzy_match: + score = similarity(key, video_key) + if 
score >= fuzzy_threshold: + found = True + else: + if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \ + (normalize_title(video['title']) == normalize_title(f"{artist} - {title}")): + found = True + if found: + print(f"\nโฌ‡๏ธ Downloading {downloaded_count+1} of {limit}: {artist} - {title} (from {channel_name})") + # --- Download logic (reuse from below) --- + safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "") + safe_artist = artist.replace("'", "").replace('"', "") + invalid_chars = ['?', ':', '*', '"', '<', '>', '|', '/', '\\'] + for char in invalid_chars: + safe_title = safe_title.replace(char, "") + safe_artist = safe_artist.replace(char, "") + safe_title = safe_title.replace("...", "").replace("..", "").replace(".", "").strip() + safe_artist = safe_artist.strip() + filename = f"{safe_artist} - {safe_title}.mp4" + # Call the actual download function (simulate the same as in the plan loop) + success = self._download_video_and_track( + channel_name, channel_url, video['id'], video['title'], artist, title, filename + ) + total_attempted += 1 + if success: + downloaded_count += 1 + unique_keys.add(key) + print(f"โœ… Downloaded and tracked: {artist} - {title}") + else: + print(f"โŒ Download failed: {artist} - {title}") + if downloaded_count >= limit: + print(f"๐ŸŽ‰ Reached download limit ({limit}). Stopping early.") + return True + break # Don't try to match this song to other videos in this channel + print(f"๐ŸŽ‰ Downloaded {downloaded_count} unique songlist songs (limit was {limit}).") + if downloaded_count < limit: + print(f"โš ๏ธ Only {downloaded_count} songs were downloaded. 
Some may not have been found or downloads failed.") + return True + # --- ORIGINAL FULL PLAN MODE (no limit) --- # Removed per-song printout for cleaner output # print("๐Ÿ” Songs to search for:") # for song in undownloaded: @@ -362,7 +476,7 @@ class KaraokeDownloader: print(f"โš ๏ธ Could not load download plan cache: {e}") if not use_cache: print("\n๐Ÿ”Ž Pre-scanning channels for matches...") - download_plan, unmatched = self.build_download_plan(channel_urls, undownloaded) + download_plan, unmatched = self.build_download_plan(channel_urls, undownloaded, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold) if download_plan: cache_data = { 'timestamp': datetime.now().isoformat(), @@ -623,6 +737,48 @@ class KaraokeDownloader: # If ffprobe is not available, skip the check return True + def _download_video_and_track(self, channel_name, channel_url, video_id, video_title, artist, title, filename): + """ + Helper to download a single video and track its status. + Returns True if successful, False otherwise. 
+ """ + output_path = self.downloads_dir / channel_name / filename + output_path.parent.mkdir(parents=True, exist_ok=True) + print(f"โฌ‡๏ธ Downloading: {artist} - {title} -> {output_path}") + video_url = f"https://www.youtube.com/watch?v={video_id}" + dlp_cmd = [ + str(self.yt_dlp_path), + "--no-check-certificates", + "--ignore-errors", + "--no-warnings", + "-o", str(output_path), + "-f", self.config["download_settings"]["format"], + video_url + ] + try: + result = subprocess.run(dlp_cmd, capture_output=True, text=True, check=True) + print(f"โœ… yt-dlp completed successfully") + print(f"๐Ÿ“„ yt-dlp stdout: {result.stdout}") + except subprocess.CalledProcessError as e: + print(f"โŒ yt-dlp failed with exit code {e.returncode}") + print(f"โŒ yt-dlp stderr: {e.stderr}") + return False + if not output_path.exists(): + print(f"โŒ Download failed: file does not exist: {output_path}") + return False + if output_path.stat().st_size == 0: + print(f"โŒ Download failed: file is empty (0 bytes): {output_path}") + return False + # TEMP: Skipping MP4 validation for debugging + # if not self._is_valid_mp4(output_path): + # print(f"โŒ File is not a valid MP4: {output_path}") + # return False + add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name) + mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path) + print(f"โœ… Downloaded and tracked: {artist} - {title}") + print(f"๐ŸŽ‰ All post-processing complete for: {output_path}") + return True + def reset_songlist_all(): """Delete all files tracked in songlist_tracking.json, clear songlist_tracking.json, and remove songlist songs from karaoke_tracking.json.""" import json