From f462da69ccebdda1ca788fac3b0583638b73156f Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Thu, 24 Jul 2025 20:47:46 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- karaoke_downloader/cli.py | 4 ++-- karaoke_downloader/downloader.py | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index 09bdeda..eb6e410 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -39,7 +39,7 @@ Examples: parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)') parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)') parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)') - parser.add_argument('--fuzzy-threshold', type=int, default=85, help='Fuzzy match threshold (0-100, default 85)') + parser.add_argument('--fuzzy-threshold', type=int, default=90, help='Fuzzy match threshold (0-100, default 90)') args = parser.parse_args() yt_dlp_path = Path("downloader/yt-dlp.exe") @@ -169,7 +169,7 @@ Examples: limit = args.limit if args.limit else None force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False - fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 85 + fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 90 success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold) elif args.latest_per_channel: # Use provided file or default to data/channels.txt diff --git a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index afcdc80..46e3753 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -163,7 +163,7 @@ class KaraokeDownloader: all_success = False return all_success - def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=85): + def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=90): """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.""" channel_name, channel_id = get_channel_info(url) print(f"\nšŸŽ¬ Downloading from channel: {channel_name} ({url})") @@ -260,7 +260,7 @@ class KaraokeDownloader: print(f"šŸŽ‰ All post-processing complete for: {output_path}") return True - def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=85): + def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=90): """ For each song in undownloaded, scan all channels for a match. Use fuzzy matching if enabled. @@ -309,7 +309,7 @@ class KaraokeDownloader: 'video_title': video['title'], 'match_score': score }) - # channel_fuzzy_matches.append((artist, title, video['title'], score)) + print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})") matches_this_channel += 1 found = True break @@ -326,6 +326,7 @@ class KaraokeDownloader: 'video_title': video['title'], 'match_score': 100 }) + print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)") matches_this_channel += 1 found = True break @@ -359,7 +360,7 @@ class KaraokeDownloader: base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest() return Path(f"data/{base}.json") - def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=85): + def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=90): """ For each song in the songlist, try each channel in order and download from the first channel where it is found. Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted. @@ -416,13 +417,17 @@ class KaraokeDownloader: if fuzzy_match: score = similarity(key, video_key) if score >= fuzzy_threshold: + print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})") found = True else: if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \ (normalize_title(video['title']) == normalize_title(f"{artist} - {title}")): + print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)") found = True if found: - print(f"\nā¬‡ļø Downloading {downloaded_count+1} of {limit}: {artist} - {title} (from {channel_name})") + print(f"\nā¬‡ļø Downloading {downloaded_count+1} of {limit}:") + print(f" šŸ“‹ Songlist: {artist} - {title}") + print(f" šŸŽ¬ Video: {video['title']} ({channel_name})") # --- Download logic (reuse from below) --- safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "") safe_artist = artist.replace("'", "").replace('"', "") @@ -510,7 +515,9 @@ class KaraokeDownloader: channel_url = item['channel_url'] video_id = item['video_id'] video_title = item['video_title'] - print(f"\nā¬‡ļø Downloading {idx+1} of {total_to_download}: {artist} - {title} (from {channel_name})") + print(f"\nā¬‡ļø Downloading {idx+1} of {total_to_download}:") + print(f" šŸ“‹ Songlist: {artist} - {title}") + print(f" šŸŽ¬ Video: {video_title} ({channel_name})") # --- Existing download logic here, using channel_name, video_id, etc. --- # (Copy the download logic from the previous loop, using these variables) # Create a shorter, safer filename - do this ONCE and use consistently