Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-24 20:47:46 -05:00
parent 28364daacd
commit f462da69cc
2 changed files with 15 additions and 8 deletions

View File

@@ -39,7 +39,7 @@ Examples:
parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)') parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)')
parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)') parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)')
parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)') parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)')
parser.add_argument('--fuzzy-threshold', type=int, default=85, help='Fuzzy match threshold (0-100, default 85)') parser.add_argument('--fuzzy-threshold', type=int, default=90, help='Fuzzy match threshold (0-100, default 90)')
args = parser.parse_args() args = parser.parse_args()
yt_dlp_path = Path("downloader/yt-dlp.exe") yt_dlp_path = Path("downloader/yt-dlp.exe")
@@ -169,7 +169,7 @@ Examples:
limit = args.limit if args.limit else None limit = args.limit if args.limit else None
force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False
fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False
fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 85 fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 90
success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold) success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold)
elif args.latest_per_channel: elif args.latest_per_channel:
# Use provided file or default to data/channels.txt # Use provided file or default to data/channels.txt

View File

@@ -163,7 +163,7 @@ class KaraokeDownloader:
all_success = False all_success = False
return all_success return all_success
def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=85): def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=90):
"""Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.""" """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching."""
channel_name, channel_id = get_channel_info(url) channel_name, channel_id = get_channel_info(url)
print(f"\n🎬 Downloading from channel: {channel_name} ({url})") print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
@@ -260,7 +260,7 @@ class KaraokeDownloader:
print(f"🎉 All post-processing complete for: {output_path}") print(f"🎉 All post-processing complete for: {output_path}")
return True return True
def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=85): def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=90):
""" """
For each song in undownloaded, scan all channels for a match. For each song in undownloaded, scan all channels for a match.
Use fuzzy matching if enabled. Use fuzzy matching if enabled.
@@ -309,7 +309,7 @@ class KaraokeDownloader:
'video_title': video['title'], 'video_title': video['title'],
'match_score': score 'match_score': score
}) })
# channel_fuzzy_matches.append((artist, title, video['title'], score)) print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
matches_this_channel += 1 matches_this_channel += 1
found = True found = True
break break
@@ -326,6 +326,7 @@ class KaraokeDownloader:
'video_title': video['title'], 'video_title': video['title'],
'match_score': 100 'match_score': 100
}) })
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
matches_this_channel += 1 matches_this_channel += 1
found = True found = True
break break
@@ -359,7 +360,7 @@ class KaraokeDownloader:
base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest() base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest()
return Path(f"data/{base}.json") return Path(f"data/{base}.json")
def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=85): def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=90):
""" """
For each song in the songlist, try each channel in order and download from the first channel where it is found. For each song in the songlist, try each channel in order and download from the first channel where it is found.
Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted. Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted.
@@ -416,13 +417,17 @@ class KaraokeDownloader:
if fuzzy_match: if fuzzy_match:
score = similarity(key, video_key) score = similarity(key, video_key)
if score >= fuzzy_threshold: if score >= fuzzy_threshold:
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
found = True found = True
else: else:
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \ if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")): (normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
found = True found = True
if found: if found:
print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}: {artist} - {title} (from {channel_name})") print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}:")
print(f" 📋 Songlist: {artist} - {title}")
print(f" 🎬 Video: {video['title']} ({channel_name})")
# --- Download logic (reuse from below) --- # --- Download logic (reuse from below) ---
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "") safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
safe_artist = artist.replace("'", "").replace('"', "") safe_artist = artist.replace("'", "").replace('"', "")
@@ -510,7 +515,9 @@ class KaraokeDownloader:
channel_url = item['channel_url'] channel_url = item['channel_url']
video_id = item['video_id'] video_id = item['video_id']
video_title = item['video_title'] video_title = item['video_title']
print(f"\n⬇️ Downloading {idx+1} of {total_to_download}: {artist} - {title} (from {channel_name})") print(f"\n⬇️ Downloading {idx+1} of {total_to_download}:")
print(f" 📋 Songlist: {artist} - {title}")
print(f" 🎬 Video: {video_title} ({channel_name})")
# --- Existing download logic here, using channel_name, video_id, etc. --- # --- Existing download logic here, using channel_name, video_id, etc. ---
# (Copy the download logic from the previous loop, using these variables) # (Copy the download logic from the previous loop, using these variables)
# Create a shorter, safer filename - do this ONCE and use consistently # Create a shorter, safer filename - do this ONCE and use consistently