Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-24 20:47:46 -05:00
parent 28364daacd
commit f462da69cc
2 changed files with 15 additions and 8 deletions

View File

@@ -39,7 +39,7 @@ Examples:
parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)')
parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)')
parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)')
parser.add_argument('--fuzzy-threshold', type=int, default=85, help='Fuzzy match threshold (0-100, default 85)')
parser.add_argument('--fuzzy-threshold', type=int, default=90, help='Fuzzy match threshold (0-100, default 90)')
args = parser.parse_args()
yt_dlp_path = Path("downloader/yt-dlp.exe")
@@ -169,7 +169,7 @@ Examples:
limit = args.limit if args.limit else None
force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False
fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False
fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 85
fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 90
success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold)
elif args.latest_per_channel:
# Use provided file or default to data/channels.txt

View File

@@ -163,7 +163,7 @@ class KaraokeDownloader:
all_success = False
return all_success
def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=85):
def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=90):
"""Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching."""
channel_name, channel_id = get_channel_info(url)
print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
@@ -260,7 +260,7 @@ class KaraokeDownloader:
print(f"🎉 All post-processing complete for: {output_path}")
return True
def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=85):
def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=90):
"""
For each song in undownloaded, scan all channels for a match.
Use fuzzy matching if enabled.
@@ -309,7 +309,7 @@ class KaraokeDownloader:
'video_title': video['title'],
'match_score': score
})
# channel_fuzzy_matches.append((artist, title, video['title'], score))
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
matches_this_channel += 1
found = True
break
@@ -326,6 +326,7 @@ class KaraokeDownloader:
'video_title': video['title'],
'match_score': 100
})
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
matches_this_channel += 1
found = True
break
@@ -359,7 +360,7 @@ class KaraokeDownloader:
base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest()
return Path(f"data/{base}.json")
def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=85):
def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=90):
"""
For each song in the songlist, try each channel in order and download from the first channel where it is found.
Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted.
@@ -416,13 +417,17 @@ class KaraokeDownloader:
if fuzzy_match:
score = similarity(key, video_key)
if score >= fuzzy_threshold:
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
found = True
else:
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
found = True
if found:
print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}: {artist} - {title} (from {channel_name})")
print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}:")
print(f" 📋 Songlist: {artist} - {title}")
print(f" 🎬 Video: {video['title']} ({channel_name})")
# --- Download logic (reuse from below) ---
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
safe_artist = artist.replace("'", "").replace('"', "")
@ -510,7 +515,9 @@ class KaraokeDownloader:
channel_url = item['channel_url']
video_id = item['video_id']
video_title = item['video_title']
print(f"\n⬇️ Downloading {idx+1} of {total_to_download}: {artist} - {title} (from {channel_name})")
print(f"\n⬇️ Downloading {idx+1} of {total_to_download}:")
print(f" 📋 Songlist: {artist} - {title}")
print(f" 🎬 Video: {video_title} ({channel_name})")
# --- Existing download logic here, using channel_name, video_id, etc. ---
# (Copy the download logic from the previous loop, using these variables)
# Create a shorter, safer filename - do this ONCE and use consistently