Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
28364daacd
commit
f462da69cc
@ -39,7 +39,7 @@ Examples:
|
|||||||
parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)')
|
parser.add_argument('--force-download-plan', action='store_true', help='Force refresh the download plan cache (re-scan all channels for matches)')
|
||||||
parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)')
|
parser.add_argument('--latest-per-channel', action='store_true', help='Download the latest N videos from each channel (use with --limit)')
|
||||||
parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)')
|
parser.add_argument('--fuzzy-match', action='store_true', help='Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)')
|
||||||
parser.add_argument('--fuzzy-threshold', type=int, default=85, help='Fuzzy match threshold (0-100, default 85)')
|
parser.add_argument('--fuzzy-threshold', type=int, default=90, help='Fuzzy match threshold (0-100, default 90)')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
yt_dlp_path = Path("downloader/yt-dlp.exe")
|
yt_dlp_path = Path("downloader/yt-dlp.exe")
|
||||||
@ -169,7 +169,7 @@ Examples:
|
|||||||
limit = args.limit if args.limit else None
|
limit = args.limit if args.limit else None
|
||||||
force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False
|
force_refresh_download_plan = args.force_download_plan if hasattr(args, 'force_download_plan') else False
|
||||||
fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False
|
fuzzy_match = args.fuzzy_match if hasattr(args, 'fuzzy_match') else False
|
||||||
fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 85
|
fuzzy_threshold = args.fuzzy_threshold if hasattr(args, 'fuzzy_threshold') else 90
|
||||||
success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold)
|
success = downloader.download_songlist_across_channels(channel_urls, limit=limit, force_refresh_download_plan=force_refresh_download_plan, fuzzy_match=fuzzy_match, fuzzy_threshold=fuzzy_threshold)
|
||||||
elif args.latest_per_channel:
|
elif args.latest_per_channel:
|
||||||
# Use provided file or default to data/channels.txt
|
# Use provided file or default to data/channels.txt
|
||||||
|
|||||||
@ -163,7 +163,7 @@ class KaraokeDownloader:
|
|||||||
all_success = False
|
all_success = False
|
||||||
return all_success
|
return all_success
|
||||||
|
|
||||||
def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=85):
|
def download_channel_videos(self, url, force_refresh=False, fuzzy_match=False, fuzzy_threshold=90):
|
||||||
"""Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching."""
|
"""Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching."""
|
||||||
channel_name, channel_id = get_channel_info(url)
|
channel_name, channel_id = get_channel_info(url)
|
||||||
print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
|
print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
|
||||||
@ -260,7 +260,7 @@ class KaraokeDownloader:
|
|||||||
print(f"🎉 All post-processing complete for: {output_path}")
|
print(f"🎉 All post-processing complete for: {output_path}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=85):
|
def build_download_plan(self, channel_urls, undownloaded, fuzzy_match=False, fuzzy_threshold=90):
|
||||||
"""
|
"""
|
||||||
For each song in undownloaded, scan all channels for a match.
|
For each song in undownloaded, scan all channels for a match.
|
||||||
Use fuzzy matching if enabled.
|
Use fuzzy matching if enabled.
|
||||||
@ -309,7 +309,7 @@ class KaraokeDownloader:
|
|||||||
'video_title': video['title'],
|
'video_title': video['title'],
|
||||||
'match_score': score
|
'match_score': score
|
||||||
})
|
})
|
||||||
# channel_fuzzy_matches.append((artist, title, video['title'], score))
|
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
|
||||||
matches_this_channel += 1
|
matches_this_channel += 1
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
@ -326,6 +326,7 @@ class KaraokeDownloader:
|
|||||||
'video_title': video['title'],
|
'video_title': video['title'],
|
||||||
'match_score': 100
|
'match_score': 100
|
||||||
})
|
})
|
||||||
|
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
|
||||||
matches_this_channel += 1
|
matches_this_channel += 1
|
||||||
found = True
|
found = True
|
||||||
break
|
break
|
||||||
@ -359,7 +360,7 @@ class KaraokeDownloader:
|
|||||||
base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest()
|
base = base[:40] + "_" + hashlib.md5(base.encode()).hexdigest()
|
||||||
return Path(f"data/{base}.json")
|
return Path(f"data/{base}.json")
|
||||||
|
|
||||||
def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=85):
|
def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=90):
|
||||||
"""
|
"""
|
||||||
For each song in the songlist, try each channel in order and download from the first channel where it is found.
|
For each song in the songlist, try each channel in order and download from the first channel where it is found.
|
||||||
Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted.
|
Download up to 'limit' songs, skipping any that cannot be found, until the limit is reached or all possible matches are exhausted.
|
||||||
@ -416,13 +417,17 @@ class KaraokeDownloader:
|
|||||||
if fuzzy_match:
|
if fuzzy_match:
|
||||||
score = similarity(key, video_key)
|
score = similarity(key, video_key)
|
||||||
if score >= fuzzy_threshold:
|
if score >= fuzzy_threshold:
|
||||||
|
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (score: {score})")
|
||||||
found = True
|
found = True
|
||||||
else:
|
else:
|
||||||
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
|
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
|
||||||
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
|
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
|
||||||
|
print(f" → Match: \"{artist} - {title}\" <-> \"{video['title']}\" (exact)")
|
||||||
found = True
|
found = True
|
||||||
if found:
|
if found:
|
||||||
print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}: {artist} - {title} (from {channel_name})")
|
print(f"\n⬇️ Downloading {downloaded_count+1} of {limit}:")
|
||||||
|
print(f" 📋 Songlist: {artist} - {title}")
|
||||||
|
print(f" 🎬 Video: {video['title']} ({channel_name})")
|
||||||
# --- Download logic (reuse from below) ---
|
# --- Download logic (reuse from below) ---
|
||||||
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
|
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
|
||||||
safe_artist = artist.replace("'", "").replace('"', "")
|
safe_artist = artist.replace("'", "").replace('"', "")
|
||||||
@ -510,7 +515,9 @@ class KaraokeDownloader:
|
|||||||
channel_url = item['channel_url']
|
channel_url = item['channel_url']
|
||||||
video_id = item['video_id']
|
video_id = item['video_id']
|
||||||
video_title = item['video_title']
|
video_title = item['video_title']
|
||||||
print(f"\n⬇️ Downloading {idx+1} of {total_to_download}: {artist} - {title} (from {channel_name})")
|
print(f"\n⬇️ Downloading {idx+1} of {total_to_download}:")
|
||||||
|
print(f" 📋 Songlist: {artist} - {title}")
|
||||||
|
print(f" 🎬 Video: {video_title} ({channel_name})")
|
||||||
# --- Existing download logic here, using channel_name, video_id, etc. ---
|
# --- Existing download logic here, using channel_name, video_id, etc. ---
|
||||||
# (Copy the download logic from the previous loop, using these variables)
|
# (Copy the download logic from the previous loop, using these variables)
|
||||||
# Create a shorter, safer filename - do this ONCE and use consistently
|
# Create a shorter, safer filename - do this ONCE and use consistently
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user