diff --git a/karaoke_downloader/download_planner.py b/karaoke_downloader/download_planner.py index e39c953..28c5efa 100644 --- a/karaoke_downloader/download_planner.py +++ b/karaoke_downloader/download_planner.py @@ -20,12 +20,12 @@ from karaoke_downloader.cache_manager import ( from karaoke_downloader.fuzzy_matcher import ( create_song_key, create_video_key, + extract_artist_title, get_similarity_function, is_exact_match, is_fuzzy_match, normalize_title, ) -from karaoke_downloader.id3_utils import extract_artist_title from karaoke_downloader.youtube_utils import get_channel_info # Constants diff --git a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index f732ace..e355bf2 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -472,6 +472,13 @@ class KaraokeDownloader: ) undownloaded = not_on_server + + # Apply limit to undownloaded list before scanning + if limit is not None: + original_count = len(undownloaded) + undownloaded = undownloaded[:limit] + print(f"\n🎯 Limited to first {limit} songs (was {original_count} total)") + print(f"\n🎯 {len(undownloaded)} songs need to be downloaded.") if not undownloaded: print("🎵 All songlist songs already downloaded.") diff --git a/karaoke_downloader/fuzzy_matcher.py b/karaoke_downloader/fuzzy_matcher.py index 7beb140..ef02914 100644 --- a/karaoke_downloader/fuzzy_matcher.py +++ b/karaoke_downloader/fuzzy_matcher.py @@ -33,9 +33,46 @@ def normalize_title(title): def extract_artist_title(video_title): """Extract artist and title from video title.""" + # Handle "Artist - Title" format if " - " in video_title: parts = video_title.split(" - ", 1) return parts[0].strip(), parts[1].strip() + + # Handle "Title Karaoke | Artist Karaoke Version" format + if " | " in video_title and "karaoke" in video_title.lower(): + parts = video_title.split(" | ", 1) + title_part = parts[0].strip() + artist_part = parts[1].strip() + + # Clean up the parts + title = title_part.replace("Karaoke", 
"").strip() + artist = artist_part.replace("Karaoke Version", "").strip() + + return artist, title + + # Handle "Title Artist KARAOKE" format + if "karaoke" in video_title.lower(): + # Try to find the artist by looking for common patterns + title_lower = video_title.lower() + + # Look for patterns like "Title Artist KARAOKE" + # This is a simplified approach - we'll need to improve this + words = video_title.split() + if len(words) >= 3: + # Assume the last word before "KARAOKE" is part of the artist + for i, word in enumerate(words): + if "karaoke" in word.lower(): + if i >= 2: + # Everything before the last word before KARAOKE is title + # Everything after is artist + title = " ".join(words[:i-1]) + artist = " ".join(words[i-1:]) + return artist, title + + # If we can't parse it, return empty artist and full title + return "", video_title + + # Default: return empty artist and full title return "", video_title diff --git a/karaoke_downloader/id3_utils.py b/karaoke_downloader/id3_utils.py index 6097ddf..f6cc5a2 100644 --- a/karaoke_downloader/id3_utils.py +++ b/karaoke_downloader/id3_utils.py @@ -31,17 +31,7 @@ def clean_channel_name(channel_name: str) -> str: return "Unknown" -def extract_artist_title(video_title): - title = ( - video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip() - ) - if " - " in title: - parts = title.split(" - ", 1) - if len(parts) == 2: - artist = parts[0].strip() - song_title = parts[1].strip() - return artist, song_title - return "Unknown Artist", title +from karaoke_downloader.fuzzy_matcher import extract_artist_title def add_id3_tags(file_path, video_title, channel_name):