""" Fuzzy matching utilities for songlist-to-video matching. Handles similarity calculations and match validation. """ def get_similarity_function(): """ Get the best available similarity function. Returns rapidfuzz if available, otherwise falls back to difflib. """ try: from rapidfuzz import fuzz def similarity(a, b): return fuzz.ratio(a, b) return similarity except ImportError: import difflib def similarity(a, b): return int(difflib.SequenceMatcher(None, a, b).ratio() * 100) return similarity def normalize_title(title): """Normalize a title for comparison.""" normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip() return " ".join(normalized.split()).lower() def extract_artist_title(video_title): """ Extract artist and title from video title. This function handles multiple common video title formats found on YouTube karaoke channels: 1. "Artist - Title" format: "38 Special - Hold On Loosely" 2. "Title Karaoke | Artist Karaoke Version" format: "Hold On Loosely Karaoke | 38 Special Karaoke Version" 3. "Title Artist KARAOKE" format: "Hold On Loosely 38 Special KARAOKE" Args: video_title (str): The YouTube video title to parse Returns: tuple: (artist, title) where artist and title are strings. If parsing fails, artist will be empty string and title will be the full video title. Examples: >>> extract_artist_title("38 Special - Hold On Loosely") ("38 Special", "Hold On Loosely") >>> extract_artist_title("Hold On Loosely Karaoke | 38 Special Karaoke Version") ("38 Special", "Hold On Loosely") >>> extract_artist_title("Unknown Format Video Title") ("", "Unknown Format Video Title") """ # Handle "Artist - Title" format if " - " in video_title: parts = video_title.split(" - ", 1) return parts[0].strip(), parts[1].strip() # Handle "Title Karaoke | Artist Karaoke Version" format if " | " in video_title and "karaoke" in video_title.lower(): parts = video_title.split(" | ", 1) title_part = parts[0].strip() artist_part = parts[1].strip() # Clean up the parts title = title_part.replace("Karaoke", "").strip() artist = artist_part.replace("Karaoke Version", "").strip() return artist, title # Handle "Title Artist KARAOKE" format if "karaoke" in video_title.lower(): # Try to find the artist by looking for common patterns title_lower = video_title.lower() # Look for patterns like "Title Artist KARAOKE" # This is a simplified approach - we'll need to improve this words = video_title.split() if len(words) >= 3: # Assume the last word before "KARAOKE" is part of the artist for i, word in enumerate(words): if "karaoke" in word.lower(): if i >= 2: # Everything before the last word before KARAOKE is title # Everything after is artist title = " ".join(words[:i-1]) artist = " ".join(words[i-1:]) return artist, title # If we can't parse it, return empty artist and full title return "", video_title # Default: return empty artist and full title return "", video_title def create_song_key(artist, title): """Create a normalized key for song comparison.""" return f"{artist.lower()}_{normalize_title(title)}" def create_video_key(video_title): """Create a normalized key for video comparison.""" artist, title = extract_artist_title(video_title) return f"{artist.lower()}_{normalize_title(title)}" def is_fuzzy_match(songlist_artist, songlist_title, video_title, threshold=90): """ Check if a songlist entry matches a video title using fuzzy matching. Args: songlist_artist: Artist from songlist songlist_title: Title from songlist video_title: YouTube video title threshold: Minimum similarity score (0-100) Returns: tuple: (is_match, score) where is_match is boolean and score is the similarity score """ similarity = get_similarity_function() song_key = create_song_key(songlist_artist, songlist_title) video_key = create_video_key(video_title) score = similarity(song_key, video_key) is_match = score >= threshold return is_match, score def is_exact_match(songlist_artist, songlist_title, video_title): """ Check if a songlist entry exactly matches a video title. Args: songlist_artist: Artist from songlist songlist_title: Title from songlist video_title: YouTube video title Returns: bool: True if exact match, False otherwise """ v_artist, v_title = extract_artist_title(video_title) # Check artist and title separately artist_match = normalize_title(v_artist) == normalize_title(songlist_artist) title_match = normalize_title(v_title) == normalize_title(songlist_title) # Also check if video title matches "artist - title" format full_title_match = normalize_title(video_title) == normalize_title( f"{songlist_artist} - {songlist_title}" ) return (artist_match and title_match) or full_title_match