""" Fuzzy matching utilities for songlist-to-video matching. Handles similarity calculations and match validation. """ def get_similarity_function(): """ Get the best available similarity function. Returns rapidfuzz if available, otherwise falls back to difflib. """ try: from rapidfuzz import fuzz def similarity(a, b): return fuzz.ratio(a, b) return similarity except ImportError: import difflib def similarity(a, b): return int(difflib.SequenceMatcher(None, a, b).ratio() * 100) return similarity def normalize_title(title): """Normalize a title for comparison.""" normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip() return " ".join(normalized.split()).lower() def extract_artist_title(video_title): """Extract artist and title from video title.""" if " - " in video_title: parts = video_title.split(" - ", 1) return parts[0].strip(), parts[1].strip() return "", video_title def create_song_key(artist, title): """Create a normalized key for song comparison.""" return f"{artist.lower()}_{normalize_title(title)}" def create_video_key(video_title): """Create a normalized key for video comparison.""" artist, title = extract_artist_title(video_title) return f"{artist.lower()}_{normalize_title(title)}" def is_fuzzy_match(songlist_artist, songlist_title, video_title, threshold=90): """ Check if a songlist entry matches a video title using fuzzy matching. Args: songlist_artist: Artist from songlist songlist_title: Title from songlist video_title: YouTube video title threshold: Minimum similarity score (0-100) Returns: tuple: (is_match, score) where is_match is boolean and score is the similarity score """ similarity = get_similarity_function() song_key = create_song_key(songlist_artist, songlist_title) video_key = create_video_key(video_title) score = similarity(song_key, video_key) is_match = score >= threshold return is_match, score def is_exact_match(songlist_artist, songlist_title, video_title): """ Check if a songlist entry exactly matches a video title. Args: songlist_artist: Artist from songlist songlist_title: Title from songlist video_title: YouTube video title Returns: bool: True if exact match, False otherwise """ v_artist, v_title = extract_artist_title(video_title) # Check artist and title separately artist_match = normalize_title(v_artist) == normalize_title(songlist_artist) title_match = normalize_title(v_title) == normalize_title(songlist_title) # Also check if video title matches "artist - title" format full_title_match = normalize_title(video_title) == normalize_title( f"{songlist_artist} - {songlist_title}" ) return (artist_match and title_match) or full_title_match