From 3d8b0165af7a7049302d145ca254993116e9af65 Mon Sep 17 00:00:00 2001 From: Matt Bruce Date: Fri, 1 Aug 2025 09:14:15 -0500 Subject: [PATCH] Signed-off-by: Matt Bruce --- data/songs.json | 14 +++++++------- src/api/database.py | 18 ++++++++++++++---- src/cli/main.py | 39 +++++++++++++++++++++++++++++++-------- 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/data/songs.json b/data/songs.json index efdbb96..50e61e0 100755 --- a/data/songs.json +++ b/data/songs.json @@ -45,7 +45,7 @@ "favorite": false, "guid": "3ff82151-be07-70e0-7e05-145dc0abec4a", "path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4", - "title": "I Don t F--k With You" + "title": "I Don't Fuck With You" }, { "artist": "Blake Shelton ft. Gwen Stefani", @@ -69,7 +69,7 @@ "favorite": false, "guid": "946a1077-ab9e-300c-3a72-b1e141e9706f", "path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4", - "title": "Finesse Remix" + "title": "Finesse" }, { "artist": "Cardi B ft. Bad Bunny, J Balvin", @@ -176,7 +176,7 @@ "title": "Standing Outside the Fire" }, { - "artist": "Garth-Brooks", + "artist": "Garth Brooks", "disabled": false, "favorite": false, "guid": "85d9afa3-4463-457c-5ed2-f71602529edb", @@ -437,7 +437,7 @@ "favorite": false, "guid": "c9d7ed4f-2efd-ea4b-91ab-543968b35ad6", "path": "z://MP4\\Snoop Dog - Gin & Juice Instrumental with Lyrics.mp4", - "title": "Gin & Juice Instrumental with Lyrics" + "title": "Gin & Juice" }, { "artist": "Taylor Swift", @@ -445,7 +445,7 @@ "favorite": false, "guid": "542672e8-3f01-fdc3-005f-dbc718ec4278", "path": "z://MP4\\Taylor Swift - Delicade (Karaoke Version).mp4", - "title": "Delicade" + "title": "Delicate" }, { "artist": "Taylor Swift ft. Bon Iver", @@ -525,7 +525,7 @@ "favorite": false, "guid": "6be4ca88-1996-e39e-6eae-18d33a797eb4", "path": "z://MP4\\Warren G ft Nate Dogg - Regulate with lyrics.mp4", - "title": "Regulate with lyrics" + "title": "Regulate" }, { "artist": "Waylon Jennings", @@ -408872,7 +408872,7 @@ "favorite": false, "guid": "a6b46b67-c4be-ec6a-0857-cf19bebca9e2", "path": "z://CDG\\Various\\Waylon Jennings - Good Ol Boys Theme From The Duk.mp3", - "title": "Good Ol Boys Theme From The Duk" + "title": "Good Ol' Boys" }, { "artist": "Wayne Newton", diff --git a/src/api/database.py b/src/api/database.py index 81e4069..c42e08f 100644 --- a/src/api/database.py +++ b/src/api/database.py @@ -661,9 +661,12 @@ class MusicBrainzDatabase: collaborators = parts[1:] return (main_artist, collaborators) else: - # Use word boundaries to avoid splitting within words like "Orlando" + # Use whitespace boundaries to avoid splitting within words like "Orlando" import re - pattern_regex = r'\b' + re.escape(pattern) + r'\b' + if pattern in ['&', 'and']: + pattern_regex = r'\s' + re.escape(pattern) + r'\s' + else: + pattern_regex = r'\b' + re.escape(pattern) + r'\b' if re.search(pattern_regex, artist_string, re.IGNORECASE): # For secondary patterns, be more careful # Check if this looks like a band name vs collaboration @@ -725,8 +728,15 @@ class MusicBrainzDatabase: if not is_collaboration: return (artist_string, []) - # Split on the pattern - parts = artist_string.split(split_pattern) + # Split on the pattern using the same regex that was used for detection + if split_pattern in ['&', 'and']: + # Use whitespace boundary regex for these patterns + pattern_regex = r'\s' + re.escape(split_pattern) + r'\s' + parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE) + else: + # Use simple split for other patterns + parts = artist_string.split(split_pattern) + if len(parts) < 2: return (artist_string, []) diff --git a/src/cli/main.py b/src/cli/main.py index 27ad0ff..c885142 100644 --- a/src/cli/main.py +++ b/src/cli/main.py @@ -79,15 +79,28 @@ class MusicBrainzCleaner: def find_artist_mbid(self, artist_name: str) -> Optional[str]: clean_name = self._clean_artist_name(artist_name) - # Handle collaborations - prioritize finding artist credit + # Handle collaborations - try to find main artist first # Use the same complex collaboration parsing as the database main_artist, collaborators = self._parse_complex_collaboration(clean_name) has_collaboration = len(collaborators) > 0 if has_collaboration: - # For collaborations, we'll handle this in find_recording_mbid - # by using the artist credit approach - # Return None here so we can find the full collaboration later + # For collaborations, try to find the main artist's MBID first + if main_artist: + if self.use_database: + result = self.db.fuzzy_search_artist(main_artist) + if result and isinstance(result, tuple) and len(result) >= 2: + return result[1] # Return main artist's MBID + else: + # Fallback to API + try: + result = self.api.search_artist(main_artist) + if result: + return result['id'] + except: + pass + + # If main artist not found, return None to try artist credit approach return None # Try fuzzy search for full artist name @@ -195,8 +208,11 @@ class MusicBrainzCleaner: # If no primary collaboration found, check secondary patterns if not is_collaboration: for pattern in secondary_patterns: - # Use word boundaries to avoid splitting within words like "Orlando" - pattern_regex = r'\b' + re.escape(pattern) + r'\b' + # Use whitespace boundaries for & and and, word boundaries for others + if pattern in ['&', 'and']: + pattern_regex = r'\s' + re.escape(pattern) + r'\s' + else: + pattern_regex = r'\b' + re.escape(pattern) + r'\b' if re.search(pattern_regex, artist_string, re.IGNORECASE): # For secondary patterns, be more careful # Check if this looks like a band name vs collaboration @@ -239,8 +255,15 @@ class MusicBrainzCleaner: if not is_collaboration: return (artist_string, []) - # Split on the pattern - parts = artist_string.split(split_pattern) + # Split on the pattern using the same regex that was used for detection + if split_pattern in ['&', 'and']: + # Use whitespace boundary regex for these patterns + pattern_regex = r'\s' + re.escape(split_pattern) + r'\s' + parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE) + else: + # Use simple split for other patterns + parts = artist_string.split(split_pattern) + if len(parts) < 2: return (artist_string, [])