Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2025-08-01 09:14:15 -05:00
parent a57687d10c
commit 3d8b0165af
3 changed files with 52 additions and 19 deletions

View File

@ -45,7 +45,7 @@
"favorite": false,
"guid": "3ff82151-be07-70e0-7e05-145dc0abec4a",
"path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4",
"title": "I Don t F--k With You"
"title": "I Don't Fuck With You"
},
{
"artist": "Blake Shelton ft. Gwen Stefani",
@ -69,7 +69,7 @@
"favorite": false,
"guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
"path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4",
"title": "Finesse Remix"
"title": "Finesse"
},
{
"artist": "Cardi B ft. Bad Bunny, J Balvin",
@ -176,7 +176,7 @@
"title": "Standing Outside the Fire"
},
{
"artist": "Garth-Brooks",
"artist": "Garth Brooks",
"disabled": false,
"favorite": false,
"guid": "85d9afa3-4463-457c-5ed2-f71602529edb",
@ -437,7 +437,7 @@
"favorite": false,
"guid": "c9d7ed4f-2efd-ea4b-91ab-543968b35ad6",
"path": "z://MP4\\Snoop Dog - Gin & Juice Instrumental with Lyrics.mp4",
"title": "Gin & Juice Instrumental with Lyrics"
"title": "Gin & Juice"
},
{
"artist": "Taylor Swift",
@ -445,7 +445,7 @@
"favorite": false,
"guid": "542672e8-3f01-fdc3-005f-dbc718ec4278",
"path": "z://MP4\\Taylor Swift - Delicade (Karaoke Version).mp4",
"title": "Delicade"
"title": "Delicate"
},
{
"artist": "Taylor Swift ft. Bon Iver",
@ -525,7 +525,7 @@
"favorite": false,
"guid": "6be4ca88-1996-e39e-6eae-18d33a797eb4",
"path": "z://MP4\\Warren G ft Nate Dogg - Regulate with lyrics.mp4",
"title": "Regulate with lyrics"
"title": "Regulate"
},
{
"artist": "Waylon Jennings",
@ -408872,7 +408872,7 @@
"favorite": false,
"guid": "a6b46b67-c4be-ec6a-0857-cf19bebca9e2",
"path": "z://CDG\\Various\\Waylon Jennings - Good Ol Boys Theme From The Duk.mp3",
"title": "Good Ol Boys Theme From The Duk"
"title": "Good Ol' Boys"
},
{
"artist": "Wayne Newton",

View File

@ -661,8 +661,11 @@ class MusicBrainzDatabase:
collaborators = parts[1:]
return (main_artist, collaborators)
else:
# Use word boundaries to avoid splitting within words like "Orlando"
# Require surrounding whitespace for '&' and 'and'; use word boundaries for other patterns, so names like "Orlando" are not split mid-word
import re
if pattern in ['&', 'and']:
pattern_regex = r'\s' + re.escape(pattern) + r'\s'
else:
pattern_regex = r'\b' + re.escape(pattern) + r'\b'
if re.search(pattern_regex, artist_string, re.IGNORECASE):
# For secondary patterns, be more careful
@ -725,8 +728,15 @@ class MusicBrainzDatabase:
if not is_collaboration:
return (artist_string, [])
# Split on the pattern
# Split on the pattern using the same regex that was used for detection
if split_pattern in ['&', 'and']:
# Use whitespace boundary regex for these patterns
pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
else:
# Use simple split for other patterns
parts = artist_string.split(split_pattern)
if len(parts) < 2:
return (artist_string, [])

View File

@ -79,15 +79,28 @@ class MusicBrainzCleaner:
def find_artist_mbid(self, artist_name: str) -> Optional[str]:
clean_name = self._clean_artist_name(artist_name)
# Handle collaborations - prioritize finding artist credit
# Handle collaborations - try to find main artist first
# Use the same complex collaboration parsing as the database
main_artist, collaborators = self._parse_complex_collaboration(clean_name)
has_collaboration = len(collaborators) > 0
if has_collaboration:
# For collaborations, we'll handle this in find_recording_mbid
# by using the artist credit approach
# Return None here so we can find the full collaboration later
# For collaborations, try to find the main artist's MBID first
if main_artist:
if self.use_database:
result = self.db.fuzzy_search_artist(main_artist)
if result and isinstance(result, tuple) and len(result) >= 2:
return result[1] # Return main artist's MBID
else:
# Fallback to API
try:
result = self.api.search_artist(main_artist)
if result:
return result['id']
except:
pass
# If main artist not found, return None to try artist credit approach
return None
# Try fuzzy search for full artist name
@ -195,7 +208,10 @@ class MusicBrainzCleaner:
# If no primary collaboration found, check secondary patterns
if not is_collaboration:
for pattern in secondary_patterns:
# Use word boundaries to avoid splitting within words like "Orlando"
# Use whitespace boundaries for '&' and 'and'; word boundaries for all other patterns
if pattern in ['&', 'and']:
pattern_regex = r'\s' + re.escape(pattern) + r'\s'
else:
pattern_regex = r'\b' + re.escape(pattern) + r'\b'
if re.search(pattern_regex, artist_string, re.IGNORECASE):
# For secondary patterns, be more careful
@ -239,8 +255,15 @@ class MusicBrainzCleaner:
if not is_collaboration:
return (artist_string, [])
# Split on the pattern
# Split on the pattern using the same regex that was used for detection
if split_pattern in ['&', 'and']:
# Use whitespace boundary regex for these patterns
pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
else:
# Use simple split for other patterns
parts = artist_string.split(split_pattern)
if len(parts) < 2:
return (artist_string, [])