Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

This commit is contained in:
Matt Bruce 2025-08-01 09:14:15 -05:00
parent a57687d10c
commit 3d8b0165af
3 changed files with 52 additions and 19 deletions

View File

@ -45,7 +45,7 @@
"favorite": false, "favorite": false,
"guid": "3ff82151-be07-70e0-7e05-145dc0abec4a", "guid": "3ff82151-be07-70e0-7e05-145dc0abec4a",
"path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4", "path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4",
"title": "I Don t F--k With You" "title": "I Don't Fuck With You"
}, },
{ {
"artist": "Blake Shelton ft. Gwen Stefani", "artist": "Blake Shelton ft. Gwen Stefani",
@ -69,7 +69,7 @@
"favorite": false, "favorite": false,
"guid": "946a1077-ab9e-300c-3a72-b1e141e9706f", "guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
"path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4", "path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4",
"title": "Finesse Remix" "title": "Finesse"
}, },
{ {
"artist": "Cardi B ft. Bad Bunny, J Balvin", "artist": "Cardi B ft. Bad Bunny, J Balvin",
@ -176,7 +176,7 @@
"title": "Standing Outside the Fire" "title": "Standing Outside the Fire"
}, },
{ {
"artist": "Garth-Brooks", "artist": "Garth Brooks",
"disabled": false, "disabled": false,
"favorite": false, "favorite": false,
"guid": "85d9afa3-4463-457c-5ed2-f71602529edb", "guid": "85d9afa3-4463-457c-5ed2-f71602529edb",
@ -437,7 +437,7 @@
"favorite": false, "favorite": false,
"guid": "c9d7ed4f-2efd-ea4b-91ab-543968b35ad6", "guid": "c9d7ed4f-2efd-ea4b-91ab-543968b35ad6",
"path": "z://MP4\\Snoop Dog - Gin & Juice Instrumental with Lyrics.mp4", "path": "z://MP4\\Snoop Dog - Gin & Juice Instrumental with Lyrics.mp4",
"title": "Gin & Juice Instrumental with Lyrics" "title": "Gin & Juice"
}, },
{ {
"artist": "Taylor Swift", "artist": "Taylor Swift",
@ -445,7 +445,7 @@
"favorite": false, "favorite": false,
"guid": "542672e8-3f01-fdc3-005f-dbc718ec4278", "guid": "542672e8-3f01-fdc3-005f-dbc718ec4278",
"path": "z://MP4\\Taylor Swift - Delicade (Karaoke Version).mp4", "path": "z://MP4\\Taylor Swift - Delicade (Karaoke Version).mp4",
"title": "Delicade" "title": "Delicate"
}, },
{ {
"artist": "Taylor Swift ft. Bon Iver", "artist": "Taylor Swift ft. Bon Iver",
@ -525,7 +525,7 @@
"favorite": false, "favorite": false,
"guid": "6be4ca88-1996-e39e-6eae-18d33a797eb4", "guid": "6be4ca88-1996-e39e-6eae-18d33a797eb4",
"path": "z://MP4\\Warren G ft Nate Dogg - Regulate with lyrics.mp4", "path": "z://MP4\\Warren G ft Nate Dogg - Regulate with lyrics.mp4",
"title": "Regulate with lyrics" "title": "Regulate"
}, },
{ {
"artist": "Waylon Jennings", "artist": "Waylon Jennings",
@ -408872,7 +408872,7 @@
"favorite": false, "favorite": false,
"guid": "a6b46b67-c4be-ec6a-0857-cf19bebca9e2", "guid": "a6b46b67-c4be-ec6a-0857-cf19bebca9e2",
"path": "z://CDG\\Various\\Waylon Jennings - Good Ol Boys Theme From The Duk.mp3", "path": "z://CDG\\Various\\Waylon Jennings - Good Ol Boys Theme From The Duk.mp3",
"title": "Good Ol Boys Theme From The Duk" "title": "Good Ol' Boys"
}, },
{ {
"artist": "Wayne Newton", "artist": "Wayne Newton",

View File

@ -661,9 +661,12 @@ class MusicBrainzDatabase:
collaborators = parts[1:] collaborators = parts[1:]
return (main_artist, collaborators) return (main_artist, collaborators)
else: else:
# Use word boundaries to avoid splitting within words like "Orlando" # Use whitespace boundaries for '&' and 'and' (word boundaries for other patterns) to avoid splitting within words like "Orlando"
import re import re
pattern_regex = r'\b' + re.escape(pattern) + r'\b' if pattern in ['&', 'and']:
pattern_regex = r'\s' + re.escape(pattern) + r'\s'
else:
pattern_regex = r'\b' + re.escape(pattern) + r'\b'
if re.search(pattern_regex, artist_string, re.IGNORECASE): if re.search(pattern_regex, artist_string, re.IGNORECASE):
# For secondary patterns, be more careful # For secondary patterns, be more careful
# Check if this looks like a band name vs collaboration # Check if this looks like a band name vs collaboration
@ -725,8 +728,15 @@ class MusicBrainzDatabase:
if not is_collaboration: if not is_collaboration:
return (artist_string, []) return (artist_string, [])
# Split on the pattern # Split on the pattern using the same regex that was used for detection
parts = artist_string.split(split_pattern) if split_pattern in ['&', 'and']:
# Use whitespace boundary regex for these patterns
pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
else:
# Use simple split for other patterns
parts = artist_string.split(split_pattern)
if len(parts) < 2: if len(parts) < 2:
return (artist_string, []) return (artist_string, [])

View File

@ -79,15 +79,28 @@ class MusicBrainzCleaner:
def find_artist_mbid(self, artist_name: str) -> Optional[str]: def find_artist_mbid(self, artist_name: str) -> Optional[str]:
clean_name = self._clean_artist_name(artist_name) clean_name = self._clean_artist_name(artist_name)
# Handle collaborations - prioritize finding artist credit # Handle collaborations - try to find main artist first
# Use the same complex collaboration parsing as the database # Use the same complex collaboration parsing as the database
main_artist, collaborators = self._parse_complex_collaboration(clean_name) main_artist, collaborators = self._parse_complex_collaboration(clean_name)
has_collaboration = len(collaborators) > 0 has_collaboration = len(collaborators) > 0
if has_collaboration: if has_collaboration:
# For collaborations, we'll handle this in find_recording_mbid # For collaborations, try to find the main artist's MBID first
# by using the artist credit approach if main_artist:
# Return None here so we can find the full collaboration later if self.use_database:
result = self.db.fuzzy_search_artist(main_artist)
if result and isinstance(result, tuple) and len(result) >= 2:
return result[1] # Return main artist's MBID
else:
# Fallback to API
try:
result = self.api.search_artist(main_artist)
if result:
return result['id']
except:
pass
# If main artist not found, return None to try artist credit approach
return None return None
# Try fuzzy search for full artist name # Try fuzzy search for full artist name
@ -195,8 +208,11 @@ class MusicBrainzCleaner:
# If no primary collaboration found, check secondary patterns # If no primary collaboration found, check secondary patterns
if not is_collaboration: if not is_collaboration:
for pattern in secondary_patterns: for pattern in secondary_patterns:
# Use word boundaries to avoid splitting within words like "Orlando" # Use whitespace boundaries for the '&' and 'and' patterns, word boundaries for all others
pattern_regex = r'\b' + re.escape(pattern) + r'\b' if pattern in ['&', 'and']:
pattern_regex = r'\s' + re.escape(pattern) + r'\s'
else:
pattern_regex = r'\b' + re.escape(pattern) + r'\b'
if re.search(pattern_regex, artist_string, re.IGNORECASE): if re.search(pattern_regex, artist_string, re.IGNORECASE):
# For secondary patterns, be more careful # For secondary patterns, be more careful
# Check if this looks like a band name vs collaboration # Check if this looks like a band name vs collaboration
@ -239,8 +255,15 @@ class MusicBrainzCleaner:
if not is_collaboration: if not is_collaboration:
return (artist_string, []) return (artist_string, [])
# Split on the pattern # Split on the pattern using the same regex that was used for detection
parts = artist_string.split(split_pattern) if split_pattern in ['&', 'and']:
# Use whitespace boundary regex for these patterns
pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
else:
# Use simple split for other patterns
parts = artist_string.split(split_pattern)
if len(parts) < 2: if len(parts) < 2:
return (artist_string, []) return (artist_string, [])