From 3d8b0165af7a7049302d145ca254993116e9af65 Mon Sep 17 00:00:00 2001
From: Matt Bruce <mbrucedogs@gmail.com>
Date: Fri, 1 Aug 2025 09:14:15 -0500
Subject: [PATCH] Signed-off-by: Matt Bruce <mbrucedogs@gmail.com>

---
 data/songs.json     | 14 +++++++-------
 src/api/database.py | 18 ++++++++++++++----
 src/cli/main.py     | 39 +++++++++++++++++++++++++++++++--------
 3 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/data/songs.json b/data/songs.json
index efdbb96..50e61e0 100755
--- a/data/songs.json
+++ b/data/songs.json
@@ -45,7 +45,7 @@
     "favorite": false,
     "guid": "3ff82151-be07-70e0-7e05-145dc0abec4a",
     "path": "z://MP4\\Big Sean and E40 - I Don t F--k With You.mp4",
-    "title": "I Don t F--k With You"
+    "title": "I Don't Fuck With You"
   },
   {
     "artist": "Blake Shelton ft. Gwen Stefani",
@@ -69,7 +69,7 @@
     "favorite": false,
     "guid": "946a1077-ab9e-300c-3a72-b1e141e9706f",
     "path": "z://MP4\\Bruno Mars ft. Cardi B - Finesse Remix (Karaoke Version).mp4",
-    "title": "Finesse Remix"
+    "title": "Finesse"
   },
   {
     "artist": "Cardi B ft. Bad Bunny, J Balvin",
@@ -176,7 +176,7 @@
     "title": "Standing Outside the Fire"
   },
   {
-    "artist": "Garth-Brooks",
+    "artist": "Garth Brooks",
     "disabled": false,
     "favorite": false,
     "guid": "85d9afa3-4463-457c-5ed2-f71602529edb",
@@ -437,7 +437,7 @@
     "favorite": false,
     "guid": "c9d7ed4f-2efd-ea4b-91ab-543968b35ad6",
     "path": "z://MP4\\Snoop Dog - Gin & Juice Instrumental with Lyrics.mp4",
-    "title": "Gin & Juice Instrumental with Lyrics"
+    "title": "Gin & Juice"
   },
   {
     "artist": "Taylor Swift",
@@ -445,7 +445,7 @@
     "favorite": false,
     "guid": "542672e8-3f01-fdc3-005f-dbc718ec4278",
     "path": "z://MP4\\Taylor Swift - Delicade (Karaoke Version).mp4",
-    "title": "Delicade"
+    "title": "Delicate"
   },
   {
     "artist": "Taylor Swift ft. Bon Iver",
@@ -525,7 +525,7 @@
     "favorite": false,
     "guid": "6be4ca88-1996-e39e-6eae-18d33a797eb4",
     "path": "z://MP4\\Warren G ft Nate Dogg - Regulate with lyrics.mp4",
-    "title": "Regulate with lyrics"
+    "title": "Regulate"
   },
   {
     "artist": "Waylon Jennings",
@@ -408872,7 +408872,7 @@
     "favorite": false,
     "guid": "a6b46b67-c4be-ec6a-0857-cf19bebca9e2",
     "path": "z://CDG\\Various\\Waylon Jennings - Good Ol Boys Theme From The Duk.mp3",
-    "title": "Good Ol Boys Theme From The Duk"
+    "title": "Good Ol' Boys"
   },
   {
     "artist": "Wayne Newton",
diff --git a/src/api/database.py b/src/api/database.py
index 81e4069..c42e08f 100644
--- a/src/api/database.py
+++ b/src/api/database.py
@@ -661,9 +661,12 @@ class MusicBrainzDatabase:
                                 collaborators = parts[1:]
                                 return (main_artist, collaborators)
                 else:
-                    # Use word boundaries to avoid splitting within words like "Orlando"
+                    # Require surrounding whitespace for '&' and 'and'; use word boundaries for other patterns to avoid splitting within words like "Orlando"
                     import re
-                    pattern_regex = r'\b' + re.escape(pattern) + r'\b'
+                    if pattern in ['&', 'and']:
+                        pattern_regex = r'\s' + re.escape(pattern) + r'\s'
+                    else:
+                        pattern_regex = r'\b' + re.escape(pattern) + r'\b'
                     if re.search(pattern_regex, artist_string, re.IGNORECASE):
                         # For secondary patterns, be more careful
                         # Check if this looks like a band name vs collaboration
@@ -725,8 +728,15 @@ class MusicBrainzDatabase:
         if not is_collaboration:
             return (artist_string, [])
         
-        # Split on the pattern
-        parts = artist_string.split(split_pattern)
+        # Split on the pattern using the same regex that was used for detection
+        if split_pattern in ['&', 'and']:
+            # Use whitespace boundary regex for these patterns
+            pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
+            parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
+        else:
+            # Use simple split for other patterns
+            parts = artist_string.split(split_pattern)
+            
         if len(parts) < 2:
             return (artist_string, [])
         
diff --git a/src/cli/main.py b/src/cli/main.py
index 27ad0ff..c885142 100644
--- a/src/cli/main.py
+++ b/src/cli/main.py
@@ -79,15 +79,28 @@ class MusicBrainzCleaner:
     def find_artist_mbid(self, artist_name: str) -> Optional[str]:
         clean_name = self._clean_artist_name(artist_name)
         
-        # Handle collaborations - prioritize finding artist credit
+        # Handle collaborations - try to find main artist first
         # Use the same complex collaboration parsing as the database
         main_artist, collaborators = self._parse_complex_collaboration(clean_name)
         has_collaboration = len(collaborators) > 0
         
         if has_collaboration:
-            # For collaborations, we'll handle this in find_recording_mbid
-            # by using the artist credit approach
-            # Return None here so we can find the full collaboration later
+            # For collaborations, try to find the main artist's MBID first
+            if main_artist:
+                if self.use_database:
+                    result = self.db.fuzzy_search_artist(main_artist)
+                    if result and isinstance(result, tuple) and len(result) >= 2:
+                        return result[1]  # Return main artist's MBID
+                else:
+                    # Fallback to API
+                    try:
+                        result = self.api.search_artist(main_artist)
+                        if result:
+                            return result['id']
+                    except:
+                        pass
+            
+            # If main artist not found, return None to try artist credit approach
             return None
         
         # Try fuzzy search for full artist name
@@ -195,8 +208,11 @@ class MusicBrainzCleaner:
         # If no primary collaboration found, check secondary patterns
         if not is_collaboration:
             for pattern in secondary_patterns:
-                # Use word boundaries to avoid splitting within words like "Orlando"
-                pattern_regex = r'\b' + re.escape(pattern) + r'\b'
+                # Use whitespace boundaries for '&' and 'and'; use word boundaries for all other patterns
+                if pattern in ['&', 'and']:
+                    pattern_regex = r'\s' + re.escape(pattern) + r'\s'
+                else:
+                    pattern_regex = r'\b' + re.escape(pattern) + r'\b'
                 if re.search(pattern_regex, artist_string, re.IGNORECASE):
                     # For secondary patterns, be more careful
                     # Check if this looks like a band name vs collaboration
@@ -239,8 +255,15 @@ class MusicBrainzCleaner:
         if not is_collaboration:
             return (artist_string, [])
         
-        # Split on the pattern
-        parts = artist_string.split(split_pattern)
+        # Split on the pattern using the same regex that was used for detection
+        if split_pattern in ['&', 'and']:
+            # Use whitespace boundary regex for these patterns
+            pattern_regex = r'\s' + re.escape(split_pattern) + r'\s'
+            parts = re.split(pattern_regex, artist_string, flags=re.IGNORECASE)
+        else:
+            # Use simple split for other patterns
+            parts = artist_string.split(split_pattern)
+            
         if len(parts) < 2:
             return (artist_string, [])