Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

2025-07-27 10:56:19 -05:00 · 2025-07-27 10:56:19 -05:00 · e6b2c9443c
commit e6b2c9443c
parent 712573d91a
9 changed files with 9537 additions and 15177 deletions
--- a/data/bak_songList.json
+++ b/data/bak_songList.json
--- a/data/channel_cache.json
+++ b/data/channel_cache.json
@@ -29967,7 +29967,7 @@
  },
  "settings": {
    "cache_duration_hours": 168,
-    "last_updated": "2025-07-24T20:17:15.426193"
+    "last_updated": "2025-07-26T20:17:15.426193"
  },
  "@SingKingKaraoke": [
    {
--- a/data/channels.txt
+++ b/data/channels.txt
@@ -1,7 +1,4 @@
 https://www.youtube.com/@SingKingKaraoke/videos 
-https://www.youtube.com/@karafun/videos
 https://www.youtube.com/@KaraokeOnVEVO/videos
 https://www.youtube.com/@StingrayKaraoke/videos
-https://www.youtube.com/@CCKaraoke/videos
-https://www.youtube.com/@AtomicKaraoke/videos
 https://www.youtube.com/@sing2karaoke/videos
--- a/data/server_duplicates_tracking.json
+++ b/data/server_duplicates_tracking.json
--- a/data/songs.json
+++ b/data/songs.json
--- a/karaoke_downloader/cli.py
+++ b/karaoke_downloader/cli.py
@@ -156,8 +156,8 @@ Examples:
    parser.add_argument(
        "--fuzzy-threshold",
        type=int,
-        default=90,
-        help="Fuzzy match threshold (0-100, default 90)",
+        default=DEFAULT_FUZZY_THRESHOLD,
+        help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
    )
    parser.add_argument(
        "--parallel",
--- a/karaoke_downloader/downloader.py
+++ b/karaoke_downloader/downloader.py
@@ -404,7 +404,8 @@ class KaraokeDownloader:

        # Further filter out songs already on server or marked as duplicates
        not_on_server = []
-        server_available = 0
+        server_available_mp4 = 0
+        server_available_other = 0
        marked_duplicates = 0

        for song in undownloaded:
@@ -426,14 +427,26 @@ class KaraokeDownloader:
                f"{artist} - {title}",
                "songlist",
            ):
-                server_available += 1
+                server_available_mp4 += 1
                continue

+            # Check if song is on server but in different format (MP3/CDG)
+            key = f"{artist.lower()}_{normalize_title(title)}"
+            if key in server_songs:
+                song_info = server_songs[key]
+                if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
+                    server_available_other += 1
+                    print(f"🎵 Found {artist} - {title} on server as {song_info.get('is_mp3', False) and 'MP3' or 'CDG'} format, will download video version")
+
            not_on_server.append(song)

-        if server_available > 0:
+        if server_available_mp4 > 0:
            print(
-                f"\n🎵 {server_available} songs already available on server, skipping."
+                f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
+            )
+        if server_available_other > 0:
+            print(
+                f"\n🎵 {server_available_other} songs found on server as MP3/CDG, will download video versions."
            )
        if marked_duplicates > 0:
            print(
@@ -449,9 +462,8 @@ class KaraokeDownloader:
        # --- Download plan building (same for both normal and focus modes) ---
        # --- Download plan cache logic ---
        plan_mode = "songlist"
-        # Include all parameters that affect the plan generation
+        # Include only parameters that affect the plan generation (exclude limit since it only affects execution)
        plan_kwargs = {
-            "limit": limit or "all",
            "channels": len(channel_urls),
            "fuzzy": fuzzy_match,
            "threshold": fuzzy_threshold,
--- a/karaoke_downloader/id3_utils.py
+++ b/karaoke_downloader/id3_utils.py
@@ -7,6 +7,30 @@ except ImportError:
    MUTAGEN_AVAILABLE = False


+def clean_channel_name(channel_name: str) -> str:
+    """
+    Clean channel name for ID3 tagging by removing @ symbol and ensuring it's alpha-only.
+    
+    Args:
+        channel_name: Raw channel name (may contain @ symbol)
+        
+    Returns:
+        Cleaned channel name suitable for ID3 tags
+    """
+    # Remove @ symbol if present
+    if channel_name.startswith('@'):
+        channel_name = channel_name[1:]
+    
+    # Remove any non-alphanumeric characters and convert to single word
+    # Keep only letters, numbers, and spaces, then take the first word
+    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', channel_name)
+    words = cleaned.split()
+    if words:
+        return words[0]  # Return only the first word
+    
+    return "Unknown"
+
+
 def extract_artist_title(video_title):
    title = (
        video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
@@ -26,12 +50,13 @@ def add_id3_tags(file_path, video_title, channel_name):
        return
    try:
        artist, title = extract_artist_title(video_title)
+        clean_channel = clean_channel_name(channel_name)
        mp4 = MP4(str(file_path))
        mp4["\xa9nam"] = title
        mp4["\xa9ART"] = artist
-        mp4["\xa9alb"] = f"{channel_name} Karaoke"
+        mp4["\xa9alb"] = clean_channel  # Use clean channel name only, no suffix
        mp4["\xa9gen"] = "Karaoke"
        mp4.save()
-        print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'")
+        print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}', Album='{clean_channel}'")
    except Exception as e:
        print(f"⚠️ Could not add ID3 tags: {e}")
--- a/karaoke_downloader/server_manager.py
+++ b/karaoke_downloader/server_manager.py
@@ -9,26 +9,34 @@ from pathlib import Path


 def load_server_songs(songs_path="data/songs.json"):
-    """Load the list of songs already available on the server."""
+    """Load the list of songs already available on the server with format information."""
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
-        return set()
+        return {}
    try:
        with open(songs_file, "r", encoding="utf-8") as f:
            data = json.load(f)
-        server_songs = set()
+        server_songs = {}
        for song in data:
-            if "artist" in song and "title" in song:
+            if "artist" in song and "title" in song and "path" in song:
                artist = song["artist"].strip()
                title = song["title"].strip()
+                path = song["path"].strip()
                key = f"{artist.lower()}_{normalize_title(title)}"
-                server_songs.add(key)
+                server_songs[key] = {
+                    "artist": artist,
+                    "title": title,
+                    "path": path,
+                    "is_mp3": path.lower().endswith('.mp3'),
+                    "is_cdg": 'cdg' in path.lower(),
+                    "is_mp4": path.lower().endswith('.mp4')
+                }
        print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
        return server_songs
    except (json.JSONDecodeError, FileNotFoundError) as e:
        print(f"⚠️ Could not load server songs: {e}")
-        return set()
+        return {}


 def is_song_on_server(server_songs, artist, title):
@@ -37,6 +45,19 @@ def is_song_on_server(server_songs, artist, title):
    return key in server_songs


+def should_skip_server_song(server_songs, artist, title):
+    """Check if a song should be skipped because it's already available as MP4 on server.
+    Returns True if the song should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
+    key = f"{artist.lower()}_{normalize_title(title)}"
+    if key not in server_songs:
+        return False  # Not on server, so don't skip
+    
+    song_info = server_songs[key]
+    # Skip if it's an MP4 file (video format)
+    # Don't skip if it's MP3 or in CDG folder (different format)
+    return song_info.get("is_mp4", False) and not song_info.get("is_cdg", False)
+
+
 def load_server_duplicates_tracking(
    tracking_path="data/server_duplicates_tracking.json",
 ):
@@ -86,8 +107,9 @@ def mark_song_as_server_duplicate(tracking, artist, title, video_title, channel_
 def check_and_mark_server_duplicate(
    server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
 ):
-    """Check if a song is on server and mark it as duplicate if so. Returns True if it's a duplicate."""
-    if is_song_on_server(server_songs, artist, title):
+    """Check if a song should be skipped because it's already available as MP4 on server and mark it as duplicate if so. 
+    Returns True if it should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
+    if should_skip_server_song(server_songs, artist, title):
        if not is_song_marked_as_server_duplicate(
            server_duplicates_tracking, artist, title
        ):