Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-27 10:56:19 -05:00
parent 712573d91a
commit e6b2c9443c
9 changed files with 9537 additions and 15177 deletions

File diff suppressed because it is too large Load Diff

View File

@ -29967,7 +29967,7 @@
}, },
"settings": { "settings": {
"cache_duration_hours": 168, "cache_duration_hours": 168,
"last_updated": "2025-07-24T20:17:15.426193" "last_updated": "2025-07-26T20:17:15.426193"
}, },
"@SingKingKaraoke": [ "@SingKingKaraoke": [
{ {

View File

@ -1,7 +1,4 @@
https://www.youtube.com/@SingKingKaraoke/videos https://www.youtube.com/@SingKingKaraoke/videos
https://www.youtube.com/@karafun/videos
https://www.youtube.com/@KaraokeOnVEVO/videos https://www.youtube.com/@KaraokeOnVEVO/videos
https://www.youtube.com/@StingrayKaraoke/videos https://www.youtube.com/@StingrayKaraoke/videos
https://www.youtube.com/@CCKaraoke/videos
https://www.youtube.com/@AtomicKaraoke/videos
https://www.youtube.com/@sing2karaoke/videos https://www.youtube.com/@sing2karaoke/videos

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -156,8 +156,8 @@ Examples:
parser.add_argument( parser.add_argument(
"--fuzzy-threshold", "--fuzzy-threshold",
type=int, type=int,
default=90, default=DEFAULT_FUZZY_THRESHOLD,
help="Fuzzy match threshold (0-100, default 90)", help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
) )
parser.add_argument( parser.add_argument(
"--parallel", "--parallel",

View File

@ -404,7 +404,8 @@ class KaraokeDownloader:
# Further filter out songs already on server or marked as duplicates # Further filter out songs already on server or marked as duplicates
not_on_server = [] not_on_server = []
server_available = 0 server_available_mp4 = 0
server_available_other = 0
marked_duplicates = 0 marked_duplicates = 0
for song in undownloaded: for song in undownloaded:
@ -426,14 +427,26 @@ class KaraokeDownloader:
f"{artist} - {title}", f"{artist} - {title}",
"songlist", "songlist",
): ):
server_available += 1 server_available_mp4 += 1
continue continue
# Check if song is on server but in different format (MP3/CDG)
key = f"{artist.lower()}_{normalize_title(title)}"
if key in server_songs:
song_info = server_songs[key]
if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
server_available_other += 1
print(f"🎵 Found {artist} - {title} on server as {song_info.get('is_mp3', False) and 'MP3' or 'CDG'} format, will download video version")
not_on_server.append(song) not_on_server.append(song)
if server_available > 0: if server_available_mp4 > 0:
print( print(
f"\n🎵 {server_available} songs already available on server, skipping." f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
)
if server_available_other > 0:
print(
f"\n🎵 {server_available_other} songs found on server as MP3/CDG, will download video versions."
) )
if marked_duplicates > 0: if marked_duplicates > 0:
print( print(
@ -449,9 +462,8 @@ class KaraokeDownloader:
# --- Download plan building (same for both normal and focus modes) --- # --- Download plan building (same for both normal and focus modes) ---
# --- Download plan cache logic --- # --- Download plan cache logic ---
plan_mode = "songlist" plan_mode = "songlist"
# Include all parameters that affect the plan generation # Include only parameters that affect the plan generation (exclude limit since it only affects execution)
plan_kwargs = { plan_kwargs = {
"limit": limit or "all",
"channels": len(channel_urls), "channels": len(channel_urls),
"fuzzy": fuzzy_match, "fuzzy": fuzzy_match,
"threshold": fuzzy_threshold, "threshold": fuzzy_threshold,

View File

@ -7,6 +7,30 @@ except ImportError:
MUTAGEN_AVAILABLE = False MUTAGEN_AVAILABLE = False
def clean_channel_name(channel_name: str) -> str:
    """
    Reduce a channel name to a single ASCII-alphanumeric word for tagging.

    Every character other than ASCII letters, digits and whitespace is
    removed (this includes a leading ``@`` handle marker), and the first
    whitespace-separated word of what remains is returned.

    Args:
        channel_name: Raw channel name (may contain an ``@`` symbol).
            ``None`` or an empty string is tolerated.

    Returns:
        The first cleaned word, or ``"Unknown"`` when no letters or digits
        remain.
    """
    # Missing or empty names fall back to the same placeholder used when
    # cleaning leaves nothing behind, instead of raising AttributeError.
    if not channel_name:
        return "Unknown"

    # Keep only letters, digits and whitespace. "@" is not in the allowed
    # set, so no separate prefix-stripping step is needed.
    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', channel_name)

    # Collapse to a single word: only the first whitespace-separated token
    # is used.
    words = cleaned.split()
    return words[0] if words else "Unknown"
def extract_artist_title(video_title): def extract_artist_title(video_title):
title = ( title = (
video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip() video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
@ -26,12 +50,13 @@ def add_id3_tags(file_path, video_title, channel_name):
return return
try: try:
artist, title = extract_artist_title(video_title) artist, title = extract_artist_title(video_title)
clean_channel = clean_channel_name(channel_name)
mp4 = MP4(str(file_path)) mp4 = MP4(str(file_path))
mp4["\xa9nam"] = title mp4["\xa9nam"] = title
mp4["\xa9ART"] = artist mp4["\xa9ART"] = artist
mp4["\xa9alb"] = f"{channel_name} Karaoke" mp4["\xa9alb"] = clean_channel # Use clean channel name only, no suffix
mp4["\xa9gen"] = "Karaoke" mp4["\xa9gen"] = "Karaoke"
mp4.save() mp4.save()
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'") print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}', Album='{clean_channel}'")
except Exception as e: except Exception as e:
print(f"⚠️ Could not add ID3 tags: {e}") print(f"⚠️ Could not add ID3 tags: {e}")

View File

@ -9,26 +9,34 @@ from pathlib import Path
def load_server_songs(songs_path="data/songs.json"): def load_server_songs(songs_path="data/songs.json"):
"""Load the list of songs already available on the server.""" """Load the list of songs already available on the server with format information."""
songs_file = Path(songs_path) songs_file = Path(songs_path)
if not songs_file.exists(): if not songs_file.exists():
print(f"⚠️ Server songs file not found: {songs_path}") print(f"⚠️ Server songs file not found: {songs_path}")
return set() return {}
try: try:
with open(songs_file, "r", encoding="utf-8") as f: with open(songs_file, "r", encoding="utf-8") as f:
data = json.load(f) data = json.load(f)
server_songs = set() server_songs = {}
for song in data: for song in data:
if "artist" in song and "title" in song: if "artist" in song and "title" in song and "path" in song:
artist = song["artist"].strip() artist = song["artist"].strip()
title = song["title"].strip() title = song["title"].strip()
path = song["path"].strip()
key = f"{artist.lower()}_{normalize_title(title)}" key = f"{artist.lower()}_{normalize_title(title)}"
server_songs.add(key) server_songs[key] = {
"artist": artist,
"title": title,
"path": path,
"is_mp3": path.lower().endswith('.mp3'),
"is_cdg": 'cdg' in path.lower(),
"is_mp4": path.lower().endswith('.mp4')
}
print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)") print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
return server_songs return server_songs
except (json.JSONDecodeError, FileNotFoundError) as e: except (json.JSONDecodeError, FileNotFoundError) as e:
print(f"⚠️ Could not load server songs: {e}") print(f"⚠️ Could not load server songs: {e}")
return set() return {}
def is_song_on_server(server_songs, artist, title): def is_song_on_server(server_songs, artist, title):
@ -37,6 +45,19 @@ def is_song_on_server(server_songs, artist, title):
return key in server_songs return key in server_songs
def should_skip_server_song(server_songs, artist, title):
    """Tell whether a song can be skipped because the server already has it as MP4.

    The lookup key is the lower-cased artist joined by ``_`` to the
    normalized title. A song is skipped only when its server entry is flagged
    ``is_mp4`` and is not flagged ``is_cdg``.

    Returns a falsy value when the song is not on the server at all, or when
    the server copy is flagged as something other than a plain MP4 (so the
    video version should still be downloaded); truthy otherwise.
    """
    lookup_key = f"{artist.lower()}_{normalize_title(title)}"

    if lookup_key in server_songs:
        entry = server_songs[lookup_key]
        has_video = entry.get("is_mp4", False)
        flagged_cdg = entry.get("is_cdg", False)
        # An MP4 flagged as CDG counts as a different format: do not skip it.
        return has_video and not flagged_cdg

    # Unknown to the server, so there is nothing to skip.
    return False
def load_server_duplicates_tracking( def load_server_duplicates_tracking(
tracking_path="data/server_duplicates_tracking.json", tracking_path="data/server_duplicates_tracking.json",
): ):
@ -86,8 +107,9 @@ def mark_song_as_server_duplicate(tracking, artist, title, video_title, channel_
def check_and_mark_server_duplicate( def check_and_mark_server_duplicate(
server_songs, server_duplicates_tracking, artist, title, video_title, channel_name server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
): ):
"""Check if a song is on server and mark it as duplicate if so. Returns True if it's a duplicate.""" """Check if a song should be skipped because it's already available as MP4 on server and mark it as duplicate if so.
if is_song_on_server(server_songs, artist, title): Returns True if it should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
if should_skip_server_song(server_songs, artist, title):
if not is_song_marked_as_server_duplicate( if not is_song_marked_as_server_duplicate(
server_duplicates_tracking, artist, title server_duplicates_tracking, artist, title
): ):