Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-27 10:56:19 -05:00
parent 712573d91a
commit e6b2c9443c
9 changed files with 9537 additions and 15177 deletions

File diff suppressed because it is too large Load Diff

View File

@ -29967,7 +29967,7 @@
},
"settings": {
"cache_duration_hours": 168,
"last_updated": "2025-07-24T20:17:15.426193"
"last_updated": "2025-07-26T20:17:15.426193"
},
"@SingKingKaraoke": [
{

View File

@ -1,7 +1,4 @@
https://www.youtube.com/@SingKingKaraoke/videos
https://www.youtube.com/@karafun/videos
https://www.youtube.com/@KaraokeOnVEVO/videos
https://www.youtube.com/@StingrayKaraoke/videos
https://www.youtube.com/@CCKaraoke/videos
https://www.youtube.com/@AtomicKaraoke/videos
https://www.youtube.com/@sing2karaoke/videos

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -156,8 +156,8 @@ Examples:
parser.add_argument(
"--fuzzy-threshold",
type=int,
default=90,
help="Fuzzy match threshold (0-100, default 90)",
default=DEFAULT_FUZZY_THRESHOLD,
help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
)
parser.add_argument(
"--parallel",

View File

@ -404,7 +404,8 @@ class KaraokeDownloader:
# Further filter out songs already on server or marked as duplicates
not_on_server = []
server_available = 0
server_available_mp4 = 0
server_available_other = 0
marked_duplicates = 0
for song in undownloaded:
@ -426,14 +427,26 @@ class KaraokeDownloader:
f"{artist} - {title}",
"songlist",
):
server_available += 1
server_available_mp4 += 1
continue
# Check if song is on server but in different format (MP3/CDG)
key = f"{artist.lower()}_{normalize_title(title)}"
if key in server_songs:
song_info = server_songs[key]
if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
server_available_other += 1
print(f"🎵 Found {artist} - {title} on server as {song_info.get('is_mp3', False) and 'MP3' or 'CDG'} format, will download video version")
not_on_server.append(song)
if server_available > 0:
if server_available_mp4 > 0:
print(
f"\n🎵 {server_available} songs already available on server, skipping."
f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
)
if server_available_other > 0:
print(
f"\n🎵 {server_available_other} songs found on server as MP3/CDG, will download video versions."
)
if marked_duplicates > 0:
print(
@ -449,9 +462,8 @@ class KaraokeDownloader:
# --- Download plan building (same for both normal and focus modes) ---
# --- Download plan cache logic ---
plan_mode = "songlist"
# Include all parameters that affect the plan generation
# Include only parameters that affect the plan generation (exclude limit since it only affects execution)
plan_kwargs = {
"limit": limit or "all",
"channels": len(channel_urls),
"fuzzy": fuzzy_match,
"threshold": fuzzy_threshold,

View File

@ -7,6 +7,30 @@ except ImportError:
MUTAGEN_AVAILABLE = False
def clean_channel_name(channel_name: str) -> str:
    """Reduce a raw channel name to a single ASCII-alphanumeric word for tagging.

    Every character other than ASCII letters, digits and whitespace is removed
    (this includes a leading ``@`` handle marker), and only the first
    whitespace-separated word of what remains is kept.

    Args:
        channel_name: Raw channel name (may contain an ``@`` symbol). ``None``
            or an empty string is tolerated.

    Returns:
        The first cleaned word, or ``"Unknown"`` when nothing usable remains.
    """
    # Guard against None/empty input instead of failing on a str method call.
    if not channel_name:
        return "Unknown"

    # "@" is not alphanumeric, so this filter also drops the handle prefix;
    # no separate prefix handling is required.
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", channel_name)
    words = cleaned.split()
    return words[0] if words else "Unknown"
def extract_artist_title(video_title):
title = (
video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
@ -26,12 +50,13 @@ def add_id3_tags(file_path, video_title, channel_name):
return
try:
artist, title = extract_artist_title(video_title)
clean_channel = clean_channel_name(channel_name)
mp4 = MP4(str(file_path))
mp4["\xa9nam"] = title
mp4["\xa9ART"] = artist
mp4["\xa9alb"] = f"{channel_name} Karaoke"
mp4["\xa9alb"] = clean_channel # Use clean channel name only, no suffix
mp4["\xa9gen"] = "Karaoke"
mp4.save()
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'")
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}', Album='{clean_channel}'")
except Exception as e:
print(f"⚠️ Could not add ID3 tags: {e}")

View File

@ -9,26 +9,34 @@ from pathlib import Path
def load_server_songs(songs_path="data/songs.json"):
    """Load the songs already available on the server, with format information.

    Args:
        songs_path: Path to the server's songs JSON file. The file is iterated
            as a collection of entries; only entries containing "artist",
            "title" and "path" are used.

    Returns:
        A dict keyed by "<artist lowercased>_<normalized title>". Each value is
        a dict with the keys "artist", "title", "path", "is_mp3", "is_cdg" and
        "is_mp4". An empty dict is returned when the file is missing or cannot
        be parsed.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return {}

    try:
        with open(songs_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, FileNotFoundError) as e:
        print(f"⚠️ Could not load server songs: {e}")
        return {}

    server_songs = {}
    for song in data:
        # Entries without a path cannot be classified by format, so skip them.
        if "artist" in song and "title" in song and "path" in song:
            artist = song["artist"].strip()
            title = song["title"].strip()
            path = song["path"].strip()
            path_lower = path.lower()
            key = f"{artist.lower()}_{normalize_title(title)}"
            server_songs[key] = {
                "artist": artist,
                "title": title,
                "path": path,
                "is_mp3": path_lower.endswith(".mp3"),
                # Matches "cdg" anywhere in the path (folder name or extension).
                "is_cdg": "cdg" in path_lower,
                "is_mp4": path_lower.endswith(".mp4"),
            }

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
def is_song_on_server(server_songs, artist, title):
@ -37,6 +45,19 @@ def is_song_on_server(server_songs, artist, title):
return key in server_songs
def should_skip_server_song(server_songs, artist, title):
    """Tell whether a song can be skipped because the server already has it as MP4.

    Returns True only when the song's key is present in ``server_songs`` and
    its entry is flagged ``is_mp4`` without also being flagged ``is_cdg``.
    Returns False when the song is not on the server at all, so it should be
    downloaded; otherwise the result is the value of that flag expression.
    """
    lookup_key = f"{artist.lower()}_{normalize_title(title)}"
    if lookup_key in server_songs:
        entry = server_songs[lookup_key]
        # A video copy counts only if it does not live under a CDG path.
        return entry.get("is_mp4", False) and not entry.get("is_cdg", False)
    # Unknown to the server: nothing to skip.
    return False
def load_server_duplicates_tracking(
tracking_path="data/server_duplicates_tracking.json",
):
@ -86,8 +107,9 @@ def mark_song_as_server_duplicate(tracking, artist, title, video_title, channel_
def check_and_mark_server_duplicate(
server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
):
"""Check if a song is on server and mark it as duplicate if so. Returns True if it's a duplicate."""
if is_song_on_server(server_songs, artist, title):
"""Check if a song should be skipped because it's already available as MP4 on server and mark it as duplicate if so.
Returns True if it should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
if should_skip_server_song(server_songs, artist, title):
if not is_song_marked_as_server_duplicate(
server_duplicates_tracking, artist, title
):