Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
712573d91a
commit
e6b2c9443c
File diff suppressed because it is too large
Load Diff
@ -29967,7 +29967,7 @@
|
||||
},
|
||||
"settings": {
|
||||
"cache_duration_hours": 168,
|
||||
"last_updated": "2025-07-24T20:17:15.426193"
|
||||
"last_updated": "2025-07-26T20:17:15.426193"
|
||||
},
|
||||
"@SingKingKaraoke": [
|
||||
{
|
||||
|
||||
@ -1,7 +1,4 @@
|
||||
https://www.youtube.com/@SingKingKaraoke/videos
|
||||
https://www.youtube.com/@karafun/videos
|
||||
https://www.youtube.com/@KaraokeOnVEVO/videos
|
||||
https://www.youtube.com/@StingrayKaraoke/videos
|
||||
https://www.youtube.com/@CCKaraoke/videos
|
||||
https://www.youtube.com/@AtomicKaraoke/videos
|
||||
https://www.youtube.com/@sing2karaoke/videos
|
||||
File diff suppressed because it is too large
Load Diff
9488
data/songs.json
9488
data/songs.json
File diff suppressed because it is too large
Load Diff
@ -156,8 +156,8 @@ Examples:
|
||||
parser.add_argument(
|
||||
"--fuzzy-threshold",
|
||||
type=int,
|
||||
default=90,
|
||||
help="Fuzzy match threshold (0-100, default 90)",
|
||||
default=DEFAULT_FUZZY_THRESHOLD,
|
||||
help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--parallel",
|
||||
|
||||
@ -404,7 +404,8 @@ class KaraokeDownloader:
|
||||
|
||||
# Further filter out songs already on server or marked as duplicates
|
||||
not_on_server = []
|
||||
server_available = 0
|
||||
server_available_mp4 = 0
|
||||
server_available_other = 0
|
||||
marked_duplicates = 0
|
||||
|
||||
for song in undownloaded:
|
||||
@ -426,14 +427,26 @@ class KaraokeDownloader:
|
||||
f"{artist} - {title}",
|
||||
"songlist",
|
||||
):
|
||||
server_available += 1
|
||||
server_available_mp4 += 1
|
||||
continue
|
||||
|
||||
# Check if song is on server but in different format (MP3/CDG)
|
||||
key = f"{artist.lower()}_{normalize_title(title)}"
|
||||
if key in server_songs:
|
||||
song_info = server_songs[key]
|
||||
if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
|
||||
server_available_other += 1
|
||||
print(f"🎵 Found {artist} - {title} on server as {song_info.get('is_mp3', False) and 'MP3' or 'CDG'} format, will download video version")
|
||||
|
||||
not_on_server.append(song)
|
||||
|
||||
if server_available > 0:
|
||||
if server_available_mp4 > 0:
|
||||
print(
|
||||
f"\n🎵 {server_available} songs already available on server, skipping."
|
||||
f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
|
||||
)
|
||||
if server_available_other > 0:
|
||||
print(
|
||||
f"\n🎵 {server_available_other} songs found on server as MP3/CDG, will download video versions."
|
||||
)
|
||||
if marked_duplicates > 0:
|
||||
print(
|
||||
@ -449,9 +462,8 @@ class KaraokeDownloader:
|
||||
# --- Download plan building (same for both normal and focus modes) ---
|
||||
# --- Download plan cache logic ---
|
||||
plan_mode = "songlist"
|
||||
# Include all parameters that affect the plan generation
|
||||
# Include only parameters that affect the plan generation (exclude limit since it only affects execution)
|
||||
plan_kwargs = {
|
||||
"limit": limit or "all",
|
||||
"channels": len(channel_urls),
|
||||
"fuzzy": fuzzy_match,
|
||||
"threshold": fuzzy_threshold,
|
||||
|
||||
@ -7,6 +7,30 @@ except ImportError:
|
||||
MUTAGEN_AVAILABLE = False
|
||||
|
||||
|
||||
def clean_channel_name(channel_name: str) -> str:
    """Reduce a channel name to a single ASCII-alphanumeric word for tagging.

    Every character other than ASCII letters, digits and whitespace is
    dropped (which also removes any ``@`` handle prefix), and the first
    whitespace-separated word of what remains is returned.

    Args:
        channel_name: Raw channel name (may contain an ``@`` symbol).

    Returns:
        The first cleaned word, or ``"Unknown"`` when nothing usable is
        left (``None``, an empty string, or no ASCII letters/digits).
    """
    # None / "" cannot yield a word; fall back the same way as any other
    # name that cleans down to nothing.
    if not channel_name:
        return "Unknown"

    # The character class already strips '@', so no separate prefix
    # handling is needed.
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", channel_name)
    words = cleaned.split()
    return words[0] if words else "Unknown"
|
||||
|
||||
|
||||
def extract_artist_title(video_title):
|
||||
title = (
|
||||
video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
|
||||
@ -26,12 +50,13 @@ def add_id3_tags(file_path, video_title, channel_name):
|
||||
return
|
||||
try:
|
||||
artist, title = extract_artist_title(video_title)
|
||||
clean_channel = clean_channel_name(channel_name)
|
||||
mp4 = MP4(str(file_path))
|
||||
mp4["\xa9nam"] = title
|
||||
mp4["\xa9ART"] = artist
|
||||
mp4["\xa9alb"] = f"{channel_name} Karaoke"
|
||||
mp4["\xa9alb"] = clean_channel # Use clean channel name only, no suffix
|
||||
mp4["\xa9gen"] = "Karaoke"
|
||||
mp4.save()
|
||||
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'")
|
||||
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}', Album='{clean_channel}'")
|
||||
except Exception as e:
|
||||
print(f"⚠️ Could not add ID3 tags: {e}")
|
||||
|
||||
@ -9,26 +9,34 @@ from pathlib import Path
|
||||
|
||||
|
||||
def load_server_songs(songs_path="data/songs.json"):
    """Load the songs already available on the server, with format information.

    The file is read as JSON and iterated record by record. Every record
    that carries ``artist``, ``title`` and ``path`` is indexed under the key
    ``f"{artist.lower()}_{normalize_title(title)}"``.

    When several records share a key (the same song stored in more than
    one format), a record that is a plain MP4 (``is_mp4`` and not
    ``is_cdg``) is never replaced by a non-video record, so the song is not
    downloaded again just because an MP3/CDG copy is listed later.

    Args:
        songs_path: Path to the server song list (JSON).

    Returns:
        Dict mapping song key to a dict with ``artist``, ``title``,
        ``path`` and the boolean flags ``is_mp3``, ``is_cdg``, ``is_mp4``.
        An empty dict when the file is missing or cannot be read or parsed.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return {}

    try:
        with open(songs_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        # OSError covers FileNotFoundError as well as permission errors or a
        # directory path; the loader is best-effort and must not crash.
        print(f"⚠️ Could not load server songs: {e}")
        return {}

    server_songs = {}
    for song in data:
        # Skip anything that is not a complete song record.
        if not isinstance(song, dict):
            continue
        if not ("artist" in song and "title" in song and "path" in song):
            continue

        artist = song["artist"].strip()
        title = song["title"].strip()
        path = song["path"].strip()
        lowered_path = path.lower()
        key = f"{artist.lower()}_{normalize_title(title)}"

        info = {
            "artist": artist,
            "title": title,
            "path": path,
            "is_mp3": lowered_path.endswith(".mp3"),
            # NOTE(review): substring match anywhere in the path; presumably
            # meant to detect a CDG folder -- confirm against server layout.
            "is_cdg": "cdg" in lowered_path,
            "is_mp4": lowered_path.endswith(".mp4"),
        }

        # Same criterion should_skip_server_song uses to decide a skip.
        is_video = info["is_mp4"] and not info["is_cdg"]
        previous = server_songs.get(key)
        if (
            previous is not None
            and previous["is_mp4"]
            and not previous["is_cdg"]
            and not is_video
        ):
            # Keep the existing video entry; an audio-only duplicate must
            # not hide it.
            continue
        server_songs[key] = info

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
|
||||
|
||||
|
||||
def is_song_on_server(server_songs, artist, title):
|
||||
@ -37,6 +45,19 @@ def is_song_on_server(server_songs, artist, title):
|
||||
return key in server_songs
|
||||
|
||||
|
||||
def should_skip_server_song(server_songs, artist, title):
    """Decide whether a song can be skipped because the server has it as MP4.

    Returns True when the server entry is an MP4 that is not flagged as CDG.
    Returns False when the song is not on the server, or is only there in
    another format (MP3/CDG) and should still be downloaded.
    """
    lookup_key = f"{artist.lower()}_{normalize_title(title)}"
    if lookup_key in server_songs:
        entry = server_songs[lookup_key]
        has_mp4 = entry.get("is_mp4", False)
        # Only a real video file counts; a CDG-flagged entry is a different
        # format and does not justify skipping the download.
        return has_mp4 and not entry.get("is_cdg", False)

    # Unknown to the server: nothing to skip.
    return False
|
||||
|
||||
|
||||
def load_server_duplicates_tracking(
|
||||
tracking_path="data/server_duplicates_tracking.json",
|
||||
):
|
||||
@ -86,8 +107,9 @@ def mark_song_as_server_duplicate(tracking, artist, title, video_title, channel_
|
||||
def check_and_mark_server_duplicate(
|
||||
server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
|
||||
):
|
||||
"""Check if a song is on server and mark it as duplicate if so. Returns True if it's a duplicate."""
|
||||
if is_song_on_server(server_songs, artist, title):
|
||||
"""Check if a song should be skipped because it's already available as MP4 on server and mark it as duplicate if so.
|
||||
Returns True if it should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
|
||||
if should_skip_server_song(server_songs, artist, title):
|
||||
if not is_song_marked_as_server_duplicate(
|
||||
server_duplicates_tracking, artist, title
|
||||
):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user