Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
712573d91a
commit
e6b2c9443c
File diff suppressed because it is too large
Load Diff
@ -29967,7 +29967,7 @@
|
|||||||
},
|
},
|
||||||
"settings": {
|
"settings": {
|
||||||
"cache_duration_hours": 168,
|
"cache_duration_hours": 168,
|
||||||
"last_updated": "2025-07-24T20:17:15.426193"
|
"last_updated": "2025-07-26T20:17:15.426193"
|
||||||
},
|
},
|
||||||
"@SingKingKaraoke": [
|
"@SingKingKaraoke": [
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,7 +1,4 @@
|
|||||||
https://www.youtube.com/@SingKingKaraoke/videos
|
https://www.youtube.com/@SingKingKaraoke/videos
|
||||||
https://www.youtube.com/@karafun/videos
|
|
||||||
https://www.youtube.com/@KaraokeOnVEVO/videos
|
https://www.youtube.com/@KaraokeOnVEVO/videos
|
||||||
https://www.youtube.com/@StingrayKaraoke/videos
|
https://www.youtube.com/@StingrayKaraoke/videos
|
||||||
https://www.youtube.com/@CCKaraoke/videos
|
|
||||||
https://www.youtube.com/@AtomicKaraoke/videos
|
|
||||||
https://www.youtube.com/@sing2karaoke/videos
|
https://www.youtube.com/@sing2karaoke/videos
|
||||||
File diff suppressed because it is too large
Load Diff
9488
data/songs.json
9488
data/songs.json
File diff suppressed because it is too large
Load Diff
@ -156,8 +156,8 @@ Examples:
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--fuzzy-threshold",
|
"--fuzzy-threshold",
|
||||||
type=int,
|
type=int,
|
||||||
default=90,
|
default=DEFAULT_FUZZY_THRESHOLD,
|
||||||
help="Fuzzy match threshold (0-100, default 90)",
|
help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--parallel",
|
"--parallel",
|
||||||
|
|||||||
@ -404,7 +404,8 @@ class KaraokeDownloader:
|
|||||||
|
|
||||||
# Further filter out songs already on server or marked as duplicates
|
# Further filter out songs already on server or marked as duplicates
|
||||||
not_on_server = []
|
not_on_server = []
|
||||||
server_available = 0
|
server_available_mp4 = 0
|
||||||
|
server_available_other = 0
|
||||||
marked_duplicates = 0
|
marked_duplicates = 0
|
||||||
|
|
||||||
for song in undownloaded:
|
for song in undownloaded:
|
||||||
@ -426,14 +427,26 @@ class KaraokeDownloader:
|
|||||||
f"{artist} - {title}",
|
f"{artist} - {title}",
|
||||||
"songlist",
|
"songlist",
|
||||||
):
|
):
|
||||||
server_available += 1
|
server_available_mp4 += 1
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Check if song is on server but in different format (MP3/CDG)
|
||||||
|
key = f"{artist.lower()}_{normalize_title(title)}"
|
||||||
|
if key in server_songs:
|
||||||
|
song_info = server_songs[key]
|
||||||
|
if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
|
||||||
|
server_available_other += 1
|
||||||
|
print(f"🎵 Found {artist} - {title} on server as {song_info.get('is_mp3', False) and 'MP3' or 'CDG'} format, will download video version")
|
||||||
|
|
||||||
not_on_server.append(song)
|
not_on_server.append(song)
|
||||||
|
|
||||||
if server_available > 0:
|
if server_available_mp4 > 0:
|
||||||
print(
|
print(
|
||||||
f"\n🎵 {server_available} songs already available on server, skipping."
|
f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
|
||||||
|
)
|
||||||
|
if server_available_other > 0:
|
||||||
|
print(
|
||||||
|
f"\n🎵 {server_available_other} songs found on server as MP3/CDG, will download video versions."
|
||||||
)
|
)
|
||||||
if marked_duplicates > 0:
|
if marked_duplicates > 0:
|
||||||
print(
|
print(
|
||||||
@ -449,9 +462,8 @@ class KaraokeDownloader:
|
|||||||
# --- Download plan building (same for both normal and focus modes) ---
|
# --- Download plan building (same for both normal and focus modes) ---
|
||||||
# --- Download plan cache logic ---
|
# --- Download plan cache logic ---
|
||||||
plan_mode = "songlist"
|
plan_mode = "songlist"
|
||||||
# Include all parameters that affect the plan generation
|
# Include only parameters that affect the plan generation (exclude limit since it only affects execution)
|
||||||
plan_kwargs = {
|
plan_kwargs = {
|
||||||
"limit": limit or "all",
|
|
||||||
"channels": len(channel_urls),
|
"channels": len(channel_urls),
|
||||||
"fuzzy": fuzzy_match,
|
"fuzzy": fuzzy_match,
|
||||||
"threshold": fuzzy_threshold,
|
"threshold": fuzzy_threshold,
|
||||||
|
|||||||
@ -7,6 +7,30 @@ except ImportError:
|
|||||||
MUTAGEN_AVAILABLE = False
|
MUTAGEN_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
|
def clean_channel_name(channel_name: str) -> str:
    """Reduce a channel name or handle to a single ASCII alphanumeric word.

    Every character that is not an ASCII letter, an ASCII digit or
    whitespace is dropped (this includes a leading ``@`` from a channel
    handle), and the first whitespace-separated word of what remains is
    returned. Digits are kept, so the result is alphanumeric rather than
    strictly alphabetic. Non-ASCII letters are dropped as well.

    Args:
        channel_name: Raw channel name (may contain an ``@`` symbol).

    Returns:
        The first cleaned word, or ``"Unknown"`` when the input is not a
        string or nothing usable is left after cleaning.
    """
    # Callers may pass None when the channel is not known; fall back to the
    # placeholder instead of raising AttributeError.
    if not isinstance(channel_name, str):
        return "Unknown"

    # The character class also removes '@', so no separate prefix check is
    # needed.
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", channel_name)
    words = cleaned.split()
    return words[0] if words else "Unknown"
|
||||||
|
|
||||||
|
|
||||||
def extract_artist_title(video_title):
|
def extract_artist_title(video_title):
|
||||||
title = (
|
title = (
|
||||||
video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
|
video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
|
||||||
@ -26,12 +50,13 @@ def add_id3_tags(file_path, video_title, channel_name):
|
|||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
artist, title = extract_artist_title(video_title)
|
artist, title = extract_artist_title(video_title)
|
||||||
|
clean_channel = clean_channel_name(channel_name)
|
||||||
mp4 = MP4(str(file_path))
|
mp4 = MP4(str(file_path))
|
||||||
mp4["\xa9nam"] = title
|
mp4["\xa9nam"] = title
|
||||||
mp4["\xa9ART"] = artist
|
mp4["\xa9ART"] = artist
|
||||||
mp4["\xa9alb"] = f"{channel_name} Karaoke"
|
mp4["\xa9alb"] = clean_channel # Use clean channel name only, no suffix
|
||||||
mp4["\xa9gen"] = "Karaoke"
|
mp4["\xa9gen"] = "Karaoke"
|
||||||
mp4.save()
|
mp4.save()
|
||||||
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'")
|
print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}', Album='{clean_channel}'")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"⚠️ Could not add ID3 tags: {e}")
|
print(f"⚠️ Could not add ID3 tags: {e}")
|
||||||
|
|||||||
@ -9,26 +9,34 @@ from pathlib import Path
|
|||||||
|
|
||||||
|
|
||||||
def load_server_songs(songs_path="data/songs.json"):
    """Load the songs already available on the server, with format information.

    Args:
        songs_path: Path to the JSON file describing the server library.
            The file is expected to contain a list of objects that each
            carry string ``"artist"``, ``"title"`` and ``"path"`` fields;
            entries that do not match that shape are ignored.

    Returns:
        A dict mapping ``f"{artist.lower()}_{normalize_title(title)}"`` to a
        dict with the keys ``artist``, ``title``, ``path``, ``is_mp3``,
        ``is_cdg`` and ``is_mp4``. An empty dict is returned when the file
        is missing, unreadable or not valid JSON.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return {}

    try:
        with open(songs_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
        # Best effort: an unreadable catalogue is treated as "nothing on server".
        print(f"⚠️ Could not load server songs: {e}")
        return {}

    if not isinstance(data, list):
        print(
            "⚠️ Could not load server songs: expected a JSON list, "
            f"got {type(data).__name__}"
        )
        return {}

    server_songs = {}
    for song in data:
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        path = song.get("path")
        # Skip entries with missing or non-string fields (e.g. JSON null)
        # instead of crashing on .strip().
        if not (
            isinstance(artist, str)
            and isinstance(title, str)
            and isinstance(path, str)
        ):
            continue

        artist = artist.strip()
        title = title.strip()
        path = path.strip()
        lowered_path = path.lower()
        key = f"{artist.lower()}_{normalize_title(title)}"
        info = {
            "artist": artist,
            "title": title,
            "path": path,
            "is_mp3": lowered_path.endswith(".mp3"),
            # Substring match: any path containing "cdg" counts.
            "is_cdg": "cdg" in lowered_path,
            "is_mp4": lowered_path.endswith(".mp4"),
        }

        # The same song can be listed more than once (e.g. an MP3/CDG copy
        # and an MP4 copy). Keep an existing plain-MP4 entry so that a later
        # audio-only entry cannot replace it and trigger a re-download.
        existing = server_songs.get(key)
        if (
            existing is not None
            and existing["is_mp4"]
            and not existing["is_cdg"]
            and not (info["is_mp4"] and not info["is_cdg"])
        ):
            continue
        server_songs[key] = info

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
|
||||||
|
|
||||||
|
|
||||||
def is_song_on_server(server_songs, artist, title):
|
def is_song_on_server(server_songs, artist, title):
|
||||||
@ -37,6 +45,19 @@ def is_song_on_server(server_songs, artist, title):
|
|||||||
return key in server_songs
|
return key in server_songs
|
||||||
|
|
||||||
|
|
||||||
|
def should_skip_server_song(server_songs, artist, title):
    """Decide whether a song can be skipped because the server already has it as MP4.

    The lookup key is built from the lower-cased artist and the normalized
    title. A song that is not in ``server_songs`` is never skipped.

    Returns a truthy value when the stored entry is flagged ``is_mp4`` and
    not flagged ``is_cdg`` (skip the download), and a falsy value otherwise
    (MP3/CDG or unknown format, so the video version should be downloaded).
    """
    lookup_key = f"{artist.lower()}_{normalize_title(title)}"
    if lookup_key in server_songs:
        entry = server_songs[lookup_key]
        stored_as_mp4 = entry.get("is_mp4", False)
        flagged_cdg = entry.get("is_cdg", False)
        # Only a video copy outside the CDG set counts as "already have it".
        return stored_as_mp4 and not flagged_cdg
    # Not on the server at all, so there is nothing to skip.
    return False
|
||||||
|
|
||||||
|
|
||||||
def load_server_duplicates_tracking(
|
def load_server_duplicates_tracking(
|
||||||
tracking_path="data/server_duplicates_tracking.json",
|
tracking_path="data/server_duplicates_tracking.json",
|
||||||
):
|
):
|
||||||
@ -86,8 +107,9 @@ def mark_song_as_server_duplicate(tracking, artist, title, video_title, channel_
|
|||||||
def check_and_mark_server_duplicate(
|
def check_and_mark_server_duplicate(
|
||||||
server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
|
server_songs, server_duplicates_tracking, artist, title, video_title, channel_name
|
||||||
):
|
):
|
||||||
"""Check if a song is on server and mark it as duplicate if so. Returns True if it's a duplicate."""
|
"""Check if a song should be skipped because it's already available as MP4 on server and mark it as duplicate if so.
|
||||||
if is_song_on_server(server_songs, artist, title):
|
Returns True if it should be skipped (MP4 format), False if it should be downloaded (MP3/CDG format)."""
|
||||||
|
if should_skip_server_song(server_songs, artist, title):
|
||||||
if not is_song_marked_as_server_duplicate(
|
if not is_song_marked_as_server_duplicate(
|
||||||
server_duplicates_tracking, artist, title
|
server_duplicates_tracking, artist, title
|
||||||
):
|
):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user