Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-24 14:48:19 -05:00
parent eb56c6c581
commit e238def11c
8 changed files with 366719 additions and 3775 deletions

4022
data/bak_songList.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,5 @@
https://www.youtube.com/@SingKingKaraoke/videos
https://www.youtube.com/@karafun/videos
https://www.youtube.com/@StingrayKaraoke/videos
https://www.youtube.com/@CCKaraoke/videos
https://www.youtube.com/@AtomicKaraoke/videos

File diff suppressed because it is too large Load Diff

View File

@ -31,6 +31,28 @@ def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
with open(tracking_file, 'r', encoding='utf-8') as f:
return json.load(f)
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to a JSON file expected to contain a list of
            objects with string "artist" and "title" fields.

    Returns:
        A set of "<artist lowercased>_<normalize_title(title)>" keys, built
        from the stripped artist and title of each entry. The set is empty
        when the file is missing, cannot be read, is not valid JSON, or does
        not contain a list. Entries that are not objects, or whose artist or
        title is not a string, are skipped.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # Only the read/parse step is guarded; a broken songs file should degrade
    # to "nothing known on the server" instead of aborting the whole run.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a JSON list, got {type(data).__name__}")
        return set()

    server_songs = set()
    for song in data:
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        # A null or numeric field cannot be stripped/normalized; skip the entry.
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)
    return server_songs
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of keys as produced by load_server_songs.
        artist: Artist name to look up.
        title: Song title to look up.

    Returns:
        True when the "<artist lowercased>_<normalize_title(title)>" key is
        present in server_songs, otherwise False.
    """
    if not server_songs:
        # Nothing to match against (load_server_songs returns an empty set
        # when the songs file is missing), so skip the normalization work.
        return False
    # load_server_songs strips artist and title before building its keys;
    # do the same here so whitespace-padded input still matches.
    key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return key in server_songs
def main():
all_songs = load_songlist()
print(f"Total songs in songList.json (raw): {len(all_songs)}")
@ -50,9 +72,23 @@ def main():
downloaded_keys = set(tracking.keys())
print(f"Downloaded songs: {len(downloaded_keys)}")
# Songs left to download
left_to_download = [song for key, song in unique_songs.items() if key not in downloaded_keys]
print(f"Songs left to download: {len(left_to_download)}")
# Server songs
server_songs = load_server_songs()
print(f"Songs available on server: {len(server_songs)}")
# Songs left to download (not downloaded AND not on server)
left_to_download = []
for key, song in unique_songs.items():
if key not in downloaded_keys and not is_song_on_server(server_songs, song['artist'], song['title']):
left_to_download.append(song)
print(f"Songs left to download (not downloaded and not on server): {len(left_to_download)}")
# Songs already available (downloaded OR on server)
already_available = []
for key, song in unique_songs.items():
if key in downloaded_keys or is_song_on_server(server_songs, song['artist'], song['title']):
already_available.append(song)
print(f"Songs already available (downloaded or on server): {len(already_available)}")
print("\nSample unique songs:")
for song in list(unique_songs.values())[:10]:
@ -67,5 +103,10 @@ def main():
for song in left_to_download[:10]:
print(f" - {song['artist']} - {song['title']}")
print("\nSample songs already available:")
for song in already_available[:10]:
status = "downloaded" if f"{song['artist'].lower()}_{normalize_title(song['title'])}" in downloaded_keys else "on server"
print(f" - {song['artist']} - {song['title']} ({status})")
if __name__ == "__main__":
main()

294785
data/songs.json Normal file

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,8 @@ from karaoke_downloader.tracking_manager import TrackingManager, SongStatus, For
from karaoke_downloader.id3_utils import add_id3_tags, extract_artist_title
from karaoke_downloader.songlist_manager import (
load_songlist, load_songlist_tracking, save_songlist_tracking,
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title,
load_server_songs, is_song_on_server
)
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
import logging
@ -27,6 +28,8 @@ class KaraokeDownloader:
self.config = self._load_config()
self.songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
self.songlist_tracking = load_songlist_tracking(str(self.songlist_tracking_file))
# Load server songs for availability checking
self.server_songs = load_server_songs()
def _load_config(self):
config_file = DATA_DIR / "config.json"
@ -202,9 +205,13 @@ class KaraokeDownloader:
artist, title = extract_artist_title(video['title'])
key = f"{artist.lower()}_{normalize_title(title)}"
if key in normalized_songlist:
# Check if already downloaded
if not is_songlist_song_downloaded(self.songlist_tracking, artist, title):
matches.append((video, normalized_songlist[key]))
# Check if already downloaded or on server
if is_songlist_song_downloaded(self.songlist_tracking, artist, title):
continue
if is_song_on_server(self.server_songs, artist, title):
print(f"🎵 Song already on server: {artist} - {title}")
continue
matches.append((video, normalized_songlist[key]))
if len(matches) >= limit:
break
if not matches:
@ -255,6 +262,15 @@ class KaraokeDownloader:
undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
print(f"🎯 {len(songlist)} total unique songs in songlist.")
print(f"🎯 {len(undownloaded)} unique songlist songs to download.")
# Further filter out songs already on server
not_on_server = [s for s in undownloaded if not is_song_on_server(self.server_songs, s['artist'], s['title'])]
server_available = len(undownloaded) - len(not_on_server)
if server_available > 0:
print(f"🎵 {server_available} songs already available on server, skipping.")
undownloaded = not_on_server
print(f"🎯 {len(undownloaded)} songs need to be downloaded.")
if not undownloaded:
print("🎵 All songlist songs already downloaded.")
return True
@ -264,6 +280,7 @@ class KaraokeDownloader:
downloaded_count = 0
attempted = set()
total_to_download = limit if limit is not None else len(undownloaded)
print(f"\n🎬 Processing {len(channel_urls)} channels for song matches...")
# Keep looping until limit is reached or no more undownloaded songs
while undownloaded and (limit is None or downloaded_count < limit):
for song in list(undownloaded):
@ -273,10 +290,9 @@ class KaraokeDownloader:
if (artist, title) in attempted:
continue
found = False
print(f"\n🔍 Searching for: {artist} - {title}")
for channel_url in channel_urls:
channel_name, channel_id = get_channel_info(channel_url)
print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
print(f" Searching for: {artist} - {title}")
available_videos = self.tracker.get_channel_video_list(
channel_url,
yt_dlp_path=str(self.yt_dlp_path),
@ -287,7 +303,8 @@ class KaraokeDownloader:
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
# Progress print statement
print(f"\U0001F4E5 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
print(f"📥 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
print(f"🎯 Found on channel: {channel_name}")
# Download this song from this channel
# Create a shorter, safer filename
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
@ -353,6 +370,7 @@ class KaraokeDownloader:
for song in undownloaded:
print(f" - {song['artist']} - {song['title']}")
print(f"🎉 Downloaded {downloaded_count} songlist songs.")
print(f"📊 Summary: Processed {len(channel_urls)} channels, found {downloaded_count} songs, {len(undownloaded)} songs not found.")
return True
def _is_valid_mp4(self, file_path):

View File

@ -69,3 +69,30 @@ def mark_songlist_song_downloaded(tracking, artist, title, channel_name, file_pa
"downloaded_at": datetime.now().isoformat()
}
save_songlist_tracking(tracking)
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to a JSON file expected to contain a list of
            objects with string "artist" and "title" fields.

    Returns:
        A set of "<artist lowercased>_<normalize_title(title)>" keys, built
        from the stripped artist and title of each entry. The set is empty
        when the file is missing, cannot be read, is not valid JSON, or does
        not contain a list. Entries that are not objects, or whose artist or
        title is not a string, are skipped.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # Keep the guarded region to the read/parse step. OSError covers
    # FileNotFoundError (file removed after the check) as well as permission
    # and is-a-directory failures; UnicodeDecodeError covers non-UTF-8 files.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a JSON list, got {type(data).__name__}")
        return set()

    server_songs = set()
    for song in data:
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        # A null or numeric field cannot be stripped/normalized; skip the entry.
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of keys as produced by load_server_songs.
        artist: Artist name to look up.
        title: Song title to look up.

    Returns:
        True when the "<artist lowercased>_<normalize_title(title)>" key is
        present in server_songs, otherwise False.
    """
    if not server_songs:
        # Nothing to match against (load_server_songs returns an empty set
        # when the songs file is missing or unreadable), so skip normalizing.
        return False
    # load_server_songs strips artist and title before building its keys;
    # do the same here so whitespace-padded input still matches.
    key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return key in server_songs