Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
eb56c6c581
commit
e238def11c
4022
data/bak_songList.json
Normal file
4022
data/bak_songList.json
Normal file
File diff suppressed because it is too large
Load Diff
19240
data/channel_cache.json
19240
data/channel_cache.json
File diff suppressed because it is too large
Load Diff
@ -1,2 +1,5 @@
|
||||
https://www.youtube.com/@SingKingKaraoke/videos
|
||||
https://www.youtube.com/@karafun/videos
|
||||
https://www.youtube.com/@karafun/videos
|
||||
https://www.youtube.com/@StingrayKaraoke/videos
|
||||
https://www.youtube.com/@CCKaraoke/videos
|
||||
https://www.youtube.com/@AtomicKaraoke/videos
|
||||
52334
data/songList.json
52334
data/songList.json
File diff suppressed because it is too large
Load Diff
@ -31,6 +31,28 @@ def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
|
||||
with open(tracking_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
|
||||
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to the JSON file listing the server's songs. Only
            list entries that are objects with string "artist" and "title"
            fields contribute a key; anything else is skipped.

    Returns:
        A set of keys of the form "<lowercased artist>_<normalized title>".
        An empty set is returned when the file is missing, cannot be read or
        parsed, or does not hold a list.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # Keep the try body limited to reading/parsing so unrelated errors in the
    # key-building loop below are not swallowed.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError; a
        # corrupt or unreadable file should not abort the whole report.
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a list in {songs_path}")
        return set()

    server_songs = set()
    for song in data:
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        # JSON null or numeric values cannot be stripped/normalized; skip them.
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)
    return server_songs
|
||||
|
||||
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of "<lowercased artist>_<normalized title>" keys,
            as returned by load_server_songs().
        artist: Artist name to look up.
        title: Song title to look up.

    Returns:
        True if the key built from artist and title is in server_songs.
    """
    # load_server_songs() strips artist and title before building its keys;
    # strip here too so padded input still matches.
    key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return key in server_songs
|
||||
|
||||
def main():
|
||||
all_songs = load_songlist()
|
||||
print(f"Total songs in songList.json (raw): {len(all_songs)}")
|
||||
@ -50,9 +72,23 @@ def main():
|
||||
downloaded_keys = set(tracking.keys())
|
||||
print(f"Downloaded songs: {len(downloaded_keys)}")
|
||||
|
||||
# Songs left to download
|
||||
left_to_download = [song for key, song in unique_songs.items() if key not in downloaded_keys]
|
||||
print(f"Songs left to download: {len(left_to_download)}")
|
||||
# Server songs
|
||||
server_songs = load_server_songs()
|
||||
print(f"Songs available on server: {len(server_songs)}")
|
||||
|
||||
# Songs left to download (not downloaded AND not on server)
|
||||
left_to_download = []
|
||||
for key, song in unique_songs.items():
|
||||
if key not in downloaded_keys and not is_song_on_server(server_songs, song['artist'], song['title']):
|
||||
left_to_download.append(song)
|
||||
print(f"Songs left to download (not downloaded and not on server): {len(left_to_download)}")
|
||||
|
||||
# Songs already available (downloaded OR on server)
|
||||
already_available = []
|
||||
for key, song in unique_songs.items():
|
||||
if key in downloaded_keys or is_song_on_server(server_songs, song['artist'], song['title']):
|
||||
already_available.append(song)
|
||||
print(f"Songs already available (downloaded or on server): {len(already_available)}")
|
||||
|
||||
print("\nSample unique songs:")
|
||||
for song in list(unique_songs.values())[:10]:
|
||||
@ -67,5 +103,10 @@ def main():
|
||||
for song in left_to_download[:10]:
|
||||
print(f" - {song['artist']} - {song['title']}")
|
||||
|
||||
print("\nSample songs already available:")
|
||||
for song in already_available[:10]:
|
||||
status = "downloaded" if f"{song['artist'].lower()}_{normalize_title(song['title'])}" in downloaded_keys else "on server"
|
||||
print(f" - {song['artist']} - {song['title']} ({status})")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
294785
data/songs.json
Normal file
294785
data/songs.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -9,7 +9,8 @@ from karaoke_downloader.tracking_manager import TrackingManager, SongStatus, For
|
||||
from karaoke_downloader.id3_utils import add_id3_tags, extract_artist_title
|
||||
from karaoke_downloader.songlist_manager import (
|
||||
load_songlist, load_songlist_tracking, save_songlist_tracking,
|
||||
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title
|
||||
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title,
|
||||
load_server_songs, is_song_on_server
|
||||
)
|
||||
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
|
||||
import logging
|
||||
@ -27,6 +28,8 @@ class KaraokeDownloader:
|
||||
self.config = self._load_config()
|
||||
self.songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
|
||||
self.songlist_tracking = load_songlist_tracking(str(self.songlist_tracking_file))
|
||||
# Load server songs for availability checking
|
||||
self.server_songs = load_server_songs()
|
||||
|
||||
def _load_config(self):
|
||||
config_file = DATA_DIR / "config.json"
|
||||
@ -202,9 +205,13 @@ class KaraokeDownloader:
|
||||
artist, title = extract_artist_title(video['title'])
|
||||
key = f"{artist.lower()}_{normalize_title(title)}"
|
||||
if key in normalized_songlist:
|
||||
# Check if already downloaded
|
||||
if not is_songlist_song_downloaded(self.songlist_tracking, artist, title):
|
||||
matches.append((video, normalized_songlist[key]))
|
||||
# Check if already downloaded or on server
|
||||
if is_songlist_song_downloaded(self.songlist_tracking, artist, title):
|
||||
continue
|
||||
if is_song_on_server(self.server_songs, artist, title):
|
||||
print(f"🎵 Song already on server: {artist} - {title}")
|
||||
continue
|
||||
matches.append((video, normalized_songlist[key]))
|
||||
if len(matches) >= limit:
|
||||
break
|
||||
if not matches:
|
||||
@ -255,6 +262,15 @@ class KaraokeDownloader:
|
||||
undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
|
||||
print(f"🎯 {len(songlist)} total unique songs in songlist.")
|
||||
print(f"🎯 {len(undownloaded)} unique songlist songs to download.")
|
||||
|
||||
# Further filter out songs already on server
|
||||
not_on_server = [s for s in undownloaded if not is_song_on_server(self.server_songs, s['artist'], s['title'])]
|
||||
server_available = len(undownloaded) - len(not_on_server)
|
||||
if server_available > 0:
|
||||
print(f"🎵 {server_available} songs already available on server, skipping.")
|
||||
|
||||
undownloaded = not_on_server
|
||||
print(f"🎯 {len(undownloaded)} songs need to be downloaded.")
|
||||
if not undownloaded:
|
||||
print("🎵 All songlist songs already downloaded.")
|
||||
return True
|
||||
@ -264,6 +280,7 @@ class KaraokeDownloader:
|
||||
downloaded_count = 0
|
||||
attempted = set()
|
||||
total_to_download = limit if limit is not None else len(undownloaded)
|
||||
print(f"\n🎬 Processing {len(channel_urls)} channels for song matches...")
|
||||
# Keep looping until limit is reached or no more undownloaded songs
|
||||
while undownloaded and (limit is None or downloaded_count < limit):
|
||||
for song in list(undownloaded):
|
||||
@ -273,10 +290,9 @@ class KaraokeDownloader:
|
||||
if (artist, title) in attempted:
|
||||
continue
|
||||
found = False
|
||||
print(f"\n🔍 Searching for: {artist} - {title}")
|
||||
for channel_url in channel_urls:
|
||||
channel_name, channel_id = get_channel_info(channel_url)
|
||||
print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
|
||||
print(f" Searching for: {artist} - {title}")
|
||||
available_videos = self.tracker.get_channel_video_list(
|
||||
channel_url,
|
||||
yt_dlp_path=str(self.yt_dlp_path),
|
||||
@ -287,7 +303,8 @@ class KaraokeDownloader:
|
||||
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
|
||||
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
|
||||
# Progress print statement
|
||||
print(f"\U0001F4E5 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
|
||||
print(f"📥 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
|
||||
print(f"🎯 Found on channel: {channel_name}")
|
||||
# Download this song from this channel
|
||||
# Create a shorter, safer filename
|
||||
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
|
||||
@ -353,6 +370,7 @@ class KaraokeDownloader:
|
||||
for song in undownloaded:
|
||||
print(f" - {song['artist']} - {song['title']}")
|
||||
print(f"🎉 Downloaded {downloaded_count} songlist songs.")
|
||||
print(f"📊 Summary: Processed {len(channel_urls)} channels, found {downloaded_count} songs, {len(undownloaded)} songs not found.")
|
||||
return True
|
||||
|
||||
def _is_valid_mp4(self, file_path):
|
||||
|
||||
@ -68,4 +68,31 @@ def mark_songlist_song_downloaded(tracking, artist, title, channel_name, file_pa
|
||||
"file_path": str(file_path),
|
||||
"downloaded_at": datetime.now().isoformat()
|
||||
}
|
||||
save_songlist_tracking(tracking)
|
||||
save_songlist_tracking(tracking)
|
||||
|
||||
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to the JSON file listing the server's songs. Only
            list entries that are objects with string "artist" and "title"
            fields contribute a key; anything else is skipped.

    Returns:
        A set of keys of the form "<lowercased artist>_<normalized title>".
        An empty set is returned when the file is missing, cannot be read or
        parsed, or does not hold a list.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # Only reading/parsing is guarded, so errors from the key-building loop
    # are not silently reported as a load failure.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # OSError includes FileNotFoundError (file removed after the exists()
        # check); ValueError includes json.JSONDecodeError.
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a list in {songs_path}")
        return set()

    server_songs = set()
    for song in data:
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        # JSON null or numeric values cannot be stripped/normalized; skip them.
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
|
||||
|
||||
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of "<lowercased artist>_<normalized title>" keys,
            as returned by load_server_songs().
        artist: Artist name to look up.
        title: Song title to look up.

    Returns:
        True if the key built from artist and title is in server_songs.
    """
    # load_server_songs() strips artist and title before building its keys;
    # strip here too so padded input still matches.
    lookup_key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return lookup_key in server_songs
|
||||
Loading…
Reference in New Issue
Block a user