Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
eb56c6c581
commit
e238def11c
4022
data/bak_songList.json
Normal file
4022
data/bak_songList.json
Normal file
File diff suppressed because it is too large
Load Diff
19240
data/channel_cache.json
19240
data/channel_cache.json
File diff suppressed because it is too large
Load Diff
@ -1,2 +1,5 @@
|
|||||||
https://www.youtube.com/@SingKingKaraoke/videos
|
https://www.youtube.com/@SingKingKaraoke/videos
|
||||||
https://www.youtube.com/@karafun/videos
|
https://www.youtube.com/@karafun/videos
|
||||||
|
https://www.youtube.com/@StingrayKaraoke/videos
|
||||||
|
https://www.youtube.com/@CCKaraoke/videos
|
||||||
|
https://www.youtube.com/@AtomicKaraoke/videos
|
||||||
52334
data/songList.json
52334
data/songList.json
File diff suppressed because it is too large
Load Diff
@ -31,6 +31,28 @@ def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
|
|||||||
with open(tracking_file, 'r', encoding='utf-8') as f:
|
with open(tracking_file, 'r', encoding='utf-8') as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
|
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to the JSON file listing the server's songs. The
            file is expected to hold a list of objects with "artist" and
            "title" string fields.

    Returns:
        A set of keys of the form "<artist lowercased>_<normalized title>".
        An empty set is returned (after printing a warning) when the file is
        missing, unreadable, not valid JSON, or not a JSON list.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # This is a best-effort lookup table: a broken songs file should degrade
    # to "nothing is on the server" rather than abort the whole report.
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a JSON list in {songs_path}")
        return set()

    server_songs = set()
    for song in data:
        # Skip anything that is not an object with string artist/title;
        # calling .strip() on null or numeric values would raise.
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)
    return server_songs
|
||||||
|
|
||||||
|
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of keys as produced by load_server_songs().
        artist: Artist name of the song to look up.
        title: Title of the song to look up.

    Returns:
        True when the key built from artist and title is in server_songs.
    """
    # load_server_songs() strips surrounding whitespace from artist and title
    # before building its keys, so the lookup key must be built the same way
    # or padded input would never match.
    key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return key in server_songs
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
all_songs = load_songlist()
|
all_songs = load_songlist()
|
||||||
print(f"Total songs in songList.json (raw): {len(all_songs)}")
|
print(f"Total songs in songList.json (raw): {len(all_songs)}")
|
||||||
@ -50,9 +72,23 @@ def main():
|
|||||||
downloaded_keys = set(tracking.keys())
|
downloaded_keys = set(tracking.keys())
|
||||||
print(f"Downloaded songs: {len(downloaded_keys)}")
|
print(f"Downloaded songs: {len(downloaded_keys)}")
|
||||||
|
|
||||||
# Songs left to download
|
# Server songs
|
||||||
left_to_download = [song for key, song in unique_songs.items() if key not in downloaded_keys]
|
server_songs = load_server_songs()
|
||||||
print(f"Songs left to download: {len(left_to_download)}")
|
print(f"Songs available on server: {len(server_songs)}")
|
||||||
|
|
||||||
|
# Songs left to download (not downloaded AND not on server)
|
||||||
|
left_to_download = []
|
||||||
|
for key, song in unique_songs.items():
|
||||||
|
if key not in downloaded_keys and not is_song_on_server(server_songs, song['artist'], song['title']):
|
||||||
|
left_to_download.append(song)
|
||||||
|
print(f"Songs left to download (not downloaded and not on server): {len(left_to_download)}")
|
||||||
|
|
||||||
|
# Songs already available (downloaded OR on server)
|
||||||
|
already_available = []
|
||||||
|
for key, song in unique_songs.items():
|
||||||
|
if key in downloaded_keys or is_song_on_server(server_songs, song['artist'], song['title']):
|
||||||
|
already_available.append(song)
|
||||||
|
print(f"Songs already available (downloaded or on server): {len(already_available)}")
|
||||||
|
|
||||||
print("\nSample unique songs:")
|
print("\nSample unique songs:")
|
||||||
for song in list(unique_songs.values())[:10]:
|
for song in list(unique_songs.values())[:10]:
|
||||||
@ -67,5 +103,10 @@ def main():
|
|||||||
for song in left_to_download[:10]:
|
for song in left_to_download[:10]:
|
||||||
print(f" - {song['artist']} - {song['title']}")
|
print(f" - {song['artist']} - {song['title']}")
|
||||||
|
|
||||||
|
print("\nSample songs already available:")
|
||||||
|
for song in already_available[:10]:
|
||||||
|
status = "downloaded" if f"{song['artist'].lower()}_{normalize_title(song['title'])}" in downloaded_keys else "on server"
|
||||||
|
print(f" - {song['artist']} - {song['title']} ({status})")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
294785
data/songs.json
Normal file
294785
data/songs.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -9,7 +9,8 @@ from karaoke_downloader.tracking_manager import TrackingManager, SongStatus, For
|
|||||||
from karaoke_downloader.id3_utils import add_id3_tags, extract_artist_title
|
from karaoke_downloader.id3_utils import add_id3_tags, extract_artist_title
|
||||||
from karaoke_downloader.songlist_manager import (
|
from karaoke_downloader.songlist_manager import (
|
||||||
load_songlist, load_songlist_tracking, save_songlist_tracking,
|
load_songlist, load_songlist_tracking, save_songlist_tracking,
|
||||||
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title
|
is_songlist_song_downloaded, mark_songlist_song_downloaded, normalize_title,
|
||||||
|
load_server_songs, is_song_on_server
|
||||||
)
|
)
|
||||||
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
|
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
|
||||||
import logging
|
import logging
|
||||||
@ -27,6 +28,8 @@ class KaraokeDownloader:
|
|||||||
self.config = self._load_config()
|
self.config = self._load_config()
|
||||||
self.songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
|
self.songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
|
||||||
self.songlist_tracking = load_songlist_tracking(str(self.songlist_tracking_file))
|
self.songlist_tracking = load_songlist_tracking(str(self.songlist_tracking_file))
|
||||||
|
# Load server songs for availability checking
|
||||||
|
self.server_songs = load_server_songs()
|
||||||
|
|
||||||
def _load_config(self):
|
def _load_config(self):
|
||||||
config_file = DATA_DIR / "config.json"
|
config_file = DATA_DIR / "config.json"
|
||||||
@ -202,9 +205,13 @@ class KaraokeDownloader:
|
|||||||
artist, title = extract_artist_title(video['title'])
|
artist, title = extract_artist_title(video['title'])
|
||||||
key = f"{artist.lower()}_{normalize_title(title)}"
|
key = f"{artist.lower()}_{normalize_title(title)}"
|
||||||
if key in normalized_songlist:
|
if key in normalized_songlist:
|
||||||
# Check if already downloaded
|
# Check if already downloaded or on server
|
||||||
if not is_songlist_song_downloaded(self.songlist_tracking, artist, title):
|
if is_songlist_song_downloaded(self.songlist_tracking, artist, title):
|
||||||
matches.append((video, normalized_songlist[key]))
|
continue
|
||||||
|
if is_song_on_server(self.server_songs, artist, title):
|
||||||
|
print(f"🎵 Song already on server: {artist} - {title}")
|
||||||
|
continue
|
||||||
|
matches.append((video, normalized_songlist[key]))
|
||||||
if len(matches) >= limit:
|
if len(matches) >= limit:
|
||||||
break
|
break
|
||||||
if not matches:
|
if not matches:
|
||||||
@ -255,6 +262,15 @@ class KaraokeDownloader:
|
|||||||
undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
|
undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
|
||||||
print(f"🎯 {len(songlist)} total unique songs in songlist.")
|
print(f"🎯 {len(songlist)} total unique songs in songlist.")
|
||||||
print(f"🎯 {len(undownloaded)} unique songlist songs to download.")
|
print(f"🎯 {len(undownloaded)} unique songlist songs to download.")
|
||||||
|
|
||||||
|
# Further filter out songs already on server
|
||||||
|
not_on_server = [s for s in undownloaded if not is_song_on_server(self.server_songs, s['artist'], s['title'])]
|
||||||
|
server_available = len(undownloaded) - len(not_on_server)
|
||||||
|
if server_available > 0:
|
||||||
|
print(f"🎵 {server_available} songs already available on server, skipping.")
|
||||||
|
|
||||||
|
undownloaded = not_on_server
|
||||||
|
print(f"🎯 {len(undownloaded)} songs need to be downloaded.")
|
||||||
if not undownloaded:
|
if not undownloaded:
|
||||||
print("🎵 All songlist songs already downloaded.")
|
print("🎵 All songlist songs already downloaded.")
|
||||||
return True
|
return True
|
||||||
@ -264,6 +280,7 @@ class KaraokeDownloader:
|
|||||||
downloaded_count = 0
|
downloaded_count = 0
|
||||||
attempted = set()
|
attempted = set()
|
||||||
total_to_download = limit if limit is not None else len(undownloaded)
|
total_to_download = limit if limit is not None else len(undownloaded)
|
||||||
|
print(f"\n🎬 Processing {len(channel_urls)} channels for song matches...")
|
||||||
# Keep looping until limit is reached or no more undownloaded songs
|
# Keep looping until limit is reached or no more undownloaded songs
|
||||||
while undownloaded and (limit is None or downloaded_count < limit):
|
while undownloaded and (limit is None or downloaded_count < limit):
|
||||||
for song in list(undownloaded):
|
for song in list(undownloaded):
|
||||||
@ -273,10 +290,9 @@ class KaraokeDownloader:
|
|||||||
if (artist, title) in attempted:
|
if (artist, title) in attempted:
|
||||||
continue
|
continue
|
||||||
found = False
|
found = False
|
||||||
|
print(f"\n🔍 Searching for: {artist} - {title}")
|
||||||
for channel_url in channel_urls:
|
for channel_url in channel_urls:
|
||||||
channel_name, channel_id = get_channel_info(channel_url)
|
channel_name, channel_id = get_channel_info(channel_url)
|
||||||
print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
|
|
||||||
print(f" Searching for: {artist} - {title}")
|
|
||||||
available_videos = self.tracker.get_channel_video_list(
|
available_videos = self.tracker.get_channel_video_list(
|
||||||
channel_url,
|
channel_url,
|
||||||
yt_dlp_path=str(self.yt_dlp_path),
|
yt_dlp_path=str(self.yt_dlp_path),
|
||||||
@ -287,7 +303,8 @@ class KaraokeDownloader:
|
|||||||
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
|
if (normalize_title(v_artist) == normalize_title(artist) and normalize_title(v_title) == normalize_title(title)) or \
|
||||||
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
|
(normalize_title(video['title']) == normalize_title(f"{artist} - {title}")):
|
||||||
# Progress print statement
|
# Progress print statement
|
||||||
print(f"\U0001F4E5 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
|
print(f"📥 Downloading {downloaded_count + 1} of {total_to_download} songlist songs...")
|
||||||
|
print(f"🎯 Found on channel: {channel_name}")
|
||||||
# Download this song from this channel
|
# Download this song from this channel
|
||||||
# Create a shorter, safer filename
|
# Create a shorter, safer filename
|
||||||
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
|
safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ").replace(":", "").replace("'", "").replace('"', "")
|
||||||
@ -353,6 +370,7 @@ class KaraokeDownloader:
|
|||||||
for song in undownloaded:
|
for song in undownloaded:
|
||||||
print(f" - {song['artist']} - {song['title']}")
|
print(f" - {song['artist']} - {song['title']}")
|
||||||
print(f"🎉 Downloaded {downloaded_count} songlist songs.")
|
print(f"🎉 Downloaded {downloaded_count} songlist songs.")
|
||||||
|
print(f"📊 Summary: Processed {len(channel_urls)} channels, found {downloaded_count} songs, {len(undownloaded)} songs not found.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _is_valid_mp4(self, file_path):
|
def _is_valid_mp4(self, file_path):
|
||||||
|
|||||||
@ -69,3 +69,30 @@ def mark_songlist_song_downloaded(tracking, artist, title, channel_name, file_pa
|
|||||||
"downloaded_at": datetime.now().isoformat()
|
"downloaded_at": datetime.now().isoformat()
|
||||||
}
|
}
|
||||||
save_songlist_tracking(tracking)
|
save_songlist_tracking(tracking)
|
||||||
|
|
||||||
|
def load_server_songs(songs_path="data/songs.json"):
    """Load the lookup keys of songs already available on the server.

    Args:
        songs_path: Path to the JSON file listing the server's songs. The
            file is expected to hold a list of objects with "artist" and
            "title" string fields.

    Returns:
        A set of keys of the form "<artist lowercased>_<normalized title>".
        An empty set is returned (after printing a warning) when the file is
        missing, unreadable, not valid JSON, or not a JSON list.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    # Keep the try body to the part that can actually fail on I/O or parsing.
    # OSError also covers permission errors and a file removed after the
    # exists() check; UnicodeDecodeError covers files that are not UTF-8.
    try:
        with open(songs_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"⚠️ Could not load server songs: {e}")
        return set()

    if not isinstance(data, list):
        print(f"⚠️ Could not load server songs: expected a JSON list in {songs_path}")
        return set()

    server_songs = set()
    for song in data:
        # Skip anything that is not an object with string artist/title;
        # calling .strip() on null or numeric values would raise.
        if not isinstance(song, dict):
            continue
        artist = song.get("artist")
        title = song.get("title")
        if not isinstance(artist, str) or not isinstance(title, str):
            continue
        key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
        server_songs.add(key)

    print(f"📋 Loaded {len(server_songs)} songs from server (songs.json)")
    return server_songs
|
||||||
|
|
||||||
|
def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server.

    Args:
        server_songs: Set of keys as produced by load_server_songs().
        artist: Artist name of the song to look up.
        title: Title of the song to look up.

    Returns:
        True when the key built from artist and title is in server_songs.
    """
    # load_server_songs() strips surrounding whitespace from artist and title
    # before building its keys, so the lookup key must be built the same way
    # or padded input would never match.
    key = f"{artist.strip().lower()}_{normalize_title(title.strip())}"
    return key in server_songs
|
||||||
Loading…
Reference in New Issue
Block a user