"""Report how many songs from the songlist still need to be downloaded.

Reads the songlist, the download-tracking file and the server song list
(all JSON), then prints counts and a few sample entries for each group.
"""

import json
from pathlib import Path


def normalize_title(title):
    """Return *title* without karaoke suffixes, whitespace-collapsed and lowercased."""
    # The suffix match is case-sensitive, so it must happen before lowercasing.
    stripped = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
    return " ".join(stripped.split()).lower()


def _song_key(artist, title):
    """Build the lookup key shared by deduplication and server matching."""
    return f"{artist.lower()}_{normalize_title(title)}"


def _artist_and_title(entry):
    """Return the stripped (artist, title) of a song entry, or None if unusable.

    An entry is unusable when it is not a dict or when its "artist" or
    "title" value is missing or not a string (e.g. JSON null).
    """
    if not isinstance(entry, dict):
        return None
    artist = entry.get("artist")
    title = entry.get("title")
    if not isinstance(artist, str) or not isinstance(title, str):
        return None
    return artist.strip(), title.strip()


def load_songlist(songlist_path="data/songList.json"):
    """Load every song of every category from the songlist file.

    Args:
        songlist_path: Path of the JSON file holding a sequence of categories,
            each carrying a "songs" list.

    Returns:
        A list of dicts with the keys "artist", "title" (both stripped) and
        "position" (0 when absent). Empty when the file does not exist.
        Malformed categories and songs are skipped.
    """
    songlist_file = Path(songlist_path)
    if not songlist_file.exists():
        print(f"⚠️ Songlist file not found: {songlist_path}")
        return []

    with songlist_file.open('r', encoding='utf-8') as f:
        data = json.load(f)

    all_songs = []
    for category in data:
        if not isinstance(category, dict):
            continue
        songs = category.get("songs")
        if not isinstance(songs, list):
            continue
        for song in songs:
            cleaned = _artist_and_title(song)
            if cleaned is None:
                continue
            artist, title = cleaned
            all_songs.append({
                "artist": artist,
                "title": title,
                "position": song.get("position", 0),
            })
    return all_songs


def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
    """Return the parsed download-tracking JSON, or {} when the file is missing."""
    tracking_file = Path(tracking_path)
    if not tracking_file.exists():
        return {}

    with tracking_file.open('r', encoding='utf-8') as f:
        return json.load(f)


def load_server_songs(songs_path="data/songs.json"):
    """Load the list of songs already available on the server.

    Args:
        songs_path: Path of the JSON file holding a sequence of song entries.

    Returns:
        A set of lookup keys (lowercased artist, underscore, normalized
        title). Empty when the file does not exist. Malformed entries are
        skipped.
    """
    songs_file = Path(songs_path)
    if not songs_file.exists():
        print(f"⚠️ Server songs file not found: {songs_path}")
        return set()

    with songs_file.open('r', encoding='utf-8') as f:
        data = json.load(f)

    server_songs = set()
    for song in data:
        cleaned = _artist_and_title(song)
        if cleaned is not None:
            server_songs.add(_song_key(*cleaned))
    return server_songs


def is_song_on_server(server_songs, artist, title):
    """Check if a song is already available on the server."""
    return _song_key(artist, title) in server_songs


def main():
    """Print songlist, download and server statistics with sample entries."""
    all_songs = load_songlist()
    print(f"Total songs in songList.json (raw): {len(all_songs)}")

    # Deduplicate: the first occurrence of each artist+title key wins.
    unique_songs = {}
    for song in all_songs:
        unique_songs.setdefault(_song_key(song["artist"], song["title"]), song)
    print(f"Unique songs (by artist+title): {len(unique_songs)}")

    # Downloaded songs
    tracking = load_songlist_tracking()
    downloaded_keys = set(tracking.keys())
    print(f"Downloaded songs: {len(downloaded_keys)}")

    # Server songs
    server_songs = load_server_songs()
    print(f"Songs available on server: {len(server_songs)}")

    # Partition once: a song is available when it is downloaded OR on the
    # server; everything else is still left to download.
    left_to_download = []
    already_available = []  # (song, status) pairs
    for key, song in unique_songs.items():
        if key in downloaded_keys:
            already_available.append((song, "downloaded"))
        elif is_song_on_server(server_songs, song['artist'], song['title']):
            already_available.append((song, "on server"))
        else:
            left_to_download.append(song)

    print(f"Songs left to download (not downloaded and not on server): {len(left_to_download)}")
    print(f"Songs already available (downloaded or on server): {len(already_available)}")

    print("\nSample unique songs:")
    for song in list(unique_songs.values())[:10]:
        print(f" - {song['artist']} - {song['title']}")

    print("\nSample downloaded songs:")
    for key in list(downloaded_keys)[:10]:
        # NOTE(review): assumes each tracking entry has "artist" and "title"
        # keys; confirm against whatever writes the tracking file.
        entry = tracking[key]
        print(f" - {entry['artist']} - {entry['title']}")

    print("\nSample songs left to download:")
    for song in left_to_download[:10]:
        print(f" - {song['artist']} - {song['title']}")

    print("\nSample songs already available:")
    for song, status in already_available[:10]:
        print(f" - {song['artist']} - {song['title']} ({status})")


if __name__ == "__main__":
    main()