KaraokeVideoDownloader/data/songlist_report.py

71 lines
2.4 KiB
Python

import json
from pathlib import Path
def normalize_title(title):
    """Return a canonical, lower-cased form of a song title.

    The literal markers "(Karaoke Version)" and "(Karaoke)" are removed
    (case-sensitively), runs of whitespace are collapsed to single spaces,
    surrounding whitespace is dropped, and the result is lower-cased.
    """
    cleaned = title
    # Longer marker first, matching the order of the chained replacements.
    for marker in ("(Karaoke Version)", "(Karaoke)"):
        cleaned = cleaned.replace(marker, "")
    # split() with no arguments discards leading/trailing whitespace and
    # collapses interior runs, so no separate strip() is needed.
    words = cleaned.split()
    return " ".join(words).lower()
def load_songlist(songlist_path="data/songList.json"):
    """Load and flatten every song entry from the songlist JSON file.

    The decoded JSON is iterated as a sequence of category objects; each
    category may carry a ``"songs"`` list whose items are objects with
    ``"artist"`` and ``"title"`` strings and an optional ``"position"``.

    Malformed entries are skipped rather than raising: categories that are
    not objects or whose ``"songs"`` value is not a list, and songs that are
    not objects or whose artist/title is missing or not a string.

    Args:
        songlist_path: Location of the songlist JSON file.

    Returns:
        A list of ``{"artist", "title", "position"}`` dicts in file order,
        with artist and title stripped of surrounding whitespace and
        ``position`` defaulting to ``0``. An empty list is returned (after
        printing a warning) when the file does not exist.

    Raises:
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    songlist_file = Path(songlist_path)
    if not songlist_file.exists():
        print(f"⚠️ Songlist file not found: {songlist_path}")
        return []

    with open(songlist_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_songs = []
    for category in data:
        if not isinstance(category, dict):
            continue
        songs = category.get("songs")
        # A missing or null "songs" value means the category has no songs.
        if not isinstance(songs, list):
            continue
        for song in songs:
            if not isinstance(song, dict):
                continue
            artist = song.get("artist")
            title = song.get("title")
            # Entries with null/non-string fields are as unusable as entries
            # missing the keys entirely, so they are skipped the same way.
            if not (isinstance(artist, str) and isinstance(title, str)):
                continue
            all_songs.append({
                "artist": artist.strip(),
                "title": title.strip(),
                "position": song.get("position", 0),
            })
    return all_songs
def load_songlist_tracking(tracking_path="data/songlist_tracking.json"):
    """Read the download-tracking JSON file.

    Args:
        tracking_path: Location of the tracking JSON file.

    Returns:
        The decoded JSON content of the file, or an empty dict when the file
        does not exist.
    """
    source = Path(tracking_path)
    if source.exists():
        with source.open('r', encoding='utf-8') as handle:
            return json.load(handle)
    # No tracking file yet: report nothing as tracked.
    return {}
def main():
    """Print a download-progress report for the songlist.

    Reports the raw song count, the count after de-duplicating on
    lower-cased artist plus normalized title, the number of entries in the
    tracking data, the number of unique songs whose key is absent from the
    tracking data, and up to ten sample entries from each group.
    """

    def show_sample(heading, entries):
        # Print a heading followed by one "artist - title" line per entry.
        print(heading)
        for item in entries:
            print(f" - {item['artist']} - {item['title']}")

    raw_songs = load_songlist()
    print(f"Total songs in songList.json (raw): {len(raw_songs)}")

    # Deduplicate: the first occurrence of each artist+title key is kept.
    by_key = {}
    for record in raw_songs:
        artist, title = record["artist"], record["title"]
        by_key.setdefault(f"{artist.lower()}_{normalize_title(title)}", record)
    print(f"Unique songs (by artist+title): {len(by_key)}")

    # Downloaded songs: every key present in the tracking data is counted.
    # NOTE(review): this assumes tracking keys use the same
    # "<artist lower>_<normalized title>" format as the dedupe key - confirm
    # against whatever writes the tracking file.
    tracking = load_songlist_tracking()
    done_keys = set(tracking.keys())
    print(f"Downloaded songs: {len(done_keys)}")

    # Songs left to download: unique songs whose key is not tracked.
    pending = [
        record for song_key, record in by_key.items()
        if song_key not in done_keys
    ]
    print(f"Songs left to download: {len(pending)}")

    show_sample("\nSample unique songs:", list(by_key.values())[:10])
    show_sample(
        "\nSample downloaded songs:",
        [tracking[song_key] for song_key in list(done_keys)[:10]],
    )
    show_sample("\nSample songs left to download:", pending[:10])
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()