28 lines
1.2 KiB
Python
28 lines
1.2 KiB
Python
import json
|
|
from pathlib import Path
|
|
|
|
def normalize_title(title):
    """Return a canonical form of a song title for duplicate detection.

    The literal markers "(Karaoke Version)" and "(Karaoke)" are removed
    (the match is case-sensitive), runs of whitespace are collapsed to a
    single space, and the result is lower-cased.
    """
    cleaned = title
    # Markers are removed in the order listed.
    for marker in ("(Karaoke Version)", "(Karaoke)"):
        cleaned = cleaned.replace(marker, "")
    # split() with no arguments drops leading/trailing whitespace and
    # collapses internal runs, so the pieces can simply be re-joined.
    words = cleaned.split()
    return " ".join(words).lower()
|
|
|
|
def deduplicate_songlist_tracking(tracking_path="data/songlist_tracking.json"):
    """Collapse duplicate songs in the tracking JSON file, rewriting it in place.

    The file is read as a JSON object whose values are per-song records
    (dicts). Records are grouped by lower-cased artist plus normalized
    title; within each group the record with the greatest ``downloaded_at``
    value is kept (the first record seen wins when later ones have no
    ``downloaded_at``). The surviving records are written back keyed by
    that ``artist_title`` grouping key, and a summary line is printed.

    Args:
        tracking_path: Path of the tracking JSON file to rewrite.

    Returns:
        None. If the file does not exist, a message is printed and nothing
        is written.
    """
    tracking_file = Path(tracking_path)
    if not tracking_file.exists():
        print(f"File not found: {tracking_path}")
        return

    with open(tracking_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    deduped = {}
    for entry in data.values():
        # A field that is present but null must behave like a missing one;
        # .get(key, "") would hand back None and .strip() would raise.
        artist = (entry.get("artist") or "").strip()
        title = (entry.get("title") or "").strip()
        key = f"{artist.lower()}_{normalize_title(title)}"

        kept = deduped.get(key)
        if kept is None:
            deduped[key] = entry
            continue

        # Always keep the latest entry (by downloaded_at). The kept record's
        # timestamp is coalesced to "" so a null value cannot trigger a
        # str-vs-None comparison error.
        # NOTE(review): assumes downloaded_at values are strings that sort
        # chronologically (e.g. ISO 8601) -- confirm against the writer.
        stamp = entry.get("downloaded_at")
        if stamp and stamp > (kept.get("downloaded_at") or ""):
            deduped[key] = entry

    with open(tracking_file, 'w', encoding='utf-8') as f:
        json.dump(deduped, f, indent=2, ensure_ascii=False)

    print(f"Deduplicated {len(data)} -> {len(deduped)} entries in {tracking_path}")
|
|
|
|
# Script entry point: when run directly (not imported), deduplicate the
# tracking file at the function's default path.
if __name__ == "__main__":
    deduplicate_songlist_tracking()