import json
from pathlib import Path


def normalize_title(title: str) -> str:
    """Return *title* in the canonical form used for duplicate detection.

    The literal markers "(Karaoke Version)" and "(Karaoke)" are removed
    (case-sensitively), runs of whitespace are collapsed to single spaces,
    and the result is lower-cased.
    """
    normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
    return " ".join(normalized.split()).lower()


def _text_field(entry: dict, field: str) -> str:
    """Return ``entry[field]`` stripped, treating a missing or null value as ""."""
    # `dict.get(field, "")` only covers a *missing* key; a JSON null would
    # still come back as None and crash on `.strip()`.
    return (entry.get(field) or "").strip()


def _downloaded_at(entry: dict):
    """Return the entry's ``downloaded_at`` value, or "" when missing/null."""
    # Normalising None to "" keeps the ">" comparison below from raising
    # TypeError when one side of the comparison is a JSON null.
    return entry.get("downloaded_at") or ""


def deduplicate_songlist_tracking(tracking_path: str = "data/songlist_tracking.json") -> None:
    """Collapse duplicate songs in the tracking JSON file, rewriting it in place.

    The file is expected to hold a JSON object whose values are entry
    objects. Entries are grouped by ``"<artist lower-cased>_<normalized
    title>"``; for each group the entry with the greatest ``downloaded_at``
    is kept (the first one seen wins ties, and an entry without a timestamp
    never replaces one already kept). The rewritten file is keyed by those
    group keys, so the original top-level keys are not preserved.

    ``downloaded_at`` values are compared with ``>`` as stored; presumably
    they are ISO-8601 strings, for which that ordering is chronological.

    Args:
        tracking_path: Path to the tracking JSON file.

    Returns:
        None. Prints a message if the file does not exist, otherwise a
        summary of the entry counts before and after deduplication.
    """
    tracking_file = Path(tracking_path)
    if not tracking_file.exists():
        print(f"File not found: {tracking_path}")
        return

    with open(tracking_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    deduped = {}
    for entry in data.values():
        artist = _text_field(entry, "artist")
        title = _text_field(entry, "title")
        key = f"{artist.lower()}_{normalize_title(title)}"
        # Always keep the latest entry (by downloaded_at).
        if key not in deduped or _downloaded_at(entry) > _downloaded_at(deduped[key]):
            deduped[key] = entry

    with open(tracking_file, 'w', encoding='utf-8') as f:
        json.dump(deduped, f, indent=2, ensure_ascii=False)

    print(f"Deduplicated {len(data)} -> {len(deduped)} entries in {tracking_path}")


if __name__ == "__main__":
    deduplicate_songlist_tracking()