350 lines
14 KiB
Python
350 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Playlist validation module for the Karaoke Song Library Cleanup Tool.
|
|
Validates playlist songs against the song library using exact and fuzzy matching.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
from typing import Dict, List, Any, Tuple, Optional
|
|
from collections import defaultdict
|
|
import difflib
|
|
|
|
try:
|
|
from fuzzywuzzy import fuzz
|
|
FUZZY_AVAILABLE = True
|
|
except ImportError:
|
|
FUZZY_AVAILABLE = False
|
|
|
|
from utils import (
|
|
normalize_artist_title,
|
|
extract_channel_from_path,
|
|
get_file_extension,
|
|
parse_multi_artist,
|
|
clean_artist_name,
|
|
validate_song_data
|
|
)
|
|
|
|
from matching import SongMatcher
|
|
|
|
|
|
class PlaylistValidator:
    """Validates playlist songs against the song library.

    On construction the song library is read from ``songs.json`` inside
    ``data_dir`` and indexed into two dictionaries:

    * ``exact_lookup`` -- keyed by ``normalize_artist_title(artist, title, False)``
    * ``artist_title_lookup`` -- keyed by ``"<artist lower> - <title lower>"``,
      scanned linearly by the fuzzy matcher.

    Playlists are read from ``songList.json`` in the same directory.
    """

    def __init__(self, config: Dict[str, Any], data_dir: str = "../data"):
        """Load the song library and build the lookup tables.

        Args:
            config: Tool configuration. ``config['matching']['fuzzy_threshold']``
                is read if present; the default is 0.8.
            data_dir: Directory containing ``songs.json`` and ``songList.json``.

        Raises:
            ValueError: If the song library could not be loaded or is empty.
        """
        self.config = config
        self.data_dir = data_dir
        self.song_matcher = SongMatcher(config, data_dir)
        self.fuzzy_threshold = config.get('matching', {}).get('fuzzy_threshold', 0.8)

        # Load song library
        self.all_songs = self._load_all_songs()
        if not self.all_songs:
            # The message names the file that _load_all_songs actually reads.
            raise ValueError("Could not load song library from songs.json")

        # Create lookup dictionaries for faster matching
        self._build_lookup_tables()

    def _load_all_songs(self) -> List[Dict[str, Any]]:
        """Load the song library from ``songs.json`` in ``self.data_dir``.

        Returns:
            The parsed JSON content, or an empty list when the file cannot be
            read or parsed (the error is printed, not raised).
        """
        all_songs_path = os.path.join(self.data_dir, 'songs.json')
        try:
            with open(all_songs_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: invalid JSON or
            # invalid UTF-8 (JSONDecodeError and UnicodeDecodeError subclass it).
            print(f"Error loading song library: {e}")
            return []

    def _build_lookup_tables(self):
        """Build ``exact_lookup`` and ``artist_title_lookup`` from ``self.all_songs``.

        Songs rejected by ``validate_song_data`` are skipped. A song credited
        to several artists is indexed once per artist returned by
        ``parse_multi_artist``.
        """
        self.exact_lookup = {}
        self.artist_title_lookup = {}

        for song in self.all_songs:
            if not validate_song_data(song):
                continue

            # Fall back to the raw artist when cleaning yields nothing.
            cleaned_artist = clean_artist_name(song['artist']) or song['artist']

            # Handle multi-artist songs (after cleaning); fall back to the
            # single cleaned name when the parser returns nothing.
            artists = parse_multi_artist(cleaned_artist) or [cleaned_artist]

            for artist in artists:
                # Key used by find_exact_match.
                normalized_key = normalize_artist_title(artist, song['title'], False)
                self.exact_lookup.setdefault(normalized_key, []).append(song)

                # Key compared against the query string in find_fuzzy_matches.
                artist_title_key = f"{artist.lower()} - {song['title'].lower()}"
                self.artist_title_lookup.setdefault(artist_title_key, []).append(song)

    def find_exact_match(self, artist: str, title: str) -> Optional[List[Dict[str, Any]]]:
        """Find exact matches for an artist/title combination.

        Returns:
            The list of library songs stored under the normalized key, or an
            empty list when there is none (this method never returns ``None``).
        """
        normalized_key = normalize_artist_title(artist, title, False)
        return self.exact_lookup.get(normalized_key, [])

    def find_fuzzy_matches(self, artist: str, title: str,
                           threshold: Optional[float] = None) -> List[Tuple[Dict[str, Any], float]]:
        """Find fuzzy matches for an artist/title combination.

        Every key of ``artist_title_lookup`` is compared with
        ``"<artist lower> - <title lower>"`` using ``fuzz.ratio`` scaled to
        the 0.0-1.0 range.

        Args:
            artist: Artist name from the playlist.
            title: Song title from the playlist.
            threshold: Minimum similarity (0.0-1.0) to accept. Defaults to
                ``self.fuzzy_threshold`` when ``None``.

        Returns:
            ``(song, similarity)`` tuples sorted by similarity, highest first.
            Empty when fuzzywuzzy is not installed or nothing reaches the
            threshold.
        """
        if not FUZZY_AVAILABLE:
            return []

        if threshold is None:
            threshold = self.fuzzy_threshold

        query = f"{artist.lower()} - {title.lower()}"
        matches = []

        for key, songs in self.artist_title_lookup.items():
            similarity = fuzz.ratio(query, key) / 100.0
            if similarity < threshold:
                continue
            # Get the best song from this group using existing priority logic
            best_song, _ = self.song_matcher.select_best_song(songs, artist, title)
            matches.append((best_song, similarity))

        # Sort by similarity score (highest first)
        matches.sort(key=lambda x: x[1], reverse=True)
        return matches

    def validate_playlist(self, playlist: Dict[str, Any], dry_run: bool = True) -> Dict[str, Any]:
        """Validate a single playlist against the song library.

        Each playlist song is classified as an exact match, a fuzzy match
        (always flagged for manual review) or missing.

        Args:
            playlist: Playlist dict; ``title`` and ``songs`` are read. A
                missing or ``null`` ``songs`` entry is treated as empty.
            dry_run: Accepted for interface symmetry; this method only reads
                data and does not consult the flag.

        Returns:
            A dict with ``playlist_title``, ``total_songs``, the three result
            lists (``exact_matches``, ``fuzzy_matches``, ``missing_songs``)
            and a ``summary`` of counters.
        """
        # `or []` also covers an explicit JSON null for "songs".
        songs = playlist.get('songs') or []

        results = {
            'playlist_title': playlist.get('title', 'Unknown Playlist'),
            'total_songs': len(songs),
            'exact_matches': [],
            'fuzzy_matches': [],
            'missing_songs': [],
            'summary': {
                'exact_match_count': 0,
                'fuzzy_match_count': 0,
                'missing_count': 0,
                'needs_manual_review': 0,
            },
        }
        summary = results['summary']

        for song in songs:
            artist = song.get('artist', '')
            title = song.get('title', '')
            position = song.get('position', 0)

            if not artist or not title:
                results['missing_songs'].append({
                    'position': position,
                    'artist': artist,
                    'title': title,
                    'reason': 'Missing artist or title',
                })
                summary['missing_count'] += 1
                continue

            # Try exact match first
            exact_matches = self.find_exact_match(artist, title)
            if exact_matches:
                # Get the best song using existing priority logic
                best_song, _ = self.song_matcher.select_best_song(exact_matches, artist, title)
                results['exact_matches'].append({
                    'position': position,
                    'playlist_artist': artist,
                    'playlist_title': title,
                    'found_song': best_song,
                    'match_type': 'exact',
                })
                summary['exact_match_count'] += 1
                continue

            # Try fuzzy matching
            fuzzy_matches = self.find_fuzzy_matches(artist, title)
            if fuzzy_matches:
                # The list is sorted best-first, so take the head.
                best_fuzzy_song, similarity = fuzzy_matches[0]
                results['fuzzy_matches'].append({
                    'position': position,
                    'playlist_artist': artist,
                    'playlist_title': title,
                    'found_song': best_fuzzy_song,
                    'similarity': similarity,
                    'match_type': 'fuzzy',
                    'needs_manual_review': True,
                })
                summary['fuzzy_match_count'] += 1
                summary['needs_manual_review'] += 1
                continue

            results['missing_songs'].append({
                'position': position,
                'artist': artist,
                'title': title,
                'reason': 'No matches found',
            })
            summary['missing_count'] += 1

        return results

    def validate_all_playlists(self, dry_run: bool = True) -> Dict[str, Any]:
        """Validate all playlists in ``songList.json``.

        Args:
            dry_run: Forwarded unchanged to ``validate_playlist``.

        Returns:
            A dict with ``total_playlists``, ``playlist_results`` (one entry
            per playlist) and ``overall_summary`` totals. An empty dict is
            returned when the playlist file cannot be loaded (the error is
            printed).
        """
        playlists_path = os.path.join(self.data_dir, 'songList.json')

        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return {}

        all_results = {
            'total_playlists': len(playlists),
            'playlist_results': [],
            'overall_summary': {
                'total_songs': 0,
                'exact_matches': 0,
                'fuzzy_matches': 0,
                'missing_songs': 0,
                'needs_manual_review': 0,
            },
        }
        overall = all_results['overall_summary']

        for playlist in playlists:
            result = self.validate_playlist(playlist, dry_run)
            all_results['playlist_results'].append(result)

            # Update overall summary
            summary = result['summary']
            overall['total_songs'] += result['total_songs']
            overall['exact_matches'] += summary['exact_match_count']
            overall['fuzzy_matches'] += summary['fuzzy_match_count']
            overall['missing_songs'] += summary['missing_count']
            overall['needs_manual_review'] += summary['needs_manual_review']

        return all_results

    def update_playlist_song(self, playlist_index: int, song_position: int,
                             new_artist: str, new_title: str, dry_run: bool = True) -> bool:
        """Update a playlist song with corrected artist/title.

        The first song in the playlist whose ``position`` equals
        ``song_position`` is updated and ``songList.json`` is rewritten.

        Args:
            playlist_index: Zero-based index into the playlist list; negative
                or out-of-range values are rejected.
            song_position: Value of the song's ``position`` field.
            new_artist: Replacement artist.
            new_title: Replacement title.
            dry_run: When true, only print what would change; the file is
                not modified.

        Returns:
            ``True`` when the song was found (and, outside dry-run, saved);
            ``False`` on load/save errors, an invalid index, or when no song
            has the requested position.
        """
        playlists_path = os.path.join(self.data_dir, 'songList.json')

        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return False

        # Reject negative indices too: they would silently address a
        # playlist counted from the end of the list.
        if not 0 <= playlist_index < len(playlists):
            print(f"Invalid playlist index: {playlist_index}")
            return False

        playlist = playlists[playlist_index]
        # Playlists are not guaranteed to carry a title; use the same
        # fallback as validate_playlist instead of raising KeyError.
        playlist_title = playlist.get('title', 'Unknown Playlist')

        # Find the song by position
        for song in playlist.get('songs') or []:
            if song.get('position') != song_position:
                continue

            if dry_run:
                print(f"DRY RUN: Would update playlist '{playlist_title}' song {song_position}")
                print(f" From: {song.get('artist', '')} - {song.get('title', '')}")
                print(f" To: {new_artist} - {new_title}")
                return True

            song['artist'] = new_artist
            song['title'] = new_title

            # Save the updated playlists. Only the write is guarded, so a
            # successful save is never reported as a failure.
            try:
                with open(playlists_path, 'w', encoding='utf-8') as f:
                    json.dump(playlists, f, indent=2, ensure_ascii=False)
            except (OSError, TypeError, ValueError) as e:
                print(f"Error saving playlists: {e}")
                return False

            print(f"Updated playlist '{playlist_title}' song {song_position}")
            return True

        print(f"Song with position {song_position} not found in playlist")
        return False
|
|
|
|
|
|
def main():
    """Main function for CLI usage.

    Parses command-line arguments, loads the JSON configuration, builds a
    ``PlaylistValidator`` and validates either one playlist
    (``--playlist-index``) or all of them, printing a summary. With
    ``--output`` the result dict of whichever validation ran is written to
    a JSON file.

    Errors (unreadable config, unloadable song library or playlists,
    invalid index) are printed and the function returns without raising.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Validate playlists against song library')
    parser.add_argument('--config', default='../config/config.json', help='Configuration file path')
    parser.add_argument('--data-dir', default='../data', help='Data directory path')
    # Kept for CLI compatibility; dry-run is the default and only --apply turns it off.
    parser.add_argument('--dry-run', action='store_true', default=True, help='Dry run mode (default)')
    parser.add_argument('--apply', action='store_true', help='Apply changes (disable dry run)')
    parser.add_argument('--playlist-index', type=int, help='Validate specific playlist by index')
    parser.add_argument('--output', help='Output results to JSON file')

    args = parser.parse_args()

    # Load configuration (explicit UTF-8, like every other file opened here)
    try:
        with open(args.config, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Error loading config: {e}")
        return

    # Create validator; the constructor raises ValueError when the song
    # library cannot be loaded.
    try:
        validator = PlaylistValidator(config, args.data_dir)
    except ValueError as e:
        print(f"Error creating validator: {e}")
        return

    # Determine dry run mode
    dry_run = not args.apply

    if args.playlist_index is not None:
        # Validate specific playlist
        playlists_path = os.path.join(args.data_dir, 'songList.json')
        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return

        # Negative values would otherwise index from the end of the list.
        if not 0 <= args.playlist_index < len(playlists):
            print(f"Invalid playlist index: {args.playlist_index}")
            return

        results = validator.validate_playlist(playlists[args.playlist_index], dry_run)
        summary = results['summary']
        print(f"\nPlaylist: {results['playlist_title']}")
        print(f"Total songs: {results['total_songs']}")
        print(f"Exact matches: {summary['exact_match_count']}")
        print(f"Fuzzy matches: {summary['fuzzy_match_count']}")
        print(f"Missing: {summary['missing_count']}")
        print(f"Need manual review: {summary['needs_manual_review']}")
    else:
        # Validate all playlists
        results = validator.validate_all_playlists(dry_run)
        if not results:
            # Empty dict means the playlist file could not be loaded; the
            # validator has already printed the reason.
            return

        overall = results['overall_summary']
        print(f"\nPlaylist Validation Results:")
        print(f"Total playlists: {results['total_playlists']}")
        print(f"Total songs: {overall['total_songs']}")
        print(f"Exact matches: {overall['exact_matches']}")
        print(f"Fuzzy matches: {overall['fuzzy_matches']}")
        print(f"Missing: {overall['missing_songs']}")
        print(f"Need manual review: {overall['needs_manual_review']}")

    # Save whichever result was produced (single playlist or all playlists).
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2, ensure_ascii=False)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving results: {e}")
        else:
            print(f"\nResults saved to: {args.output}")
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()