#!/usr/bin/env python3
"""
Playlist validation module for the Karaoke Song Library Cleanup Tool.
Validates playlist songs against the song library using exact and fuzzy matching.
"""

import json
import os
from typing import Dict, List, Any, Tuple, Optional
from collections import defaultdict
import difflib

try:
    from fuzzywuzzy import fuzz
    FUZZY_AVAILABLE = True
except ImportError:
    FUZZY_AVAILABLE = False

from utils import (
    normalize_artist_title,
    extract_channel_from_path,
    get_file_extension,
    parse_multi_artist,
    clean_artist_name,
    validate_song_data
)
from matching import SongMatcher


class PlaylistValidator:
    """Validates playlist songs against the song library.

    On construction the song library is read from ``<data_dir>/songs.json``
    and indexed into two dictionaries:

    * ``exact_lookup`` -- keyed by ``normalize_artist_title(artist, title, False)``
    * ``artist_title_lookup`` -- keyed by ``"<artist lower> - <title lower>"``,
      scanned linearly by the fuzzy matcher.

    Playlists are read from ``<data_dir>/songList.json``. The code expects a
    list of dicts with a ``title`` and a ``songs`` list whose entries carry
    ``artist``, ``title`` and ``position``.
    """

    # File names, relative to ``data_dir``. Kept in one place so that error
    # messages cannot drift away from the paths that are actually opened.
    SONG_LIBRARY_FILE = 'songs.json'
    PLAYLISTS_FILE = 'songList.json'

    def __init__(self, config: Dict[str, Any], data_dir: str = "../data"):
        """Load the song library and build the lookup tables.

        Args:
            config: Parsed configuration. ``config['matching']['fuzzy_threshold']``
                is read if present (default ``0.8``); the whole dict is also
                handed to ``SongMatcher``.
            data_dir: Directory containing the song library and playlists.

        Raises:
            ValueError: If the song library could not be read or is empty.
        """
        self.config = config
        self.data_dir = data_dir
        self.song_matcher = SongMatcher(config, data_dir)
        self.fuzzy_threshold = config.get('matching', {}).get('fuzzy_threshold', 0.8)

        # Load song library
        self.all_songs = self._load_all_songs()
        if not self.all_songs:
            raise ValueError(
                f"Could not load song library from {self.SONG_LIBRARY_FILE}"
            )

        # Create lookup dictionaries for faster matching
        self._build_lookup_tables()

    def _load_all_songs(self) -> List[Dict[str, Any]]:
        """Load the song library from songs.json.

        Returns:
            The decoded JSON content, or an empty list if the file could not
            be read or parsed (the error is printed).
        """
        all_songs_path = os.path.join(self.data_dir, self.SONG_LIBRARY_FILE)
        try:
            with open(all_songs_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading song library: {e}")
            return []

    def _build_lookup_tables(self):
        """Build lookup tables for faster exact matching.

        Songs rejected by ``validate_song_data`` are skipped. Every remaining
        song is registered once per artist returned by ``parse_multi_artist``,
        so a multi-artist song can be found under each individual artist.
        """
        self.exact_lookup = {}
        self.artist_title_lookup = {}

        for song in self.all_songs:
            if not validate_song_data(song):
                continue

            # Clean the artist name; fall back to the original if cleaning
            # yields nothing.
            cleaned_artist = clean_artist_name(song['artist']) or song['artist']

            # Handle multi-artist songs (after cleaning); fall back to the
            # cleaned name as the single artist.
            artists = parse_multi_artist(cleaned_artist) or [cleaned_artist]

            title = song['title']
            title_lower = title.lower()

            for artist in artists:
                # Exact-match key
                normalized_key = normalize_artist_title(artist, title, False)
                self.exact_lookup.setdefault(normalized_key, []).append(song)

                # Plain lower-cased key, used for fuzzy matching
                artist_title_key = f"{artist.lower()} - {title_lower}"
                self.artist_title_lookup.setdefault(artist_title_key, []).append(song)

    def find_exact_match(self, artist: str, title: str) -> List[Dict[str, Any]]:
        """Find exact matches for artist/title combination.

        Returns:
            All library songs stored under the normalized key for
            ``artist``/``title``; an empty list (never ``None``) if there
            are none.
        """
        normalized_key = normalize_artist_title(artist, title, False)
        return self.exact_lookup.get(normalized_key, [])

    def find_fuzzy_matches(self, artist: str, title: str,
                           threshold: Optional[float] = None
                           ) -> List[Tuple[Dict[str, Any], float]]:
        """Find fuzzy matches for artist/title combination.

        Args:
            artist: Artist name to look for.
            title: Song title to look for.
            threshold: Minimum similarity on a 0.0-1.0 scale. Defaults to
                ``self.fuzzy_threshold`` when ``None``.

        Returns:
            ``(song, similarity)`` tuples sorted by similarity, highest
            first. Empty if ``fuzzywuzzy`` is not installed or nothing reaches
            the threshold.
        """
        if not FUZZY_AVAILABLE:
            return []

        if threshold is None:
            threshold = self.fuzzy_threshold

        query = f"{artist.lower()} - {title.lower()}"
        matches = []

        for key, songs in self.artist_title_lookup.items():
            # fuzz.ratio is on a 0-100 scale; thresholds here are 0.0-1.0.
            similarity = fuzz.ratio(query, key) / 100.0
            if similarity >= threshold:
                # Pick one representative per key using the matcher's
                # existing priority logic.
                best_song, _ = self.song_matcher.select_best_song(songs, artist, title)
                matches.append((best_song, similarity))

        # Sort by similarity score (highest first)
        matches.sort(key=lambda match: match[1], reverse=True)
        return matches

    def validate_playlist(self, playlist: Dict[str, Any], dry_run: bool = True) -> Dict[str, Any]:
        """Validate a single playlist against the song library.

        Each playlist song is classified as an exact match, a fuzzy match
        (always flagged for manual review) or missing.

        Args:
            playlist: Playlist dict; ``title`` and ``songs`` are read.
            dry_run: Accepted for interface compatibility; this method does
                not modify anything and does not read the flag.

        Returns:
            A dict with ``playlist_title``, ``total_songs``, the three result
            lists (``exact_matches``, ``fuzzy_matches``, ``missing_songs``)
            and a ``summary`` of counters.
        """
        # A playlist with "songs": null is treated like an empty playlist.
        songs = playlist.get('songs') or []

        results = {
            'playlist_title': playlist.get('title', 'Unknown Playlist'),
            'total_songs': len(songs),
            'exact_matches': [],
            'fuzzy_matches': [],
            'missing_songs': [],
            'summary': {
                'exact_match_count': 0,
                'fuzzy_match_count': 0,
                'missing_count': 0,
                'needs_manual_review': 0
            }
        }
        summary = results['summary']

        for song in songs:
            artist = song.get('artist', '')
            title = song.get('title', '')
            position = song.get('position', 0)

            if not artist or not title:
                results['missing_songs'].append({
                    'position': position,
                    'artist': artist,
                    'title': title,
                    'reason': 'Missing artist or title'
                })
                summary['missing_count'] += 1
                continue

            # Try exact match first
            exact_matches = self.find_exact_match(artist, title)
            if exact_matches:
                # Get the best song using existing priority logic
                best_song, _ = self.song_matcher.select_best_song(exact_matches, artist, title)
                results['exact_matches'].append({
                    'position': position,
                    'playlist_artist': artist,
                    'playlist_title': title,
                    'found_song': best_song,
                    'match_type': 'exact'
                })
                summary['exact_match_count'] += 1
                continue

            # Fall back to fuzzy matching; only the top candidate is reported.
            fuzzy_matches = self.find_fuzzy_matches(artist, title)
            if fuzzy_matches:
                best_fuzzy_song, similarity = fuzzy_matches[0]
                results['fuzzy_matches'].append({
                    'position': position,
                    'playlist_artist': artist,
                    'playlist_title': title,
                    'found_song': best_fuzzy_song,
                    'similarity': similarity,
                    'match_type': 'fuzzy',
                    'needs_manual_review': True
                })
                summary['fuzzy_match_count'] += 1
                summary['needs_manual_review'] += 1
            else:
                results['missing_songs'].append({
                    'position': position,
                    'artist': artist,
                    'title': title,
                    'reason': 'No matches found'
                })
                summary['missing_count'] += 1

        return results

    def validate_all_playlists(self, dry_run: bool = True) -> Dict[str, Any]:
        """Validate all playlists in songList.json.

        Args:
            dry_run: Forwarded unchanged to ``validate_playlist``.

        Returns:
            A dict with ``total_playlists``, per-playlist ``playlist_results``
            and an ``overall_summary`` of summed counters. An empty dict is
            returned if the playlist file could not be loaded (the error is
            printed).
        """
        playlists_path = os.path.join(self.data_dir, self.PLAYLISTS_FILE)

        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return {}

        overall = {
            'total_songs': 0,
            'exact_matches': 0,
            'fuzzy_matches': 0,
            'missing_songs': 0,
            'needs_manual_review': 0
        }
        all_results = {
            'total_playlists': len(playlists),
            'playlist_results': [],
            'overall_summary': overall
        }

        for playlist in playlists:
            result = self.validate_playlist(playlist, dry_run)
            all_results['playlist_results'].append(result)

            # Update overall summary
            summary = result['summary']
            overall['total_songs'] += result['total_songs']
            overall['exact_matches'] += summary['exact_match_count']
            overall['fuzzy_matches'] += summary['fuzzy_match_count']
            overall['missing_songs'] += summary['missing_count']
            overall['needs_manual_review'] += summary['needs_manual_review']

        return all_results

    def update_playlist_song(self, playlist_index: int, song_position: int,
                             new_artist: str, new_title: str, dry_run: bool = True) -> bool:
        """Update a playlist song with corrected artist/title.

        The first song in the selected playlist whose ``position`` equals
        ``song_position`` is updated.

        Args:
            playlist_index: Zero-based index into the playlist file. Negative
                values are rejected rather than wrapping around.
            song_position: Value of the song's ``position`` field.
            new_artist: Replacement artist.
            new_title: Replacement title.
            dry_run: If true, only print what would change; nothing is written.

        Returns:
            ``True`` if the song was found (and, unless ``dry_run``, saved);
            ``False`` on any load/save error, invalid index or unknown
            position.
        """
        playlists_path = os.path.join(self.data_dir, self.PLAYLISTS_FILE)

        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return False

        if not 0 <= playlist_index < len(playlists):
            print(f"Invalid playlist index: {playlist_index}")
            return False

        playlist = playlists[playlist_index]
        playlist_title = playlist.get('title', 'Unknown Playlist')

        # Find the song by position (first match wins)
        song = next(
            (entry for entry in playlist.get('songs') or []
             if entry.get('position') == song_position),
            None,
        )
        if song is None:
            print(f"Song with position {song_position} not found in playlist")
            return False

        if dry_run:
            print(f"DRY RUN: Would update playlist '{playlist_title}' song {song_position}")
            print(f" From: {song.get('artist', '')} - {song.get('title', '')}")
            print(f" To: {new_artist} - {new_title}")
            return True

        song['artist'] = new_artist
        song['title'] = new_title

        # Save the updated playlists. Serialize before opening the file for
        # writing so that a serialization error cannot leave it truncated.
        try:
            payload = json.dumps(playlists, indent=2, ensure_ascii=False)
            with open(playlists_path, 'w', encoding='utf-8') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving playlists: {e}")
            return False

        print(f"Updated playlist '{playlist_title}' song {song_position}")
        return True


def main():
    """Main function for CLI usage.

    Validates either one playlist (``--playlist-index``) or all playlists,
    prints a summary and, if ``--output`` is given, writes the result dict to
    that file as JSON. Errors are printed and end the run early.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Validate playlists against song library')
    parser.add_argument('--config', default='../config/config.json',
                        help='Configuration file path')
    parser.add_argument('--data-dir', default='../data',
                        help='Data directory path')
    parser.add_argument('--dry-run', action='store_true', default=True,
                        help='Dry run mode (default)')
    parser.add_argument('--apply', action='store_true',
                        help='Apply changes (disable dry run)')
    parser.add_argument('--playlist-index', type=int,
                        help='Validate specific playlist by index')
    parser.add_argument('--output',
                        help='Output results to JSON file')

    args = parser.parse_args()

    # Load configuration (JSON is read as UTF-8, like the data files)
    try:
        with open(args.config, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except (OSError, ValueError) as e:
        print(f"Error loading config: {e}")
        return

    # Create validator; it raises ValueError when the library cannot be loaded
    try:
        validator = PlaylistValidator(config, args.data_dir)
    except ValueError as e:
        print(f"Error: {e}")
        return

    # --apply is the only switch that disables dry-run mode
    dry_run = not args.apply

    if args.playlist_index is not None:
        # Validate specific playlist
        playlists_path = os.path.join(args.data_dir, PlaylistValidator.PLAYLISTS_FILE)
        try:
            with open(playlists_path, 'r', encoding='utf-8') as f:
                playlists = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading playlists: {e}")
            return

        # Reject negative indices too; they would silently wrap around.
        if not 0 <= args.playlist_index < len(playlists):
            print(f"Invalid playlist index: {args.playlist_index}")
            return

        results = validator.validate_playlist(playlists[args.playlist_index], dry_run)
        summary = results['summary']

        print(f"\nPlaylist: {results['playlist_title']}")
        print(f"Total songs: {results['total_songs']}")
        print(f"Exact matches: {summary['exact_match_count']}")
        print(f"Fuzzy matches: {summary['fuzzy_match_count']}")
        print(f"Missing: {summary['missing_count']}")
        print(f"Need manual review: {summary['needs_manual_review']}")
    else:
        # Validate all playlists
        results = validator.validate_all_playlists(dry_run)
        if not results:
            # Loading failed; validate_all_playlists already printed the error.
            return
        overall = results['overall_summary']

        print("\nPlaylist Validation Results:")
        print(f"Total playlists: {results['total_playlists']}")
        print(f"Total songs: {overall['total_songs']}")
        print(f"Exact matches: {overall['exact_matches']}")
        print(f"Fuzzy matches: {overall['fuzzy_matches']}")
        print(f"Missing: {overall['missing_songs']}")
        print(f"Need manual review: {overall['needs_manual_review']}")

    # Both modes bind ``results``, so --output works for either of them.
    if args.output:
        try:
            with open(args.output, 'w', encoding='utf-8') as f:
                json.dump(results, f, indent=2, ensure_ascii=False)
            print(f"\nResults saved to: {args.output}")
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving results: {e}")


if __name__ == '__main__':
    main()