"""
Utility functions for the Karaoke Song Library Cleanup Tool.
"""

import json
import os
import re
from pathlib import Path
from typing import Dict, List, Any, Optional


# Single pattern matching any multi-artist delimiter, with surrounding
# whitespace consumed.  Word delimiters ("feat", "ft", "featuring", "and") are
# anchored with \b so they are not matched inside other words (e.g. the "and"
# in "Brandy" or the "ft" in "Daft"), and matching is case-insensitive so
# "Feat." / "FT" are recognised too.
_ARTIST_DELIMITER_RE = re.compile(
    r'\s*(?:\b(?:featuring|feat|ft)\b\.?|\band\b|[&,;/])\s*',
    re.IGNORECASE,
)


def load_json_file(file_path: str) -> Any:
    """Load and parse a JSON file.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file content is not valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError as e:
        # Chain the original error so the underlying traceback is preserved.
        raise FileNotFoundError(f"File not found: {file_path}") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in {file_path}: {e}") from e


def save_json_file(data: Any, file_path: str, indent: int = 2) -> None:
    """Save data to a JSON file, creating parent directories as needed.

    Args:
        data: JSON-serialisable value to write.
        file_path: Destination path; written as UTF-8.
        indent: Indentation width passed to ``json.dump``.
    """
    parent_dir = os.path.dirname(file_path)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=indent, ensure_ascii=False)


def get_file_extension(file_path: str) -> str:
    """Return the lower-cased file extension (including the dot) of a path."""
    return os.path.splitext(file_path)[1].lower()


def get_base_filename(file_path: str) -> str:
    """Return the path without its extension, used for CDG/MP3 pairing.

    The directory part is kept and case is preserved, so two files pair only
    when everything but the extension is identical.
    """
    return os.path.splitext(file_path)[0]


def find_mp3_pairs(songs: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
    """Group songs into CDG/MP3 pairs and standalone files.

    Each song must have a ``'path'`` key.  Files are classified by their
    lower-cased extension; extensions other than ``.cdg``, ``.mp3`` and
    ``.mp4`` are ignored.  If several songs share the same base path and
    extension, the last one seen replaces the earlier ones.

    Returns:
        A dict with keys:
          * ``'pairs'``: list of ``[cdg_song, mp3_song]`` lists,
          * ``'standalone_mp4'``: every ``.mp4`` song,
          * ``'standalone_mp3'``: CDG files without an MP3 followed by MP3
            files without a CDG.
    """
    pairs = []
    standalone_mp4 = []
    standalone_mp3 = []

    # Lookups for CDG and MP3 files keyed by path-without-extension.
    cdg_lookup = {}
    mp3_lookup = {}

    for song in songs:
        ext = get_file_extension(song['path'])
        base_name = get_base_filename(song['path'])

        if ext == '.cdg':
            cdg_lookup[base_name] = song
        elif ext == '.mp3':
            mp3_lookup[base_name] = song
        elif ext == '.mp4':
            standalone_mp4.append(song)

    # Match each CDG with its MP3; an unmatched CDG is treated as a
    # standalone MP3 entry.
    for base_name, cdg_song in cdg_lookup.items():
        mp3_song = mp3_lookup.get(base_name)
        if mp3_song is not None:
            pairs.append([cdg_song, mp3_song])
        else:
            standalone_mp3.append(cdg_song)

    # MP3s that have no CDG counterpart.
    for base_name, mp3_song in mp3_lookup.items():
        if base_name not in cdg_lookup:
            standalone_mp3.append(mp3_song)

    return {
        'pairs': pairs,
        'standalone_mp4': standalone_mp4,
        'standalone_mp3': standalone_mp3
    }


def normalize_artist_title(artist: str, title: str, case_sensitive: bool = False) -> str:
    """Normalize artist and title into a single ``"artist|title"`` match key.

    Unless ``case_sensitive`` is true both values are lower-cased.  Every
    character that is neither a word character nor whitespace is replaced by
    a space, then runs of whitespace are collapsed to a single space.
    """
    if not case_sensitive:
        artist = artist.lower()
        title = title.lower()

    # Replace punctuation with spaces and trim the ends.
    artist = re.sub(r'[^\w\s]', ' ', artist).strip()
    title = re.sub(r'[^\w\s]', ' ', title).strip()

    # Collapse multiple spaces into one.
    artist = re.sub(r'\s+', ' ', artist)
    title = re.sub(r'\s+', ' ', title)

    return f"{artist}|{title}"


def extract_channel_from_path(file_path: str, channel_priorities: Optional[List[str]] = None) -> Optional[str]:
    """Extract the channel name from an MP4 path using configured folder names.

    Args:
        file_path: Path of the song file.
        channel_priorities: Channel names to look for, in priority order.

    Returns:
        The first entry of ``channel_priorities`` that occurs
        (case-insensitively, as a substring) in ``file_path``, or ``None``
        when the file is not an ``.mp4``, no priorities are given, or nothing
        matches.
    """
    if not file_path.lower().endswith('.mp4'):
        return None

    if not channel_priorities:
        return None

    path_lower = file_path.lower()

    # Plain substring test; equivalent to searching for the regex-escaped
    # channel name, without the regex machinery.
    for channel in channel_priorities:
        if channel.lower() in path_lower:
            return channel

    return None


def parse_multi_artist(artist_string: str) -> List[str]:
    """Split a multi-artist string into individual artist names.

    Recognised delimiters are ``feat``/``feat.``, ``ft``/``ft.``,
    ``featuring`` and ``and`` (whole words only, any letter case) plus the
    characters ``&``, ``,``, ``;`` and ``/``.

    Returns:
        The stripped, non-empty artist names in their original order; an
        empty list for an empty input.
    """
    if not artist_string:
        return []

    parts = _ARTIST_DELIMITER_RE.split(artist_string)
    return [part.strip() for part in parts if part.strip()]


def format_file_size(size_bytes: int) -> str:
    """Format a byte count as a human readable string such as ``"1.5KB"``.

    Units go from B up to GB with one decimal place; sizes beyond that stay
    expressed in GB.  Zero is rendered as ``"0B"``.
    """
    if size_bytes == 0:
        return "0B"

    size_names = ["B", "KB", "MB", "GB"]
    size = size_bytes
    i = 0
    while size >= 1024 and i < len(size_names) - 1:
        size /= 1024.0
        i += 1

    return f"{size:.1f}{size_names[i]}"


def validate_song_data(song: Dict[str, Any]) -> bool:
    """Return True when ``artist``, ``title`` and ``path`` are present and truthy."""
    required_fields = ['artist', 'title', 'path']
    return all(field in song and song[field] for field in required_fields)