"""
Data loader utility for MusicBrainz Data Cleaner.
Loads known artist and recording MBIDs from JSON files.
"""

import json
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class DataLoader:
    """Loads and manages known artist and recording data from JSON files.

    All loaders are best-effort: if a file is missing, unreadable, not valid
    JSON, or does not have the expected structure, a warning is printed and
    an empty result is returned instead of raising.

    Known artists and known recordings are cached on the instance after the
    first load; name variations and contraction fixes are read from disk on
    every call.
    """

    def __init__(self, data_dir: str = "data"):
        """Remember the data directory; no files are read until a loader runs.

        Args:
            data_dir: Directory containing the JSON data files.
        """
        self.data_dir = Path(data_dir)
        self._known_artists: Optional[Dict[str, str]] = None
        self._known_recordings: Optional[Dict[Tuple[str, str], str]] = None

    def _load_json_object(self, filename: str, label: str) -> Dict[str, Any]:
        """Read ``filename`` from the data directory as a JSON object.

        Args:
            filename: File name relative to ``self.data_dir``.
            label: Human-readable name of the data set, used in warnings.

        Returns:
            The parsed top-level JSON object, or an empty dict (after printing
            a warning) if the file cannot be read, cannot be decoded, or its
            top-level value is not an object.
        """
        path = self.data_dir / filename
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        # OSError covers missing files as well as permission problems and
        # directories; UnicodeDecodeError covers files that are not UTF-8.
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
            print(f"Warning: Could not load {label} data: {e}")
            return {}

        if not isinstance(data, dict):
            # Valid JSON of the wrong shape (list, string, null, ...) would
            # otherwise fail later on ``.get`` with an AttributeError.
            print(
                f"Warning: Could not load {label} data: expected a JSON "
                f"object in {path}, got {type(data).__name__}"
            )
            return {}
        return data

    @staticmethod
    def _dict_section(data: Dict[str, Any], key: str, label: str) -> Dict[str, str]:
        """Return ``data[key]`` if it is a dict, otherwise an empty dict.

        A missing key is treated as "no data" and is silent; a present value
        of the wrong type (for example ``null``) prints a warning.
        """
        section = data.get(key, {})
        if isinstance(section, dict):
            return section
        print(
            f"Warning: Could not load {label} data: '{key}' should be a "
            f"JSON object, got {type(section).__name__}"
        )
        return {}

    def load_known_artists(self) -> Dict[str, str]:
        """Load known artist MBIDs from ``known_artists.json``.

        Returns:
            The ``artists`` mapping from the file (cached after the first
            call), or an empty dict if it could not be loaded.
        """
        if self._known_artists is None:
            data = self._load_json_object("known_artists.json", "known artists")
            self._known_artists = self._dict_section(data, 'artists', "known artists")
        return self._known_artists

    def load_name_variations(self) -> Dict[str, str]:
        """Load name variations from ``known_artists.json``.

        The file is re-read on every call; the result is not cached.

        Returns:
            The ``name_variations`` mapping from the file, or an empty dict
            if it could not be loaded.
        """
        data = self._load_json_object("known_artists.json", "name variations")
        return self._dict_section(data, 'name_variations', "name variations")

    def load_known_recordings(self) -> Dict[Tuple[str, str], str]:
        """Load known recording MBIDs from ``known_recordings.json``.

        The file stores a ``recordings`` list of objects with the keys
        ``artist_mbid``, ``title`` and ``recording_mbid``. Entries that are
        not objects, or that lack a truthy value for any of the three keys,
        are skipped.

        Returns:
            A dict mapping ``(artist_mbid, title)`` to ``recording_mbid``
            (cached after the first call); empty if nothing could be loaded.
        """
        if self._known_recordings is None:
            data = self._load_json_object("known_recordings.json", "known recordings")
            entries = data.get('recordings', [])
            if not isinstance(entries, list):
                print(
                    "Warning: Could not load known recordings data: "
                    f"'recordings' should be a JSON array, got {type(entries).__name__}"
                )
                entries = []

            # Convert list format to tuple key format. Build into a local so
            # the cache is only assigned once the whole file was processed.
            known: Dict[Tuple[str, str], str] = {}
            malformed = 0
            for entry in entries:
                if not isinstance(entry, dict):
                    malformed += 1
                    continue
                artist_mbid = entry.get('artist_mbid')
                title = entry.get('title')
                recording_mbid = entry.get('recording_mbid')
                if artist_mbid and title and recording_mbid:
                    known[(artist_mbid, title)] = recording_mbid
            if malformed:
                print(
                    f"Warning: Skipped {malformed} malformed entries in "
                    "known recordings data"
                )
            self._known_recordings = known
        return self._known_recordings

    def load_contraction_fixes(self) -> Dict[str, str]:
        """Load contraction fixes from ``contraction_fixes.json``.

        The file is re-read on every call; the result is not cached.

        Returns:
            The top-level JSON object of the file, or an empty dict if it
            could not be loaded.
        """
        return self._load_json_object("contraction_fixes.json", "contraction fixes")

    def reload_data(self) -> None:
        """Reload data from files (useful for testing or updates).

        Drops the cached artists and recordings and loads both again.
        """
        self._known_artists = None
        self._known_recordings = None
        self.load_known_artists()
        self.load_known_recordings()


# Global instance for easy access
data_loader = DataLoader()