"""Core download orchestration for the karaoke downloader.

Defines :class:`KaraokeDownloader`, which scans channels (regular or
"manual"), matches videos against the songlist, and drives the download
pipeline, plus :func:`reset_songlist_all` for wiping songlist state.
"""

import hashlib
import json
import logging
import os
import re
import subprocess
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from karaoke_downloader.cache_manager import (
    delete_plan_cache,
    get_download_plan_cache_file,
    load_cached_plan,
    save_plan_cache,
)
from karaoke_downloader.channel_manager import (
    download_from_file,
    reset_channel_downloads,
)
from karaoke_downloader.config_manager import get_config_manager, load_config
from karaoke_downloader.download_pipeline import DownloadPipeline
from karaoke_downloader.download_planner import build_download_plan
from karaoke_downloader.error_utils import handle_yt_dlp_error, log_error
from karaoke_downloader.file_utils import ensure_directory_exists, sanitize_filename
from karaoke_downloader.fuzzy_matcher import (
    create_song_key,
    create_video_key,
    get_similarity_function,
    is_exact_match,
    is_fuzzy_match,
)
from karaoke_downloader.id3_utils import add_id3_tags
from karaoke_downloader.channel_parser import ChannelParser
from karaoke_downloader.server_manager import (
    check_and_mark_server_duplicate,
    is_song_marked_as_server_duplicate,
    is_song_on_server,
    load_server_duplicates_tracking,
    load_server_songs,
)
from karaoke_downloader.song_validator import create_song_validator
from karaoke_downloader.songlist_manager import (
    is_songlist_song_downloaded,
    load_songlist,
    load_songlist_tracking,
    mark_songlist_song_downloaded,
    normalize_title,
    save_songlist_tracking,
)
from karaoke_downloader.tracking_manager import FormatType, SongStatus, TrackingManager
from karaoke_downloader.video_downloader import (
    download_video_and_track,
    execute_download_plan,
    is_valid_mp4,
    cleanup_cache,
)
from karaoke_downloader.parallel_downloader import (
    ParallelDownloader,
    DownloadTask,
    create_parallel_downloader,
)
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
from karaoke_downloader.manual_video_manager import (
    is_manual_channel,
    get_manual_channel_info,
    get_manual_videos_for_channel,
)

# Constants
DEFAULT_FUZZY_THRESHOLD = 85
DEFAULT_CACHE_EXPIRATION_DAYS = 1
DEFAULT_DISPLAY_LIMIT = 10
DATA_DIR = Path("data")

# NOTE(review): many user-facing messages below start with character runs that
# look like mis-encoded emoji. They are reproduced exactly as found; confirm
# the file's encoding before "repairing" them.


class KaraokeDownloader:
    """Coordinates channel scanning, songlist matching and video downloads."""

    def __init__(self):
        """Load configuration, create working directories and tracking state."""
        # Load configuration
        self.config_manager = get_config_manager()
        self.config = self.config_manager.load_config()

        # Initialize paths
        self.yt_dlp_path = Path(self.config.yt_dlp_path)
        self.downloads_dir = Path(self.config.folder_structure.downloads_dir)
        self.logs_dir = Path(self.config.folder_structure.logs_dir)

        # Ensure directories exist
        ensure_directory_exists(self.downloads_dir)
        ensure_directory_exists(self.logs_dir)

        # Initialize tracking
        tracking_file = DATA_DIR / "karaoke_tracking.json"
        cache_dir = DATA_DIR / "channel_cache"
        self.tracker = TrackingManager(
            tracking_file=tracking_file, cache_dir=cache_dir
        )

        # Initialize song validator
        self.song_validator = create_song_validator(self.tracker, self.downloads_dir)

        # Load songlist tracking
        self.songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
        self.songlist_tracking = load_songlist_tracking(
            str(self.songlist_tracking_file)
        )

        # Load server songs for availability checking
        self.server_songs = load_server_songs()

        # Initialize channel parser for title parsing
        self.channel_parser = ChannelParser()

        # Parallel download settings
        self.enable_parallel_downloads = False
        self.parallel_workers = 3

        # Songlist focus mode attributes
        self.songlist_focus_titles = None
        self.songlist_only = False
        self.use_songlist_priority = True
        self.download_limit = None
        self.force_download = False
        self.songlist_file_path = "data/songList.json"  # Default songlist file path

    def _load_config(self):
        """Load configuration using the config manager."""
        return self.config_manager.load_config()

    def _should_skip_song(
        self,
        artist,
        title,
        channel_name,
        video_id,
        video_title,
        server_songs=None,
        server_duplicates_tracking=None,
    ):
        """
        Check if a song should be skipped using the centralized SongValidator.

        The instance's ``force_download`` flag is forwarded to the validator.

        Returns:
            tuple: (should_skip, reason, total_filtered)
        """
        return self.song_validator.should_skip_song(
            artist,
            title,
            channel_name,
            video_id,
            video_title,
            server_songs,
            server_duplicates_tracking,
            self.force_download,
        )

    def _mark_song_failed(self, artist, title, video_id, channel_name, error_message):
        """
        Mark a song as failed in tracking using the SongValidator.
        """
        self.song_validator.mark_song_failed(
            artist, title, video_id, channel_name, error_message
        )

    def _handle_download_failure(
        self, artist, title, video_id, channel_name, error_type, error_details=""
    ):
        """
        Handle download failures using the SongValidator.

        Args:
            artist: Song artist
            title: Song title
            video_id: YouTube video ID
            channel_name: Channel name
            error_type: Type of error (e.g., "yt-dlp failed", "file verification failed")
            error_details: Additional error details
        """
        self.song_validator.handle_download_failure(
            artist, title, video_id, channel_name, error_type, error_details
        )

    def _resolve_channel_videos(self, url, force_refresh, all_videos=False):
        """Resolve ``url`` to ``(channel_name, videos)``; ``None`` if no videos.

        Manual channels are read via the manual video manager (yt-dlp is not
        involved); regular channels go through the tracking manager's channel
        video list, which receives ``force_refresh``.

        Args:
            url: Channel or playlist URL, or a manual channel identifier.
            force_refresh: Forwarded to ``tracker.get_channel_video_list``.
            all_videos: Only affects message wording ("ALL videos").

        Returns:
            ``(channel_name, list_of_video_dicts)``, or ``None`` after printing
            a warning when nothing was found.
        """
        scope = "ALL videos " if all_videos else ""

        if is_manual_channel(url):
            channel_name, _channel_id = get_manual_channel_info(url)
            print(f"\n๐ŸŽฌ Downloading {scope}from manual channel: {channel_name} ({url})")

            # Load manual videos
            manual_videos = get_manual_videos_for_channel(channel_name)
            if not manual_videos:
                print("โš ๏ธ No manual videos found. Skipping.")
                return None

            # Convert to the expected format
            available_videos = [
                {
                    "title": video.get("title", ""),
                    "id": video.get("id", ""),
                    "url": video.get("url", ""),
                }
                for video in manual_videos
            ]
            print(f"๐Ÿ“‹ Found {len(available_videos)} manual videos")
            return channel_name, available_videos

        # Regular YouTube channel processing - USE TRACKING MANAGER CACHE
        channel_name, _channel_id = get_channel_info(url)
        print(f"\n๐ŸŽฌ Downloading {scope}from channel: {channel_name} ({url})")

        # Use tracking manager to get videos (with cache support)
        available_videos = self.tracker.get_channel_video_list(
            url, str(self.yt_dlp_path), force_refresh=force_refresh
        )
        if not available_videos:
            print("โš ๏ธ No videos found for this channel. Skipping.")
            return None

        print(f"๐Ÿ“‹ Found {len(available_videos)} videos from channel")
        return channel_name, available_videos

    def download_channel_videos(
        self,
        url,
        force_refresh=False,
        fuzzy_match=False,
        fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
        force_download=False,
    ):
        """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.

        Returns:
            bool: ``False`` when no videos were found; otherwise the result of
            ``_process_videos_for_download``.
        """
        resolved = self._resolve_channel_videos(url, force_refresh)
        if resolved is None:
            return False
        channel_name, available_videos = resolved

        return self._process_videos_for_download(
            available_videos,
            channel_name,
            force_refresh,
            fuzzy_match,
            fuzzy_threshold,
            force_download,
        )

    def download_all_channel_videos(
        self,
        url,
        force_refresh=False,
        force_download=False,
        limit=None,
    ):
        """Download ALL videos from a channel, skipping existing files and songs in songs.json.

        Returns:
            bool: ``False`` when no videos were found; otherwise the result of
            ``_process_all_videos_for_download``.
        """
        resolved = self._resolve_channel_videos(url, force_refresh, all_videos=True)
        if resolved is None:
            return False
        channel_name, available_videos = resolved

        # Process all videos for download (skip songlist filtering)
        return self._process_all_videos_for_download(
            available_videos, channel_name, force_refresh, force_download, limit
        )

    def _process_all_videos_for_download(self, available_videos, channel_name, force_refresh=False, force_download=False, limit=None):
        """Process ALL videos for download (no songlist filtering).

        Videos are skipped (unless ``force_download``) when the target file
        already exists, when the song is recorded on the server as MP4, when
        the tracker reports it as downloaded, or when
        ``check_and_mark_server_duplicate`` flags it. The remaining videos are
        turned into a download plan that is cached so an interrupted run can
        resume.

        Args:
            available_videos: Video dicts with at least ``"title"`` and ``"id"``.
            channel_name: Channel the videos belong to.
            force_refresh: Ignore an existing cached plan and rebuild it.
            force_download: Bypass every skip check.
            limit: Optional cap on the number of videos to download.

        Returns:
            bool: ``True`` when there was nothing new to download or at least
            one download succeeded; ``False`` otherwise.
        """
        print(f"๐Ÿ” Processing {len(available_videos)} videos for download (no songlist filtering)...")

        # Load server songs and duplicates tracking for availability checking
        server_songs = load_server_songs()
        server_duplicates_tracking = load_server_duplicates_tracking()

        # Load songlist to check if songs are already in songs.json
        songlist = load_songlist(self.songlist_file_path)
        songlist_artists_titles = {
            f"{song['artist']} - {song['title']}" for song in (songlist or [])
        }

        # Process all videos, skipping existing files and songs in songs.json
        videos_to_download = []
        skipped_count = 0

        for video in available_videos:
            video_title = video["title"]
            video_id = video["id"]

            # Extract artist and title
            artist, extracted_title = self.channel_parser.extract_artist_title(video_title, channel_name)
            if not artist and not extracted_title:
                # Fallback: use the full title
                artist = ""
                extracted_title = video_title

            # Create filename
            # NOTE(review): other methods build filenames with
            # sanitize_filename(); this one does not — confirm which name the
            # pipeline actually writes so the existence check matches.
            filename = f"{artist} - {extracted_title}.mp4" if artist and extracted_title else f"{video_title}.mp4"
            output_path = self.downloads_dir / channel_name / filename

            # Check if file already exists
            if output_path.exists() and not force_download:
                print(f" โญ๏ธ Skipping existing file: {filename}")
                skipped_count += 1
                continue

            # Check if song is already in songs.json
            song_key = f"{artist} - {extracted_title}"
            if song_key in songlist_artists_titles and not force_download:
                # server_songs is indexed elsewhere in this module by
                # "<artist lower>_<normalized title>", so use that key here
                # too; the display-style song_key would not match it.
                server_key = f"{(artist or '').lower()}_{normalize_title(extracted_title or '')}"
                if server_key in server_songs:
                    # Check if there's already an MP4 version in songs.json
                    song_info = server_songs[server_key]
                    if song_info.get("is_mp4", False):
                        print(f" โญ๏ธ Skipping song in songs.json (MP4): {song_key}")
                        skipped_count += 1
                        continue
                    # Continue to download the MP4 version
                    print(f" ๐Ÿ“ฅ Found MP3 version in songs.json, will download MP4: {song_key}")
                else:
                    # Continue to download the MP4 version
                    print(f" ๐Ÿ“ฅ Found song in songlist, will download MP4: {song_key}")

            # Check if already downloaded (tracking)
            if self.tracker.is_song_downloaded(artist, extracted_title, channel_name, video_id) and not force_download:
                print(f" โญ๏ธ Skipping already tracked: {song_key}")
                skipped_count += 1
                continue

            # Check if on server
            if not force_download and check_and_mark_server_duplicate(
                server_songs,
                server_duplicates_tracking,
                artist,
                extracted_title,
                video_title,
                channel_name,
            ):
                print(f" โญ๏ธ Skipping server duplicate: {song_key}")
                skipped_count += 1
                continue

            # Add to download list
            videos_to_download.append((video, artist, extracted_title, filename))

        print(f"๐Ÿ“Š Found {len(videos_to_download)} videos to download (skipped {skipped_count})")

        # Apply limit if specified
        if limit is not None:
            original_count = len(videos_to_download)
            videos_to_download = videos_to_download[:limit]
            print(f"๐ŸŽฏ Limited to first {limit} videos (was {original_count} total)")

        if not videos_to_download:
            print("๐ŸŽต No new videos to download.")
            return True

        # Cache the download plan for resuming
        plan_kwargs = {
            "channel": channel_name,
            "total_videos": len(available_videos),
            "force_download": force_download,
        }
        if limit:
            plan_kwargs["limit"] = limit
        cache_file = get_download_plan_cache_file("all_videos", **plan_kwargs)

        # Check for existing cache
        cached_plan, _ = load_cached_plan(cache_file)
        if cached_plan and not force_refresh:
            print(f"\n๐Ÿ“‹ Found existing download plan cache: {cache_file.name}")
            print(f" ๐ŸŽฌ Cached videos to download: {len(cached_plan)}")
            download_plan = cached_plan
        else:
            # Create new download plan
            download_plan = [
                {
                    "video_id": video["id"],
                    "artist": artist,
                    "title": title,
                    "filename": filename,
                    "channel_name": channel_name,
                    "video_title": video.get("title", ""),
                    "force_download": force_download,
                }
                for video, artist, title, filename in videos_to_download
            ]
            # Save the new plan to cache
            save_plan_cache(cache_file, download_plan, [])  # No unmatched for all-videos mode

        # Show download plan summary (counts reflect the plan actually executed,
        # which may be a cached one)
        print("\n๐Ÿ“‹ Download Plan Summary:")
        print(f" ๐Ÿ“บ Channel: {channel_name}")
        print(f" ๐ŸŽฌ Total videos to download: {len(download_plan)}")
        print(f" โญ๏ธ Videos skipped: {skipped_count}")
        if limit:
            print(f" ๐ŸŽฏ Limit applied: {limit} videos")
        print(f" ๐Ÿ“ Output directory: downloads/{channel_name}/")
        print(f" ๐Ÿ’พ Download plan cached to: {cache_file.name}")
        print("\n๐ŸŽฌ Starting downloads...")

        # Download videos using the download pipeline
        pipeline = DownloadPipeline(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        success_count = 0
        total_to_download = len(download_plan)

        # Iterate over a snapshot because download_plan shrinks as items finish.
        for i, plan_item in enumerate(list(download_plan), 1):
            print(f"โฌ‡๏ธ Downloading {i}/{total_to_download}: {plan_item['artist']} - {plan_item['title']}")

            if pipeline.execute_pipeline(
                video_id=plan_item["video_id"],
                artist=plan_item["artist"],
                title=plan_item["title"],
                channel_name=plan_item["channel_name"],
                video_title=plan_item["video_title"],
            ):
                success_count += 1

                # Remove completed item from cache so a resumed run only sees
                # what is still outstanding.
                download_plan.remove(plan_item)
                if download_plan:  # If there are still items left
                    save_plan_cache(cache_file, download_plan, [])
                    print(f"๐Ÿ—‘๏ธ Removed completed item from download plan. {len(download_plan)} items remaining.")
                else:
                    # All downloads completed, delete the cache file
                    delete_plan_cache(cache_file)
                    print("๐Ÿ—‘๏ธ All downloads completed, deleted download plan cache.")

        print(f"\n๐ŸŽ‰ Download complete! {success_count}/{total_to_download} videos downloaded successfully")
        return success_count > 0

    def _load_focused_songlist(self):
        """Build the songlist from the playlists named in ``songlist_focus_titles``.

        Reads the raw songlist JSON, keeps playlists whose ``"title"`` is in
        ``self.songlist_focus_titles`` and flattens their songs, dropping
        duplicates by lower-cased artist/title.

        Returns:
            list | None: Song dicts (``artist``, ``title``, ``position``), or
            ``None`` after printing a warning when the file is missing,
            unreadable, or no playlist matched.
        """
        songlist_file = Path(self.songlist_file_path)
        if not songlist_file.exists():
            print(f"โš ๏ธ Songlist file not found: {self.songlist_file_path}")
            return None

        try:
            with open(songlist_file, "r", encoding="utf-8") as f:
                raw_data = json.load(f)
        except (json.JSONDecodeError, FileNotFoundError) as e:
            print(f"โš ๏ธ Could not load songlist for filtering from {self.songlist_file_path}: {e}")
            return None

        # Filter playlists by title
        focused_playlists = []
        print(f"๐Ÿ” Looking for playlists: {self.songlist_focus_titles}")
        print("๐Ÿ” Available playlists in songList.json:")
        for i, playlist in enumerate(raw_data[:5]):  # Show first 5 playlists
            print(f" {i+1}. '{playlist.get('title', 'NO TITLE')}'")
        if len(raw_data) > 5:
            print(f" ... and {len(raw_data) - 5} more playlists")

        for playlist in raw_data:
            playlist_title = playlist.get("title", "")
            if playlist_title in self.songlist_focus_titles:
                focused_playlists.append(playlist)
                print(f"โœ… Found matching playlist: '{playlist_title}'")

        if not focused_playlists:
            print(
                f"โš ๏ธ No playlists found matching the specified titles: {', '.join(self.songlist_focus_titles)}"
            )
            return None

        # Flatten the focused playlists into songs
        focused_songs = []
        seen = set()
        for playlist in focused_playlists:
            for song in playlist.get("songs", []):
                if "artist" not in song or "title" not in song:
                    continue
                artist = song["artist"].strip()
                title = song["title"].strip()
                key = f"{artist.lower()}_{title.lower()}"
                if key in seen:
                    continue
                seen.add(key)
                focused_songs.append(
                    {
                        "artist": artist,
                        "title": title,
                        "position": song.get("position", 0),
                    }
                )

        print(
            f"\n๐ŸŽฏ Songlist focus mode: {len(focused_songs)} songs from {len(focused_playlists)} playlists selected"
        )
        print(f"๐ŸŽฏ Focused playlists: {', '.join(self.songlist_focus_titles)}")
        return focused_songs

    def download_songlist_across_channels(
        self,
        channel_urls,
        limit=None,
        force_refresh_download_plan=False,
        fuzzy_match=False,
        fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
        force_download=False,
        show_pagination=False,
        parallel_channels=False,
        max_channel_workers=3,
    ):
        """
        Download songs from the songlist across multiple channels.

        Steps: load the songlist (optionally restricted to focused playlists),
        drop songs already downloaded or already on the server (unless
        ``force_download``), apply ``limit``, build or reuse a cached download
        plan, then hand the plan to ``execute_download_plan_parallel``.

        Args:
            channel_urls: Channel URLs to scan for matches.
            limit: Optional cap on the number of songs to process.
            force_refresh_download_plan: Rebuild the plan even if one is cached.
            fuzzy_match: Forwarded to ``build_download_plan``.
            fuzzy_threshold: Forwarded to ``build_download_plan``.
            force_download: Skip the "already downloaded"/"on server" filters.
                Also stored on ``self.force_download``.
            show_pagination: Forwarded to ``build_download_plan``.
            parallel_channels: Forwarded to ``build_download_plan``.
            max_channel_workers: Forwarded to ``build_download_plan``.

        Returns:
            bool: ``False`` if no songlist could be loaded, ``True`` if nothing
            needed downloading, otherwise the success flag from the download
            phase.
        """
        # Set force download flag
        self.force_download = force_download

        # Apply songlist focus filtering if specified
        if self.songlist_focus_titles:
            songlist = self._load_focused_songlist()
            if songlist is None:
                return False
        else:
            # Load songlist normally (flattened from all playlists)
            songlist = load_songlist(self.songlist_file_path)
            if not songlist:
                print("โš ๏ธ No songlist loaded. Skipping.")
                return False

        # Filter for songs not yet downloaded
        if self.force_download:
            undownloaded = songlist
            print(f"\n๐ŸŽฏ {len(songlist)} total unique songs in songlist.")
            print("๐Ÿ’ช Force mode enabled - will download all songs regardless of existing status")
        else:
            undownloaded = [
                s
                for s in songlist
                if not is_songlist_song_downloaded(
                    self.songlist_tracking, s["artist"], s["title"]
                )
            ]
            print(f"\n๐ŸŽฏ {len(songlist)} total unique songs in songlist.")
            print(f"\n๐ŸŽฏ {len(undownloaded)} unique songlist songs to download.")

        # Load server songs and duplicates tracking for availability checking
        server_songs = load_server_songs()
        server_duplicates_tracking = load_server_duplicates_tracking()

        # Initialize counters for logging
        server_available_mp4 = 0
        server_available_other = 0
        marked_duplicates = 0

        # Further filter out songs already on server or marked as duplicates
        if self.force_download:
            not_on_server = undownloaded
            print("๐Ÿ’ช Force mode enabled - will download all songs regardless of server status")
        else:
            not_on_server = []
            for song in undownloaded:
                artist, title = song["artist"], song["title"]

                # Check if already marked as server duplicate
                if is_song_marked_as_server_duplicate(
                    server_duplicates_tracking, artist, title
                ):
                    marked_duplicates += 1
                    continue

                # Check if already on server and mark for future skipping
                if check_and_mark_server_duplicate(
                    server_songs,
                    server_duplicates_tracking,
                    artist,
                    title,
                    f"{artist} - {title}",
                    "songlist",
                ):
                    server_available_mp4 += 1
                    continue

                # Check if song is on server but in different format (MP3/CDG)
                key = f"{artist.lower()}_{normalize_title(title)}"
                if key in server_songs:
                    song_info = server_songs[key]
                    if song_info.get("is_mp3", False) or song_info.get("is_cdg", False):
                        server_available_other += 1
                        server_format = "MP3" if song_info.get("is_mp3", False) else "CDG"
                        print(f"๐ŸŽต Found {artist} - {title} on server as {server_format} format, will download video version")

                not_on_server.append(song)

        # Apply limit to undownloaded list before logging
        # This ensures that only the specified number of songs are processed and logged,
        # providing accurate counts when using --limit
        if limit is not None:
            original_count = len(not_on_server)
            not_on_server = not_on_server[:limit]
            print(f"\n๐ŸŽฏ Limited to first {limit} songs (was {original_count} total)")

        undownloaded = not_on_server

        # Now log the counts based on the limited list
        if server_available_mp4 > 0:
            print(
                f"\n๐ŸŽต {server_available_mp4} songs already available as MP4 on server, skipping."
            )
        if server_available_other > 0:
            # Only count songs that are in the limited list
            limited_server_other = sum(
                1
                for song in not_on_server
                if f"{song['artist'].lower()}_{normalize_title(song['title'])}" in server_songs
            )
            if limited_server_other > 0:
                print(
                    f"\n๐ŸŽต {limited_server_other} songs found on server as MP3/CDG, will download video versions."
                )
        if marked_duplicates > 0:
            print(
                f"\n๐Ÿท๏ธ {marked_duplicates} songs previously marked as server duplicates, skipping."
            )

        print(f"\n๐ŸŽฏ {len(undownloaded)} songs need to be downloaded.")
        if not undownloaded:
            print("๐ŸŽต All songlist songs already downloaded.")
            return True

        # --- Download plan building (same for both normal and focus modes) ---
        # --- Download plan cache logic ---
        plan_mode = "songlist"
        # Include only parameters that affect the plan generation (exclude limit since it only affects execution)
        plan_kwargs = {
            "channels": len(channel_urls),
            "fuzzy": fuzzy_match,
            "threshold": fuzzy_threshold,
        }
        # Add channel URLs hash to ensure same channels = same cache
        # (md5 is used only as a short, stable cache-key digest here)
        channels_hash = hashlib.md5(
            "|".join(sorted(channel_urls)).encode()
        ).hexdigest()[:8]
        plan_kwargs["channels_hash"] = channels_hash

        cache_file = get_download_plan_cache_file(plan_mode, **plan_kwargs)
        download_plan, unmatched = load_cached_plan(cache_file)
        use_cache = (
            not force_refresh_download_plan
            and download_plan is not None
            and unmatched is not None
        )

        if use_cache:
            print(f"\n๐Ÿ“‹ Using cached download plan from: {cache_file}")
        else:
            print(f"\n๐Ÿ” Pre-scanning {len(channel_urls)} channels for matches...")
            print(f"๐Ÿ” Scanning {len(undownloaded)} songs against all channels...")
            download_plan, unmatched = build_download_plan(
                channel_urls,
                undownloaded,
                self.tracker,
                self.yt_dlp_path,
                fuzzy_match=fuzzy_match,
                fuzzy_threshold=fuzzy_threshold,
                show_pagination=show_pagination,
                parallel_channels=parallel_channels,
                max_channel_workers=max_channel_workers,
            )
            save_plan_cache(cache_file, download_plan, unmatched)
            print(f"๐Ÿ’พ Download plan cached to: {cache_file}")

        print(
            f"\n๐Ÿ“Š Download plan ready: {len(download_plan)} songs will be downloaded."
        )
        print(f"โŒ {len(unmatched)} songs could not be found in any channel.")
        if unmatched:
            print("Unmatched songs:")
            for song in unmatched[:DEFAULT_DISPLAY_LIMIT]:
                print(f" - {song['artist']} - {song['title']}")
            if len(unmatched) > DEFAULT_DISPLAY_LIMIT:
                print(f" ...and {len(unmatched)-DEFAULT_DISPLAY_LIMIT} more.")

        # --- Download phase ---
        # NOTE(review): execute_download_plan_parallel is not defined in the
        # part of the class visible here — confirm it exists on this class.
        downloaded_count, success = self.execute_download_plan_parallel(
            download_plan=download_plan,
            unmatched=unmatched,
            cache_file=cache_file,
            limit=limit,
        )
        return success

    def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False):
        """Process videos for download (used for both manual and regular channels).

        With ``force_download`` on the ``"@ManualVideos"`` channel every video
        is downloaded directly. Otherwise videos are matched against the
        songlist (exact key match first, then optional fuzzy matching) and the
        best matches are downloaded through the download pipeline.

        Args:
            available_videos: Video dicts with at least ``"title"`` and ``"id"``.
            channel_name: Channel the videos belong to.
            force_refresh: Accepted for signature compatibility; not used here.
            fuzzy_match: Enable fuzzy matching for videos without an exact match.
            fuzzy_threshold: Minimum similarity score for a fuzzy match.
            force_download: See above; only affects the manual channel.

        Returns:
            bool: ``True`` if at least one video was downloaded.
        """
        # For manual videos with force_download, bypass songlist filtering
        if force_download and channel_name == "@ManualVideos":
            print(f"๐Ÿ’ช Force mode enabled - downloading all {len(available_videos)} manual videos")

            # Download all videos directly without songlist filtering
            success_count = 0
            for i, video in enumerate(available_videos, 1):
                title = video["title"]
                print(f"\nโฌ‡๏ธ Downloading {i}/{len(available_videos)}: {title}")
                print(f" ๐ŸŽฌ Video: {title} ({channel_name})")

                # Create filename from title
                artist, extracted_title = self.channel_parser.extract_artist_title(title, channel_name)
                if not artist and not extracted_title:
                    # Fallback: use the full title as filename
                    filename = sanitize_filename("", title)
                else:
                    filename = sanitize_filename(artist, extracted_title)

                # Download the video
                if self._download_single_video(video, channel_name, filename, force_download=True):
                    success_count += 1

            print(f"\nโœ… Downloaded {success_count}/{len(available_videos)} manual videos")
            return success_count > 0

        # Regular songlist-based processing
        songlist = load_songlist(self.songlist_file_path)
        if not songlist:
            print("โš ๏ธ No songlist loaded. Skipping.")
            return False

        # NOTE(review): falls back to 1 when the config has no "limit"
        # attribute and ignores self.download_limit — confirm this is intended.
        limit = getattr(self.config, "limit", 1)

        # Normalize songlist for matching
        normalized_songlist = {
            create_song_key(s["artist"], s["title"]): s for s in songlist
        }

        matches = []
        similarity = get_similarity_function()

        print(f"๐Ÿ” Scanning {len(available_videos)} videos for songlist matches...")

        for video in available_videos:
            title = video["title"]

            # Extract artist and title using channel parser
            artist, extracted_title = self.channel_parser.extract_artist_title(title, channel_name)
            if not artist and not extracted_title:
                continue

            song_key = create_song_key(artist, extracted_title)

            # Check for exact matches first
            if song_key in normalized_songlist:
                matches.append({
                    "video": video,
                    "song": normalized_songlist[song_key],
                    "match_type": "exact",
                    "match_score": 100.0,
                    "artist": artist,
                    "title": extracted_title,
                })
                print(f" โœ… Exact match: {artist} - {extracted_title}")
                continue

            # Check for fuzzy matches if enabled
            if fuzzy_match:
                best_match = None
                best_score = 0
                video_text = f"{artist} {extracted_title}"

                for song_data in normalized_songlist.values():
                    score = similarity(video_text, f"{song_data['artist']} {song_data['title']}")
                    if score > best_score and score >= fuzzy_threshold:
                        best_score = score
                        best_match = song_data

                if best_match:
                    matches.append({
                        "video": video,
                        "song": best_match,
                        "match_type": "fuzzy",
                        "match_score": best_score,
                        "artist": artist,
                        "title": extracted_title,
                    })
                    print(f" ๐ŸŽฏ Fuzzy match ({best_score:.1f}%): {artist} - {extracted_title} -> {best_match['artist']} - {best_match['title']}")

        print(f"๐Ÿ“Š Found {len(matches)} matches out of {len(available_videos)} videos")

        if not matches:
            print("โŒ No matches found in songlist")
            return False

        # Sort matches by score (exact matches first, then by fuzzy score)
        matches.sort(key=lambda x: (x["match_type"] != "exact", -x["match_score"]))

        # Limit downloads
        if limit:
            matches = matches[:limit]
            print(f"๐ŸŽฏ Limiting to {len(matches)} downloads")

        # Use the download pipeline (one instance serves every match)
        pipeline = DownloadPipeline(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        # Download matched videos
        success_count = 0
        for i, match in enumerate(matches, 1):
            video = match["video"]
            artist = match["artist"]
            title = match["title"]

            print(f"\nโฌ‡๏ธ Downloading {i}/{len(matches)}: {artist} - {title}")
            print(f" ๐ŸŽฌ Video: {video['title']} ({channel_name})")
            if match["match_type"] == "fuzzy":
                print(f" ๐ŸŽฏ Match Score: {match['match_score']:.1f}%")

            success = pipeline.execute_pipeline(
                video_id=video["id"],
                artist=artist,
                title=title,
                channel_name=channel_name,
                video_title=video["title"],
            )

            if success:
                success_count += 1
                print(f"โœ… Successfully downloaded: {artist} - {title}")
            else:
                print(f"โŒ Failed to download: {artist} - {title}")

        print(f"\n๐ŸŽ‰ Download complete! {success_count}/{len(matches)} videos downloaded successfully")
        return success_count > 0

    def _download_single_video(self, video, channel_name, filename, force_download=False):
        """Download a single video using the download pipeline.

        Args:
            video: Video dict with ``"id"`` and ``"title"``.
            channel_name: Channel the video belongs to.
            filename: Accepted for signature compatibility; the pipeline is
                not given this value here.
            force_download: Accepted for signature compatibility; not used here.

        Returns:
            The result of ``DownloadPipeline.execute_pipeline`` (truthy on
            success).
        """
        video_id = video["id"]
        video_title = video["title"]

        # Extract artist and title for tracking
        artist, extracted_title = self.channel_parser.extract_artist_title(video_title, channel_name)
        if not artist and not extracted_title:
            # Fallback: use the full title
            artist = ""
            extracted_title = video_title

        # Use the download pipeline
        pipeline = DownloadPipeline(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        success = pipeline.execute_pipeline(
            video_id=video_id,
            artist=artist,
            title=extracted_title,
            channel_name=channel_name,
            video_title=video_title,
        )

        if success:
            print(f"โœ… Successfully downloaded: {video_title}")
        else:
            print(f"โŒ Failed to download: {video_title}")

        return success


def reset_songlist_all():
    """Delete all files tracked in songlist_tracking.json, clear songlist_tracking.json, and remove songlist songs from karaoke_tracking.json.

    File deletion is best-effort: a file that cannot be removed is reported
    and skipped. Both tracking files live under ``DATA_DIR``.
    """
    # Load songlist tracking
    songlist_tracking_file = DATA_DIR / "songlist_tracking.json"
    karaoke_tracking_file = DATA_DIR / "karaoke_tracking.json"

    if songlist_tracking_file.exists():
        with open(songlist_tracking_file, "r", encoding="utf-8") as f:
            tracking = json.load(f)
    else:
        tracking = {}

    # Delete all files tracked
    for entry in tracking.values():
        file_path = entry.get("file_path")
        if not file_path:
            continue
        p = Path(file_path)
        try:
            if p.exists():
                p.unlink()
                print(f"๐Ÿ—‘๏ธ Deleted: {p}")
        except OSError as e:
            print(f"โš ๏ธ Could not delete {p}: {e}")

    # Clear songlist_tracking.json
    songlist_tracking_file.write_text("{}", encoding="utf-8")
    print("๐Ÿงน Cleared songlist_tracking.json")

    # Remove songlist songs from karaoke_tracking.json
    if karaoke_tracking_file.exists():
        with open(karaoke_tracking_file, "r", encoding="utf-8") as f:
            karaoke_data = json.load(f)

        # Collect ids first: the dict must not change size while iterating it.
        song_keys_to_remove = []
        for song_id, song in karaoke_data.get("songs", {}).items():
            artist = song.get("artist", "")
            title = song.get("title", song.get("name", ""))
            key = f"{artist.lower()}_{normalize_title(title)}"
            if key in tracking:
                song_keys_to_remove.append(song_id)

        for song_id in song_keys_to_remove:
            del karaoke_data["songs"][song_id]

        with open(karaoke_tracking_file, "w", encoding="utf-8") as f:
            json.dump(karaoke_data, f, indent=2, ensure_ascii=False)
        print(
            f"๐Ÿงน Removed {len(song_keys_to_remove)} songlist songs from karaoke_tracking.json"
        )

    print("โœ… Global songlist reset complete.")


# For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
# updating all references to use the new karaoke_downloader.* imports as needed.