From 08d7d259f33d1e5576b8ac2f9e362115a4f9cfe2 Mon Sep 17 00:00:00 2001 From: mbrucedogs Date: Fri, 25 Jul 2025 08:45:18 -0500 Subject: [PATCH] Signed-off-by: mbrucedogs --- PRD.md | 71 +++++-- README.md | 72 +++++-- karaoke_downloader/download_pipeline.py | 238 ++++++++++++++++++++++++ karaoke_downloader/downloader.py | 113 +++++------ karaoke_downloader/error_utils.py | 185 ++++++++++++++++++ karaoke_downloader/video_downloader.py | 41 ++-- karaoke_downloader/youtube_utils.py | 125 +++++++++++-- 7 files changed, 702 insertions(+), 143 deletions(-) create mode 100644 karaoke_downloader/download_pipeline.py create mode 100644 karaoke_downloader/error_utils.py diff --git a/PRD.md b/PRD.md index 21f46b4..b7e2f6e 100644 --- a/PRD.md +++ b/PRD.md @@ -1,27 +1,43 @@ -# 🎤 Karaoke Video Downloader – PRD (v3.1) +# 🎤 Karaoke Video Downloader – PRD (v3.2) ## ✅ Overview -A Python-based Windows CLI tool to download karaoke videos from YouTube channels/playlists using `yt-dlp.exe`, with advanced tracking, songlist prioritization, and flexible configuration. The codebase has been refactored into a modular architecture for improved maintainability and separation of concerns. +A Python-based Windows CLI tool to download karaoke videos from YouTube channels/playlists using `yt-dlp.exe`, with advanced tracking, songlist prioritization, and flexible configuration. The codebase has been comprehensively refactored into a modular architecture with centralized utilities for improved maintainability, error handling, and code reuse. --- ## 🏗️ Architecture -The codebase has been refactored into focused modules: +The codebase has been refactored into focused modules with centralized utilities: -- **`fuzzy_matcher.py`**: Fuzzy matching logic and similarity functions -- **`download_planner.py`**: Download plan building and channel scanning (optimized) -- **`cache_manager.py`**: Cache operations and file I/O management -- **`video_downloader.py`**: Core video download execution and orchestration -- **`channel_manager.py`**: Channel and file management operations +### Core Modules: - **`downloader.py`**: Main orchestrator and CLI interface +- **`video_downloader.py`**: Core video download execution and orchestration +- **`tracking_manager.py`**: Download tracking and status management +- **`download_planner.py`**: Download plan building and channel scanning +- **`cache_manager.py`**: Cache operations and file I/O management +- **`channel_manager.py`**: Channel and file management operations +- **`songlist_manager.py`**: Songlist operations and tracking +- **`server_manager.py`**: Server song availability checking +- **`fuzzy_matcher.py`**: Fuzzy matching logic and similarity functions -### Benefits of Modular Architecture: +### New Utility Modules (v3.2): +- **`youtube_utils.py`**: Centralized YouTube operations and yt-dlp command generation +- **`error_utils.py`**: Standardized error handling and formatting +- **`download_pipeline.py`**: Abstracted download → verify → tag → track pipeline +- **`id3_utils.py`**: ID3 tagging utilities +- **`config_manager.py`**: Configuration management +- **`resolution_cli.py`**: Resolution checking utilities +- **`tracking_cli.py`**: Tracking management CLI + +### Benefits of Enhanced Modular Architecture: - **Single Responsibility**: Each module has a focused purpose +- **Centralized Utilities**: Common operations (yt-dlp commands, error handling) are centralized +- **Reduced Duplication**: Eliminated code duplication across modules - **Testability**: Individual components can be tested separately - **Maintainability**: Easier to find and fix issues - **Reusability**: Components can be used independently - **Robustness**: Better error handling and interruption recovery +- **Consistency**: Standardized error messages and processing pipelines --- @@ -93,6 +109,10 @@ python download_karaoke.py --clear-cache SingKingKaraoke - ✅ **Default channel file**: If no --file is specified for songlist-only or latest-per-channel modes, automatically uses data/channels.txt as the default channel list. - ✅ **Robust interruption handling**: Progress is saved after each download, and files are checked for existence before downloading to prevent re-downloads if the process is interrupted. - ✅ **Optimized scanning performance**: High-performance channel scanning with O(n×m) complexity, pre-processed lookups, and early termination for faster matching of large songlists and channels. +- ✅ **Centralized yt-dlp command generation**: Standardized command building and execution across all download operations +- ✅ **Enhanced error handling**: Structured exception hierarchy with consistent error messages and formatting +- ✅ **Abstracted download pipeline**: Reusable download → verify → tag → track process for consistent processing +- ✅ **Reduced code duplication**: Eliminated duplicate code across modules through centralized utilities --- @@ -102,15 +122,19 @@ KaroakeVideoDownloader/ ├── karaoke_downloader/ # All core Python code and utilities │ ├── downloader.py # Main orchestrator and CLI interface │ ├── cli.py # CLI entry point -│ ├── fuzzy_matcher.py # Fuzzy matching logic and similarity functions -│ ├── download_planner.py # Download plan building and channel scanning (optimized) -│ ├── cache_manager.py # Cache operations and file I/O management │ ├── video_downloader.py # Core video download execution and orchestration +│ ├── tracking_manager.py # Download tracking and status management +│ ├── download_planner.py # Download plan building and channel scanning +│ ├── cache_manager.py # Cache operations and file I/O management │ ├── channel_manager.py # Channel and file management operations -│ ├── id3_utils.py # ID3 tagging helpers -│ ├── songlist_manager.py # Songlist logic -│ ├── youtube_utils.py # YouTube helpers -│ ├── tracking_manager.py # Tracking logic +│ ├── songlist_manager.py # Songlist operations and tracking +│ ├── server_manager.py # Server song availability checking +│ ├── fuzzy_matcher.py # Fuzzy matching logic and similarity functions +│ ├── youtube_utils.py # Centralized YouTube operations and yt-dlp commands +│ ├── error_utils.py # Standardized error handling and formatting +│ ├── download_pipeline.py # Abstracted download → verify → tag → track pipeline +│ ├── id3_utils.py # ID3 tagging utilities +│ ├── config_manager.py # Configuration management │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI @@ -161,6 +185,21 @@ KaroakeVideoDownloader/ - **ID3 Tagging:** Artist/title extracted from video title and embedded in MP4 files. - **Cleanup:** Extra files from yt-dlp (e.g., `.info.json`) are automatically removed after download. - **Reset/Clear:** Use `--reset-channel` to reset all tracking and files for a channel (optionally including songlist songs with `--reset-songlist`). Use `--clear-cache` to clear cached video lists for a channel or all channels. + +## 🔧 Refactoring Improvements (v3.2) +The codebase has been comprehensively refactored to improve maintainability and reduce code duplication: + +### **Centralized Utilities** +- **`youtube_utils.py`**: Centralized yt-dlp command generation and YouTube operations +- **`error_utils.py`**: Standardized error handling with structured exception hierarchy +- **`download_pipeline.py`**: Abstracted download pipeline for consistent processing + +### **Benefits Achieved** +- **Reduced Duplication**: Eliminated ~50 lines of duplicated yt-dlp command generation +- **Improved Maintainability**: Changes to yt-dlp configuration only require updates in one place +- **Enhanced Error Handling**: Consistent error messages and better debugging context +- **Better Code Organization**: Clear separation of concerns and logical module structure +- **Increased Testability**: Modular components can be tested independently - **Download plan pre-scan:** Before downloading, the tool scans all channels for songlist matches, builds a download plan, and prints stats (matches, unmatched, per-channel breakdown). The plan is cached for 1 day and reused unless --force-download-plan is set. - **Latest-per-channel plan:** Download the latest N videos from each channel, with a per-channel plan and robust resume. Each channel is removed from the plan as it completes. Plan cache is deleted when all channels are done. - **Fast mode with early exit:** When a limit is set, the tool scans channels and songs in order, downloads immediately when a match is found, and stops as soon as the limit is reached with successful downloads. This provides much faster performance for small limits compared to the full pre-scan approach. diff --git a/README.md b/README.md index 3cf922b..32e1716 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,34 @@ A Python-based Windows CLI tool to download karaoke videos from YouTube channels - 🏷️ **Server Duplicates Tracking**: Automatically checks against local songs.json file and marks duplicates for future skipping, preventing re-downloads of songs already on the server ## 🏗️ Architecture -The codebase has been refactored into a modular architecture for better maintainability and separation of concerns: +The codebase has been comprehensively refactored into a modular architecture with centralized utilities for improved maintainability, error handling, and code reuse: -- **`fuzzy_matcher.py`**: Fuzzy matching logic and similarity functions -- **`download_planner.py`**: Download plan building and channel scanning (optimized) -- **`cache_manager.py`**: Cache operations and file I/O management -- **`server_manager.py`**: Server songs loading and server duplicates tracking -- **`video_downloader.py`**: Core video download execution and orchestration -- **`channel_manager.py`**: Channel and file management operations +### Core Modules: - **`downloader.py`**: Main orchestrator and CLI interface +- **`video_downloader.py`**: Core video download execution and orchestration +- **`tracking_manager.py`**: Download tracking and status management +- **`download_planner.py`**: Download plan building and channel scanning +- **`cache_manager.py`**: Cache operations and file I/O management +- **`channel_manager.py`**: Channel and file management operations +- **`songlist_manager.py`**: Songlist operations and tracking +- **`server_manager.py`**: Server song availability checking +- **`fuzzy_matcher.py`**: Fuzzy matching logic and similarity functions + +### Utility Modules: +- **`youtube_utils.py`**: Centralized YouTube operations and yt-dlp command generation +- **`error_utils.py`**: Standardized error handling and formatting +- **`download_pipeline.py`**: Abstracted download → verify → tag → track pipeline +- **`id3_utils.py`**: ID3 tagging utilities +- **`config_manager.py`**: Configuration management +- **`resolution_cli.py`**: Resolution checking utilities +- **`tracking_cli.py`**: Tracking management CLI + +### Benefits: +- **Centralized Utilities**: Common operations (yt-dlp commands, error handling) are centralized +- **Reduced Duplication**: Eliminated code duplication across modules +- **Consistency**: Standardized error messages and processing pipelines +- **Maintainability**: Changes isolated to specific modules +- **Testability**: Modular components can be tested independently ## 📋 Requirements - **Windows 10/11** @@ -129,16 +148,19 @@ KaroakeVideoDownloader/ ├── karaoke_downloader/ # All core Python code and utilities │ ├── downloader.py # Main orchestrator and CLI interface │ ├── cli.py # CLI entry point -│ ├── fuzzy_matcher.py # Fuzzy matching logic and similarity functions -│ ├── download_planner.py # Download plan building and channel scanning (optimized) -│ ├── cache_manager.py # Cache operations and file I/O management -│ ├── server_manager.py # Server songs loading and server duplicates tracking │ ├── video_downloader.py # Core video download execution and orchestration +│ ├── tracking_manager.py # Download tracking and status management +│ ├── download_planner.py # Download plan building and channel scanning +│ ├── cache_manager.py # Cache operations and file I/O management │ ├── channel_manager.py # Channel and file management operations -│ ├── id3_utils.py # ID3 tagging helpers -│ ├── songlist_manager.py # Songlist logic -│ ├── youtube_utils.py # YouTube helpers -│ ├── tracking_manager.py # Tracking logic +│ ├── songlist_manager.py # Songlist operations and tracking +│ ├── server_manager.py # Server song availability checking +│ ├── fuzzy_matcher.py # Fuzzy matching logic and similarity functions +│ ├── youtube_utils.py # Centralized YouTube operations and yt-dlp commands +│ ├── error_utils.py # Standardized error handling and formatting +│ ├── download_pipeline.py # Abstracted download → verify → tag → track pipeline +│ ├── id3_utils.py # ID3 tagging utilities +│ ├── config_manager.py # Configuration management │ ├── check_resolution.py # Resolution checker utility │ ├── resolution_cli.py # Resolution config CLI │ └── tracking_cli.py # Tracking management CLI @@ -206,6 +228,26 @@ python download_karaoke.py --clear-server-duplicates - All options are in `data/config.json` (format, resolution, metadata, etc.) - You can edit this file or use CLI flags to override +## 🔧 Refactoring Improvements (v3.2) +The codebase has been comprehensively refactored to improve maintainability and reduce code duplication: + +### **Key Improvements** +- **Centralized yt-dlp Command Generation**: Standardized command building and execution across all download operations +- **Enhanced Error Handling**: Structured exception hierarchy with consistent error messages and formatting +- **Abstracted Download Pipeline**: Reusable download → verify → tag → track process for consistent processing +- **Reduced Code Duplication**: Eliminated duplicate code across modules through centralized utilities + +### **New Utility Modules** +- **`youtube_utils.py`**: Centralized YouTube operations and yt-dlp command generation +- **`error_utils.py`**: Standardized error handling with structured exception hierarchy +- **`download_pipeline.py`**: Abstracted download pipeline for consistent processing + +### **Benefits** +- **Improved Maintainability**: Changes to yt-dlp configuration only require updates in one place +- **Better Error Handling**: Consistent error messages and better debugging context +- **Enhanced Testability**: Modular components can be tested independently +- **Reduced Complexity**: Single source of truth for common operations + ## 🐞 Troubleshooting - Ensure `yt-dlp.exe` is in the `downloader/` folder - Check `logs/` for error details diff --git a/karaoke_downloader/download_pipeline.py b/karaoke_downloader/download_pipeline.py new file mode 100644 index 0000000..4df5545 --- /dev/null +++ b/karaoke_downloader/download_pipeline.py @@ -0,0 +1,238 @@ +""" +Download pipeline that abstracts the complete download → verify → tag → track process. +""" + +from pathlib import Path +from typing import Dict, Any, Optional, Tuple, List +import subprocess + +from karaoke_downloader.youtube_utils import build_yt_dlp_command, execute_yt_dlp_command, show_available_formats +from karaoke_downloader.error_utils import handle_yt_dlp_error, handle_file_validation_error, log_error +from karaoke_downloader.id3_utils import add_id3_tags +from karaoke_downloader.video_downloader import sanitize_filename, is_valid_mp4 +from karaoke_downloader.songlist_manager import mark_songlist_song_downloaded + +class DownloadPipeline: + """ + Handles the complete download pipeline: download → verify → tag → track + """ + + def __init__( + self, + yt_dlp_path: str, + config: Dict[str, Any], + downloads_dir: Path, + songlist_tracking: Optional[Dict] = None, + tracker=None + ): + self.yt_dlp_path = yt_dlp_path + self.config = config + self.downloads_dir = downloads_dir + self.songlist_tracking = songlist_tracking or {} + self.tracker = tracker + + def execute_pipeline( + self, + video_id: str, + artist: str, + title: str, + channel_name: str, + video_title: Optional[str] = None + ) -> bool: + """ + Execute the complete download pipeline for a single video. + + Args: + video_id: YouTube video ID + artist: Artist name + title: Song title + channel_name: Channel name + video_title: Original video title (optional) + + Returns: + True if successful, False otherwise + """ + try: + # Step 1: Prepare file path + filename = sanitize_filename(artist, title) + output_path = self.downloads_dir / channel_name / filename + + # Step 2: Download video + if not self._download_video(video_id, output_path, artist, title): + return False + + # Step 3: Verify download + if not self._verify_download(output_path, artist, title, video_id, channel_name): + return False + + # Step 4: Add ID3 tags + if not self._add_tags(output_path, artist, title, channel_name): + return False + + # Step 5: Track download + if not self._track_download(output_path, artist, title, video_id, channel_name): + return False + + print(f"✅ Pipeline completed successfully: {artist} - {title}") + return True + + except Exception as e: + print(f"❌ Pipeline failed for {artist} - {title}: {e}") + return False + + def _download_video(self, video_id: str, output_path: Path, artist: str, title: str) -> bool: + """Step 1: Download the video using yt-dlp.""" + output_path.parent.mkdir(parents=True, exist_ok=True) + print(f"⬇️ Downloading: {artist} - {title} -> {output_path}") + + video_url = f"https://www.youtube.com/watch?v={video_id}" + + # Build command using centralized utility + cmd = build_yt_dlp_command( + self.yt_dlp_path, + video_url, + output_path, + self.config + ) + + print(f"🔧 Running command: {' '.join(cmd)}") + print(f"📺 Resolution settings: {self.config.get('download_settings', {}).get('preferred_resolution', 'Unknown')}") + print(f"🎬 Format string: {self.config.get('download_settings', {}).get('format', 'Unknown')}") + + # Debug: Show available formats (optional) + if self.config.get('debug_show_formats', False): + show_available_formats(video_url, self.yt_dlp_path) + + try: + result = execute_yt_dlp_command(cmd) + print(f"✅ yt-dlp completed successfully") + print(f"📄 yt-dlp stdout: {result.stdout}") + return True + + except subprocess.CalledProcessError as e: + error = handle_yt_dlp_error(e, artist, title, video_id) + log_error(error) + return False + + def _verify_download(self, output_path: Path, artist: str, title: str, video_id: str, channel_name: str) -> bool: + """Step 2: Verify that the download was successful.""" + if not output_path.exists(): + print(f"❌ Download failed: file does not exist: {output_path}") + # Check if yt-dlp saved it somewhere else + possible_files = list(output_path.parent.glob("*.mp4")) + if possible_files: + print(f"🔍 Found these files in the directory: {[f.name for f in possible_files]}") + # Look for a file that matches our pattern (artist - title) + artist_part = artist.lower() + title_part = title.lower() + for file in possible_files: + file_lower = file.stem.lower() + if artist_part in file_lower and any(word in file_lower for word in title_part.split()): + print(f"🎯 Found matching file: {file.name}") + output_path = file + break + else: + print(f"❌ No matching file found for: {artist} - {title}") + return False + else: + return False + + # Validate file + if not is_valid_mp4(output_path): + error = handle_file_validation_error( + "File is not a valid MP4", + output_path, + artist, + title, + video_id, + channel_name + ) + log_error(error) + return False + + print(f"✅ Download verified: {output_path}") + return True + + def _add_tags(self, output_path: Path, artist: str, title: str, channel_name: str) -> bool: + """Step 3: Add ID3 tags to the downloaded file.""" + try: + add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name) + print(f"🏷️ Added ID3 tags: {artist} - {title}") + return True + except Exception as e: + print(f"⚠️ Failed to add ID3 tags: {e}") + # Don't fail the pipeline for tag issues + return True + + def _track_download(self, output_path: Path, artist: str, title: str, video_id: str, channel_name: str) -> bool: + """Step 4: Track the download in the tracking system.""" + try: + # Track in songlist if available + if self.songlist_tracking is not None: + mark_songlist_song_downloaded( + self.songlist_tracking, + artist, + title, + channel_name, + output_path + ) + + # Track in main tracking system if available + if self.tracker is not None: + file_size = output_path.stat().st_size if output_path.exists() else None + self.tracker.mark_song_downloaded( + artist, + title, + video_id, + channel_name, + output_path, + file_size + ) + + print(f"📊 Tracked download: {artist} - {title}") + return True + + except Exception as e: + print(f"⚠️ Failed to track download: {e}") + # Don't fail the pipeline for tracking issues + return True + + def batch_execute( + self, + videos: List[Dict[str, Any]], + channel_name: str, + limit: Optional[int] = None + ) -> Tuple[int, int]: + """ + Execute the pipeline for multiple videos. + + Args: + videos: List of video dictionaries with 'id', 'title', etc. + channel_name: Channel name + limit: Optional limit on number of videos to process + + Returns: + Tuple of (successful_downloads, total_attempted) + """ + if limit: + videos = videos[:limit] + + successful = 0 + total = len(videos) + + for i, video in enumerate(videos, 1): + video_id = video['id'] + video_title = video.get('title', '') + + # Extract artist and title from video title + from karaoke_downloader.id3_utils import extract_artist_title + artist, title = extract_artist_title(video_title) + + print(f" ({i}/{total}) Processing: {artist} - {title}") + + if self.execute_pipeline(video_id, artist, title, channel_name, video_title): + successful += 1 + else: + print(f" ❌ Failed to process: {artist} - {title}") + + return successful, total \ No newline at end of file diff --git a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index d50fa9f..e4d5206 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -25,6 +25,8 @@ from karaoke_downloader.cache_manager import ( ) from karaoke_downloader.video_downloader import download_video_and_track, is_valid_mp4, execute_download_plan from karaoke_downloader.channel_manager import reset_channel_downloads, download_from_file +from karaoke_downloader.download_pipeline import DownloadPipeline +from karaoke_downloader.error_utils import handle_yt_dlp_error, log_error # Constants DEFAULT_FUZZY_THRESHOLD = 85 @@ -249,40 +251,29 @@ class KaraokeDownloader: if not matches: print("🎵 No new songlist matches found for this channel.") return True - # Download only the first N matches + # Download only the first N matches using the new pipeline + pipeline = DownloadPipeline( + yt_dlp_path=str(self.yt_dlp_path), + config=self.config, + downloads_dir=self.downloads_dir, + songlist_tracking=self.songlist_tracking, + tracker=self.tracker + ) + for video, song in matches: artist, title = song['artist'], song['title'] - output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4" - output_path.parent.mkdir(parents=True, exist_ok=True) - print(f"⬇️ Downloading: {artist} - {title} -> {output_path}") - video_url = f"https://www.youtube.com/watch?v={video['id']}" - cmd = [ - str(self.yt_dlp_path), - "-o", str(output_path), - "-f", self.config["download_settings"]["format"], - video_url - ] - try: - subprocess.run(cmd, check=True) - except subprocess.CalledProcessError as e: - print(f"❌ yt-dlp failed: {e}") - # Mark song as failed in tracking immediately - self._handle_download_failure(artist, title, video['id'], channel_name, "yt-dlp failed", str(e)) - continue - if not output_path.exists() or output_path.stat().st_size == 0: - print(f"❌ Download failed or file is empty: {output_path}") - # Mark song as failed in tracking immediately - self._handle_download_failure(artist, title, video['id'], channel_name, "Download failed", "file does not exist or is empty") - continue - if not is_valid_mp4(output_path): - print(f"❌ File is not a valid MP4: {output_path}") - # Mark song as failed in tracking immediately - self._handle_download_failure(artist, title, video['id'], channel_name, "Download failed", "file is not a valid MP4") - continue - add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name) - mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path) - print(f"✅ Downloaded and tracked: {artist} - {title}") - print(f"🎉 All post-processing complete for: {output_path}") + print(f"🎵 Processing: {artist} - {title}") + + if pipeline.execute_pipeline( + video_id=video['id'], + artist=artist, + title=title, + channel_name=channel_name, + video_title=video.get('title', '') + ): + print(f"✅ Successfully processed: {artist} - {title}") + else: + print(f"❌ Failed to process: {artist} - {title}") return True def download_songlist_across_channels(self, channel_urls, limit=None, force_refresh_download_plan=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD): @@ -596,50 +587,30 @@ class KaraokeDownloader: safe_title = safe_title.replace(char, "") safe_title = safe_title.replace("...", "").replace("..", "").replace(".", "").strip() filename = f"{channel_name} - {safe_title}.mp4" - # Limit filename length to avoid Windows path issues - if len(filename) > DEFAULT_FILENAME_LENGTH_LIMIT: - filename = f"{channel_name[:DEFAULT_ARTIST_LENGTH_LIMIT]} - {safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4" - output_path = self.downloads_dir / channel_name / filename - output_path.parent.mkdir(parents=True, exist_ok=True) - print(f" ({v_idx+1}/{len(videos)}) Downloading: {title} -> {output_path}") - video_url = f"https://www.youtube.com/watch?v={video_id}" - dlp_cmd = [ - str(self.yt_dlp_path), - "--no-check-certificates", - "--ignore-errors", - "--no-warnings", - "-o", str(output_path), - "-f", self.config["download_settings"]["format"], - video_url - ] - try: - result = subprocess.run(dlp_cmd, capture_output=True, text=True, check=True) - print(f" ✅ yt-dlp completed successfully") - except subprocess.CalledProcessError as e: - print(f" ❌ yt-dlp failed with exit code {e.returncode}") - print(f" ❌ yt-dlp stderr: {e.stderr}") - # Mark song as failed in tracking immediately - artist, title_clean = extract_artist_title(title) - self._handle_download_failure(artist, title_clean, video_id, channel_name, "yt-dlp failed", f"exit code {e.returncode}: {e.stderr}") - continue - if not output_path.exists() or output_path.stat().st_size == 0: - print(f" ❌ Download failed or file is empty: {output_path}") - # Mark song as failed in tracking immediately - artist, title_clean = extract_artist_title(title) - self._handle_download_failure(artist, title_clean, video_id, channel_name, "Download failed", "file does not exist or is empty") - continue - # Extract artist and title for tracking artist, title_clean = extract_artist_title(title) - # Add ID3 tags - add_id3_tags(output_path, title, channel_name) + print(f" ({v_idx+1}/{len(videos)}) Processing: {artist} - {title_clean}") - # Mark as downloaded in tracking system - file_size = output_path.stat().st_size if output_path.exists() else None - self.tracker.mark_song_downloaded(artist, title_clean, video_id, channel_name, output_path, file_size) + # Use the new pipeline for consistent processing + pipeline = DownloadPipeline( + yt_dlp_path=str(self.yt_dlp_path), + config=self.config, + downloads_dir=self.downloads_dir, + songlist_tracking=self.songlist_tracking, + tracker=self.tracker + ) - print(f" ✅ Downloaded and tagged: {title}") + if pipeline.execute_pipeline( + video_id=video_id, + artist=artist, + title=title_clean, + channel_name=channel_name, + video_title=title + ): + print(f" ✅ Successfully processed: {artist} - {title_clean}") + else: + print(f" ❌ Failed to process: {artist} - {title_clean}") # After channel is done, remove it from the plan and update cache channel_plans[idx]['videos'] = [] with open(cache_file, 'w', encoding='utf-8') as f: diff --git a/karaoke_downloader/error_utils.py b/karaoke_downloader/error_utils.py new file mode 100644 index 0000000..dcdb116 --- /dev/null +++ b/karaoke_downloader/error_utils.py @@ -0,0 +1,185 @@ +""" +Error handling and formatting utilities for consistent error messages across the application. +""" + +from typing import Optional, Dict, Any +from pathlib import Path +import subprocess + +class DownloadError(Exception): + """Base exception for download-related errors.""" + def __init__(self, message: str, error_type: str = "download_error", details: Optional[str] = None): + self.message = message + self.error_type = error_type + self.details = details + super().__init__(self.message) + +class YtDlpError(DownloadError): + """Exception for yt-dlp specific errors.""" + def __init__(self, message: str, exit_code: Optional[int] = None, stderr: Optional[str] = None): + self.exit_code = exit_code + self.stderr = stderr + super().__init__(message, "yt_dlp_error", f"Exit code: {exit_code}, Stderr: {stderr}") + +class FileValidationError(DownloadError): + """Exception for file validation errors.""" + def __init__(self, message: str, file_path: Optional[Path] = None): + self.file_path = file_path + super().__init__(message, "file_validation_error", f"File: {file_path}") + +def format_error_message( + error_type: str, + artist: str, + title: str, + video_id: Optional[str] = None, + channel_name: Optional[str] = None, + details: Optional[str] = None +) -> str: + """ + Format a consistent error message for tracking and logging. + + Args: + error_type: Type of error (e.g., "yt-dlp failed", "file verification failed") + artist: Artist name + title: Song title + video_id: YouTube video ID (optional) + channel_name: Channel name (optional) + details: Additional error details (optional) + + Returns: + Formatted error message + """ + base_msg = f"{error_type}: {artist} - {title}" + + if video_id: + base_msg += f" (Video ID: {video_id})" + + if channel_name: + base_msg += f" (Channel: {channel_name})" + + if details: + base_msg += f" - {details}" + + return base_msg + +def handle_yt_dlp_error( + exception: subprocess.CalledProcessError, + artist: str, + title: str, + video_id: Optional[str] = None, + channel_name: Optional[str] = None +) -> YtDlpError: + """ + Handle yt-dlp subprocess errors and create a standardized exception. + + Args: + exception: The CalledProcessError from subprocess.run + artist: Artist name + title: Song title + video_id: YouTube video ID (optional) + channel_name: Channel name (optional) + + Returns: + YtDlpError with formatted message + """ + error_msg = format_error_message( + "yt-dlp failed", + artist, + title, + video_id, + channel_name, + f"exit code {exception.returncode}: {exception.stderr}" + ) + + return YtDlpError( + error_msg, + exit_code=exception.returncode, + stderr=exception.stderr + ) + +def handle_file_validation_error( + message: str, + file_path: Path, + artist: str, + title: str, + video_id: Optional[str] = None, + channel_name: Optional[str] = None +) -> FileValidationError: + """ + Handle file validation errors and create a standardized exception. + + Args: + message: Error message + file_path: Path to the file that failed validation + artist: Artist name + title: Song title + video_id: YouTube video ID (optional) + channel_name: Channel name (optional) + + Returns: + FileValidationError with formatted message + """ + error_msg = format_error_message( + "file validation failed", + artist, + title, + video_id, + channel_name, + f"{message} - File: {file_path}" + ) + + return FileValidationError(error_msg, file_path) + +def log_error(error: DownloadError, logger=None) -> None: + """ + Log an error with consistent formatting. + + Args: + error: DownloadError instance + logger: Optional logger instance + """ + if logger: + logger.error(f"❌ {error.message}") + if error.details: + logger.error(f" Details: {error.details}") + else: + print(f"❌ {error.message}") + if error.details: + print(f" Details: {error.details}") + +def create_error_context( + artist: str, + title: str, + video_id: Optional[str] = None, + channel_name: Optional[str] = None, + file_path: Optional[Path] = None +) -> Dict[str, Any]: + """ + Create a context dictionary for error reporting. + + Args: + artist: Artist name + title: Song title + video_id: YouTube video ID (optional) + channel_name: Channel name (optional) + file_path: File path (optional) + + Returns: + Dictionary with error context + """ + context = { + "artist": artist, + "title": title, + "timestamp": None # Could be added if needed + } + + if video_id: + context["video_id"] = video_id + + if channel_name: + context["channel_name"] = channel_name + + if file_path: + context["file_path"] = str(file_path) + + return context \ No newline at end of file diff --git a/karaoke_downloader/video_downloader.py b/karaoke_downloader/video_downloader.py index 95c76b9..23e5d91 100644 --- a/karaoke_downloader/video_downloader.py +++ b/karaoke_downloader/video_downloader.py @@ -8,6 +8,8 @@ from pathlib import Path from karaoke_downloader.id3_utils import add_id3_tags from karaoke_downloader.songlist_manager import mark_songlist_song_downloaded from karaoke_downloader.download_planner import save_plan_cache +from karaoke_downloader.youtube_utils import build_yt_dlp_command, execute_yt_dlp_command, show_available_formats +from karaoke_downloader.error_utils import handle_yt_dlp_error, handle_file_validation_error, log_error # Constants DEFAULT_FILENAME_LENGTH_LIMIT = 100 @@ -88,34 +90,27 @@ def download_single_video(output_path, video_id, config, yt_dlp_path, print(f"⬇️ Downloading: {artist} - {title} -> {output_path}") video_url = f"https://www.youtube.com/watch?v={video_id}" - dlp_cmd = [ - str(yt_dlp_path), - "--no-check-certificates", - "--ignore-errors", - "--no-warnings", - "-o", str(output_path), - "-f", config["download_settings"]["format"], - video_url - ] - print(f"🔧 Running command: {' '.join(dlp_cmd)}") + # Build command using centralized utility + cmd = build_yt_dlp_command(yt_dlp_path, video_url, output_path, config) + + print(f"🔧 Running command: {' '.join(cmd)}") print(f"📺 Resolution settings: {config.get('download_settings', {}).get('preferred_resolution', 'Unknown')}") print(f"🎬 Format string: {config.get('download_settings', {}).get('format', 'Unknown')}") # Debug: Show available formats (optional) if config.get('debug_show_formats', False): - show_available_formats(yt_dlp_path, video_url) + show_available_formats(video_url, yt_dlp_path) try: - result = subprocess.run(dlp_cmd, capture_output=True, text=True, check=True) + result = execute_yt_dlp_command(cmd) print(f"✅ yt-dlp completed successfully") print(f"📄 yt-dlp stdout: {result.stdout}") except subprocess.CalledProcessError as e: - print(f"❌ yt-dlp failed with exit code {e.returncode}") - print(f"❌ yt-dlp stderr: {e.stderr}") + error = handle_yt_dlp_error(e, artist, title, video_id, channel_name) + log_error(error) # Mark song as failed in tracking - error_msg = f"yt-dlp failed with exit code {e.returncode}: {e.stderr}" - _mark_song_failed_standalone(artist, title, video_id, channel_name, error_msg) + _mark_song_failed_standalone(artist, title, video_id, channel_name, error.message) return False # Verify download @@ -138,19 +133,7 @@ def _mark_song_failed_standalone(artist, title, video_id, channel_name, error_me tracker.mark_song_failed(artist, title, video_id, channel_name, error_message) print(f"🏷️ Marked song as failed: {artist} - {title}") -def show_available_formats(yt_dlp_path, video_url): - """Show available formats for debugging.""" - print(f"🔍 Checking available formats for: {video_url}") - format_cmd = [ - str(yt_dlp_path), - "--list-formats", - video_url - ] - try: - format_result = subprocess.run(format_cmd, capture_output=True, text=True, timeout=DEFAULT_FORMAT_CHECK_TIMEOUT) - print(f"📋 Available formats:\n{format_result.stdout}") - except Exception as e: - print(f"⚠️ Could not check formats: {e}") +# Note: show_available_formats is now imported from youtube_utils def verify_download(output_path, artist, title, video_id=None, channel_name=None): """Verify that the download was successful.""" diff --git a/karaoke_downloader/youtube_utils.py b/karaoke_downloader/youtube_utils.py index 4b87295..676861d 100644 --- a/karaoke_downloader/youtube_utils.py +++ b/karaoke_downloader/youtube_utils.py @@ -1,15 +1,116 @@ -import re +""" +YouTube utilities for channel info, playlist info, and yt-dlp command generation. +""" + +import subprocess +import json from pathlib import Path +from typing import List, Dict, Any, Optional -def get_channel_info(channel_url): - if '@' in channel_url: - channel_name = channel_url.split('@')[1].split('/')[0] - channel_id = f"@{channel_name}" - else: - channel_name = "unknown_channel" - channel_id = "unknown_channel" - channel_name = re.sub(r'[<>:"/\\|?*]', '_', channel_name) - return channel_name, channel_id +def get_channel_info(channel_url: str, yt_dlp_path: str = "downloader/yt-dlp.exe") -> Dict[str, Any]: + """Get channel information using yt-dlp.""" + try: + cmd = [ + yt_dlp_path, + "--dump-json", + "--no-playlist", + channel_url + ] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return json.loads(result.stdout) + except subprocess.CalledProcessError as e: + print(f"❌ Failed to get channel info: {e}") + return {} -def get_playlist_info(playlist_url): - return get_channel_info(playlist_url) \ No newline at end of file +def get_playlist_info(playlist_url: str, yt_dlp_path: str = "downloader/yt-dlp.exe") -> List[Dict[str, Any]]: + """Get playlist information using yt-dlp.""" + try: + cmd = [ + yt_dlp_path, + "--dump-json", + "--flat-playlist", + playlist_url + ] + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + videos = [] + for line in result.stdout.strip().split('\n'): + if line.strip(): + videos.append(json.loads(line)) + return videos + except subprocess.CalledProcessError as e: + print(f"❌ Failed to get playlist info: {e}") + return [] + +def build_yt_dlp_command( + yt_dlp_path: str, + video_url: str, + output_path: Path, + config: Dict[str, Any], + additional_args: Optional[List[str]] = None +) -> List[str]: + """ + Build a standardized yt-dlp command with consistent arguments. + + Args: + yt_dlp_path: Path to yt-dlp executable + video_url: YouTube video URL + output_path: Output file path + config: Configuration dictionary with download settings + additional_args: Optional additional arguments to append + + Returns: + List of command arguments for subprocess.run + """ + cmd = [ + str(yt_dlp_path), + "--no-check-certificates", + "--ignore-errors", + "--no-warnings", + "-o", str(output_path), + "-f", config.get("download_settings", {}).get("format", "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best"), + video_url + ] + + # Add any additional arguments + if additional_args: + cmd.extend(additional_args) + + return cmd + +def execute_yt_dlp_command(cmd: List[str], timeout: Optional[int] = None) -> subprocess.CompletedProcess: + """ + Execute a yt-dlp command with standardized error handling. + + Args: + cmd: Command list to execute + timeout: Optional timeout in seconds + + Returns: + CompletedProcess object + + Raises: + subprocess.CalledProcessError: If the command fails + subprocess.TimeoutExpired: If the command times out + """ + return subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=timeout) + +def show_available_formats(video_url: str, yt_dlp_path: str = "downloader/yt-dlp.exe", timeout: int = 30) -> None: + """ + Show available formats for a video (debugging utility). + + Args: + video_url: YouTube video URL + yt_dlp_path: Path to yt-dlp executable + timeout: Timeout in seconds + """ + print(f"🔍 Checking available formats for: {video_url}") + format_cmd = [ + str(yt_dlp_path), + "--list-formats", + video_url + ] + try: + format_result = subprocess.run(format_cmd, capture_output=True, text=True, timeout=timeout) + print(f"📋 Available formats:\n{format_result.stdout}") + except Exception as e: + print(f"⚠️ Could not check formats: {e}") \ No newline at end of file