""" Download pipeline that abstracts the complete download → verify → tag → track process. """ from pathlib import Path from typing import Dict, Any, Optional, Tuple, List import subprocess from karaoke_downloader.youtube_utils import build_yt_dlp_command, execute_yt_dlp_command, show_available_formats from karaoke_downloader.error_utils import handle_yt_dlp_error, handle_file_validation_error, log_error from karaoke_downloader.id3_utils import add_id3_tags from karaoke_downloader.video_downloader import sanitize_filename, is_valid_mp4 from karaoke_downloader.songlist_manager import mark_songlist_song_downloaded class DownloadPipeline: """ Handles the complete download pipeline: download → verify → tag → track """ def __init__( self, yt_dlp_path: str, config: Dict[str, Any], downloads_dir: Path, songlist_tracking: Optional[Dict] = None, tracker=None ): self.yt_dlp_path = yt_dlp_path self.config = config self.downloads_dir = downloads_dir self.songlist_tracking = songlist_tracking or {} self.tracker = tracker def execute_pipeline( self, video_id: str, artist: str, title: str, channel_name: str, video_title: Optional[str] = None ) -> bool: """ Execute the complete download pipeline for a single video. Args: video_id: YouTube video ID artist: Artist name title: Song title channel_name: Channel name video_title: Original video title (optional) Returns: True if successful, False otherwise """ try: # Step 1: Prepare file path filename = sanitize_filename(artist, title) output_path = self.downloads_dir / channel_name / filename # Step 2: Download video if not self._download_video(video_id, output_path, artist, title): return False # Step 3: Verify download if not self._verify_download(output_path, artist, title, video_id, channel_name): return False # Step 4: Add ID3 tags if not self._add_tags(output_path, artist, title, channel_name): return False # Step 5: Track download if not self._track_download(output_path, artist, title, video_id, channel_name): return False print(f"✅ Pipeline completed successfully: {artist} - {title}") return True except Exception as e: print(f"❌ Pipeline failed for {artist} - {title}: {e}") return False def _download_video(self, video_id: str, output_path: Path, artist: str, title: str) -> bool: """Step 1: Download the video using yt-dlp.""" output_path.parent.mkdir(parents=True, exist_ok=True) print(f"⬇️ Downloading: {artist} - {title} -> {output_path}") video_url = f"https://www.youtube.com/watch?v={video_id}" # Build command using centralized utility cmd = build_yt_dlp_command( self.yt_dlp_path, video_url, output_path, self.config ) print(f"🔧 Running command: {' '.join(cmd)}") print(f"📺 Resolution settings: {self.config.get('download_settings', {}).get('preferred_resolution', 'Unknown')}") print(f"🎬 Format string: {self.config.get('download_settings', {}).get('format', 'Unknown')}") # Debug: Show available formats (optional) if self.config.get('debug_show_formats', False): show_available_formats(video_url, self.yt_dlp_path) try: result = execute_yt_dlp_command(cmd) print(f"✅ yt-dlp completed successfully") print(f"📄 yt-dlp stdout: {result.stdout}") return True except subprocess.CalledProcessError as e: error = handle_yt_dlp_error(e, artist, title, video_id) log_error(error) return False def _verify_download(self, output_path: Path, artist: str, title: str, video_id: str, channel_name: str) -> bool: """Step 2: Verify that the download was successful.""" if not output_path.exists(): print(f"❌ Download failed: file does not exist: {output_path}") # Check if yt-dlp saved it somewhere else possible_files = list(output_path.parent.glob("*.mp4")) if possible_files: print(f"🔍 Found these files in the directory: {[f.name for f in possible_files]}") # Look for a file that matches our pattern (artist - title) artist_part = artist.lower() title_part = title.lower() for file in possible_files: file_lower = file.stem.lower() if artist_part in file_lower and any(word in file_lower for word in title_part.split()): print(f"🎯 Found matching file: {file.name}") output_path = file break else: print(f"❌ No matching file found for: {artist} - {title}") return False else: return False # Validate file if not is_valid_mp4(output_path): error = handle_file_validation_error( "File is not a valid MP4", output_path, artist, title, video_id, channel_name ) log_error(error) return False print(f"✅ Download verified: {output_path}") return True def _add_tags(self, output_path: Path, artist: str, title: str, channel_name: str) -> bool: """Step 3: Add ID3 tags to the downloaded file.""" try: add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name) print(f"🏷️ Added ID3 tags: {artist} - {title}") return True except Exception as e: print(f"⚠️ Failed to add ID3 tags: {e}") # Don't fail the pipeline for tag issues return True def _track_download(self, output_path: Path, artist: str, title: str, video_id: str, channel_name: str) -> bool: """Step 4: Track the download in the tracking system.""" try: # Track in songlist if available if self.songlist_tracking is not None: mark_songlist_song_downloaded( self.songlist_tracking, artist, title, channel_name, output_path ) # Track in main tracking system if available if self.tracker is not None: file_size = output_path.stat().st_size if output_path.exists() else None self.tracker.mark_song_downloaded( artist, title, video_id, channel_name, output_path, file_size ) print(f"📊 Tracked download: {artist} - {title}") return True except Exception as e: print(f"⚠️ Failed to track download: {e}") # Don't fail the pipeline for tracking issues return True def batch_execute( self, videos: List[Dict[str, Any]], channel_name: str, limit: Optional[int] = None ) -> Tuple[int, int]: """ Execute the pipeline for multiple videos. Args: videos: List of video dictionaries with 'id', 'title', etc. channel_name: Channel name limit: Optional limit on number of videos to process Returns: Tuple of (successful_downloads, total_attempted) """ if limit: videos = videos[:limit] successful = 0 total = len(videos) for i, video in enumerate(videos, 1): video_id = video['id'] video_title = video.get('title', '') # Extract artist and title from video title from karaoke_downloader.id3_utils import extract_artist_title artist, title = extract_artist_title(video_title) print(f" ({i}/{total}) Processing: {artist} - {title}") if self.execute_pipeline(video_id, artist, title, channel_name, video_title): successful += 1 else: print(f" ❌ Failed to process: {artist} - {title}") return successful, total