300 lines
10 KiB
Python
300 lines
10 KiB
Python
"""
|
|
Download pipeline that abstracts the complete download → verify → tag → track process.
|
|
"""
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
from karaoke_downloader.config_manager import AppConfig
|
|
|
|
from karaoke_downloader.error_utils import (
|
|
handle_file_validation_error,
|
|
handle_yt_dlp_error,
|
|
log_error,
|
|
)
|
|
from karaoke_downloader.id3_utils import add_id3_tags
|
|
from karaoke_downloader.songlist_manager import mark_songlist_song_downloaded
|
|
from karaoke_downloader.video_downloader import is_valid_mp4, sanitize_filename
|
|
from karaoke_downloader.youtube_utils import (
|
|
build_yt_dlp_command,
|
|
execute_yt_dlp_command,
|
|
show_available_formats,
|
|
)
|
|
|
|
|
|
class DownloadPipeline:
    """
    Handles the complete download pipeline: download → verify → tag → track

    Each stage is a private method. ``execute_pipeline`` runs the stages for
    one video and ``batch_execute`` runs ``execute_pipeline`` over a list of
    video dictionaries. Failures are reported to ``tracker`` (when one was
    supplied) via ``tracker.mark_song_failed``.
    """

    def __init__(
        self,
        yt_dlp_path: str,
        config: Union[AppConfig, Dict[str, Any]],
        downloads_dir: Path,
        songlist_tracking: Optional[Dict] = None,
        tracker=None,
    ):
        """
        Store the collaborators used by the pipeline stages.

        Args:
            yt_dlp_path: Path to the yt-dlp executable, passed to the command builder.
            config: Application configuration, either an ``AppConfig`` or a plain dict.
            downloads_dir: Root directory; files go to ``downloads_dir / channel_name``.
            songlist_tracking: Mapping handed to ``mark_songlist_song_downloaded``.
                ``None`` is replaced by a fresh empty dict.
            tracker: Optional object providing ``mark_song_failed`` and
                ``mark_song_downloaded``.
        """
        self.yt_dlp_path = yt_dlp_path
        self.config = config
        self.downloads_dir = downloads_dir
        # Compare against None instead of using ``or {}``: an empty dict passed
        # by the caller must be kept by identity, otherwise updates written to
        # it during tracking would land in a throwaway dict.
        self.songlist_tracking = (
            songlist_tracking if songlist_tracking is not None else {}
        )
        self.tracker = tracker

    def execute_pipeline(
        self,
        video_id: str,
        artist: str,
        title: str,
        channel_name: str,
        video_title: Optional[str] = None,
    ) -> bool:
        """
        Execute the complete download pipeline for a single video.

        Args:
            video_id: YouTube video ID
            artist: Artist name
            title: Song title
            channel_name: Channel name
            video_title: Original video title (optional; currently unused by
                the pipeline stages)

        Returns:
            True if successful, False otherwise
        """
        try:
            # Step 1: Prepare file path
            filename = sanitize_filename(artist, title)
            output_path = self.downloads_dir / channel_name / filename

            # Step 2: Download video
            if not self._download_video(
                video_id, output_path, artist, title, channel_name
            ):
                return False

            # Step 3: Verify download. Verification may discover the file under
            # a different name in the same directory, so the remaining steps
            # must use the path it returns rather than the expected one.
            verified_path = self._locate_verified_file(
                output_path, artist, title, video_id, channel_name
            )
            if verified_path is None:
                return False

            # Step 4: Add ID3 tags
            if not self._add_tags(verified_path, artist, title, channel_name):
                return False

            # Step 5: Track download
            if not self._track_download(
                verified_path, artist, title, video_id, channel_name
            ):
                return False

            print(f"✅ Pipeline completed successfully: {artist} - {title}")
            return True

        except Exception as e:
            # Top-level boundary for a single video: report, record, and let
            # the caller continue with the next one.
            print(f"❌ Pipeline failed for {artist} - {title}: {e}")
            self._mark_failed(
                artist, title, video_id, channel_name, f"Pipeline failed: {e}"
            )
            return False

    def _mark_failed(
        self, artist: str, title: str, video_id: str, channel_name: str, message: str
    ) -> None:
        """Record a failed song in the tracker, if a tracker was supplied."""
        if self.tracker:
            self.tracker.mark_song_failed(
                artist, title, video_id, channel_name, message
            )

    def _config_value(self, *path: str) -> Any:
        """
        Look up a (possibly nested) setting in ``self.config``.

        Works for both supported config flavours: dict levels are read with
        ``.get`` and object levels with ``getattr``. Returns None as soon as
        any level is missing.

        NOTE(review): assumes a dict config uses the same key names as the
        ``AppConfig`` attribute names — confirm against the config loader.
        """
        node: Any = self.config
        for key in path:
            if isinstance(node, dict):
                node = node.get(key)
            else:
                node = getattr(node, key, None)
            if node is None:
                return None
        return node

    def _download_video(
        self, video_id: str, output_path: Path, artist: str, title: str, channel_name: str
    ) -> bool:
        """
        Download the video using yt-dlp.

        Creates the target directory, builds and runs the yt-dlp command, and
        returns True on success. On ``subprocess.CalledProcessError`` the error
        is logged, the song is marked as failed, and False is returned.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")

        video_url = f"https://www.youtube.com/watch?v={video_id}"

        # Build command using centralized utility
        cmd = build_yt_dlp_command(
            self.yt_dlp_path, video_url, output_path, self.config
        )

        # Diagnostic output only. Settings are read through _config_value so a
        # dict-based config cannot raise AttributeError and abort the download.
        resolution = self._config_value("download_settings", "preferred_resolution")
        format_string = self._config_value("download_settings", "format")
        print(f"🔧 Running command: {' '.join(cmd)}")
        print(f"📺 Resolution settings: {resolution}")
        print(f"🎬 Format string: {format_string}")

        # Debug: Show available formats (optional)
        if self._config_value("debug_show_formats"):
            show_available_formats(video_url, self.yt_dlp_path)

        try:
            result = execute_yt_dlp_command(cmd)
        except subprocess.CalledProcessError as e:
            error = handle_yt_dlp_error(e, artist, title, video_id)
            log_error(error)
            self._mark_failed(artist, title, video_id, channel_name, error.message)
            return False

        print("✅ yt-dlp completed successfully")
        print(f"📄 yt-dlp stdout: {result.stdout}")
        return True

    @staticmethod
    def _find_matching_file(
        candidates: List[Path], artist: str, title: str
    ) -> Optional[Path]:
        """
        Return the first candidate whose stem looks like "artist - title".

        A candidate matches when its lower-cased stem contains the lower-cased
        artist and at least one whitespace-separated word of the lower-cased
        title. Returns None when nothing matches.
        """
        artist_part = artist.lower()
        title_words = title.lower().split()
        for candidate in candidates:
            stem = candidate.stem.lower()
            if artist_part in stem and any(word in stem for word in title_words):
                return candidate
        return None

    def _locate_verified_file(
        self,
        output_path: Path,
        artist: str,
        title: str,
        video_id: str,
        channel_name: str,
    ) -> Optional[Path]:
        """
        Find the downloaded file and validate it.

        If ``output_path`` does not exist, the ``*.mp4`` files in its directory
        are searched for one matching the artist/title. The located file is
        then checked with ``is_valid_mp4``.

        Returns:
            The path of the verified file (which may differ from
            ``output_path``), or None if no file was found or it failed
            validation. Every failure is recorded through the tracker.
        """
        if not output_path.exists():
            print(f"❌ Download failed: file does not exist: {output_path}")

            # Check if yt-dlp saved it somewhere else
            possible_files = list(output_path.parent.glob("*.mp4"))
            if not possible_files:
                self._mark_failed(
                    artist,
                    title,
                    video_id,
                    channel_name,
                    "Download failed: file does not exist",
                )
                return None

            print(
                f"🔍 Found these files in the directory: {[f.name for f in possible_files]}"
            )

            # Look for a file that matches our pattern (artist - title)
            match = self._find_matching_file(possible_files, artist, title)
            if match is None:
                print(f"❌ No matching file found for: {artist} - {title}")
                self._mark_failed(
                    artist,
                    title,
                    video_id,
                    channel_name,
                    "Download failed: file does not exist and no matching file found",
                )
                return None

            print(f"🎯 Found matching file: {match.name}")
            output_path = match

        # Validate file
        if not is_valid_mp4(output_path):
            error = handle_file_validation_error(
                "File is not a valid MP4",
                output_path,
                artist,
                title,
                video_id,
                channel_name,
            )
            log_error(error)
            self._mark_failed(artist, title, video_id, channel_name, error.message)
            return None

        print(f"✅ Download verified: {output_path}")
        return output_path

    def _verify_download(
        self,
        output_path: Path,
        artist: str,
        title: str,
        video_id: str,
        channel_name: str,
    ) -> bool:
        """
        Verify that the download was successful.

        Boolean wrapper around ``_locate_verified_file``: True when a valid
        MP4 was found at ``output_path`` or under a matching name in the same
        directory, False otherwise.
        """
        return (
            self._locate_verified_file(
                output_path, artist, title, video_id, channel_name
            )
            is not None
        )

    def _add_tags(
        self, output_path: Path, artist: str, title: str, channel_name: str
    ) -> bool:
        """
        Add ID3 tags to the downloaded file.

        Always returns True: a tagging failure is reported but deliberately
        does not fail the pipeline.
        """
        try:
            add_id3_tags(
                output_path, f"{artist} - {title} (Karaoke Version)", channel_name
            )
            print(f"🏷️ Added ID3 tags: {artist} - {title}")
        except Exception as e:
            # Don't fail the pipeline for tag issues
            print(f"⚠️ Failed to add ID3 tags: {e}")
        return True

    def _track_download(
        self,
        output_path: Path,
        artist: str,
        title: str,
        video_id: str,
        channel_name: str,
    ) -> bool:
        """
        Track the download in the tracking system.

        Records the download in the songlist mapping and, when a tracker is
        present, in the tracker together with the file size. Always returns
        True: a tracking failure is reported but deliberately does not fail
        the pipeline.
        """
        try:
            # Track in songlist if available
            if self.songlist_tracking is not None:
                mark_songlist_song_downloaded(
                    self.songlist_tracking, artist, title, channel_name, output_path
                )

            # Track in main tracking system if available
            if self.tracker is not None:
                file_size = output_path.stat().st_size if output_path.exists() else None
                self.tracker.mark_song_downloaded(
                    artist, title, video_id, channel_name, output_path, file_size
                )

            print(f"📊 Tracked download: {artist} - {title}")
        except Exception as e:
            # Don't fail the pipeline for tracking issues
            print(f"⚠️ Failed to track download: {e}")
        return True

    def batch_execute(
        self,
        videos: List[Dict[str, Any]],
        channel_name: str,
        limit: Optional[int] = None,
    ) -> Tuple[int, int]:
        """
        Execute the pipeline for multiple videos.

        Args:
            videos: List of video dictionaries with 'id', 'title', etc.
            channel_name: Channel name
            limit: Optional limit on number of videos to process. ``None`` and
                ``0`` both mean "no limit".

        Returns:
            Tuple of (successful_downloads, total_attempted)
        """
        if limit:
            videos = videos[:limit]

        total = len(videos)
        if total == 0:
            return 0, 0

        # Kept as a function-level import as in the original code, but resolved
        # once per batch instead of once per video.
        from karaoke_downloader.id3_utils import extract_artist_title

        successful = 0
        for i, video in enumerate(videos, 1):
            video_id = video["id"]
            video_title = video.get("title", "")

            # Extract artist and title from video title
            artist, title = extract_artist_title(video_title)

            print(f" ({i}/{total}) Processing: {artist} - {title}")

            if self.execute_pipeline(
                video_id, artist, title, channel_name, video_title
            ):
                successful += 1
            else:
                print(f" ❌ Failed to process: {artist} - {title}")

        return successful, total
|