320 lines
10 KiB
Python
320 lines
10 KiB
Python
"""
|
|
Core video download logic and file validation.
|
|
Handles the actual downloading and post-processing of videos.
|
|
"""
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Optional, Tuple, Union
|
|
from karaoke_downloader.config_manager import AppConfig
|
|
|
|
from karaoke_downloader.download_planner import save_plan_cache
|
|
from karaoke_downloader.error_utils import (
|
|
handle_file_validation_error,
|
|
handle_yt_dlp_error,
|
|
log_error,
|
|
)
|
|
from karaoke_downloader.file_utils import (
|
|
cleanup_temp_files,
|
|
ensure_directory_exists,
|
|
is_valid_mp4_file,
|
|
sanitize_filename,
|
|
)
|
|
from karaoke_downloader.id3_utils import add_id3_tags
|
|
from karaoke_downloader.songlist_manager import mark_songlist_song_downloaded
|
|
from karaoke_downloader.youtube_utils import (
|
|
build_yt_dlp_command,
|
|
execute_yt_dlp_command,
|
|
show_available_formats,
|
|
)
|
|
|
|
# Constants
|
|
DEFAULT_FORMAT_CHECK_TIMEOUT = 30
|
|
|
|
|
|
def is_valid_mp4(file_path: Path) -> bool:
    """
    Report whether ``file_path`` passes the shared MP4 validity check.

    This is a thin wrapper: the decision is delegated entirely to
    ``is_valid_mp4_file`` from ``karaoke_downloader.file_utils``.

    Args:
        file_path: Location of the file to inspect.

    Returns:
        Whatever ``is_valid_mp4_file`` reports for ``file_path``.
    """
    verdict = is_valid_mp4_file(file_path)
    return verdict
|
|
|
|
|
|
def download_video_and_track(
    yt_dlp_path,
    config,
    downloads_dir,
    songlist_tracking,
    channel_name,
    channel_url,
    video_id,
    video_title,
    artist,
    title,
    filename,
):
    """
    Download one video into its channel folder and track the outcome.

    The destination is ``downloads_dir / channel_name / filename``; the
    download itself is delegated to ``download_single_video``.
    ``channel_url`` and ``video_title`` are accepted but not used here.

    Returns:
        The result of ``download_single_video``: True if successful,
        False otherwise.
    """
    channel_dir = downloads_dir / channel_name
    return download_single_video(
        output_path=channel_dir / filename,
        video_id=video_id,
        config=config,
        yt_dlp_path=yt_dlp_path,
        artist=artist,
        title=title,
        channel_name=channel_name,
        songlist_tracking=songlist_tracking,
    )
|
|
|
|
|
|
def _read_download_setting(config, name):
    """Return download setting ``name`` from an object-style or dict config, else None."""
    if isinstance(config, dict):
        # assumes dict configs mirror the "download_settings" nesting of
        # AppConfig -- TODO confirm against the config loader
        settings = config.get("download_settings")
    else:
        settings = getattr(config, "download_settings", None)

    if isinstance(settings, dict):
        return settings.get(name)
    return getattr(settings, name, None)


def download_single_video(
    output_path: Path,
    video_id: str,
    config: Union[AppConfig, Dict[str, Any]],
    yt_dlp_path: str,
    artist: str,
    title: str,
    channel_name: str,
    songlist_tracking: Dict[str, Any],
) -> bool:
    """
    Download a single video and handle post-processing.

    Steps, in order: make sure the destination directory exists, build and
    run the yt-dlp command, verify the resulting file, write ID3 tags, mark
    the song as downloaded in the songlist tracking data, and clean up
    temporary files.

    Args:
        output_path: Output file path
        video_id: YouTube video ID
        config: Application configuration (AppConfig object or dictionary)
        yt_dlp_path: Path to yt-dlp executable
        artist: Song artist name
        title: Song title
        channel_name: Channel name
        songlist_tracking: Songlist tracking data

    Returns:
        True if the download, verification and post-processing all ran;
        False if yt-dlp raised ``CalledProcessError`` or ``verify_download``
        rejected the result.
    """
    ensure_directory_exists(output_path.parent)
    print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")

    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # Debug: Show the video_id and URL being used
    print(f"🔍 DEBUG: video_id = '{video_id}'")
    print(f"🔍 DEBUG: video_url = '{video_url}'")

    # Build command using centralized utility
    cmd = build_yt_dlp_command(yt_dlp_path, video_url, output_path, config)

    # The signature allows a plain dict config, so the diagnostics must not
    # rely on attribute access (which would raise AttributeError on a dict).
    preferred_resolution = _read_download_setting(config, "preferred_resolution")
    format_string = _read_download_setting(config, "format")
    print(f"🔧 Running command: {' '.join(cmd)}")
    print(f"📺 Resolution settings: {preferred_resolution}")
    print(f"🎬 Format string: {format_string}")

    # Debug: Show available formats (optional)
    if getattr(config, "debug_show_formats", False):
        show_available_formats(video_url, yt_dlp_path)

    try:
        result = execute_yt_dlp_command(cmd)
    except subprocess.CalledProcessError as e:
        error = handle_yt_dlp_error(e, artist, title, video_id, channel_name)
        log_error(error)
        # Mark song as failed in tracking
        _mark_song_failed_standalone(
            artist, title, video_id, channel_name, error.message
        )
        return False
    else:
        print("✅ yt-dlp completed successfully")
        print(f"📄 yt-dlp stdout: {result.stdout}")

    # Verify download
    # NOTE(review): verify_download may accept a differently named file it
    # finds in the same directory, but it only returns a bool, so the steps
    # below still operate on the originally requested output_path.
    if not verify_download(output_path, artist, title, video_id, channel_name):
        return False

    # Post-processing
    add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
    mark_songlist_song_downloaded(
        songlist_tracking, artist, title, channel_name, output_path
    )

    # Clean up temporary files
    cleanup_temp_files(output_path.with_suffix(""))

    print(f"✅ Downloaded and tracked: {artist} - {title}")
    print(f"🎉 All post-processing complete for: {output_path}")

    return True
|
|
|
|
|
|
def _mark_song_failed_standalone(artist, title, video_id, channel_name, error_message):
    """Record a failed download for a song through a freshly created TrackingManager."""
    # Imported at call time rather than at module import.
    from karaoke_downloader.tracking_manager import TrackingManager

    TrackingManager().mark_song_failed(
        artist, title, video_id, channel_name, error_message
    )
    print(f"🏷️ Marked song as failed: {artist} - {title}")
|
|
|
|
|
|
# Note: show_available_formats is now imported from youtube_utils
|
|
|
|
|
|
def _record_verification_failure(artist, title, video_id, channel_name, error_msg):
    """Mark the song as failed in tracking, but only when both identifiers are known."""
    if video_id and channel_name:
        _mark_song_failed_standalone(artist, title, video_id, channel_name, error_msg)


def verify_download(output_path, artist, title, video_id=None, channel_name=None):
    """
    Verify that the download was successful.

    If ``output_path`` does not exist, the ``*.mp4`` files in its directory
    are searched for a name containing the artist and at least one word of
    the title (case-insensitive); the first such file is checked instead.
    The file being checked must be non-empty.

    Every failure path (file missing, no matching file, zero-byte file)
    marks the song as failed in tracking when both ``video_id`` and
    ``channel_name`` are provided.

    Args:
        output_path: Expected path of the downloaded file
        artist: Song artist name
        title: Song title
        video_id: Optional video ID, needed to record a failure
        channel_name: Optional channel name, needed to record a failure

    Returns:
        True if a non-empty file was found, False otherwise. The path of a
        substitute file found by the directory search is not returned.
    """
    if not output_path.exists():
        print(f"❌ Download failed: file does not exist: {output_path}")

        # Check if yt-dlp saved it somewhere else
        possible_files = list(output_path.parent.glob("*.mp4"))
        if not possible_files:
            _record_verification_failure(
                artist,
                title,
                video_id,
                channel_name,
                "Download failed: file does not exist",
            )
            return False

        print(
            f"🔍 Found these files in the directory: {[f.name for f in possible_files]}"
        )

        # Look for a file that matches our pattern (artist - title)
        artist_part = artist.lower()
        title_words = title.lower().split()
        for candidate in possible_files:
            stem = candidate.stem.lower()
            if artist_part in stem and any(word in stem for word in title_words):
                print(f"🎯 Found matching file: {candidate.name}")
                output_path = candidate
                break
        else:
            # Loop finished without a break: nothing in the directory matched.
            print(f"❌ No matching file found for: {artist} - {title}")
            _record_verification_failure(
                artist,
                title,
                video_id,
                channel_name,
                "Download failed: file does not exist and no matching file found",
            )
            return False

    if output_path.stat().st_size == 0:
        print(f"❌ Download failed: file is empty (0 bytes): {output_path}")
        # Track the failure here too, consistent with the missing-file paths.
        _record_verification_failure(
            artist,
            title,
            video_id,
            channel_name,
            "Download failed: file is empty (0 bytes)",
        )
        return False

    # Optional MP4 validation via is_valid_mp4() is currently disabled.
    return True
|
|
|
|
|
|
def execute_download_plan(
    download_plan,
    unmatched,
    cache_file,
    config,
    yt_dlp_path,
    downloads_dir,
    songlist_tracking,
    limit=None,
):
    """
    Execute a download plan with progress tracking and cache management.

    Items are processed from the end of ``download_plan`` towards the
    start. Each successfully downloaded item is removed from the list
    (the list is mutated in place) and the remaining plan is written to
    the cache. Failed items stay in the plan. The cache file is deleted
    when the function finishes, whether or not items remain.

    Args:
        download_plan: List of download items to process
        unmatched: List of unmatched songs
        cache_file: Path to cache file for progress tracking
        config: Application configuration passed through to the downloader
        yt_dlp_path: Path to yt-dlp executable
        downloads_dir: Directory for downloads
        songlist_tracking: Songlist tracking data
        limit: Optional limit on number of successful downloads

    Returns:
        tuple: (downloaded_count, success); ``success`` is always True.
    """
    downloaded_count = 0

    # A limit larger than the plan must not inflate the progress total.
    plan_size = len(download_plan)
    total_to_download = plan_size if limit is None else min(limit, plan_size)

    # Use reverse iteration to avoid index shifting issues when removing items
    for idx in range(plan_size - 1, -1, -1):
        if limit is not None and downloaded_count >= limit:
            break

        item = download_plan[idx]
        artist = item["artist"]
        title = item["title"]
        channel_name = item["channel_name"]
        video_id = item["video_id"]
        video_title = item["video_title"]

        print(f"\n⬇️ Downloading {downloaded_count + 1} of {total_to_download}:")
        print(f" 📋 Songlist: {artist} - {title}")
        print(f" 🎬 Video: {video_title} ({channel_name})")
        if "match_score" in item:
            print(f" 🎯 Match Score: {item['match_score']:.1f}%")

        # Create filename
        filename = sanitize_filename(artist, title)
        output_path = downloads_dir / channel_name / filename

        # Download the file
        success = download_single_video(
            output_path,
            video_id,
            config,
            yt_dlp_path,
            artist,
            title,
            channel_name,
            songlist_tracking,
        )

        if success:
            downloaded_count += 1
            # Remove completed item from plan and update cache
            download_plan.pop(idx)
            save_plan_cache(cache_file, download_plan, unmatched)
            print(
                f"🗑️ Removed completed item from download plan. {len(download_plan)} items remaining."
            )

    # Delete cache if all items are complete
    if not download_plan:
        cleanup_cache(cache_file)

    print(f"🎉 Downloaded {downloaded_count} songlist songs.")
    print(
        f"📊 Summary: Found {downloaded_count} songs, {len(unmatched)} songs not found."
    )

    # Final cleanup
    cleanup_cache(cache_file)

    return downloaded_count, True
|
|
|
|
|
|
def cleanup_cache(cache_file):
    """Delete the download plan cache file if it exists.

    Deletion is best-effort: any error is reported on stdout and swallowed.
    """
    if not cache_file.exists():
        return

    try:
        cache_file.unlink()
        print(f"🗑️ Deleted download plan cache: {cache_file.name}")
    except Exception as exc:
        print(f"⚠️ Could not delete download plan cache: {exc}")
|
|
|
|
|
|
# Note: should_skip_song_standalone function has been removed and replaced with SongValidator class
|
|
# Use karaoke_downloader.song_validator.create_song_validator() instead
|