390 lines
14 KiB
Python
390 lines
14 KiB
Python
import argparse
|
||
import os
|
||
import sys
|
||
|
||
from pathlib import Path
|
||
|
||
from karaoke_downloader.downloader import KaraokeDownloader
|
||
|
||
# Constants

# Default value of the --fuzzy-threshold option (documented range: 0-100).
DEFAULT_FUZZY_THRESHOLD = 85

# Number of videos fetched per channel by --latest-per-channel when --limit
# is not given.
DEFAULT_LATEST_PER_CHANNEL_LIMIT = 5

# NOTE(review): not referenced in the visible code; presumably the maximum
# number of entries printed in status listings - confirm before relying on it.
DEFAULT_DISPLAY_LIMIT = 10

# NOTE(review): not referenced in the visible code; matches the "default: 24"
# advertised in the --cache-duration help text - confirm against the tracker.
DEFAULT_CACHE_DURATION_HOURS = 24
|
||
|
||
|
||
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with every option the tool accepts."""
    parser = argparse.ArgumentParser(
        description="Karaoke Video Downloader - Download YouTube playlists and channel videos for karaoke",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The --reset-channel example no longer mentions --delete-files: that
        # option was never defined, so the documented command was rejected.
        epilog="""
Examples:
  python download_karaoke.py https://www.youtube.com/playlist?list=XYZ
  python download_karaoke.py https://www.youtube.com/@SingKingKaraoke/videos
  python download_karaoke.py --file data/channels.txt
  python download_karaoke.py --reset-channel SingKingKaraoke
        """,
    )
    parser.add_argument(
        "url", nargs="?", help="YouTube playlist or channel URL to download"
    )
    parser.add_argument(
        "--file",
        "-f",
        help="Text file containing playlist or channel URLs (one per line)",
    )
    parser.add_argument(
        "--status",
        "-s",
        action="store_true",
        help="Show download status and statistics",
    )
    parser.add_argument(
        "--report",
        "-r",
        metavar="PLAYLIST_ID",
        help="Generate a detailed report for a specific playlist",
    )
    parser.add_argument(
        "--sync",
        "--sync-only",
        action="store_true",
        help="Only sync playlist without downloading (update tracking)",
    )
    parser.add_argument(
        "--cleanup", action="store_true", help="Clean up orphaned tracking entries"
    )
    parser.add_argument(
        "--resolution",
        "--res",
        choices=["480p", "720p", "1080p", "1440p", "2160p"],
        default="720p",
        help="Preferred video resolution (default: 720p)",
    )
    parser.add_argument(
        "--limit",
        "-l",
        type=int,
        help="Limit the number of videos to download (e.g., --limit 10)",
    )
    parser.add_argument(
        "--refresh",
        "--force-refresh",
        action="store_true",
        help="Force refresh channel cache (ignore cached data)",
    )
    parser.add_argument(
        "--cache-info", action="store_true", help="Show channel cache information"
    )
    parser.add_argument(
        "--clear-cache",
        metavar="CHANNEL_ID",
        nargs="?",
        const="all",
        help="Clear cache for specific channel or all channels (use --clear-cache all)",
    )
    parser.add_argument(
        "--cache-duration",
        type=int,
        metavar="HOURS",
        help=f"Set cache duration in hours (default: {DEFAULT_CACHE_DURATION_HOURS})",
    )
    parser.add_argument(
        "--songlist-priority",
        action="store_true",
        help="Prioritize downloads based on data/songList.json (default: enabled)",
    )
    parser.add_argument(
        "--no-songlist-priority",
        action="store_true",
        help="Disable songlist prioritization",
    )
    parser.add_argument(
        "--songlist-only",
        action="store_true",
        help="Only download songs that are in the songlist (skip all others)",
    )
    parser.add_argument(
        "--songlist-focus",
        nargs="+",
        metavar="PLAYLIST_TITLE",
        help='Focus on specific playlists by title (e.g., --songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100")',
    )
    parser.add_argument(
        "--songlist-status",
        action="store_true",
        help="Show songlist download status and statistics",
    )
    parser.add_argument(
        "--reset-channel",
        metavar="CHANNEL_NAME",
        help="Reset all tracking and files for a channel",
    )
    parser.add_argument(
        "--reset-songlist",
        action="store_true",
        help="When used with --reset-channel, also reset songlist songs for this channel",
    )
    parser.add_argument(
        "--reset-songlist-all",
        action="store_true",
        help="Reset all songlist tracking and delete all songlist-downloaded files (global)",
    )
    parser.add_argument(
        "--clear-server-duplicates",
        action="store_true",
        help="Clear server duplicates tracking (allows re-checking songs against server)",
    )
    parser.add_argument(
        "--version", "-v", action="version", version="Karaoke Playlist Downloader v1.0"
    )
    parser.add_argument(
        "--force-download-plan",
        action="store_true",
        help="Force refresh the download plan cache (re-scan all channels for matches)",
    )
    parser.add_argument(
        "--latest-per-channel",
        action="store_true",
        help="Download the latest N videos from each channel (use with --limit)",
    )
    parser.add_argument(
        "--fuzzy-match",
        action="store_true",
        help="Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)",
    )
    parser.add_argument(
        "--fuzzy-threshold",
        type=int,
        default=DEFAULT_FUZZY_THRESHOLD,
        help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
    )
    parser.add_argument(
        "--parallel",
        action="store_true",
        help="Enable parallel downloads for improved speed",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=3,
        help="Number of parallel download workers (default: 3, max: 10)",
    )
    return parser


def _read_channel_urls(channel_file):
    """Return the URLs listed in *channel_file*, one per line.

    Blank lines and lines starting with ``#`` are skipped. Prints an error
    and exits with status 1 when the file does not exist.
    """
    if not os.path.exists(channel_file):
        print(f"❌ Channel file not found: {channel_file}")
        sys.exit(1)
    with open(channel_file, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [url for url in stripped if url and not url.startswith("#")]


def _print_songlist_status(downloader) -> None:
    """Print how many songlist entries have been downloaded so far."""
    songlist = downloader._load_songlist()
    tracking = downloader.songlist_tracking
    # Guard against an empty songlist: the percentage used to divide by zero.
    progress = len(tracking) / len(songlist) * 100 if songlist else 0.0
    print("📋 Songlist Download Status")
    print("=" * 40)
    print(f"Total songs in list: {len(songlist)}")
    print(f"Songs downloaded: {len(tracking)}")
    print(f"Songs remaining: {len(songlist) - len(tracking)}")
    print(f"Progress: {progress:.1f}%")
    if tracking:
        print("\n📁 Downloaded songs:")
        for info in list(tracking.values())[:DEFAULT_DISPLAY_LIMIT]:
            print(
                f" • {info['artist']} - {info['title']} (from {info['channel']})"
            )
        hidden = len(tracking) - DEFAULT_DISPLAY_LIMIT
        if hidden > 0:
            print(f" ... and {hidden} more")


def main() -> None:
    """Command-line entry point of the karaoke downloader.

    Parses the arguments, configures a ``KaraokeDownloader`` and runs exactly
    one action: a maintenance/reporting command (reset, cache, status, ...)
    or a download mode (songlist, latest-per-channel, URL file, single URL).
    Always terminates through ``sys.exit``: status 0 on success, 1 on invalid
    input, missing files or failed downloads.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Validate numeric options before any expensive setup happens.
    if args.workers < 1 or args.workers > 10:
        print("❌ Error: --workers must be between 1 and 10")
        sys.exit(1)
    if not 0 <= args.fuzzy_threshold <= 100:
        print("❌ Error: --fuzzy-threshold must be between 0 and 100")
        sys.exit(1)

    yt_dlp_path = Path("downloader/yt-dlp.exe")
    if not yt_dlp_path.exists():
        print("❌ Error: yt-dlp.exe not found in downloader/ directory")
        print("Please ensure yt-dlp.exe is present in the downloader/ folder")
        sys.exit(1)

    downloader = KaraokeDownloader()

    # Set parallel download options
    if args.parallel:
        downloader.enable_parallel_downloads = True
        downloader.parallel_workers = args.workers
        print(f"⚡ Parallel downloads enabled with {args.workers} workers")
    else:
        downloader.enable_parallel_downloads = False
        print("🐌 Sequential downloads enabled")

    if args.limit:
        downloader.download_limit = args.limit
        print(f"🎯 Download limit set to: {args.limit} videos")
    if args.no_songlist_priority:
        downloader.use_songlist_priority = False
        print("🎯 Songlist prioritization disabled")
    else:
        downloader.use_songlist_priority = True
        print("🎯 Songlist prioritization enabled")
    if args.songlist_only:
        downloader.songlist_only = True
        print("🎯 Songlist-only mode enabled (will only download songlist songs)")

    if args.songlist_focus:
        downloader.songlist_focus_titles = args.songlist_focus
        downloader.songlist_only = True  # Enable songlist-only mode when focusing
        print(
            f"🎯 Songlist focus mode enabled for playlists: {', '.join(args.songlist_focus)}"
        )
    if args.resolution != "720p":
        downloader.config_manager.update_resolution(args.resolution)

    # Reset a single channel, then stop.
    if args.reset_channel:
        downloader.reset_channel_downloads(
            args.reset_channel,
            reset_songlist=args.reset_songlist,
            delete_files=True,  # Always delete files if they exist
        )
        sys.exit(0)

    # With --songlist-only and neither a URL nor --file, fall back to the
    # default channel list.
    if args.songlist_only and not args.url and not args.file:
        channels_file = Path("data/channels.txt")
        if channels_file.exists():
            args.file = str(channels_file)
            print(
                "📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode."
            )
        else:
            print(
                "❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs."
            )
            sys.exit(1)

    if args.reset_songlist_all:
        from karaoke_downloader.downloader import reset_songlist_all

        reset_songlist_all()
        print("✅ All songlist tracking and files have been reset.")
        sys.exit(0)

    if args.clear_server_duplicates:
        from karaoke_downloader.server_manager import save_server_duplicates_tracking

        save_server_duplicates_tracking({})
        print("✅ Server duplicates tracking has been cleared.")
        print("ℹ️ Songs will be re-checked against the server on next run.")
        sys.exit(0)

    if args.status:
        stats = downloader.tracker.get_statistics()
        print("🎤 Karaoke Downloader Status")
        print("=" * 40)
        print(f"Total Songs: {stats['total_songs']}")
        print(f"Total Playlists: {stats['total_playlists']}")
        print(f"Downloaded Songs: {stats['downloaded_songs']}")
        print(f"Failed Songs: {stats['failed_songs']}")
        print(f"Partial Downloads: {stats['partial_songs']}")
        print(f"Total Size: {stats['total_size_mb']} MB")
        print(f"Last Updated: {stats['last_updated']}")
        sys.exit(0)
    elif args.cache_info:
        cache_info = downloader.tracker.get_cache_info()
        print("💾 Channel Cache Information")
        print("=" * 40)
        print(f"Total Channels: {cache_info['total_channels']}")
        print(f"Total Cached Videos: {cache_info['total_cached_videos']}")
        print(f"Cache Duration: {cache_info['cache_duration_hours']} hours")
        print(f"Last Updated: {cache_info['last_updated']}")
        sys.exit(0)
    elif args.clear_cache:
        if args.clear_cache == "all":
            downloader.tracker.clear_channel_cache()
            print("🧹 Cleared all channel caches")
        else:
            downloader.tracker.clear_channel_cache(args.clear_cache)
            print(f"🧹 Cleared cache for channel: {args.clear_cache}")
        sys.exit(0)
    elif args.cache_duration:
        downloader.tracker.set_cache_duration(args.cache_duration)
        print(f"⏰ Cache duration set to {args.cache_duration} hours")
        sys.exit(0)
    elif args.report:
        report = downloader.tracker.export_playlist_report(args.report)
        print(report)
        sys.exit(0)
    elif args.cleanup:
        orphaned = downloader.tracker.cleanup_orphaned_files(downloader.downloads_dir)
        print(f"🧹 Cleaned up {len(orphaned)} orphaned tracking entries")
        sys.exit(0)
    elif args.songlist_status:
        _print_songlist_status(downloader)
        sys.exit(0)
    elif args.songlist_only or args.songlist_focus:
        # Use provided file or default to data/channels.txt
        channel_urls = _read_channel_urls(args.file or "data/channels.txt")
        success = downloader.download_songlist_across_channels(
            channel_urls,
            limit=args.limit or None,
            force_refresh_download_plan=args.force_download_plan,
            fuzzy_match=args.fuzzy_match,
            fuzzy_threshold=args.fuzzy_threshold,
        )
    elif args.latest_per_channel:
        # Use provided file or default to data/channels.txt
        channel_urls = _read_channel_urls(args.file or "data/channels.txt")
        success = downloader.download_latest_per_channel(
            channel_urls,
            limit=args.limit or DEFAULT_LATEST_PER_CHANNEL_LIMIT,
            force_refresh_download_plan=args.force_download_plan,
            fuzzy_match=args.fuzzy_match,
            fuzzy_threshold=args.fuzzy_threshold,
        )
    elif args.url:
        success = downloader.download_channel_videos(
            args.url, force_refresh=args.refresh
        )
    elif args.file:
        # --file on its own used to fall through to the help text even though
        # the epilog documents it; process each listed URL like a single URL.
        channel_urls = _read_channel_urls(args.file)
        if not channel_urls:
            print(f"❌ No URLs found in {args.file}")
            sys.exit(1)
        # Build the full list first so one failure does not skip later URLs.
        results = [
            downloader.download_channel_videos(url, force_refresh=args.refresh)
            for url in channel_urls
        ]
        success = all(results)
    else:
        parser.print_help()
        sys.exit(1)

    downloader.tracker.force_save()
    if success:
        print("\n🎤 All downloads completed successfully!")
        sys.exit(0)
    else:
        print("\n⚠️ Some downloads may have failed. Check the logs for details.")
        sys.exit(1)
|