KaraokeVideoDownloader/karaoke_downloader/cli.py

561 lines
21 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import argparse
import os
import sys
from pathlib import Path
import json
from karaoke_downloader.downloader import KaraokeDownloader
# Constants
# Default value of the --fuzzy-threshold option (its help text gives the range as 0-100).
DEFAULT_FUZZY_THRESHOLD = 85
# Per-channel video count used by --latest-per-channel when --limit is absent or 0.
DEFAULT_LATEST_PER_CHANNEL_LIMIT = 5
# NOTE(review): not referenced by main() as shown, which hard-codes 10 when
# listing unmatched/downloaded songs - confirm whether this was meant to be used there.
DEFAULT_DISPLAY_LIMIT = 10
# NOTE(review): not referenced by main() as shown; the value matches the
# "default: 24" mentioned in the --cache-duration help text - confirm where it is consumed.
DEFAULT_CACHE_DURATION_HOURS = 24
def _build_parser():
    """Create the argument parser that declares every CLI option."""
    parser = argparse.ArgumentParser(
        description="Karaoke Video Downloader - Download YouTube playlists and channel videos for karaoke",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The reset example used to end with "--delete-files", an option this
        # parser never defined; running it failed with "unrecognized arguments".
        epilog="""
Examples:
python download_karaoke.py https://www.youtube.com/playlist?list=XYZ
python download_karaoke.py https://www.youtube.com/@SingKingKaraoke/videos
python download_karaoke.py --file data/channels.txt
python download_karaoke.py --reset-channel SingKingKaraoke
""",
    )
    parser.add_argument(
        "url", nargs="?", help="YouTube playlist or channel URL to download"
    )
    parser.add_argument(
        "--file",
        "-f",
        help="Text file containing playlist or channel URLs (one per line)",
    )
    parser.add_argument(
        "--status",
        "-s",
        action="store_true",
        help="Show download status and statistics",
    )
    parser.add_argument(
        "--report",
        "-r",
        metavar="PLAYLIST_ID",
        help="Generate a detailed report for a specific playlist",
    )
    parser.add_argument(
        "--sync",
        "--sync-only",
        action="store_true",
        help="Only sync playlist without downloading (update tracking)",
    )
    parser.add_argument(
        "--cleanup", action="store_true", help="Clean up orphaned tracking entries"
    )
    parser.add_argument(
        "--resolution",
        "--res",
        choices=["480p", "720p", "1080p", "1440p", "2160p"],
        default="720p",
        help="Preferred video resolution (default: 720p)",
    )
    parser.add_argument(
        "--limit",
        "-l",
        type=int,
        help="Limit the number of videos to download (e.g., --limit 10)",
    )
    parser.add_argument(
        "--refresh",
        "--force-refresh",
        action="store_true",
        help="Force refresh channel cache (ignore cached data)",
    )
    parser.add_argument(
        "--cache-info", action="store_true", help="Show channel cache information"
    )
    parser.add_argument(
        "--clear-cache",
        metavar="CHANNEL_ID",
        nargs="?",
        const="all",
        help="Clear cache for specific channel or all channels (use --clear-cache all)",
    )
    parser.add_argument(
        "--cache-duration",
        type=int,
        metavar="HOURS",
        help="Set cache duration in hours (default: 24)",
    )
    # NOTE(review): --songlist-priority is accepted but never read by main();
    # prioritization is on unless --no-songlist-priority is given.
    parser.add_argument(
        "--songlist-priority",
        action="store_true",
        help="Prioritize downloads based on data/songList.json (default: enabled)",
    )
    parser.add_argument(
        "--no-songlist-priority",
        action="store_true",
        help="Disable songlist prioritization",
    )
    parser.add_argument(
        "--generate-unmatched-report",
        action="store_true",
        help="Generate a report of songs that couldn't be found in any channel",
    )
    parser.add_argument(
        "--songlist-only",
        action="store_true",
        help="Only download songs that are in the songlist (skip all others)",
    )
    parser.add_argument(
        "--songlist-focus",
        nargs="+",
        metavar="PLAYLIST_TITLE",
        help='Focus on specific playlists by title (e.g., --songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100")',
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force download from channels regardless of whether songs are already downloaded, on server, or marked as duplicates",
    )
    parser.add_argument(
        "--songlist-status",
        action="store_true",
        help="Show songlist download status and statistics",
    )
    parser.add_argument(
        "--reset-channel",
        metavar="CHANNEL_NAME",
        help="Reset all tracking and files for a channel",
    )
    parser.add_argument(
        "--reset-songlist",
        action="store_true",
        help="When used with --reset-channel, also reset songlist songs for this channel",
    )
    parser.add_argument(
        "--reset-songlist-all",
        action="store_true",
        help="Reset all songlist tracking and delete all songlist-downloaded files (global)",
    )
    parser.add_argument(
        "--clear-server-duplicates",
        action="store_true",
        help="Clear server duplicates tracking (allows re-checking songs against server)",
    )
    parser.add_argument(
        "--version", "-v", action="version", version="Karaoke Playlist Downloader v1.0"
    )
    parser.add_argument(
        "--force-download-plan",
        action="store_true",
        help="Force refresh the download plan cache (re-scan all channels for matches)",
    )
    parser.add_argument(
        "--latest-per-channel",
        action="store_true",
        help="Download the latest N videos from each channel (use with --limit)",
    )
    parser.add_argument(
        "--fuzzy-match",
        action="store_true",
        help="Enable fuzzy matching for songlist-to-video matching (uses rapidfuzz if available)",
    )
    parser.add_argument(
        "--fuzzy-threshold",
        type=int,
        default=DEFAULT_FUZZY_THRESHOLD,
        help=f"Fuzzy match threshold (0-100, default {DEFAULT_FUZZY_THRESHOLD})",
    )
    parser.add_argument(
        "--parallel",
        action="store_true",
        help="Enable parallel downloads for improved speed",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=3,
        help="Number of parallel download workers (default: 3, max: 10)",
    )
    parser.add_argument(
        "--generate-songlist",
        nargs="+",
        metavar="DIRECTORY",
        help="Generate song list from MP4 files with ID3 tags in specified directories",
    )
    parser.add_argument(
        "--no-append-songlist",
        action="store_true",
        help="Create a new song list instead of appending when using --generate-songlist",
    )
    return parser


def _read_channel_urls(channel_file):
    """Return the non-blank, non-comment lines of *channel_file*.

    Lines are stripped; lines starting with "#" are treated as comments.
    Prints an error and exits with status 1 when the file does not exist.
    """
    if not os.path.exists(channel_file):
        print(f"❌ Channel file not found: {channel_file}")
        sys.exit(1)
    with open(channel_file, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [
            line for line in stripped_lines if line and not line.startswith("#")
        ]


def _configure_downloader(downloader, args):
    """Copy the download-related CLI options onto *downloader*, echoing each choice."""
    if args.parallel:
        downloader.enable_parallel_downloads = True
        downloader.parallel_workers = args.workers
        print(f"⚡ Parallel downloads enabled with {args.workers} workers")
    else:
        downloader.enable_parallel_downloads = False
        print("🐌 Sequential downloads enabled")

    # A limit of 0 is falsy and therefore leaves the downloader's limit untouched.
    if args.limit:
        downloader.download_limit = args.limit
        print(f"🎯 Download limit set to: {args.limit} videos")

    if args.no_songlist_priority:
        downloader.use_songlist_priority = False
        print("🎯 Songlist prioritization disabled")
    else:
        downloader.use_songlist_priority = True
        print("🎯 Songlist prioritization enabled")

    if args.songlist_only:
        downloader.songlist_only = True
        print("🎯 Songlist-only mode enabled (will only download songlist songs)")

    if args.songlist_focus:
        downloader.songlist_focus_titles = args.songlist_focus
        downloader.songlist_only = True  # Enable songlist-only mode when focusing
        print(
            f"🎯 Songlist focus mode enabled for playlists: {', '.join(args.songlist_focus)}"
        )

    if args.force:
        downloader.force_download = True
        print("💪 Force mode enabled - will download regardless of existing files or server duplicates")

    # Only a non-default resolution is written through to the configuration.
    if args.resolution != "720p":
        downloader.config_manager.update_resolution(args.resolution)


def _load_focused_songs(focus_titles):
    """Return de-duplicated songs from the data/songList.json playlists named in *focus_titles*.

    Each returned item is a dict with "artist", "title" and "position" keys.
    Exits with status 1 when the file is missing or unreadable, or when no
    playlist title matches.
    """
    songlist_file = Path("data/songList.json")
    if not songlist_file.exists():
        print("⚠️ Songlist file not found: data/songList.json")
        sys.exit(1)

    try:
        with open(songlist_file, "r", encoding="utf-8") as f:
            raw_data = json.load(f)
    except (json.JSONDecodeError, FileNotFoundError) as e:
        print(f"⚠️ Could not load songlist for filtering: {e}")
        sys.exit(1)

    # Show a short preview of what is available to help diagnose title typos.
    print(f"🔍 Looking for playlists: {focus_titles}")
    print("🔍 Available playlists in songList.json:")
    for number, playlist in enumerate(raw_data[:5], 1):  # Show first 5 playlists
        print(f" {number}. '{playlist.get('title', 'NO TITLE')}'")
    if len(raw_data) > 5:
        print(f" ... and {len(raw_data) - 5} more playlists")

    # Filter playlists by exact title match.
    focused_playlists = []
    for playlist in raw_data:
        playlist_title = playlist.get("title", "")
        if playlist_title in focus_titles:
            focused_playlists.append(playlist)
            print(f"✅ Found matching playlist: '{playlist_title}'")

    if not focused_playlists:
        print(
            f"⚠️ No playlists found matching the specified titles: {', '.join(focus_titles)}"
        )
        sys.exit(1)

    # Flatten the focused playlists into songs, keeping the first occurrence
    # of each case-insensitive artist/title pair.
    focused_songs = []
    seen = set()
    for playlist in focused_playlists:
        for song in playlist.get("songs", []):
            if "artist" not in song or "title" not in song:
                continue
            artist = song["artist"].strip()
            title = song["title"].strip()
            key = f"{artist.lower()}_{title.lower()}"
            if key in seen:
                continue
            seen.add(key)
            focused_songs.append(
                {
                    "artist": artist,
                    "title": title,
                    "position": song.get("position", 0),
                }
            )

    print(
        f"\n🎯 Songlist focus mode: {len(focused_songs)} songs from {len(focused_playlists)} playlists selected"
    )
    print(f"🎯 Focused playlists: {', '.join(focus_titles)}")
    return focused_songs


def _run_unmatched_report(args, downloader):
    """Build a download plan and report the songlist songs no channel provides.

    Exits with status 1 on any failure; returns normally otherwise.
    """
    # Imported lazily, as in the rest of this module's maintenance commands.
    from karaoke_downloader.download_planner import (
        build_download_plan,
        generate_unmatched_report,
    )
    from karaoke_downloader.songlist_manager import load_songlist

    print("🔍 Generating unmatched songs report...")

    # Load songlist based on focus mode
    if args.songlist_focus:
        songlist = _load_focused_songs(args.songlist_focus)
    else:
        songlist = load_songlist()
        if not songlist:
            print("❌ No songlist found. Please ensure data/songList.json exists.")
            sys.exit(1)

    channel_urls = _read_channel_urls(args.file or "data/channels.txt")
    print(f"📋 Analyzing {len(songlist)} songs against {len(channel_urls)} channels...")

    try:
        download_plan, unmatched = build_download_plan(
            channel_urls,
            songlist,
            downloader.tracker,
            downloader.yt_dlp_path,
            fuzzy_match=args.fuzzy_match,
            fuzzy_threshold=args.fuzzy_threshold,
        )
        if unmatched:
            report_file = generate_unmatched_report(unmatched)
            print("\n📋 Unmatched songs report generated successfully!")
            print(f"📁 Report saved to: {report_file}")
            print(f"📊 Summary: {len(download_plan)} songs found, {len(unmatched)} songs not found")
            print("\n🔍 First 10 unmatched songs:")
            for i, song in enumerate(unmatched[:10], 1):
                print(f" {i:2d}. {song['artist']} - {song['title']}")
            if len(unmatched) > 10:
                print(f" ... and {len(unmatched) - 10} more songs")
        else:
            print(f"\n✅ All {len(songlist)} songs were found in the channels!")
    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        print(f"❌ Error generating report: {e}")
        sys.exit(1)


def _print_songlist_status(downloader):
    """Print how many songlist songs are tracked as downloaded, with a short sample."""
    # Treat a falsy return (e.g. no songlist available) as an empty list.
    songlist = downloader._load_songlist() or []
    tracking = downloader.songlist_tracking
    total = len(songlist)
    done = len(tracking)
    # An empty songlist previously raised ZeroDivisionError here.
    progress = (done / total * 100) if total else 0.0

    print("📋 Songlist Download Status")
    print("=" * 40)
    print(f"Total songs in list: {total}")
    print(f"Songs downloaded: {done}")
    print(f"Songs remaining: {total - done}")
    print(f"Progress: {progress:.1f}%")
    if tracking:
        print("\n📁 Downloaded songs:")
        for _key, info in list(tracking.items())[:10]:
            print(
                f"{info['artist']} - {info['title']} (from {info['channel']})"
            )
        if done > 10:
            print(f" ... and {done - 10} more")


def main():
    """Parse the command line and run the requested downloader action.

    The flow is: validate arguments, require downloader/yt-dlp.exe, create and
    configure a KaraokeDownloader, then run exactly one action. Maintenance and
    reporting actions exit with status 0 when done (1 on error). Download
    actions save the tracker and exit 0 if the downloader reported success,
    otherwise 1. With no actionable arguments the help text is printed and the
    process exits with status 1. This function always terminates via sys.exit.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Validate workers argument
    if not 1 <= args.workers <= 10:
        print("❌ Error: --workers must be between 1 and 10")
        sys.exit(1)

    yt_dlp_path = Path("downloader/yt-dlp.exe")
    if not yt_dlp_path.exists():
        print("❌ Error: yt-dlp.exe not found in downloader/ directory")
        print("Please ensure yt-dlp.exe is present in the downloader/ folder")
        sys.exit(1)

    downloader = KaraokeDownloader()
    _configure_downloader(downloader, args)

    # --- Reset channel CLI command ---
    if args.reset_channel:
        downloader.reset_channel_downloads(
            args.reset_channel,
            reset_songlist=args.reset_songlist,
            delete_files=True,  # Always delete files if they exist
        )
        sys.exit(0)

    # If no URL or file is provided, but --songlist-only is set, use all
    # channels in data/channels.txt.
    if args.songlist_only and not args.url and not args.file:
        channels_file = Path("data/channels.txt")
        if channels_file.exists():
            args.file = str(channels_file)
            print(
                "📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode."
            )
        else:
            print(
                "❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs."
            )
            sys.exit(1)

    if args.reset_songlist_all:
        from karaoke_downloader.downloader import reset_songlist_all

        reset_songlist_all()
        print("✅ All songlist tracking and files have been reset.")
        sys.exit(0)

    if args.clear_server_duplicates:
        from karaoke_downloader.server_manager import save_server_duplicates_tracking

        # Saving an empty mapping is what clears the tracking.
        save_server_duplicates_tracking({})
        print("✅ Server duplicates tracking has been cleared.")
        print(" Songs will be re-checked against the server on next run.")
        sys.exit(0)

    if args.generate_songlist:
        from karaoke_downloader.songlist_generator import SongListGenerator

        print("🎵 Generating song list from MP4 files with ID3 tags...")
        generator = SongListGenerator()
        try:
            generator.generate_songlist_from_multiple_directories(
                args.generate_songlist,
                append=not args.no_append_songlist,
            )
            print("✅ Song list generation completed successfully!")
        except Exception as e:
            # Top-level CLI boundary: report the failure and exit non-zero.
            print(f"❌ Error generating song list: {e}")
            sys.exit(1)
        sys.exit(0)

    # Exactly one of the following actions runs; the first matching flag wins.
    if args.status:
        stats = downloader.tracker.get_statistics()
        print("🎤 Karaoke Downloader Status")
        print("=" * 40)
        print(f"Total Songs: {stats['total_songs']}")
        print(f"Total Playlists: {stats['total_playlists']}")
        print(f"Downloaded Songs: {stats['downloaded_songs']}")
        print(f"Failed Songs: {stats['failed_songs']}")
        print(f"Partial Downloads: {stats['partial_songs']}")
        print(f"Total Size: {stats['total_size_mb']} MB")
        print(f"Last Updated: {stats['last_updated']}")
        sys.exit(0)
    elif args.cache_info:
        cache_info = downloader.tracker.get_cache_info()
        print("💾 Channel Cache Information")
        print("=" * 40)
        print(f"Total Channels: {cache_info['total_channels']}")
        print(f"Total Cached Videos: {cache_info['total_cached_videos']}")
        print(f"Cache Duration: {cache_info['cache_duration_hours']} hours")
        print(f"Last Updated: {cache_info['last_updated']}")
        sys.exit(0)
    elif args.clear_cache:
        # A bare --clear-cache yields the const value "all".
        if args.clear_cache == "all":
            downloader.tracker.clear_channel_cache()
            print("🧹 Cleared all channel caches")
        else:
            downloader.tracker.clear_channel_cache(args.clear_cache)
            print(f"🧹 Cleared cache for channel: {args.clear_cache}")
        sys.exit(0)
    elif args.cache_duration:
        downloader.tracker.set_cache_duration(args.cache_duration)
        print(f"⏰ Cache duration set to {args.cache_duration} hours")
        sys.exit(0)
    elif args.report:
        report = downloader.tracker.export_playlist_report(args.report)
        print(report)
        sys.exit(0)
    elif args.cleanup:
        orphaned = downloader.tracker.cleanup_orphaned_files(downloader.downloads_dir)
        print(f"🧹 Cleaned up {len(orphaned)} orphaned tracking entries")
        sys.exit(0)
    elif args.generate_unmatched_report:
        _run_unmatched_report(args, downloader)
        sys.exit(0)
    elif args.songlist_status:
        _print_songlist_status(downloader)
        sys.exit(0)
    elif args.songlist_only or args.songlist_focus:
        # Use provided file or default to data/channels.txt
        channel_urls = _read_channel_urls(args.file or "data/channels.txt")
        success = downloader.download_songlist_across_channels(
            channel_urls,
            limit=args.limit or None,
            force_refresh_download_plan=args.force_download_plan,
            fuzzy_match=args.fuzzy_match,
            fuzzy_threshold=args.fuzzy_threshold,
            force_download=args.force,
        )
    elif args.latest_per_channel:
        # Use provided file or default to data/channels.txt
        channel_urls = _read_channel_urls(args.file or "data/channels.txt")
        success = downloader.download_latest_per_channel(
            channel_urls,
            limit=args.limit or DEFAULT_LATEST_PER_CHANNEL_LIMIT,
            force_refresh_download_plan=args.force_download_plan,
            fuzzy_match=args.fuzzy_match,
            fuzzy_threshold=args.fuzzy_threshold,
            force_download=args.force,
        )
    elif args.url:
        success = downloader.download_channel_videos(
            args.url, force_refresh=args.refresh
        )
    else:
        # NOTE(review): --file on its own also lands here, although the help
        # epilog shows it as a standalone example - confirm intended behavior.
        parser.print_help()
        sys.exit(1)

    # Only the download branches reach this point.
    downloader.tracker.force_save()
    if success:
        print("\n🎤 All downloads completed successfully!")
        sys.exit(0)
    else:
        print("\n⚠️ Some downloads may have failed. Check the logs for details.")
        sys.exit(1)