diff --git a/PRD.md b/PRD.md index e8cce7e..a5f0342 100644 --- a/PRD.md +++ b/PRD.md @@ -177,6 +177,7 @@ KaroakeVideoDownloader/ - `--songlist-priority`: Prioritize songlist songs in download queue - `--songlist-only`: Download only songs from the songlist - `--songlist-focus ...`: Focus on specific playlists by title (e.g., `--songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100"`) +- `--songlist-file `: Custom songlist file path to use with --songlist-focus (default: data/songList.json) - `--force`: **Force download from channels, bypassing all existing file checks and re-downloading if necessary** - `--songlist-status`: Show songlist download progress - `--limit `: Limit number of downloads (enables fast mode with early exit) diff --git a/README.md b/README.md index ee35aea..6353d3f 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,11 @@ python download_karaoke.py --parallel --workers 5 --songlist-only --limit 10 python download_karaoke.py --songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100" ``` +### Focus on Specific Playlists from Custom File +```bash +python download_karaoke.py --songlist-focus "CCKaraoke" --songlist-file "data/my_custom_songlist.json" +``` + ### Force Download from Channels (Bypass All Existing File Checks) ```bash python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --force @@ -255,6 +260,7 @@ KaroakeVideoDownloader/ - `--songlist-priority`: Prioritize songlist songs in download queue - `--songlist-only`: Download only songs from the songlist - `--songlist-focus ...`: Focus on specific playlists by title (e.g., `--songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100"`) +- `--songlist-file `: Custom songlist file path to use with --songlist-focus (default: data/songList.json) - `--songlist-status`: Show songlist download progress - `--limit `: Limit number of downloads (enables fast mode with early exit) - `--resolution <720p|1080p|...>`: Override resolution @@ -292,6 +298,9 @@ python 
download_karaoke.py --songlist-only # Focused fuzzy matching (target specific playlists with flexible matching) python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --fuzzy-match --fuzzy-threshold 80 --limit 10 +# Focus on specific playlists from a custom file +python download_karaoke.py --songlist-focus "CCKaraoke" --songlist-file "data/my_custom_songlist.json" --limit 10 + # Force download with fuzzy matching (bypass all existing file checks) python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --force --fuzzy-match --fuzzy-threshold 80 --limit 10 diff --git a/commands.txt b/commands.txt index c41cf8e..c22db3c 100644 --- a/commands.txt +++ b/commands.txt @@ -65,6 +65,12 @@ python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --limit 5 # Focus on specific playlists with parallel processing python download_karaoke.py --parallel --workers 3 --songlist-focus "2025 - Apple Top 50" --limit 5 +# Focus on specific playlists from a custom songlist file +python download_karaoke.py --songlist-focus "CCKaraoke" --songlist-file "data/my_custom_songlist.json" + +# Focus on specific playlists from a custom file with force mode +python download_karaoke.py --songlist-focus "CCKaraoke" --songlist-file "data/my_custom_songlist.json" --force + # Force download from channels regardless of existing files or server duplicates python download_karaoke.py --songlist-focus "2025 - Apple Top 50" --force @@ -82,15 +88,30 @@ python download_karaoke.py --songlist-status ## 📊 UNMATCHED SONGS REPORTS -# Generate report of songs that couldn't be found in any channel +# Generate report of songs that couldn't be found in any channel (standalone) python download_karaoke.py --generate-unmatched-report -# Generate report with fuzzy matching enabled +# Generate report with fuzzy matching enabled (standalone) python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-threshold 85 -# Generate report using a specific channel file +# Generate 
report using a specific channel file (standalone) python download_karaoke.py --generate-unmatched-report --file data/my_channels.txt +# Generate report from a custom songlist file (standalone) +python download_karaoke.py --generate-unmatched-report --songlist-file "data/my_custom_songlist.json" + +# Generate report with focus on specific playlists from a custom file (standalone) +python download_karaoke.py --songlist-focus "CCKaraoke" --songlist-file "data/my_custom_songlist.json" --generate-unmatched-report + +# Download songs AND generate unmatched report (additive feature) +python download_karaoke.py --songlist-only --limit 10 --generate-unmatched-report + +# Download with fuzzy matching AND generate unmatched report +python download_karaoke.py --songlist-only --fuzzy-match --fuzzy-threshold 85 --limit 10 --generate-unmatched-report + +# Download from specific playlists AND generate unmatched report +python download_karaoke.py --songlist-focus "CCKaraoke" --limit 10 --generate-unmatched-report + # Generate report with custom fuzzy threshold python download_karaoke.py --generate-unmatched-report --fuzzy-match --fuzzy-threshold 80 diff --git a/karaoke_downloader/cli.py b/karaoke_downloader/cli.py index 37dab88..4c51eb5 100644 --- a/karaoke_downloader/cli.py +++ b/karaoke_downloader/cli.py @@ -103,7 +103,7 @@ Examples: parser.add_argument( "--generate-unmatched-report", action="store_true", - help="Generate a report of songs that couldn't be found in any channel", + help="Generate a report of songs that couldn't be found in any channel (runs after downloads)", ) parser.add_argument( "--songlist-only", @@ -116,6 +116,11 @@ Examples: metavar="PLAYLIST_TITLE", help='Focus on specific playlists by title (e.g., --songlist-focus "2025 - Apple Top 50" "2024 - Billboard Hot 100")', ) + parser.add_argument( + "--songlist-file", + metavar="FILE_PATH", + help="Custom songlist file path to use with --songlist-focus (default: data/songList.json)", + ) parser.add_argument( 
"--force", action="store_true", @@ -235,6 +240,9 @@ Examples: print( f"๐ŸŽฏ Songlist focus mode enabled for playlists: {', '.join(args.songlist_focus)}" ) + if args.songlist_file: + downloader.songlist_file_path = args.songlist_file + print(f"๐Ÿ“ Using custom songlist file: {args.songlist_file}") if args.force: downloader.force_download = True print("๐Ÿ’ช Force mode enabled - will download regardless of existing files or server duplicates") @@ -337,134 +345,6 @@ Examples: elif args.cleanup: orphaned = downloader.tracker.cleanup_orphaned_files(downloader.downloads_dir) print(f"๐Ÿงน Cleaned up {len(orphaned)} orphaned tracking entries") - sys.exit(0) - elif args.generate_unmatched_report: - from karaoke_downloader.download_planner import generate_unmatched_report - from karaoke_downloader.songlist_manager import load_songlist - - print("๐Ÿ” Generating unmatched songs report...") - - # Load songlist based on focus mode - if args.songlist_focus: - # Load focused playlists - songlist_file = Path("data/songList.json") - if not songlist_file.exists(): - print("โš ๏ธ Songlist file not found: data/songList.json") - sys.exit(1) - - try: - with open(songlist_file, "r", encoding="utf-8") as f: - raw_data = json.load(f) - - # Filter playlists by title - focused_playlists = [] - print(f"๐Ÿ” Looking for playlists: {args.songlist_focus}") - print(f"๐Ÿ” Available playlists in songList.json:") - for i, playlist in enumerate(raw_data[:5]): # Show first 5 playlists - print(f" {i+1}. '{playlist.get('title', 'NO TITLE')}'") - if len(raw_data) > 5: - print(f" ... 
and {len(raw_data) - 5} more playlists") - - for playlist in raw_data: - playlist_title = playlist.get("title", "") - if playlist_title in args.songlist_focus: - focused_playlists.append(playlist) - print(f"โœ… Found matching playlist: '{playlist_title}'") - - if not focused_playlists: - print( - f"โš ๏ธ No playlists found matching the specified titles: {', '.join(args.songlist_focus)}" - ) - sys.exit(1) - - # Flatten the focused playlists into songs - focused_songs = [] - seen = set() - for playlist in focused_playlists: - if "songs" in playlist: - for song in playlist["songs"]: - if "artist" in song and "title" in song: - artist = song["artist"].strip() - title = song["title"].strip() - key = f"{artist.lower()}_{title.lower()}" - if key in seen: - continue - seen.add(key) - focused_songs.append( - { - "artist": artist, - "title": title, - "position": song.get("position", 0), - } - ) - - songlist = focused_songs - print( - f"\n๐ŸŽฏ Songlist focus mode: {len(focused_songs)} songs from {len(focused_playlists)} playlists selected" - ) - print(f"๐ŸŽฏ Focused playlists: {', '.join(args.songlist_focus)}") - - except (json.JSONDecodeError, FileNotFoundError) as e: - print(f"โš ๏ธ Could not load songlist for filtering: {e}") - sys.exit(1) - else: - # Load all songs from songlist - songlist = load_songlist() - if not songlist: - print("โŒ No songlist found. 
Please ensure data/songList.json exists.") - sys.exit(1) - - # Load channel URLs - channel_file = args.file if args.file else "data/channels.txt" - if not os.path.exists(channel_file): - print(f"โŒ Channel file not found: {channel_file}") - sys.exit(1) - - with open(channel_file, "r", encoding='utf-8') as f: - channel_urls = [ - line.strip() - for line in f - if line.strip() and not line.strip().startswith("#") - ] - - print(f"๐Ÿ“‹ Analyzing {len(songlist)} songs against {len(channel_urls)} channels...") - - # Build download plan to get unmatched songs - from karaoke_downloader.download_planner import build_download_plan - fuzzy_match = args.fuzzy_match if hasattr(args, "fuzzy_match") else False - fuzzy_threshold = ( - args.fuzzy_threshold - if hasattr(args, "fuzzy_threshold") - else DEFAULT_FUZZY_THRESHOLD - ) - - try: - download_plan, unmatched = build_download_plan( - channel_urls, - songlist, - downloader.tracker, - downloader.yt_dlp_path, - fuzzy_match=fuzzy_match, - fuzzy_threshold=fuzzy_threshold, - ) - - if unmatched: - report_file = generate_unmatched_report(unmatched) - print(f"\n๐Ÿ“‹ Unmatched songs report generated successfully!") - print(f"๐Ÿ“ Report saved to: {report_file}") - print(f"๐Ÿ“Š Summary: {len(download_plan)} songs found, {len(unmatched)} songs not found") - print(f"\n๐Ÿ” First 10 unmatched songs:") - for i, song in enumerate(unmatched[:10], 1): - print(f" {i:2d}. {song['artist']} - {song['title']}") - if len(unmatched) > 10: - print(f" ... 
and {len(unmatched) - 10} more songs") - else: - print(f"\nโœ… All {len(songlist)} songs were found in the channels!") - - except Exception as e: - print(f"โŒ Error generating report: {e}") - sys.exit(1) - sys.exit(0) elif args.songlist_status: songlist = downloader._load_songlist() @@ -551,6 +431,127 @@ Examples: else: parser.print_help() sys.exit(1) + + # Generate unmatched report if requested (additive feature) + if args.generate_unmatched_report: + from karaoke_downloader.download_planner import generate_unmatched_report, build_download_plan + from karaoke_downloader.songlist_manager import load_songlist + + print("\n๐Ÿ” Generating unmatched songs report...") + + # Load songlist based on focus mode + if args.songlist_focus: + # Load focused playlists + songlist_file_path = args.songlist_file if args.songlist_file else "data/songList.json" + songlist_file = Path(songlist_file_path) + if not songlist_file.exists(): + print(f"โš ๏ธ Songlist file not found: {songlist_file_path}") + else: + try: + with open(songlist_file, "r", encoding="utf-8") as f: + raw_data = json.load(f) + + # Filter playlists by title + focused_playlists = [] + for playlist in raw_data: + playlist_title = playlist.get("title", "") + if playlist_title in args.songlist_focus: + focused_playlists.append(playlist) + + if focused_playlists: + # Flatten the focused playlists into songs + focused_songs = [] + seen = set() + for playlist in focused_playlists: + if "songs" in playlist: + for song in playlist["songs"]: + if "artist" in song and "title" in song: + artist = song["artist"].strip() + title = song["title"].strip() + key = f"{artist.lower()}_{title.lower()}" + if key in seen: + continue + seen.add(key) + focused_songs.append( + { + "artist": artist, + "title": title, + "position": song.get("position", 0), + } + ) + + songlist = focused_songs + else: + print(f"โš ๏ธ No playlists found matching: {', '.join(args.songlist_focus)}") + songlist = [] + + except (json.JSONDecodeError, 
FileNotFoundError) as e: + print(f"โš ๏ธ Could not load songlist for report: {e}") + songlist = [] + else: + # Load all songs from songlist + songlist_path = args.songlist_file if args.songlist_file else "data/songList.json" + songlist = load_songlist(songlist_path) + + if songlist: + # Load channel URLs + channel_file = args.file if args.file else "data/channels.txt" + if os.path.exists(channel_file): + with open(channel_file, "r", encoding='utf-8') as f: + channel_urls = [ + line.strip() + for line in f + if line.strip() and not line.strip().startswith("#") + ] + + print(f"๐Ÿ“‹ Analyzing {len(songlist)} songs against {len(channel_urls)} channels...") + + # Build download plan to get unmatched songs + fuzzy_match = args.fuzzy_match if hasattr(args, "fuzzy_match") else False + fuzzy_threshold = ( + args.fuzzy_threshold + if hasattr(args, "fuzzy_threshold") + else DEFAULT_FUZZY_THRESHOLD + ) + + try: + download_plan, unmatched = build_download_plan( + channel_urls, + songlist, + downloader.tracker, + downloader.yt_dlp_path, + fuzzy_match=fuzzy_match, + fuzzy_threshold=fuzzy_threshold, + ) + + if unmatched: + report_file = generate_unmatched_report(unmatched) + print(f"\n๐Ÿ“‹ Unmatched songs report generated successfully!") + print(f"๐Ÿ“ Report saved to: {report_file}") + print(f"๐Ÿ“Š Summary: {len(download_plan)} songs found, {len(unmatched)} songs not found") + print(f"\n๐Ÿ” First 10 unmatched songs:") + for i, song in enumerate(unmatched[:10], 1): + print(f" {i:2d}. {song['artist']} - {song['title']}") + if len(unmatched) > 10: + print(f" ... 
and {len(unmatched) - 10} more songs") + else: + print(f"\nโœ… All {len(songlist)} songs were found in the channels!") + + except Exception as e: + print(f"โŒ Error generating report: {e}") + else: + print(f"โŒ Channel file not found: {channel_file}") + else: + print("โŒ No songlist available for report generation") + + # If no download command was specified but generate-unmatched-report was used, exit here + if not any([args.songlist_only, args.songlist_focus, args.latest_per_channel, args.url]): + if args.generate_unmatched_report: + sys.exit(0) + else: + parser.print_help() + sys.exit(1) + downloader.tracker.force_save() if success: print("\n๐ŸŽค All downloads completed successfully!") diff --git a/karaoke_downloader/downloader.py b/karaoke_downloader/downloader.py index 1ab9db4..6abdebd 100644 --- a/karaoke_downloader/downloader.py +++ b/karaoke_downloader/downloader.py @@ -115,6 +115,7 @@ class KaraokeDownloader: self.use_songlist_priority = True self.download_limit = None self.force_download = False + self.songlist_file_path = "data/songList.json" # Default songlist file path def _load_config(self): """Load configuration using the config manager.""" @@ -183,7 +184,7 @@ class KaraokeDownloader: """Download videos from a channel or playlist URL, respecting songlist-only and limit flags. Supports fuzzy matching.""" channel_name, channel_id = get_channel_info(url) print(f"\n๐ŸŽฌ Downloading from channel: {channel_name} ({url})") - songlist = load_songlist() + songlist = load_songlist(self.songlist_file_path) if not songlist: print("โš ๏ธ No songlist loaded. 
Skipping.") return False @@ -325,9 +326,9 @@ class KaraokeDownloader: # Apply songlist focus filtering if specified if self.songlist_focus_titles: # Load the raw songlist data to filter by playlist titles - songlist_file = Path("data/songList.json") + songlist_file = Path(self.songlist_file_path) if not songlist_file.exists(): - print("โš ๏ธ Songlist file not found: data/songList.json") + print(f"โš ๏ธ Songlist file not found: {self.songlist_file_path}") return False try: @@ -383,11 +384,11 @@ class KaraokeDownloader: print(f"๐ŸŽฏ Focused playlists: {', '.join(self.songlist_focus_titles)}") except (json.JSONDecodeError, FileNotFoundError) as e: - print(f"โš ๏ธ Could not load songlist for filtering: {e}") + print(f"โš ๏ธ Could not load songlist for filtering from {self.songlist_file_path}: {e}") return False else: # Load songlist normally (flattened from all playlists) - songlist = load_songlist() + songlist = load_songlist(self.songlist_file_path) if not songlist: print("โš ๏ธ No songlist loaded. Skipping.") return False