Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

This commit is contained in:
mbrucedogs 2025-07-27 19:47:05 -05:00
parent 2c63bf809b
commit ea07188739
5 changed files with 356 additions and 122 deletions

View File

@ -105,6 +105,11 @@ Examples:
action="store_true",
help="Generate a report of songs that couldn't be found in any channel (runs after downloads)",
)
parser.add_argument(
"--show-pagination",
action="store_true",
help="Show page-by-page progress when downloading channel video lists (slower but more detailed)",
)
parser.add_argument(
"--songlist-only",
action="store_true",
@ -237,6 +242,7 @@ Examples:
if args.songlist_focus:
downloader.songlist_focus_titles = args.songlist_focus
downloader.songlist_only = True # Enable songlist-only mode when focusing
args.songlist_only = True # Also set the args flag to ensure CLI logic works
print(
f"🎯 Songlist focus mode enabled for playlists: {', '.join(args.songlist_focus)}"
)
@ -260,12 +266,12 @@ Examples:
# --- END NEW ---
# --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels in data/channels.txt ---
if args.songlist_only and not args.url and not args.file:
if (args.songlist_only or args.songlist_focus) and not args.url and not args.file:
channels_file = Path("data/channels.txt")
if channels_file.exists():
args.file = str(channels_file)
print(
"📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode."
"📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist mode."
)
else:
print(
@ -377,22 +383,14 @@ Examples:
if line.strip() and not line.strip().startswith("#")
]
limit = args.limit if args.limit else None
force_refresh_download_plan = (
args.force_download_plan if hasattr(args, "force_download_plan") else False
)
fuzzy_match = args.fuzzy_match if hasattr(args, "fuzzy_match") else False
fuzzy_threshold = (
args.fuzzy_threshold
if hasattr(args, "fuzzy_threshold")
else DEFAULT_FUZZY_THRESHOLD
)
success = downloader.download_songlist_across_channels(
channel_urls,
limit=limit,
force_refresh_download_plan=force_refresh_download_plan,
fuzzy_match=fuzzy_match,
fuzzy_threshold=fuzzy_threshold,
limit=args.limit,
force_refresh_download_plan=args.force_download_plan if hasattr(args, "force_download_plan") else False,
fuzzy_match=args.fuzzy_match,
fuzzy_threshold=args.fuzzy_threshold,
force_download=args.force,
show_pagination=args.show_pagination,
)
elif args.latest_per_channel:
# Use provided file or default to data/channels.txt

View File

@ -81,6 +81,7 @@ def build_download_plan(
yt_dlp_path,
fuzzy_match=False,
fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
show_pagination=False,
):
"""
For each song in undownloaded, scan all channels for a match.
@ -108,7 +109,7 @@ def build_download_plan(
print(f" ✅ Channel info: {channel_name} (ID: {channel_id})")
print(f" 🔍 Fetching video list from channel...")
available_videos = tracker.get_channel_video_list(
channel_url, yt_dlp_path=str(yt_dlp_path), force_refresh=False
channel_url, yt_dlp_path=str(yt_dlp_path), force_refresh=False, show_pagination=show_pagination
)
print(
f" 📊 Channel has {len(available_videos)} videos to scan against {len(undownloaded)} songlist songs"

View File

@ -88,9 +88,9 @@ class KaraokeDownloader:
# Initialize tracking
tracking_file = DATA_DIR / "karaoke_tracking.json"
cache_file = DATA_DIR / "channel_cache.json"
cache_dir = DATA_DIR / "channel_cache"
self.tracker = TrackingManager(
tracking_file=tracking_file, cache_file=cache_file
tracking_file=tracking_file, cache_dir=cache_dir
)
# Initialize song validator
@ -316,6 +316,7 @@ class KaraokeDownloader:
fuzzy_match=False,
fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
force_download=False,
show_pagination=False,
):
"""
Download songs from the songlist across multiple channels.
@ -508,6 +509,7 @@ class KaraokeDownloader:
self.yt_dlp_path,
fuzzy_match=fuzzy_match,
fuzzy_threshold=fuzzy_threshold,
show_pagination=show_pagination,
)
save_plan_cache(cache_file, download_plan, unmatched)
print(f"💾 Download plan cached to: {cache_file}")

View File

@ -1,10 +1,10 @@
import threading
from enum import Enum
import json
from datetime import datetime
import os
import re
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
class SongStatus(str, Enum):
NOT_DOWNLOADED = "NOT_DOWNLOADED"
@ -26,45 +26,127 @@ class TrackingManager:
def __init__(
self,
tracking_file="data/karaoke_tracking.json",
cache_file="data/channel_cache.json",
cache_dir="data/channel_cache",
):
self.tracking_file = Path(tracking_file)
self.cache_file = Path(cache_file)
self.data = {"playlists": {}, "songs": {}}
self.cache = {}
self._lock = threading.Lock()
self._load()
self._load_cache()
self.cache_dir = Path(cache_dir)
# Ensure cache directory exists
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.data = self._load()
print(f"📊 Tracking manager initialized with {len(self.data.get('songs', {}))} tracked songs")
def _load(self):
"""Load tracking data from JSON file."""
if self.tracking_file.exists():
try:
with open(self.tracking_file, "r", encoding="utf-8") as f:
self.data = json.load(f)
except Exception:
self.data = {"playlists": {}, "songs": {}}
return json.load(f)
except json.JSONDecodeError:
print(f"⚠️ Corrupted tracking file, creating new one")
return {"songs": {}, "playlists": {}, "last_updated": datetime.now().isoformat()}
def _save(self):
with self._lock:
with open(self.tracking_file, "w", encoding="utf-8") as f:
json.dump(self.data, f, indent=2, ensure_ascii=False)
"""Save tracking data to JSON file."""
self.data["last_updated"] = datetime.now().isoformat()
self.tracking_file.parent.mkdir(parents=True, exist_ok=True)
with open(self.tracking_file, "w", encoding="utf-8") as f:
json.dump(self.data, f, indent=2, ensure_ascii=False)
def force_save(self):
    """Write tracking data to disk immediately (thin wrapper over _save)."""
    self._save()
def _load_cache(self):
if self.cache_file.exists():
try:
with open(self.cache_file, "r", encoding="utf-8") as f:
self.cache = json.load(f)
except Exception:
self.cache = {}
def _get_channel_cache_file(self, channel_id: str) -> Path:
    """Map *channel_id* to its per-channel JSON cache path.

    Characters that are illegal in Windows filenames are replaced with
    underscores so any channel ID yields a valid file name.
    """
    safe_name = re.sub(r'[<>:"/\\|?*]', '_', channel_id)
    return self.cache_dir / (safe_name + ".json")
def save_cache(self):
with open(self.cache_file, "w", encoding="utf-8") as f:
json.dump(self.cache, f, indent=2, ensure_ascii=False)
def _load_channel_cache(self, channel_id: str) -> List[Dict[str, str]]:
    """Return the cached video list for *channel_id*.

    Returns [] when no cache file exists or when the file is corrupt
    (in which case a warning is printed and the caller may recreate it).
    """
    path = self._get_channel_cache_file(channel_id)
    if not path.exists():
        return []
    try:
        with open(path, 'r', encoding='utf-8') as fh:
            payload = json.load(fh)
    except (json.JSONDecodeError, KeyError):
        print(f" ⚠️ Corrupted cache file for {channel_id}, will recreate")
        return []
    return payload.get('videos', [])
def _save_channel_cache(self, channel_id: str, videos: List[Dict[str, str]]):
    """Persist *videos* for *channel_id* to its cache file with metadata.

    The payload records the channel ID, the video list, an ISO timestamp,
    and a redundant video count for quick inspection.
    """
    payload = {
        'channel_id': channel_id,
        'videos': videos,
        'last_updated': datetime.now().isoformat(),
        'video_count': len(videos),
    }
    with open(self._get_channel_cache_file(channel_id), 'w', encoding='utf-8') as fh:
        json.dump(payload, fh, indent=2, ensure_ascii=False)
def _clear_channel_cache(self, channel_id: str):
    """Delete the cache file for *channel_id*; no-op when it does not exist."""
    path = self._get_channel_cache_file(channel_id)
    if path.exists():
        path.unlink()
        print(f" 🗑️ Cleared cache file: {path.name}")
def get_cache_info(self):
    """Summarize every per-channel cache file under ``self.cache_dir``.

    Returns a dict with 'total_channels', 'total_videos', and a 'channels'
    list of per-file summaries. Unreadable cache files are reported on
    stdout and skipped rather than aborting the scan.
    """
    cache_paths = list(self.cache_dir.glob("*.json"))
    channels = []
    video_total = 0
    for path in cache_paths:
        try:
            with open(path, 'r', encoding='utf-8') as fh:
                entry = json.load(fh)
            n_videos = len(entry.get('videos', []))
            video_total += n_videos
            channels.append({
                'channel': entry.get('channel_id', path.stem),
                'videos': n_videos,
                'last_updated': entry.get('last_updated', 'Unknown'),
                'file': path.name,
            })
        except Exception as e:
            # Best-effort: a bad file should not hide the rest of the cache.
            print(f"⚠️ Error reading cache file {path.name}: {e}")
    return {
        'total_channels': len(cache_paths),
        'total_videos': video_total,
        'channels': channels,
    }
def clear_channel_cache(self, channel_id=None):
    """Remove cached video lists — one channel if *channel_id* is given,
    otherwise every cache file in ``self.cache_dir``."""
    if not channel_id:
        stale_files = list(self.cache_dir.glob("*.json"))
        for stale in stale_files:
            stale.unlink()
        print(f"🗑️ Cleared all {len(stale_files)} channel cache files")
        return
    self._clear_channel_cache(channel_id)
    print(f"🗑️ Cleared cache for channel: {channel_id}")
def set_cache_duration(self, hours):
    """Placeholder for cache duration logic.

    Args:
        hours: Intended cache lifetime in hours; currently ignored.

    NOTE(review): nothing in the visible code expires per-channel cache
    files by age — confirm whether time-based expiry is still planned or
    whether this hook (and its CLI plumbing) should be removed.
    """
    pass
def export_playlist_report(self, playlist_id):
    """Export a report for a specific playlist.

    Builds a JSON document containing the playlist's metadata and all of
    its tracked songs (via get_playlist_songs).

    Args:
        playlist_id: Key into ``self.data["playlists"]``.

    Returns:
        A pretty-printed JSON string on success, or a human-readable
        "not found" message when the playlist is unknown.
    """
    playlist = self.data["playlists"].get(playlist_id)
    if not playlist:
        return f"Playlist '{playlist_id}' not found."
    songs = self.get_playlist_songs(playlist_id)
    report = {"playlist": playlist, "songs": songs}
    return json.dumps(report, indent=2, ensure_ascii=False)
def get_statistics(self):
"""Get statistics about tracked songs."""
total_songs = len(self.data["songs"])
downloaded_songs = sum(
1
@ -102,11 +184,13 @@ class TrackingManager:
}
def get_playlist_songs(self, playlist_id):
    """Return all tracked songs whose 'playlist_id' equals *playlist_id*."""
    tracked = self.data["songs"].values()
    return [song for song in tracked if song["playlist_id"] == playlist_id]
def get_failed_songs(self, playlist_id=None):
"""Get failed songs, optionally filtered by playlist."""
if playlist_id:
return [
s
@ -118,6 +202,7 @@ class TrackingManager:
]
def get_partial_downloads(self, playlist_id=None):
"""Get partial downloads, optionally filtered by playlist."""
if playlist_id:
return [
s
@ -129,7 +214,7 @@ class TrackingManager:
]
def cleanup_orphaned_files(self, downloads_dir):
# Remove tracking entries for files that no longer exist
"""Remove tracking entries for files that no longer exist."""
orphaned = []
for song_id, song in list(self.data["songs"].items()):
file_path = song.get("file_path")
@ -139,51 +224,17 @@ class TrackingManager:
self.force_save()
return orphaned
def get_cache_info(self):
total_channels = len(self.cache)
total_cached_videos = sum(len(v) for v in self.cache.values())
cache_duration_hours = 24 # default
last_updated = None
return {
"total_channels": total_channels,
"total_cached_videos": total_cached_videos,
"cache_duration_hours": cache_duration_hours,
"last_updated": last_updated,
}
def clear_channel_cache(self, channel_id=None):
if channel_id is None or channel_id == "all":
self.cache = {}
else:
self.cache.pop(channel_id, None)
self.save_cache()
def set_cache_duration(self, hours):
# Placeholder for cache duration logic
pass
def export_playlist_report(self, playlist_id):
playlist = self.data["playlists"].get(playlist_id)
if not playlist:
return f"Playlist '{playlist_id}' not found."
songs = self.get_playlist_songs(playlist_id)
report = {"playlist": playlist, "songs": songs}
return json.dumps(report, indent=2, ensure_ascii=False)
def is_song_downloaded(self, artist, title, channel_name=None, video_id=None):
"""
Check if a song has already been downloaded by this system.
Returns True if the song exists in tracking with DOWNLOADED or CONVERTED status.
Check if a song has already been downloaded.
Returns True if the song exists in tracking with DOWNLOADED status.
"""
# If we have video_id and channel_name, try direct key lookup first (most efficient)
if video_id and channel_name:
song_key = f"{video_id}@{channel_name}"
if song_key in self.data["songs"]:
song_data = self.data["songs"][song_key]
if song_data.get("status") in [
SongStatus.DOWNLOADED,
SongStatus.CONVERTED,
]:
if song_data.get("status") == SongStatus.DOWNLOADED:
return True
# Fallback to content search (for cases where we don't have video_id)
@ -191,19 +242,14 @@ class TrackingManager:
# Check if this song matches the artist and title
if song_data.get("artist") == artist and song_data.get("title") == title:
# Check if it's marked as downloaded
if song_data.get("status") in [
SongStatus.DOWNLOADED,
SongStatus.CONVERTED,
]:
if song_data.get("status") == SongStatus.DOWNLOADED:
return True
# Also check the video title field which might contain the song info
video_title = song_data.get("video_title", "")
if video_title and artist in video_title and title in video_title:
if song_data.get("status") in [
SongStatus.DOWNLOADED,
SongStatus.CONVERTED,
]:
if song_data.get("status") == SongStatus.DOWNLOADED:
return True
return False
def is_file_exists(self, file_path):
@ -283,65 +329,248 @@ class TrackingManager:
self._save()
def get_channel_video_list(
self, channel_url, yt_dlp_path="downloader/yt-dlp.exe", force_refresh=False
self, channel_url, yt_dlp_path="downloader/yt-dlp.exe", force_refresh=False, show_pagination=False
):
"""
Return a list of videos (dicts with 'title' and 'id') for the channel, using cache if available unless force_refresh is True.
Args:
channel_url: YouTube channel URL
yt_dlp_path: Path to yt-dlp executable
force_refresh: Force refresh cache even if available
show_pagination: Show page-by-page progress (slower but more detailed)
"""
channel_name, channel_id = None, None
from karaoke_downloader.youtube_utils import get_channel_info
channel_name, channel_id = get_channel_info(channel_url)
if not channel_id:
print(f" ❌ Could not extract channel ID from URL: {channel_url}")
return []
# Try multiple possible cache keys
possible_keys = [
channel_id, # The extracted channel ID
channel_url, # The full URL
channel_name, # The extracted channel name
]
print(f" 🔍 Channel: {channel_name} (ID: {channel_id})")
cache_key = None
for key in possible_keys:
if key and key in self.cache:
cache_key = key
break
# Check if we have cached data for this channel
if not force_refresh:
cached_videos = self._load_channel_cache(channel_id)
if cached_videos:
# Validate that the cached data has proper video IDs
corrupted = False
# Check if any video IDs look like titles instead of proper YouTube IDs
for video in cached_videos[:10]: # Check first 10 videos
video_id = video.get("id", "")
if video_id and (len(video_id) > 20 or " " in video_id or "Lyrics" in video_id):
print(f" ⚠️ Detected corrupted video ID in cache: '{video_id}'")
corrupted = True
break
if corrupted:
print(f" 🧹 Clearing corrupted cache for {channel_id}")
self._clear_channel_cache(channel_id)
force_refresh = True
else:
print(f" 📋 Using cached video list ({len(cached_videos)} videos)")
return cached_videos
if not cache_key:
cache_key = channel_id or channel_url # Use as fallback for new entries
print(f" 🔍 Trying cache keys: {possible_keys}")
print(f" 🔍 Selected cache key: '{cache_key}'")
if not force_refresh and cache_key in self.cache:
print(
f" 📋 Using cached video list ({len(self.cache[cache_key])} videos)"
)
return self.cache[cache_key]
# Choose fetch method based on show_pagination flag
if show_pagination:
return self._fetch_videos_with_pagination(channel_url, channel_id, yt_dlp_path)
else:
print(f" ❌ Cache miss for all keys")
return self._fetch_videos_flat_playlist(channel_url, channel_id, yt_dlp_path)
def _fetch_videos_with_pagination(self, channel_url, channel_id, yt_dlp_path):
    """Fetch a channel's video list page-by-page via yt-dlp, with progress.

    Slower than a single --flat-playlist call but prints per-page progress.
    On success the accumulated list is written to the per-channel cache file.

    Args:
        channel_url: Channel URL passed straight to yt-dlp.
        channel_id: Identifier used to name the cache file.
        yt_dlp_path: Path to the yt-dlp executable.

    Returns:
        List of {"title": ..., "id": ...} dicts; [] when nothing valid
        was fetched.

    NOTE(review): a timeout or yt-dlp error on page N breaks out of the
    loop but still caches pages 1..N-1 — confirm a partial cache is
    acceptable here, since callers treat cached data as complete.
    """
    print(f" 🌐 Fetching video list from YouTube (page-by-page mode)...")
    print(f" 📡 Channel URL: {channel_url}")
    import subprocess
    all_videos = []
    page = 1
    # Page size fed to yt-dlp's --playlist-start/--playlist-end window.
    videos_per_page = 50  # YouTube typically shows 50 videos per page
    while True:
        print(f" 📄 Fetching page {page}...")
        # Fetch one page at a time: yt-dlp prints one "title|id|url" line
        # per video for the requested 1-based index window.
        cmd = [
            yt_dlp_path,
            "--flat-playlist",
            "--print",
            "%(title)s|%(id)s|%(url)s",
            "--playlist-start",
            str((page - 1) * videos_per_page + 1),
            "--playlist-end",
            str(page * videos_per_page),
            channel_url,
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=60)
            lines = result.stdout.strip().splitlines()
            if not lines:
                # Empty page: ran past the end of the channel.
                print(f" ✅ No more videos found on page {page}")
                break
            print(f" 📊 Page {page}: Found {len(lines)} videos")
            page_videos = []
            invalid_count = 0
            for line in lines:
                # NOTE(review): split("|") misparses titles that themselves
                # contain "|" (the ID field then holds title text) — the
                # heuristic below rejects those entries, but consider
                # rsplit("|", 2) so such videos aren't silently dropped.
                parts = line.split("|")
                if len(parts) >= 2:
                    title, video_id = parts[0].strip(), parts[1].strip()
                    # Validate that video_id looks like a proper YouTube ID:
                    # short, no spaces, and not obviously title text.
                    if video_id and (len(video_id) <= 20 and " " not in video_id and "Lyrics" not in video_id):
                        page_videos.append({"title": title, "id": video_id})
                    else:
                        invalid_count += 1
                        if invalid_count <= 3:  # Show first 3 invalid IDs per page
                            print(f" ⚠️ Invalid ID: '{video_id}' for '{title}'")
            if invalid_count > 3:
                print(f" ⚠️ ... and {invalid_count - 3} more invalid IDs on this page")
            all_videos.extend(page_videos)
            print(f" ✅ Page {page}: Added {len(page_videos)} valid videos (total: {len(all_videos)})")
            # If we got fewer videos than expected, we're probably at the end.
            if len(lines) < videos_per_page:
                print(f" 🏁 Reached end of channel (last page had {len(lines)} videos)")
                break
            page += 1
            # Safety check to prevent infinite loops on pathological channels.
            if page > 200:  # Max 200 pages (10,000 videos)
                print(f" ⚠️ Reached maximum page limit (200 pages), stopping")
                break
        except subprocess.TimeoutExpired:
            print(f" ⚠️ Page {page} timed out, stopping")
            break
        except subprocess.CalledProcessError as e:
            print(f" ❌ Error fetching page {page}: {e}")
            break
    if not all_videos:
        print(f" ❌ No valid videos found")
        return []
    print(f" 🎉 Channel download complete!")
    print(f" 📊 Total videos fetched: {len(all_videos)}")
    # Save to individual channel cache file so later runs skip the fetch.
    self._save_channel_cache(channel_id, all_videos)
    print(f" 💾 Saved cache to: {self._get_channel_cache_file(channel_id).name}")
    return all_videos
def _fetch_videos_flat_playlist(self, channel_url, channel_id, yt_dlp_path):
"""Fetch all videos using flat playlist (faster but less detailed progress)."""
# Fetch with yt-dlp
print(f" 🌐 Fetching video list from YouTube (this may take a while)...")
print(f" 📡 Channel URL: {channel_url}")
import subprocess
# First, let's get the total count to show progress
count_cmd = [
yt_dlp_path,
"--flat-playlist",
"--print",
"%(title)s",
"--playlist-end",
"1", # Just get first video to test
channel_url,
]
try:
print(f" 🔍 Testing channel access...")
test_result = subprocess.run(count_cmd, capture_output=True, text=True, timeout=30)
if test_result.returncode == 0:
print(f" ✅ Channel is accessible")
else:
print(f" ⚠️ Channel test failed: {test_result.stderr}")
except subprocess.TimeoutExpired:
print(f" ⚠️ Channel test timed out")
except Exception as e:
print(f" ⚠️ Channel test error: {e}")
# Now fetch all videos with progress indicators
cmd = [
yt_dlp_path,
"--flat-playlist",
"--print",
"%(title)s|%(id)s|%(url)s",
"--verbose", # Add verbose output to see what's happening
channel_url,
]
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
print(f" 🔧 Running yt-dlp command: {' '.join(cmd)}")
print(f" 📥 Starting video list download...")
# Use a timeout and show progress
result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=300)
lines = result.stdout.strip().splitlines()
print(f" 📄 Raw output lines: {len(lines)}")
print(f" 📊 Download completed successfully!")
# Show some sample lines to understand the format
if lines:
print(f" 📋 Sample output format:")
for i, line in enumerate(lines[:3]):
print(f" Line {i+1}: {line[:100]}...")
if len(lines) > 3:
print(f" ... and {len(lines) - 3} more lines")
videos = []
for line in lines:
invalid_count = 0
print(f" 🔍 Processing {len(lines)} video entries...")
for i, line in enumerate(lines):
if i % 1000 == 0 and i > 0: # Progress indicator every 1000 lines
print(f" 📊 Processing line {i}/{len(lines)}... ({i/len(lines)*100:.1f}%)")
parts = line.split("|")
if len(parts) >= 2:
title, video_id = parts[0].strip(), parts[1].strip()
videos.append({"title": title, "id": video_id})
self.cache[cache_key] = videos
self.save_cache()
# Validate that video_id looks like a proper YouTube ID
if video_id and (len(video_id) <= 20 and " " not in video_id and "Lyrics" not in video_id):
videos.append({"title": title, "id": video_id})
else:
invalid_count += 1
if invalid_count <= 5: # Only show first 5 invalid IDs
print(f" ⚠️ Skipping invalid video ID: '{video_id}' for title: '{title}'")
elif invalid_count == 6:
print(f" ⚠️ ... and {len(lines) - i - 1} more invalid IDs")
if not videos:
print(f" ❌ No valid videos found after parsing")
return []
print(f" ✅ Parsed {len(videos)} valid videos from YouTube")
print(f" ⚠️ Skipped {invalid_count} invalid video IDs")
# Save to individual channel cache file
self._save_channel_cache(channel_id, videos)
print(f" 💾 Saved cache to: {self._get_channel_cache_file(channel_id).name}")
return videos
except subprocess.TimeoutExpired:
print(f"❌ yt-dlp timed out after 5 minutes - channel may be too large")
return []
except subprocess.CalledProcessError as e:
print(f"❌ yt-dlp failed to fetch playlist for cache: {e}")
print(f" 📄 stderr: {e.stderr}")
return []

View File

@ -106,6 +106,10 @@ def download_single_video(
print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")
video_url = f"https://www.youtube.com/watch?v={video_id}"
# Debug: Show the video_id and URL being used
print(f"🔍 DEBUG: video_id = '{video_id}'")
print(f"🔍 DEBUG: video_url = '{video_url}'")
# Build command using centralized utility
cmd = build_yt_dlp_command(yt_dlp_path, video_url, output_path, config)
@ -255,7 +259,7 @@ def execute_download_plan(
video_id = item["video_id"]
video_title = item["video_title"]
print(f"\n⬇️ Downloading {len(download_plan) - idx} of {total_to_download}:")
print(f"\n⬇️ Downloading {downloaded_count + 1} of {total_to_download}:")
print(f" 📋 Songlist: {artist} - {title}")
print(f" 🎬 Video: {video_title} ({channel_name})")
if "match_score" in item: