KaraokeVideoDownloader/download_karaoke.py

#!/usr/bin/env python3
"""
Karaoke Playlist Downloader
A Python-based Windows command-line tool that wraps yt-dlp.exe to batch-download karaoke videos from YouTube playlists.
"""

import os
import sys
import argparse
import subprocess
import json
import re
from pathlib import Path
from urllib.parse import urlparse, parse_qs
import logging
from datetime import datetime
from tracking_manager import TrackingManager, SongStatus, FormatType

# Try to import mutagen for ID3 tagging
try:
    from mutagen.mp4 import MP4, MP4Tags
    MUTAGEN_AVAILABLE = True
except ImportError:
    MUTAGEN_AVAILABLE = False
    print("⚠️ mutagen not available - ID3 tagging will be disabled")
    print("   Install with: pip install mutagen")


class KaraokeDownloader:
    def __init__(self):
        """Set up working paths, create output folders and load persisted state.

        Side effects: creates the ``downloads`` and ``logs`` directories
        (relative to the current working directory) when they are missing,
        instantiates the tracking manager and reads ``config.json`` and
        ``songlist_tracking.json`` through the corresponding loader methods.
        """
        # Fixed locations, all relative to the current working directory
        self.yt_dlp_path = Path("downloader/yt-dlp.exe")
        self.downloads_dir = Path("downloads")
        self.logs_dir = Path("logs")

        # Output folders must exist before anything is written into them
        for directory in (self.downloads_dir, self.logs_dir):
            directory.mkdir(exist_ok=True)

        # Per-video download state
        self.tracker = TrackingManager()

        # User configuration (falls back to built-in defaults)
        self.config = self._load_config()

        # The tracking file path must be set before the loader reads it
        self.songlist_tracking_file = Path("songlist_tracking.json")
        self.songlist_tracking = self._load_songlist_tracking()

    def _get_channel_info(self, channel_url):
        """Extract channel information from URL."""
        # Fallback: extract channel info from URL (faster and more reliable)
        if '@' in channel_url:
            # Handle @username format
            channel_name = channel_url.split('@')[1].split('/')[0]
            channel_id = f"@{channel_name}"
        else:
            # Handle other formats
            channel_name = "unknown_channel"
            channel_id = "unknown_channel"

        # Clean the channel name for use as directory name
        channel_name = re.sub(r'[<>:"/\\|?*]', '_', channel_name)
        return channel_name, channel_id

    def _get_playlist_info(self, playlist_url):
        """Extract playlist information from URL (backward compatibility)."""
        return self._get_channel_info(playlist_url)

    def _load_config(self):
        """Load configuration from config.json file."""
        config_file = Path("config.json")
        if config_file.exists():
            try:
                with open(config_file, 'r', encoding='utf-8') as f:
                    return json.load(f)
            except (json.JSONDecodeError, FileNotFoundError) as e:
                print(f"Warning: Could not load config.json: {e}")

        # Return default configuration
        return {
            "download_settings": {
                "format": "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best",
                "preferred_resolution": "720p",
                "audio_format": "mp3",
                "audio_quality": "0",
                "subtitle_language": "en",
                "subtitle_format": "srt",
                "write_metadata": False,
                "write_thumbnail": False,
                "write_description": False,
                "write_annotations": False,
                "write_comments": False,
                "write_subtitles": False,
                "embed_metadata": False,
                "add_metadata": False,
                "continue_downloads": True,
                "no_overwrites": True,
                "ignore_errors": True,
                "no_warnings": False
            },
            "folder_structure": {
                "downloads_dir": "downloads",
                "logs_dir": "logs",
                "tracking_file": "karaoke_tracking.json"
            },
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(levelname)s - %(message)s",
                "include_console": True,
                "include_file": True
            },
            "yt_dlp_path": "downloader/yt-dlp.exe"
        }

    def _load_songlist(self):
        """Load songlist from docs/songList.json file."""
        songlist_file = Path("docs/songList.json")
        if not songlist_file.exists():
            print("⚠️ Songlist file not found: docs/songList.json")
            return []

        try:
            with open(songlist_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Extract all songs from all categories
            all_songs = []
            for category in data:
                if "songs" in category:
                    for song in category["songs"]:
                        if "artist" in song and "title" in song:
                            all_songs.append({
                                "artist": song["artist"].strip(),
                                "title": song["title"].strip(),
                                "position": song.get("position", 0)
                            })

            print(f"📋 Loaded {len(all_songs)} songs from songlist")
            return all_songs

        except (json.JSONDecodeError, FileNotFoundError) as e:
            print(f"⚠️ Could not load songlist: {e}")
            return []

    def _load_songlist_tracking(self):
        """Load songlist tracking data."""
        if not self.songlist_tracking_file.exists():
            return {}

        try:
            with open(self.songlist_tracking_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (json.JSONDecodeError, FileNotFoundError) as e:
            print(f"⚠️ Could not load songlist tracking: {e}")
            return {}

    def _save_songlist_tracking(self):
        """Save songlist tracking data."""
        try:
            with open(self.songlist_tracking_file, 'w', encoding='utf-8') as f:
                json.dump(self.songlist_tracking, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"⚠️ Could not save songlist tracking: {e}")

    def _is_songlist_song_downloaded(self, artist, title):
        """Check if a songlist song has been downloaded from any channel."""
        key = f"{artist.lower()}_{self._normalize_title(title)}"
        return key in self.songlist_tracking

    def _mark_songlist_song_downloaded(self, artist, title, channel_name, file_path):
        """Mark a songlist song as downloaded."""
        key = f"{artist.lower()}_{self._normalize_title(title)}"
        self.songlist_tracking[key] = {
            "artist": artist,
            "title": title,
            "channel": channel_name,
            "file_path": str(file_path),
            "downloaded_at": datetime.now().isoformat()
        }
        self._save_songlist_tracking()

    def _normalize_title(self, title):
        """Normalize title for comparison (remove karaoke suffixes, etc.)."""
        # Remove common karaoke suffixes
        normalized = title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()
        # Remove extra spaces and convert to lowercase for comparison
        return " ".join(normalized.split()).lower()

    def _find_songlist_matches(self, cached_videos, songlist, songlist_only=False):
        """Find videos that match songs in the songlist."""
        matches = []
        songlist_lookup = {}
        unique_matches = set()
        already_downloaded = 0
        new_downloads = 0

        # Create lookup for songlist songs (normalized)
        for song in songlist:
            normalized_title = self._normalize_title(song["title"])
            key = f"{song['artist'].lower()}_{normalized_title}"
            songlist_lookup[key] = song

        print(f"🔍 Searching for {len(songlist)} songlist songs in {len(cached_videos)} cached videos...")

        for video in cached_videos:
            # Extract artist and title from video title
            artist, title = self._extract_artist_title(video['title'])
            normalized_title = self._normalize_title(title)
            key = f"{artist.lower()}_{normalized_title}"

            if key in songlist_lookup:
                songlist_song = songlist_lookup[key]

                # Check if already downloaded from any channel
                if self._is_songlist_song_downloaded(artist, title):
                    already_downloaded += 1
                    print(f"⏭️ Already downloaded: {artist} - {title}")
                    continue

                matches.append({
                    'video': video,
                    'songlist_song': songlist_song,
                    'priority_score': songlist_song.get('position', 9999)  # Lower position = higher priority
                })
                unique_matches.add(key)
                new_downloads += 1
                print(f"🎯 Found match: {artist} - {title} (position {songlist_song.get('position', 'N/A')})")

        print(f"🎯 Matching Results:")
        print(f"   • Total matches found: {len(matches) + already_downloaded}")
        print(f"   • Unique songs matched: {len(unique_matches) + already_downloaded}")
        print(f"   • Already downloaded: {already_downloaded}")
        print(f"   • New downloads available: {new_downloads}")
        print(f"   • Duplicate matches: {len(matches) - len(unique_matches)}")

        return matches

    def _get_playlist_name(self, playlist_url):
        """Extract playlist name from URL (backward compatibility)."""
        return self._get_playlist_info(playlist_url)[0]

    def _setup_logging(self, playlist_name):
        """Setup logging for the current playlist."""
        log_file = self.logs_dir / f"{playlist_name}.log"

        # Configure logging
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler(log_file),
                logging.StreamHandler(sys.stdout)
            ]
        )

        return logging.getLogger(__name__)

    def _get_channel_videos(self, channel_url):
        """Get list of videos from a channel."""
        try:
            cmd = [
                str(self.yt_dlp_path),
                "--flat-playlist",
                "--print", "id,title,duration,upload_date",
                channel_url
            ]

            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
            if result.returncode == 0 and result.stdout.strip():
                videos = []
                lines = result.stdout.strip().split('\n')
                for i in range(0, len(lines), 4):
                    if i + 3 < len(lines):
                        video_id = lines[i].strip()
                        title = lines[i + 1].strip()
                        duration = lines[i + 2].strip()
                        upload_date = lines[i + 3].strip()
                        try:
                            duration_int = int(duration) if duration != 'NA' else None
                        except ValueError:
                            duration_int = None

                        videos.append({
                            'video_id': video_id,
                            'title': title,
                            'duration': duration_int,
                            'upload_date': upload_date
                        })
                return videos
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError) as e:
            print(f"Error getting channel videos: {e}")

        return []

    def _get_playlist_songs(self, playlist_url):
        """Get list of songs in a playlist (backward compatibility).

        Thin alias: passes ``playlist_url`` unchanged to ``_get_channel_videos``
        and returns its result.
        """
        return self._get_channel_videos(playlist_url)

    def _extract_artist_title(self, video_title):
        """Extract artist and title from karaoke video title."""
        # Remove common karaoke suffixes
        title = video_title.replace("(Karaoke Version)", "").replace("(Karaoke)", "").strip()

        # Look for common patterns
        if " - " in title:
            # Format: "Artist - Song Title"
            parts = title.split(" - ", 1)
            if len(parts) == 2:
                artist = parts[0].strip()
                song_title = parts[1].strip()

                # Handle special cases
                if artist == "d4vd, Hyunjin":
                    return "d4vd, Hyunjin", song_title
                elif artist == "Adrianne Lenker":
                    return "Adrianne Lenker", song_title
                else:
                    return artist, song_title

        # Handle special cases without clear artist - title pattern
        if "Nothing Beats A Jet2 Holiday" in title:
            return "Jet2", "Nothing Beats A Jet2 Holiday"
        elif "Spin The Wheel" in title:
            return "SingKingKaraoke", "Spin The Wheel - Your Song Requests"

        # If no clear pattern, assume the whole title is the song name
        return "Unknown Artist", title

    def _add_id3_tags(self, file_path, video_title, channel_name):
        """Write title/artist/album/genre/year metadata into an MP4 file.

        Artist and title are derived from ``video_title``; the album is
        ``"<channel_name> Karaoke"``, the genre ``"Karaoke"`` and the year the
        current calendar year. Tagging is skipped with a notice when mutagen is
        not installed; any tagging error is printed and otherwise ignored.
        """
        if not MUTAGEN_AVAILABLE:
            print("⚠️ mutagen not available - skipping ID3 tagging")
            return

        try:
            artist, title = self._extract_artist_title(video_title)
            media = MP4(str(file_path))

            # MP4 metadata atoms and the values stored in them
            tag_values = (
                ('\xa9nam', title),                        # Title
                ('\xa9ART', artist),                       # Artist
                ('\xa9alb', f"{channel_name} Karaoke"),    # Album
                ('\xa9gen', "Karaoke"),                    # Genre
                ('\xa9day', str(datetime.now().year)),     # Year
            )
            for atom, value in tag_values:
                media[atom] = value

            media.save()
            print(f"📝 Added ID3 tags: Artist='{artist}', Title='{title}'")

        except Exception as e:
            print(f"⚠️ Could not add ID3 tags: {e}")

    def _cleanup_extra_files(self, mp4_file_path):
        """Remove extra files (info.json, meta) that yt-dlp creates alongside MP4 files."""
        try:
            # Get the base path without extension
            base_path = mp4_file_path.with_suffix('')

            # Files to remove
            extra_files = [
                base_path.with_suffix('.info.json'),
                base_path.with_suffix('.meta')
            ]

            removed_count = 0
            for extra_file in extra_files:
                if extra_file.exists():
                    extra_file.unlink()
                    removed_count += 1
                    print(f"🧹 Removed extra file: {extra_file.name}")

            if removed_count > 0:
                print(f"🧹 Cleaned up {removed_count} extra file(s)")

        except Exception as e:
            print(f"⚠️ Could not clean up extra files: {e}")

    def download_channel_videos(self, channel_url, force_refresh=False):
        """Download every not-yet-downloaded video of a YouTube channel.

        Steps: resolve channel name/ID from the URL, configure logging, register
        the channel with the tracker, obtain the video list (tracker cache
        unless ``force_refresh`` is set or the cache is empty, otherwise
        yt-dlp), sync it with the tracker, decide what to download and run
        yt-dlp once per video. A download counts as successful when an MP4 file
        whose name equals the video title exists afterwards in the channel
        directory or in the root downloads directory.

        Optional instance attributes (read with ``getattr`` because
        ``__init__`` does not define them):
            use_songlist_priority: when truthy, songlist matches are downloaded
                first and every successful download is recorded in the songlist
                tracking data.
            songlist_only: when truthy (and a songlist was loaded), only
                songlist matches are downloaded.
            download_limit: when truthy, only that many videos are downloaded.

        Args:
            channel_url: Channel (or playlist) URL.
            force_refresh: Skip the cached video list and fetch it again.

        Returns:
            ``False`` when interrupted with Ctrl+C, ``True`` otherwise (also
            when individual downloads failed; those are marked as failed in the
            tracker).
        """
        print(f"🎤 Starting download for channel: {channel_url}")

        # Get channel info
        print("🔍 Extracting channel information...")
        channel_name, channel_id = self._get_channel_info(channel_url)
        print(f"📺 Channel name: {channel_name}")
        print(f"🆔 Channel ID: {channel_id}")

        # Setup logging
        print("📝 Setting up logging...")
        logger = self._setup_logging(channel_name)
        logger.info(f"Starting download for channel: {channel_url}")

        # Add channel to tracking
        print("📋 Adding channel to tracking system...")
        self.tracker.add_playlist(channel_id, channel_name, channel_url)

        # Check cache first (unless force refresh is requested)
        print("💾 Checking channel cache...")
        current_videos = None
        if not force_refresh:
            current_videos = self.tracker.get_cached_channel_videos(channel_id)
            if current_videos:
                print(f"📋 Using cached channel data: {len(current_videos)} videos")
            else:
                print("📋 Cache miss or expired, fetching fresh channel data...")

        # Fetch fresh data if needed
        if not current_videos:
            print("📋 Fetching channel videos from YouTube...")
            current_videos = self._get_channel_videos(channel_url)
            print(f"🎵 Found {len(current_videos)} videos in channel")

            # Cache the results
            if current_videos:
                print("💾 Caching channel data for future use...")
                self.tracker.cache_channel_videos(channel_id, current_videos)
                print(f"💾 Cached channel data for future use")
        else:
            print(f"🎵 Using {len(current_videos)} cached videos")

        # Sync channel with tracking
        print("🔄 Syncing channel with tracking system...")
        sync_result = self.tracker.sync_playlist(channel_id, current_videos)
        print(f"🔄 Sync result: {sync_result['added']} new, {sync_result['removed']} removed, {sync_result['total']} total")

        # Create channel-specific downloads directory
        print("📁 Creating downloads directory...")
        downloads_dir = self.downloads_dir / channel_name
        downloads_dir.mkdir(parents=True, exist_ok=True)
        print(f"📁 Downloads will be saved to: {downloads_dir}")

        # Load songlist for prioritization (if enabled)
        use_songlist = getattr(self, 'use_songlist_priority', False)
        songlist_only = getattr(self, 'songlist_only', False)
        songlist = []
        if use_songlist:
            print("📋 Loading songlist for prioritization...")
            songlist = self._load_songlist()

            # Show songlist statistics
            unique_songs = len(set(f"{song['artist']}_{song['title']}" for song in songlist))
            print(f"📊 Songlist Statistics:")
            print(f"   • Total songs in list: {len(songlist)}")
            print(f"   • Unique songs: {unique_songs}")
            print(f"   • Duplicates in list: {len(songlist) - unique_songs}")
        else:
            print("📋 Songlist prioritization disabled")

        # Get videos that need downloading
        print("🔍 Checking which videos need to be downloaded...")
        found_existing = 0

        # Collect the names of existing MP4 files once (channel dir and root dir)
        existing_files = {
            file_path.stem
            for directory in (downloads_dir, self.downloads_dir)
            for file_path in directory.glob("*.mp4")
            if file_path.is_file()
        }
        print(f"📁 Found {len(existing_files)} existing MP4 files in directories")

        # NOTE(review): files are matched by the raw video title. If yt-dlp alters
        # a title when building the file name, that file is not recognised here or
        # after the download - confirm against yt-dlp's file name sanitising.
        other_videos = []
        for video in current_videos:
            if self.tracker.is_song_downloaded(video['video_id'], channel_id):
                continue
            if video['title'] in existing_files:
                # File exists but not tracked, mark as downloaded
                self.tracker.update_song_status(
                    video['video_id'],
                    channel_id,
                    SongStatus.DOWNLOADED,
                    FormatType.MP4,
                    downloads_dir / f"{video['title']}.mp4"  # Assume it's in channel dir
                )
                found_existing += 1
            else:
                other_videos.append(video)

        # Find songlist matches and prioritize them
        if songlist:
            songlist_matches = self._find_songlist_matches(other_videos, songlist, songlist_only=songlist_only)

            # Remove matched videos from other_videos to avoid duplicates
            matched_video_ids = {match['video']['video_id'] for match in songlist_matches}
            other_videos = [v for v in other_videos if v['video_id'] not in matched_video_ids]

            # Sort songlist matches by priority (lower position = higher priority)
            songlist_matches.sort(key=lambda x: x['priority_score'])

            # Add songlist matches first, then other videos (unless songlist_only is enabled)
            if songlist_only:
                videos_to_download = [match['video'] for match in songlist_matches]
                print(f"🎯 Songlist-only mode: {len(songlist_matches)} songlist matches")
            else:
                videos_to_download = [match['video'] for match in songlist_matches] + other_videos
                print(f"🎯 Prioritized {len(songlist_matches)} songlist matches")
                print(f"📝 Added {len(other_videos)} other videos")
        else:
            videos_to_download = other_videos

        if found_existing > 0:
            print(f"📁 Found {found_existing} existing files that weren't tracked")

        # Apply limit if specified
        download_limit = getattr(self, 'download_limit', None)
        if download_limit:
            videos_to_download = videos_to_download[:download_limit]
            print(f"📥 Videos to download: {len(videos_to_download)} (limited to {download_limit})")
        else:
            print(f"📥 Videos to download: {len(videos_to_download)}")

        if not videos_to_download:
            print("✅ All videos already downloaded!")
            # Status updates for untracked existing files may still be pending
            self.tracker.force_save()
            return True

        # The settings do not change between videos, so read them once
        download_settings = self.config.get("download_settings", {})

        for i, video in enumerate(videos_to_download, 1):
            print(f"\n🎵 Downloading {i} of {len(videos_to_download)}: {video['title']}")

            # Update status to downloading
            self.tracker.update_song_status(
                video['video_id'],
                channel_id,
                SongStatus.DOWNLOADING
            )

            cmd = self._build_download_command(video['video_id'], downloads_dir, download_settings)

            try:
                print(f"🚀 Starting download for: {video['title']}")
                logger.info(f"Starting download for video ID: {video['video_id']}")

                try:
                    return_code = self._run_yt_dlp(cmd, logger)
                except (OSError, subprocess.SubprocessError) as e:
                    # yt-dlp could not be started (e.g. executable missing)
                    error_msg = f"Error running yt-dlp for {video['title']}: {e}"
                    print(f"❌ {error_msg}")
                    logger.error(error_msg)

                    self.tracker.update_song_status(
                        video['video_id'],
                        channel_id,
                        SongStatus.FAILED,
                        error_message=str(e)
                    )
                    continue

                # Lenient success check: if the MP4 exists, consider the download successful
                downloaded_file = self._find_downloaded_file(video['title'], downloads_dir)

                if downloaded_file:
                    format_type = FormatType.MP4

                    # Update tracking with successful download
                    self.tracker.update_song_status(
                        video['video_id'],
                        channel_id,
                        SongStatus.DOWNLOADED,
                        format_type,
                        downloaded_file
                    )

                    print(f"✅ Successfully downloaded: {video['title']} ({format_type})")
                    logger.info(f"Successfully downloaded: {video['title']} ({format_type})")

                    # Add ID3 tags to the downloaded file
                    self._add_id3_tags(downloaded_file, video['title'], channel_name)

                    # Clean up extra files (info.json, meta files)
                    self._cleanup_extra_files(downloaded_file)

                    # Record the download in songlist tracking when prioritization is on
                    if use_songlist:
                        artist, title = self._extract_artist_title(video['title'])
                        self._mark_songlist_song_downloaded(artist, title, channel_name, downloaded_file)
                        print(f"📋 Marked songlist song as downloaded: {artist} - {title}")
                else:
                    # No MP4 file found, mark as failed
                    self.tracker.update_song_status(
                        video['video_id'],
                        channel_id,
                        SongStatus.FAILED,
                        error_message=f"Download failed - no MP4 file found (return code: {return_code})"
                    )
                    print(f"❌ Download failed for: {video['title']} (return code: {return_code})")
                    logger.error(f"Download failed for: {video['title']} (return code: {return_code})")

            except KeyboardInterrupt:
                print("\n⏹️ Download interrupted by user")
                logger.info("Download interrupted by user")

                # Mark current video as partial
                self.tracker.update_song_status(
                    video['video_id'],
                    channel_id,
                    SongStatus.PARTIAL
                )
                # Flush tracking now: the regular save at the end is skipped
                self.tracker.force_save()
                return False

        # Force save tracking data
        self.tracker.force_save()

        # Show final statistics
        stats = self.tracker.get_statistics()
        print(f"\n📊 Download Statistics:")
        print(f"   Total songs: {stats['total_songs']}")
        print(f"   Downloaded: {stats['downloaded_songs']}")
        print(f"   Failed: {stats['failed_songs']}")
        print(f"   Partial: {stats['partial_songs']}")
        print(f"   Total size: {stats['total_size_mb']} MB")

        return True

    def _build_download_command(self, video_id, downloads_dir, download_settings):
        """Build the yt-dlp argument list that downloads one video into *downloads_dir*."""
        cmd = [
            str(self.yt_dlp_path),
            "--format", download_settings.get(
                "format",
                "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best"
            ),
            "--output", str(downloads_dir / "%(title)s.%(ext)s"),
        ]

        # (config key, value assumed when the key is missing, yt-dlp flag)
        optional_flags = (
            ("add_metadata", False, "--add-metadata"),
            ("embed_metadata", False, "--embed-metadata"),
            ("write_metadata", False, "--write-info-json"),
            ("no_overwrites", True, "--no-overwrites"),
            ("continue_downloads", True, "--continue"),
            ("ignore_errors", True, "--ignore-errors"),
            ("no_warnings", False, "--no-warnings"),
        )
        cmd.extend(
            flag for key, default, flag in optional_flags
            if download_settings.get(key, default)
        )

        # Progress/display options, then the video URL
        cmd.extend([
            "--progress",
            "--newline",
            "--console-title",
            f"https://www.youtube.com/watch?v={video_id}"
        ])
        return cmd

    def _run_yt_dlp(self, cmd, logger):
        """Run *cmd*, echoing each non-empty output line to stdout and *logger*; return the exit code."""
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1
        )
        try:
            # The context manager closes the pipe and waits for the process
            with process:
                for line in process.stdout:
                    line = line.strip()
                    if line:
                        print(line)
                        logger.info(line)
        except KeyboardInterrupt:
            # Do not leave yt-dlp running in the background after Ctrl+C
            if process.poll() is None:
                process.terminate()
            raise
        return process.returncode

    def _find_downloaded_file(self, title, downloads_dir):
        """Return the MP4 file named after *title* in *downloads_dir* or the root downloads dir, else None."""
        for directory in (downloads_dir, self.downloads_dir):
            for file_path in directory.glob("*.mp4"):
                if file_path.is_file() and file_path.stem == title:
                    return file_path
        return None

    def download_playlist(self, playlist_url):
        """Download all videos from a YouTube playlist (backward compatibility).

        Thin alias: passes ``playlist_url`` to ``download_channel_videos`` (with
        its default ``force_refresh``) and returns that call's result.
        """
        return self.download_channel_videos(playlist_url)

    def download_from_file(self, file_path, force_refresh=False):
        """Download multiple playlists from a text file."""
        file_path = Path(file_path)

        if not file_path.exists():
            print(f"❌ File not found: {file_path}")
            return False

        print(f"📄 Reading playlists from: {file_path}")

        with open(file_path, 'r') as f:
            playlist_urls = [line.strip() for line in f if line.strip()]

        print(f"📋 Found {len(playlist_urls)} URL(s) to download")

        success_count = 0
        for i, url in enumerate(playlist_urls, 1):
            print(f"\n{'='*50}")
            print(f"📥 Processing URL {i}/{len(playlist_urls)}: {url}")
            print(f"{'='*50}")

            if self.download_channel_videos(url, force_refresh=force_refresh):
                success_count += 1

        print(f"\n🎉 Download summary: {success_count}/{len(playlist_urls)} URLs completed successfully")

        # Force save any pending tracking data
        self.tracker.force_save()

        return success_count == len(playlist_urls)


def main():
    """Command-line entry point for the karaoke downloader.

    Parses the command line, verifies that ``downloader/yt-dlp.exe`` exists,
    builds a ``KaraokeDownloader`` and applies the option flags to it, then
    runs exactly one action: an informational/maintenance command (status,
    cache info, cache clearing, cache duration, report, cleanup, songlist
    status) or a download from a URL file or a single URL.

    This function always terminates the process via ``sys.exit``:
    0 after an informational command or a fully successful download,
    1 when yt-dlp.exe is missing, when neither a URL nor ``--file`` is given
    (help is printed), or when a download run reports failure.
    """
    parser = argparse.ArgumentParser(
        description="Karaoke Video Downloader - Download YouTube playlists and channel videos for karaoke",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python download_karaoke.py https://www.youtube.com/playlist?list=XYZ
  python download_karaoke.py https://www.youtube.com/@SingKingKaraoke/videos
  python download_karaoke.py --file playlists.txt
  python download_karaoke.py --limit 5 https://www.youtube.com/@SingKingKaraoke/videos
  python download_karaoke.py --refresh https://www.youtube.com/@SingKingKaraoke/videos
  python download_karaoke.py --cache-info
  python download_karaoke.py --clear-cache all
        """
    )

    parser.add_argument(
        'url',
        nargs='?',
        help='YouTube playlist or channel URL to download'
    )

    parser.add_argument(
        '--file', '-f',
        help='Text file containing playlist or channel URLs (one per line)'
    )

    parser.add_argument(
        '--status', '-s',
        action='store_true',
        help='Show download status and statistics'
    )

    parser.add_argument(
        '--report', '-r',
        metavar='PLAYLIST_ID',
        help='Generate a detailed report for a specific playlist'
    )

    parser.add_argument(
        '--sync', '--sync-only',
        action='store_true',
        help='Only sync playlist without downloading (update tracking)'
    )

    parser.add_argument(
        '--cleanup',
        action='store_true',
        help='Clean up orphaned tracking entries'
    )

    parser.add_argument(
        '--resolution', '--res',
        choices=['480p', '720p', '1080p', '1440p', '2160p'],
        default='720p',
        help='Preferred video resolution (default: 720p)'
    )

    parser.add_argument(
        '--limit', '-l',
        type=int,
        help='Limit the number of videos to download (e.g., --limit 10)'
    )

    parser.add_argument(
        '--refresh', '--force-refresh',
        action='store_true',
        help='Force refresh channel cache (ignore cached data)'
    )

    parser.add_argument(
        '--cache-info',
        action='store_true',
        help='Show channel cache information'
    )

    parser.add_argument(
        '--clear-cache',
        metavar='CHANNEL_ID',
        nargs='?',
        const='all',
        help='Clear cache for specific channel or all channels (use --clear-cache all)'
    )

    parser.add_argument(
        '--cache-duration',
        type=int,
        metavar='HOURS',
        help='Set cache duration in hours (default: 24)'
    )

    # NOTE: this flag is accepted for symmetry but never read below;
    # prioritization stays on unless --no-songlist-priority is given.
    parser.add_argument(
        '--songlist-priority',
        action='store_true',
        help='Prioritize downloads based on docs/songList.json (default: enabled)'
    )

    parser.add_argument(
        '--no-songlist-priority',
        action='store_true',
        help='Disable songlist prioritization'
    )

    parser.add_argument(
        '--songlist-only',
        action='store_true',
        help='Only download songs that are in the songlist (skip all others)'
    )

    parser.add_argument(
        '--songlist-status',
        action='store_true',
        help='Show songlist download status and statistics'
    )

    parser.add_argument(
        '--version', '-v',
        action='version',
        version='Karaoke Playlist Downloader v1.0'
    )

    args = parser.parse_args()

    # Check if yt-dlp.exe exists
    yt_dlp_path = Path("downloader/yt-dlp.exe")
    if not yt_dlp_path.exists():
        print("❌ Error: yt-dlp.exe not found in downloader/ directory")
        print("Please ensure yt-dlp.exe is present in the downloader/ folder")
        sys.exit(1)

    # Initialize downloader
    downloader = KaraokeDownloader()

    # Set download limit if specified
    if args.limit:
        downloader.download_limit = args.limit
        print(f"🎯 Download limit set to: {args.limit} videos")

    # Set songlist priority setting
    if args.no_songlist_priority:
        downloader.use_songlist_priority = False
        print("🎯 Songlist prioritization disabled")
    else:
        downloader.use_songlist_priority = True
        print("🎯 Songlist prioritization enabled")

    # Set songlist-only mode
    if args.songlist_only:
        downloader.songlist_only = True
        print("🎯 Songlist-only mode enabled (will only download songlist songs)")

    # Update resolution if specified (720p is left to the loaded configuration)
    if args.resolution != '720p':
        # argparse restricts the value to "<digits>p", so the height is the
        # value without its trailing "p".
        height = args.resolution.rstrip('p')
        downloader.config["download_settings"]["format"] = f"best[height<={height}][ext=mp4]/best[height<={height}]/best[ext=mp4]/best"
        downloader.config["download_settings"]["preferred_resolution"] = args.resolution
        print(f"🎬 Using resolution: {args.resolution}")

    # Process arguments: the first matching command wins.
    if args.status:
        # Show status and statistics
        stats = downloader.tracker.get_statistics()
        print("🎤 Karaoke Downloader Status")
        print("=" * 40)
        print(f"Total Songs: {stats['total_songs']}")
        print(f"Total Playlists: {stats['total_playlists']}")
        print(f"Downloaded Songs: {stats['downloaded_songs']}")
        print(f"Failed Songs: {stats['failed_songs']}")
        print(f"Partial Downloads: {stats['partial_songs']}")
        print(f"Total Size: {stats['total_size_mb']} MB")
        print(f"Last Updated: {stats['last_updated']}")
        sys.exit(0)

    elif args.cache_info:
        # Show cache information
        cache_info = downloader.tracker.get_cache_info()
        print("💾 Channel Cache Information")
        print("=" * 40)
        print(f"Total Channels: {cache_info['total_channels']}")
        print(f"Total Cached Videos: {cache_info['total_cached_videos']}")
        print(f"Cache Duration: {cache_info['cache_duration_hours']} hours")
        print(f"Last Updated: {cache_info['last_updated']}")
        sys.exit(0)

    elif args.clear_cache:
        # Clear cache
        if args.clear_cache == 'all':
            downloader.tracker.clear_channel_cache()
            print("🧹 Cleared all channel caches")
        else:
            downloader.tracker.clear_channel_cache(args.clear_cache)
            print(f"🧹 Cleared cache for channel: {args.clear_cache}")
        sys.exit(0)

    elif args.cache_duration:
        # Set cache duration
        downloader.tracker.set_cache_duration(args.cache_duration)
        print(f"⏰ Cache duration set to {args.cache_duration} hours")
        sys.exit(0)

    elif args.report:
        # Generate playlist report
        report = downloader.tracker.export_playlist_report(args.report)
        print(report)
        sys.exit(0)

    elif args.cleanup:
        # Clean up orphaned entries
        orphaned = downloader.tracker.cleanup_orphaned_files(downloader.downloads_dir)
        print(f"🧹 Cleaned up {len(orphaned)} orphaned tracking entries")
        sys.exit(0)

    elif args.songlist_status:
        # Show songlist status and statistics
        songlist = downloader._load_songlist()
        tracking = downloader.songlist_tracking

        total_songs = len(songlist)
        downloaded_songs = len(tracking)
        # An empty songlist must not raise ZeroDivisionError; report 0.0%.
        progress = (downloaded_songs / total_songs * 100) if total_songs else 0.0

        print("📋 Songlist Download Status")
        print("=" * 40)
        print(f"Total songs in list: {total_songs}")
        print(f"Songs downloaded: {downloaded_songs}")
        print(f"Songs remaining: {total_songs - downloaded_songs}")
        print(f"Progress: {progress:.1f}%")

        if tracking:
            print("\n📁 Downloaded songs:")
            for info in list(tracking.values())[:10]:  # Show first 10
                print(f"   • {info['artist']} - {info['title']} (from {info['channel']})")
            if downloaded_songs > 10:
                print(f"   ... and {downloaded_songs - 10} more")

        sys.exit(0)

    elif args.file:
        success = downloader.download_from_file(args.file, force_refresh=args.refresh)
    elif args.url:
        success = downloader.download_channel_videos(args.url, force_refresh=args.refresh)
    else:
        # Nothing to do: neither a command flag, a URL file, nor a URL was given.
        parser.print_help()
        sys.exit(1)

    # Force save any pending tracking data before exit
    downloader.tracker.force_save()

    if success:
        print("\n🎤 All downloads completed successfully!")
        sys.exit(0)
    else:
        print("\n⚠️ Some downloads may have failed. Check the logs for details.")
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()