# KaraokeVideoDownloader/karaoke_downloader/downloader.py

import hashlib
import logging
import os
import re
import sys

import json
import subprocess
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from karaoke_downloader.cache_manager import (
    delete_plan_cache,
    get_download_plan_cache_file,
    load_cached_plan,
    save_plan_cache,
)
from karaoke_downloader.channel_manager import (
    download_from_file,
    reset_channel_downloads,
)
from karaoke_downloader.config_manager import get_config_manager, load_config
from karaoke_downloader.data_path_manager import get_data_path_manager
from karaoke_downloader.download_pipeline import DownloadPipeline
from karaoke_downloader.download_planner import build_download_plan
from karaoke_downloader.error_utils import handle_yt_dlp_error, log_error
from karaoke_downloader.file_utils import ensure_directory_exists, sanitize_filename
from karaoke_downloader.fuzzy_matcher import (
    create_song_key,
    create_video_key,
    get_similarity_function,
    is_exact_match,
    is_fuzzy_match,
)
from karaoke_downloader.id3_utils import add_id3_tags
from karaoke_downloader.channel_parser import ChannelParser
from karaoke_downloader.server_manager import (
    check_and_mark_server_duplicate,
    is_song_marked_as_server_duplicate,
    is_song_on_server,
    load_server_duplicates_tracking,
    load_server_songs,
)
from karaoke_downloader.song_validator import create_song_validator
from karaoke_downloader.songlist_manager import (
    is_songlist_song_downloaded,
    load_songlist,
    load_songlist_tracking,
    mark_songlist_song_downloaded,
    normalize_title,
    save_songlist_tracking,
)
from karaoke_downloader.tracking_manager import FormatType, SongStatus, TrackingManager
from karaoke_downloader.video_downloader import (
    download_video_and_track,
    execute_download_plan,
    is_valid_mp4,
    cleanup_cache,
)
from karaoke_downloader.parallel_downloader import (
    ParallelDownloader,
    DownloadTask,
    create_parallel_downloader,
)
from karaoke_downloader.youtube_utils import get_channel_info, get_playlist_info
from karaoke_downloader.manual_video_manager import is_manual_channel, get_manual_channel_info, get_manual_videos_for_channel

# Constants
# Default value of the `fuzzy_threshold` parameter on the download methods below.
DEFAULT_FUZZY_THRESHOLD = 85
# Not referenced in the visible part of this module; presumably a cache
# lifetime in days — TODO confirm against the cache/tracking code.
DEFAULT_CACHE_EXPIRATION_DAYS = 1
# Maximum number of unmatched songs listed individually before the output is
# summarised as "...and N more."
DEFAULT_DISPLAY_LIMIT = 10

# Relative path (resolved against the current working directory) of the
# directory that holds songlist_tracking.json.
DATA_DIR = Path("data")


class KaraokeDownloader:
    def __init__(self):
        """Wire up configuration, directories, tracking state and default run flags."""
        # --- Configuration ---
        self.config_manager = get_config_manager()
        self.config = self.config_manager.load_config()

        # --- Filesystem locations ---
        # yt_dlp_path is deliberately left as a string for command parsing.
        self.yt_dlp_path = self.config.yt_dlp_path
        folders = self.config.folder_structure
        self.downloads_dir = Path(folders.downloads_dir)
        self.logs_dir = Path(folders.logs_dir)
        for required_dir in (self.downloads_dir, self.logs_dir):
            ensure_directory_exists(required_dir)

        # --- Download tracking ---
        paths = get_data_path_manager()
        self.tracker = TrackingManager(
            tracking_file=paths.get_karaoke_tracking_path(),
            cache_dir=paths.get_channel_cache_dir(),
        )

        # Validator shared by the skip / failure helper methods.
        self.song_validator = create_song_validator(self.tracker, self.downloads_dir)

        # --- Songlist tracking (which songlist songs were already fetched) ---
        tracking_json = DATA_DIR / "songlist_tracking.json"
        self.songlist_tracking_file = tracking_json
        self.songlist_tracking = load_songlist_tracking(str(tracking_json))

        # --- Server inventory, used for availability checks ---
        self.server_songs = load_server_songs()

        # --- Parser that splits video titles into artist / title ---
        self.channel_parser = ChannelParser()

        # --- Parallel download settings ---
        self.enable_parallel_downloads, self.parallel_workers = False, 3

        # --- Songlist focus mode ---
        self.songlist_focus_titles = None
        self.songlist_only = False
        self.use_songlist_priority = True

        # --- Download mode ---
        self.dry_run = False
        self.download_limit = None
        self.force_download = False
        # Default songlist file; callers may overwrite this attribute.
        default_songlist = get_data_path_manager().get_songlist_path()
        self.songlist_file_path = str(default_songlist)

    def _load_config(self):
        """Load configuration using the config manager."""
        return self.config_manager.load_config()

    def _should_skip_song(
        self,
        artist,
        title,
        channel_name,
        video_id,
        video_title,
        server_songs=None,
        server_duplicates_tracking=None,
    ):
        """
        Check if a song should be skipped using the centralized SongValidator.

        Returns:
            tuple: (should_skip, reason, total_filtered)
        """
        return self.song_validator.should_skip_song(
            artist,
            title,
            channel_name,
            video_id,
            video_title,
            server_songs,
            server_duplicates_tracking,
            self.force_download,
        )

    def _mark_song_failed(self, artist, title, video_id, channel_name, error_message):
        """
        Mark a song as failed in tracking using the SongValidator.
        """
        self.song_validator.mark_song_failed(
            artist, title, video_id, channel_name, error_message
        )

    def _handle_download_failure(
        self, artist, title, video_id, channel_name, error_type, error_details=""
    ):
        """
        Handle download failures using the SongValidator.

        Args:
            artist: Song artist
            title: Song title
            video_id: YouTube video ID
            channel_name: Channel name
            error_type: Type of error (e.g., "yt-dlp failed", "file verification failed")
            error_details: Additional error details
        """
        self.song_validator.handle_download_failure(
            artist, title, video_id, channel_name, error_type, error_details
        )

    def download_channel_videos(
        self,
        url,
        force_refresh=False,
        fuzzy_match=False,
        fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
        force_download=False,
        dry_run=False,
    ):
        """Collect the video list for a channel/playlist URL and hand it to the download processing step.

        Manual channels take their list from the manual video manager (yt-dlp is
        not used); every other URL goes through the tracking manager, which
        supports caching. Returns False when no videos are found, otherwise
        whatever ``_process_videos_for_download`` returns.
        """
        from karaoke_downloader.manual_video_manager import (
            is_manual_channel,
            get_manual_channel_info,
            get_manual_videos_for_channel,
        )

        if is_manual_channel(url):
            channel_name, _ = get_manual_channel_info(url)
            print(f"\n🎬 Downloading from manual channel: {channel_name} ({url})")

            manual_entries = get_manual_videos_for_channel(channel_name)
            if not manual_entries:
                print("⚠️ No manual videos found. Skipping.")
                return False

            # Reduce each manual entry to the three keys the processing step uses.
            available_videos = [
                {
                    "title": entry.get("title", ""),
                    "id": entry.get("id", ""),
                    "url": entry.get("url", ""),
                }
                for entry in manual_entries
            ]
            print(f"📋 Found {len(available_videos)} manual videos")
        else:
            channel_name, _ = get_channel_info(url)
            print(f"\n🎬 Downloading from channel: {channel_name} ({url})")

            # The tracking manager serves the list (with cache support).
            available_videos = self.tracker.get_channel_video_list(
                url, str(self.yt_dlp_path), force_refresh=force_refresh
            )
            if not available_videos:
                print("⚠️ No videos found for this channel. Skipping.")
                return False

            print(f"📋 Found {len(available_videos)} videos from channel")

        # Both sources share the same processing step.
        return self._process_videos_for_download(
            available_videos,
            channel_name,
            force_refresh,
            fuzzy_match,
            fuzzy_threshold,
            force_download,
            dry_run,
        )

    def download_all_channel_videos(
        self,
        url,
        force_refresh=False,
        force_download=False,
        limit=None,
        dry_run=False,
    ):
        """Collect every video of a channel and pass the list on without songlist filtering.

        Manual channels take their list from the manual video manager; every
        other URL goes through the tracking manager, which supports caching.
        Returns False when no videos are found, otherwise whatever
        ``_process_all_videos_for_download`` returns.
        """
        from karaoke_downloader.manual_video_manager import (
            is_manual_channel,
            get_manual_channel_info,
            get_manual_videos_for_channel,
        )

        if is_manual_channel(url):
            channel_name, _ = get_manual_channel_info(url)
            print(f"\n🎬 Downloading ALL videos from manual channel: {channel_name} ({url})")

            manual_entries = get_manual_videos_for_channel(channel_name)
            if not manual_entries:
                print("⚠️ No manual videos found. Skipping.")
                return False

            # Reduce each manual entry to the three keys the processing step uses.
            available_videos = [
                {
                    "title": entry.get("title", ""),
                    "id": entry.get("id", ""),
                    "url": entry.get("url", ""),
                }
                for entry in manual_entries
            ]
            print(f"📋 Found {len(available_videos)} manual videos")
        else:
            channel_name, _ = get_channel_info(url)
            print(f"\n🎬 Downloading ALL videos from channel: {channel_name} ({url})")

            # The tracking manager serves the list (with cache support).
            available_videos = self.tracker.get_channel_video_list(
                url, str(self.yt_dlp_path), force_refresh=force_refresh
            )
            if not available_videos:
                print("⚠️ No videos found for this channel. Skipping.")
                return False

            print(f"📋 Found {len(available_videos)} videos from channel")

        # Both sources share the same "all videos" processing step.
        return self._process_all_videos_for_download(
            available_videos,
            channel_name,
            force_refresh,
            force_download,
            limit,
            dry_run,
        )

    def _process_all_videos_for_download(
        self,
        available_videos,
        channel_name,
        force_refresh=False,
        force_download=False,
        limit=None,
        dry_run=False,
    ):
        """Process ALL videos for download (no songlist filtering).

        Unless ``force_download`` is set, a video is skipped when its target
        file already exists, when it is in the songlist and the server already
        has it as MP4, when the tracker reports it as downloaded, or when it is
        detected as a server duplicate. The remaining videos (optionally
        truncated to ``limit``) become the download plan, which is cached and
        then run through ``execute_unified_download_workflow``.

        Args:
            available_videos: List of dicts with at least "title" and "id".
            channel_name: Channel name; also the sub-directory of downloads_dir.
            force_refresh: Ignore an existing cached plan and build a new one.
            force_download: Bypass every skip check.
            limit: Maximum number of videos to keep (None for no limit).
            dry_run: Forwarded to the unified download workflow.

        Returns:
            True when nothing needs downloading, otherwise the success flag
            returned by the unified download workflow.
        """
        print(f"🔍 Processing {len(available_videos)} videos for download (no songlist filtering)...")

        # Load server songs and duplicates tracking for availability checking
        server_songs = load_server_songs()
        server_duplicates_tracking = load_server_duplicates_tracking()

        # "Artist - Title" keys of every song in the songlist
        songlist = load_songlist(self.songlist_file_path)
        songlist_artists_titles = {
            f"{song['artist']} - {song['title']}" for song in (songlist or [])
        }

        videos_to_download = []
        skipped_count = 0

        for video in available_videos:
            video_title = video["title"]
            video_id = video["id"]

            # Extract artist and title; fall back to the full video title
            artist, extracted_title = self.channel_parser.extract_artist_title(video_title, channel_name)
            if not artist and not extracted_title:
                artist = ""
                extracted_title = video_title

            # NOTE(review): unlike download_latest_per_channel, this name is not
            # passed through sanitize_filename — confirm which form the
            # download pipeline writes before unifying the two.
            filename = f"{artist} - {extracted_title}.mp4" if artist and extracted_title else f"{video_title}.mp4"
            output_path = self.downloads_dir / channel_name / filename
            song_key = f"{artist} - {extracted_title}"

            # Check if file already exists
            if not force_download and output_path.exists():
                print(f"   ⏭️  Skipping existing file: {filename}")
                skipped_count += 1
                continue

            # Check if song is already in the songlist
            if not force_download and song_key in songlist_artists_titles:
                # server_songs is keyed as "<artist lower>_<normalized title>"
                # (the same key download_songlist_across_channels uses); the
                # display-style song_key would never match an entry.
                artist_part = (artist or "").lower()
                title_part = normalize_title(extracted_title or "")
                server_key = f"{artist_part}_{title_part}"
                if server_key in server_songs:
                    if server_songs[server_key].get("is_mp4", False):
                        print(f"   ⏭️  Skipping song in songs.json (MP4): {song_key}")
                        skipped_count += 1
                        continue
                    # Present on the server in another format: still fetch the MP4
                    print(f"   📥 Found MP3 version in songs.json, will download MP4: {song_key}")
                else:
                    print(f"   📥 Found song in songlist, will download MP4: {song_key}")

            # Check if already downloaded (tracking)
            if not force_download and self.tracker.is_song_downloaded(
                artist, extracted_title, channel_name, video_id
            ):
                print(f"   ⏭️  Skipping already tracked: {song_key}")
                skipped_count += 1
                continue

            # Check if on server
            if not force_download and check_and_mark_server_duplicate(
                server_songs,
                server_duplicates_tracking,
                artist,
                extracted_title,
                video_title,
                channel_name,
            ):
                print(f"   ⏭️  Skipping server duplicate: {song_key}")
                skipped_count += 1
                continue

            videos_to_download.append((video, artist, extracted_title, filename))

        print(f"📊 Found {len(videos_to_download)} videos to download (skipped {skipped_count})")

        # Apply limit if specified
        if limit is not None:
            original_count = len(videos_to_download)
            videos_to_download = videos_to_download[:limit]
            print(f"🎯 Limited to first {limit} videos (was {original_count} total)")

        if not videos_to_download:
            print("🎵 No new videos to download.")
            return True

        # The cache file name is derived from the parameters that shape the plan
        plan_kwargs = {
            "channel": channel_name,
            "total_videos": len(available_videos),
            "force_download": force_download,
        }
        if limit:
            plan_kwargs["limit"] = limit

        cache_file = get_download_plan_cache_file("all_videos", **plan_kwargs)

        # Reuse a cached plan (to resume) unless a refresh was requested
        cached_plan, _ = load_cached_plan(cache_file)
        if cached_plan and not force_refresh:
            print(f"\n📋 Found existing download plan cache: {cache_file.name}")
            print(f"   🎬 Cached videos to download: {len(cached_plan)}")
            download_plan = cached_plan
        else:
            download_plan = [
                {
                    "video_id": video["id"],
                    "artist": artist,
                    "title": title,
                    "filename": filename,
                    "channel_name": channel_name,
                    "video_title": video.get("title", ""),
                    "force_download": force_download,
                }
                for video, artist, title, filename in videos_to_download
            ]
            save_plan_cache(cache_file, download_plan, [])  # No unmatched for all-videos mode

        # Show download plan summary (counts the plan that is actually executed,
        # which may be the cached one)
        print("\n📋 Download Plan Summary:")
        print(f"   📺 Channel: {channel_name}")
        print(f"   🎬 Total videos to download: {len(download_plan)}")
        print(f"   ⏭️  Videos skipped: {skipped_count}")
        if limit:
            print(f"   🎯 Limit applied: {limit} videos")
        print(f"   📁 Output directory: downloads/{channel_name}/")
        print(f"   💾 Download plan cached to: {cache_file.name}")

        # Use unified download workflow
        _, success = self.execute_unified_download_workflow(
            download_plan=download_plan,
            cache_file=cache_file,
            limit=limit,
            show_progress=True,
            dry_run=dry_run,
        )

        return success

    def download_songlist_across_channels(
        self,
        channel_urls,
        limit=None,
        force_refresh_download_plan=False,
        fuzzy_match=False,
        fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
        force_download=False,
        show_pagination=False,
        parallel_channels=False,
        max_channel_workers=3,
        dry_run=False,
    ):
        """
        Download songs from the songlist across multiple channels.

        Steps: load the songlist (restricted to the focus playlists when
        ``self.songlist_focus_titles`` is set), drop songs already downloaded
        or already on the server (unless forced), apply ``limit``, build or
        reuse a cached download plan, then run the unified download workflow.

        Args:
            channel_urls: Channel URLs to search for the songlist songs.
            limit: Maximum number of songs to process (None for no limit).
            force_refresh_download_plan: Rebuild the plan even if one is cached.
            fuzzy_match: Forwarded to the plan builder.
            fuzzy_threshold: Forwarded to the plan builder.
            force_download: Bypass the downloaded / on-server filters. Also
                stored on ``self.force_download``.
            show_pagination: Forwarded to the plan builder.
            parallel_channels: Forwarded to the plan builder.
            max_channel_workers: Forwarded to the plan builder.
            dry_run: Forwarded to the unified download workflow.

        Returns:
            False when no songlist could be loaded, True when nothing is left
            to download, otherwise the workflow's success flag.
        """
        # Set force download flag
        self.force_download = force_download

        if self.songlist_focus_titles:
            songlist = self._load_focused_songlist()
            if songlist is None:
                return False
        else:
            # Load songlist normally (flattened from all playlists)
            songlist = load_songlist(self.songlist_file_path)
            if not songlist:
                print("⚠️ No songlist loaded. Skipping.")
                return False

        # Filter for songs not yet downloaded
        if self.force_download:
            undownloaded = songlist
            print(f"\n🎯 {len(songlist)} total unique songs in songlist.")
            print("💪 Force mode enabled - will download all songs regardless of existing status")
        else:
            undownloaded = [
                s
                for s in songlist
                if not is_songlist_song_downloaded(
                    self.songlist_tracking, s["artist"], s["title"]
                )
            ]
            print(f"\n🎯 {len(songlist)} total unique songs in songlist.")
            print(f"\n🎯 {len(undownloaded)} unique songlist songs to download.")

        # Load server songs and duplicates tracking for availability checking
        server_songs = load_server_songs()
        server_duplicates_tracking = load_server_duplicates_tracking()

        # Counters for the summary printed after the limit is applied
        server_available_mp4 = 0
        marked_duplicates = 0
        # Server keys of songs that exist on the server only as MP3/CDG
        other_format_keys = set()

        # Further filter out songs already on server or marked as duplicates
        if self.force_download:
            not_on_server = undownloaded
            print("💪 Force mode enabled - will download all songs regardless of server status")
        else:
            not_on_server = []

            for song in undownloaded:
                artist, title = song["artist"], song["title"]

                # Already marked as server duplicate in an earlier run
                if is_song_marked_as_server_duplicate(
                    server_duplicates_tracking, artist, title
                ):
                    marked_duplicates += 1
                    continue

                # Already on server: mark it for future skipping
                if check_and_mark_server_duplicate(
                    server_songs,
                    server_duplicates_tracking,
                    artist,
                    title,
                    f"{artist} - {title}",
                    "songlist",
                ):
                    server_available_mp4 += 1
                    continue

                # On server but in a different format (MP3/CDG): still download
                key = f"{artist.lower()}_{normalize_title(title)}"
                if key in server_songs:
                    song_info = server_songs[key]
                    is_mp3 = song_info.get("is_mp3", False)
                    if is_mp3 or song_info.get("is_cdg", False):
                        other_format_keys.add(key)
                        found_format = "MP3" if is_mp3 else "CDG"
                        print(f"🎵 Found {artist} - {title} on server as {found_format} format, will download video version")

                not_on_server.append(song)

        # Apply the limit before logging so the counts below describe the
        # songs that will actually be processed when --limit is used
        if limit is not None:
            original_count = len(not_on_server)
            not_on_server = not_on_server[:limit]
            print(f"\n🎯 Limited to first {limit} songs (was {original_count} total)")

        undownloaded = not_on_server

        if server_available_mp4 > 0:
            print(
                f"\n🎵 {server_available_mp4} songs already available as MP4 on server, skipping."
            )
        if other_format_keys:
            # Count only songs that survived the limit and were flagged MP3/CDG
            limited_server_other = sum(
                1
                for song in not_on_server
                if f"{song['artist'].lower()}_{normalize_title(song['title'])}" in other_format_keys
            )
            if limited_server_other > 0:
                print(
                    f"\n🎵 {limited_server_other} songs found on server as MP3/CDG, will download video versions."
                )
        if marked_duplicates > 0:
            print(
                f"\n🏷️ {marked_duplicates} songs previously marked as server duplicates, skipping."
            )

        print(f"\n🎯 {len(undownloaded)} songs need to be downloaded.")
        if not undownloaded:
            print("🎵 All songlist songs already downloaded.")
            return True

        # --- Download plan cache logic (same for normal and focus modes) ---
        # NOTE(review): the cache key covers only the channel set and the fuzzy
        # settings. The plan itself is built from `undownloaded`, which also
        # depends on the songlist, the focus titles and `limit`, so a cached
        # plan from a different selection is reused unless
        # force_refresh_download_plan is set.
        plan_mode = "songlist"
        plan_kwargs = {
            "channels": len(channel_urls),
            "fuzzy": fuzzy_match,
            "threshold": fuzzy_threshold,
        }
        # Add channel URLs hash to ensure same channels = same cache
        channels_hash = hashlib.md5(
            "|".join(sorted(channel_urls)).encode()
        ).hexdigest()[:8]
        plan_kwargs["channels_hash"] = channels_hash
        cache_file = get_download_plan_cache_file(plan_mode, **plan_kwargs)

        download_plan, unmatched = load_cached_plan(cache_file)
        use_cache = (
            not force_refresh_download_plan
            and download_plan is not None
            and unmatched is not None
        )
        if use_cache:
            print(f"\n📋 Using cached download plan from: {cache_file}")
        else:
            print(f"\n🔍 Pre-scanning {len(channel_urls)} channels for matches...")
            print(f"🔍 Scanning {len(undownloaded)} songs against all channels...")
            download_plan, unmatched = build_download_plan(
                channel_urls,
                undownloaded,
                self.tracker,
                self.yt_dlp_path,
                fuzzy_match=fuzzy_match,
                fuzzy_threshold=fuzzy_threshold,
                show_pagination=show_pagination,
                parallel_channels=parallel_channels,
                max_channel_workers=max_channel_workers,
            )
            save_plan_cache(cache_file, download_plan, unmatched)
            print(f"💾 Download plan cached to: {cache_file}")

        print(
            f"\n📊 Download plan ready: {len(download_plan)} songs will be downloaded."
        )
        print(f"❌ {len(unmatched)} songs could not be found in any channel.")
        if unmatched:
            print("Unmatched songs:")
            for song in unmatched[:DEFAULT_DISPLAY_LIMIT]:
                print(f"   - {song['artist']} - {song['title']}")
            if len(unmatched) > DEFAULT_DISPLAY_LIMIT:
                print(f"   ...and {len(unmatched)-DEFAULT_DISPLAY_LIMIT} more.")

        # --- Download phase ---
        _, success = self.execute_unified_download_workflow(
            download_plan=download_plan,
            cache_file=cache_file,
            limit=limit,
            dry_run=dry_run,
        )
        return success

    def _load_focused_songlist(self):
        """Load the de-duplicated songs of the playlists named in ``self.songlist_focus_titles``.

        Returns:
            A list of ``{"artist", "title", "position"}`` dicts (possibly
            empty), or None when the songlist file is missing or unreadable or
            when no playlist title matches.
        """
        songlist_file = Path(self.songlist_file_path)
        if not songlist_file.exists():
            print(f"⚠️ Songlist file not found: {self.songlist_file_path}")
            return None

        # Only the read/parse step can raise these errors, so only it is guarded.
        try:
            with open(songlist_file, "r", encoding="utf-8") as f:
                raw_data = json.load(f)
        except (json.JSONDecodeError, OSError) as e:
            print(f"⚠️ Could not load songlist for filtering from {self.songlist_file_path}: {e}")
            return None

        # Show what is being searched for and a preview of what is available
        print(f"🔍 Looking for playlists: {self.songlist_focus_titles}")
        print("🔍 Available playlists in songList.json:")
        for number, playlist in enumerate(raw_data[:5], 1):
            print(f"   {number}. '{playlist.get('title', 'NO TITLE')}'")
        if len(raw_data) > 5:
            print(f"   ... and {len(raw_data) - 5} more playlists")

        # Filter playlists by title
        focused_playlists = []
        for playlist in raw_data:
            playlist_title = playlist.get("title", "")
            if playlist_title in self.songlist_focus_titles:
                focused_playlists.append(playlist)
                print(f"✅ Found matching playlist: '{playlist_title}'")

        if not focused_playlists:
            print(
                f"⚠️ No playlists found matching the specified titles: {', '.join(self.songlist_focus_titles)}"
            )
            return None

        # Flatten the focused playlists, keeping the first occurrence of each
        # case-insensitive artist/title pair
        focused_songs = []
        seen = set()
        for playlist in focused_playlists:
            if "songs" not in playlist:
                continue
            for song in playlist["songs"]:
                if "artist" not in song or "title" not in song:
                    continue
                artist = song["artist"].strip()
                title = song["title"].strip()
                key = f"{artist.lower()}_{title.lower()}"
                if key in seen:
                    continue
                seen.add(key)
                focused_songs.append(
                    {
                        "artist": artist,
                        "title": title,
                        "position": song.get("position", 0),
                    }
                )

        print(
            f"\n🎯 Songlist focus mode: {len(focused_songs)} songs from {len(focused_playlists)} playlists selected"
        )
        print(f"🎯 Focused playlists: {', '.join(self.songlist_focus_titles)}")
        return focused_songs

    def download_latest_per_channel(
        self,
        channel_urls,
        limit=None,
        force_refresh_download_plan=False,
        fuzzy_match=False,
        fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD,
        force_download=False,
        dry_run=False,
    ):
        """
        Download the latest N videos from each channel.

        The plan is rebuilt on every call and written to the plan cache (used
        for progress tracking) before the unified download workflow runs.

        Args:
            channel_urls: List of channel URLs to process
            limit: Number of latest videos to download from each channel;
                a falsy value (None or 0) means all videos
            force_refresh_download_plan: Accepted for interface parity; not
                used by this method
            fuzzy_match: Accepted for interface parity; not used by this method
            fuzzy_threshold: Accepted for interface parity; not used by this method
            force_download: Recorded on every plan entry and in the cache key
            dry_run: Forwarded to the unified download workflow

        Returns:
            bool: False when no videos were found in any channel, otherwise
            the success flag of the unified download workflow
        """
        # Avoid printing "latest None videos" when no limit was given
        if limit:
            print(f"\n🎬 Downloading latest {limit} videos from {len(channel_urls)} channels")
        else:
            print(f"\n🎬 Downloading all videos from {len(channel_urls)} channels")

        # Build download plan for latest videos from each channel
        download_plan = []

        for i, channel_url in enumerate(channel_urls, 1):
            print(f"\n🚦 Processing channel {i}/{len(channel_urls)}: {channel_url}")

            # Get channel info
            channel_name, _ = get_channel_info(channel_url)
            print(f"   ✅ Channel: {channel_name}")

            # Get videos from channel (cached list is acceptable here)
            available_videos = self.tracker.get_channel_video_list(
                channel_url,
                str(self.yt_dlp_path),
                force_refresh=False,
            )

            if not available_videos:
                print(f"   ⚠️  No videos found for {channel_name}")
                continue

            print(f"   📊 Found {len(available_videos)} videos")

            # Take the first N entries; assumes the list is ordered
            # newest-first — TODO confirm against get_channel_video_list
            latest_videos = available_videos[:limit] if limit else available_videos
            print(f"   🎯 Processing latest {len(latest_videos)} videos")

            for video in latest_videos:
                video_title = video["title"]
                video_id = video["id"]

                # Extract artist and title; fall back to the full video title
                artist, extracted_title = self.channel_parser.extract_artist_title(video_title, channel_name)
                if not artist and not extracted_title:
                    artist = ""
                    extracted_title = video_title

                download_plan.append({
                    "video_id": video_id,
                    "artist": artist,
                    "title": extracted_title,
                    "filename": sanitize_filename(artist, extracted_title),
                    "channel_name": channel_name,
                    "video_title": video_title,
                    "force_download": force_download,
                })

        print(f"\n📋 Download plan created: {len(download_plan)} videos from {len(channel_urls)} channels")

        if not download_plan:
            print("❌ No videos to download")
            return False

        # Cache file for progress tracking; its name is derived from these values
        plan_kwargs = {
            "channels": len(channel_urls),
            "limit_per_channel": limit,
            "force_download": force_download,
        }

        # Add channel URLs hash to ensure same channels = same cache
        channels_hash = hashlib.md5(
            "|".join(sorted(channel_urls)).encode()
        ).hexdigest()[:8]
        plan_kwargs["channels_hash"] = channels_hash

        cache_file = get_download_plan_cache_file("latest_per_channel", **plan_kwargs)

        # Save the plan to cache
        save_plan_cache(cache_file, download_plan, [])  # No unmatched for latest-per-channel mode

        print(f"💾 Download plan cached to: {cache_file.name}")

        # Use unified download workflow
        _, success = self.execute_unified_download_workflow(
            download_plan=download_plan,
            cache_file=cache_file,
            limit=None,  # Limit already applied during plan building
            show_progress=True,
            dry_run=dry_run,
        )

        return success

    def _process_videos_for_download(self, available_videos, channel_name, force_refresh=False, fuzzy_match=False, fuzzy_threshold=DEFAULT_FUZZY_THRESHOLD, force_download=False, dry_run=False):
        """Process videos for download (used for both manual and regular channels).

        Two modes are supported:

        * Force mode (``force_download`` is true and ``channel_name`` is
          ``"@ManualVideos"``): every video is downloaded directly, without
          consulting the songlist.
        * Songlist mode (everything else): video titles are parsed into
          artist/title, matched against the songlist (exactly, and optionally
          fuzzily), sorted, limited, and handed to
          ``execute_unified_download_workflow``.

        Args:
            available_videos: List of video dicts; each must provide the
                ``"title"`` and ``"id"`` keys.
            channel_name: Channel the videos belong to; also passed to the
                channel parser and stored in every plan item.
            force_refresh: Accepted for interface compatibility; not used here.
            fuzzy_match: If true, videos without an exact songlist match are
                compared against every songlist entry with the similarity
                function.
            fuzzy_threshold: Minimum similarity score for a fuzzy match.
            force_download: Enables force mode for manual videos and is
                recorded in each plan item.
            dry_run: If true, nothing is downloaded; the plan is only shown.

        Returns:
            bool: In force mode, whether at least one video was downloaded
            (or, for a dry run, whether there is anything to download).
            Otherwise the success flag returned by the unified workflow, or
            ``False`` when there is no songlist or no match.
        """
        songlist = load_songlist(self.songlist_file_path)

        # For manual videos with force_download, bypass songlist filtering
        if force_download and channel_name == "@ManualVideos":
            total_videos = len(available_videos)
            print(f"💪 Force mode enabled - downloading all {total_videos} manual videos")

            # Honour dry-run here as well: this branch bypasses the unified
            # workflow, which is where dry-run is normally handled.
            if dry_run:
                print("\n🔍 DRY RUN MODE - No downloads will be performed")
                for i, video in enumerate(available_videos, 1):
                    print(f"   {i:2d}. {video['title']} ({channel_name})")
                print(f"\n✅ Dry run completed - {total_videos} videos would be downloaded")
                return total_videos > 0

            # Download all videos directly without songlist filtering
            success_count = 0
            for i, video in enumerate(available_videos, 1):
                title = video["title"]

                print(f"\n⬇️  Downloading {i}/{total_videos}: {title}")
                print(f"   🎬 Video: {title} ({channel_name})")

                # Create filename from title
                artist, extracted_title = self.channel_parser.extract_artist_title(title, channel_name)
                if not artist and not extracted_title:
                    # Fallback: use the full title as filename
                    filename = sanitize_filename("", title)
                else:
                    filename = sanitize_filename(artist, extracted_title)

                # Download the video
                if self._download_single_video(video, channel_name, filename, force_download=True):
                    success_count += 1

            print(f"\n✅ Downloaded {success_count}/{total_videos} manual videos")
            return success_count > 0

        # Regular songlist-based processing
        if not songlist:
            print("⚠️ No songlist loaded. Skipping.")
            return False

        # NOTE(review): the server data is loaded but never consulted below.
        # The calls are kept in case the loaders have side effects; either wire
        # up the availability check or drop them.
        server_songs = load_server_songs()  # noqa: F841
        server_duplicates_tracking = load_server_duplicates_tracking()  # noqa: F841

        limit = getattr(self.config, "limit", 1)

        # Normalize songlist for matching
        normalized_songlist = {
            create_song_key(s["artist"], s["title"]): s for s in songlist
        }
        similarity = get_similarity_function()

        # The comparison text of each songlist entry does not depend on the
        # video, so build it once instead of once per video.
        fuzzy_candidates = []
        if fuzzy_match:
            fuzzy_candidates = [
                (f"{entry['artist']} {entry['title']}", entry)
                for entry in normalized_songlist.values()
            ]

        matches = []
        print(f"🔍 Scanning {len(available_videos)} videos for songlist matches...")

        for video in available_videos:
            # Extract artist and title using channel parser
            artist, extracted_title = self.channel_parser.extract_artist_title(video["title"], channel_name)

            if not artist and not extracted_title:
                continue

            # Check for exact matches first
            song_key = create_song_key(artist, extracted_title)
            exact_song = normalized_songlist.get(song_key) if song_key in normalized_songlist else None
            if song_key in normalized_songlist:
                matches.append({
                    "video": video,
                    "song": exact_song,
                    "match_type": "exact",
                    "match_score": 100.0,
                    "artist": artist,
                    "title": extracted_title
                })
                print(f"   ✅ Exact match: {artist} - {extracted_title}")
                continue

            # Check for fuzzy matches if enabled
            if not fuzzy_match:
                continue

            query = f"{artist} {extracted_title}"
            best_match = None
            best_score = 0
            for candidate_text, candidate_song in fuzzy_candidates:
                score = similarity(query, candidate_text)
                # Strict ">" keeps the first of several equally good candidates.
                if score > best_score and score >= fuzzy_threshold:
                    best_score = score
                    best_match = candidate_song

            if best_match:
                matches.append({
                    "video": video,
                    "song": best_match,
                    "match_type": "fuzzy",
                    "match_score": best_score,
                    "artist": artist,
                    "title": extracted_title
                })
                print(f"   🎯 Fuzzy match ({best_score:.1f}%): {artist} - {extracted_title} -> {best_match['artist']} - {best_match['title']}")

        print(f"📊 Found {len(matches)} matches out of {len(available_videos)} videos")

        if not matches:
            print("❌ No matches found in songlist")
            return False

        # Sort matches by score (exact matches first, then by fuzzy score)
        matches.sort(key=lambda x: (x["match_type"] != "exact", -x["match_score"]))

        # Limit downloads
        if limit:
            matches = matches[:limit]
            print(f"🎯 Limiting to {len(matches)} downloads")

        # Convert matches to a download plan
        download_plan = [
            {
                "video_id": match["video"]["id"],
                "artist": match["artist"],
                "title": match["title"],
                "filename": sanitize_filename(match["artist"], match["title"]),
                "channel_name": channel_name,
                "video_title": match["video"]["title"],
                "force_download": force_download,
            }
            for match in matches
        ]

        # Use the unified download workflow
        _, success = self.execute_unified_download_workflow(
            download_plan=download_plan,
            cache_file=None,  # No specific cache file for this mode
            limit=limit,
            show_progress=True,
            dry_run=dry_run,
        )

        return success

    def _download_single_video(self, video, channel_name, filename, force_download=False):
        """Download a single video using the download pipeline.

        Args:
            video: Video dict providing the ``"id"`` and ``"title"`` keys.
            channel_name: Channel name, passed to the channel parser and to
                the pipeline.
            filename: Accepted for interface compatibility; not used by this
                method.
            force_download: Accepted for interface compatibility; not
                forwarded to the pipeline.

        Returns:
            The result of ``DownloadPipeline.execute_pipeline`` (truthy on
            success).
        """
        video_id = video["id"]
        video_title = video["title"]

        # Extract artist and title for tracking
        artist, extracted_title = self.channel_parser.extract_artist_title(video_title, channel_name)
        if not artist and not extracted_title:
            # Fallback: use the full title
            artist = ""
            extracted_title = video_title

        # Use the download pipeline
        pipeline = DownloadPipeline(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        success = pipeline.execute_pipeline(
            video_id=video_id,
            artist=artist,
            title=extracted_title,
            channel_name=channel_name,
            video_title=video_title,
        )

        if success:
            print(f"✅ Successfully downloaded: {video_title}")
        else:
            print(f"❌ Failed to download: {video_title}")

        return success

    def execute_unified_download_workflow(
        self,
        download_plan,
        cache_file=None,
        limit=None,
        show_progress=True,
        dry_run=False,
    ):
        """
        Run a prepared download plan; the single entry point shared by all download modes.

        Args:
            download_plan: List of plan items (dicts) carrying video_id, artist, title,
                channel_name and video_title
            cache_file: Optional cache file used for progress tracking
            limit: Optional cap on the number of plan items to process
            show_progress: Whether progress information is printed
            dry_run: If True, the plan is previewed and nothing is downloaded

        Returns:
            tuple: (downloaded_count, success). For a dry run the count is the
            number of items that would be downloaded; an empty plan yields (0, True).
        """
        if not download_plan:
            print("📋 No videos to download in plan")
            return 0, True

        # Apply the optional cap; slicing also leaves the caller's list untouched.
        if limit:
            planned_total = min(limit, len(download_plan))
            download_plan = download_plan[:limit]
        else:
            planned_total = len(download_plan)

        if show_progress:
            print(f"\n🎬 Starting downloads: {planned_total} videos")
            print(f"   📁 Output directory: downloads/")
            if cache_file:
                print(f"   💾 Progress tracking: {cache_file.name}")

        if not dry_run:
            # Real run: delegate to the parallel or the sequential executor.
            if self.enable_parallel_downloads:
                runner = self._execute_parallel_downloads
            else:
                runner = self._execute_sequential_downloads
            return runner(download_plan, cache_file, show_progress)

        # Dry run: describe the plan and stop.
        plan_size = len(download_plan)
        print(f"\n🔍 DRY RUN MODE - No downloads will be performed")
        print(f"📋 Download plan preview:")
        print(f"   📊 Total videos in plan: {plan_size}")
        print(f"   📁 Output directory: downloads/")

        # Only the first few entries are listed.
        preview = download_plan[:5]
        shown = len(preview)
        print(f"\n📋 Preview of first {shown} videos:")
        for position, entry in enumerate(preview, 1):
            print(f"   {position:2d}. {entry['artist']} - {entry['title']} ({entry['channel_name']})")

        hidden = plan_size - shown
        if hidden > 0:
            print(f"   ... and {hidden} more videos")

        print(f"\n✅ Dry run completed - {plan_size} videos would be downloaded")
        return plan_size, True

    def _execute_sequential_downloads(self, download_plan, cache_file, show_progress):
        """Execute downloads sequentially using the download pipeline.

        Every item of ``download_plan`` is attempted exactly once, in order.
        When ``cache_file`` is given, the cache is rewritten after each item
        with the items that have not been attempted yet (failed items are not
        kept), and it is deleted once nothing is left.

        The plan passed in is not modified.

        Args:
            download_plan: List of plan items (dicts) with the keys
                ``video_id``, ``artist``, ``title``, ``channel_name`` and
                ``video_title``.
            cache_file: Optional plan cache file used for progress tracking.
            show_progress: Whether per-item and summary messages are printed.

        Returns:
            tuple: ``(success_count, success)`` where ``success`` is true when
            at least one item was downloaded.
        """
        success_count = 0

        # Work on a snapshot. The previous implementation removed items from
        # the list it was iterating over, which made the loop skip every other
        # item whenever a cache file was in use.
        pending = list(download_plan)
        total_to_download = len(pending)

        # Create download pipeline
        pipeline = DownloadPipeline(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        for i, plan_item in enumerate(pending, 1):
            if show_progress:
                print(f"\n⬇️  Downloading {i}/{total_to_download}: {plan_item['artist']} - {plan_item['title']}")
                print(f"   🎬 Video: {plan_item['video_title']} ({plan_item['channel_name']})")

            success = pipeline.execute_pipeline(
                video_id=plan_item["video_id"],
                artist=plan_item["artist"],
                title=plan_item["title"],
                channel_name=plan_item["channel_name"],
                video_title=plan_item["video_title"],
            )

            if success:
                success_count += 1
                if show_progress:
                    print(f"✅ Successfully downloaded: {plan_item['artist']} - {plan_item['title']}")
            elif show_progress:
                print(f"❌ Failed to download: {plan_item['artist']} - {plan_item['title']}")

            # Update cache if provided
            if cache_file:
                # ``i`` is 1-based, so this is everything after the current item.
                remaining = pending[i:]
                save_plan_cache(cache_file, remaining, [])  # No unmatched for unified workflow

                if not remaining:  # All downloads completed
                    delete_plan_cache(cache_file)
                    if show_progress:
                        print("🗑️  All downloads completed, deleted download plan cache.")

        if show_progress:
            print(f"\n🎉 Download complete! {success_count}/{total_to_download} videos downloaded successfully")

        return success_count, success_count > 0

    def _execute_parallel_downloads(self, download_plan, cache_file, show_progress):
        """Run the download plan through the parallel downloader.

        Args:
            download_plan: List of plan items (dicts) with the keys
                ``video_id``, ``artist``, ``title``, ``channel_name`` and
                ``video_title``.
            cache_file: Not used by this method.
            show_progress: Forwarded to the downloader and controls the
                summary message.

        Returns:
            tuple: ``(success_count, success)`` where ``success`` is true when
            at least one task reported success.
        """
        from karaoke_downloader.parallel_downloader import (
            DownloadTask,
            create_parallel_downloader,
        )

        # Build the downloader from this instance's settings
        downloader = create_parallel_downloader(
            yt_dlp_path=str(self.yt_dlp_path),
            config=self.config,
            downloads_dir=self.downloads_dir,
            max_workers=self.parallel_workers,
            songlist_tracking=self.songlist_tracking,
            tracker=self.tracker,
        )

        # One task per plan item
        tasks = [
            DownloadTask(
                video_id=entry["video_id"],
                artist=entry["artist"],
                title=entry["title"],
                channel_name=entry["channel_name"],
                video_title=entry["video_title"],
            )
            for entry in download_plan
        ]

        # Queue everything, then run
        downloader.add_download_tasks(tasks)
        results = downloader.execute_downloads(show_progress=show_progress)

        # Tally the outcomes
        succeeded = [outcome for outcome in results if outcome.success]
        success_count = len(succeeded)
        total_to_download = len(tasks)

        if show_progress:
            print(f"\n🎉 Parallel download complete! {success_count}/{total_to_download} videos downloaded successfully")

        return success_count, success_count > 0


def reset_songlist_all():
    """Reset all songlist download state.

    Performs three steps, in order:

    1. Deletes every file referenced by a ``file_path`` entry in the songlist
       tracking file (best effort; failures are reported and skipped).
    2. Overwrites the songlist tracking file with an empty JSON object.
    3. Removes from the karaoke tracking file every song whose
       ``"<artist lowercased>_<normalized title>"`` key was present in the
       songlist tracking data, if the karaoke tracking file exists.

    File locations come from the data path manager.
    """
    # Load songlist tracking
    data_path_manager = get_data_path_manager()
    songlist_tracking_file = data_path_manager.get_songlist_tracking_path()
    karaoke_tracking_file = data_path_manager.get_karaoke_tracking_path()

    if songlist_tracking_file.exists():
        with open(songlist_tracking_file, "r", encoding="utf-8") as f:
            tracking = json.load(f)
    else:
        tracking = {}

    # Delete all files tracked
    for entry in tracking.values():
        file_path = entry.get("file_path")
        if not file_path:
            continue
        p = Path(file_path)
        try:
            if p.exists():
                p.unlink()
                print(f"🗑️ Deleted: {p}")
        except OSError as e:
            # Best effort: one undeletable file must not abort the reset.
            print(f"⚠️ Could not delete {p}: {e}")

    # Clear songlist_tracking.json
    songlist_tracking_file.write_text("{}", encoding="utf-8")
    print("🧹 Cleared songlist_tracking.json")

    # Remove songlist songs from karaoke_tracking.json
    if karaoke_tracking_file.exists():
        with open(karaoke_tracking_file, "r", encoding="utf-8") as f:
            karaoke_data = json.load(f)

        # Collect first, delete afterwards: the dict must not change size
        # while it is being iterated.
        song_keys_to_remove = []
        for song_id, song in karaoke_data.get("songs", {}).items():
            artist = song.get("artist", "")
            title = song.get("title", song.get("name", ""))
            key = f"{artist.lower()}_{normalize_title(title)}"
            if key in tracking:
                song_keys_to_remove.append(song_id)

        for song_id in song_keys_to_remove:
            del karaoke_data["songs"][song_id]

        with open(karaoke_tracking_file, "w", encoding="utf-8") as f:
            json.dump(karaoke_data, f, indent=2, ensure_ascii=False)
        print(
            f"🧹 Removed {len(song_keys_to_remove)} songlist songs from karaoke_tracking.json"
        )

    print("✅ Global songlist reset complete.")

    # For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
    # updating all references to use the new karaoke_downloader.* imports as needed.