Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>

2025-07-24 10:00:09 -05:00 · 2025-07-24 10:00:09 -05:00 · 6a2eb2bac5
commit 6a2eb2bac5
parent 997c294c89
5 changed files with 55827 additions and 21 deletions
--- a/data/channel_cache.json
+++ b/data/channel_cache.json
--- a/data/config.json
+++ b/data/config.json
@ -1,22 +1,26 @@
 {
  "download_settings":  {
-    "format": "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best",
-    "preferred_resolution": "720p",
-    "audio_format": "mp3",
-    "audio_quality": "0",
+    "format": "bestvideo[height=720][ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]",
+    "outtmpl": "%(title)s_720p.%(ext)s",
+    "merge_output_format": "mp4",
+    "noplaylist": true,
+    "postprocessors": [{
+        "key": "FFmpegExtractAudio",
+        "preferredcodec": "mp3",
+        "preferredquality": "0"
+    }],
+    "writesubtitles": false,
    "subtitle_language": "en",
    "subtitle_format": "srt",
-    "write_metadata": false,
-    "write_thumbnail": false,
-    "write_description": false,
-    "write_annotations": false,
-    "write_comments": false,
-    "write_subtitles": false,
+    "writedescription": false,
+    "writeannotations": false,
+    "writeinfojson": false,
+    "writecomments": false,
+    "writethumbnail": false,
    "embed_metadata": false,
-    "add_metadata": false,
-    "continue_downloads": true,
-    "no_overwrites": true,
-    "ignore_errors": true,
+    "continuedl": true,
+    "nooverwrites": true,
+    "ignoreerrors": true,
    "no_warnings": false
 },
  "folder_structure": {
--- a/karaoke_downloader/cli.py
+++ b/karaoke_downloader/cli.py
@ -33,6 +33,7 @@ Examples:
    parser.add_argument('--songlist-status', action='store_true', help='Show songlist download status and statistics')
    parser.add_argument('--reset-channel', metavar='CHANNEL_NAME', help='Reset all tracking and files for a channel')
    parser.add_argument('--reset-songlist', action='store_true', help='When used with --reset-channel, also reset songlist songs for this channel')
+    parser.add_argument('--reset-songlist-all', action='store_true', help='Reset all songlist tracking and delete all songlist-downloaded files (global)')
    parser.add_argument('--version', '-v', action='version', version='Karaoke Playlist Downloader v1.0')
    args = parser.parse_args()

@ -78,6 +79,23 @@ Examples:
        sys.exit(0)
    # --- END NEW ---

+    # --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels in data/channels.txt ---
+    if args.songlist_only and not args.url and not args.file:
+        channels_file = Path("data/channels.txt")
+        if channels_file.exists():
+            args.file = str(channels_file)
+            print("📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode.")
+        else:
+            print("❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs.")
+            sys.exit(1)
+    # --- END NEW ---
+
+    if args.reset_songlist_all:
+        from karaoke_downloader.downloader import reset_songlist_all
+        reset_songlist_all()
+        print('✅ All songlist tracking and files have been reset.')
+        sys.exit(0)
+
    if args.status:
        stats = downloader.tracker.get_statistics()
        print("🎤 Karaoke Downloader Status")
@ -135,8 +153,12 @@ Examples:
            if len(tracking) > 10:
                print(f"   ... and {len(tracking) - 10} more")
        sys.exit(0)
-    elif args.file:
-        success = downloader.download_from_file(args.file, force_refresh=args.refresh)
+    elif args.songlist_only and args.file:
+        # Read all channel URLs from file
+        with open(args.file, "r", encoding="utf-8") as f:
+            channel_urls = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
+        limit = args.limit if args.limit else None
+        success = downloader.download_songlist_across_channels(channel_urls, limit=limit)
    elif args.url:
        success = downloader.download_channel_videos(args.url, force_refresh=args.refresh)
    else:
--- a/karaoke_downloader/downloader.py
+++ b/karaoke_downloader/downloader.py
@ -141,7 +141,245 @@ class KaraokeDownloader:
        if not reset_songlist:
            print(f"   Songlist songs were preserved.")

-    # ... (rest of the KaraokeDownloader methods, updated to use DATA_DIR for all data file paths) ...
+    def download_from_file(self, file_path, force_refresh=False):
+        """
+        Process every channel/playlist URL listed in a text file.
+
+        Blank lines and lines starting with '#' are ignored. Each remaining line is
+        passed to download_channel_videos in file order; a failure on one URL does
+        not stop the remaining URLs from being processed.
+
+        Returns True only if the file exists, lists at least one URL, and every
+        download_channel_videos call returned a truthy value.
+        """
+        source = Path(file_path)
+        if not source.exists():
+            print(f"❌ File not found: {file_path}")
+            return False
+        with open(source, "r", encoding="utf-8") as handle:
+            stripped_lines = (raw.strip() for raw in handle)
+            urls = [entry for entry in stripped_lines if entry and not entry.startswith("#")]
+        if not urls:
+            print(f"❌ No URLs found in {file_path}")
+            return False
+        # Count failures instead of short-circuiting so every URL is still attempted.
+        failed = 0
+        for url in urls:
+            print(f"\n➡️  Processing: {url}")
+            if not self.download_channel_videos(url, force_refresh=force_refresh):
+                failed += 1
+        return failed == 0
+
+    def download_channel_videos(self, url, force_refresh=False):
+        """
+        Download songlist matches from a single channel or playlist URL.
+
+        Lists the channel's videos with yt-dlp, matches them against the songlist,
+        and downloads at most `limit` not-yet-downloaded matches, where `limit` is
+        self.config.get('limit', 1).
+
+        Args:
+            url: Channel or playlist URL handed to yt-dlp.
+            force_refresh: Accepted for interface compatibility; this method always
+                queries yt-dlp directly and does not read the flag.
+
+        Returns:
+            False if no songlist is loaded or the playlist listing fails; True
+            otherwise, including when individual downloads fail and are skipped.
+        """
+        channel_name, _ = get_channel_info(url)
+        print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
+        songlist = load_songlist()
+        if not songlist:
+            print("⚠️ No songlist loaded. Skipping.")
+            return False
+        # Get limit from config or default to 1
+        limit = self.config.get('limit', 1)
+        # --- Get channel video list with yt-dlp --flat-playlist ---
+        cmd = [
+            str(self.yt_dlp_path),
+            '--flat-playlist',
+            '--print', '%(title)s|%(id)s|%(url)s',
+            url
+        ]
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"❌ yt-dlp failed to fetch playlist: {e}")
+            return False
+        # Build a list of available videos
+        available_videos = []
+        for line in result.stdout.strip().splitlines():
+            # Split from the right: the title is the first printed field and may itself
+            # contain '|' (e.g. "Artist - Song | Karaoke"). A left-to-right split would
+            # treat that as a field separator and produce a truncated title and a bogus id.
+            parts = line.rsplit('|', 2)
+            if len(parts) >= 2:
+                available_videos.append({'title': parts[0].strip(), 'id': parts[1].strip()})
+        # Normalize songlist for matching
+        normalized_songlist = {
+            f"{s['artist'].lower()}_{normalize_title(s['title'])}": s for s in songlist
+        }
+        # Find matches, stopping as soon as `limit` of them have been collected
+        matches = []
+        for video in available_videos:
+            artist, title = extract_artist_title(video['title'])
+            key = f"{artist.lower()}_{normalize_title(title)}"
+            if key in normalized_songlist:
+                # Check if already downloaded
+                if not is_songlist_song_downloaded(self.songlist_tracking, artist, title):
+                    matches.append((video, normalized_songlist[key]))
+            if len(matches) >= limit:
+                break
+        if not matches:
+            print("🎵 No new songlist matches found for this channel.")
+            return True
+        # Download only the first N matches
+        for video, song in matches:
+            # Use the songlist's own artist/title spelling for the file name and tracking.
+            artist, title = song['artist'], song['title']
+            output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+            print(f"⬇️  Downloading: {artist} - {title} -> {output_path}")
+            # Download by video ID
+            video_url = f"https://www.youtube.com/watch?v={video['id']}"
+            cmd = [
+                str(self.yt_dlp_path),
+                "-o", str(output_path),
+                "-f", self.config["download_settings"]["format"],
+                video_url
+            ]
+            try:
+                subprocess.run(cmd, check=True)
+            except subprocess.CalledProcessError as e:
+                print(f"❌ yt-dlp failed: {e}")
+                continue
+            # A failed, empty or invalid file is skipped and left untracked.
+            if not output_path.exists() or output_path.stat().st_size == 0:
+                print(f"❌ Download failed or file is empty: {output_path}")
+                continue
+            if not self._is_valid_mp4(output_path):
+                print(f"❌ File is not a valid MP4: {output_path}")
+                continue
+            add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
+            mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
+            print(f"✅ Downloaded and tracked: {artist} - {title}")
+            print(f"🎉 All post-processing complete for: {output_path}")
+        return True
+
+    def download_songlist_across_channels(self, channel_urls, limit=None):
+        """
+        Download undownloaded songlist songs, searching the given channels in order.
+
+        Each song is attempted once: channels are tried in the order given and the
+        song is downloaded from the first channel that has a matching video whose
+        download passes validation. Processing stops once `limit` songs have been
+        downloaded (no cap when `limit` is None).
+
+        Args:
+            channel_urls: Channel URLs to search, in priority order.
+            limit: Maximum number of songs to download, or None for no limit.
+
+        Returns:
+            False if no songlist is loaded; True otherwise, even when some songs
+            could not be found or downloaded.
+        """
+        songlist = load_songlist()
+        if not songlist:
+            print("⚠️ No songlist loaded. Skipping.")
+            return False
+        # Filter for songs not yet downloaded
+        undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
+        print(f"🎯 {len(undownloaded)} songlist songs to download.")
+        if not undownloaded:
+            print("🎵 All songlist songs already downloaded.")
+            return True
+        print("🔍 Songs to search for:")
+        for song in undownloaded:
+            print(f"   - {song['artist']} - {song['title']}")
+        downloaded_count = 0
+        attempted = set()
+        # A single pass is sufficient because every song is attempted at most once.
+        # The previous outer `while` re-scanned the list, skipped every song as already
+        # attempted, and never terminated when some songs had downloaded, others were
+        # still missing, and the limit was unset or not yet reached.
+        for song in list(undownloaded):
+            if limit is not None and downloaded_count >= limit:
+                break
+            artist, title = song['artist'], song['title']
+            if (artist, title) in attempted:
+                continue
+            attempted.add((artist, title))
+            # These do not depend on the video being compared, so compute them once per song.
+            wanted_artist = normalize_title(artist)
+            wanted_title = normalize_title(title)
+            wanted_full = normalize_title(f"{artist} - {title}")
+            found = False
+            for channel_url in channel_urls:
+                channel_name, _ = get_channel_info(channel_url)
+                print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
+                print(f"   Searching for: {artist} - {title}")
+                available_videos = self.tracker.get_channel_video_list(
+                    channel_url,
+                    yt_dlp_path=str(self.yt_dlp_path),
+                    force_refresh=False
+                )
+                for video in available_videos:
+                    v_artist, v_title = extract_artist_title(video['title'])
+                    is_match = (
+                        (normalize_title(v_artist) == wanted_artist and normalize_title(v_title) == wanted_title)
+                        or normalize_title(video['title']) == wanted_full
+                    )
+                    if not is_match:
+                        continue
+                    # Download this song from this channel
+                    output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
+                    output_path.parent.mkdir(parents=True, exist_ok=True)
+                    print(f"⬇️  Downloading: {artist} - {title} -> {output_path}")
+                    video_url = f"https://www.youtube.com/watch?v={video['id']}"
+                    dlp_cmd = [
+                        str(self.yt_dlp_path),
+                        "-o", str(output_path),
+                        "-f", self.config["download_settings"]["format"],
+                        video_url
+                    ]
+                    # On any failure below, fall through to the next matching video
+                    # in this channel rather than giving up on the song.
+                    try:
+                        subprocess.run(dlp_cmd, check=True)
+                    except subprocess.CalledProcessError as e:
+                        print(f"❌ yt-dlp failed: {e}")
+                        continue
+                    if not output_path.exists() or output_path.stat().st_size == 0:
+                        print(f"❌ Download failed or file is empty: {output_path}")
+                        continue
+                    if not self._is_valid_mp4(output_path):
+                        print(f"❌ File is not a valid MP4: {output_path}")
+                        continue
+                    add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
+                    mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
+                    print(f"✅ Downloaded and tracked: {artist} - {title}")
+                    print(f"🎉 All post-processing complete for: {output_path}")
+                    downloaded_count += 1
+                    found = True
+                    break  # Only download from first channel where found
+                if found:
+                    break
+            if found:
+                undownloaded.remove(song)
+        # Report only songs that were actually searched for; songs left untouched because
+        # the limit was reached were never looked up, so they are not "not found".
+        not_found = [s for s in undownloaded if (s['artist'], s['title']) in attempted]
+        if not_found:
+            print(f"⚠️ {len(not_found)} songlist songs could not be found in any channel:")
+            for song in not_found:
+                print(f"   - {song['artist']} - {song['title']}")
+        print(f"🎉 Downloaded {downloaded_count} songlist songs.")
+        return True
+
+    def _is_valid_mp4(self, file_path):
+        """
+        Check with ffprobe, if available, that the file has a recognised video stream.
+
+        Returns True when the codec name reported for the first video stream contains
+        "mp4", "h264" or "hevc", or when ffprobe cannot be launched (check skipped).
+        Returns False when ffprobe runs but exits with an error, or reports a
+        different codec or no video stream at all.
+        """
+        cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)]
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        except subprocess.CalledProcessError:
+            # ffprobe ran and rejected the file. Treating this as "valid" (as the old
+            # blanket `except Exception` did) would let corrupt downloads be tracked.
+            return False
+        except OSError:
+            # If ffprobe is not available (or cannot be executed), skip the check
+            return True
+        return any(codec in result.stdout for codec in ("mp4", "h264", "hevc"))
+
+def reset_songlist_all():
+    """
+    Reset all songlist state under data/.
+
+    Deletes every file recorded in data/songlist_tracking.json, rewrites that file
+    as an empty JSON object, and removes from data/karaoke_tracking.json every song
+    whose "<artist lowercased>_<normalized title>" key was present in the songlist
+    tracking data. Deletion errors are reported and do not abort the reset.
+    """
+    import json
+    from pathlib import Path
+
+    tracking_path = Path('data/songlist_tracking.json')
+    karaoke_path = Path('data/karaoke_tracking.json')
+
+    # Load songlist tracking (empty when the file does not exist yet)
+    tracking = {}
+    if tracking_path.exists():
+        with open(tracking_path, 'r', encoding='utf-8') as handle:
+            tracking = json.load(handle)
+
+    # Delete all files tracked
+    for record in tracking.values():
+        recorded_path = record.get('file_path')
+        if not recorded_path:
+            continue
+        target = Path(recorded_path)
+        try:
+            if target.exists():
+                target.unlink()
+                print(f"🗑️ Deleted: {target}")
+        except Exception as err:
+            print(f"⚠️ Could not delete {target}: {err}")
+
+    # Clear songlist_tracking.json
+    tracking_path.write_text("{}", encoding="utf-8")
+    print("🧹 Cleared songlist_tracking.json")
+
+    # Remove songlist songs from karaoke_tracking.json
+    if karaoke_path.exists():
+        with open(karaoke_path, 'r', encoding='utf-8') as handle:
+            karaoke_data = json.load(handle)
+
+        def _tracking_key(song):
+            # Same key shape as used for the membership test against the tracking data.
+            song_title = song.get('title', song.get('name', ''))
+            return f"{song.get('artist', '').lower()}_{normalize_title(song_title)}"
+
+        # Collect ids first so the dict is not mutated while it is being iterated.
+        stale_ids = [
+            song_id
+            for song_id, song in karaoke_data.get('songs', {}).items()
+            if _tracking_key(song) in tracking
+        ]
+        for song_id in stale_ids:
+            del karaoke_data['songs'][song_id]
+        with open(karaoke_path, 'w', encoding='utf-8') as handle:
+            json.dump(karaoke_data, handle, indent=2, ensure_ascii=False)
+        print(f"🧹 Removed {len(stale_ids)} songlist songs from karaoke_tracking.json")
+    print("✅ Global songlist reset complete.")

    # For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
    # updating all references to use the new karaoke_downloader.* imports as needed. 
--- a/karaoke_downloader/tracking_manager.py
+++ b/karaoke_downloader/tracking_manager.py
@ -134,3 +134,37 @@ class TrackingManager:
            "songs": songs
        }
        return json.dumps(report, indent=2, ensure_ascii=False)
+
+    def get_channel_video_list(self, channel_url, yt_dlp_path="downloader/yt-dlp.exe", force_refresh=False):
+        """
+        Return a list of videos (dicts with 'title' and 'id') for the channel, using cache if available unless force_refresh is True.
+
+        The cache key is the channel id reported by get_channel_info, falling back to
+        the URL itself when no id is returned. A freshly fetched list is stored in
+        self.cache and persisted with self.save_cache().
+
+        Returns an empty list (and caches nothing) when yt-dlp exits with an error.
+        """
+        from karaoke_downloader.youtube_utils import get_channel_info
+        _, channel_id = get_channel_info(channel_url)
+        cache_key = channel_id or channel_url
+        if not force_refresh and cache_key in self.cache:
+            return self.cache[cache_key]
+        # Fetch with yt-dlp
+        import subprocess
+        cmd = [
+            yt_dlp_path,
+            '--flat-playlist',
+            '--print', '%(title)s|%(id)s|%(url)s',
+            channel_url
+        ]
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        except subprocess.CalledProcessError as e:
+            print(f"❌ yt-dlp failed to fetch playlist for cache: {e}")
+            return []
+        videos = []
+        for line in result.stdout.strip().splitlines():
+            # Split from the right: the title is the first printed field and may itself
+            # contain '|'. A left-to-right split would truncate the title and store
+            # part of it as the video id.
+            parts = line.rsplit('|', 2)
+            if len(parts) >= 2:
+                videos.append({'title': parts[0].strip(), 'id': parts[1].strip()})
+        self.cache[cache_key] = videos
+        self.save_cache()
+        return videos