Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
997c294c89
commit
6a2eb2bac5
55508
data/channel_cache.json
55508
data/channel_cache.json
File diff suppressed because it is too large
Load Diff
@ -1,22 +1,26 @@
|
|||||||
{
|
{
|
||||||
"download_settings": {
|
"download_settings": {
|
||||||
"format": "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best",
|
"format": "bestvideo[height=720][ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]",
|
||||||
"preferred_resolution": "720p",
|
"outtmpl": "%(title)s_720p.%(ext)s",
|
||||||
"audio_format": "mp3",
|
"merge_output_format": "mp4",
|
||||||
"audio_quality": "0",
|
"noplaylist": true,
|
||||||
|
"postprocessors": [{
|
||||||
|
"key": "FFmpegExtractAudio",
|
||||||
|
"preferredcodec": "mp3",
|
||||||
|
"preferredquality": "0"
|
||||||
|
}],
|
||||||
|
"writesubtitles": false,
|
||||||
"subtitle_language": "en",
|
"subtitle_language": "en",
|
||||||
"subtitle_format": "srt",
|
"subtitle_format": "srt",
|
||||||
"write_metadata": false,
|
"writedescription": false,
|
||||||
"write_thumbnail": false,
|
"writeannotations": false,
|
||||||
"write_description": false,
|
"writeinfojson": false,
|
||||||
"write_annotations": false,
|
"writecomments": false,
|
||||||
"write_comments": false,
|
"writethumbnail": false,
|
||||||
"write_subtitles": false,
|
|
||||||
"embed_metadata": false,
|
"embed_metadata": false,
|
||||||
"add_metadata": false,
|
"continuedl": true,
|
||||||
"continue_downloads": true,
|
"nooverwrites": true,
|
||||||
"no_overwrites": true,
|
"ignoreerrors": true,
|
||||||
"ignore_errors": true,
|
|
||||||
"no_warnings": false
|
"no_warnings": false
|
||||||
},
|
},
|
||||||
"folder_structure": {
|
"folder_structure": {
|
||||||
|
|||||||
@ -33,6 +33,7 @@ Examples:
|
|||||||
parser.add_argument('--songlist-status', action='store_true', help='Show songlist download status and statistics')
|
parser.add_argument('--songlist-status', action='store_true', help='Show songlist download status and statistics')
|
||||||
parser.add_argument('--reset-channel', metavar='CHANNEL_NAME', help='Reset all tracking and files for a channel')
|
parser.add_argument('--reset-channel', metavar='CHANNEL_NAME', help='Reset all tracking and files for a channel')
|
||||||
parser.add_argument('--reset-songlist', action='store_true', help='When used with --reset-channel, also reset songlist songs for this channel')
|
parser.add_argument('--reset-songlist', action='store_true', help='When used with --reset-channel, also reset songlist songs for this channel')
|
||||||
|
parser.add_argument('--reset-songlist-all', action='store_true', help='Reset all songlist tracking and delete all songlist-downloaded files (global)')
|
||||||
parser.add_argument('--version', '-v', action='version', version='Karaoke Playlist Downloader v1.0')
|
parser.add_argument('--version', '-v', action='version', version='Karaoke Playlist Downloader v1.0')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@ -78,6 +79,23 @@ Examples:
|
|||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
# --- END NEW ---
|
# --- END NEW ---
|
||||||
|
|
||||||
|
# --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels in data/channels.txt ---
|
||||||
|
if args.songlist_only and not args.url and not args.file:
|
||||||
|
channels_file = Path("data/channels.txt")
|
||||||
|
if channels_file.exists():
|
||||||
|
args.file = str(channels_file)
|
||||||
|
print("📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode.")
|
||||||
|
else:
|
||||||
|
print("❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs.")
|
||||||
|
sys.exit(1)
|
||||||
|
# --- END NEW ---
|
||||||
|
|
||||||
|
if args.reset_songlist_all:
|
||||||
|
from karaoke_downloader.downloader import reset_songlist_all
|
||||||
|
reset_songlist_all()
|
||||||
|
print('✅ All songlist tracking and files have been reset.')
|
||||||
|
sys.exit(0)
|
||||||
|
|
||||||
if args.status:
|
if args.status:
|
||||||
stats = downloader.tracker.get_statistics()
|
stats = downloader.tracker.get_statistics()
|
||||||
print("🎤 Karaoke Downloader Status")
|
print("🎤 Karaoke Downloader Status")
|
||||||
@ -135,8 +153,12 @@ Examples:
|
|||||||
if len(tracking) > 10:
|
if len(tracking) > 10:
|
||||||
print(f" ... and {len(tracking) - 10} more")
|
print(f" ... and {len(tracking) - 10} more")
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
elif args.file:
|
elif args.songlist_only and args.file:
|
||||||
success = downloader.download_from_file(args.file, force_refresh=args.refresh)
|
# Read all channel URLs from file
|
||||||
|
with open(args.file, "r", encoding="utf-8") as f:
|
||||||
|
channel_urls = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
|
||||||
|
limit = args.limit if args.limit else None
|
||||||
|
success = downloader.download_songlist_across_channels(channel_urls, limit=limit)
|
||||||
elif args.url:
|
elif args.url:
|
||||||
success = downloader.download_channel_videos(args.url, force_refresh=args.refresh)
|
success = downloader.download_channel_videos(args.url, force_refresh=args.refresh)
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -141,7 +141,245 @@ class KaraokeDownloader:
|
|||||||
if not reset_songlist:
|
if not reset_songlist:
|
||||||
print(f" Songlist songs were preserved.")
|
print(f" Songlist songs were preserved.")
|
||||||
|
|
||||||
# ... (rest of the KaraokeDownloader methods, updated to use DATA_DIR for all data file paths) ...
|
def download_from_file(self, file_path, force_refresh=False):
    """
    Process every channel/playlist URL listed in a text file.

    Blank lines and lines whose first non-space character is ``#`` are ignored.
    Each remaining line is handed to ``self.download_channel_videos``.

    Args:
        file_path: Path of the text file holding one URL per line.
        force_refresh: Forwarded unchanged to ``download_channel_videos``.

    Returns:
        False when the file is missing, contains no URLs, or any URL reported
        failure; True when every URL was processed successfully.
    """
    source = Path(file_path)
    if not source.exists():
        print(f"❌ File not found: {file_path}")
        return False

    urls = []
    with open(source, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry and not entry.startswith("#"):
                urls.append(entry)

    if not urls:
        print(f"❌ No URLs found in {file_path}")
        return False

    # Every URL is processed even after a failure; only the final verdict changes.
    failures = 0
    for url in urls:
        print(f"\n➡️ Processing: {url}")
        if not self.download_channel_videos(url, force_refresh=force_refresh):
            failures += 1
    return failures == 0
|
||||||
|
|
||||||
|
def download_channel_videos(self, url, force_refresh=False):
    """
    Download songlist matches from one channel or playlist URL.

    The channel's videos are listed with ``yt-dlp --flat-playlist``; each video
    title is split into artist/title by ``extract_artist_title`` and looked up in
    the songlist. At most N matches that are not yet tracked as downloaded are
    fetched, where N = self.config.get('limit', 1).

    Args:
        url: Channel or playlist URL handed to yt-dlp.
        force_refresh: Accepted for interface compatibility; this method does
            not read it.

    Returns:
        False when no songlist is loaded or the video listing cannot be fetched.
        True otherwise, including when nothing new matched or when individual
        downloads failed (those are reported and skipped).
    """
    channel_name, channel_id = get_channel_info(url)
    print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
    songlist = load_songlist()
    if not songlist:
        print("⚠️ No songlist loaded. Skipping.")
        return False
    # Get limit from config or default to 1
    limit = self.config.get('limit', 1)
    # --- Get channel video list with yt-dlp --flat-playlist ---
    cmd = [
        str(self.yt_dlp_path),
        '--flat-playlist',
        '--print', '%(title)s|%(id)s|%(url)s',
        url
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable yt-dlp binary.
        print(f"❌ yt-dlp failed to fetch playlist: {e}")
        return False
    # Build a list of available videos
    available_videos = []
    for line in result.stdout.strip().splitlines():
        # Split from the right: the title is free text and may itself contain
        # '|', while the id and url fields emitted after it are the last two.
        parts = line.rsplit('|', 2)
        if len(parts) < 3:
            continue
        title, video_id = parts[0].strip(), parts[1].strip()
        available_videos.append({'title': title, 'id': video_id})
    # Normalize songlist for matching
    normalized_songlist = {
        f"{s['artist'].lower()}_{normalize_title(s['title'])}": s for s in songlist
    }
    # Find matches
    matches = []
    for video in available_videos:
        artist, title = extract_artist_title(video['title'])
        key = f"{artist.lower()}_{normalize_title(title)}"
        if key not in normalized_songlist:
            continue
        # Skip songs that are already tracked as downloaded
        if is_songlist_song_downloaded(self.songlist_tracking, artist, title):
            continue
        matches.append((video, normalized_songlist[key]))
        if len(matches) >= limit:
            break
    if not matches:
        print("🎵 No new songlist matches found for this channel.")
        return True
    # Download only the first N matches
    for video, song in matches:
        artist, title = song['artist'], song['title']
        output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")
        # Download by video ID
        video_url = f"https://www.youtube.com/watch?v={video['id']}"
        cmd = [
            str(self.yt_dlp_path),
            "-o", str(output_path),
            "-f", self.config["download_settings"]["format"],
            video_url
        ]
        try:
            subprocess.run(cmd, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"❌ yt-dlp failed: {e}")
            continue
        if not output_path.exists() or output_path.stat().st_size == 0:
            print(f"❌ Download failed or file is empty: {output_path}")
            continue
        if not self._is_valid_mp4(output_path):
            print(f"❌ File is not a valid MP4: {output_path}")
            continue
        add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
        mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
        print(f"✅ Downloaded and tracked: {artist} - {title}")
        print(f"🎉 All post-processing complete for: {output_path}")
    return True
|
||||||
|
|
||||||
|
def download_songlist_across_channels(self, channel_urls, limit=None):
    """
    Download songlist songs, searching the given channels in order.

    For each songlist song that is not yet tracked as downloaded, the channels
    are tried in order and the song is downloaded from the first channel where a
    matching video downloads successfully. Songs that cannot be obtained are
    skipped and reported at the end.

    Args:
        channel_urls: Channel URLs to search, in priority order.
        limit: Maximum number of songs to download; ``None`` means no limit.

    Returns:
        False when no songlist is loaded, True otherwise.
    """
    songlist = load_songlist()
    if not songlist:
        print("⚠️ No songlist loaded. Skipping.")
        return False
    # Filter for songs not yet downloaded
    undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
    print(f"🎯 {len(undownloaded)} songlist songs to download.")
    if not undownloaded:
        print("🎵 All songlist songs already downloaded.")
        return True
    print("🔍 Songs to search for:")
    for song in undownloaded:
        print(f" - {song['artist']} - {song['title']}")
    downloaded_count = 0
    attempted = set()
    not_found = []
    # A single pass is enough: every song is searched at most once, so looping
    # again over the leftovers could never make progress (and previously spun
    # forever once one song had downloaded and another could not be found).
    for song in undownloaded:
        if limit is not None and downloaded_count >= limit:
            break
        artist, title = song['artist'], song['title']
        if (artist, title) in attempted:
            # Duplicate songlist entry; it was already searched in this run.
            continue
        attempted.add((artist, title))
        if self._download_songlist_song_from_channels(artist, title, channel_urls):
            downloaded_count += 1
        else:
            not_found.append(song)
    if not_found:
        print(f"⚠️ {len(not_found)} songlist songs could not be found in any channel:")
        for song in not_found:
            print(f" - {song['artist']} - {song['title']}")
    print(f"🎉 Downloaded {downloaded_count} songlist songs.")
    return True

def _download_songlist_song_from_channels(self, artist, title, channel_urls):
    """Try each channel in order and download the first matching video; return True on success."""
    # Normalized targets are loop-invariant, so compute them once per song.
    wanted_artist = normalize_title(artist)
    wanted_title = normalize_title(title)
    wanted_full = normalize_title(f"{artist} - {title}")
    for channel_url in channel_urls:
        channel_name, channel_id = get_channel_info(channel_url)
        print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
        print(f" Searching for: {artist} - {title}")
        available_videos = self.tracker.get_channel_video_list(
            channel_url,
            yt_dlp_path=str(self.yt_dlp_path),
            force_refresh=False
        )
        for video in available_videos:
            v_artist, v_title = extract_artist_title(video['title'])
            split_match = normalize_title(v_artist) == wanted_artist and normalize_title(v_title) == wanted_title
            if not split_match and normalize_title(video['title']) != wanted_full:
                continue
            # Download this song from this channel
            output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
            output_path.parent.mkdir(parents=True, exist_ok=True)
            print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")
            video_url = f"https://www.youtube.com/watch?v={video['id']}"
            dlp_cmd = [
                str(self.yt_dlp_path),
                "-o", str(output_path),
                "-f", self.config["download_settings"]["format"],
                video_url
            ]
            # On any failure below, keep scanning: a later video or channel may
            # still provide the song.
            try:
                subprocess.run(dlp_cmd, check=True)
            except (subprocess.CalledProcessError, OSError) as e:
                print(f"❌ yt-dlp failed: {e}")
                continue
            if not output_path.exists() or output_path.stat().st_size == 0:
                print(f"❌ Download failed or file is empty: {output_path}")
                continue
            if not self._is_valid_mp4(output_path):
                print(f"❌ File is not a valid MP4: {output_path}")
                continue
            add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
            mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
            print(f"✅ Downloaded and tracked: {artist} - {title}")
            print(f"🎉 All post-processing complete for: {output_path}")
            return True  # Only download from first channel where found
    return False
|
||||||
|
|
||||||
|
def _is_valid_mp4(self, file_path):
    """
    Check with ffprobe whether the file has a recognised video stream.

    ffprobe is asked for the codec name of the first video stream; the file is
    accepted when that output contains "mp4", "h264" or "hevc".

    Args:
        file_path: Path of the downloaded file to inspect.

    Returns:
        True when the codec is recognised, or when ffprobe cannot be launched
        (the check is then skipped). False when ffprobe runs but exits with an
        error, or reports a codec outside the accepted set.
    """
    cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError:
        # ffprobe ran and rejected the file: treat it as invalid rather than
        # silently accepting it.
        return False
    except (OSError, ValueError):
        # ffprobe is not available (or could not be invoked / its output could
        # not be decoded): skip the check.
        return True
    codec_output = result.stdout
    return any(name in codec_output for name in ("mp4", "h264", "hevc"))
|
||||||
|
|
||||||
|
def reset_songlist_all():
    """
    Reset all songlist tracking and delete the files it refers to.

    Steps, all relative to the current working directory:
      1. Load ``data/songlist_tracking.json`` (an empty mapping if absent).
      2. Delete every file named by an entry's ``file_path``; a file that
         cannot be deleted is reported and skipped.
      3. Overwrite ``data/songlist_tracking.json`` with ``{}``.
      4. If ``data/karaoke_tracking.json`` exists, remove from its ``songs``
         mapping every song whose ``<artist lowercased>_<normalized title>`` key
         is present in the loaded songlist tracking, then rewrite the file.
    """
    import json
    from pathlib import Path
    # Load songlist tracking
    songlist_tracking_file = Path('data/songlist_tracking.json')
    karaoke_tracking_file = Path('data/karaoke_tracking.json')
    if songlist_tracking_file.exists():
        with open(songlist_tracking_file, 'r', encoding='utf-8') as f:
            tracking = json.load(f)
    else:
        tracking = {}
    # Delete all files tracked
    for entry in tracking.values():
        file_path = entry.get('file_path')
        if not file_path:
            continue
        p = Path(file_path)
        try:
            if p.exists():
                p.unlink()
                print(f"🗑️ Deleted: {p}")
        except OSError as e:
            # Best effort: one undeletable file must not abort the reset.
            print(f"⚠️ Could not delete {p}: {e}")
    # Clear songlist_tracking.json (create data/ first so a fresh checkout
    # without the directory does not fail with FileNotFoundError)
    songlist_tracking_file.parent.mkdir(parents=True, exist_ok=True)
    songlist_tracking_file.write_text("{}", encoding="utf-8")
    print("🧹 Cleared songlist_tracking.json")
    # Remove songlist songs from karaoke_tracking.json
    if karaoke_tracking_file.exists():
        with open(karaoke_tracking_file, 'r', encoding='utf-8') as f:
            karaoke_data = json.load(f)
        song_keys_to_remove = []
        for song_id, song in karaoke_data.get('songs', {}).items():
            artist = song.get('artist', '')
            title = song.get('title', song.get('name', ''))
            key = f"{artist.lower()}_{normalize_title(title)}"
            if key in tracking:
                song_keys_to_remove.append(song_id)
        for song_id in song_keys_to_remove:
            del karaoke_data['songs'][song_id]
        with open(karaoke_tracking_file, 'w', encoding='utf-8') as f:
            json.dump(karaoke_data, f, indent=2, ensure_ascii=False)
        print(f"🧹 Removed {len(song_keys_to_remove)} songlist songs from karaoke_tracking.json")
    print("✅ Global songlist reset complete.")
|
||||||
|
|
||||||
# For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
|
# For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
|
||||||
# updating all references to use the new karaoke_downloader.* imports as needed.
|
# updating all references to use the new karaoke_downloader.* imports as needed.
|
||||||
@ -134,3 +134,37 @@ class TrackingManager:
|
|||||||
"songs": songs
|
"songs": songs
|
||||||
}
|
}
|
||||||
return json.dumps(report, indent=2, ensure_ascii=False)
|
return json.dumps(report, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
def get_channel_video_list(self, channel_url, yt_dlp_path="downloader/yt-dlp.exe", force_refresh=False):
    """
    Return the channel's videos as a list of dicts with 'title' and 'id'.

    The result is served from ``self.cache`` when present, unless
    ``force_refresh`` is True. Otherwise the list is fetched with
    ``yt-dlp --flat-playlist``, stored in the cache under the channel id (or the
    URL when no id is available) and persisted with ``self.save_cache()``.

    Args:
        channel_url: Channel or playlist URL handed to yt-dlp.
        yt_dlp_path: Path of the yt-dlp executable.
        force_refresh: When True, ignore any cached list and fetch again.

    Returns:
        The list of video dicts, or an empty list when yt-dlp fails or cannot
        be launched (nothing is cached in that case).
    """
    import subprocess
    from karaoke_downloader.youtube_utils import get_channel_info

    channel_name, channel_id = get_channel_info(channel_url)
    cache_key = channel_id or channel_url
    if not force_refresh and cache_key in self.cache:
        return self.cache[cache_key]
    # Fetch with yt-dlp
    cmd = [
        yt_dlp_path,
        '--flat-playlist',
        '--print', '%(title)s|%(id)s|%(url)s',
        channel_url
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable yt-dlp binary.
        print(f"❌ yt-dlp failed to fetch playlist for cache: {e}")
        return []
    videos = []
    for line in result.stdout.strip().splitlines():
        # Split from the right: the title is free text and may itself contain
        # '|', while the id and url fields emitted after it are the last two.
        parts = line.rsplit('|', 2)
        if len(parts) < 3:
            continue
        title, video_id = parts[0].strip(), parts[1].strip()
        videos.append({'title': title, 'id': video_id})
    self.cache[cache_key] = videos
    self.save_cache()
    return videos
|
||||||
Loading…
Reference in New Issue
Block a user