Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
997c294c89
commit
6a2eb2bac5
55510
data/channel_cache.json
55510
data/channel_cache.json
File diff suppressed because it is too large
Load Diff
@ -1,24 +1,28 @@
|
||||
{
|
||||
"download_settings": {
|
||||
"format": "best[height<=720][ext=mp4]/best[height<=720]/best[ext=mp4]/best",
|
||||
"preferred_resolution": "720p",
|
||||
"audio_format": "mp3",
|
||||
"audio_quality": "0",
|
||||
"download_settings": {
|
||||
"format": "bestvideo[height=720][ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]",
|
||||
"outtmpl": "%(title)s_720p.%(ext)s",
|
||||
"merge_output_format": "mp4",
|
||||
"noplaylist": true,
|
||||
"postprocessors": [{
|
||||
"key": "FFmpegExtractAudio",
|
||||
"preferredcodec": "mp3",
|
||||
"preferredquality": "0"
|
||||
}],
|
||||
"writesubtitles": false,
|
||||
"subtitle_language": "en",
|
||||
"subtitle_format": "srt",
|
||||
"write_metadata": false,
|
||||
"write_thumbnail": false,
|
||||
"write_description": false,
|
||||
"write_annotations": false,
|
||||
"write_comments": false,
|
||||
"write_subtitles": false,
|
||||
"writedescription": false,
|
||||
"writeannotations": false,
|
||||
"writeinfojson": false,
|
||||
"writecomments": false,
|
||||
"writethumbnail": false,
|
||||
"embed_metadata": false,
|
||||
"add_metadata": false,
|
||||
"continue_downloads": true,
|
||||
"no_overwrites": true,
|
||||
"ignore_errors": true,
|
||||
"continuedl": true,
|
||||
"nooverwrites": true,
|
||||
"ignoreerrors": true,
|
||||
"no_warnings": false
|
||||
},
|
||||
},
|
||||
"folder_structure": {
|
||||
"downloads_dir": "downloads",
|
||||
"logs_dir": "logs",
|
||||
|
||||
@ -33,6 +33,7 @@ Examples:
|
||||
parser.add_argument('--songlist-status', action='store_true', help='Show songlist download status and statistics')
|
||||
parser.add_argument('--reset-channel', metavar='CHANNEL_NAME', help='Reset all tracking and files for a channel')
|
||||
parser.add_argument('--reset-songlist', action='store_true', help='When used with --reset-channel, also reset songlist songs for this channel')
|
||||
parser.add_argument('--reset-songlist-all', action='store_true', help='Reset all songlist tracking and delete all songlist-downloaded files (global)')
|
||||
parser.add_argument('--version', '-v', action='version', version='Karaoke Playlist Downloader v1.0')
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -78,6 +79,23 @@ Examples:
|
||||
sys.exit(0)
|
||||
# --- END NEW ---
|
||||
|
||||
# --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels in data/channels.txt ---
|
||||
if args.songlist_only and not args.url and not args.file:
|
||||
channels_file = Path("data/channels.txt")
|
||||
if channels_file.exists():
|
||||
args.file = str(channels_file)
|
||||
print("📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist-only mode.")
|
||||
else:
|
||||
print("❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs.")
|
||||
sys.exit(1)
|
||||
# --- END NEW ---
|
||||
|
||||
if args.reset_songlist_all:
|
||||
from karaoke_downloader.downloader import reset_songlist_all
|
||||
reset_songlist_all()
|
||||
print('✅ All songlist tracking and files have been reset.')
|
||||
sys.exit(0)
|
||||
|
||||
if args.status:
|
||||
stats = downloader.tracker.get_statistics()
|
||||
print("🎤 Karaoke Downloader Status")
|
||||
@ -135,8 +153,12 @@ Examples:
|
||||
if len(tracking) > 10:
|
||||
print(f" ... and {len(tracking) - 10} more")
|
||||
sys.exit(0)
|
||||
elif args.file:
|
||||
success = downloader.download_from_file(args.file, force_refresh=args.refresh)
|
||||
elif args.songlist_only and args.file:
|
||||
# Read all channel URLs from file
|
||||
with open(args.file, "r", encoding="utf-8") as f:
|
||||
channel_urls = [line.strip() for line in f if line.strip() and not line.strip().startswith("#")]
|
||||
limit = args.limit if args.limit else None
|
||||
success = downloader.download_songlist_across_channels(channel_urls, limit=limit)
|
||||
elif args.url:
|
||||
success = downloader.download_channel_videos(args.url, force_refresh=args.refresh)
|
||||
else:
|
||||
|
||||
@ -141,7 +141,245 @@ class KaraokeDownloader:
|
||||
if not reset_songlist:
|
||||
print(f" Songlist songs were preserved.")
|
||||
|
||||
# ... (rest of the KaraokeDownloader methods, updated to use DATA_DIR for all data file paths) ...
|
||||
def download_from_file(self, file_path, force_refresh=False):
    """
    Process every channel/playlist URL listed in a text file.

    Blank lines and lines whose stripped form starts with '#' are ignored.
    Each remaining line is handed to ``self.download_channel_videos``.

    Args:
        file_path: Path of the text file holding one URL per line.
        force_refresh: Forwarded unchanged to ``download_channel_videos``.

    Returns:
        False if the file is missing, lists no URLs, or any URL reports
        failure; True when every URL reports success. All URLs are processed
        even after a failure.
    """
    source = Path(file_path)
    if not source.exists():
        print(f"❌ File not found: {file_path}")
        return False

    with open(source, "r", encoding="utf-8") as handle:
        stripped = (raw.strip() for raw in handle)
        targets = [entry for entry in stripped if entry and not entry.startswith("#")]

    if not targets:
        print(f"❌ No URLs found in {file_path}")
        return False

    every_ok = True
    for target in targets:
        print(f"\n➡️ Processing: {target}")
        # Deliberately no short-circuit: a failing URL must not stop the rest.
        if not self.download_channel_videos(target, force_refresh=force_refresh):
            every_ok = False
    return every_ok
|
||||
|
||||
def download_channel_videos(self, url, force_refresh=False):
    """
    Download new songlist matches from a single channel or playlist URL.

    The channel is listed with ``yt-dlp --flat-playlist``, each video title is
    split into artist/title with ``extract_artist_title`` and compared to the
    songlist. At most ``self.config.get('limit', 1)`` not-yet-downloaded
    matches are fetched, validated, tagged and recorded in
    ``self.songlist_tracking``.

    Args:
        url: Channel or playlist URL passed to yt-dlp.
        force_refresh: Accepted for interface compatibility; this method
            always lists the channel directly and does not read the flag.

    Returns:
        False when no songlist is loaded or the yt-dlp listing fails.
        True otherwise, including when nothing matches or when individual
        downloads fail (those are reported and skipped).
    """
    channel_name, _channel_id = get_channel_info(url)
    print(f"\n🎬 Downloading from channel: {channel_name} ({url})")
    songlist = load_songlist()
    if not songlist:
        print("⚠️ No songlist loaded. Skipping.")
        return False

    # Get limit from config or default to 1
    limit = self.config.get('limit', 1)

    # --- Get channel video list with yt-dlp --flat-playlist ---
    cmd = [
        str(self.yt_dlp_path),
        '--flat-playlist',
        '--print', '%(title)s|%(id)s|%(url)s',
        url
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        lines = result.stdout.strip().splitlines()
    except subprocess.CalledProcessError as e:
        print(f"❌ yt-dlp failed to fetch playlist: {e}")
        return False

    # Build a list of available videos.
    # Split from the right: the id and url fields never contain '|', but
    # titles frequently do, and a left split would read a title fragment
    # as the video id.
    available_videos = []
    for line in lines:
        parts = line.rsplit('|', 2)
        if len(parts) >= 2:
            title, video_id = parts[0].strip(), parts[1].strip()
            available_videos.append({'title': title, 'id': video_id})

    # Normalize songlist for matching
    normalized_songlist = {
        f"{s['artist'].lower()}_{normalize_title(s['title'])}": s for s in songlist
    }

    # Find matches, stopping as soon as `limit` new ones are collected
    matches = []
    for video in available_videos:
        artist, title = extract_artist_title(video['title'])
        key = f"{artist.lower()}_{normalize_title(title)}"
        if key in normalized_songlist:
            # Check if already downloaded
            if not is_songlist_song_downloaded(self.songlist_tracking, artist, title):
                matches.append((video, normalized_songlist[key]))
                if len(matches) >= limit:
                    break

    if not matches:
        print("🎵 No new songlist matches found for this channel.")
        return True

    # Download only the first N matches
    for video, song in matches:
        # File name and tracking use the songlist spelling, not the video title.
        artist, title = song['artist'], song['title']
        output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")

        # Download by video ID
        video_url = f"https://www.youtube.com/watch?v={video['id']}"
        cmd = [
            str(self.yt_dlp_path),
            "-o", str(output_path),
            "-f", self.config["download_settings"]["format"],
            video_url
        ]
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:
            print(f"❌ yt-dlp failed: {e}")
            continue

        # A zero exit code is not trusted on its own: verify the artifact.
        if not output_path.exists() or output_path.stat().st_size == 0:
            print(f"❌ Download failed or file is empty: {output_path}")
            continue
        if not self._is_valid_mp4(output_path):
            print(f"❌ File is not a valid MP4: {output_path}")
            continue

        add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
        mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
        print(f"✅ Downloaded and tracked: {artist} - {title}")
        print(f"🎉 All post-processing complete for: {output_path}")
    return True
|
||||
|
||||
def download_songlist_across_channels(self, channel_urls, limit=None):
    """
    Download outstanding songlist songs, searching the channels in order.

    For each songlist song not yet recorded in ``self.songlist_tracking``,
    every channel in ``channel_urls`` is checked in order and the song is
    downloaded from the first channel where a matching video downloads and
    validates successfully. Songs that cannot be found are skipped.

    Args:
        channel_urls: Iterable of channel URLs, in priority order.
        limit: Maximum number of successful downloads, or None for no limit.

    Returns:
        False when no songlist is loaded; True otherwise (even if some or
        all songs could not be found).
    """
    songlist = load_songlist()
    if not songlist:
        print("⚠️ No songlist loaded. Skipping.")
        return False

    # Filter for songs not yet downloaded
    undownloaded = [s for s in songlist if not is_songlist_song_downloaded(self.songlist_tracking, s['artist'], s['title'])]
    print(f"🎯 {len(undownloaded)} songlist songs to download.")
    if not undownloaded:
        print("🎵 All songlist songs already downloaded.")
        return True
    print("🔍 Songs to search for:")
    for song in undownloaded:
        print(f" - {song['artist']} - {song['title']}")

    downloaded_count = 0
    attempted = set()

    # One pass is sufficient: each song is tried against every channel exactly
    # once. The earlier outer retry loop could never make further progress and
    # did not terminate when some songs were downloaded while others remained
    # unfindable and the limit was not reached.
    for song in list(undownloaded):
        if limit is not None and downloaded_count >= limit:
            break
        artist, title = song['artist'], song['title']
        if (artist, title) in attempted:
            # Duplicate songlist entry; it has already been tried.
            continue

        # Song-side normalisations do not depend on the video being compared.
        wanted_artist = normalize_title(artist)
        wanted_title = normalize_title(title)
        wanted_full = normalize_title(f"{artist} - {title}")

        found = False
        for channel_url in channel_urls:
            channel_name, _channel_id = get_channel_info(channel_url)
            print(f"\n🎬 Checking channel: {channel_name} ({channel_url})")
            print(f" Searching for: {artist} - {title}")
            available_videos = self.tracker.get_channel_video_list(
                channel_url,
                yt_dlp_path=str(self.yt_dlp_path),
                force_refresh=False
            )
            for video in available_videos:
                v_artist, v_title = extract_artist_title(video['title'])
                split_match = (
                    normalize_title(v_artist) == wanted_artist
                    and normalize_title(v_title) == wanted_title
                )
                if not (split_match or normalize_title(video['title']) == wanted_full):
                    continue

                # Download this song from this channel
                output_path = self.downloads_dir / channel_name / f"{artist} - {title} (Karaoke Version).mp4"
                output_path.parent.mkdir(parents=True, exist_ok=True)
                print(f"⬇️ Downloading: {artist} - {title} -> {output_path}")
                video_url = f"https://www.youtube.com/watch?v={video['id']}"
                dlp_cmd = [
                    str(self.yt_dlp_path),
                    "-o", str(output_path),
                    "-f", self.config["download_settings"]["format"],
                    video_url
                ]
                try:
                    subprocess.run(dlp_cmd, check=True)
                except subprocess.CalledProcessError as e:
                    print(f"❌ yt-dlp failed: {e}")
                    continue  # try the next matching video in this channel
                if not output_path.exists() or output_path.stat().st_size == 0:
                    print(f"❌ Download failed or file is empty: {output_path}")
                    continue
                if not self._is_valid_mp4(output_path):
                    print(f"❌ File is not a valid MP4: {output_path}")
                    continue

                add_id3_tags(output_path, f"{artist} - {title} (Karaoke Version)", channel_name)
                mark_songlist_song_downloaded(self.songlist_tracking, artist, title, channel_name, output_path)
                print(f"✅ Downloaded and tracked: {artist} - {title}")
                print(f"🎉 All post-processing complete for: {output_path}")
                downloaded_count += 1
                found = True
                break  # Only download from first channel where found
            if found:
                break

        attempted.add((artist, title))
        if found:
            undownloaded.remove(song)

    if undownloaded:
        print(f"⚠️ {len(undownloaded)} songlist songs could not be found in any channel:")
        for song in undownloaded:
            print(f" - {song['artist']} - {song['title']}")
    print(f"🎉 Downloaded {downloaded_count} songlist songs.")
    return True
|
||||
|
||||
def _is_valid_mp4(self, file_path):
|
||||
"""Check if the file is a valid MP4 using ffprobe, if available."""
|
||||
try:
|
||||
cmd = ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "stream=codec_name", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
return "mp4" in result.stdout or "h264" in result.stdout or "hevc" in result.stdout
|
||||
except Exception:
|
||||
# If ffprobe is not available, skip the check
|
||||
return True
|
||||
|
||||
def reset_songlist_all():
    """
    Globally reset songlist download state.

    Steps, in order:
      1. Read data/songlist_tracking.json (treated as empty when absent).
      2. Delete every file referenced by an entry's 'file_path'; deletion
         errors are reported and do not stop the reset.
      3. Overwrite data/songlist_tracking.json with an empty object.
      4. If data/karaoke_tracking.json exists, drop every entry under 'songs'
         whose "<artist lowercased>_<normalized title>" key was present in
         the songlist tracking data, then rewrite the file.
    """
    import json
    from pathlib import Path

    karaoke_tracking_file = Path('data/karaoke_tracking.json')
    songlist_tracking_file = Path('data/songlist_tracking.json')

    # Load songlist tracking
    tracking = {}
    if songlist_tracking_file.exists():
        with open(songlist_tracking_file, 'r', encoding='utf-8') as f:
            tracking = json.load(f)

    # Delete all files tracked
    for entry in tracking.values():
        file_path = entry.get('file_path')
        if not file_path:
            continue
        p = Path(file_path)
        try:
            if p.exists():
                p.unlink()
                print(f"🗑️ Deleted: {p}")
        except Exception as e:
            print(f"⚠️ Could not delete {p}: {e}")

    # Clear songlist_tracking.json
    songlist_tracking_file.write_text("{}", encoding="utf-8")
    print("🧹 Cleared songlist_tracking.json")

    # Remove songlist songs from karaoke_tracking.json
    if karaoke_tracking_file.exists():
        with open(karaoke_tracking_file, 'r', encoding='utf-8') as f:
            karaoke_data = json.load(f)

        def _tracking_key(song):
            # Same key shape that is looked up in the songlist tracking data.
            artist = song.get('artist', '')
            title = song.get('title', song.get('name', ''))
            return f"{artist.lower()}_{normalize_title(title)}"

        song_keys_to_remove = [
            song_id
            for song_id, song in karaoke_data.get('songs', {}).items()
            if _tracking_key(song) in tracking
        ]
        for song_id in song_keys_to_remove:
            del karaoke_data['songs'][song_id]

        with open(karaoke_tracking_file, 'w', encoding='utf-8') as f:
            json.dump(karaoke_data, f, indent=2, ensure_ascii=False)
        print(f"🧹 Removed {len(song_keys_to_remove)} songlist songs from karaoke_tracking.json")

    print("✅ Global songlist reset complete.")
|
||||
|
||||
# For brevity, the rest of the class methods should be copied here from the original download_karaoke.py,
|
||||
# updating all references to use the new karaoke_downloader.* imports as needed.
|
||||
@ -133,4 +133,38 @@ class TrackingManager:
|
||||
"playlist": playlist,
|
||||
"songs": songs
|
||||
}
|
||||
return json.dumps(report, indent=2, ensure_ascii=False)
|
||||
return json.dumps(report, indent=2, ensure_ascii=False)
|
||||
|
||||
def get_channel_video_list(self, channel_url, yt_dlp_path="downloader/yt-dlp.exe", force_refresh=False):
    """
    Return the channel's videos as a list of {'title', 'id'} dicts.

    The result is looked up in ``self.cache`` under the channel id returned by
    ``get_channel_info`` (falling back to the URL when the id is falsy). On a
    cache miss, or when ``force_refresh`` is True, the channel is listed with
    ``yt-dlp --flat-playlist``, stored in ``self.cache`` and
    ``self.save_cache()`` is called.

    Args:
        channel_url: Channel or playlist URL to list.
        yt_dlp_path: Path of the yt-dlp executable to run.
        force_refresh: When True, bypass the cached entry and list again.

    Returns:
        The list of video dicts, or an empty list if yt-dlp exits with an
        error (in which case nothing is cached).
    """
    import subprocess
    from karaoke_downloader.youtube_utils import get_channel_info

    _channel_name, channel_id = get_channel_info(channel_url)
    cache_key = channel_id or channel_url
    if not force_refresh and cache_key in self.cache:
        return self.cache[cache_key]

    # Fetch with yt-dlp
    cmd = [
        yt_dlp_path,
        '--flat-playlist',
        '--print', '%(title)s|%(id)s|%(url)s',
        channel_url
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ yt-dlp failed to fetch playlist for cache: {e}")
        return []

    videos = []
    for line in result.stdout.strip().splitlines():
        # Split from the right: titles may themselves contain '|', while the
        # trailing id and url fields do not. A left split would store a piece
        # of the title as the video id.
        parts = line.rsplit('|', 2)
        if len(parts) >= 2:
            title, video_id = parts[0].strip(), parts[1].strip()
            videos.append({'title': title, 'id': video_id})

    self.cache[cache_key] = videos
    self.save_cache()
    return videos
|
||||
Loading…
Reference in New Issue
Block a user