Signed-off-by: mbrucedogs <mbrucedogs@gmail.com>
This commit is contained in:
parent
c864af7794
commit
24a6a37efd
@ -7966,5 +7966,69 @@
|
||||
"channel": "@KaraokeOnVEVO",
|
||||
"marked_at": "2025-07-28T08:08:58.214983",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"zara larsson_midnight sun": {
|
||||
"artist": "Zara Larsson",
|
||||
"title": "Midnight Sun",
|
||||
"video_title": "Zara Larsson Midnight Sun",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:18:12.805038",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"imagine dragons, j i d_enemy": {
|
||||
"artist": "Imagine Dragons, J I D",
|
||||
"title": "Enemy",
|
||||
"video_title": "Imagine Dragons, J I D Enemy",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:18:12.822951",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"jonas blue, why don't we_don't wake me up": {
|
||||
"artist": "Jonas Blue, Why Don't We",
|
||||
"title": "Don't Wake Me Up",
|
||||
"video_title": "Jonas Blue, Why Don't We Don't Wake Me Up",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:18:12.844018",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"rex orange county_pluto projector": {
|
||||
"artist": "Rex Orange County",
|
||||
"title": "Pluto Projector",
|
||||
"video_title": "Rex Orange County Pluto Projector",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:18:12.858730",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"charlie puth_light switch": {
|
||||
"artist": "Charlie Puth",
|
||||
"title": "Light Switch",
|
||||
"video_title": "Charlie Puth Light Switch",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:18:12.878327",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"the rolling stones_(i can't get no) satisfaction": {
|
||||
"artist": "The Rolling Stones",
|
||||
"title": "(I Can't Get No) Satisfaction",
|
||||
"video_title": "(I Can't Get No) Satisfaction - The Rolling Stones KARAOKE Without Backing Vocals",
|
||||
"channel": "@VocalStarKaraoke",
|
||||
"marked_at": "2025-07-28T09:18:13.023146",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"lauren spencer smith_fingers crossed": {
|
||||
"artist": "Lauren Spencer Smith",
|
||||
"title": "Fingers Crossed",
|
||||
"video_title": "Lauren Spencer Smith Fingers Crossed",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:20:07.067847",
|
||||
"reason": "already_on_server"
|
||||
},
|
||||
"tems_crazy tings": {
|
||||
"artist": "Tems",
|
||||
"title": "Crazy Tings",
|
||||
"video_title": "Tems Crazy Tings",
|
||||
"channel": "@sing2karaoke",
|
||||
"marked_at": "2025-07-28T09:20:07.089571",
|
||||
"reason": "already_on_server"
|
||||
}
|
||||
}
|
||||
254
karaoke_downloader/channel_parser.py
Normal file
254
karaoke_downloader/channel_parser.py
Normal file
@ -0,0 +1,254 @@
|
||||
"""
|
||||
Channel-specific parsing utilities for extracting artist and title from video titles.
|
||||
|
||||
This module handles the different title formats used by various karaoke channels,
|
||||
providing channel-specific parsing rules to extract artist and title information
|
||||
correctly for ID3 tagging and filename generation.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class ChannelParser:
    """Parse karaoke video titles into ``(artist, title)`` using per-channel rules.

    Rules are read from a JSON configuration file.  The keys this class reads
    are ``channels`` (a list of objects with ``name``, ``url`` and
    ``parsing_rules``) and ``global_parsing_settings`` (``fallback_format``,
    ``fallback_separator``, ``common_suffixes`` and ``playlist_indicators``).
    """

    # Runs of spaces tried, in order, by the "artist_title_spaces" format.
    # Built by multiplication so the widths cannot be silently collapsed by
    # whitespace-normalising tools.  A 4-space pattern is not listed because
    # any title containing four spaces already matches the 3-space pattern.
    _SPACE_PATTERNS = (" " * 3, " " * 2)

    # Fallback separators for the same format: hyphen, en dash, and a hyphen
    # with no trailing space.
    _DASH_PATTERNS = (" - ", " – ", " -")

    def __init__(self, channels_file: str = "data/channels.json"):
        """Load the channel configuration from ``channels_file``.

        Raises:
            FileNotFoundError: If ``channels_file`` does not exist.
        """
        self.channels_file = Path(channels_file)
        self.channels_config = self._load_channels_config()

    def _load_channels_config(self) -> Dict[str, Any]:
        """Read and return the parsed JSON document at ``self.channels_file``.

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        if not self.channels_file.exists():
            raise FileNotFoundError(f"Channels configuration file not found: {self.channels_file}")

        with open(self.channels_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def get_channel_config(self, channel_name: str) -> Optional[Dict[str, Any]]:
        """Return the config entry whose ``name`` equals ``channel_name``, or None.

        Entries without a ``name`` key are ignored instead of raising KeyError.
        """
        for channel in self.channels_config.get("channels", []):
            if "name" in channel and channel["name"] == channel_name:
                return channel
        return None

    def extract_artist_title(self, video_title: str, channel_name: str) -> Tuple[str, str]:
        """Extract artist and title from a video title using channel rules.

        Args:
            video_title: The full video title.
            channel_name: The channel name; must match a configured ``name``
                for channel-specific rules to apply.

        Returns:
            Tuple of ``(artist, title)``.  When no separator is recognised the
            artist is an empty string and the title is the stripped input.
        """
        channel_config = self.get_channel_config(channel_name)
        if not channel_config:
            # Unknown channel: use the global fallback settings.
            return self._fallback_parse(video_title)

        # ``or {}`` also covers an explicit null in the JSON document.
        parsing_rules = channel_config.get("parsing_rules") or {}
        format_type = parsing_rules.get("format", "artist_title_separator")

        if format_type == "artist_title_separator":
            return self._parse_artist_title_separator(video_title, parsing_rules)
        if format_type == "artist_title_spaces":
            return self._parse_artist_title_spaces(video_title, parsing_rules)
        if format_type == "title_artist_pipe":
            return self._parse_title_artist_pipe(video_title, parsing_rules)
        return self._fallback_parse(video_title)

    def _parse_artist_title_separator(self, video_title: str, rules: Dict[str, Any]) -> Tuple[str, str]:
        """Parse 'Artist - Title', or 'Title - Artist' when ``artist_first`` is false.

        The title is split on the first occurrence of ``rules["separator"]``
        (default ``" - "``) and ``rules["title_cleanup"]`` is applied to both
        halves.
        """
        separator = rules.get("separator", " - ")
        artist_first = rules.get("artist_first", True)

        # An empty separator would make every title "match" and cannot be
        # split on, so treat it like a missing separator.
        if not separator or separator not in video_title:
            return "", video_title.strip()

        first, _, second = video_title.partition(separator)
        cleanup = rules.get("title_cleanup", {})
        first_clean = self._cleanup_title(first.strip(), cleanup)
        second_clean = self._cleanup_title(second.strip(), cleanup)

        if artist_first:
            return first_clean, second_clean
        return second_clean, first_clean

    def _parse_artist_title_spaces(self, video_title: str, rules: Dict[str, Any]) -> Tuple[str, str]:
        """Parse 'Artist<gap>Title' where the gap is a run of several spaces.

        Multi-space patterns are tried first, then dash patterns as a fallback
        for inconsistently formatted titles.  The split happens at the LAST
        occurrence of the first pattern that is present, so an artist string
        such as "Artist1, Artist2" is kept verbatim.  ``rules["title_cleanup"]``
        is applied to the title only.
        """
        cleanup = rules.get("title_cleanup", {})

        for pattern in self._SPACE_PATTERNS + self._DASH_PATTERNS:
            cut = video_title.rfind(pattern)
            if cut == -1:
                continue
            artist = video_title[:cut].strip()
            title_part = video_title[cut + len(pattern):].strip()
            return artist, self._cleanup_title(title_part, cleanup)

        # No pattern matched: empty artist, full title.
        return "", video_title.strip()

    def _parse_title_artist_pipe(self, video_title: str, rules: Dict[str, Any]) -> Tuple[str, str]:
        """Parse 'Title | Artist', splitting on the first separator.

        ``rules["title_cleanup"]`` is applied to the title and
        ``rules["artist_cleanup"]`` to the artist.
        """
        separator = rules.get("separator", " | ")

        # See _parse_artist_title_separator for why "" is rejected.
        if not separator or separator not in video_title:
            return "", video_title.strip()

        title_part, _, artist_part = video_title.partition(separator)
        title = self._cleanup_title(title_part.strip(), rules.get("title_cleanup", {}))
        artist = self._cleanup_title(artist_part.strip(), rules.get("artist_cleanup", {}))
        return artist, title

    @staticmethod
    def _strip_first_suffix(text: str, suffixes: List[str]) -> str:
        """Remove the first suffix in ``suffixes`` that ``text`` ends with.

        Empty suffixes are skipped: ``text[:-0]`` is the empty string, so an
        empty entry in the configuration would otherwise erase the whole text.
        """
        for suffix in suffixes:
            if suffix and text.endswith(suffix):
                return text[:-len(suffix)].strip()
        return text

    def _cleanup_title(self, text: str, cleanup_rules: Dict[str, Any]) -> str:
        """Strip ``text`` and apply the ``remove_suffix`` cleanup rule, if any."""
        cleaned = text.strip()
        if not cleanup_rules or "remove_suffix" not in cleanup_rules:
            return cleaned

        suffixes = cleanup_rules["remove_suffix"].get("suffixes", [])
        return self._strip_first_suffix(cleaned, suffixes)

    def _fallback_parse(self, video_title: str) -> Tuple[str, str]:
        """Parse 'Artist <sep> Title' using ``global_parsing_settings``.

        Only the ``artist_title_separator`` fallback format is implemented;
        any other configured format yields an empty artist and the full title.
        """
        global_settings = self.channels_config.get("global_parsing_settings", {})
        fallback_format = global_settings.get("fallback_format", "artist_title_separator")
        fallback_separator = global_settings.get("fallback_separator", " - ")

        if (
            fallback_format == "artist_title_separator"
            and fallback_separator
            and fallback_separator in video_title
        ):
            artist, _, title = video_title.partition(fallback_separator)
            title = self._strip_first_suffix(
                title.strip(), global_settings.get("common_suffixes", [])
            )
            return artist.strip(), title

        # If all else fails, return empty artist and full title.
        return "", video_title.strip()

    @staticmethod
    def _contains_any(video_title: str, indicators: List[str]) -> bool:
        """Return True if any indicator occurs in the title, ignoring case."""
        title_upper = video_title.upper()
        return any(indicator.upper() in title_upper for indicator in indicators)

    def is_playlist_title(self, video_title: str, channel_name: str) -> bool:
        """Check whether a title looks like a playlist rather than a single song.

        The channel's own ``playlist_indicators`` are used when present;
        otherwise the global indicators are used.
        """
        channel_config = self.get_channel_config(channel_name)
        if not channel_config:
            return self._is_playlist_by_global_rules(video_title)

        parsing_rules = channel_config.get("parsing_rules") or {}
        playlist_indicators = parsing_rules.get("playlist_indicators", [])
        if not playlist_indicators:
            return self._is_playlist_by_global_rules(video_title)

        return self._contains_any(video_title, playlist_indicators)

    def _is_playlist_by_global_rules(self, video_title: str) -> bool:
        """Check the title against the global ``playlist_indicators``."""
        global_settings = self.channels_config.get("global_parsing_settings", {})
        return self._contains_any(video_title, global_settings.get("playlist_indicators", []))

    def get_all_channel_names(self) -> List[str]:
        """Return the ``name`` of every configured channel that has one."""
        return [
            channel["name"]
            for channel in self.channels_config.get("channels", [])
            if "name" in channel
        ]

    def get_channel_url(self, channel_name: str) -> Optional[str]:
        """Return the ``url`` of the named channel, or None if unknown or unset."""
        channel_config = self.get_channel_config(channel_name)
        return channel_config.get("url") if channel_config else None
|
||||
|
||||
|
||||
# Convenience function for backward compatibility
|
||||
def extract_artist_title(
    video_title: str,
    channel_name: str,
    channels_file: str = "data/channels.json",
) -> Tuple[str, str]:
    """
    Module-level shortcut around ``ChannelParser.extract_artist_title``.

    A new ``ChannelParser`` is built from ``channels_file`` on every call, so
    the configuration file is read each time this function is invoked.

    Args:
        video_title: The full video title from YouTube
        channel_name: The name of the channel
        channels_file: Path to the channels configuration file

    Returns:
        Tuple of (artist, title)
    """
    return ChannelParser(channels_file).extract_artist_title(video_title, channel_name)
|
||||
@ -1,17 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Karaoke Video Downloader CLI
|
||||
Command-line interface for the karaoke video downloader.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from karaoke_downloader.channel_parser import ChannelParser
|
||||
from karaoke_downloader.config_manager import AppConfig
|
||||
from karaoke_downloader.downloader import KaraokeDownloader
|
||||
|
||||
# Constants
|
||||
DEFAULT_LATEST_PER_CHANNEL_LIMIT = 10
|
||||
DEFAULT_FUZZY_THRESHOLD = 85
|
||||
DEFAULT_LATEST_PER_CHANNEL_LIMIT = 5
|
||||
DEFAULT_DISPLAY_LIMIT = 10
|
||||
DEFAULT_CACHE_DURATION_HOURS = 24
|
||||
|
||||
|
||||
def load_channels_from_json(channels_file: str = "data/channels.json") -> List[str]:
    """
    Load channel URLs from the new JSON format.

    Channel entries without a non-empty ``url`` are skipped, so a single
    incomplete entry no longer causes every other channel to be discarded.

    Args:
        channels_file: Path to the channels.json file

    Returns:
        List of channel URLs; an empty list if the file cannot be loaded
        (the error is printed rather than raised).
    """
    try:
        parser = ChannelParser(channels_file)
        channels = parser.channels_config.get("channels", [])
        return [channel["url"] for channel in channels if channel.get("url")]
    except Exception as e:
        # Deliberately best-effort: callers treat an empty list as
        # "no channels configured" and report it themselves.
        print(f"❌ Error loading channels from {channels_file}: {e}")
        return []
|
||||
|
||||
|
||||
def load_channels_from_text(channels_file: str = "data/channels.txt") -> List[str]:
    """
    Load channel URLs from the old text format (for backward compatibility).

    The file holds one URL per line.  Blank lines and lines whose first
    non-whitespace character is ``#`` are ignored, and surrounding whitespace
    is removed from every URL.

    Args:
        channels_file: Path to the channels.txt file

    Returns:
        List of channel URLs; an empty list if the file cannot be read
        (the error is printed rather than raised).
    """
    try:
        # "utf-8-sig" reads plain UTF-8 as well, but drops a leading byte-order
        # mark so it cannot end up glued to the first URL or hide a leading
        # "#" comment marker.
        with open(channels_file, "r", encoding="utf-8-sig") as f:
            stripped_lines = (line.strip() for line in f)
            return [
                entry
                for entry in stripped_lines
                if entry and not entry.startswith("#")
            ]
    except Exception as e:
        # Deliberately best-effort: callers treat an empty list as
        # "no channels configured" and report it themselves.
        print(f"❌ Error loading channels from {channels_file}: {e}")
        return []
|
||||
|
||||
|
||||
def load_channels(channel_file: str = None) -> List[str]:
    """
    Load channel URLs from either JSON or text format.

    When ``channel_file`` is given, its extension selects the loader: a name
    ending in ``.json`` (compared case-insensitively) is read as JSON, and
    anything else as the legacy one-URL-per-line text format.  When it is
    omitted, ``data/channels.json`` is preferred and ``data/channels.txt`` is
    used as a fallback.

    Args:
        channel_file: Path to the channel file (optional)

    Returns:
        List of channel URLs; an empty list if no channel file is found.
    """
    if channel_file:
        # Lower-case before comparing so "channels.JSON" is not misread as
        # the plain-text format.
        if channel_file.lower().endswith(".json"):
            return load_channels_from_json(channel_file)
        return load_channels_from_text(channel_file)

    # No explicit file: try JSON first, then fall back to text.
    json_file = "data/channels.json"
    txt_file = "data/channels.txt"

    if os.path.exists(json_file):
        print(f"📋 Using new JSON format: {json_file}")
        return load_channels_from_json(json_file)
    if os.path.exists(txt_file):
        print(f"📋 Using legacy text format: {txt_file}")
        return load_channels_from_text(txt_file)

    print("❌ No channel file found. Please create data/channels.json or data/channels.txt")
    return []
|
||||
|
||||
|
||||
def main():
|
||||
@ -282,17 +360,16 @@ Examples:
|
||||
sys.exit(0)
|
||||
# --- END NEW ---
|
||||
|
||||
# --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels in data/channels.txt ---
|
||||
# --- NEW: If no URL or file is provided, but --songlist-only is set, use all channels ---
|
||||
if (args.songlist_only or args.songlist_focus) and not args.url and not args.file:
|
||||
channels_file = Path("data/channels.txt")
|
||||
if channels_file.exists():
|
||||
args.file = str(channels_file)
|
||||
channel_urls = load_channels()
|
||||
if channel_urls:
|
||||
print(
|
||||
"📋 No URL or --file provided, defaulting to all channels in data/channels.txt for songlist mode."
|
||||
"📋 No URL or --file provided, defaulting to all configured channels for songlist mode."
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"❌ No URL, --file, or data/channels.txt found. Please provide a channel URL or a file with channel URLs."
|
||||
"❌ No URL, --file, or channel configuration found. Please provide a channel URL or create data/channels.json."
|
||||
)
|
||||
sys.exit(1)
|
||||
# --- END NEW ---
|
||||
@ -388,17 +465,11 @@ Examples:
|
||||
print(f" ... and {len(tracking) - 10} more")
|
||||
sys.exit(0)
|
||||
elif args.songlist_only or args.songlist_focus:
|
||||
# Use provided file or default to data/channels.txt
|
||||
channel_file = args.file if args.file else "data/channels.txt"
|
||||
if not os.path.exists(channel_file):
|
||||
print(f"❌ Channel file not found: {channel_file}")
|
||||
# Use provided file or default to channels configuration
|
||||
channel_urls = load_channels(args.file)
|
||||
if not channel_urls:
|
||||
print(f"❌ No channels found in configuration")
|
||||
sys.exit(1)
|
||||
with open(channel_file, "r", encoding="utf-8") as f:
|
||||
channel_urls = [
|
||||
line.strip()
|
||||
for line in f
|
||||
if line.strip() and not line.strip().startswith("#")
|
||||
]
|
||||
limit = args.limit if args.limit else None
|
||||
success = downloader.download_songlist_across_channels(
|
||||
channel_urls,
|
||||
@ -412,17 +483,11 @@ Examples:
|
||||
max_channel_workers=args.channel_workers,
|
||||
)
|
||||
elif args.latest_per_channel:
|
||||
# Use provided file or default to data/channels.txt
|
||||
channel_file = args.file if args.file else "data/channels.txt"
|
||||
if not os.path.exists(channel_file):
|
||||
print(f"❌ Channel file not found: {channel_file}")
|
||||
# Use provided file or default to channels configuration
|
||||
channel_urls = load_channels(args.file)
|
||||
if not channel_urls:
|
||||
print(f"❌ No channels found in configuration")
|
||||
sys.exit(1)
|
||||
with open(channel_file, "r", encoding="utf-8") as f:
|
||||
channel_urls = [
|
||||
line.strip()
|
||||
for line in f
|
||||
if line.strip() and not line.strip().startswith("#")
|
||||
]
|
||||
limit = args.limit if args.limit else DEFAULT_LATEST_PER_CHANNEL_LIMIT
|
||||
force_refresh_download_plan = (
|
||||
args.force_download_plan if hasattr(args, "force_download_plan") else False
|
||||
@ -448,17 +513,11 @@ Examples:
|
||||
else:
|
||||
# Default behavior: download from channels (equivalent to --latest-per-channel)
|
||||
print("🎯 No specific mode specified, defaulting to download from channels")
|
||||
channel_file = args.file if args.file else "data/channels.txt"
|
||||
if not os.path.exists(channel_file):
|
||||
print(f"❌ Channel file not found: {channel_file}")
|
||||
print("Please provide a channel URL or ensure data/channels.txt exists")
|
||||
channel_urls = load_channels(args.file)
|
||||
if not channel_urls:
|
||||
print(f"❌ No channels found in configuration")
|
||||
print("Please provide a channel URL or create data/channels.json")
|
||||
sys.exit(1)
|
||||
with open(channel_file, "r", encoding="utf-8") as f:
|
||||
channel_urls = [
|
||||
line.strip()
|
||||
for line in f
|
||||
if line.strip() and not line.strip().startswith("#")
|
||||
]
|
||||
limit = args.limit if args.limit else DEFAULT_LATEST_PER_CHANNEL_LIMIT
|
||||
force_refresh_download_plan = (
|
||||
args.force_download_plan if hasattr(args, "force_download_plan") else False
|
||||
|
||||
@ -297,9 +297,10 @@ class DownloadPipeline:
|
||||
video_title = video.get("title", "")
|
||||
|
||||
# Extract artist and title from video title
|
||||
from karaoke_downloader.id3_utils import extract_artist_title
|
||||
from karaoke_downloader.channel_parser import ChannelParser
|
||||
|
||||
artist, title = extract_artist_title(video_title)
|
||||
channel_parser = ChannelParser()
|
||||
artist, title = channel_parser.extract_artist_title(video_title, channel_name)
|
||||
|
||||
print(f" ({i}/{total}) Processing: {artist} - {title}")
|
||||
|
||||
|
||||
@ -17,17 +17,16 @@ from karaoke_downloader.cache_manager import (
|
||||
load_cached_plan,
|
||||
save_plan_cache,
|
||||
)
|
||||
# Import all fuzzy matching functions including the enhanced extract_artist_title
|
||||
# This ensures consistent parsing across all modules and supports multiple video title formats
|
||||
# Import all fuzzy matching functions
|
||||
from karaoke_downloader.fuzzy_matcher import (
|
||||
create_song_key,
|
||||
create_video_key,
|
||||
extract_artist_title,
|
||||
get_similarity_function,
|
||||
is_exact_match,
|
||||
is_fuzzy_match,
|
||||
normalize_title,
|
||||
)
|
||||
from karaoke_downloader.channel_parser import ChannelParser
|
||||
from karaoke_downloader.youtube_utils import get_channel_info
|
||||
|
||||
# Constants
|
||||
@ -127,10 +126,11 @@ def _scan_channel_for_matches(
|
||||
video_matches = []
|
||||
|
||||
# Pre-process video titles for efficient matching
|
||||
channel_parser = ChannelParser()
|
||||
if fuzzy_match:
|
||||
# For fuzzy matching, create normalized video keys
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
# Find best match among remaining songs
|
||||
@ -162,7 +162,7 @@ def _scan_channel_for_matches(
|
||||
else:
|
||||
# For exact matching, use direct key comparison
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
if video_key in song_keys:
|
||||
@ -241,10 +241,11 @@ def build_download_plan(
|
||||
video_matches = []
|
||||
|
||||
# Pre-process video titles for efficient matching
|
||||
channel_parser = ChannelParser()
|
||||
if fuzzy_match:
|
||||
# For fuzzy matching, create normalized video keys
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
# Find best match among remaining songs (thread-safe)
|
||||
@ -283,7 +284,7 @@ def build_download_plan(
|
||||
else:
|
||||
# For exact matching, use direct key comparison
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
with song_lookup_lock:
|
||||
@ -345,10 +346,11 @@ def build_download_plan(
|
||||
video_matches = [] # Initialize video_matches for this channel
|
||||
|
||||
# Pre-process video titles for efficient matching
|
||||
channel_parser = ChannelParser()
|
||||
if fuzzy_match:
|
||||
# For fuzzy matching, create normalized video keys
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
# Find best match among remaining songs
|
||||
@ -381,7 +383,7 @@ def build_download_plan(
|
||||
else:
|
||||
# For exact matching, use direct key comparison
|
||||
for video in available_videos:
|
||||
v_artist, v_title = extract_artist_title(video["title"])
|
||||
v_artist, v_title = channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
video_key = create_song_key(v_artist, v_title)
|
||||
|
||||
if video_key in song_keys:
|
||||
|
||||
@ -32,7 +32,8 @@ from karaoke_downloader.fuzzy_matcher import (
|
||||
is_exact_match,
|
||||
is_fuzzy_match,
|
||||
)
|
||||
from karaoke_downloader.id3_utils import add_id3_tags, extract_artist_title
|
||||
from karaoke_downloader.id3_utils import add_id3_tags
|
||||
from karaoke_downloader.channel_parser import ChannelParser
|
||||
from karaoke_downloader.server_manager import (
|
||||
check_and_mark_server_duplicate,
|
||||
is_song_marked_as_server_duplicate,
|
||||
@ -105,6 +106,9 @@ class KaraokeDownloader:
|
||||
# Load server songs for availability checking
|
||||
self.server_songs = load_server_songs()
|
||||
|
||||
# Initialize channel parser for title parsing
|
||||
self.channel_parser = ChannelParser()
|
||||
|
||||
# Parallel download settings
|
||||
self.enable_parallel_downloads = False
|
||||
self.parallel_workers = 3
|
||||
@ -220,7 +224,7 @@ class KaraokeDownloader:
|
||||
matches = []
|
||||
similarity = get_similarity_function()
|
||||
for video in available_videos:
|
||||
artist, title = extract_artist_title(video["title"])
|
||||
artist, title = self.channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
key = create_song_key(artist, title)
|
||||
if fuzzy_match:
|
||||
# Fuzzy match against all songlist keys
|
||||
@ -702,7 +706,7 @@ class KaraokeDownloader:
|
||||
)
|
||||
|
||||
# Extract artist and title for tracking
|
||||
artist, title_clean = extract_artist_title(title)
|
||||
artist, title_clean = self.channel_parser.extract_artist_title(title, channel_name)
|
||||
|
||||
task = DownloadTask(
|
||||
video_id=video_id,
|
||||
@ -810,7 +814,7 @@ class KaraokeDownloader:
|
||||
)
|
||||
filename = f"{channel_name} - {safe_title}.mp4"
|
||||
# Extract artist and title for tracking
|
||||
artist, title_clean = extract_artist_title(title)
|
||||
artist, title_clean = self.channel_parser.extract_artist_title(title, channel_name)
|
||||
|
||||
print(
|
||||
f" ({v_idx+1}/{len(videos)}) Processing: {artist} - {title_clean}"
|
||||
@ -942,7 +946,7 @@ class KaraokeDownloader:
|
||||
# Pre-filter videos to exclude known duplicates before processing
|
||||
pre_filtered_videos = []
|
||||
for video in available_videos:
|
||||
artist, title = extract_artist_title(video["title"])
|
||||
artist, title = self.channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
song_key = create_song_key(artist, title)
|
||||
if song_key not in known_duplicate_keys:
|
||||
pre_filtered_videos.append(video)
|
||||
@ -960,7 +964,7 @@ class KaraokeDownloader:
|
||||
break # We have enough videos for this channel
|
||||
|
||||
videos_checked += 1
|
||||
artist, title = extract_artist_title(video["title"])
|
||||
artist, title = self.channel_parser.extract_artist_title(video["title"], channel_name)
|
||||
|
||||
# Check if should skip this song during planning phase
|
||||
should_skip, reason, filtered_count = self._should_skip_song(
|
||||
|
||||
@ -54,11 +54,18 @@ def sanitize_filename(
|
||||
)
|
||||
safe_artist = safe_artist.strip()
|
||||
|
||||
# Create filename
|
||||
# Create filename - handle empty artist case
|
||||
if not safe_artist or safe_artist.strip() == "":
|
||||
# If no artist, just use the title
|
||||
filename = f"{safe_title}.mp4"
|
||||
else:
|
||||
filename = f"{safe_artist} - {safe_title}.mp4"
|
||||
|
||||
# Limit filename length if needed
|
||||
if len(filename) > max_length:
|
||||
if not safe_artist or safe_artist.strip() == "":
|
||||
filename = f"{safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
|
||||
else:
|
||||
filename = f"{safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]} - {safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
|
||||
|
||||
return filename
|
||||
@ -81,6 +88,14 @@ def generate_possible_filenames(
|
||||
safe_title = sanitize_title_for_filenames(title)
|
||||
safe_artist = artist.replace("'", "").replace('"', "").strip()
|
||||
|
||||
# Handle empty artist case
|
||||
if not safe_artist or safe_artist.strip() == "":
|
||||
return [
|
||||
f"{safe_title}.mp4", # Songlist mode (no artist)
|
||||
f"{channel_name} - {safe_title}.mp4", # Latest-per-channel mode
|
||||
f"{safe_title} (Karaoke Version).mp4", # Channel videos mode (no artist)
|
||||
]
|
||||
else:
|
||||
return [
|
||||
f"{safe_artist} - {safe_title}.mp4", # Songlist mode
|
||||
f"{channel_name} - {safe_title}.mp4", # Latest-per-channel mode
|
||||
@ -131,6 +146,9 @@ def check_file_exists_with_patterns(
|
||||
# Apply length limits if needed
|
||||
safe_artist = artist.replace("'", "").replace('"', "").strip()
|
||||
safe_title = sanitize_title_for_filenames(title)
|
||||
if not safe_artist or safe_artist.strip() == "":
|
||||
filename = f"{safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
|
||||
else:
|
||||
filename = f"{safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]} - {safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
|
||||
|
||||
# Check for exact filename match
|
||||
|
||||
Loading…
Reference in New Issue
Block a user