# Source: KaraokeVideoDownloader/karaoke_downloader/file_utils.py
#
# 182 lines
# 5.5 KiB
# Python

"""
File utilities for filename sanitization, path operations, and file validation.
Centralizes common file operations to eliminate code duplication.
"""
import re
from pathlib import Path
from typing import List, Optional, Tuple
# Length limits used when building filenames
DEFAULT_FILENAME_LENGTH_LIMIT = 100  # whole filename, extension included
DEFAULT_ARTIST_LENGTH_LIMIT = 30  # artist part once truncation kicks in
DEFAULT_TITLE_LENGTH_LIMIT = 60  # title part once truncation kicks in

# Characters that Windows does not allow in filenames
INVALID_FILENAME_CHARS = list('?:*"<>|/\\')
def sanitize_filename(artist: str, title: str, max_length: int = DEFAULT_FILENAME_LENGTH_LIMIT) -> str:
    """
    Create a safe ``.mp4`` filename from artist and title.

    The result has the form ``"<artist> - <title>.mp4"``. Quotes and every
    character listed in INVALID_FILENAME_CHARS are removed from both parts.
    The title additionally loses "(From " markers, closing parentheses,
    " - " separators (replaced by a single space) and all dots.

    Args:
        artist: Song artist name
        title: Song title
        max_length: Maximum filename length, extension included (default: 100)

    Returns:
        Sanitized filename string. If the full name is longer than
        ``max_length``, the artist and title are cut to
        DEFAULT_ARTIST_LENGTH_LIMIT and DEFAULT_TITLE_LENGTH_LIMIT characters.
        If that is still too long (only possible for a ``max_length`` below
        the default), the stem is shortened further so the result fits. At
        least one stem character is always kept, so for a ``max_length`` of 4
        or less the result is one character longer than ``len(".mp4")``.
    """
    extension = ".mp4"

    # Multi-character replacements are order-sensitive, so they run first
    # and in this exact sequence.
    safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ")
    # Single-character removals: colon, quotes, invalid characters, dots.
    for char in [":", "'", '"', *INVALID_FILENAME_CHARS, "."]:
        safe_title = safe_title.replace(char, "")
    safe_title = safe_title.strip()

    # The artist keeps its dots (e.g. "Mr. Big"); only quotes and invalid
    # characters are removed.
    safe_artist = artist
    for char in ["'", '"', *INVALID_FILENAME_CHARS]:
        safe_artist = safe_artist.replace(char, "")
    safe_artist = safe_artist.strip()

    filename = f"{safe_artist} - {safe_title}{extension}"
    if len(filename) <= max_length:
        return filename

    # Too long: fall back to the standard per-part limits first so that the
    # default behaviour (and existing filenames on disk) stays unchanged.
    filename = (
        f"{safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]} - "
        f"{safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}{extension}"
    )
    if len(filename) > max_length:
        # The caller asked for a tighter limit than the per-part truncation
        # can satisfy; shorten the stem so the limit is actually honoured.
        stem_budget = max(max_length - len(extension), 1)
        stem = filename[: -len(extension)][:stem_budget]
        # Avoid a trailing space before the extension, but never end up
        # with an empty stem.
        filename = (stem.rstrip() or stem) + extension
    return filename
def generate_possible_filenames(artist: str, title: str, channel_name: str) -> List[str]:
    """
    Generate possible filename patterns for different download modes.

    The title is cleaned with sanitize_title_for_filenames. The artist has
    quotes and every character in INVALID_FILENAME_CHARS removed, matching
    what sanitize_filename does to the artist. Without that, an artist such
    as "AC/DC" would produce a candidate containing a path separator, which
    can never match a real file. ``channel_name`` is used as given.

    Args:
        artist: Song artist name
        title: Song title
        channel_name: Channel name

    Returns:
        List of three candidate filenames, in this order: songlist mode,
        latest-per-channel mode, channel videos mode.
    """
    safe_title = sanitize_title_for_filenames(title)

    safe_artist = artist.replace("'", "").replace('"', "")
    for char in INVALID_FILENAME_CHARS:
        safe_artist = safe_artist.replace(char, "")
    safe_artist = safe_artist.strip()

    return [
        f"{safe_artist} - {safe_title}.mp4",  # Songlist mode
        f"{channel_name} - {safe_title}.mp4",  # Latest-per-channel mode
        f"{safe_artist} - {safe_title} (Karaoke Version).mp4",  # Channel videos mode
    ]
def sanitize_title_for_filenames(title: str) -> str:
    """
    Strip filename-unsafe characters from a song title.

    Removes every character listed in INVALID_FILENAME_CHARS as well as all
    dots, then trims surrounding whitespace.

    Args:
        title: Song title

    Returns:
        Sanitized title string
    """
    cleaned = title
    for forbidden in INVALID_FILENAME_CHARS:
        cleaned = cleaned.replace(forbidden, "")
    # Dropping every "." also covers the "..." and ".." sequences.
    return cleaned.replace(".", "").strip()
def check_file_exists_with_patterns(
    downloads_dir: Path,
    channel_name: str,
    artist: str,
    title: str
) -> Tuple[bool, Optional[Path]]:
    """
    Check if a file exists using multiple possible filename patterns.

    Candidates come from generate_possible_filenames and are looked up in
    ``downloads_dir / channel_name``. A candidate longer than
    DEFAULT_FILENAME_LENGTH_LIMIT is replaced by the truncated
    "<artist[:30]> - <title[:60]>.mp4" form. Only regular, non-empty files
    count as a match.

    Args:
        downloads_dir: Base downloads directory
        channel_name: Channel name
        artist: Song artist
        title: Song title

    Returns:
        Tuple of (exists, file_path) where file_path is None if not found
    """
    channel_dir = downloads_dir / channel_name

    # The truncated fallback does not depend on the candidate, so build it
    # once. The artist is stripped of invalid characters as well, matching
    # the truncated names produced by sanitize_filename.
    safe_title = sanitize_title_for_filenames(title)
    safe_artist = artist.replace("'", "").replace('"', "")
    for char in INVALID_FILENAME_CHARS:
        safe_artist = safe_artist.replace(char, "")
    safe_artist = safe_artist.strip()
    truncated_filename = (
        f"{safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]} - "
        f"{safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
    )

    already_checked = set()
    for filename in generate_possible_filenames(artist, title, channel_name):
        if len(filename) > DEFAULT_FILENAME_LENGTH_LIMIT:
            # Apply length limits if needed
            filename = truncated_filename
        if filename in already_checked:
            # Several over-long candidates collapse to the same fallback;
            # do not hit the filesystem again for it.
            continue
        already_checked.add(filename)

        file_path = channel_dir / filename
        try:
            # is_file() rejects directories; stat() may still fail if the
            # file disappears or is unreadable, which is treated as a miss.
            if file_path.is_file() and file_path.stat().st_size > 0:
                return True, file_path
        except OSError:
            continue
    return False, None
def ensure_directory_exists(directory: Path) -> None:
    """
    Create a directory, including missing parents, unless it already exists.

    Args:
        directory: Directory path that must exist after the call
    """
    directory.mkdir(exist_ok=True, parents=True)
def is_valid_mp4_file(file_path: Path) -> bool:
    """
    Check if a file is a valid MP4 file.

    A file passes when it has an ``.mp4`` extension (case-insensitive), is a
    regular non-empty file, and carries the ``ftyp`` marker at bytes 4-8.
    Any filesystem error while inspecting the file counts as "not valid".

    Args:
        file_path: Path to the file to check

    Returns:
        True if file is a valid MP4, False otherwise
    """
    # Check file extension first: it needs no filesystem access
    if file_path.suffix.lower() != '.mp4':
        return False

    try:
        # Must be an existing regular file with content. stat() is inside
        # the try because the file can vanish or be unreadable after the
        # existence check.
        if not file_path.is_file() or file_path.stat().st_size == 0:
            return False
        with open(file_path, 'rb') as f:
            header = f.read(8)
    except OSError:
        return False

    # Basic MP4 header check: bytes 0-3 hold the box size, bytes 4-7 must be
    # the box type 'ftyp'. A header shorter than 8 bytes cannot match.
    return header[4:8] == b'ftyp'
def cleanup_temp_files(file_path: Path) -> None:
    """
    Clean up temporary files created by yt-dlp.

    For each temporary extension, the file "<base name><extension>" next to
    ``file_path`` is deleted if present. The extension is appended to the
    whole base name instead of using ``Path.with_suffix``, because
    ``with_suffix`` treats everything after the last dot as the extension
    and would turn "Mr. Big - Song" into "Mr.webp".

    Args:
        file_path: Base file path (without extension). A path that still
            ends in a known media extension (e.g. ".mp4") is also accepted;
            that extension is dropped before the temporary ones are added.
    """
    temp_extensions = ['.info.json', '.meta', '.webp', '.jpg', '.png']
    # Only these trailing suffixes are treated as a real extension to drop;
    # any other dot is considered part of the artist/title text.
    media_extensions = ('.mp4', '.mkv', '.webm', '.m4a', '.mp3')

    if file_path.suffix.lower() in media_extensions:
        base_name = file_path.stem
    else:
        base_name = file_path.name

    for ext in temp_extensions:
        temp_file = file_path.with_name(base_name + ext)
        try:
            temp_file.unlink()
        except OSError:
            pass  # Best effort: a missing or undeletable file is ignored