201 lines
5.5 KiB
Python
201 lines
5.5 KiB
Python
"""
|
|
File utilities for filename sanitization, path operations, and file validation.

Centralizes common file operations to eliminate code duplication.
|
|
"""
|
|
|
|
import re
|
|
|
|
from pathlib import Path
|
|
from typing import List, Optional, Tuple
|
|
|
|
# Length budgets used when building "<artist> - <title>.mp4" filenames.
DEFAULT_FILENAME_LENGTH_LIMIT = 100  # names longer than this get truncated
DEFAULT_ARTIST_LENGTH_LIMIT = 30  # artist characters kept when truncating
DEFAULT_TITLE_LENGTH_LIMIT = 60  # title characters kept when truncating

# Characters that are not allowed in Windows filenames.
INVALID_FILENAME_CHARS = list('?:*"<>|/\\')
|
|
|
|
|
|
def sanitize_filename(
    artist: str, title: str, max_length: int = DEFAULT_FILENAME_LENGTH_LIMIT
) -> str:
    """
    Create a safe ``"<artist> - <title>.mp4"`` filename from artist and title.

    Quotes and every character in ``INVALID_FILENAME_CHARS`` are removed from
    both parts. The title additionally loses ``"(From "``, ``")"`` and all
    dots, and has ``" - "`` collapsed to a single space so it cannot be
    confused with the artist/title separator.

    If the result is longer than ``max_length`` the artist and title are cut
    to ``DEFAULT_ARTIST_LENGTH_LIMIT`` / ``DEFAULT_TITLE_LENGTH_LIMIT``
    characters. Should that still not fit (only possible for small
    ``max_length`` values), both parts are shortened further so the result
    respects ``max_length``.

    Args:
        artist: Song artist name
        title: Song title
        max_length: Maximum filename length (default: 100). Values smaller
            than the fixed ``" - "`` + ``".mp4"`` overhead cannot be honoured;
            the result is then just that overhead.

    Returns:
        Sanitized filename string
    """
    separator = " - "
    extension = ".mp4"

    # Multi-character patterns go first and in this order: removing ")" can
    # expose a " - " that must still be collapsed.
    safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ")

    # Every remaining removal is a single character, so one translate pass
    # per string is equivalent to the chained replace() calls.
    unsafe_chars = "".join(INVALID_FILENAME_CHARS) + "'\""
    safe_artist = artist.translate(str.maketrans("", "", unsafe_chars)).strip()
    # Dots are dropped from the title only; the artist keeps them.
    safe_title = safe_title.translate(
        str.maketrans("", "", unsafe_chars + ".")
    ).strip()

    filename = f"{safe_artist}{separator}{safe_title}{extension}"
    if len(filename) <= max_length:
        return filename

    # Standard truncation: fixed per-part limits.
    short_artist = safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]
    short_title = safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]

    # Characters left for artist + title once the fixed parts are paid for.
    budget = max(max_length - len(separator) - len(extension), 0)
    if len(short_artist) + len(short_title) > budget:
        # The fixed limits overshoot a small max_length: give the artist at
        # most a third of the budget and the title whatever remains.
        short_artist = short_artist[: budget // 3]
        short_title = short_title[: budget - len(short_artist)]

    return f"{short_artist}{separator}{short_title}{extension}"
|
|
|
|
|
|
def generate_possible_filenames(
    artist: str, title: str, channel_name: str
) -> List[str]:
    """
    Build the candidate filenames a song may have been saved under.

    Args:
        artist: Song artist name
        title: Song title
        channel_name: Channel name (used verbatim, not sanitized)

    Returns:
        Three filename patterns, in order: songlist mode,
        latest-per-channel mode, channel videos mode
    """
    cleaned_title = sanitize_title_for_filenames(title)
    # Only quote characters are removed from the artist here.
    # NOTE(review): sanitize_filename() also strips INVALID_FILENAME_CHARS
    # from the artist - confirm whether this difference is intentional.
    cleaned_artist = artist.translate(str.maketrans("", "", "'\"")).strip()

    songlist_name = f"{cleaned_artist} - {cleaned_title}.mp4"
    latest_per_channel_name = f"{channel_name} - {cleaned_title}.mp4"
    channel_videos_name = (
        f"{cleaned_artist} - {cleaned_title} (Karaoke Version).mp4"
    )
    return [songlist_name, latest_per_channel_name, channel_videos_name]
|
|
|
|
|
|
def sanitize_title_for_filenames(title: str) -> str:
    """
    Strip characters from a title that must not appear in a filename.

    Args:
        title: Song title

    Returns:
        The title without any INVALID_FILENAME_CHARS and without dots,
        with surrounding whitespace removed
    """
    # Dropping every single "." also covers the "..." and ".." patterns.
    dropped = set(INVALID_FILENAME_CHARS) | {"."}
    kept = [ch for ch in title if ch not in dropped]
    return "".join(kept).strip()
|
|
|
|
|
|
def check_file_exists_with_patterns(
    downloads_dir: Path, channel_name: str, artist: str, title: str
) -> Tuple[bool, Optional[Path]]:
    """
    Check if a file exists using multiple possible filename patterns.

    Each pattern from generate_possible_filenames() is looked up inside
    ``downloads_dir / channel_name``. A pattern longer than
    DEFAULT_FILENAME_LENGTH_LIMIT is replaced by the truncated
    ``"<artist> - <title>.mp4"`` form. Empty (zero-byte) files do not count
    as existing.

    Args:
        downloads_dir: Base downloads directory
        channel_name: Channel name
        artist: Song artist
        title: Song title

    Returns:
        Tuple of (exists, file_path) where file_path is None if not found
    """
    channel_dir = downloads_dir / channel_name

    # The truncated fallback is the same for every over-long pattern, so it
    # is built at most once instead of on every loop iteration.
    truncated_filename: Optional[str] = None
    # Over-long patterns all collapse to the same fallback; remember what was
    # already probed so the same path is not hit on disk more than once.
    already_checked = set()

    for filename in generate_possible_filenames(artist, title, channel_name):
        if len(filename) > DEFAULT_FILENAME_LENGTH_LIMIT:
            if truncated_filename is None:
                safe_artist = artist.replace("'", "").replace('"', "").strip()
                safe_title = sanitize_title_for_filenames(title)
                truncated_filename = (
                    f"{safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT]} - "
                    f"{safe_title[:DEFAULT_TITLE_LENGTH_LIMIT]}.mp4"
                )
            filename = truncated_filename

        if filename in already_checked:
            continue
        already_checked.add(filename)

        file_path = channel_dir / filename
        try:
            if file_path.exists() and file_path.stat().st_size > 0:
                return True, file_path
        except FileNotFoundError:
            # The file vanished between exists() and stat(); treat as absent.
            continue

    return False, None
|
|
|
|
|
|
def ensure_directory_exists(directory: Path) -> None:
    """
    Create a directory, including missing parents, unless it already exists.

    Args:
        directory: Directory path that should exist after the call
    """
    # exist_ok makes the call a no-op for an existing directory.
    directory.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
def is_valid_mp4_file(file_path: Path) -> bool:
    """
    Check if a file is a valid MP4 file.

    A file passes when it has a ``.mp4`` extension (case-insensitive), is
    not empty, and carries the ``ftyp`` marker in bytes 4-8 of its header.
    This is a basic header check only; the rest of the file is not parsed.

    Args:
        file_path: Path to the file to check

    Returns:
        True if file is a valid MP4, False otherwise (including when the
        file is missing or cannot be read)
    """
    # Cheapest check first: the extension needs no filesystem access.
    if file_path.suffix.lower() != ".mp4":
        return False

    try:
        if file_path.stat().st_size == 0:
            return False
        with open(file_path, "rb") as f:
            header = f.read(8)
    except (OSError, ValueError):
        # Missing, unreadable, removed mid-check, or an invalid path:
        # in every case the file is not a usable MP4.
        return False

    # The first four bytes hold a size field; the "ftyp" marker follows it.
    # A header shorter than 8 bytes can never match.
    return header[4:8] == b"ftyp"
|
|
|
|
|
|
def cleanup_temp_files(file_path: Path) -> None:
    """
    Clean up temporary files created by yt-dlp.

    For each known temporary extension, the file ``<base><extension>`` is
    deleted if present. Deletion is best-effort: errors are ignored.

    Args:
        file_path: Base file path (without extension). A path ending in
            ``.mp4`` is also accepted; that extension is stripped first.
    """
    temp_extensions = (".info.json", ".meta", ".webp", ".jpg", ".png")

    # Only a trailing ".mp4" is treated as an extension. Any other dot
    # belongs to the name itself (e.g. "Mr. Big - Song") and must be kept;
    # with_suffix() would cut the name at that dot and target the wrong file.
    if file_path.suffix.lower() == ".mp4":
        base = file_path.with_suffix("")
    else:
        base = file_path

    for ext in temp_extensions:
        temp_file = base.with_name(base.name + ext)
        try:
            temp_file.unlink()
        except OSError:
            pass  # Missing file or failed delete: cleanup is best-effort
|