"""
File utilities for filename sanitization, path operations, and file validation.

Centralizes common file operations to eliminate code duplication.
"""

import re
from pathlib import Path
from typing import List, Optional, Tuple

# Constants for filename operations
DEFAULT_FILENAME_LENGTH_LIMIT = 100
DEFAULT_ARTIST_LENGTH_LIMIT = 30
DEFAULT_TITLE_LENGTH_LIMIT = 60

# Windows invalid characters
INVALID_FILENAME_CHARS = ["?", ":", "*", '"', "<", ">", "|", "/", "\\"]

# Extension carried by every filename this module produces.
_VIDEO_EXTENSION = ".mp4"

# Deletion tables for str.translate. Pure character deletions commute, so one
# pass gives the same result as a chain of .replace(char, "") calls.
_INVALID_CHARS = "".join(INVALID_FILENAME_CHARS)
_QUOTE_DELETIONS = str.maketrans("", "", "'\"")
_ARTIST_DELETIONS = str.maketrans("", "", _INVALID_CHARS + "'\"")
_TITLE_DELETIONS = str.maketrans("", "", _INVALID_CHARS + ".")
_SONG_TITLE_DELETIONS = str.maketrans("", "", _INVALID_CHARS + "'\".")


def _strip_quotes(text: str) -> str:
    """Remove single and double quotes from text and trim surrounding whitespace."""
    return text.translate(_QUOTE_DELETIONS).strip()


def _truncated_filename(artist: str, title: str) -> str:
    """Build "<artist> - <title>.mp4" with both parts cut to their default limits."""
    short_artist = artist[:DEFAULT_ARTIST_LENGTH_LIMIT]
    short_title = title[:DEFAULT_TITLE_LENGTH_LIMIT]
    return f"{short_artist} - {short_title}{_VIDEO_EXTENSION}"


def _is_nonempty_file(path: Path) -> bool:
    """Return True if path can be stat'ed and has a size greater than zero.

    A single guarded stat() replaces exists() followed by stat(): it cannot
    raise if the file disappears in between, and paths the OS rejects (for
    example names that are too long) are reported as missing.
    """
    try:
        return path.stat().st_size > 0
    except (OSError, ValueError):
        return False


def sanitize_filename(
    artist: str, title: str, max_length: int = DEFAULT_FILENAME_LENGTH_LIMIT
) -> str:
    """
    Create a safe filename from artist and title.

    Args:
        artist: Song artist name
        title: Song title
        max_length: Maximum filename length (default: 100)

    Returns:
        Sanitized filename string of the form "<artist> - <title>.mp4".
        The ".mp4" extension is always kept, even for a very small max_length.
    """
    # Order matters here: "(From " and ")" are dropped before " - " is
    # collapsed, and the remaining characters are deleted only afterwards.
    safe_title = title.replace("(From ", "").replace(")", "").replace(" - ", " ")
    # Delete quotes, invalid filename characters and every dot from the title.
    safe_title = safe_title.translate(_SONG_TITLE_DELETIONS).strip()

    # The artist loses quotes and invalid characters but keeps its dots.
    safe_artist = artist.translate(_ARTIST_DELETIONS).strip()

    filename = f"{safe_artist} - {safe_title}{_VIDEO_EXTENSION}"
    if len(filename) <= max_length:
        return filename

    # Too long: shorten artist and title to their default limits.
    filename = _truncated_filename(safe_artist, safe_title)

    # The default limits add up to 97 characters, so this only triggers when
    # the caller passes a smaller max_length. Trim the stem so that the
    # requested limit is actually honoured.
    if len(filename) > max_length:
        stem_budget = max(max_length - len(_VIDEO_EXTENSION), 0)
        stem = filename[: -len(_VIDEO_EXTENSION)][:stem_budget].rstrip()
        filename = f"{stem}{_VIDEO_EXTENSION}"

    return filename


def generate_possible_filenames(
    artist: str, title: str, channel_name: str
) -> List[str]:
    """
    Generate possible filename patterns for different download modes.

    Args:
        artist: Song artist name
        title: Song title
        channel_name: Channel name

    Returns:
        List of possible filename patterns
    """
    safe_title = sanitize_title_for_filenames(title)
    safe_artist = _strip_quotes(artist)

    return [
        f"{safe_artist} - {safe_title}.mp4",  # Songlist mode
        f"{channel_name} - {safe_title}.mp4",  # Latest-per-channel mode
        f"{safe_artist} - {safe_title} (Karaoke Version).mp4",  # Channel videos mode
    ]


def sanitize_title_for_filenames(title: str) -> str:
    """
    Sanitize title specifically for filename generation.

    Removes every character in INVALID_FILENAME_CHARS and every dot, then
    trims surrounding whitespace.

    Args:
        title: Song title

    Returns:
        Sanitized title string
    """
    return title.translate(_TITLE_DELETIONS).strip()


def check_file_exists_with_patterns(
    downloads_dir: Path, channel_name: str, artist: str, title: str
) -> Tuple[bool, Optional[Path]]:
    """
    Check if a file exists using multiple possible filename patterns.

    Args:
        downloads_dir: Base downloads directory
        channel_name: Channel name
        artist: Song artist
        title: Song title

    Returns:
        Tuple of (exists, file_path) where file_path is None if not found.
        Only files with a size greater than zero count as existing.
    """
    channel_dir = downloads_dir / channel_name

    # Every over-long candidate maps to the same shortened name, so it is
    # built at most once and checked at most once.
    shortened: Optional[str] = None
    checked = set()

    for candidate in generate_possible_filenames(artist, title, channel_name):
        if len(candidate) > DEFAULT_FILENAME_LENGTH_LIMIT:
            # Apply length limits if needed
            if shortened is None:
                shortened = _truncated_filename(
                    _strip_quotes(artist), sanitize_title_for_filenames(title)
                )
            candidate = shortened

        if candidate in checked:
            continue
        checked.add(candidate)

        file_path = channel_dir / candidate
        if _is_nonempty_file(file_path):
            return True, file_path

    return False, None


def ensure_directory_exists(directory: Path) -> None:
    """
    Ensure a directory exists, creating it if necessary.

    Missing parent directories are created as well; an already existing
    directory is not an error.

    Args:
        directory: Directory path to ensure exists
    """
    directory.mkdir(parents=True, exist_ok=True)


def is_valid_mp4_file(file_path: Path) -> bool:
    """
    Check if a file is a valid MP4 file.

    The check is deliberately shallow: the file must have a ".mp4" extension
    (case-insensitive), be non-empty, and carry the "ftyp" marker in bytes
    4-8 of its header.

    Args:
        file_path: Path to the file to check

    Returns:
        True if file is a valid MP4, False otherwise
    """
    # Check file extension
    if file_path.suffix.lower() != _VIDEO_EXTENSION:
        return False

    # Check existence and file size
    if not _is_nonempty_file(file_path):
        return False

    # Basic MP4 header check: bytes 0-3 hold the box size, bytes 4-8 must be
    # 'ftyp'. A file shorter than 8 bytes yields a shorter slice and fails.
    try:
        with open(file_path, "rb") as f:
            header = f.read(8)
    except OSError:
        return False

    return header[4:8] == b"ftyp"


def cleanup_temp_files(file_path: Path) -> None:
    """
    Clean up temporary files created by yt-dlp.

    Each temporary extension is appended to the base name instead of being
    swapped in with Path.with_suffix(), which would cut a base name such as
    "Mr. Big - Song" at its last dot and target "Mr.info.json".

    Args:
        file_path: Base file path (without extension). A path that still
            ends in ".mp4" is accepted too; that extension is stripped first.
    """
    temp_extensions = [".info.json", ".meta", ".webp", ".jpg", ".png"]

    base = file_path
    if file_path.suffix.lower() == _VIDEO_EXTENSION:
        base = file_path.with_suffix("")

    for ext in temp_extensions:
        temp_file = base.with_name(base.name + ext)
        try:
            temp_file.unlink()
        except OSError:
            pass  # Missing file or failed removal: cleanup is best-effort