"""
File utilities for filename sanitization, path operations, and file validation.
Centralizes common file operations to eliminate code duplication.
"""

import re
from pathlib import Path
from typing import List, Optional, Tuple

# Constants for filename operations
DEFAULT_FILENAME_LENGTH_LIMIT = 100
DEFAULT_ARTIST_LENGTH_LIMIT = 30
DEFAULT_TITLE_LENGTH_LIMIT = 60

# Windows invalid characters
INVALID_FILENAME_CHARS = ["?", ":", "*", '"', "<", ">", "|", "/", "\\"]

# Extension used for every filename this module generates.
_MP4_SUFFIX = ".mp4"

# Video extensions that cleanup_temp_files() strips before building sidecar names.
_VIDEO_SUFFIXES = (".mp4", ".mkv", ".webm")

# Character-deletion tables (one str.translate pass instead of chained replaces).
_INVALID_CHARS = "".join(INVALID_FILENAME_CHARS)
_DROP_QUOTES = str.maketrans("", "", "'\"")
_DROP_INVALID_AND_QUOTES = str.maketrans("", "", _INVALID_CHARS + "'\"")
_DROP_INVALID_AND_DOTS = str.maketrans("", "", _INVALID_CHARS + ".")
_DROP_INVALID_QUOTES_AND_DOTS = str.maketrans("", "", _INVALID_CHARS + "'\".")


def _compose_filename(artist_part: str, title_part: str) -> str:
    """Join artist and title into "<artist> - <title>.mp4" (title only if no artist)."""
    if artist_part:
        return f"{artist_part} - {title_part}{_MP4_SUFFIX}"
    return f"{title_part}{_MP4_SUFFIX}"


def _truncated_filename(safe_artist: str, safe_title: str) -> str:
    """Build the filename with artist and title cut to their default length limits."""
    return _compose_filename(
        safe_artist[:DEFAULT_ARTIST_LENGTH_LIMIT],
        safe_title[:DEFAULT_TITLE_LENGTH_LIMIT],
    )


def _is_nonempty_file(path: Path) -> bool:
    """Return True if *path* can be stat'ed and has a size greater than zero."""
    try:
        # Single stat() call: avoids the exists()/stat() race and unreadable paths.
        return path.stat().st_size > 0
    except (OSError, ValueError):
        return False


def _find_existing(channel_dir: Path, filename: str) -> Optional[Path]:
    """Return the first non-empty match for *filename* or its " (2)".." (9)" variants."""
    exact_path = channel_dir / filename
    if _is_nonempty_file(exact_path):
        return exact_path

    # Drop only the trailing extension; str.replace would also remove any
    # ".mp4" that happens to appear inside the name.
    base_name = filename[: -len(_MP4_SUFFIX)]
    for suffix in range(2, 10):  # Check up to (9)
        suffixed_path = channel_dir / f"{base_name} ({suffix}){_MP4_SUFFIX}"
        if _is_nonempty_file(suffixed_path):
            return suffixed_path
    return None


def sanitize_filename(
    artist: str, title: str, max_length: int = DEFAULT_FILENAME_LENGTH_LIMIT
) -> str:
    """
    Create a safe filename from artist and title.

    Args:
        artist: Song artist name
        title: Song title
        max_length: Maximum filename length (default: 100)

    Returns:
        Sanitized filename string ending in ".mp4". The result is at most
        max_length characters long, as long as max_length leaves room for
        the extension plus one character.
    """
    # Substring clean-ups must run before the single-character deletions,
    # otherwise removing ":" could create a new " - " sequence.
    safe_title = title.replace("(From ", "").replace(" - ", " ")
    safe_title = safe_title.translate(_DROP_INVALID_QUOTES_AND_DOTS).strip()
    safe_artist = artist.translate(_DROP_INVALID_AND_QUOTES).strip()

    filename = _compose_filename(safe_artist, safe_title)

    # Limit filename length if needed
    if len(filename) > max_length:
        filename = _truncated_filename(safe_artist, safe_title)

    # The default artist/title limits only guarantee 97 characters; honour a
    # smaller caller-supplied max_length by trimming the stem as a last resort.
    if len(filename) > max_length:
        keep = max(max_length - len(_MP4_SUFFIX), 1)
        stem = filename[: -len(_MP4_SUFFIX)][:keep].rstrip()
        filename = f"{stem}{_MP4_SUFFIX}"

    return filename


def generate_possible_filenames(
    artist: str, title: str, channel_name: str
) -> List[str]:
    """
    Generate possible filename patterns for different download modes.

    Args:
        artist: Song artist name
        title: Song title
        channel_name: Channel name

    Returns:
        List of possible filename patterns
    """
    safe_title = sanitize_title_for_filenames(title)
    safe_artist = artist.translate(_DROP_QUOTES).strip()

    # Handle empty artist case
    if not safe_artist:
        return [
            f"{safe_title}.mp4",  # Songlist mode (no artist)
            f"{channel_name} - {safe_title}.mp4",  # Latest-per-channel mode
            f"{safe_title} (Karaoke Version).mp4",  # Channel videos mode (no artist)
        ]

    return [
        f"{safe_artist} - {safe_title}.mp4",  # Songlist mode
        f"{channel_name} - {safe_title}.mp4",  # Latest-per-channel mode
        f"{safe_artist} - {safe_title} (Karaoke Version).mp4",  # Channel videos mode
    ]


def sanitize_title_for_filenames(title: str) -> str:
    """
    Sanitize title specifically for filename generation.

    Removes every character in INVALID_FILENAME_CHARS and every dot, then
    strips surrounding whitespace.

    Args:
        title: Song title

    Returns:
        Sanitized title string
    """
    return title.translate(_DROP_INVALID_AND_DOTS).strip()


def check_file_exists_with_patterns(
    downloads_dir: Path, channel_name: str, artist: str, title: str
) -> Tuple[bool, Optional[Path]]:
    """
    Check if a file exists using multiple possible filename patterns.
    Also checks for files with (2), (3), etc. suffixes that yt-dlp might create.

    Only non-empty files count as existing.

    Args:
        downloads_dir: Base downloads directory
        channel_name: Channel name
        artist: Song artist
        title: Song title

    Returns:
        Tuple of (exists, file_path) where file_path is None if not found
    """
    channel_dir = downloads_dir / channel_name

    # Loop-invariant pieces of the length-limited fallback name.
    safe_artist = artist.translate(_DROP_QUOTES).strip()
    safe_title = sanitize_title_for_filenames(title)

    already_checked = set()
    for filename in generate_possible_filenames(artist, title, channel_name):
        if len(filename) > DEFAULT_FILENAME_LENGTH_LIMIT:
            # Apply length limits if needed
            filename = _truncated_filename(safe_artist, safe_title)

        # Several over-long patterns collapse to the same truncated name;
        # probing the filesystem for it once is enough.
        if filename in already_checked:
            continue
        already_checked.add(filename)

        found = _find_existing(channel_dir, filename)
        if found is not None:
            return True, found

    return False, None


def get_unique_filename(
    downloads_dir: Path, channel_name: str, artist: str, title: str
) -> Tuple[Path, bool]:
    """
    Get a unique filename for download, checking for existing files including duplicates.

    Args:
        downloads_dir: Base downloads directory
        channel_name: Channel name
        artist: Song artist
        title: Song title

    Returns:
        Tuple of (file_path, is_existing) where is_existing indicates if a file already exists
    """
    filename = sanitize_filename(artist, title)
    channel_dir = downloads_dir / channel_name

    # Check if file already exists
    exists, existing_path = check_file_exists_with_patterns(
        downloads_dir, channel_name, artist, title
    )
    if not (exists and existing_path):
        # sanitize_filename() cleans names more aggressively than the pattern
        # list (quotes, "(From ", " - "), so the target itself must be probed
        # too or an existing download would be reported as missing.
        existing_path = _find_existing(channel_dir, filename)

    if existing_path is not None:
        print(f"📁 File already exists: {existing_path.name}")
        return existing_path, True

    return channel_dir / filename, False


def ensure_directory_exists(directory: Path) -> None:
    """
    Ensure a directory exists, creating it if necessary.

    Missing parent directories are created as well.

    Args:
        directory: Directory path to ensure exists
    """
    directory.mkdir(parents=True, exist_ok=True)


def is_valid_mp4_file(file_path: Path) -> bool:
    """
    Check if a file is a valid MP4 file.

    This is a cheap sanity check, not a full container validation: the file
    must have a ".mp4" extension (case-insensitive), be readable, and carry
    the "ftyp" marker in bytes 4-7.

    Args:
        file_path: Path to the file to check

    Returns:
        True if file is a valid MP4, False otherwise
    """
    # Check file extension
    if file_path.suffix.lower() != _MP4_SUFFIX:
        return False

    # Missing, empty, or unreadable files all end up as False here: open()
    # fails, or fewer than 8 bytes come back.
    try:
        with open(file_path, "rb") as f:
            header = f.read(8)
    except (OSError, ValueError):
        return False

    # Basic MP4 header check: bytes 0-3 hold the box size, bytes 4-7 are "ftyp".
    return len(header) == 8 and header[4:8] == b"ftyp"


def cleanup_temp_files(file_path: Path) -> None:
    """
    Clean up temporary files created by yt-dlp.

    Deletion is best-effort: files that are missing or cannot be removed are
    silently skipped.

    Args:
        file_path: Base file path (without extension). A path that still ends
            in a video extension (.mp4, .mkv, .webm) is accepted too; that
            extension is dropped before the sidecar names are built.
    """
    temp_extensions = [".info.json", ".meta", ".webp", ".jpg", ".png"]

    # Append to the base name instead of using with_suffix(): for a base such
    # as "Mr. Big - Song", with_suffix() treats ". Big - Song" as the suffix
    # and would target "Mr.info.json".
    base_name = file_path.name
    if file_path.suffix.lower() in _VIDEO_SUFFIXES:
        base_name = file_path.stem

    for ext in temp_extensions:
        temp_file = file_path.with_name(base_name + ext)
        try:
            temp_file.unlink()
        except OSError:
            pass  # Ignore cleanup errors